# This notebook takes in netCDF-based climate model predictions and formats them for use in the SWB model 
Some notes: 
- The projection of these files seems to be off. Therefore I had to manually shift them somewhat to line them up with what I estimated to be the right location.
- So far I have only done rainfall. If I wanted to do temperature too, I would have to do the temperature max and min...

In [2]:
# Make the notebook page wider: inject a small CSS override for the notebook
# container, menu bar and main toolbar (display only; nothing is computed here).
from IPython.display import display, HTML
display(HTML(data=""" <style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 85%; }
    div#maintoolbar-container { width: 99%; } </style> """))

# modules
import os
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
from pyproj import Proj, transform
import pandas as pd
%matplotlib inline

# functions 
def writeArrayToArcGrid(arr,filename,xll,yll,cellsize,no_data_val):
    """Write a 2-D array to ``filename`` as an ASCII grid (.asc) file.

    The file starts with a six-line header (NCOLS, NROWS, XLLCENTER,
    YLLCENTER, CELLSIZE, NODATA_value) followed by the array rows in their
    stored order, every value formatted as ``%5.2f``.  NaN cells are written
    as ``no_data_val``.  The caller's array is not modified.
    """
    grid = np.copy(arr)                      # private copy, so the input stays untouched
    nan_cells = np.isnan(grid)
    grid[nan_cells] = no_data_val

    n_rows = grid.shape[0]
    n_cols = grid.shape[1]
    header_text = 'NCOLS %d\nNROWS %d\nXLLCENTER %f\nYLLCENTER %f\nCELLSIZE %f\nNODATA_value %f\n' % (
        n_cols, n_rows, xll, yll, cellsize, no_data_val)

    # binary mode: the encoded header and np.savetxt share the same handle
    with open(filename,'wb') as handle:
        handle.write(header_text.encode('UTF-8'))
        np.savetxt(handle, grid, fmt='%5.2f')
        
# Paths
# Raw netCDF inputs are read from ./RawData and generated grids are written to
# ./ProcessedData.  Every path is built with os.path.join so the separator
# matches the host OS (a hard-coded '\\' only resolves on Windows).
Datapath = os.path.join(".", 'RawData')
Outpath =  os.path.join(".", 'ProcessedData')

# rainfall (RAIN) files: present-day, rcp45 and rcp85 runs
Rain_present = os.path.join(Datapath, 'samoa_hourly_present_RAIN_1990-2009.nc')
Rain_rcp45 = os.path.join(Datapath, 'samoa_hourly_rcp45_RAIN_1990-2009.nc')
Rain_rcp85 = os.path.join(Datapath, 'samoa_hourly_rcp85_RAIN_1990-2009.nc')

# temperature (T2) files for the same three runs
Temp_present = os.path.join(Datapath, 'samoa_hourly_present_T2_1990-2009.nc')
Temp_rcp45 = os.path.join(Datapath, 'samoa_hourly_rcp45_T2_1990-2009.nc')
Temp_rcp85 = os.path.join(Datapath, 'samoa_hourly_rcp85_T2_1990-2009.nc')

In [3]:
# Load the rainfall files and reduce each one to a single mean-annual-total array.
# The same four steps are applied to every scenario:
#   1. open the netCDF file
#   2. select the 'RAIN' variable (time, lat and lon dimensions)
#   3. sum all time steps that fall in the same calendar year -> one array per year
#   4. average along axis 0 to combine the years into one yearly-average array

# --- present day ---
data_R_p = xr.open_dataset(Rain_present)
rain_variable_R_p = data_R_p['RAIN']
yearly_R_p = rain_variable_R_p.groupby('Time.year').sum(dim='Time')
yearlyave_R_p = np.mean(yearly_R_p, axis=0)

# --- rcp45 ---
data_R_45 = xr.open_dataset(Rain_rcp45)
rain_variable_R_45 = data_R_45['RAIN']
yearly_R_45 = rain_variable_R_45.groupby('Time.year').sum(dim='Time')
yearlyave_R_45 = np.mean(yearly_R_45, axis=0)

# --- rcp85 ---
data_R_85 = xr.open_dataset(Rain_rcp85)
rain_variable_R_85 = data_R_85['RAIN']
yearly_R_85 = rain_variable_R_85.groupby('Time.year').sum(dim='Time')
yearlyave_R_85 = np.mean(yearly_R_85, axis=0)

# optional quick-look plots (uncomment to use)
#yearlyave_R_p.plot(); plt.show()
#yearlyave_R_45.plot() ; plt.show()
#yearlyave_R_85.plot() ; plt.show()

FileNotFoundError: [Errno 2] No such file or directory: b'C:\\Users\\cshuler\\Desktop\\Github_repo_copies\\Tutuila-SWB-Scenarios\\Raw_GIS_Data\\Climate_scenarios\\RawData\\samoa_hourly_present_RAIN_1990-2009.nc'

### Rainfall data yearly loop. This cell also defines some of the geometry attributes used by the monthly loops, so it needs to be run first.

In [3]:
scene_list = [yearlyave_R_p,  yearlyave_R_45,  yearlyave_R_85]     # list of the processed yearly average arrays from above
name_list = ["present", "rcp45", "rcp85"]                          # scenario tags used in the output file names

# Project the model's lon/lat points from geographic WGS84 to UTM zone 2 South.
# Only the extreme values are used, to work out the grid origin and extent.
# NOTE(review): pyproj.transform is deprecated in pyproj 2.x and later in favour of
# pyproj.Transformer - worth migrating when the environment is updated.
inProj = Proj("+proj=latlon +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0")   # was "+ datum=..." (stray space split the parameter)
outProj = Proj("+proj=utm +zone=2 +south +ellps=WGS84 +datum=WGS84 +units=m +no_defs ")
LonData = np.array(data_R_p['XLONG']).flatten()
LatData = np.array(data_R_p['XLAT']).flatten()
LonDataUTM, LatDataUTM = transform(inProj, outProj, LonData , LatData)
MaxY = round(max(LatDataUTM))
MinY = round(min(LatDataUTM))
MaxX = round(max(LonDataUTM))
MinX = round(min(LonDataUTM))
Lx = MaxX - MinX     # easting extent
Ly = MaxY - MinY     # northing extent
# The grids are 26 rows (south_north) by 50 columns (west_east), so the x extent is
# divided by 50 and the y extent by 26 (the two divisors were previously swapped).
Dx = Lx/50
Dy = Ly/26
# The projection of the netCDF file seems to be off; these manual offsets shift the
# origin to more reasonable values.
XLLCORNER =  MinX+2800
YLLCORNER = MinY+400

# Write one yearly-average grid per scenario.
# - each array is reversed along axis 0 before writing (presumably because the model
#   rows run south to north while an .asc grid lists the top row first - confirm)
# - 0.0393701 is inches per millimetre (assumes RAIN is in mm - confirm)
# - the cell size is hard-coded to 800 rather than taken from Dx/Dy
# NOTE(review): writeArrayToArcGrid labels the origin XLLCENTER/YLLCENTER in the header
# although the values passed here are named *CORNER - confirm which is intended.
for scene, scene_name in zip(scene_list, name_list):
    Why_upsidedown = np.flip(scene, axis=0)
    writeArrayToArcGrid(Why_upsidedown*0.0393701, os.path.join(Outpath,  "yearly_rainfall_{}.asc".format(scene_name)), XLLCORNER, YLLCORNER, 800, -999)

### Rainfall Monthly Loop 
Same as above, but loops over each month to create the monthly files that the model will actually use.

In [None]:
# Mean monthly rainfall for each scenario.
# Grouping by calendar month and summing adds that month from every year of the record
# together; dividing by 20 (the dataset is 20 years long) gives the average total for
# each of the 12 months.
monthly_totals = [
    rain.groupby('Time.month').sum(dim='Time')
    for rain in (rain_variable_R_p, rain_variable_R_45, rain_variable_R_85)
]
monthly_R_p, monthly_R_45, monthly_R_85 = [total/20 for total in monthly_totals]
monthly = monthly_totals[-1]      # left bound to the rcp85 sums, as before

scene_list = [monthly_R_p,  monthly_R_45,  monthly_R_85]
Data_list = [data_R_p,  data_R_45,  data_R_85] 
name_list = ["present", "rcp45", "rcp85"]

# one .asc grid per scenario and per month (month numbers 1..12 in the file name)
for scenario_name, Why_upsidedown in zip(name_list, scene_list):
    for mo in range(1,13):
        scaled_month = Why_upsidedown[mo-1]*0.0393701          # same scale factor as the yearly grids
        out_file = os.path.join(Outpath,  "month_{}_rainfall_{}.asc".format(mo, scenario_name))
        writeArrayToArcGrid(np.flip(scaled_month, axis=0), out_file, XLLCORNER, YLLCORNER, 800, -999)

In [None]:
# exploratory below

7305.0

In [6]:
# Temperature data preparation: open the three temperature netCDF files
# (present, rcp45, rcp85) ...
data_T_p, data_T_45, data_T_85 = [
    xr.open_dataset(nc_path) for nc_path in (Temp_present, Temp_rcp45, Temp_rcp85)
]

# ... and pull the 'T2' temperature variable out of each dataset
temp_variable_T_p, temp_variable_T_45, temp_variable_T_85 = [
    dataset['T2'] for dataset in (data_T_p, data_T_45, data_T_85)
]

7305.0

7305.0

In [29]:
# Scratch check: take the first 48 time steps and compute the per-cell minimum for
# each day-of-month value found in that window.
sliceo = temp_variable_T_p[:48, :, :]
sliceo.shape
pogomin = sliceo.groupby('Time.day').min(dim='Time')
pogomin.shape


(2, 26, 50)

In [19]:
# Scratch test: per-day minimum and maximum over the first 100 time steps, with the
# first day of each written out as a test grid in the working directory.
sliceo = temp_variable_T_p[:100, :, :]
pogomin = sliceo.groupby('Time.day').min(dim='Time')
pogomax = sliceo.groupby('Time.day').max(dim='Time')

# NOTE(review): unlike the rainfall exports, these grids are written without
# np.flip(..., axis=0) and without any unit conversion - confirm that is intended.
for test_grid, test_name in ((pogomax, "maxtemptest.asc"), (pogomin, "mintemptest.asc")):
    writeArrayToArcGrid(test_grid[0,:,:], os.path.join(".", test_name), XLLCORNER, YLLCORNER, 800, -999)

In [9]:
# Trial run of the daily-minimum calculation on the first 7 days only
# (the full record would be 7305 days, i.e. 20 years).
dayminlist = []
n_trial_days = 7
for day_index in range(n_trial_days):
    first_step = 24*day_index                                        # 24 time steps per day
    one_day = temp_variable_T_p[first_step:first_step+24, :, :]
    dayminlist.append(one_day.groupby('Time.day').min(dim='Time'))

    print(day_index)                                                 # progress indicator

# join the per-day results along the 'day' dimension
daily_min = xr.concat(dayminlist, dim='day')

0
1
2
3
4
5
6


In [10]:
# check what kind of object the concatenation above produced
type(daily_min)

xarray.core.dataarray.DataArray

In [17]:
# scratch export: save the daily-minimum stack as a netCDF file in the output folder
daily_min.to_netcdf(os.path.join(Outpath, "shitdong.nc"))

In [13]:
# IPython magic: show the current working directory (relative paths above resolve against it)
pwd

'C:\\Users\\WRRCuser\\Desktop\\NewWork\\SWB2_model\\Raw_GIS_Data\\Climate_scenarios'

In [11]:
# Daily maximum temperature: reduce each consecutive block of 24 time steps to its
# per-cell maximum, then stack the daily results.
# The list is created here so the cell runs on a fresh kernel and re-running it
# does not append a second copy of every day.
arr_list = []
for i in range (0, 7305):    # 7305 is number of days in 20 years
    step = 24*i
    arr =  np.amax(temp_variable_T_p[0+step:24+step,:,:],0)   # maximum along axis 0 of the 24-step block
    arr_list.append(arr)
# `arr` stays bound to the last day's array; later scratch cells read it

daily_max =  xr.concat(arr_list, dim='time')

<xarray.DataArray 'T2' (Time: 24, west_east: 50)>
array([[ 300.640656,  300.638794,  300.63858 , ...,  300.712738,  300.717499,
         300.714447],
       [ 300.825165,  300.828766,  300.832947, ...,  300.716064,  300.671875,
         300.696045],
       [ 300.867126,  300.862274,  300.858185, ...,  300.822205,  300.817322,
         300.81546 ],
       ..., 
       [ 300.244202,  300.182739,  300.205841, ...,  299.690277,  299.882355,
         299.920471],
       [ 300.4021  ,  300.390289,  300.384125, ...,  300.54892 ,  300.529694,
         300.517151],
       [ 300.686218,  300.684723,  300.674866, ...,  300.772125,  300.76886 ,
         300.762329]], dtype=float32)
Coordinates:
  * Time     (Time) datetime64[ns] 1990-01-03 1990-01-03T01:00:00 ...
Dimensions without coordinates: west_east


In [73]:
# scratch: a date index of 7305 entries starting 1990-01-01 (not used further in this cell)
times = pd.date_range('1990-01-01', periods=7305)

# wrap one 2-D array as a DataArray with named dimensions
da = xr.DataArray(arr, dims=['south_north', 'west_east']) 
da.dims
# stack two arrays along a new 'time' dimension and display the result
# NOTE(review): arr2 is not defined anywhere in the visible notebook - confirm where it comes from
bong = xr.concat([arr, arr2], dim='time')
bong

<xarray.DataArray 'T2' (time: 2, south_north: 26, west_east: 50)>
array([[[ 299.953552,  299.870758, ...,  300.102478,  300.103333],
        [ 299.987   ,  299.875183, ...,  300.110809,  300.106842],
        ..., 
        [ 299.669952,  299.590088, ...,  299.801178,  299.828644],
        [ 299.571808,  299.521912, ...,  299.857513,  299.866882]],

       [[ 300.04361 ,  300.039124, ...,  299.686584,  299.70163 ],
        [ 300.032684,  300.024689, ...,  299.937408,  299.938263],
        ..., 
        [ 300.007965,  299.998291, ...,  299.905182,  299.973846],
        [ 300.023621,  300.018097, ...,  299.858429,  299.917175]]], dtype=float32)
Coordinates:
    XLAT     (south_north, west_east) float32 -14.4063 -14.4063 -14.4063 ...
    XLONG    (south_north, west_east) float32 -170.905 -170.898 -170.891 ...
Dimensions without coordinates: time, south_north, west_east

In [81]:
# scratch: same two-array stack along a new 'time' dimension as the previous cell
# NOTE(review): arr2 is not defined anywhere in the visible notebook - confirm where it comes from
bong = xr.concat([arr, arr2], dim='time')

In [None]:
# scratch: re-open the present-day temperature file
data_T_p = xr.open_dataset(Temp_present)

#step = 24*i
#np.shape(data_T_p["T2"][0+step:24+step,:,:])

# shape of the T2 array (not echoed, because it is not the last expression in the cell)
np.shape(data_T_p['T2'])
# number of days in 20 years at 365.25 days per year
20*365.25

### Temperature Monthly Loop 
Same as above, but loops over each month to create the monthly files that the model will actually use.

In [None]:
# NOTE(review): this cell sits under the "Temperature Monthly Loop" heading but only
# reads the rainfall variables and writes the month_*_rainfall_* files - it looks like
# a copy of the rainfall monthly loop that has not been adapted to temperature yet.

# Grouping by calendar month and summing adds that month from every year of the record
# together; dividing by 20 (the dataset is 20 years long) gives the average total for
# each of the 12 months.
monthly_totals = [
    rain.groupby('Time.month').sum(dim='Time')
    for rain in (rain_variable_R_p, rain_variable_R_45, rain_variable_R_85)
]
monthly_R_p, monthly_R_45, monthly_R_85 = [total/20 for total in monthly_totals]
monthly = monthly_totals[-1]      # left bound to the rcp85 sums, as before

scene_list = [monthly_R_p,  monthly_R_45,  monthly_R_85]
Data_list = [data_R_p,  data_R_45,  data_R_85] 
name_list = ["present", "rcp45", "rcp85"]

# one .asc grid per scenario and per month (month numbers 1..12 in the file name)
for scenario_name, Why_upsidedown in zip(name_list, scene_list):
    for mo in range(1,13):
        scaled_month = Why_upsidedown[mo-1]*0.0393701
        out_file = os.path.join(Outpath,  "month_{}_rainfall_{}.asc".format(mo, scenario_name))
        writeArrayToArcGrid(np.flip(scaled_month, axis=0), out_file, XLLCORNER, YLLCORNER, 800, -999)