In [2]:
import numpy as np
import pandas as pd
import xarray as xr
from joblib import dump, load
from sklearn.model_selection import train_test_split
import math as mt
from math import e
from sklearn.preprocessing import StandardScaler

In [3]:
# Read Orography
OroData = xr.open_dataset('../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5IGP_Orography.nc')

In [4]:
#Surface data
t2mData = xr.open_dataset('../../../Data/CMIP6/ACCESS-CM2/tasNHRegrid_NDJFday_ACCESS-CM2_historical_r1i1p1f1_gn_19500101-20141231.nc')
rhData  = xr.open_dataset('../../../Data/CMIP6/ACCESS-CM2/hursNHRegrid_NDJFday_ACCESS-CM2_historical_r1i1p1f1_gn_19500101-20141231.nc')
u10Data = xr.open_dataset('../../../Data/CMIP6/ACCESS-CM2/uasNHRegrid_NDJFday_ACCESS-CM2_historical_r1i1p1f1_gn_19500101-20141231.nc')
v10Data = xr.open_dataset('../../../Data/CMIP6/ACCESS-CM2/vasNHRegrid_NDJFday_ACCESS-CM2_historical_r1i1p1f1_gn_19500101-20141231.nc')


In [5]:
# Level Data
tLevData = xr.open_dataset('../../../Data/CMIP6/ACCESS-CM2/taNHRegrid_NDJFday_ACCESS-CM2_historical_r1i1p1f1_gn_19500101-20141231.nc')
zLevData = xr.open_dataset('../../../Data/CMIP6/ACCESS-CM2/zgNHRegrid_NDJFday_ACCESS-CM2_historical_r1i1p1f1_gn_19500101-20141231.nc')
wLevData = xr.open_dataset('../../../Data/CMIP6/ACCESS-CM2/wapNHRegrid_NDJFday_ACCESS-CM2_historical_r1i1p1f1_gn_19500101-20141231.nc')
uLevData = xr.open_dataset('../../../Data/CMIP6/ACCESS-CM2/uaNHRegrid_NDJFday_ACCESS-CM2_historical_r1i1p1f1_gn_19500101-20141231.nc')

In [6]:
t2mD = t2mData.sel(latitude=slice(32,24),longitude=slice(74,86))
u10D = u10Data.sel(latitude=slice(32,24),longitude=slice(74,86))
v10D = v10Data.sel(latitude=slice(32,24),longitude=slice(74,86))
rhD  = rhData.sel(latitude=slice(32,24),longitude=slice(74,86))
tLevD = tLevData.sel(latitude=slice(32,24),longitude=slice(74,86))
uLevD = uLevData.sel(latitude=slice(32,24),longitude=slice(74,86))
w700D = wLevData.sel(latitude=slice(32,24),longitude=slice(74,86)).sel(plev=70000,method='nearest')
zLevD = zLevData.sel(latitude=slice(32,24),longitude=slice(74,86))
OroD = OroData.sel(latitude=slice(32,24),longitude=slice(74,86))
#w700D

In [7]:
# Calculate wind speed and relative humidity inv  ushear
ws = ((v10D.vas.values**2)+(u10D.uas.values**2))**0.5
ws_ds = xr.Dataset({'ws': (('time','latitude','longitude'), ws)},
                   coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})

rh_ds = xr.Dataset({'rh': (('time','latitude','longitude'), rhD.hurs)},
                   coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})

#Calculate inv
inv=t2mD.tas.values-tLevD.ta.sel(plev=85000,method='nearest').values
inv_ds = xr.Dataset({'inv': (('time','latitude','longitude'), inv)}, 
                    coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})
inv_ds.attrs
inv_ds.attrs['units']='K'
inv_ds.attrs['long_name']='t2m - t850'

#u shear calculation
ushear=(uLevD.ua.sel(plev=85000,method='nearest').values-u10D.uas.values)/(zLevD.zg.sel(plev=85000,method='nearest').values) 
ushear_ds = xr.Dataset({'ushear': (('time','latitude','longitude'), ushear)}, 
                       coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})
ushear_ds.attrs['units']='s-1'
ushear_ds.attrs['long_name']='(u10 - u850)/z850'

In [8]:
AOData = xr.open_dataset('ACCESS-CM2-AOindex-NDJF-Daily-1980-2014.nc')
AO5D=AOData.AO.rolling(time=5).mean()

AO5DAll=AO5D[((AO5D.time.dt.month>11) | (AO5D.time.dt.month<2)) & 
             (AO5D.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))
#ao

In [9]:
EUData = xr.open_dataset('ACCESS-CM2-EUindex-NDJF-Daily-1980-2014.nc')
EU5D=EUData.EU.rolling(time=5).mean()

EU5DAll=EU5D[((EU5D.time.dt.month>11) | (EU5D.time.dt.month<2)) & 
             (EU5D.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))


In [10]:
# create mask
oro = OroD.z
oro.values = OroD.z.values/9.81
oro.attrs
oro.attrs['units']='meter'
oro.attrs['long_name']='Orography'
oro.values[oro.values>500.1]=np.NaN
mask=oro.values/oro.values

In [11]:
# Apply Mask 
t2mD.tas.values=t2mD.tas.values*mask
ws_ds.ws.values=ws_ds.ws.values*mask
rh_ds.rh.values=rh_ds.rh.values*mask
w700D.wap.values=w700D.wap.values*mask
inv_ds.inv.values=inv_ds.inv.values*mask
ushear_ds.ushear.values=ushear_ds.ushear.values*mask

In [12]:
# Feild mean
t2mTS1=t2mD.mean(dim=['latitude','longitude'], skipna=True).tas
wsTS1=ws_ds.mean(dim=['latitude','longitude'], skipna=True).ws
rhTS1=rh_ds.mean(dim=['latitude','longitude'], skipna=True).rh
wTS1=w700D.mean(dim=['latitude','longitude'], skipna=True).wap
invTS1=inv_ds.mean(dim=['latitude','longitude'], skipna=True).inv
ushearTS1=ushear_ds.mean(dim=['latitude','longitude'], skipna=True).ushear

In [13]:
# shift one day so we can take values of previous day
t2mTS=t2mTS1.shift(time=1)
wsTS=wsTS1.shift(time=1)
rhTS=rhTS1.shift(time=1)
wTS=wTS1.shift(time=1)
invTS=invTS1.shift(time=1)
ushearTS=ushearTS1.shift(time=1)

In [14]:
# Select required time 
t2m=t2mTS[((t2mTS.time.dt.month>11) | (t2mTS.time.dt.month<2)) & (t2mTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))
ws=wsTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))
inv=invTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))
rh=rhTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))
w=wTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))
ushear=ushearTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))

In [15]:
X       = pd.DataFrame([t2m.values,ws.values,rh.values/100.0,inv.values,w.values,ushear.values*100.0,AO5DAll.values,
                        EU5DAll.values],
                       index=['t2m', 'ws', 'rh','inv', 'w', 'ushear', 'AO5D', 'EU5D']).T[:]
X.index = pd.to_datetime(t2m.time.values)
X
X.describe(include='all')

Unnamed: 0,t2m,ws,rh,inv,w,ushear,AO5D,EU5D
count,2170.0,2170.0,2170.0,2170.0,2170.0,2170.0,2170.0,2170.0
mean,288.542541,1.216049,0.524149,2.860578,0.011511,0.145055,0.05282,-0.030743
std,2.033577,0.37666,0.150194,1.577638,0.097219,0.121128,1.019916,0.566579
min,281.933624,0.446215,0.165345,-4.763562,-0.453197,-0.331755,-2.8808,-1.761969
25%,287.235466,0.941786,0.404413,1.795293,-0.036699,0.067823,-0.723142,-0.399858
50%,288.632401,1.17048,0.513271,2.904227,0.027308,0.150432,0.07447,0.001088
75%,289.865646,1.436326,0.64037,3.985495,0.074067,0.226875,0.837952,0.35467
max,294.130615,3.104255,0.920271,7.273778,0.303052,0.538976,2.636928,1.408751


In [16]:
norm = StandardScaler().fit(X)
    # transform training data
X = norm.transform(X)
X
X = pd.DataFrame(norm.transform(X))
X.index = pd.to_datetime(t2m.time.values)
X
X.describe(include='all')

Unnamed: 0,0,1,2,3,4,5,6,7
count,2170.0,2170.0,2170.0,2170.0,2170.0,2170.0,2170.0,2170.0
mean,-141.921869,-3.229253,-3.490617,-1.813621,-0.118432,-1.197806,-0.051801,0.054274
std,0.491971,2.65614,6.661118,0.634151,10.290762,8.259508,0.980925,1.765794
min,-143.520725,-8.657992,-19.403595,-4.878239,-49.308249,-33.710553,-2.87327,-5.341247
25%,-142.238082,-5.163312,-8.800951,-2.241825,-5.221535,-6.464089,-0.798099,-1.096107
50%,-141.90013,-3.550596,-3.973087,-1.796076,1.553664,-0.831149,-0.030979,0.153477
75%,-141.601778,-1.675898,1.663769,-1.361447,6.503107,4.381313,0.703315,1.255449
max,-140.56998,10.086051,14.077374,-0.039681,30.741405,25.66289,2.433518,4.540586


In [17]:
regLin=load('../../March2021/Observation_models/LRModel.joblib') 

In [18]:
yLR=regLin.predict(X)
y_predLin_ds=xr.Dataset({'yLR': (('time'), yLR)}, coords={'time': t2m.time})

In [19]:
dump(y_predLin_ds.yLR,'../Model_plots/ACCESS-CM2-LR-Y.joblib')

['../Model_plots/ACCESS-CM2-LR-Y.joblib']

In [20]:
print(yLR)

[-0.67468503 -0.52614679 -0.43258723 ... -0.44668034 -0.38110805
 -0.31323671]


In [21]:
# test on observed data for checking code

In [22]:
testObsData=load('../../Feb2021/Final_Models/AO_EU_SST_included/X.joblib') 

In [23]:
testPred=regLin.predict(testObsData)

In [24]:
print(testPred)

[0.26434078 0.26316564 0.41732781 ... 0.17074161 0.31416257 0.31920742]


In [25]:
regLin2=load('../../March2021/Observation_models/SVRModel.joblib') 
yLR=regLin2.predict(X)
y_predLin_ds=xr.Dataset({'yLR': (('time'), yLR)}, coords={'time': t2m.time})
dump(y_predLin_ds.yLR,'../Model_plots/ACCESS-CM2-SVR-Y.joblib')

['../Model_plots/ACCESS-CM2-SVR-Y.joblib']