In [1]:
import numpy as np
import pandas as pd
import xarray as xr
from joblib import dump, load
from sklearn.model_selection import train_test_split
import math as mt
from math import e
from sklearn.preprocessing import StandardScaler

In [2]:
# Read Orography
OroData = xr.open_dataset('../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5IGP_Orography.nc')

In [3]:
#Surface data
t2mData = xr.open_dataset('../../../Data/CMIP6/CANESM5/RegridNHtas_NDJFday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc')
rhData  = xr.open_dataset('../../../Data/CMIP6/CANESM5/RegridNHhurs_NDJFday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc')
u10Data = xr.open_dataset('../../../Data/CMIP6/CANESM5/RegridNHuas_NDJFday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc')
v10Data = xr.open_dataset('../../../Data/CMIP6/CANESM5/RegridNHvas_NDJFday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc')

In [4]:
# Level Data
tLevData = xr.open_dataset('../../../Data/CMIP6/CANESM5/RegridNHta_NDJFday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc')
zLevData = xr.open_dataset('../../../Data/CMIP6/CANESM5/RegridNHzg_NDJFday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc')
wLevData = xr.open_dataset('../../../Data/CMIP6/CANESM5/RegridNHwap_NDJFday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc')
uLevData = xr.open_dataset('../../../Data/CMIP6/CANESM5/RegridNHua_NDJFday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc')

In [5]:
t2mD = t2mData.sel(latitude=slice(32,24),longitude=slice(74,86))
u10D = u10Data.sel(latitude=slice(32,24),longitude=slice(74,86))
v10D = v10Data.sel(latitude=slice(32,24),longitude=slice(74,86))
rhD  = rhData.sel(latitude=slice(32,24),longitude=slice(74,86))
tLevD = tLevData.sel(latitude=slice(32,24),longitude=slice(74,86))
uLevD = uLevData.sel(latitude=slice(32,24),longitude=slice(74,86))
w700D = wLevData.sel(latitude=slice(32,24),longitude=slice(74,86)).sel(plev=70000,method='nearest')
zLevD = zLevData.sel(latitude=slice(32,24),longitude=slice(74,86))
OroD = OroData.sel(latitude=slice(32,24),longitude=slice(74,86))
#w700D

In [6]:
# Calculate wind speed and relative humidity inv  ushear
ws = ((v10D.vas.values**2)+(u10D.uas.values**2))**0.5
ws_ds = xr.Dataset({'ws': (('time','latitude','longitude'), ws)},
                   coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})

rh_ds = xr.Dataset({'rh': (('time','latitude','longitude'), rhD.hurs)},
                   coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})

#Calculate inv
inv=t2mD.tas.values-tLevD.ta.sel(plev=85000,method='nearest').values
inv_ds = xr.Dataset({'inv': (('time','latitude','longitude'), inv)}, 
                    coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})
inv_ds.attrs
inv_ds.attrs['units']='K'
inv_ds.attrs['long_name']='t2m - t850'

#u shear calculation
ushear=(uLevD.ua.sel(plev=85000,method='nearest').values-u10D.uas.values)/(zLevD.zg.sel(plev=85000,method='nearest').values) 
ushear_ds = xr.Dataset({'ushear': (('time','latitude','longitude'), ushear)}, 
                       coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})
ushear_ds.attrs['units']='s-1'
ushear_ds.attrs['long_name']='(u10 - u850)/z850'

In [10]:
AOData = xr.open_dataset('CanESM5-AOindex-NDJF-Daily-1980-2014.nc')

AO=AOData.AO
datetimeindex = AO.indexes['time'].to_datetimeindex()
datetimeindex
AO['time'] = datetimeindex
AO.values

AO5D=AO.rolling(time=5).mean()

AO5DAll=AO5D[((AO5D.time.dt.month>11) | (AO5D.time.dt.month<2)) & 
             (AO5D.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))
#ao

  datetimeindex = AO.indexes['time'].to_datetimeindex()


In [13]:
EUData = xr.open_dataset('CanESM5-EUindex-NDJF-Daily-1980-2014.nc')
EU=EUData.EU
datetimeindex = EU.indexes['time'].to_datetimeindex()
datetimeindex
EU['time'] = datetimeindex
EU.values
EU5D=EU.rolling(time=5).mean()

EU5DAll=EU5D[((EU5D.time.dt.month>11) | (EU5D.time.dt.month<2)) & 
             (EU5D.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))


  datetimeindex = EU.indexes['time'].to_datetimeindex()


In [14]:
# create mask
oro = OroD.z
oro.values = OroD.z.values/9.81
oro.attrs
oro.attrs['units']='meter'
oro.attrs['long_name']='Orography'
oro.values[oro.values>500.1]=np.NaN
mask=oro.values/oro.values

In [15]:
# Apply Mask 
t2mD.tas.values=t2mD.tas.values*mask
ws_ds.ws.values=ws_ds.ws.values*mask
rh_ds.rh.values=rh_ds.rh.values*mask
w700D.wap.values=w700D.wap.values*mask
inv_ds.inv.values=inv_ds.inv.values*mask
ushear_ds.ushear.values=ushear_ds.ushear.values*mask

In [17]:
# Feild mean
t2mTS1=t2mD.mean(dim=['latitude','longitude'], skipna=True).tas
wsTS1=ws_ds.mean(dim=['latitude','longitude'], skipna=True).ws
rhTS1=rh_ds.mean(dim=['latitude','longitude'], skipna=True).rh
wTS1=w700D.mean(dim=['latitude','longitude'], skipna=True).wap
invTS1=inv_ds.mean(dim=['latitude','longitude'], skipna=True).inv
ushearTS1=ushear_ds.mean(dim=['latitude','longitude'], skipna=True).ushear

In [20]:
datetimeindex = t2mTS1.indexes['time'].to_datetimeindex()
t2mTS1['time'] = datetimeindex
wsTS1['time'] = datetimeindex
rhTS1['time'] = datetimeindex
wTS1['time'] = datetimeindex
invTS1['time'] = datetimeindex
ushearTS1['time'] = datetimeindex

  datetimeindex = t2mTS1.indexes['time'].to_datetimeindex()


In [21]:
# shift one day so we can take values of previous day
t2mTS=t2mTS1.shift(time=1)
wsTS=wsTS1.shift(time=1)
rhTS=rhTS1.shift(time=1)
wTS=wTS1.shift(time=1)
invTS=invTS1.shift(time=1)
ushearTS=ushearTS1.shift(time=1)

In [22]:
# Select required time 
t2m=t2mTS[((t2mTS.time.dt.month>11) | (t2mTS.time.dt.month<2)) & (t2mTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))
ws=wsTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))
inv=invTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))
rh=rhTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))
w=wTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))
ushear=ushearTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2014-12-31'))

In [23]:
X       = pd.DataFrame([t2m.values,ws.values,rh.values/100.0,inv.values,w.values,ushear.values*100.0,AO5DAll.values,
                        EU5DAll.values],
                       index=['t2m', 'ws', 'rh','inv', 'w', 'ushear', 'AO5D', 'EU5D']).T[:]
X.index = pd.to_datetime(t2m.time.values)
X

Unnamed: 0,t2m,ws,rh,inv,w,ushear,AO5D,EU5D
1980-01-01 12:00:00,283.638489,2.092558,0.368684,0.296294,0.042479,0.257382,0.940195,-0.244499
1980-01-02 12:00:00,282.503052,2.501947,0.307843,-0.207631,0.114584,0.123681,1.169467,-0.181746
1980-01-03 12:00:00,282.692078,2.562566,0.230027,-1.059524,0.144812,0.102232,1.295086,-0.262855
1980-01-04 12:00:00,284.083649,2.594080,0.204776,-1.671385,0.159052,0.058667,1.367690,-0.298903
1980-01-05 12:00:00,285.648254,2.052210,0.236367,-1.707266,0.062077,0.075519,1.399647,-0.439698
...,...,...,...,...,...,...,...,...
2014-12-27 12:00:00,284.906281,1.619889,0.485154,1.755276,0.066966,-0.004120,-1.616161,0.509587
2014-12-28 12:00:00,284.660187,1.897120,0.490724,1.624777,0.095552,-0.023856,-1.551134,0.624113
2014-12-29 12:00:00,285.306915,1.987440,0.470052,1.652645,0.046238,-0.112220,-1.491638,0.670498
2014-12-30 12:00:00,284.897919,1.547402,0.516568,1.153176,-0.060648,-0.052331,-1.502838,0.850938


In [24]:
norm = StandardScaler().fit(X)
    # transform training data
X = norm.transform(X)
X
X = pd.DataFrame(norm.transform(X))
X.index = pd.to_datetime(t2m.time.values)
X

Unnamed: 0,0,1,2,3,4,5,6,7
1980-01-01 12:00:00,-106.964362,-2.700639,-7.465333,-1.169432,3.785917,18.503420,0.851678,-0.539311
1980-01-02 12:00:00,-107.121867,-1.525335,-10.242302,-1.416608,9.664782,2.538143,1.125020,-0.363786
1980-01-03 12:00:00,-107.095646,-1.351306,-13.794097,-1.834463,12.129289,-0.022986,1.274785,-0.590652
1980-01-04 12:00:00,-106.902611,-1.260832,-14.946591,-2.134582,13.290319,-5.225186,1.361346,-0.691484
1980-01-05 12:00:00,-106.685573,-2.816471,-13.504706,-2.152182,5.383768,-3.212786,1.399445,-1.085297
...,...,...,...,...,...,...,...,...
2014-12-27 12:00:00,-106.788498,-4.057610,-2.149296,-0.453799,5.782413,-12.722523,-2.196051,1.569931
2014-12-28 12:00:00,-106.822635,-3.261715,-1.895037,-0.517809,8.113103,-15.079269,-2.118526,1.890270
2014-12-29 12:00:00,-106.732923,-3.002419,-2.838580,-0.504140,4.092458,-25.630762,-2.047593,2.020013
2014-12-30 12:00:00,-106.789658,-4.265710,-0.715424,-0.749130,-4.622197,-18.479381,-2.060946,2.524717


In [25]:
regLin=load('../../March2021/Observation_models/LRModel.joblib') 

In [26]:
yLR=regLin.predict(X)
y_predLin_ds=xr.Dataset({'yLR': (('time'), yLR)}, coords={'time': t2m.time})

In [27]:
dump(y_predLin_ds.yLR,'../Model_plots/CanESM5-LR-Y.joblib')

['../Model_plots/CanESM5-LR-Y.joblib']