In [5]:
import numpy as np
import pandas as pd
import xarray as xr
from joblib import dump, load
from sklearn.model_selection import train_test_split
import math as mt
from math import e
from sklearn.preprocessing import StandardScaler

In [6]:
fogData = xr.open_dataset('../../../../Data/FogData/CombinedFogData_25Stations.nc')
#plt.figure(figsize=[16,8])
StackFog=fogData.fogdata.stack(a=('years','months','days'))
StackFog
dd =[];
for i in range(StackFog.years.values.shape[0]):
    dd=dd+[str(StackFog.years[i].values)+'-'+str(StackFog.months[i].values)+"-"+str(StackFog.days[i].values)]
fg = xr.Dataset({'fogdata': (('time','stations'), StackFog.values.T)}, coords={'time': pd.to_datetime(dd),'stations': fogData.stations})
y=fg.fogdata.sum(dim='stations').sel(time=slice('1980-1-1','2018-12-31'))
y=y/25.0

In [7]:
#ERA5 Input

In [8]:
#Surface data
t2mData = xr.open_dataset('../../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5NH_t2m_daily_NovDecJan.nc')
d2mData = xr.open_dataset('../../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5NH_d2m_daily_NovDecJan.nc')
u10Data = xr.open_dataset('../../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5NH_u10_daily_NovDecJan.nc')
v10Data = xr.open_dataset('../../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5NH_v10_daily_NovDecJan.nc')
# 850 Data
tuv850Data = xr.open_dataset('../../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5NH_850_uvt_daily_NovDecJan.nc')
z850Data = xr.open_dataset('../../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5NH_850_z_daily_NovDecJan.nc')
#700 Data
w700Data = xr.open_dataset('../../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5NH_700_w_daily_NovDecJan.nc')
# Read Orography
OroData = xr.open_dataset('../../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5IGP_Orography.nc')
z500Data = xr.open_dataset('../../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5NH_500_z_daily_NovDecJan.nc')

In [9]:
t2mD = t2mData.sel(latitude=slice(32,24),longitude=slice(74,86))
u10D = u10Data.sel(latitude=slice(32,24),longitude=slice(74,86))
v10D = v10Data.sel(latitude=slice(32,24),longitude=slice(74,86))
d2mD = d2mData.sel(latitude=slice(32,24),longitude=slice(74,86))
tuv850D = tuv850Data.sel(latitude=slice(32,24),longitude=slice(74,86))
z850D = z850Data.sel(latitude=slice(32,24),longitude=slice(74,86))
w700D = w700Data.sel(latitude=slice(32,24),longitude=slice(74,86))
OroD = OroData.sel(latitude=slice(32,24),longitude=slice(74,86))
zD=z500Data.sel(latitude=slice(32,24),longitude=slice(74,86))

In [10]:
v10D.time

In [11]:
v10D.latitude

In [12]:
# Calculate wind speed and relative humidity
ws = ((v10D.v10.values**2)+(u10D.u10.values**2))**0.5
ws_ds = xr.Dataset({'ws': (('time','latitude','longitude'), ws)}, coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})
rh = (e**((17.625*(d2mD.d2m.values-273.15))/(243.04+(d2mD.d2m.values-273.15)))/e**((17.625*(t2mD.t2m.values-273.15))/(243.04+(t2mD.t2m.values-273.15))))
rh_ds = xr.Dataset({'rh': (('time','latitude','longitude'), rh)}, coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})
#Calculate inv
inv=t2mD.t2m.values-tuv850D.t.values
inv_ds = xr.Dataset({'inv': (('time','latitude','longitude'), inv)}, coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})
inv_ds.attrs
inv_ds.attrs['units']='K'
inv_ds.attrs['long_name']='t2m - t850'
#u shear calculation
ushear=(tuv850D.u.values-u10D.u10.values)/(z850D.z.values/9.81) 
ushear_ds = xr.Dataset({'ushear': (('time','latitude','longitude'), ushear)}, coords={'time': v10D.time,'latitude': v10D.latitude,'longitude': v10D.longitude})
ushear_ds.attrs['units']='s-1'
ushear_ds.attrs['long_name']='(u10 - u850)/z850'

In [13]:
AOData = xr.open_dataset('../../../../Data/AO_EU/AO_Calculated.nc')
aoTS=AOData.AO.shift(time=1)
ao=aoTS[((aoTS.time.dt.month>11) | (aoTS.time.dt.month<2)) & (aoTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))
#ao

In [14]:
EUData = xr.open_dataset('../../../../Data/AO_EU/EU.nc')
EUData.EUVal
euTS=EUData.EUVal.shift(time=1)
eu=euTS[((euTS.time.dt.month>11) | (euTS.time.dt.month<2)) & (euTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))

In [15]:
# create mask
oro = OroD.z
oro.values = OroD.z.values/9.81
oro.attrs
oro.attrs['units']='meter'
oro.attrs['long_name']='Orography'
oro.values[oro.values>500.1]=np.NaN
mask=oro.values/oro.values

In [16]:
# Apply Mask 
t2mD.t2m.values=t2mD.t2m.values*mask
ws_ds.ws.values=ws_ds.ws.values*mask
rh_ds.rh.values=rh_ds.rh.values*mask
w700D.w.values=w700D.w.values*mask
inv_ds.inv.values=inv_ds.inv.values*mask
ushear_ds.ushear.values=ushear_ds.ushear.values*mask
zD.z.values=zD.z.values*mask

In [17]:
# Feild mean
t2mTS1=t2mD.mean(dim=['latitude','longitude'], skipna=True).t2m
wsTS1=ws_ds.mean(dim=['latitude','longitude'], skipna=True).ws
rhTS1=rh_ds.mean(dim=['latitude','longitude'], skipna=True).rh
wTS1=w700D.mean(dim=['latitude','longitude'], skipna=True).w
invTS1=inv_ds.mean(dim=['latitude','longitude'], skipna=True).inv
ushearTS1=ushear_ds.mean(dim=['latitude','longitude'], skipna=True).ushear
zTS1=zD.mean(dim=['latitude','longitude'], skipna=True).z

In [18]:
# shift one day so we can take values of previous day
t2mTS=t2mTS1.shift(time=1)
wsTS=wsTS1.shift(time=1)
rhTS=rhTS1.shift(time=1)
wTS=wTS1.shift(time=1)
invTS=invTS1.shift(time=1)
ushearTS=ushearTS1.shift(time=1)
zTS=zTS1.shift(time=1)



In [19]:
# Select required time 
t2m=t2mTS[((t2mTS.time.dt.month>11) | (t2mTS.time.dt.month<2)) & (t2mTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))
ws=wsTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))
inv=invTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))
rh=rhTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))
w=wTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))
ushear=ushearTS[((wsTS.time.dt.month>11) | (wsTS.time.dt.month<2)) & (wsTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))
z=zTS[((zTS.time.dt.month>11) | (zTS.time.dt.month<2)) & (zTS.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'))

In [20]:
print('rh')
print(np.corrcoef(rh,y)[0,1])
print('t2m')
print(np.corrcoef(t2m,y)[0,1])
print('ws')
print(np.corrcoef(ws,y)[0,1])
print('w')
print(np.corrcoef(w,y)[0,1])
print('inv')
print(np.corrcoef(inv,y)[0,1])
print('ushear')
print(np.corrcoef(ushear,y)[0,1])
print('z')
print(np.corrcoef(z,y)[0,1])

rh
0.551984190261254
t2m
-0.27475434162179596
ws
-0.36287914975048996
w
-0.04575990405926304
inv
-0.23197758724160944
ushear
-0.14306905339288264
z
0.02148029925141571


In [21]:
ao.values

array([-1.7509392 , -1.8242075 , -1.4396307 , ...,  0.08315741,
        0.21666297,  0.21972694], dtype=float32)

In [22]:
# input X
X       = pd.DataFrame([t2m.values,ws.values,rh.values,inv.values,w.values,ushear.values*100.0,ao.values,eu.values],
                       index=['t2m', 'ws', 'rh','inv', 'w', 'ushear','ao','eu']).T[:]
#X       = pd.DataFrame([ws.values,rh.values,inv.values,w.values]).T[:]
X.index = pd.to_datetime(t2m.time.values)
X

Unnamed: 0,t2m,ws,rh,inv,w,ushear,ao,eu
1980-01-01 09:00:00,289.120636,1.737170,0.681204,5.449228,-0.103194,0.353420,-1.750939,0.326036
1980-01-02 09:00:00,288.971771,2.071385,0.743338,5.749483,-0.125205,0.273085,-1.824208,0.821619
1980-01-03 09:00:00,288.303253,1.375861,0.758891,6.781636,-0.049128,0.160400,-1.439631,0.484874
1980-01-04 09:00:00,287.083527,1.883535,0.746639,6.586329,0.070247,0.053234,-0.997704,0.013497
1980-01-05 09:00:00,286.249054,1.991354,0.709822,5.626440,0.038802,0.038249,-1.095475,0.433607
...,...,...,...,...,...,...,...,...
2018-12-27 09:00:00,287.116974,1.433200,0.588758,4.247632,-0.025993,0.314777,-0.047821,0.367309
2018-12-28 09:00:00,287.040436,1.897564,0.547522,5.767006,0.163933,0.249560,0.075607,0.866002
2018-12-29 09:00:00,286.180817,2.137837,0.552296,4.413174,0.137729,0.143824,0.083157,1.291670
2018-12-30 09:00:00,286.110046,1.636086,0.564291,1.974165,0.078003,0.081137,0.216663,0.938830


In [23]:
norm = StandardScaler().fit(X)
    # transform training data
X = norm.transform(X)
X
X = pd.DataFrame(norm.transform(X))
X.index = pd.to_datetime(t2m.time.values)
X

Unnamed: 0,0,1,2,3,4,5,6,7
1980-01-01 09:00:00,-151.803446,-4.064469,-0.709943,-1.704749,-33.468375,14.908108,-1.503011,0.719577
1980-01-02 09:00:00,-151.844542,-1.727495,9.293493,-1.611327,-38.473908,7.829461,-1.566859,1.811889
1980-01-03 09:00:00,-152.029096,-6.590885,11.797603,-1.290181,-21.173405,-2.099535,-1.231729,1.069670
1980-01-04 09:00:00,-152.365818,-3.041019,9.825014,-1.350949,5.973481,-11.542335,-0.846622,0.030712
1980-01-05 09:00:00,-152.596186,-2.287107,3.897496,-1.649611,-1.177278,-12.862747,-0.931823,0.956673
...,...,...,...,...,...,...,...,...
2018-12-27 09:00:00,-152.356585,-6.189951,-15.593767,-2.078616,-15.912171,11.503087,-0.018868,0.810546
2018-12-28 09:00:00,-152.377714,-2.942925,-22.232640,-1.605875,27.278408,5.756574,0.088690,1.909714
2018-12-29 09:00:00,-152.615024,-1.262835,-21.464061,-2.027109,21.319440,-3.560168,0.095270,2.847924
2018-12-30 09:00:00,-152.634561,-4.771283,-19.532871,-2.785987,7.737331,-9.083744,0.211610,2.070233


In [18]:
y

In [19]:
#dump(X, 'X.joblib') 
#dump(y,'y.joblib')

['y.joblib']

In [36]:
#X

Unnamed: 0,0,1,2,3,4,5,6,7
1980-01-01 09:00:00,-151.803446,-4.064469,-0.709943,-1.704749,-33.468375,14.908108,-1.503011,0.719577
1980-01-02 09:00:00,-151.844542,-1.727495,9.293493,-1.611327,-38.473908,7.829461,-1.566859,1.811889
1980-01-03 09:00:00,-152.029096,-6.590885,11.797603,-1.290181,-21.173405,-2.099535,-1.231729,1.069670
1980-01-04 09:00:00,-152.365818,-3.041019,9.825014,-1.350949,5.973481,-11.542335,-0.846622,0.030712
1980-01-05 09:00:00,-152.596186,-2.287107,3.897496,-1.649611,-1.177278,-12.862747,-0.931823,0.956673
...,...,...,...,...,...,...,...,...
2018-12-27 09:00:00,-152.356585,-6.189951,-15.593767,-2.078616,-15.912171,11.503087,-0.018868,0.810546
2018-12-28 09:00:00,-152.377714,-2.942925,-22.232640,-1.605875,27.278408,5.756574,0.088690,1.909714
2018-12-29 09:00:00,-152.615024,-1.262835,-21.464061,-2.027109,21.319440,-3.560168,0.095270,2.847924
2018-12-30 09:00:00,-152.634561,-4.771283,-19.532871,-2.785987,7.737331,-9.083744,0.211610,2.070233


In [None]:
X