In [40]:
import xarray as xr
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Conv3D, Flatten,MaxPooling3D,AveragePooling3D, concatenate,Input ,SpatialDropout3D,Dropout
import keras
from math import e
from keras.models import Model
from sklearn.preprocessing import StandardScaler
from joblib import dump, load
from keras.models import load_model

In [41]:
from sklearn.metrics import confusion_matrix

In [42]:
# Surface fields: open the t2m, d2m, u10 and v10 daily files.
t2mData = xr.open_dataset(
    '../../../Data/eraDown/ERA5_2degree_Down/2019_2020_verification/DailyMain/ERA5NH_t2m_daily.nc'
)
d2mData = xr.open_dataset(
    '../../../Data/eraDown/ERA5_2degree_Down/2019_2020_verification/DailyMain/ERA5NH_d2m_daily.nc'
)
u10Data = xr.open_dataset(
    '../../../Data/eraDown/ERA5_2degree_Down/2019_2020_verification/DailyMain/ERA5NH_u10_daily.nc'
)
v10Data = xr.open_dataset(
    '../../../Data/eraDown/ERA5_2degree_Down/2019_2020_verification/DailyMain/ERA5NH_v10_daily.nc'
)
# Cell output: the time coordinate of the last dataset opened.
v10Data.time

In [43]:
def _magnus_term(temp):
    """Return e**(17.625*Tc / (243.04 + Tc)) with Tc = temp - 273.15.

    The 273.15 offset is presumably a K -> degC conversion; confirm against
    the units of the input files.
    """
    temp_c = temp - 273.15
    return e ** ((17.625 * temp_c) / (243.04 + temp_c))


# Relative humidity as the ratio of the term evaluated at d2m to the term
# evaluated at t2m (a fraction, not a percentage).
rh = _magnus_term(d2mData.d2m.values) / _magnus_term(t2mData.t2m.values)

rh_ds = xr.Dataset(
    data_vars={'rh': (('time', 'latitude', 'longitude'), rh)},
    coords={
        'time': t2mData.time,
        'latitude': t2mData.latitude,
        'longitude': t2mData.longitude,
    },
)


In [44]:
# Wind speed: magnitude of the (u10, v10) vector at every time/grid point.
ws = (v10Data.v10.values ** 2 + u10Data.u10.values ** 2) ** 0.5
ws_ds = xr.Dataset(
    data_vars={'ws': (('time', 'latitude', 'longitude'), ws)},
    coords={
        'time': t2mData.time,
        'latitude': t2mData.latitude,
        'longitude': t2mData.longitude,
    },
)


In [45]:
# 850 files: one with u, v, t and one with z.
tuv850Data = xr.open_dataset(
    '../../../Data/eraDown/ERA5_2degree_Down/2019_2020_verification/DailyMain/ERA5NH_850_uvt_daily.nc'
)
z850Data = xr.open_dataset(
    '../../../Data/eraDown/ERA5_2degree_Down/2019_2020_verification/DailyMain/ERA5NH_850_z_daily.nc'
)
# 700 file with w.
w700Data = xr.open_dataset(
    '../../../Data/eraDown/ERA5_2degree_Down/2019_2020_verification/DailyMain/ERA5NH_700_w_daily.nc'
)
# Orography file (later used to build the grid mask).
OroData = xr.open_dataset(
    '../../../Data/eraDown/ERA5_2degree_Down/DailyMean/ERA5IGP_Orography.nc'
)

In [46]:
# Inversion term: t2m minus t from the 850 dataset, element-wise.
inv = t2mData.t2m.values - tuv850Data.t.values
inv_ds = xr.Dataset(
    data_vars={'inv': (('time', 'latitude', 'longitude'), inv)},
    coords={
        'time': t2mData.time,
        'latitude': t2mData.latitude,
        'longitude': t2mData.longitude,
    },
    # Dataset-level metadata describing the derived field.
    attrs={'units': 'K', 'long_name': 't2m - t850'},
)

In [47]:
# u shear: difference between u from the 850 dataset and u10, divided by
# z850/9.81 (presumably geopotential converted to a height in metres --
# TODO confirm the units of z850Data.z).
ushear = (tuv850Data.u.values - u10Data.u10.values) / (z850Data.z.values / 9.81)
ushear_ds = xr.Dataset(
    {'ushear': (('time', 'latitude', 'longitude'), ushear)},
    coords={
        'time': t2mData.time,
        'latitude': t2mData.latitude,
        'longitude': t2mData.longitude,
    },
)
ushear_ds.attrs['units'] = 's-1'
# The label mirrors the expression computed above. The previous label,
# '(u10 - u850)/z850', had the opposite sign and omitted the 9.81 divisor.
ushear_ds.attrs['long_name'] = '(u850 - u10)/(z850/9.81)'

In [48]:
def _tile_over_grid(series):
    """Replicate a per-time index series over the t2mData latitude/longitude grid.

    Returns a new float64 array of shape (time, latitude, longitude) in which
    every grid point at a given time holds that time's index value.
    """
    n_time = series.time.shape[0]
    n_lat = t2mData.latitude.shape[0]
    n_lon = t2mData.longitude.shape[0]
    # reshape raises if the series does not hold exactly one value per time step.
    per_time = np.asarray(series.values, dtype=float).reshape(n_time, 1, 1)
    # .copy() turns the read-only broadcast view into a writable array.
    return np.broadcast_to(per_time, (n_time, n_lat, n_lon)).copy()


# AO data
AOData = xr.open_dataset('../../May2021/2019_2020Prediction/AOindex-NDJF-Daily-1979-2020_old.nc')
aoTS = AOData.AO
Darray = _tile_over_grid(aoTS)
AOData = xr.Dataset(
    {'AO': (('time', 'latitude', 'longitude'), Darray)},
    coords={'time': AOData.time, 'latitude': t2mData.latitude, 'longitude': t2mData.longitude},
)

# EU data
EUData = xr.open_dataset('../../May2021/2019_2020Prediction/EUindex-NDJF-Daily-1979-2020.nc')
euTS = EUData.EU
# The EU field is sized and labelled with the EU file's own time axis. It
# previously borrowed the AO time axis, which silently mislabels (or fails)
# whenever the two files do not have identical time coordinates.
Darray = _tile_over_grid(euTS)
EUData = xr.Dataset(
    {'EU': (('time', 'latitude', 'longitude'), Darray)},
    coords={'time': EUData.time, 'latitude': t2mData.latitude, 'longitude': t2mData.longitude},
)


In [49]:
# create mask
# Orography over latitude 35..0 and longitude 50..100; z is divided by 9.81
# and relabelled in metres.
oro = OroData.z.sel(latitude=slice(35, 0), longitude=slice(50, 100))
oro.values = oro.values / 9.81
oro.attrs['units'] = 'meter'
oro.attrs['long_name'] = 'Orography'
# Blank out every grid point above 500.1. np.nan is used because the np.NaN
# alias was removed in NumPy 2.0.
oro.values[oro.values > 500.1] = np.nan
# mask is 1 where the orography value was kept and NaN where it was blanked.
# NOTE(review): x/x is also NaN where the value is exactly 0 -- confirm that
# such points are meant to be masked as well.
mask = oro.values / oro.values

In [50]:
# 5-day running means of the AO and EU fields, restricted to December and
# January, standardized, and then masked.

# AO
AO5D = AOData.AO.rolling(time=5).mean()
ao_keep = ((AO5D.time.dt.month > 11) | (AO5D.time.dt.month < 2)) & (AO5D.time.dt.year < 2021)
AO5DAll = AO5D[ao_keep].sel(
    time=slice('1980-12-1', '2020-1-31'),
    latitude=slice(35, 0),
    longitude=slice(50, 100),
)

# EU
EU5D = EUData.EU.rolling(time=5).mean()
eu_keep = ((EU5D.time.dt.month > 11) | (EU5D.time.dt.month < 2)) & (EU5D.time.dt.year < 2021)
EU5DAll = EU5D[eu_keep].sel(
    time=slice('1980-12-1', '2020-1-31'),
    latitude=slice(35, 0),
    longitude=slice(50, 100),
)

# Standardize each field column-wise: (latitude, longitude) is flattened into
# one axis, a StandardScaler is fit on the field itself, and the scaled values
# are written back in the original layout.
for index_field in (AO5DAll, EU5DAll):
    t1 = index_field.stack(z=("latitude", "longitude"))
    norm = StandardScaler().fit(t1)
    t1.values = norm.transform(t1)
    index_field.values = t1.unstack()

# Apply the orography mask, then replace the resulting NaNs with 1e-12.
for index_field in (AO5DAll, EU5DAll):
    index_field.values = index_field.values * mask
    index_field.values = xr.where(np.isnan(index_field.values), 1e-12, index_field.values)

In [51]:
# Shift every predictor by one step along time, so each time stamp carries the
# value of the preceding record (the first record becomes missing).
t2m, ws, rh, inv, w, ushear = (
    field.shift(time=1)
    for field in (
        t2mData.t2m,
        ws_ds.ws,
        rh_ds.rh,
        inv_ds.inv,
        w700Data.w,
        ushear_ds.ushear,
    )
)

In [52]:
def _dec_jan_subset(field):
    """Keep December/January records before 2021, then cut the analysis window.

    The window is time '1980-12-1'..'2020-1-31', latitude 35..0 and
    longitude 50..100.
    """
    in_season = (field.time.dt.month > 11) | (field.time.dt.month < 2)
    before_2021 = field.time.dt.year < 2021
    return field[in_season & before_2021].sel(
        time=slice('1980-12-1', '2020-1-31'),
        latitude=slice(35, 0),
        longitude=slice(50, 100),
    )


t2mTsAll = _dec_jan_subset(t2m)
wsTsAll = _dec_jan_subset(ws)
rhTsAll = _dec_jan_subset(rh)
invTsAll = _dec_jan_subset(inv)
ushearTsAll = _dec_jan_subset(ushear)
wTsAll = _dec_jan_subset(w)

In [53]:
# Standardize every predictor column-wise. For each field, (latitude,
# longitude) is flattened into a single axis so that each grid point is one
# column, a StandardScaler is fit on that same field, and the scaled values
# are written back in the original (time, latitude, longitude) layout.
# NOTE(review): the scaler is fit on the data being evaluated here, not loaded
# from the training run -- confirm this matches how the model was trained.
for predictor in (t2mTsAll, wsTsAll, rhTsAll, invTsAll, ushearTsAll, wTsAll):
    t1 = predictor.stack(z=("latitude", "longitude"))
    norm = StandardScaler().fit(t1)
    t1.values = norm.transform(t1)
    predictor.values = t1.unstack()

In [54]:
# Multiply each predictor by the orography mask, so grid points where the mask
# is NaN become NaN in the predictor as well.
for predictor in (t2mTsAll, wsTsAll, rhTsAll, invTsAll, ushearTsAll, wTsAll):
    predictor.values = predictor.values * mask

In [55]:
# Replace every NaN in the predictors with the small constant 1e-12.
for predictor in (t2mTsAll, wsTsAll, rhTsAll, invTsAll, ushearTsAll, wTsAll):
    predictor.values = xr.where(np.isnan(predictor.values), 1e-12, predictor.values)


In [56]:
# Pull the raw arrays out of each field and append a trailing length-1 axis,
# turning (time, latitude, longitude) into (time, latitude, longitude, 1).
t2mAll = t2mTsAll.values[..., np.newaxis]
wsAll = wsTsAll.values[..., np.newaxis]
rhAll = rhTsAll.values[..., np.newaxis]
invAll = invTsAll.values[..., np.newaxis]
wAll = wTsAll.values[..., np.newaxis]
ushearAll = ushearTsAll.values[..., np.newaxis]
aoAll = AO5DAll.values[..., np.newaxis]
euAll = EU5DAll.values[..., np.newaxis]

# Sanity check: an index field and a reanalysis field should share one shape.
print(aoAll.shape)
ushearAll.shape

(2480, 18, 26, 1)


(2480, 18, 26, 1)

In [57]:
# Stack the eight predictors along a new leading axis, giving
# (variable, time, latitude, longitude, 1).
X_All = np.stack(
    [t2mAll, rhAll, wsAll, invAll, wAll, ushearAll, aoAll, euAll],
    axis=0,
)

# Swap the first two axes so samples come first:
# (time, variable, latitude, longitude, 1).
X_All_reshape = np.transpose(X_All, (1, 0, 2, 3, 4))
X_All_reshape.shape

(2480, 8, 18, 26, 1)

In [74]:
# Restore the saved model from 'modelDFogCNN.h5' and print its layer summary.
model = keras.models.load_model('modelDFogCNN.h5')
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_1 (Conv3D)            (None, 8, 18, 26, 16)     448       
_________________________________________________________________
average_pooling3d_1 (Average (None, 4, 9, 13, 16)      0         
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 4, 9, 13, 32)      13856     
_________________________________________________________________
average_pooling3d_2 (Average (None, 2, 5, 7, 32)       0         
_________________________________________________________________
dropout (Dropout)            (None, 2, 5, 7, 32)       0         
_________________________________________________________________
conv3d_3 (Conv3D)            (None, 2, 5, 7, 64)       55360     
_________________________________________________________________
average_pooling3d_3 (Average (None, 1, 3, 4, 64)      

In [75]:
# Run the loaded model on the stacked predictor array; yLR holds one output row per sample.
yLR=model.predict(X_All_reshape)

In [76]:
# Display the shape of the model output array.
yLR.shape

(2480, 1)

In [77]:
# Turn the first output column into binary labels: 1 where the value is
# >= 0.5, otherwise 0.
yf = [1 if y >= 0.5 else 0 for y in yLR[:, 0]]

# Attach the time stamps of the predictor samples to the labels.
yfArray_ds = xr.Dataset(
    data_vars={'yfArray': ('time', yf)},
    coords={'time': t2mTsAll.time.values},
)

print(yfArray_ds.yfArray)

<xarray.DataArray 'yfArray' (time: 2480)>
array([0, 0, 0, ..., 1, 1, 0])
Coordinates:
  * time     (time) datetime64[ns] 1980-12-01T09:00:00 ... 2020-01-31T09:00:00


In [78]:
fogData = xr.open_dataset('../../../Data/FogData/CombinedFogData_25Stations.nc')

# Flatten the (years, months, days) axes of the fog record into one axis 'a'.
StackFog = fogData.fogdata.stack(a=('years', 'months', 'days'))

# Build one 'Y-M-D' string per stacked entry. The coordinate arrays are read
# once and zipped, instead of indexing the DataArray element by element and
# re-allocating the list on every iteration.
dd = [
    str(year) + '-' + str(month) + "-" + str(day)
    for year, month, day in zip(
        StackFog.years.values, StackFog.months.values, StackFog.days.values
    )
]

# Re-index the fog record by calendar date: (time, stations).
fg = xr.Dataset(
    {'fogdata': (('time', 'stations'), StackFog.values.T)},
    coords={'time': pd.to_datetime(dd), 'stations': fogData.stations},
)

# Observed series for station 42182, split into the two evaluation periods.
yAll = fg.fogdata.sel(stations=42182)
yTrain = yAll.sel(time=slice('1980-12-1', '2018-1-31'))
yTest = yAll.sel(time=slice('2018-12-1', '2019-1-31'))

# Predicted labels over the same two periods.
# NOTE(review): observations and predictions are later compared position by
# position, not aligned on their time coordinates -- this assumes both series
# contain exactly the same days in the same order.
y_predTrain = yfArray_ds.yfArray.sel(time=slice('1980-12-1', '2018-1-31'))
y_predTest = yfArray_ds.yfArray.sel(time=slice('2018-12-1', '2019-1-31'))

In [79]:
# Training period: confusion-matrix counts, unpacked in flattened row order
# as (tn, fp, fn, tp).
train_cm = confusion_matrix(yTrain.values, y_predTrain.values)
tn, fp, fn, tp = train_cm.ravel()

print(tn, fp, fn, tp)

822 210 363 961


In [80]:
# Test period: confusion-matrix counts, unpacked in flattened row order
# as (tn, fp, fn, tp).
test_cm = confusion_matrix(yTest.values, y_predTest.values)
tn, fp, fn, tp = test_cm.ravel()

print(tn, fp, fn, tp)

8 39 3 12
