In [9]:
import xarray as xr
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Conv3D, Flatten,MaxPooling3D,AveragePooling3D, concatenate,Input ,SpatialDropout3D,Dropout
import keras
from math import e
from sklearn.model_selection import train_test_split
from keras.models import Model
from sklearn.preprocessing import StandardScaler
from joblib import dump, load
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform
from sklearn.model_selection import KFold

In [10]:
# Load the combined station fog observations.
fogData = xr.open_dataset('../../../../Data/FogData/CombinedFogData_25Stations.nc')

# Collapse the separate (years, months, days) axes into one stacked axis "a".
StackFog = fogData.fogdata.stack(a=('years', 'months', 'days'))

# Build one "Y-M-D" string per stacked entry.  The three level arrays are read
# once and zipped, which avoids indexing the DataArray element by element and
# the quadratic cost of growing the list with `dd = dd + [...]`.
dd = [
    str(year) + '-' + str(month) + "-" + str(day)
    for year, month, day in zip(
        StackFog.years.values, StackFog.months.values, StackFog.days.values
    )
]

# Re-express the observations on a datetime axis with dims (time, stations).
fg = xr.Dataset(
    {'fogdata': (('time', 'stations'), StackFog.values.T)},
    coords={'time': pd.to_datetime(dd), 'stations': fogData.stations},
)

# Target series: sum over stations for 1980-2018, scaled by 25
# (presumably the station count, going by the input file name -- confirm).
yAll = fg.fogdata.sum(dim='stations').sel(time=slice('1980-1-1', '2018-12-31'))
yAll = yAll / 25.0

In [11]:
# Directory holding every ERA5 daily-mean file read in this cell.  Kept in one
# place so that relocating the data is a single-line change.
ERA5_DAILY_DIR = '../../../../Data/eraDown/ERA5_2degree_Down/DailyMean/'

# Surface data
t2mData = xr.open_dataset(ERA5_DAILY_DIR + 'ERA5NH_t2m_daily_NovDecJan.nc')
d2mData = xr.open_dataset(ERA5_DAILY_DIR + 'ERA5NH_d2m_daily_NovDecJan.nc')
u10Data = xr.open_dataset(ERA5_DAILY_DIR + 'ERA5NH_u10_daily_NovDecJan.nc')
v10Data = xr.open_dataset(ERA5_DAILY_DIR + 'ERA5NH_v10_daily_NovDecJan.nc')

# 850 data
tuv850Data = xr.open_dataset(ERA5_DAILY_DIR + 'ERA5NH_850_uvt_daily_NovDecJan.nc')
z850Data = xr.open_dataset(ERA5_DAILY_DIR + 'ERA5NH_850_z_daily_NovDecJan.nc')

# 700 data
w700Data = xr.open_dataset(ERA5_DAILY_DIR + 'ERA5NH_700_w_daily_NovDecJan.nc')

# 500 data
z500Data = xr.open_dataset(ERA5_DAILY_DIR + 'ERA5NH_500_z_daily_NovDecJan.nc')

# Orography
OroData = xr.open_dataset(ERA5_DAILY_DIR + 'ERA5IGP_Orography.nc')

In [12]:
# Every derived field below is wrapped on the t2m grid (time, latitude, longitude).
_grid_dims = ('time', 'latitude', 'longitude')
_grid_coords = {'time': t2mData.time, 'latitude': t2mData.latitude, 'longitude': t2mData.longitude}

# Calculate rh
# Ratio of e**(17.625*T/(243.04+T)) evaluated at the dew point to the same
# expression evaluated at the 2 m temperature, after subtracting 273.15 from
# both inputs.  The result is a ratio, not a percentage.
d2m_offset = d2mData.d2m.values - 273.15
t2m_offset = t2mData.t2m.values - 273.15
rh = (e**((17.625*d2m_offset)/(243.04+d2m_offset))/e**((17.625*t2m_offset)/(243.04+t2m_offset)))
rh_ds = xr.Dataset({'rh': (_grid_dims, rh)}, coords=_grid_coords)

# Calculate wind speed: magnitude of the (u10, v10) vector.
ws = ((v10Data.v10.values**2)+(u10Data.u10.values**2))**0.5
ws_ds = xr.Dataset({'ws': (_grid_dims, ws)}, coords=_grid_coords)

# Calculate inv: 2 m temperature minus the temperature from the 850 file.
inv = t2mData.t2m.values-tuv850Data.t.values
inv_ds = xr.Dataset({'inv': (_grid_dims, inv)}, coords=_grid_coords)
inv_ds.attrs['units'] = 'K'
inv_ds.attrs['long_name'] = 't2m - t850'

# u shear calculation: (u850 - u10) divided by z850/9.81.
ushear = (tuv850Data.u.values-u10Data.u10.values)/(z850Data.z.values/9.81)
ushear_ds = xr.Dataset({'ushear': (_grid_dims, ushear)}, coords=_grid_coords)
ushear_ds.attrs['units'] = 's-1'
# The label states the order of the difference actually computed above
# (u850 minus u10); the previous text had the operands reversed.
ushear_ds.attrs['long_name'] = '(u850 - u10)/(z850/9.81)'


In [32]:
def _tile_series_over_grid(series, template):
    """Repeat a per-time-step series across every grid cell of ``template``.

    Parameters
    ----------
    series : xarray.DataArray
        Values with a ``time`` coordinate; assumed to hold exactly one value
        per time step (TODO confirm for each input file).
    template : xarray.Dataset or xarray.DataArray
        Object whose ``time``, ``latitude`` and ``longitude`` lengths define
        the output shape.

    Returns
    -------
    numpy.ndarray
        Array of shape (time, latitude, longitude).  Step ``t`` is filled with
        ``series[t]``; steps past the end of ``series`` are left at 0.

    Raises
    ------
    ValueError
        If ``series`` has more time steps than ``template``.
    """
    field = np.zeros((template.time.shape[0],
                      template.latitude.shape[0],
                      template.longitude.shape[0]))
    n_steps = series.time.shape[0]
    # One broadcast assignment replaces a Python loop over every time step.
    field[:n_steps, :, :] = np.asarray(series.values).reshape(n_steps, 1, 1)
    return field


# NOTE(review): the series below are copied onto the t2m time axis by position,
# not matched on date -- confirm each file shares t2mData's time axis.
_grid_dims = ('time', 'latitude', 'longitude')
_grid_coords = {'time': t2mData.time, 'latitude': t2mData.latitude, 'longitude': t2mData.longitude}

# AO data
AOData = xr.open_dataset('../../../../Data/AO_EU/AO_Calculated.nc')
aoTS = AOData.AO
AOData = xr.Dataset({'AO': (_grid_dims, _tile_series_over_grid(aoTS, t2mData))},
                    coords=_grid_coords)

# EU data
EUData = xr.open_dataset('../../../../Data/AO_EU/EU.nc')
euTS = EUData.EUVal
EUData = xr.Dataset({'EU': (_grid_dims, _tile_series_over_grid(euTS, t2mData))},
                    coords=_grid_coords)

# sst data
sstData = xr.open_dataset('../../../../Data/eraDown/ERA5_2degree_Down/DailyMean/AnomERA5_sst_daily.nc')
# nino3.4 region 5S-5N 170W-120W; the modulo maps the bounds to 190 and 240.
sst1 = sstData.sst.sel(latitude=slice(5,-5),longitude=slice(-170%360,-120%360))
# Area mean over the box, skipping missing values.
sstTS = sst1.mean(dim=['latitude','longitude'], skipna=True)
SSTData = xr.Dataset({'SST': (_grid_dims, _tile_series_over_grid(sstTS, t2mData))},
                     coords=_grid_coords)

In [27]:
# create mask
# Orography field for the 0-35N, 50-100E box, divided by 9.81.
oro = OroData.z.sel(latitude=slice(35,0),longitude=slice(50,100))
oro.values = oro.values/9.81
oro.attrs['units'] = 'meter'
oro.attrs['long_name'] = 'Orography'

# Blank out every cell above 500.1.  `np.nan` is used because the `np.NaN`
# alias was removed in NumPy 2.0.
oro.values[oro.values>500.1] = np.nan

# 1 where a cell is kept, NaN where it was blanked (or was already missing).
# Built from an explicit NaN test rather than oro/oro, which would also turn a
# cell whose value is exactly 0 into NaN through 0/0.  The dtype follows `oro`
# so that multiplying a field by the mask does not change the field's dtype.
mask = np.where(np.isnan(oro.values), np.nan, 1.0).astype(oro.values.dtype)

In [28]:
# Shift every predictor by one position along "time", so that each time stamp
# carries the value of the preceding step.
t2m, ws, rh, inv, w, ushear = (
    source.shift(time=1)
    for source in (
        t2mData.t2m,
        ws_ds.ws,
        rh_ds.rh,
        inv_ds.inv,
        w700Data.w,
        ushear_ds.ushear,
    )
)

In [95]:
def _december_january_box(field):
    """Return the December/January, 1980-2018, 0-35N / 50-100E part of ``field``.

    Time steps are kept when their month is greater than 11 or less than 2 and
    their year is below 2020; the result is then sliced to the 1980-2018
    period and to the latitude/longitude box.
    """
    stamps = field.time.dt
    in_season = ((stamps.month>11) | (stamps.month<2)) & (stamps.year<2020)
    return field[in_season].sel(
        time=slice('1980-1-1','2018-12-31'),
        latitude=slice(35,0),
        longitude=slice(50,100),
    )


t2mTsAll = _december_january_box(t2m)
wsTsAll = _december_january_box(ws)
rhTsAll = _december_january_box(rh)
invTsAll = _december_january_box(inv)
ushearTsAll = _december_january_box(ushear)
wTsAll = _december_january_box(w)

In [96]:
# Standardise each predictor grid cell by grid cell.  A field is flattened to
# (time, z) with z = latitude x longitude, a fresh StandardScaler is fitted on
# that whole array (every time step it contains) and applied to it, and the
# result is written back onto the original field.
for _predictor in (t2mTsAll, wsTsAll, rhTsAll, invTsAll, ushearTsAll, wTsAll):
    t1 = _predictor.stack(z=("latitude", "longitude"))
    norm = StandardScaler().fit(t1)
    t1.values = norm.transform(t1)
    # Restore the (time, latitude, longitude) layout before writing back.
    _predictor.values = t1.unstack()


In [97]:
# Multiply each predictor by the mask (NaN where the mask is NaN), then replace
# every NaN that remains with a tiny constant so the arrays hold no NaN.
for _predictor in (t2mTsAll, wsTsAll, rhTsAll, invTsAll, ushearTsAll, wTsAll):
    _predictor.values = _predictor.values*mask

for _predictor in (t2mTsAll, wsTsAll, rhTsAll, invTsAll, ushearTsAll, wTsAll):
    _predictor.values = xr.where(np.isnan(_predictor.values),  0.000000000001,
                                 _predictor.values)

In [100]:
# 1 Day before
# Shift the AO, EU and SST fields by one position along "time".
AO1D = AOData.AO.shift(time=1)
EU1D = EUData.EU.shift(time=1)
SST1D = SSTData.SST.shift(time=1)

# Keep months 12 and 1 (years below 2020), then cut to 1980-2018 and to the
# 0-35N / 50-100E box.
AO1DAll, EU1DAll, SST1DAll = (
    lagged[((lagged.time.dt.month>11) | (lagged.time.dt.month<2)) &
           (lagged.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'),
                                           latitude=slice(35,0),
                                           longitude=slice(50,100))
    for lagged in (AO1D, EU1D, SST1D)
)

for _index_field in (AO1DAll, EU1DAll, SST1DAll):
    # Standardise per grid cell: the scaler is fitted on the whole stacked array.
    t1 = _index_field.stack(z=("latitude", "longitude"))
    norm = StandardScaler().fit(t1)
    t1.values = norm.transform(t1)
    _index_field.values = t1.unstack()

    # Apply the mask, then swap every remaining NaN for a tiny constant.
    _index_field.values = _index_field.values*mask
    _index_field.values = xr.where(np.isnan(_index_field.values),  0.000000000001,
                                   _index_field.values)


In [102]:
# 5Day Mean
# Rolling mean with a 5-step window along "time" for AO, EU and SST.
AO5D, EU5D, SST5D = (
    source.rolling(time=5).mean()
    for source in (AOData.AO, EUData.EU, SSTData.SST)
)

# Keep months 12 and 1 (years below 2020), then cut to 1980-2018 and to the
# 0-35N / 50-100E box.
AO5DAll, EU5DAll, SST5DAll = (
    smoothed[((smoothed.time.dt.month>11) | (smoothed.time.dt.month<2)) &
             (smoothed.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'),
                                               latitude=slice(35,0),
                                               longitude=slice(50,100))
    for smoothed in (AO5D, EU5D, SST5D)
)

for _index_field in (AO5DAll, EU5DAll, SST5DAll):
    # Standardise per grid cell: the scaler is fitted on the whole stacked array.
    t1 = _index_field.stack(z=("latitude", "longitude"))
    norm = StandardScaler().fit(t1)
    t1.values = norm.transform(t1)
    _index_field.values = t1.unstack()

    # Apply the mask, then swap every remaining NaN for a tiny constant.
    _index_field.values = _index_field.values*mask
    _index_field.values = xr.where(np.isnan(_index_field.values),  0.000000000001,
                                   _index_field.values)


In [103]:
# 7Day Mean
# Rolling mean with a 7-step window along "time" for AO, EU and SST.
AO7D, EU7D, SST7D = (
    source.rolling(time=7).mean()
    for source in (AOData.AO, EUData.EU, SSTData.SST)
)

# Keep months 12 and 1 (years below 2020), then cut to 1980-2018 and to the
# 0-35N / 50-100E box.
AO7DAll, EU7DAll, SST7DAll = (
    smoothed[((smoothed.time.dt.month>11) | (smoothed.time.dt.month<2)) &
             (smoothed.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'),
                                               latitude=slice(35,0),
                                               longitude=slice(50,100))
    for smoothed in (AO7D, EU7D, SST7D)
)

for _index_field in (AO7DAll, EU7DAll, SST7DAll):
    # Standardise per grid cell: the scaler is fitted on the whole stacked array.
    t1 = _index_field.stack(z=("latitude", "longitude"))
    norm = StandardScaler().fit(t1)
    t1.values = norm.transform(t1)
    _index_field.values = t1.unstack()

    # Apply the mask, then swap every remaining NaN for a tiny constant.
    _index_field.values = _index_field.values*mask
    _index_field.values = xr.where(np.isnan(_index_field.values),  0.000000000001,
                                   _index_field.values)

In [104]:
# 14Day Mean
# Rolling mean with a 14-step window along "time" for AO, EU and SST.
AO14D, EU14D, SST14D = (
    source.rolling(time=14).mean()
    for source in (AOData.AO, EUData.EU, SSTData.SST)
)

# Keep months 12 and 1 (years below 2020), then cut to 1980-2018 and to the
# 0-35N / 50-100E box.
AO14DAll, EU14DAll, SST14DAll = (
    smoothed[((smoothed.time.dt.month>11) | (smoothed.time.dt.month<2)) &
             (smoothed.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'),
                                               latitude=slice(35,0),
                                               longitude=slice(50,100))
    for smoothed in (AO14D, EU14D, SST14D)
)

for _index_field in (AO14DAll, EU14DAll, SST14DAll):
    # Standardise per grid cell: the scaler is fitted on the whole stacked array.
    t1 = _index_field.stack(z=("latitude", "longitude"))
    norm = StandardScaler().fit(t1)
    t1.values = norm.transform(t1)
    _index_field.values = t1.unstack()

    # Apply the mask, then swap every remaining NaN for a tiny constant.
    _index_field.values = _index_field.values*mask
    _index_field.values = xr.where(np.isnan(_index_field.values),  0.000000000001,
                                   _index_field.values)


In [105]:
# 30Day Mean
# Rolling mean with a 30-step window along "time" for AO, EU and SST.
AO30D, EU30D, SST30D = (
    source.rolling(time=30).mean()
    for source in (AOData.AO, EUData.EU, SSTData.SST)
)

# Keep months 12 and 1 (years below 2020), then cut to 1980-2018 and to the
# 0-35N / 50-100E box.
AO30DAll, EU30DAll, SST30DAll = (
    smoothed[((smoothed.time.dt.month>11) | (smoothed.time.dt.month<2)) &
             (smoothed.time.dt.year<2020)].sel(time=slice('1980-1-1','2018-12-31'),
                                               latitude=slice(35,0),
                                               longitude=slice(50,100))
    for smoothed in (AO30D, EU30D, SST30D)
)

for _index_field in (AO30DAll, EU30DAll, SST30DAll):
    # Standardise per grid cell: the scaler is fitted on the whole stacked array.
    t1 = _index_field.stack(z=("latitude", "longitude"))
    norm = StandardScaler().fit(t1)
    t1.values = norm.transform(t1)
    _index_field.values = t1.unstack()

    # Apply the mask, then swap every remaining NaN for a tiny constant.
    _index_field.values = _index_field.values*mask
    _index_field.values = xr.where(np.isnan(_index_field.values),  0.000000000001,
                                   _index_field.values)


In [106]:

#### Write to netcdf
# Output variable name -> prepared field, listed in the order they are written.
_cnn_fields = {
    "t2mTsAll": t2mTsAll,
    "wsTsAll": wsTsAll,
    "rhTsAll": rhTsAll,
    "wTsAll": wTsAll,
    "invTsAll": invTsAll,
    "ushearTsAll": ushearTsAll,
    "AO1DAll": AO1DAll,
    "EU1DAll": EU1DAll,
    "SST1DAll": SST1DAll,
    "AO5DAll": AO5DAll,
    "EU5DAll": EU5DAll,
    "SST5DAll": SST5DAll,
    "AO7DAll": AO7DAll,
    "EU7DAll": EU7DAll,
    "SST7DAll": SST7DAll,
    "AO14DAll": AO14DAll,
    "EU14DAll": EU14DAll,
    "SST14DAll": SST14DAll,
    "AO30DAll": AO30DAll,
    "EU30DAll": EU30DAll,
    "SST30DAll": SST30DAll,
}
_cnn_dims = ['time', 'latitude', 'longitude']

# Each variable is given as (dims, plain array).  The raw `.values` are passed
# because recent xarray versions reject a DataArray inside a (dims, data)
# tuple as ambiguous; the coordinates are supplied once, from t2mTsAll.
ds = xr.Dataset(
    {name: (_cnn_dims, field.values) for name, field in _cnn_fields.items()},
    coords={
        "time": t2mTsAll.time,
        "latitude": t2mTsAll.latitude,
        "longitude": t2mTsAll.longitude,
    },
)

ds.attrs["title"] = "CNN_input"
# Written to the notebook's working directory.
ds.to_netcdf("CNN_input.nc")

In [107]:
# Display the assembled dataset to check its variables, dimensions and coordinates.
ds

In [109]:
# Display the first time step of the prepared 1-day-lag AO field.
AO1DAll[0,:,:]