In [23]:
import os
import sys

import matplotlib
import numpy as np
import pandas as pd
import xarray as xr
from matplotlib import pyplot as plt
%matplotlib qt

In [2]:
# Directory that holds the Gilbert Skill Score CSV inputs and receives the
# netCDF outputs. Set the GSS_DATA_DIR environment variable to point the
# notebook at another location without editing it. os.path.join(..., '')
# guarantees the trailing separator that the `path + filename` concatenations
# in this notebook rely on.
path = os.path.join(
    os.environ.get('GSS_DATA_DIR') or '/media/onno/Algemeen/Thesis/GFS_T850/GSS/',
    '')

Create netCDF files

In [3]:
# Region bounding boxes as (lat_north, lat_south, lon_west, lon_east).
# Longitudes run 0-360 °E, so 352-360 is the same strip as 8-0 °W.
coordinatez = [
(54,46,6,14),#Germany
(44,36,352,360), #Spain
(54,46,26,34), #Ukraine
(58,50,352,360), #UK
(42,34,28,36), #Turkey
(68,60,22,30), #Finland
(66,58,6,14), #Norway/Sweden
(60,52,46,54)] #Russia Samara/Kazan region
# Forecast models whose Gilbert Skill Score CSVs are loaded for every bounding box.
fcst_modelz = ['GFS','ERA5RF']
lead_dayz = [1,3,5,7,9]
columnz = ['persistent_hw','short_hw','persistent_cw','short_cw']
regionz = ['Germany','Spain','Ukraine','UK','Turkey','Finland','Norway/Sweden','Russia']

xtickz = np.arange(4)  # not used in this cell; kept in case another cell reads it

# Calendar month -> meteorological season label.
seasonz = {
    1:'DJF',2:'DJF',3:'MAM',4:'MAM',5:'MAM',6:'JJA',7:'JJA',8:'JJA',9:'SON',10:'SON',11:'SON',12:'DJF'
}
# Order of the season axis in the output datasets.
season_orderz = ['DJF','MAM','JJA','SON']

# Fail before any file is read if the region labels and bounding boxes are out of step.
if len(regionz) != len(coordinatez):
    raise ValueError('regionz has {} entries but coordinatez has {}'.format(
        len(regionz),len(coordinatez)))

# For each model: build a (region, lead_day, season, event) dataset of
# seasonal-mean GSS values and write it to GSS_data_<model>.nc under `path`.
for fcst_model in fcst_modelz:
    list_region = []
    for lat_0,lat_1,lon_0,lon_1 in coordinatez:
        list_lead_day = []
        for lead_day in lead_dayz:
            file = 'Gilbert_Skill_Score_lead_day_{}_lon_{}_{}_lat_{}_{}_{}.csv'.format(
                lead_day,lon_0,lon_1,lat_0,lat_1,fcst_model)
            df = pd.read_csv(path+file,index_col=0)
            df.index = pd.to_datetime(df.index)
            # Label every row with its season and average all columns per season;
            # reindex fixes the row order (a season without data becomes NaN).
            season_labelz = np.array([seasonz[month] for month in df.index.month])
            df_seasonz_groupby = df.groupby(season_labelz).mean().reindex(season_orderz)
            # NOTE(review): the values are taken positionally, so the CSV columns
            # are assumed to be in the same order as columnz — confirm.
            ds = xr.Dataset(
                data_vars=dict(GSS=(['season','event'],df_seasonz_groupby.values)),
                coords=dict(season=season_orderz,
                            event=columnz))
            list_lead_day.append(ds)
        ds_lead_day = xr.concat(list_lead_day,dim='lead_day')
        # Use lead_dayz itself so the coordinate always matches the files that were read.
        ds_lead_day = ds_lead_day.assign_coords(lead_day=lead_dayz)
        list_region.append(ds_lead_day)
    ds_total = xr.concat(list_region,dim='region')
    ds_total = ds_total.assign_coords(region = regionz)
    ds_total.attrs['model'] = fcst_model
    ds_total.attrs['region_coords'] = ['Germany : 46-54 °N 6-14 °E',
                                      'Spain : 36-44 °N 0-8 °W',
                                      'Ukraine : 46-54 °N 26-34 °E',
                                      'UK : 50-58 °N 0-8 °W',
                                      'Turkey : 34-42 °N 28-36 °E',
                                      'Finland : 60-68 °N 22-30 °E',
                                      'Norway/Sweden : 58-66 °N 6-14 °E',
                                      'Russia : 52-60 °N 46-54 °E']
    ds_total.to_netcdf(path+'GSS_data_{}.nc'.format(fcst_model))


Process netCDF files

In [24]:
# Labels and colour scale used for the GSS figures.
lead_dayz = [1, 3, 5, 7, 9]
eventz = ['PH', 'SH', 'PC', 'SC']
regionz = [
    'Germany', 'Spain', 'Ukraine', 'UK',
    'Turkey', 'Finland', 'Norway/Sweden', 'Russia',
]
seasonz = ['DJF', 'MAM', 'JJA', 'SON']

# Colour stops on the 0-1 scale, running from red through yellow to dark green.
_stopz = (0, 0.25, 0.5, 0.7, 1)
_hexz = ('#ff0000', '#ff6600', '#ffff00', '#33cc33', '#006600')
colorz = list(zip(_stopz, _hexz))
cmap = matplotlib.colors.LinearSegmentedColormap.from_list('custom', colorz, N=1024)

In [70]:
#Regional mean GSS
# Grouped bar chart of the GSS averaged over events and seasons, one pair of
# bars (GFS, ERA5RF) per region, for a single lead day.

lead_day = 5
width=0.35  # bar width; the two bars of a pair are shifted by -/+ width/2
# Load the selected lead day into memory and close the files right away, so no
# handle stays open on the netCDF files that the creation cell rewrites.
with xr.open_dataset(path+'GSS_data_GFS.nc') as ds_file:
    ds_GFS = ds_file.sel(lead_day=lead_day).load()
with xr.open_dataset(path+'GSS_data_ERA5RF.nc') as ds_file:
    ds_ERA5RF = ds_file.sel(lead_day=lead_day).load()
ds_GFS_mean = ds_GFS.mean(['event','season']).GSS
ds_ERA5RF_mean = ds_ERA5RF.mean(['event','season']).GSS
fig,ax = plt.subplots()
x=np.arange(len(ds_GFS_mean.values))
ax.bar(x-width/2,ds_GFS_mean.values,width,label='GFS')
ax.bar(x+width/2,ds_ERA5RF_mean.values,width,label='ERA5RF')
# Set the ticks on this Axes explicitly instead of on pyplot's "current" axes.
ax.set_xticks(x)
ax.set_xticklabels(regionz,rotation=45)
ax.set_ylim([0,1])
ax.set_ylabel('Gilbert Skill Score')
ax.set_title('Day {} Forecast'.format(lead_day))
ax.grid(axis='y')
ax.legend()
fig.tight_layout()



In [73]:
#Seasonal mean GSS
# Grouped bar chart of the GSS averaged over events and regions, one pair of
# bars (GFS, ERA5RF) per season, for a single lead day.
lead_day = 5
width=0.35  # bar width; the two bars of a pair are shifted by -/+ width/2
# Load the selected lead day into memory and close the files right away, so no
# handle stays open on the netCDF files that the creation cell rewrites.
with xr.open_dataset(path+'GSS_data_GFS.nc') as ds_file:
    ds_GFS = ds_file.sel(lead_day=lead_day).load()
with xr.open_dataset(path+'GSS_data_ERA5RF.nc') as ds_file:
    ds_ERA5RF = ds_file.sel(lead_day=lead_day).load()
ds_GFS_mean = ds_GFS.mean(['event','region']).GSS
ds_ERA5RF_mean = ds_ERA5RF.mean(['event','region']).GSS
fig,ax = plt.subplots()
x=np.arange(len(ds_GFS_mean.values))
ax.bar(x-width/2,ds_GFS_mean.values,width,label='GFS')
ax.bar(x+width/2,ds_ERA5RF_mean.values,width,label='ERA5RF')
# Set the ticks on this Axes explicitly instead of on pyplot's "current" axes.
ax.set_xticks(x)
ax.set_xticklabels(seasonz,rotation=45)
ax.set_ylim([0,1])
ax.set_ylabel('Gilbert Skill Score')
ax.set_title('Day {} Forecast'.format(lead_day))
ax.grid(axis='y')
ax.legend()
fig.tight_layout()


In [72]:
#mean GSS per event
# Grouped bar chart of the GSS averaged over seasons and regions, one pair of
# bars (GFS, ERA5RF) per event type, for a single lead day.
lead_day = 5
width=0.35  # bar width; the two bars of a pair are shifted by -/+ width/2
# Load the selected lead day into memory and close the files right away, so no
# handle stays open on the netCDF files that the creation cell rewrites.
with xr.open_dataset(path+'GSS_data_GFS.nc') as ds_file:
    ds_GFS = ds_file.sel(lead_day=lead_day).load()
with xr.open_dataset(path+'GSS_data_ERA5RF.nc') as ds_file:
    ds_ERA5RF = ds_file.sel(lead_day=lead_day).load()
ds_GFS_mean = ds_GFS.mean(['season','region']).GSS
ds_ERA5RF_mean = ds_ERA5RF.mean(['season','region']).GSS
fig,ax = plt.subplots()
x=np.arange(len(ds_GFS_mean.values))
ax.bar(x-width/2,ds_GFS_mean.values,width,label='GFS')
ax.bar(x+width/2,ds_ERA5RF_mean.values,width,label='ERA5RF')
# Set the ticks on this Axes explicitly instead of on pyplot's "current" axes.
# NOTE(review): eventz abbreviations are applied positionally to the dataset's
# event axis — confirm they are in the same order as the stored event names.
ax.set_xticks(x)
ax.set_xticklabels(eventz,rotation=45)
ax.set_ylim([0,1])
ax.set_ylabel('Gilbert Skill Score')
ax.set_title('Day {} Forecast'.format(lead_day))
ax.grid(axis='y')
ax.legend()
fig.tight_layout()

In [75]:
#mean GSS per event and per season
# Two side-by-side heatmaps (one per model) of the region-averaged GSS for a
# single lead day: seasons on the y axis, event types on the x axis, with the
# value printed in every cell.
lead_day =5
# Load the selected lead day into memory and close the files right away, so no
# handle stays open on the netCDF files that the creation cell rewrites.
with xr.open_dataset(path+'GSS_data_GFS.nc') as ds_file:
    ds_GFS = ds_file.sel(lead_day=lead_day).load()
with xr.open_dataset(path+'GSS_data_ERA5RF.nc') as ds_file:
    ds_ERA5RF = ds_file.sel(lead_day=lead_day).load()
# Pin the layout to (season, event) so rows/columns match the tick labels below.
ds_GFS_mean = ds_GFS.mean(['region']).GSS.transpose('season','event')
ds_ERA5RF_mean = ds_ERA5RF.mean(['region']).GSS.transpose('season','event')
titlez=['GEFS Reforecast','ERA5 Reforecast']
meanz = [ds_GFS_mean,ds_ERA5RF_mean]

fig,axz=plt.subplots(1,2)
fig.suptitle('Gilbert Skill Score Day {} Forecast'.format(lead_day))
for i,ax in enumerate(axz):
    ax.imshow(meanz[i].values,cmap=cmap,vmin=0,vmax=1)
    ax.set_yticks(np.arange(len(seasonz)))
    ax.set_xticks(np.arange(len(eventz)))
    ax.set_yticklabels(seasonz)
    ax.set_xticklabels(eventz)
    ax.set_title('{}: Mean GSS = {:.2f}'.format(titlez[i],float(meanz[i].mean())))
    # j walks the rows (seasons) and k the columns (events). The original had
    # the two ranges swapped, which only worked because both lists have 4 items.
    for j in range(len(seasonz)):
        for k in range(len(eventz)):
            ax.text(k, j, '{:.2f}'.format(meanz[i].values[j, k]),
                    ha="center", va="center", color="k")


In [69]:
#Mean GSS per event and region
# Two side-by-side heatmaps (one per model) of the season-averaged GSS for a
# single lead day: regions on the y axis, event types on the x axis, with the
# value printed in every cell.
lead_day =5
# Load the selected lead day into memory and close the files right away, so no
# handle stays open on the netCDF files that the creation cell rewrites.
with xr.open_dataset(path+'GSS_data_GFS.nc') as ds_file:
    ds_GFS = ds_file.sel(lead_day=lead_day).load()
with xr.open_dataset(path+'GSS_data_ERA5RF.nc') as ds_file:
    ds_ERA5RF = ds_file.sel(lead_day=lead_day).load()
# Pin the layout to (region, event) so rows/columns match the tick labels below.
ds_GFS_mean = ds_GFS.mean(['season']).GSS.transpose('region','event')
ds_ERA5RF_mean = ds_ERA5RF.mean(['season']).GSS.transpose('region','event')
titlez=['GEFS Reforecast','ERA5 Reforecast']
meanz = [ds_GFS_mean,ds_ERA5RF_mean]

fig,axz=plt.subplots(1,2,figsize=(16,9))
fig.suptitle('Gilbert Skill Score Day {} Forecast'.format(lead_day))
for i,ax in enumerate(axz):
    ax.imshow(meanz[i].values,cmap=cmap,vmin=0,vmax=1)
    ax.set_yticks(np.arange(len(regionz)))
    ax.set_xticks(np.arange(len(eventz)))
    ax.set_yticklabels(regionz)
    ax.set_xticklabels(eventz)
    ax.set_title('{}: Mean GSS = {:.2f}'.format(titlez[i],float(meanz[i].mean())))
    # Annotate every cell: j is the row (region), k the column (event).
    for j in range(len(regionz)):
        for k in range(len(eventz)):
            ax.text(k, j, '{:.2f}'.format(meanz[i].values[j, k]),
                    ha="center", va="center", color="k")


In [68]:
#Mean GSS per season and region
# Two side-by-side heatmaps (one per model) of the event-averaged GSS for a
# single lead day: regions on the y axis, seasons on the x axis, with the
# value printed in every cell.
lead_day =5
# Load the selected lead day into memory and close the files right away, so no
# handle stays open on the netCDF files that the creation cell rewrites.
with xr.open_dataset(path+'GSS_data_GFS.nc') as ds_file:
    ds_GFS = ds_file.sel(lead_day=lead_day).load()
with xr.open_dataset(path+'GSS_data_ERA5RF.nc') as ds_file:
    ds_ERA5RF = ds_file.sel(lead_day=lead_day).load()
# Pin the layout to (region, season) so rows/columns match the tick labels below.
ds_GFS_mean = ds_GFS.mean(['event']).GSS.transpose('region','season')
ds_ERA5RF_mean = ds_ERA5RF.mean(['event']).GSS.transpose('region','season')
titlez=['GEFS Reforecast','ERA5 Reforecast']
meanz = [ds_GFS_mean,ds_ERA5RF_mean]

fig,axz=plt.subplots(1,2,figsize=(16,9))
fig.suptitle('Gilbert Skill Score Day {} Forecast'.format(lead_day))
for i,ax in enumerate(axz):
    ax.imshow(meanz[i].values,cmap=cmap,vmin=0,vmax=1)
    ax.set_yticks(np.arange(len(regionz)))
    ax.set_xticks(np.arange(len(seasonz)))
    ax.set_yticklabels(regionz)
    ax.set_xticklabels(seasonz)
    ax.set_title('{}: Mean GSS = {:.2f}'.format(titlez[i],float(meanz[i].mean())))
    # Annotate every cell: j is the row (region), k the column (season).
    for j in range(len(regionz)):
        for k in range(len(seasonz)):
            ax.text(k, j, '{:.2f}'.format(meanz[i].values[j, k]),
                    ha="center", va="center", color="k")
