In [1]:
%load_ext autoreload
%autoreload 2
import xarray as xr
from matplotlib import pyplot as plt
import numpy as np
#set path to import my_tools.py
import sys
sys.path.append('/home/onno/Thesis/Scripts')
import my_tools
import pandas as pd
import glob
%matplotlib qt



In [4]:
#Root directory holding the data files; adjust to the local machine
path = '/media/onno/Algemeen/Thesis/'
#NetCDF file names for the forecast (GEFS) and the reference (ERA) data
file_gfs = 'gefsrf2_env_wledit2000-10000_latavg_v300_control0-252h_6hourly_2x2_dec84-nov19.nc'
file_era = 'era51_env_wledit2000-10000_latavg_v300_79-19_6hourly_anom_from_smoothed04_clim_smoothed.nc'
#Open both files without letting xarray decode the time axis; the raw time
#values are converted by the my_tools helpers below instead
gfs, era = (
    xr.open_dataset(f'{path}{file_name}', decode_times=False)
    for file_name in (file_gfs, file_era)
)
#Overwrite the raw time coordinate of each dataset with the helper's output
#(presumably regular datetime values -- confirm against my_tools)
gfs['time'] = my_tools.convert_date_gefs_r(gfs['time'].values)
era['time'] = my_tools.convert_date_era_r(era['time'].values)

In [5]:
#Indexer for the 24 by 24 degree box: latitude 64 -> 40, longitude 0 -> 24.
#The latitude slice runs from high to low, so it assumes lat is stored in
#descending order -- TODO confirm for both files.
europe_box = {'lat': slice(64, 40), 'lon': slice(0, 24)}
#Apply the same box to both datasets
gfs_europe = gfs.sel(**europe_box)
era_europe = era.sel(**europe_box)

In [45]:
#Latitude weights (cosine of latitude) do not depend on the lead day, so they
#are computed once outside the loop
weights = np.cos(np.deg2rad(gfs_europe.lat))
#Last date that is used for verification
last_valid_date = pd.Timestamp('2019-11-30')
#Loop over all lead days (from 0 till 10)
for i in range(11):
    lead_offset = pd.Timedelta(days=i)
    #Daily dates on which the forecasts are selected, and the dates they verify on.
    #NOTE(review): assumes the GEFS 'time' axis holds initialization times and
    #'lead' is given in hours -- confirm against the file.
    init_times = pd.date_range('1984-12-01', last_valid_date - lead_offset)
    valid_times = init_times + lead_offset
    #Select the forecasts for this lead time and relabel their time axis with the
    #valid time. xarray arithmetic aligns on coordinate labels, so without this
    #relabelling each forecast would be compared with the analysis of the same
    #timestamp instead of the analysis at initialization time + lead.
    gfs_europe_d = gfs_europe.sel(time=init_times, lead=i*24).assign_coords(time=valid_times)
    #Select the reference data on the matching valid dates
    era_europe_d = era_europe.sel(time=valid_times)
    #Calculate difference between datasets to get array of errors for all days in dataset
    diff = gfs_europe_d - era_europe_d
    #Calculate the latitude weighted mean error over the area with xarray's weighted function
    diff_weighted = diff.weighted(weights)
    weighted_mean = diff_weighted.mean(dim=['lat','lon'])
    #Write data to netcdf file. 'path' already ends in '.../Thesis/', so the
    #folder name is appended directly; this is the directory the plotting cell
    #reads its files from.
    weighted_mean.to_netcdf(path+'GFS_error_weighted_mean/GFS_error_weighted_mean_envelope_day_{:02d}_europe.nc'.format(i))


In [13]:
#list all relevant files
filez = sorted(glob.glob('/media/onno/Algemeen/Thesis/GFS_error_weighted_mean/*'))
#create base figure: one panel per plotted file
fig,axz = plt.subplots(2,5)
#The first file in the sorted list is skipped below (presumably the day-00
#file -- confirm), so one more file than panels is required
if len(filez) < axz.size + 1:
    raise FileNotFoundError(
        'Expected at least {} files in GFS_error_weighted_mean, found {}'.format(axz.size + 1, len(filez)))
#set number of bins for histogram
bins = 40
#loop over all subplots
for i,ax in enumerate(axz.flat):
    #open accompanying 1D dataset for each error; the context manager closes
    #the file again once the values and statistics have been read
    with xr.open_dataset(filez[i+1],decode_times=False) as ds:
        errors = ds.v.values
        #Calculate median error and standard deviation for each lead day
        stdv = float(ds.v.std())
        median = float(ds.v.median())
    #plot data
    ax.hist(x=errors,bins=bins,range=(-40,40))
    ax.set_ylim([0,2000])
    ax.grid()
    ax.set_ylabel('N')
    ax.set_xlabel('Error (m/s)')
    #set figure properties: the statistics text is placed lower for i>=5 (the
    #second row of the 2x5 grid) than for the first row
    #NOTE(review): the label shows the symbol mu, but the value is the median -- confirm which is intended
    text_y = -0.125 if i<5 else -0.225
    ax.text(x=0.5,y=text_y,s='\u03BC = {:.2f} \u03C3 = {:.2f}'.format(median,stdv),
            transform=ax.transAxes,horizontalalignment='center',fontsize=12)
    ax.label_outer()
fig.suptitle('GEFS Envelope Forecast 10 day Error Distribution')
fig.show()
    
    