In [16]:
%load_ext autoreload
%autoreload 2
import xarray as xr
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import sys
sys.path.append('/home/onno/Thesis/Scripts')
import my_tools
from my_tools import file_dic, plot_dic
from cmap import ncl_colormap
from mpl_toolkits.basemap import Basemap
from scipy import stats
%matplotlib qt

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
#set path for 
path = '/media/onno/Algemeen/Thesis/fcst_RWP_properties/duration/'

Plot Histograms of standardized RWP forecast errors

In [34]:
eventz = ['persistent_hw','persistent_cw','short_hw','short_cw']
event_titlez = ['Persistent Warm Extreme','Persistent Cold Extreme',
               'Short-Lived Warm Extreme','Short-Lived Cold Extreme']
rankz = ['good','bad']
seasonz = ['DJF','JJA']
coordinatez = [
(54,46,6,14),#Germany
(44,36,352,360), #Spain
(54,46,26,34), #Ukraine
(58,50,352,360), #UK
(42,34,28,36), #Turkey
(68,60,22,30), #Finland
(66,58,6,14), #Norway/Sweden
(60,52,46,54)]
file = '{}_forecasts_duration_errors_standardized_{}_lon_{}_{}_lat_{}_{}_{}_{}_lead_day_{}.txt'
lead_dayz = [3,5]
modelz = ['GFS','ERA5RF']


for lead_day in lead_dayz:
    df = pd.DataFrame(index = np.arange(8),columns=['p_env_GFS','p_cp_GFS','μ_env_good_GFS','μ_env_bad_GFS',
                                                    'μ_cp_good_GFS','μ_cp_bad_GFS','μ_t850_good_GFS','μ_t850_bad_GFS',
                                                    'p_env_ERA5RF','p_cp_ERA5RF','μ_env_good_ERA5RF','μ_env_bad_ERA5RF',
                                                    'μ_cp_good_ERA5RF','μ_cp_bad_ERA5RF','μ_t850_good_ERA5RF','μ_t850_bad_ERA5RF'])
    count = 0    
    for i,event in enumerate(eventz):
        for season in seasonz:
            fig,axz = plt.subplots(2,3,figsize=(16,9),sharey=True)
            for j,model in enumerate(modelz):   
                env_error_good = np.array([])
                env_error_bad = np.array([])
                cp_error_good = np.array([])
                cp_error_bad = np.array([])
                t850_error_good = np.array([])
                t850_error_bad = np.array([])            
                for lat_0,lat_1,lon_0,lon_1 in coordinatez:
                    df_good = pd.read_csv(path+file.format('good',event,
                                                          lon_0,lon_1,lat_0,lat_1,
                                                          season,model,lead_day),index_col=0)
                    df_bad = pd.read_csv(path+file.format('bad',event,
                                              lon_0,lon_1,lat_0,lat_1,
                                              season,model,lead_day),index_col=0)
                    env_error_good = np.concatenate((env_error_good,df_good['envelope'].values)).astype(float)
                    env_error_bad = np.concatenate((env_error_bad,df_bad['envelope'].values)).astype(float)
                    cp_error_good = np.concatenate((cp_error_good,df_good['phasespeed'].values)).astype(float)
                    cp_error_bad = np.concatenate((cp_error_bad,df_bad['phasespeed'].values)).astype(float)
                    t850_error_good = np.concatenate((t850_error_good,df_good['T850'].values)).astype(float)
                    t850_error_bad = np.concatenate((t850_error_bad,df_bad['T850'].values)).astype(float)

                p_env = stats.ttest_ind(env_error_good,env_error_bad,equal_var=False)[1]
                p_cp = stats.ttest_ind(cp_error_good,cp_error_bad,equal_var=False,nan_policy='omit')[1]
                p_t850 = stats.ttest_ind(t850_error_good,t850_error_bad,equal_var=False,nan_policy='omit')[1]
                df.loc[count,'p_cp_{}'.format(model)]=p_cp
                df.loc[count,'p_env_{}'.format(model)]=p_env
                df.loc[count,'μ_env_good_{}'.format(model)]=np.nanmean(env_error_good)
                df.loc[count,'μ_env_bad_{}'.format(model)]=np.nanmean(env_error_bad)
                df.loc[count,'μ_cp_good_{}'.format(model)]=np.nanmean(cp_error_good)
                df.loc[count,'μ_cp_bad_{}'.format(model)]=np.nanmean(cp_error_bad)
                df.loc[count,'μ_t850_good_{}'.format(model)]=np.nanmean(t850_error_good)
                df.loc[count,'μ_t850_bad_{}'.format(model)]=np.nanmean(t850_error_bad)

                to_list = df.index.to_list()
                to_list[count] = '{}_{}'.format(event,season)
                df.index = to_list
                count +=1
                bins = np.linspace(-4,4,17)
                ax1 = axz[j,0]
                ax1.hist([env_error_good,env_error_bad],bins,
                         label=['Good N = {}'.format(len(env_error_good)),'Bad N = {}'.format(len(env_error_bad))],density=True)
                if j==0:
                    ax1.set_title('RWP Envelope')
                ax1.set_xticks(np.linspace(-4,4,9))
                if j==1:
                    ax1.set_xlabel('Standardized Forecast Error E (m/s)')
                ax1.text(0.01,0.99,'$μ_{{good}}$ = {:.2f} \n$μ_{{bad}}$ = {:.2f} \n$σ_{{good}}$ = {:.2f} \n$σ_{{bad}}$ = {:.2f} \n$p$ = {:.3f}'\
                        .format(np.mean(env_error_good),np.mean(env_error_bad),np.std(env_error_good),np.std(env_error_bad),p_env),
                        transform = ax1.transAxes,verticalalignment='top',horizontalalignment='left')
                if j==0:
                    ax1.set_ylabel('GEFS Reforecast')
                else:
                    ax1.set_ylabel('ERA5 Reforecast')
                ax2 = axz[j,1]
                if j==0:
                    ax2.hist([cp_error_good,cp_error_bad],bins,
                             label=['Good N = {}'.format(len(cp_error_good)),'Bad N = {}'.format(len(cp_error_bad))],density=True)
                    ax2.set_title('RWP Phasespeed')
                    ax2.set_xticks(np.linspace(-4,4,9))
                    ax2.text(0.01,0.99,'$μ_{{good}}$ = {:.2f} \n$μ_{{bad}}$ = {:.2f} \n$σ_{{good}}$ = {:.2f} \n$σ_{{bad}}$ = {:.2f} \n$p$ = {:.3f}'\
                    .format(np.nanmean(cp_error_good),np.nanmean(cp_error_bad),np.nanstd(cp_error_good),np.nanstd(cp_error_bad),p_cp),
                    transform = ax2.transAxes,verticalalignment='top',horizontalalignment='left')
                else:
                    
                    ax2.set_xticks(np.linspace(-4,4,9))
                    ax2.set_xlabel('Standardized Forecast Error Cp (m/s)')
                ax3 = axz[j,2]
                ax3.hist([t850_error_good,t850_error_bad],bins,
                         label=['Good N = {}'.format(len(t850_error_good)),'Bad N = {}'.format(len(t850_error_bad))],density=True)
                if j==0:
                    ax3.set_title('850 hPa Temperature')
                ax3.set_xticks(np.linspace(-4,4,9))
                if j==1:
                    ax3.set_xlabel('Standardized Forecast Error T850 (K)')
                ax3.text(0.01,0.99,'$μ_{{good}}$ = {:.2f} \n$μ_{{bad}}$ = {:.2f} \n$σ_{{good}}$ = {:.2f} \n$σ_{{bad}}$ = {:.2f} \n$p$ = {:.3f}'\
                .format(np.nanmean(t850_error_good),np.nanmean(t850_error_bad),np.nanstd(t850_error_good),np.nanstd(t850_error_bad),p_t850),
                transform = ax3.transAxes,verticalalignment='top',horizontalalignment='left')
                ax3.legend(bbox_to_anchor=(1,1),loc='upper left')
            
            fig.suptitle('Standardized Forecast Error {} {} GFS {} Day Forecast'.format(event_titlez[i],season,lead_day))
            fig.subplots_adjust(left=0.05,bottom=0.07,right=0.9,top=0.9,wspace=0.1,hspace=0.1)
            sys.exit()
            fig.savefig(path + 'histograms/forecast_errors_standardized_{}_{}_lead_day_{}'.format(event,season,lead_day))
            plt.close(fig)
        df.to_csv(path+'histograms/forecast_errors_stat_significance_lead_day_{}.txt'.format(lead_day))        



SystemExit: 

Plot Histograms of standardized RWP property values

In [34]:
eventz = ['persistent_hw','persistent_cw','short_hw','short_cw']
event_titlez = ['Persistent Warm Extreme','Persistent Cold Extreme',
               'Short-Lived Warm Extreme','Short-Lived Cold Extreme']
rankz = ['good','bad']
seasonz = ['DJF','JJA']
coordinatez = [
(54,46,6,14),#Germany
(44,36,352,360), #Spain
(54,46,26,34), #Ukraine
(58,50,352,360), #UK
(42,34,28,36), #Turkey
(68,60,22,30), #Finland
(66,58,6,14), #Norway/Sweden
(60,52,46,54)]
file = '{}_forecasts_duration_RWP_properties_standardized_{}_lon_{}_{}_lat_{}_{}_{}_{}_lead_day_{}.txt'
lead_dayz = [3,5]
for lead_day in lead_dayz:
    df = pd.DataFrame(index = np.arange(8),columns=['p_env','p_cp','μ_env_good','μ_env_bad',
                                                    'μ_cp_good','μ_cp_bad','μ_t850_good','μ_t850_bad'])
    count = 0
    for i,event in enumerate(eventz):
        for season in seasonz:
            env_error_good = np.array([])
            env_error_bad = np.array([])
            cp_error_good = np.array([])
            cp_error_bad = np.array([])
            t850_error_good = np.array([])
            t850_error_bad = np.array([])            
            for lat_0,lat_1,lon_0,lon_1 in coordinatez:
                df_good = pd.read_csv(path+file.format('good',event,
                                                      lon_0,lon_1,lat_0,lat_1,
                                                      season,model,lead_day),index_col=0)
                df_bad = pd.read_csv(path+file.format('bad',event,
                                          lon_0,lon_1,lat_0,lat_1,
                                          season,model,lead_day),index_col=0)
                env_error_good = np.concatenate((env_error_good,df_good['envelope'].values)).astype(float)
                env_error_bad = np.concatenate((env_error_bad,df_bad['envelope'].values)).astype(float)
                cp_error_good = np.concatenate((cp_error_good,df_good['phasespeed'].values)).astype(float)
                cp_error_bad = np.concatenate((cp_error_bad,df_bad['phasespeed'].values)).astype(float)
                t850_error_good = np.concatenate((t850_error_good,df_good['T850'].values)).astype(float)
                t850_error_bad = np.concatenate((t850_error_bad,df_bad['T850'].values)).astype(float)
                
            fig,axz = plt.subplots(1,3,figsize=(16,9))
            bins = np.linspace(-4,4,19)
            p_env = stats.ttest_ind(env_error_good,env_error_bad,equal_var=False)[1]
            p_cp = stats.ttest_ind(cp_error_good,cp_error_bad,nan_policy='omit',equal_var=False)[1]
            df.loc[count,'p_cp']=p_cp
            df.loc[count,'p_env']=p_env
            df.loc[count,'μ_env_good']=np.nanmean(env_error_good)
            df.loc[count,'μ_env_bad']=np.nanmean(env_error_bad)
            df.loc[count,'μ_cp_good']=np.nanmean(cp_error_good)
            df.loc[count,'μ_cp_bad']=np.nanmean(cp_error_bad)            
            df.loc[count,'μ_t850_good']=np.nanmean(t850_error_good)
            df.loc[count,'μ_t850_bad']=np.nanmean(t850_error_bad)   
            
            to_list = df.index.to_list()
            to_list[count] = '{}_{}'.format(event,season)
            df.index = to_list
            count +=1
            ax1 = axz.flat[0]
            ax1.hist([env_error_good,env_error_bad],bins,
                     label=['Good N = {}'.format(len(env_error_good)),'Bad N = {}'.format(len(env_error_bad))],density=True)
            ax1.set_title('Envelope')
            ax1.set_xticks(np.linspace(-4,4,9))
            ax1.set_xlabel('Standardized E (m/s)')
            ax1.text(0.01,0.99,'$μ_{{good}}$ = {:.2f} \n$μ_{{bad}}$ = {:.2f} \n$σ_{{good}}$ = {:.2f} \n$σ_{{bad}}$ = {:.2f} \n$p$ = {:.3f}'\
                    .format(np.mean(env_error_good),np.mean(env_error_bad),np.std(env_error_good),np.std(env_error_bad),p_env),
                    transform = ax1.transAxes,verticalalignment='top',horizontalalignment='left')
            ax1.legend()
            ax2 = axz.flat[1]
            ax2.hist([cp_error_good,cp_error_bad],bins,
                     label=['Good N = {}'.format(len(cp_error_good)),'Bad N = {}'.format(len(cp_error_bad))],density=True)
            ax2.set_title('Phasespeed')
            ax2.set_xticks(np.linspace(-4,4,9))
            ax2.set_xlabel('Standardized Cp (m/s)')
            ax2.text(0.01,0.99,'$μ_{{good}}$ = {:.2f} \n$μ_{{bad}}$ = {:.2f} \n$σ_{{good}}$ = {:.2f} \n$σ_{{bad}}$ = {:.2f} \n$p$ = {:.3f}'\
            .format(np.nanmean(cp_error_good),np.nanmean(cp_error_bad),np.nanstd(cp_error_good),np.nanstd(cp_error_bad),p_cp),
            transform = ax2.transAxes,verticalalignment='top',horizontalalignment='left')
            ax2.legend()
            ax3 = axz.flat[2]
            ax3.hist([t850_error_good,t850_error_bad],bins,
                     label=['Good N = {}'.format(len(t850_error_good)),'Bad N = {}'.format(len(t850_error_bad))],density=True)
            ax3.set_title('850 hPa Temperature')
            ax3.set_xticks(np.linspace(-4,4,9))
            ax3.set_xlabel('Standardized T850 (K)')
            ax3.text(0.01,0.99,'$μ_{{good}}$ = {:.2f} \n$μ_{{bad}}$ = {:.2f} \n$σ_{{good}}$ = {:.2f} \n$σ_{{bad}}$ = {:.2f} \n$p$ = {:.3f}'\
            .format(np.nanmean(t850_error_good),np.nanmean(t850_error_bad),np.nanstd(t850_error_good),np.nanstd(t850_error_bad),p_t850),
            transform = ax3.transAxes,verticalalignment='top',horizontalalignment='left')
            ax3.legend()
            fig.suptitle('Standardized Mean RWP Properties {} {} GFS {} Day Forecast'.format(event_titlez[i],season,lead_day))
            sys.exit()
            fig.savefig(path + 'histograms/RWP_properties_standardized_{}_{}_GFS_lead_day_{}'.format(event,season,lead_day))
            plt.close(fig)
    df.to_csv(path+'histograms/RWP_properties_stat_significance_GFS_lead_day_{}.txt'.format(lead_day))                      

SystemExit: 