In [1]:
%matplotlib notebook

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import re
import calendar
import os

from apbplotlib.colors import reanalysis_color

## Make dictionary of suggested colors for reanalysis comparison

dummy = """r,g,b,c,m,y,k,RGB Hexadecimal,reanalysis
    226, 31, 38,  0,86.28,83.19,11.37,  E21F26,  MERRA2
    246, 153, 153,  0,37.8,37.8,3.53,  F69999,  MERRA
    41, 95, 138,  70.29,31.16,0,45.88,  295F8A,  ERAI
    95, 152, 198,  52.02,23.23,0,22.35,  5F98C6,  ERA5
    175, 203, 227,  22.91,10.57,0,10.98,  AFCBE3,  ERA40
    114, 59, 122,  6.56,51.64,0,52.16,  723B7A,  JRA55
    173, 113, 181,  4.42,37.57,0,29.02,  AD71B5,  JRA-55C or JRA-55 AMIP
    214, 184, 218,  1.83,15.6,0,14.51,  D6B8DA,  JRA-25
    245, 126, 32,  0,48.57,86.94,3.92,  F57E20,  NCEP-R1
    253, 191, 110,  0,24.51,56.52,0.78,  FDBF6E,  NCEP-R2
    236, 0, 140,  0,100,40.68,7.45,  EC008C,  20CRv2c
    247, 153, 209,  0,38.06,15.38,3.14,  F799D1,  20CRv2
    0, 174, 239,  100,27.2,0,6.27,  00AEEF,  CERA-20C
    96, 200, 232,  58.62,13.79,0,9.02,  60C8E8,  ERA-20C
    52, 160, 72,  67.5,0,55,37.25,  34A048,  CFSR
    179, 91, 40,  0,49.16,77.65,29.8,  B35B28,  REM
    255,215,0,  0,15.69,100,0,  FFD700,  Other
    0, 0, 0,  0,0,0,100,  000000,  Obs
    119, 119, 119,  0,0,0,53.33,  777777,  Other Obs"""

reanalysis_color = {}
for line in dummy.split('\n')[1:]:
    v, k = line.split(',')[-2:]
    reanalysis_color[k.strip()] = '#'+v.strip()


## Get data

In [4]:
#filepath = r'C:\Users\apbarret\Documents\data\Arctic_precip\np_reanalysis_month_comparison.csv'
filepath = '/home/apbarret/src/SnowOnSeaIce/source/np_reanalysis_month_comparison.csv'
df = pd.read_csv(filepath, index_col=0, parse_dates=['Date'])
df = df.dropna()
df.head()

Unnamed: 0,Date,Lat,Lon,NP,Pg,Pc,ERAI_prectot,CFSR_prectot,MERRA_prectot,MERRA2_prectot,JRA55_prectot,ERA5_prectot
69,1980-01-01,77.65109,156.965,22.0,11.1,20.8,16.181337,17.735622,17.998478,20.466925,16.106415,16.787014
70,1980-02-01,77.895357,153.956488,22.0,5.5,9.7,7.366097,8.458306,7.518522,11.490108,7.683252,8.200598
71,1980-03-01,78.249753,153.594198,22.0,6.8,13.3,9.585857,11.132812,8.174619,13.588424,8.141494,9.099018
72,1980-04-01,78.580778,151.989833,22.0,2.0,5.7,2.645907,3.820143,7.373949,7.497791,2.491471,2.656623
73,1980-05-01,79.146167,147.978611,22.0,1.4,4.8,8.435853,13.876427,17.61336,11.865479,12.269393,15.831741


## Calculate bias as P_reanalysis/P_obs

In [5]:
p = re.compile('_prectot')
#these_columns = [col for col in df.columns if p.search(col)]
for col in df.columns:
    if p.search(col):
       df[col.replace('_prectot', '_bias')] = df[col]*100./df['Pc']
       #df[col.replace('_prectot', '_bias')].where(df[col.replace('_prectot', '_bias')] > 0., inplace=True)
    
df.head()

Unnamed: 0,Date,Lat,Lon,NP,Pg,Pc,ERAI_prectot,CFSR_prectot,MERRA_prectot,MERRA2_prectot,JRA55_prectot,ERA5_prectot,ERAI_bias,CFSR_bias,MERRA_bias,MERRA2_bias,JRA55_bias,ERA5_bias
69,1980-01-01,77.65109,156.965,22.0,11.1,20.8,16.181337,17.735622,17.998478,20.466925,16.106415,16.787014,77.794889,85.267413,86.531144,98.398678,77.434687,80.706798
70,1980-02-01,77.895357,153.956488,22.0,5.5,9.7,7.366097,8.458306,7.518522,11.490108,7.683252,8.200598,75.939144,87.199031,77.510536,118.454727,79.208787,84.542247
71,1980-03-01,78.249753,153.594198,22.0,6.8,13.3,9.585857,11.132812,8.174619,13.588424,8.141494,9.099018,72.074113,83.705353,61.463301,102.168602,61.214241,68.413669
72,1980-04-01,78.580778,151.989833,22.0,2.0,5.7,2.645907,3.820143,7.373949,7.497791,2.491471,2.656623,46.419428,67.020053,129.367535,131.540198,43.710023,46.607421
73,1980-05-01,79.146167,147.978611,22.0,1.4,4.8,8.435853,13.876427,17.61336,11.865479,12.269393,15.831741,175.746938,289.092229,366.945,247.197479,255.612354,329.827938


In [6]:
df = df.where(df > 0.)
df[['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']].min()

ERAI_bias      13.624942
MERRA_bias     13.690263
MERRA2_bias    21.150983
JRA55_bias     13.725106
CFSR_bias      14.246598
ERA5_bias      16.307092
dtype: float64

In [7]:
df[df.Date.dt.month == 7]['MERRA_bias']

75      84.262775
87      57.414465
121    131.101165
126    136.854957
138    164.923656
150     95.929977
159           NaN
171     81.186576
178    177.498567
182     74.782951
201    161.169690
213    124.263167
225    109.757470
239     65.478526
251    167.800675
Name: MERRA_bias, dtype: float64

In [25]:
def setBoxColors(bp):
    
    reanalyses = ['ERAI', 'MERRA', 'MERRA2', 'JRA55', 'CFSR', 'ERA5']

    for r, box in zip(reanalyses,bp['boxes']):
        # change outline color
        box.set( color=reanalysis_color[r], linewidth=1)
        
    for r, whisker0, whisker1 in zip(reanalyses, bp['whiskers'][0::2], bp['whiskers'][1::2]):
        whisker0.set(color=reanalysis_color[r], linewidth=2)
        whisker1.set(color=reanalysis_color[r], linewidth=2)

    for r, cap0, cap1 in zip(reanalyses, bp['caps'][0::2], bp['caps'][1::2]):
        cap0.set(color=reanalysis_color[r], linewidth=2)
        cap1.set(color=reanalysis_color[r], linewidth=2)
        
    for r, flier in zip(reanalyses, bp['fliers']):
        flier.set(marker='.', markerfacecolor=reanalysis_color[r],                  
                  markeredgecolor=reanalysis_color[r], markersize=5)
    
    for median in bp['medians']:
        median.set(color='k', linewidth=2)
        
    return



In [9]:
def columns2list(df, column=None):
    if not column:
        column = ['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
    print (column)
    result = []
    for c in column:
        x = df[c].values
        result.append(x[~np.isnan(x)])
        
    return result

In [35]:
fig, ax = plt.subplots(figsize=(10,8))

column = ['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
labels = [c.replace('_bias','') for c in column]

#flierprops = dict(marker='.', markerfacecolor=)

for month in range(1,13):
    pos = [month+p for p in [-0.375, -0.225, -0.075, 0.075, 0.225 ,0.375]]
    x = columns2list(df[df['Date'].dt.month == month])
    bp = ax.boxplot(x, positions=pos, widths=0.1, labels=None, patch_artist=True)
    setBoxColors(bp)

for x in np.arange(0,13):
    ax.axvline(x+0.5, color='0.6', linestyle=':')
    
ax.set_ylim(-10.,600.)
ax.set_xlim(0.5,12.5)

# Hide all but left y-axis
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['right'].set_visible(False)
#ax.set_xticks([])

# Format left y-axis
ax.set_ylabel('Bias %', fontsize=20)
ax.axhline(100., linestyle='--', linewidth=2, color='k', zorder=1)

ax.set_xticks(range(1,13))
ax.set_xticklabels(calendar.month_abbr[1:])

ax.tick_params(axis='both', which='major', labelsize=15)
ax.tick_params(axis='x', length=0)

# Create legend
handles = [mpatches.Patch(color=reanalysis_color[r], label=r) for r in labels]
ax.legend(handles=handles, fontsize=18, frameon=True, loc='upper left',)

#fig.savefig(os.path.join(r'C:\Users\apbarret\Documents\Presentations\AGU2018','yang_trajectory_reanalysis_bias.png'))
fig.savefig(os.path.join('/home/apbarret/src/SnowOnSeaIce/source',
                         'yang_trajectory_reanalysis_bias.png'))

<IPython.core.display.Javascript object>

['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']
['ERAI_bias', 'MERRA_bias', 'MERRA2_bias', 'JRA55_bias', 'CFSR_bias', 'ERA5_bias']


In [32]:
def sampleMean(x):
    return x.sample(10, axis=0).mean(axis=0)*0.01

columns = ['ERAI_bias','MERRA_bias','MERRA2_bias','JRA55_bias','CFSR_bias','ERA5_bias']
df[columns].groupby(df.Date.dt.month).apply(sampleMean)

Unnamed: 0_level_0,ERAI_bias,MERRA_bias,MERRA2_bias,JRA55_bias,CFSR_bias,ERA5_bias
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,0.661661,0.684695,1.250962,0.730071,0.914838,0.810173
2,0.631884,0.778913,1.234764,0.718447,0.942058,0.842394
3,0.663889,0.684428,1.186719,0.633248,0.794283,0.746506
4,0.711183,1.111308,1.602147,0.843689,1.031868,0.914827
5,1.134251,1.729758,1.803052,1.545161,1.846028,1.684381
6,2.09238,1.666458,2.071308,2.099509,2.893487,2.1285
7,1.519437,1.052395,1.564883,1.704888,2.078178,1.581744
8,1.361901,0.801562,1.41582,1.400598,1.905149,1.374771
9,0.942809,0.797141,1.184485,0.935703,1.348067,1.01614
10,1.363263,1.130068,2.166648,1.296008,1.736867,1.357612


In [33]:
from sklearn.model_selection import train_test_split

ImportError: No module named model_selection

In [None]:
def doCorrect(x, correction):
    columns = ['ERAI_prectot','MERRA_prectot','MERRA2_prectot','JRA55_prectot','CFSR_prectot','ERA5_prectot']
    month = x.Date.dt.month.values[0]
    for c in columns:
        x[c.replace('_prectot','_corr')] = x[c]/correction[c.replace('_prectot','_bias')].loc[month]
        x[c.replace('_prectot','_corrbias')] = x[c.replace('_prectot','_corr')]*100./x['Pc']
    return x

In [None]:
result_list = []
for i in range(50):
    train, test = train_test_split(df, test_size=0.33)
    correction = train[columns].groupby(train.Date.dt.month).mean()*0.01
    result_list.append(test.groupby(test.Date.dt.month).apply(doCorrect, correction))
    
result = pd.concat(result_list,axis=0)

In [31]:
fig2, ax2 = plt.subplots(figsize=(10,8))

column = ['ERAI_corrbias', 'MERRA_corrbias', 'MERRA2_corrbias', 'JRA55_corrbias', 'CFSR_corrbias', 'ERA5_corrbias']
labels = [c.replace('_corrbias','') for c in column]

for month in range(1,13):
    pos = [month+p for p in [-0.375, -0.225, -0.075, 0.075, 0.225 ,0.375]]
    #x = [df[df['Date'].dt.month == month][c].values for c in column]
    x = columns2list(result[result['Date'].dt.month == month], column=column)
    bp = ax2.boxplot(x, positions=pos, widths=0.1, labels=None, patch_artist=True)
    setBoxColors(bp)
    #ax2.plot(month, junk[month-1], 'ko')

ax2.set_ylim(-10.,700.)
ax2.set_xlim(0,13)

# Hide all but left y-axis
ax2.spines['top'].set_visible(False)
ax2.spines['bottom'].set_visible(False)
ax2.spines['right'].set_visible(False)
#ax.set_xticks([])

# Format left y-axis
ax2.set_ylabel('Bias %', fontsize=20)
ax2.axhline(100., linestyle='--', color='k')

ax2.set_xticks(range(1,13))
ax2.set_xticklabels(calendar.month_abbr[1:])

ax2.tick_params(axis='both', which='major', labelsize=15)
ax2.tick_params(axis='x', length=0)

# Create legend
handles = [mpatches.Patch(color=reanalysis_color[r], label=r) for r in labels]
ax2.legend(handles=handles, fontsize=18, frameon=False)

#fig2.savefig(os.path.join(r'C:\Users\apbarret\Documents\Presentations\AGU2018',
#                          'yang_trajectory_reanalysis_corrected_bias_ensemble.png'))

<IPython.core.display.Javascript object>

NameError: name 'result' is not defined

In [None]:
result['ERAI_bias'].groupby(result['Date'].dt.month).median()*1e-2

In [None]:
df.columns