In [1]:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import matplotlib.pyplot as plt
import sys, os
import numpy as np
import pandas as pd
from scipy import stats
from datetime import datetime
import matplotlib.gridspec as gridspec
import argparse
import xarray as xr

ImportError: libjpeg.so.9: failed to map segment from shared object

In [36]:
def get_modified_KGE(obs,sim):    
    sd_sim=np.std(sim, ddof=1)
    sd_obs=np.std(obs, ddof=1)
    
    m_sim=np.mean(sim)
    m_obs=np.mean(obs)
    
    r=(np.corrcoef(sim,obs))[0,1]
    relvar=(float(sd_sim)/float(m_sim))/(float(sd_obs)/float(m_obs))
    bias=float(m_sim)/float(m_obs)
    
    kge=1.0-np.sqrt((r-1)**2 +(relvar-1)**2 + (bias-1)**2)
    return kge

def get_RMSE(obs,sim):
    rmse = np.sqrt(np.nanmean(np.power((sim - obs),2)))
    return rmse

In [37]:
levelArray=['0', '1a', '1b', '1c', '2a', '2b', '2c', '3']

aw_apriori_baseDir = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/model/06282000'
aw_sim_basedir = os.path.join(aw_apriori_baseDir,'source_info/apriori_results')

calib_dir = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/calib/06282000'
domain_basename='DDS'

# obs_file = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/model/obs/obs_flow.06282000.cfs.csv'
obs_file = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/model/obs/obs_flow.BBR_IN.cfs.csv'
obsFile = obs_file
obs_unit = 'cfs'
q_seg_index=1
statStartDate,statEndDate='2007-10-01','2012-09-30'
plotStartDate,plotEndDate='1970-10-01','2019-12-31'

output_baseDir = os.path.join(calib_dir,'analysis','4_apriori_results')
if not os.path.exists(output_baseDir):
    os.makedirs(output_baseDir)
    

#### 1. Calculate a-priori statistics from Hongli's run

#### Initialize at 2007<t>
This turns out that the model needs a spin up. Otherwise, model performance is poor.

In [25]:
for complexity_level in levelArray:
    
    domain_name=complexity_level+'_'+domain_basename+'2007'
    simFile = os.path.join(calib_dir,domain_name,'calib/output_archive/experiment1/run1/sflow.mizuRoute.nc')
    stat_output = os.path.join(output_baseDir,'trial_stats_lev%s_HL.txt'%(complexity_level))

    # #### 2. Calculate 
    # --- read simulated flow (cms) --- 
    simVarName = 'IRFroutedRunoff'
    f    = xr.open_dataset(simFile)
    time = f['time'].values
    sim  = f[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    # --- read observed flow (cfs or cms) --- 

    df_obs = pd.read_csv(obs_file, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                         parse_dates=True, infer_datetime_format=True)  
    df_obs.columns = ['obs']

    # convert obs from cfs to cms
    if obs_unit == 'cfs':
        df_obs = df_obs/35.3147    

    # --- merge the two df based on time index--- 
    df_sim_eval = df_sim.truncate(before=statStartDate, after=statEndDate)
    df_obs_eval = df_obs.truncate(before=statStartDate, after=statEndDate)
    df_merge = pd.concat([df_obs_eval, df_sim_eval], axis=1)
    df_merge = df_merge.dropna()

    # --- calculate diagnostics --- 
    kge = get_modified_KGE(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    rmse = get_RMSE(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    # bias_err, abs_err = get_mean_error(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    # obs_month_mean, sim_month_mean = get_month_mean_flow(obs=df_merge['obs'].values, sim=df_merge['sim'].values, sim_time=sim_time)

    # --- save --- 
    f = open(stat_output, 'w+')
    f.write('%.6f' %kge + '\t#KGE\n')
    f.write('%.6f' %rmse + '\t#RMSE (cms)\n')
    # f.write('%.6f' %bias_err + '\t#MBE (cms)\n')
    # f.write('%.6f' %abs_err + '\t#MAE (cms)\n')
    f.close()
    
    print(complexity_level,kge,rmse)

0 -4.011730432539086 571.9685954134139
1a -4.024521659814307 572.064317599153
1b -4.011767088010839 571.9596261406884
1c -3.6379230670854623 572.2205165861408
2a -4.014482375815964 571.9964094031495
2b -3.6662459597391583 572.0056161198117
2c -3.7092592588697624 571.9253160004369
3 -3.71404018183625 571.8631198118549


#### Initialize at 2005<t>
A two-year spin up is helpful in getting the right simulation results.

In [23]:
for complexity_level in levelArray:
    
    domain_name=complexity_level+'_'+domain_basename+'2005'
    simFile = os.path.join(calib_dir,domain_name,'calib/output_archive/experiment1/run1/sflow.mizuRoute.nc')
    stat_output = os.path.join(output_baseDir,'trial_stats_lev%s_HL.txt'%(complexity_level))

    # #### 2. Calculate 
    # --- read simulated flow (cms) --- 
    simVarName = 'IRFroutedRunoff'
    f    = xr.open_dataset(simFile)
    time = f['time'].values
    sim  = f[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    # --- read observed flow (cfs or cms) --- 

    df_obs = pd.read_csv(obs_file, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                         parse_dates=True, infer_datetime_format=True)  
    df_obs.columns = ['obs']

    # convert obs from cfs to cms
    if obs_unit == 'cfs':
        df_obs = df_obs/35.3147    

    # --- merge the two df based on time index--- 
    df_sim_eval = df_sim.truncate(before=statStartDate, after=statEndDate)
    df_obs_eval = df_obs.truncate(before=statStartDate, after=statEndDate)
    df_merge = pd.concat([df_obs_eval, df_sim_eval], axis=1)
    df_merge = df_merge.dropna()

    # --- calculate diagnostics --- 
    kge = get_modified_KGE(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    rmse = get_RMSE(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    # bias_err, abs_err = get_mean_error(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    # obs_month_mean, sim_month_mean = get_month_mean_flow(obs=df_merge['obs'].values, sim=df_merge['sim'].values, sim_time=sim_time)

    # --- save --- 
    f = open(stat_output, 'w+')
    f.write('%.6f' %kge + '\t#KGE\n')
    f.write('%.6f' %rmse + '\t#RMSE (cms)\n')
    # f.write('%.6f' %bias_err + '\t#MBE (cms)\n')
    # f.write('%.6f' %abs_err + '\t#MAE (cms)\n')
    f.close()
    
    print(complexity_level,kge,rmse)

0 0.3909972839406797 41.253475188522465
1a 0.3946594337890499 42.62685801580675
1b 0.4168935021508725 41.25146351866628
1c 0.722437325853413 46.92782476369667
2a 0.398185995061905 41.798031535593054
2b 0.7407271612419424 44.01017638068732
2c 0.7339599042207572 42.7654144666763
3 0.7358076634979539 41.72585852201133


#### 2. Calculate a-priori statistics from Andy's run

In [31]:
for complexity_level in levelArray:
    
    simFile = os.path.join(aw_sim_basedir,complexity_level,'sflow.h.1970-01-01-00000.nc')
    stat_output = os.path.join(output_baseDir,'trial_stats_lev%s_AW.txt'%(complexity_level))

    # #### 2. Calculate 
    # --- read simulated flow (cms) --- 
    simVarName = 'IRFroutedRunoff'
    f    = xr.open_dataset(simFile)
    time = f['time'].values
    sim  = f[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    # --- read observed flow (cfs or cms) --- 

    df_obs = pd.read_csv(obs_file, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                         parse_dates=True, infer_datetime_format=True)  
    df_obs.columns = ['obs']

    # convert obs from cfs to cms
    if obs_unit == 'cfs':
        df_obs = df_obs/35.3147    

    # --- merge the two df based on time index--- 
    df_sim_eval = df_sim.truncate(before=statStartDate, after=statEndDate)
    df_obs_eval = df_obs.truncate(before=statStartDate, after=statEndDate)
    df_merge = pd.concat([df_obs_eval, df_sim_eval], axis=1)
    df_merge = df_merge.dropna()

    # --- calculate diagnostics --- 
    kge = get_modified_KGE(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    rmse = get_RMSE(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    # bias_err, abs_err = get_mean_error(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    # obs_month_mean, sim_month_mean = get_month_mean_flow(obs=df_merge['obs'].values, sim=df_merge['sim'].values, sim_time=sim_time)

    # --- save --- 
    f = open(stat_output, 'w+')
    f.write('%.6f' %kge + '\t#KGE\n')
    f.write('%.6f' %rmse + '\t#RMSE (cms)\n')
    # f.write('%.6f' %bias_err + '\t#MBE (cms)\n')
    # f.write('%.6f' %abs_err + '\t#MAE (cms)\n')
    f.close()
    
    print(complexity_level,kge,rmse)

0 0.3910192295648307 41.24252460589737
1a 0.39469312732525297 42.61492882779527
1b 0.4169343345787716 41.23978594005742
1c 0.7226686528381707 46.95503642485085
2a 0.39821425741204863 41.786413764111515
2b 0.7415125206984685 44.10119279239361
2c 0.7361046870336252 42.89978247973635
3 0.7408427006879781 41.8600302383614


In [None]:
for complexity_level in levelArray:
    
    simFile = os.path.join(aw_sim_basedir,complexity_level,'sflow.h.1970-01-01-00000.nc')
    stat_output = os.path.join(output_baseDir,'trial_stats_lev%s_AW.txt'%(complexity_level))

    # #### 2. Calculate 
    # --- read simulated flow (cms) --- 
    simVarName = 'IRFroutedRunoff'
    f    = xr.open_dataset(simFile)
    time = f['time'].values
    sim  = f[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    # --- read observed flow (cfs or cms) --- 

    df_obs = pd.read_csv(obs_file, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                         parse_dates=True, infer_datetime_format=True)  
    df_obs.columns = ['obs']

    # convert obs from cfs to cms
    if obs_unit == 'cfs':
        df_obs = df_obs/35.3147    

    # --- merge the two df based on time index--- 
    df_sim_eval = df_sim.truncate(before=statStartDate, after=statEndDate)
    df_obs_eval = df_obs.truncate(before=statStartDate, after=statEndDate)
    df_merge = pd.concat([df_obs_eval, df_sim_eval], axis=1)
    df_merge = df_merge.dropna()

    # --- drop calibration period (for manuscript purpose) ---
    calibStartDate,calibEndDate = '2007-10-01','2012-09-30'
    calibStartDate = datetime.datetime.strptime(calibStartDate,time_format)
    calibEndDate = datetime.datetime.strptime(calibEndDate,time_format)    
    df_merge_new = df_merge.loc[(df_merge.index < calibStartDate) | (df_merge.index > calibEndDate)]
    
    # --- calculate diagnostics --- 
    kge = get_modified_KGE(obs=df_merge_new['obs'].values, sim=df_merge_new['sim'].values)
    rmse = get_RMSE(obs=df_merge_new['obs'].values, sim=df_merge_new['sim'].values)
    # bias_err, abs_err = get_mean_error(obs=df_merge_new['obs'].values, sim=df_merge_new['sim'].values)
    # obs_month_mean, sim_month_mean = get_month_mean_flow(obs=df_merge_new['obs'].values, sim=df_merge_new['sim'].values, sim_time=sim_time)
    
    # --- save --- 
    stat_output_valid = stat_output.replace('.txt','_valid.txt')
    stat_output_valid = os.path.join(output_baseDir, stat_output_valid)

    f = open(stat_output_valid, 'w+')
    f.write('%.6f' %kge + '\t#KGE\n')
    f.write('%.6f' %rmse + '\t#RMSE (cms)\n')
    # f.write('%.6f' %bias_err + '\t#MBE (cms)\n')
    # f.write('%.6f' %abs_err + '\t#MAE (cms)\n')
    f.close()

    ######### Calculate calib period statistics #########
    # --- extract calibration period (for manuscript purpose) ---
    calibStartDate,calibEndDate = '2007-10-01','2012-09-30'
    calibStartDate = datetime.datetime.strptime(calibStartDate,time_format)
    calibEndDate = datetime.datetime.strptime(calibEndDate,time_format)    
    df_merge_new = df_merge.loc[(df_merge.index >= calibStartDate) | (df_merge.index <= calibEndDate)]
    
    # --- calculate diagnostics --- 
    kge = get_modified_KGE(obs=df_merge_new['obs'].values, sim=df_merge_new['sim'].values)
    rmse = get_RMSE(obs=df_merge_new['obs'].values, sim=df_merge_new['sim'].values)
    # bias_err, abs_err = get_mean_error(obs=df_merge_new['obs'].values, sim=df_merge_new['sim'].values)
    # obs_month_mean, sim_month_mean = get_month_mean_flow(obs=df_merge_new['obs'].values, sim=df_merge_new['sim'].values, sim_time=sim_time)
    
    # --- save --- 
    stat_output_calib = stat_output.replace('.txt','_calib.txt')
    stat_output_calib = os.path.join(output_baseDir, stat_output_calib)

    f = open(stat_output_calib, 'w+')
    f.write('%.6f' %kge + '\t#KGE\n')
    f.write('%.6f' %rmse + '\t#RMSE (cms)\n')
    # f.write('%.6f' %bias_err + '\t#MBE (cms)\n')
    # f.write('%.6f' %abs_err + '\t#MAE (cms)\n')
    f.close()


#### 3. Plot hydrographs for the a priori and best calibrated results 

In [38]:
for complexity_level in levelArray:
    
    simFile = os.path.join(aw_sim_basedir,complexity_level,'sflow.h.1970-01-01-00000.nc')
    ofile = os.path.join(output_baseDir,'%s_hydrograph_apriori.png'%(complexity_level))

    simVarName = 'IRFroutedRunoff'
    # simFile = os.path.join(archive_path, route_outFilePrefix+'.mizuRoute.nc')
    #     simFile = os.path.join(archive_path, route_outFilePrefix+'.nc') # for temporaty use
    f    = xr.open_dataset(simFile)
    time = f['time'].values
    sim  = f[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    # --- read observed flow (cfs or cms) --- 
    df_obs = pd.read_csv(obsFile, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                         parse_dates=True, infer_datetime_format=True)  
    df_obs.columns = ['obs']

    # convert obs from cfs to cms
    if obs_unit == 'cfs':
        df_obs = df_obs/35.3147    

    # --- # combine daily sim & obs timeseries df based on time index--- 
    df_sim_eval = df_sim.truncate(before=plotStartDate, after=plotEndDate)
    df_obs_eval = df_obs.truncate(before=plotStartDate, after=plotEndDate)
    df_merge = pd.concat([df_obs_eval, df_sim_eval], axis=1)
    df_merge = df_merge.dropna()
    df_merge.index = pd.to_datetime(df_merge.index)

    # --- make dataframes ---
    df_final          = df_merge  # combine daily sim & obs timeseries
    df_final_calib    = df_merge   # daily, calib. period only, defined in config file

    df_final_WY       = df_final.resample('AS-OCT').mean()  # resampled to annual mean starting in October
    df_final_calib_WY = df_final_calib.resample('AS-OCT').mean()

    df_final_AJ       = df_final[(df_final.index.month>=4) & (df_final.index.month<=7)].resample('AS-OCT').mean()
    df_final_calib_AJ = df_final_calib[(df_final_calib.index.month>=4) & (df_final_calib.index.month<=7)].resample('AS-OCT').mean()
    df_final_M        = df_final.resample('M').mean()[df_final.resample('M').count()>=28]  # only for months with at least 28 days
    df_final_MA       = df_final.groupby(df_final.index.month).mean()                     # monthly avg
    df_final_MA.columns   = ['Sim (all yrs)', 'Obs (all yrs)']
    df_final_calib_MA = df_final_calib.groupby(df_final_calib.index.month).mean()         # monthly avg, cal period
    df_final_calib_MA.columns = ['Sim (calib)','Obs (calib)']

    # --- calculate some statistics ---
    # calc some statistics
    if len(df_final_AJ) > 1:
        corr_AJ = stats.pearsonr(df_final_AJ.iloc[:,0], df_final_AJ.iloc[:,1]) 
        corr_WY = stats.pearsonr(df_final_calib_WY.iloc[:,0], df_final_calib_WY.iloc[:,1]) 
        print("correlations (AJ, WY): ", corr_AJ[0], corr_WY[0])
    else:
        corr_AJ = [None, None]
        corr_WY = [None, None]

    # --- make plot --- 
    # fig, ax = plt.subplots(4, 1)
    width  = 6.5  # in inches
    height = 9.0
    lwd    = 0.8  # line thickness

    # plot layout
#     print("plot layout")
    fig = plt.figure()

    AX = gridspec.GridSpec(4,2)
    AX.update(wspace = 0.5, hspace = 0.3)
    ax1  = plt.subplot(AX[0,:])
    ax2 = plt.subplot(AX[1,:])
    ax3 = plt.subplot(AX[2,:])
    ax4 = plt.subplot(AX[3,0])
    ax5 = plt.subplot(AX[3,1])

    # plot monthly
#     print("plot monthly")
    df_final_M.plot(ax=ax1, figsize=(width,height), color=['red','black'], linewidth=lwd)

    # plot daily calibration period
#     print("plot daily calibration period")
    df_final_calib.plot(ax=ax2, figsize=(width,height), color=['red','black'], linewidth=lwd)

    # plot monthly long term averages for period
#     print("plot monthly long term averages")
    df_final_calib_MA.plot(ax=ax3, figsize=(width,height), color=['red','black'], linewidth=lwd)
    df_final_MA.plot(ax=ax3, figsize=(width,height), color=['red','black'], linewidth=lwd, linestyle=':')

    # plot scatter for water year mean flow
#     print("plot scatter for water year mean flow")
    axmax = df_final_WY.max().max()
    ax4.scatter(df_final_WY.iloc[:,0], df_final_WY.iloc[:,1], c='black', s=5)
    ax4.scatter(df_final_calib_WY.iloc[:,0], df_final_calib_WY.iloc[:,1], c='red', s=10, label='Calib')
    ax4.plot((0, axmax), (0, axmax), c='orange', linestyle=':')
    if corr_WY[0] is not None:
        ax4.annotate('corr: '+str(round(corr_WY[0], 3)), xy=(axmax*0.97, axmax*0.03), horizontalalignment='right')

    # plot scatter for spring runoff period (Apr-Jul)
#     print("plot scatter for for spring runoff period")
    axmax = df_final_AJ.max().max()
    ax5.scatter(df_final_AJ.iloc[:,0], df_final_AJ.iloc[:,1],c='black', s=5)
    ax5.scatter(df_final_calib_AJ.iloc[:,0], df_final_calib_AJ.iloc[:,1], c='red', s=10, label='Calib')
    ax5.plot((0, axmax), (0, axmax), c='orange', linestyle=':')
    if corr_AJ[0] is not None:
        ax5.annotate('corr: '+str(round(corr_AJ[0], 3)), xy=(axmax*0.97, axmax*0.03), horizontalalignment='right')

    # other plot details
#     print("other plot details")
    ax1.axvline(x=statStartDate, color='grey', linewidth=0)
    ax1.axvline(x=statEndDate, color='grey', linewidth=0)
    ax1.axvspan(statStartDate, statEndDate, color='grey', alpha=0.2, label='Calib Period')
    ax1.set_ylabel('Flow, Monthly (cms)')
    ax2.set_ylabel('Flow, Daily (cms)')
    ax3.set_ylabel('Flow, Monthly (cms)')
    ax3.set_xlabel('Calendar Month')
    ax4.set_ylabel('WY Obs (cms)')
    ax4.set_xlabel('WY Sim (cms)')
    ax5.set_ylabel('Apr-Jul Obs (cms)')
    ax5.set_xlabel('Apr-Jul Sim (cms)')
    ax1.legend(loc='upper right')
    ax2.legend().remove()
    ax3.legend(loc='upper right')
    ax4.legend()
    # ax5.legend().remove()
    # ax1.set_title('Streamflow: ' + domain_name, fontsize='medium',weight='semibold')
    ax1.set_title('Streamflow', fontsize='medium',weight='semibold')

    # --- save plot
    print("save plot")
    plt.savefig(ofile, dpi=100)

correlations (AJ, WY):  0.8880171812166281 0.8804915955926533
save plot
correlations (AJ, WY):  0.8857990666084913 0.8803886383339636
save plot
correlations (AJ, WY):  0.887889687599406 0.8810898564677687
save plot
correlations (AJ, WY):  0.8515635291444162 0.878384899931839
save plot
correlations (AJ, WY):  0.887735413468195 0.8809188801758282
save plot
correlations (AJ, WY):  0.8609751751636379 0.8783749095127578
save plot
correlations (AJ, WY):  0.8715753087396141 0.8789838579279339
save plot
correlations (AJ, WY):  0.8747957812582986 0.8792533018706546
save plot


#### 4. Calculate a-prior statistics (1979-2019)

In [39]:
for complexity_level in levelArray:
    
    simFile = os.path.join(aw_sim_basedir,complexity_level,'sflow.h.1970-01-01-00000.nc')
    stat_output = os.path.join(output_baseDir,'trial_stats_lev%s_AW_1970_2019.txt'%(complexity_level))

    # #### 2. Calculate 
    # --- read simulated flow (cms) --- 
    simVarName = 'IRFroutedRunoff'
    f    = xr.open_dataset(simFile)
    time = f['time'].values
    sim  = f[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    # --- read observed flow (cfs or cms) --- 

    df_obs = pd.read_csv(obs_file, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                         parse_dates=True, infer_datetime_format=True)  
    df_obs.columns = ['obs']

    # convert obs from cfs to cms
    if obs_unit == 'cfs':
        df_obs = df_obs/35.3147    

    # --- merge the two df based on time index--- 
    df_sim_eval = df_sim.truncate(before=plotStartDate, after=plotEndDate)
    df_obs_eval = df_obs.truncate(before=plotStartDate, after=plotEndDate)
    df_merge = pd.concat([df_obs_eval, df_sim_eval], axis=1)
    df_merge = df_merge.dropna()

    # --- calculate diagnostics --- 
    kge = get_modified_KGE(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    rmse = get_RMSE(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    # bias_err, abs_err = get_mean_error(obs=df_merge['obs'].values, sim=df_merge['sim'].values)
    # obs_month_mean, sim_month_mean = get_month_mean_flow(obs=df_merge['obs'].values, sim=df_merge['sim'].values, sim_time=sim_time)

    # --- save --- 
    f = open(stat_output, 'w+')
    f.write('%.6f' %kge + '\t#KGE\n')
    f.write('%.6f' %rmse + '\t#RMSE (cms)\n')
    # f.write('%.6f' %bias_err + '\t#MBE (cms)\n')
    # f.write('%.6f' %abs_err + '\t#MAE (cms)\n')
    f.close()
    
    print(complexity_level,kge,rmse)

0 0.20552055635869915 35.83858046232348
1a 0.2195870919188787 36.19450888476285
1b 0.23971236497858228 35.67167343974038
1c 0.6379341272073684 41.16405299838253
2a 0.21559832371987186 36.12079574150985
2b 0.6442725059040585 39.294638474950645
2c 0.6323683428230291 37.97446540262428
3 0.6323797572848928 37.150613824482754


#### 5. Plot hydrographs for the a priori results (1979-2019)

In [40]:
for complexity_level in levelArray:
    
simFile = os.path.join(aw_sim_basedir,complexity_level,'sflow.h.1970-01-01-00000.nc')
    ofile = os.path.join(output_baseDir,'%s_hydrograph_apriori_1970_2019.png'%(complexity_level))

    simVarName = 'IRFroutedRunoff'
    # simFile = os.path.join(archive_path, route_outFilePrefix+'.mizuRoute.nc')
    #     simFile = os.path.join(archive_path, route_outFilePrefix+'.nc') # for temporaty use
    f    = xr.open_dataset(simFile)
    time = f['time'].values
    sim  = f[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    # --- read observed flow (cfs or cms) --- 
    df_obs = pd.read_csv(obsFile, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                         parse_dates=True, infer_datetime_format=True)  
    df_obs.columns = ['obs']

    # convert obs from cfs to cms
    if obs_unit == 'cfs':
        df_obs = df_obs/35.3147    

    # --- # combine daily sim & obs timeseries df based on time index--- 
    df_sim_eval = df_sim.truncate(before=statStartDate, after=statEndDate)
    df_obs_eval = df_obs.truncate(before=statStartDate, after=statEndDate)
    df_merge = pd.concat([df_obs_eval, df_sim_eval], axis=1)
    df_merge = df_merge.dropna()
    df_merge.index = pd.to_datetime(df_merge.index)

    # --- make dataframes ---
    df_final          = df_merge  # combine daily sim & obs timeseries
    df_final_calib    = df_merge   # daily, calib. period only, defined in config file

    df_final_WY       = df_final.resample('AS-OCT').mean()  # resampled to annual mean starting in October
    df_final_calib_WY = df_final_calib.resample('AS-OCT').mean()

    df_final_AJ       = df_final[(df_final.index.month>=4) & (df_final.index.month<=7)].resample('AS-OCT').mean()
    df_final_calib_AJ = df_final_calib[(df_final_calib.index.month>=4) & (df_final_calib.index.month<=7)].resample('AS-OCT').mean()
    df_final_M        = df_final.resample('M').mean()[df_final.resample('M').count()>=28]  # only for months with at least 28 days
    df_final_MA       = df_final.groupby(df_final.index.month).mean()                     # monthly avg
    df_final_MA.columns   = ['Sim (all yrs)', 'Obs (all yrs)']
    df_final_calib_MA = df_final_calib.groupby(df_final_calib.index.month).mean()         # monthly avg, cal period
    df_final_calib_MA.columns = ['Sim (calib)','Obs (calib)']

    # --- calculate some statistics ---
    # calc some statistics
    if len(df_final_AJ) > 1:
        corr_AJ = stats.pearsonr(df_final_AJ.iloc[:,0], df_final_AJ.iloc[:,1]) 
        corr_WY = stats.pearsonr(df_final_calib_WY.iloc[:,0], df_final_calib_WY.iloc[:,1]) 
        print("correlations (AJ, WY): ", corr_AJ[0], corr_WY[0])
    else:
        corr_AJ = [None, None]
        corr_WY = [None, None]

    # --- make plot --- 
    # fig, ax = plt.subplots(4, 1)
    width  = 6.5  # in inches
    height = 9.0
    lwd    = 0.8  # line thickness

    # plot layout
#     print("plot layout")
    fig = plt.figure()

    AX = gridspec.GridSpec(4,2)
    AX.update(wspace = 0.5, hspace = 0.3)
    ax1  = plt.subplot(AX[0,:])
    ax2 = plt.subplot(AX[1,:])
    ax3 = plt.subplot(AX[2,:])
    ax4 = plt.subplot(AX[3,0])
    ax5 = plt.subplot(AX[3,1])

    # plot monthly
#     print("plot monthly")
    df_final_M.plot(ax=ax1, figsize=(width,height), color=['red','black'], linewidth=lwd)

    # plot daily calibration period
#     print("plot daily calibration period")
    df_final_calib.plot(ax=ax2, figsize=(width,height), color=['red','black'], linewidth=lwd)

    # plot monthly long term averages for period
#     print("plot monthly long term averages")
    df_final_calib_MA.plot(ax=ax3, figsize=(width,height), color=['red','black'], linewidth=lwd)
    df_final_MA.plot(ax=ax3, figsize=(width,height), color=['red','black'], linewidth=lwd, linestyle=':')

    # plot scatter for water year mean flow
#     print("plot scatter for water year mean flow")
    axmax = df_final_WY.max().max()
    ax4.scatter(df_final_WY.iloc[:,0], df_final_WY.iloc[:,1], c='black', s=5)
    ax4.scatter(df_final_calib_WY.iloc[:,0], df_final_calib_WY.iloc[:,1], c='red', s=10, label='Calib')
    ax4.plot((0, axmax), (0, axmax), c='orange', linestyle=':')
    if corr_WY[0] is not None:
        ax4.annotate('corr: '+str(round(corr_WY[0], 3)), xy=(axmax*0.97, axmax*0.03), horizontalalignment='right')

    # plot scatter for spring runoff period (Apr-Jul)
#     print("plot scatter for for spring runoff period")
    axmax = df_final_AJ.max().max()
    ax5.scatter(df_final_AJ.iloc[:,0], df_final_AJ.iloc[:,1],c='black', s=5)
    ax5.scatter(df_final_calib_AJ.iloc[:,0], df_final_calib_AJ.iloc[:,1], c='red', s=10, label='Calib')
    ax5.plot((0, axmax), (0, axmax), c='orange', linestyle=':')
    if corr_AJ[0] is not None:
        ax5.annotate('corr: '+str(round(corr_AJ[0], 3)), xy=(axmax*0.97, axmax*0.03), horizontalalignment='right')

    # other plot details
#     print("other plot details")
    ax1.axvline(x=statStartDate, color='grey', linewidth=0)
    ax1.axvline(x=statEndDate, color='grey', linewidth=0)
    ax1.axvspan(statStartDate, statEndDate, color='grey', alpha=0.2, label='Calib Period')
    ax1.set_ylabel('Flow, Monthly (cms)')
    ax2.set_ylabel('Flow, Daily (cms)')
    ax3.set_ylabel('Flow, Monthly (cms)')
    ax3.set_xlabel('Calendar Month')
    ax4.set_ylabel('WY Obs (cms)')
    ax4.set_xlabel('WY Sim (cms)')
    ax5.set_ylabel('Apr-Jul Obs (cms)')
    ax5.set_xlabel('Apr-Jul Sim (cms)')
    ax1.legend(loc='upper right')
    ax2.legend().remove()
    ax3.legend(loc='upper right')
    ax4.legend()
    # ax5.legend().remove()
    # ax1.set_title('Streamflow: ' + domain_name, fontsize='medium',weight='semibold')
    ax1.set_title('Streamflow', fontsize='medium',weight='semibold')

    # --- save plot
#     print("save plot")
    plt.savefig(ofile, dpi=100)

correlations (AJ, WY):  0.7865254291271722 0.7980659258844878
save plot
correlations (AJ, WY):  0.774921740478325 0.7931106460458657
save plot
correlations (AJ, WY):  0.77813912847936 0.795058367682351
save plot


  fig = plt.figure()


correlations (AJ, WY):  0.6530715596094149 0.7879251098961113
save plot


  fig = plt.figure()


correlations (AJ, WY):  0.7798955896053014 0.7951916838633509
save plot


  fig = plt.figure()


correlations (AJ, WY):  0.6813494117799008 0.7830588924511181
save plot


  fig = plt.figure()


correlations (AJ, WY):  0.7050062263812242 0.7843847794474312
save plot


  fig = plt.figure()


correlations (AJ, WY):  0.7125123779685668 0.7854631873395342
save plot


  fig = plt.figure()
