In [1]:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import matplotlib.pyplot as plt
import sys, os, shutil
import numpy as np
import pandas as pd
from scipy import stats
from datetime import datetime
import matplotlib.gridspec as gridspec
import xarray as xr

In [18]:
levelArray=['0', '1a', '1b', '1c', '2a', '2b', '2c', '3']

valid_dir = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/valid/06282000'
domain_basename='DDS'

summa_outFilePrefix = 'wbout'
route_outFilePrefix = 'sflow'

aw_apriori_baseDir = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/model/06282000'
aw_sim_basedir = os.path.join(aw_apriori_baseDir,'source_info/apriori_results')

# obs_file = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/model/obs/obs_flow.06282000.cfs.csv'
obs_file = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/model/obs/obs_flow.BBR_IN.cfs.csv'
obsFile = obs_file
obs_unit = 'cfs'
q_seg_index=1
statStartDate,statEndDate='2007-10-01','2012-09-30'
plotStartDate,plotEndDate='1970-01-01','2019-12-31'

output_baseDir = os.path.join(valid_dir,'analysis','1_valid_results')
if not os.path.exists(output_baseDir):
    os.makedirs(output_baseDir)

plot_output_dir = os.path.join(output_baseDir,'hydrographs')
if not os.path.exists(plot_output_dir):
    os.makedirs(plot_output_dir)


#### 1. Save model valid run results

In [16]:
for complexity_level in levelArray:

    # --- save --- 
    domain_name=complexity_level+'_'+domain_basename
    src_dir = os.path.join(valid_dir,domain_name,'model/simulations/')
    dst_dir = os.path.join(output_baseDir,domain_name)
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)
    
    # save simulation results
    shutil.copy2(os.path.join(src_dir,'SUMMA', summa_outFilePrefix+'_day.nc'), dst_dir)
    shutil.copy2(os.path.join(src_dir,'mizuRoute', route_outFilePrefix+'.mizuRoute.nc'), dst_dir)
    # save param and statistical results
    shutil.copy2(os.path.join(valid_dir,domain_name, 'model/settings/SUMMA/trialParams.hru_lev%s.nc'%(complexity_level)), dst_dir)
    shutil.copy2(os.path.join(valid_dir,domain_name, 'calib/trial_stats.txt'), dst_dir)
    
    objs = np.loadtxt(os.path.join(valid_dir,domain_name,'calib/trial_stats.txt'),usecols=[0])
    print('%s: KGE =%f'%(complexity_level, objs[0]))

0: KGE =0.801679
1a: KGE =0.697704
1b: KGE =0.797110
1c: KGE =0.630332
2a: KGE =0.523760
2b: KGE =0.647522
2c: KGE =0.826765
3: KGE =0.581754


#### 2. Calculate KGE again for details (bias,r,var)

In [19]:
def get_modified_KGE(obs,sim):    
    sd_sim=np.std(sim, ddof=1)
    sd_obs=np.std(obs, ddof=1)
    
    m_sim=np.mean(sim)
    m_obs=np.mean(obs)
    
    r=(np.corrcoef(sim,obs))[0,1]
    relvar=(float(sd_sim)/float(m_sim))/(float(sd_obs)/float(m_obs))
    bias=float(m_sim)/float(m_obs)
    
    kge=1.0-np.sqrt((r-1)**2 +(relvar-1)**2 + (bias-1)**2)
    return kge,r,relvar,bias


In [24]:
print('complexity_level,kge,r,relvar,bias')
for complexity_level in levelArray:
    
    domain_name=complexity_level+'_'+domain_basename
    dst_dir = os.path.join(output_baseDir,domain_name)

    simFile = os.path.join(dst_dir, route_outFilePrefix+'.mizuRoute.nc')
    stat_output = os.path.join(dst_dir,'trial_stats_details.txt')

    # #### 2. Calculate 
    # --- read simulated flow (cms) --- 
    simVarName = 'IRFroutedRunoff'
    f    = xr.open_dataset(simFile)
    time = f['time'].values
    sim  = f[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    # --- read observed flow (cfs or cms) --- 

    df_obs = pd.read_csv(obs_file, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                         parse_dates=True, infer_datetime_format=True)  
    df_obs.columns = ['obs']

    # convert obs from cfs to cms
    if obs_unit == 'cfs':
        df_obs = df_obs/35.3147    

    # --- merge the two df based on time index--- 
    df_sim_eval = df_sim.truncate(before=plotStartDate, after=plotEndDate)
    df_obs_eval = df_obs.truncate(before=plotStartDate, after=plotEndDate)
    df_merge = pd.concat([df_obs_eval, df_sim_eval], axis=1)
    df_merge = df_merge.dropna()

    # --- calculate diagnostics --- 
    kge,r,relvar,bias = get_modified_KGE(obs=df_merge['obs'].values, sim=df_merge['sim'].values)

    # --- save --- 
    f = open(stat_output, 'w+')
    f.write('%.6f' %kge + '\t#KGE\n')
    f.write('%.6f' %r + '\t#linear correlation\n')
    f.write('%.6f' %relvar + '\t#variability error\n')
    f.write('%.6f' %bias + '\t#bias\n')
    f.close()
    
#     print("{:.4f}".format(kge),"{:.4f}".format(kge),"{:.4f}".format(kge))
    print('%s,%.4f,%.4f,%.4f,%.4f'%(complexity_level,kge,r,relvar,bias))

complexity_level,kge,r,relvar,bias
0,0.8017,0.8214,1.0622,0.9402
1a,0.6977,0.7618,1.1848,0.9779
1b,0.7971,0.8306,1.1093,0.9772
1c,0.6303,0.6906,1.2010,1.0233
2a,0.5238,0.6506,1.3227,0.9749
2b,0.6475,0.7228,1.2172,1.0144
2c,0.8268,0.8300,1.0297,0.9850
3,0.5818,0.6755,1.2633,1.0167


In [25]:
print('complexity_level,kge,r,relvar,bias')
for complexity_level in levelArray[0]:
    
    domain_name=complexity_level+'_'+domain_basename
    dst_dir = os.path.join(output_baseDir,domain_name)

    simFile = os.path.join(dst_dir, route_outFilePrefix+'.mizuRoute.nc')

    # #### 2. Calculate 
    # --- read simulated flow (cms) --- 
    simVarName = 'IRFroutedRunoff'
    f    = xr.open_dataset(simFile)
    time = f['time'].values
    sim  = f[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    df_sim

complexity_level,kge,r,relvar,bias


In [26]:
df_sim

Unnamed: 0,sim
1970-01-02,1549.455200
1970-01-03,1535.081787
1970-01-04,1075.625610
1970-01-05,726.262085
1970-01-06,520.088318
...,...
2019-12-27,6.983592
2019-12-28,6.931815
2019-12-29,6.880777
2019-12-30,6.830490


#### 3. Plot valid mdoel outputs

In [15]:
for complexity_level in levelArray:

    print('---%s---'%(complexity_level))
    valid_output_dir = os.path.join(output_baseDir,complexity_level+'_'+domain_basename)
    ofile = os.path.join(plot_output_dir,'%s_hydrograph_valid.png'%(complexity_level))
    
    simVarName = 'IRFroutedRunoff'
    simFile = os.path.join(valid_output_dir, route_outFilePrefix+'.mizuRoute.nc')

    f    = xr.open_dataset(simFile)
    time = f['time'].values
    sim  = f[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    # --- read observed flow (cfs or cms) --- 
    df_obs = pd.read_csv(obsFile, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                         parse_dates=True, infer_datetime_format=True)  
    df_obs.columns = ['obs']

    # convert obs from cfs to cms
    if obs_unit == 'cfs':
        df_obs = df_obs/35.3147    

    # --- # combine daily sim & obs timeseries df based on time index--- 
    df_sim_eval = df_sim.truncate(before=plotStartDate, after=plotEndDate)
    df_obs_eval = df_obs.truncate(before=plotStartDate, after=plotEndDate)
    df_merge = pd.concat([df_obs_eval, df_sim_eval], axis=1)
    df_merge = df_merge.dropna()
    df_merge.index = pd.to_datetime(df_merge.index)

    # --- make dataframes ---
    df_final          = df_merge  # combine daily sim & obs timeseries
    df_final_calib    = df_merge   # daily, calib. period only, defined in config file

    df_final_WY       = df_final.resample('AS-OCT').mean()  # resampled to annual mean starting in October
    df_final_calib_WY = df_final_calib.resample('AS-OCT').mean()

    df_final_AJ       = df_final[(df_final.index.month>=4) & (df_final.index.month<=7)].resample('AS-OCT').mean()
    df_final_calib_AJ = df_final_calib[(df_final_calib.index.month>=4) & (df_final_calib.index.month<=7)].resample('AS-OCT').mean()
    df_final_M        = df_final.resample('M').mean()[df_final.resample('M').count()>=28]  # only for months with at least 28 days
    df_final_MA       = df_final.groupby(df_final.index.month).mean()                     # monthly avg
    df_final_MA.columns   = ['Sim (all yrs)', 'Obs (all yrs)']
    df_final_calib_MA = df_final_calib.groupby(df_final_calib.index.month).mean()         # monthly avg, cal period
    df_final_calib_MA.columns = ['Sim (calib)','Obs (calib)']

    # --- calculate some statistics ---
    # calc some statistics
    if len(df_final_AJ) > 1:
        corr_AJ = stats.pearsonr(df_final_AJ.iloc[:,0], df_final_AJ.iloc[:,1]) 
        corr_WY = stats.pearsonr(df_final_calib_WY.iloc[:,0], df_final_calib_WY.iloc[:,1]) 
        print("correlations (AJ, WY): ", corr_AJ[0], corr_WY[0])
    else:
        corr_AJ = [None, None]
        corr_WY = [None, None]

    # --- make plot --- 
    # fig, ax = plt.subplots(4, 1)
    width  = 6.5  # in inches
    height = 9.0
    lwd    = 0.8  # line thickness

    # plot layout
    print("plot layout")
    fig = plt.figure()

    AX = gridspec.GridSpec(4,2)
    AX.update(wspace = 0.5, hspace = 0.3)
    ax1  = plt.subplot(AX[0,:])
    ax2 = plt.subplot(AX[1,:])
    ax3 = plt.subplot(AX[2,:])
    ax4 = plt.subplot(AX[3,0])
    ax5 = plt.subplot(AX[3,1])

    # plot monthly
    print("plot monthly")
    df_final_M.plot(ax=ax1, figsize=(width,height), color=['red','black'], linewidth=lwd)

    # plot daily calibration period
    print("plot daily calibration period")
    df_final_calib.plot(ax=ax2, figsize=(width,height), color=['red','black'], linewidth=lwd)

    # plot monthly long term averages for period
    print("plot monthly long term averages")
    df_final_calib_MA.plot(ax=ax3, figsize=(width,height), color=['red','black'], linewidth=lwd)
    df_final_MA.plot(ax=ax3, figsize=(width,height), color=['red','black'], linewidth=lwd, linestyle=':')

    # plot scatter for water year mean flow
    print("plot scatter for water year mean flow")
    axmax = df_final_WY.max().max()
    ax4.scatter(df_final_WY.iloc[:,0], df_final_WY.iloc[:,1], c='black', s=5)
    ax4.scatter(df_final_calib_WY.iloc[:,0], df_final_calib_WY.iloc[:,1], c='red', s=10, label='Calib')
    ax4.plot((0, axmax), (0, axmax), c='orange', linestyle=':')
    if corr_WY[0] is not None:
        ax4.annotate('corr: '+str(round(corr_WY[0], 3)), xy=(axmax*0.97, axmax*0.03), horizontalalignment='right')

    # plot scatter for spring runoff period (Apr-Jul)
    print("plot scatter for for spring runoff period")
    axmax = df_final_AJ.max().max()
    ax5.scatter(df_final_AJ.iloc[:,0], df_final_AJ.iloc[:,1],c='black', s=5)
    ax5.scatter(df_final_calib_AJ.iloc[:,0], df_final_calib_AJ.iloc[:,1], c='red', s=10, label='Calib')
    ax5.plot((0, axmax), (0, axmax), c='orange', linestyle=':')
    if corr_AJ[0] is not None:
        ax5.annotate('corr: '+str(round(corr_AJ[0], 3)), xy=(axmax*0.97, axmax*0.03), horizontalalignment='right')

    # other plot details
    print("other plot details")
    ax1.axvline(x=statStartDate, color='grey', linewidth=0)
    ax1.axvline(x=statEndDate, color='grey', linewidth=0)
    ax1.axvspan(statStartDate, statEndDate, color='grey', alpha=0.2, label='Calib Period')
    ax1.set_ylabel('Flow, Monthly (cms)')
    ax2.set_ylabel('Flow, Daily (cms)')
    ax3.set_ylabel('Flow, Monthly (cms)')
    ax3.set_xlabel('Calendar Month')
    ax4.set_ylabel('WY Obs (cms)')
    ax4.set_xlabel('WY Sim (cms)')
    ax5.set_ylabel('Apr-Jul Obs (cms)')
    ax5.set_xlabel('Apr-Jul Sim (cms)')
    ax1.legend(loc='upper right')
    ax2.legend().remove()
    ax3.legend(loc='upper right')
    ax4.legend()
    # ax5.legend().remove()
    # ax1.set_title('Streamflow: ' + domain_name, fontsize='medium',weight='semibold')
    ax1.set_title('Streamflow', fontsize='medium',weight='semibold')

    # --- save plot
    print("save plot")
    plt.savefig(ofile, dpi=100)

---0---
correlations (AJ, WY):  0.8778215452071123 0.8059695223726541
plot layout
plot monthly
plot daily calibration period
plot monthly long term averages
plot scatter for water year mean flow
plot scatter for for spring runoff period
other plot details
save plot
---1a---
correlations (AJ, WY):  0.8647078466105703 0.7912828282006473
plot layout
plot monthly
plot daily calibration period
plot monthly long term averages
plot scatter for water year mean flow
plot scatter for for spring runoff period
other plot details
save plot
---1b---
correlations (AJ, WY):  0.8768521969801863 0.8355273766542519
plot layout
plot monthly
plot daily calibration period
plot monthly long term averages
plot scatter for water year mean flow
plot scatter for for spring runoff period
other plot details
save plot
---1c---
correlations (AJ, WY):  0.826290441776136 0.8041637631948404
plot layout
plot monthly
plot daily calibration period
plot monthly long term averages
plot scatter for water year mean flow
plot 

In [None]:
df_obs