In [1]:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import matplotlib.pyplot as plt
import sys, os, shutil
import numpy as np
import pandas as pd
from scipy import stats
# import netCDF4 as nc
from datetime import datetime
import matplotlib.gridspec as gridspec
# import glob
# import argparse
import xarray as xr

In [2]:
# --- experiment layout ---
# Complexity levels to analyse, and the inclusive range of calibration
# experiment ids that exist for each level.
levelArray=['0', '1a', '1b', '1c', '2a', '2b', '2c', '3']
startId=1
endId=5

# --- input locations ---
# NOTE(review): absolute, user-specific paths; edit these before running elsewhere.
aw_apriori_baseDir = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/model/06282000'
aw_sim_basedir = os.path.join(aw_apriori_baseDir,'source_info/apriori_results')

calib_dir = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/calib/06282000'
domain_basename='DDS'
# Calibration directories are named <level>_<domain_basename><experId>.
expers = range(startId,endId+1)

# Prefix of the routed-flow output file (<prefix>.mizuRoute.nc).
route_outFilePrefix = 'sflow'

# --- observations ---
# obs_file = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/model/obs/obs_flow.06282000.cfs.csv'
obs_file = '/glade/u/home/hongli/scratch/2020_06_02HRUcomplexity/model/obs/obs_flow.BBR_IN.cfs.csv'
obsFile = obs_file   # alias used by the plotting cells
obs_unit = 'cfs'     # observations are converted to cms when this is 'cfs'
q_seg_index=1        # 1-based index of the routed segment to extract
statStartDate,statEndDate='2007-10-01','2012-09-30'   # evaluation window (inclusive)

# --- output locations ---
# exist_ok=True makes the cell safely re-runnable and avoids the
# check-then-create race of os.path.exists() followed by os.makedirs().
output_baseDir = os.path.join(calib_dir,'analysis','5_best_calib_results')
os.makedirs(output_baseDir, exist_ok=True)

plot_output_dir = os.path.join(output_baseDir,'hydrographs')
os.makedirs(plot_output_dir, exist_ok=True)


#### 1. Find out the best model run 

In [3]:
# For every complexity level: scan all calibration experiments, pick the run
# with the lowest objective value, copy its archived output into
# output_baseDir and record where it came from.
for complexity_level in levelArray:

    # One entry per calibration experiment, in the same order as `expers`.
    best_objs = []
    best_runs = []
    total_runs = []

    for experId in expers:

        domain_name = complexity_level+'_'+domain_basename+str(experId)
        ostModelFile = os.path.join(calib_dir,domain_name,'calib/OstModel0.txt')

        # Read OstModel0.txt (first line is skipped; column 0 = run id,
        # column 1 = objective value). ndmin=2 keeps the array 2-D even when
        # the file holds a single run, so the column slicing below still works.
        data = np.loadtxt(ostModelFile, skiprows=1, ndmin=2)
        runs = data[:,0]
        objs = data[:,1]
        total_runs.append(len(runs))

        # Identify the best obj per calib experiment, ignoring NaN objectives
        # so that one NaN run does not disqualify the whole experiment
        # (consistent with the nanmin/nanargmin selection across experiments).
        if np.all(np.isnan(objs)):
            # No usable run: keep the lists aligned with `expers`; the NaN
            # objective is never selected by nanargmin below.
            best_objs.append(np.nan)
            best_runs.append(-1)
            continue
        best_indx = np.nanargmin(objs)
        best_objs.append(objs[best_indx])
        best_runs.append(int(runs[best_indx]))

    # Best experiment for this complexity level (raises if every experiment
    # is NaN, as the original selection did).
    best_obj_indx = np.nanargmin(best_objs)
    best_obj = best_objs[best_obj_indx]
    best_run = best_runs[best_obj_indx]
    best_exper = expers[best_obj_indx]
    total_run = total_runs[best_obj_indx]
    summary = '%s: best obj = %.6f, calib exper %d, best_run %d, total_run %d.'\
              %(complexity_level,best_obj,best_exper,best_run,total_run)
    print(summary)

    # --- save ---
    domain_name = complexity_level+'_'+domain_basename+str(best_exper)
    src_dir = os.path.join(calib_dir,domain_name,'calib/output_archive/experiment1/run%d'%(best_run))
    dst_dir = os.path.join(output_baseDir,complexity_level+'_'+domain_basename)

    # Validate the source before touching the destination, so a missing
    # archive does not wipe a previously saved best run.
    if not os.path.isdir(src_dir):
        raise FileNotFoundError('archived run directory not found: %s'%(src_dir))
    if os.path.exists(dst_dir):
        shutil.rmtree(dst_dir)
    shutil.copytree(src_dir, dst_dir)

    # Record the provenance of the copied run next to its output.
    ofile = os.path.join(dst_dir,'best_run_source.txt')
    with open(ofile, "w") as f:
        f.write(summary)
    

0: best obj = -0.927500, calib exper 1, best_run 135, total_run 979.
1a: best obj = -0.918689, calib exper 5, best_run 149, total_run 617.
1b: best obj = -0.936477, calib exper 2, best_run 126, total_run 541.
1c: best obj = -0.884804, calib exper 5, best_run 108, total_run 694.
2a: best obj = -0.925989, calib exper 5, best_run 719, total_run 1741.
2b: best obj = -0.893748, calib exper 4, best_run 70, total_run 357.
2c: best obj = -0.893670, calib exper 3, best_run 36, total_run 614.
3: best obj = -0.902371, calib exper 4, best_run 81, total_run 452.


#### 2. Plot the best model output

In [4]:
# Build a five-panel sim-vs-obs diagnostic figure for the best run of every
# complexity level and save it as <level>_hydrograph_calib.png.

# --- read observed flow (cfs or cms) ---
# The observations do not depend on the complexity level, so read them once.
df_obs = pd.read_csv(obsFile, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                     parse_dates=True, infer_datetime_format=True)
df_obs.columns = ['obs']

# convert obs from cfs to cms (1 m^3 = 35.3147 ft^3)
if obs_unit == 'cfs':
    df_obs = df_obs/35.3147
df_obs_eval = df_obs.truncate(before=statStartDate, after=statEndDate)

# --- figure geometry ---
width  = 6.5  # in inches
height = 9.0
lwd    = 0.8  # line thickness

for complexity_level in levelArray:

    print('---%s---'%(complexity_level))
    best_run_dir = os.path.join(output_baseDir,complexity_level+'_'+domain_basename)
    ofile = os.path.join(plot_output_dir,'%s_hydrograph_calib.png'%(complexity_level))

    simVarName = 'IRFroutedRunoff'
    simFile = os.path.join(best_run_dir, route_outFilePrefix+'.mizuRoute.nc')

    # --- read simulated flow ---
    # The context manager closes the NetCDF file once the values are in memory.
    with xr.open_dataset(simFile) as ds:
        time = ds['time'].values
        sim  = ds[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    # --- combine daily sim & obs timeseries df based on time index ---
    # Column order must be [sim, obs]: the legend names, the line colours
    # and the scatter axis labels below are all assigned by position.
    df_sim_eval = df_sim.truncate(before=statStartDate, after=statEndDate)
    df_merge = pd.concat([df_sim_eval, df_obs_eval], axis=1)
    df_merge = df_merge.dropna()
    df_merge.index = pd.to_datetime(df_merge.index)

    # --- make dataframes ---
    # Both names refer to the same evaluation-window frame, so the
    # "all yrs" and "calib" series coincide in this notebook.
    df_final          = df_merge  # combine daily sim & obs timeseries
    df_final_calib    = df_merge  # daily, calib. period only

    df_final_WY       = df_final.resample('AS-OCT').mean()  # resampled to annual mean starting in October
    df_final_calib_WY = df_final_calib.resample('AS-OCT').mean()

    # Apr-Jul flows only, averaged per water year
    df_final_AJ       = df_final[(df_final.index.month>=4) & (df_final.index.month<=7)].resample('AS-OCT').mean()
    df_final_calib_AJ = df_final_calib[(df_final_calib.index.month>=4) & (df_final_calib.index.month<=7)].resample('AS-OCT').mean()
    df_final_M        = df_final.resample('M').mean()[df_final.resample('M').count()>=28]  # only for months with at least 28 days
    df_final_MA       = df_final.groupby(df_final.index.month).mean()                     # monthly avg
    df_final_MA.columns   = ['Sim (all yrs)', 'Obs (all yrs)']
    df_final_calib_MA = df_final_calib.groupby(df_final_calib.index.month).mean()         # monthly avg, cal period
    df_final_calib_MA.columns = ['Sim (calib)','Obs (calib)']

    # --- calculate some statistics ---
    # Pearson correlation needs at least two points; otherwise skip it.
    if len(df_final_AJ) > 1:
        corr_AJ = stats.pearsonr(df_final_AJ.iloc[:,0], df_final_AJ.iloc[:,1])
        corr_WY = stats.pearsonr(df_final_calib_WY.iloc[:,0], df_final_calib_WY.iloc[:,1])
        print("correlations (AJ, WY): ", corr_AJ[0], corr_WY[0])
    else:
        corr_AJ = [None, None]
        corr_WY = [None, None]

    # --- make plot ---
    # plot layout: three full-width time-series rows and two scatter panels
    print("plot layout")
    fig = plt.figure(figsize=(width,height))

    AX = gridspec.GridSpec(4,2)
    AX.update(wspace = 0.5, hspace = 0.3)
    ax1 = fig.add_subplot(AX[0,:])
    ax2 = fig.add_subplot(AX[1,:])
    ax3 = fig.add_subplot(AX[2,:])
    ax4 = fig.add_subplot(AX[3,0])
    ax5 = fig.add_subplot(AX[3,1])

    # plot monthly (sim = red, obs = black in every line panel)
    print("plot monthly")
    df_final_M.plot(ax=ax1, color=['red','black'], linewidth=lwd)

    # plot daily calibration period
    print("plot daily calibration period")
    df_final_calib.plot(ax=ax2, color=['red','black'], linewidth=lwd)

    # plot monthly long term averages for period
    print("plot monthly long term averages")
    df_final_calib_MA.plot(ax=ax3, color=['red','black'], linewidth=lwd)
    df_final_MA.plot(ax=ax3, color=['red','black'], linewidth=lwd, linestyle=':')

    # plot scatter for water year mean flow (x = sim, y = obs)
    print("plot scatter for water year mean flow")
    axmax = df_final_WY.max().max()
    ax4.scatter(df_final_WY.iloc[:,0], df_final_WY.iloc[:,1], c='black', s=5)
    ax4.scatter(df_final_calib_WY.iloc[:,0], df_final_calib_WY.iloc[:,1], c='red', s=10, label='Calib')
    ax4.plot((0, axmax), (0, axmax), c='orange', linestyle=':')  # 1:1 line
    if corr_WY[0] is not None:
        ax4.annotate('corr: '+str(round(corr_WY[0], 3)), xy=(axmax*0.97, axmax*0.03), horizontalalignment='right')

    # plot scatter for spring runoff period (Apr-Jul) (x = sim, y = obs)
    print("plot scatter for for spring runoff period")
    axmax = df_final_AJ.max().max()
    ax5.scatter(df_final_AJ.iloc[:,0], df_final_AJ.iloc[:,1],c='black', s=5)
    ax5.scatter(df_final_calib_AJ.iloc[:,0], df_final_calib_AJ.iloc[:,1], c='red', s=10, label='Calib')
    ax5.plot((0, axmax), (0, axmax), c='orange', linestyle=':')  # 1:1 line
    if corr_AJ[0] is not None:
        ax5.annotate('corr: '+str(round(corr_AJ[0], 3)), xy=(axmax*0.97, axmax*0.03), horizontalalignment='right')

    # other plot details
    print("other plot details")
    # Zero-width lines are invisible; presumably kept so the x-range
    # includes both ends of the evaluation window - TODO confirm.
    ax1.axvline(x=statStartDate, color='grey', linewidth=0)
    ax1.axvline(x=statEndDate, color='grey', linewidth=0)
    ax1.axvspan(statStartDate, statEndDate, color='grey', alpha=0.2, label='Calib Period')
    ax1.set_ylabel('Flow, Monthly (cms)')
    ax2.set_ylabel('Flow, Daily (cms)')
    ax3.set_ylabel('Flow, Monthly (cms)')
    ax3.set_xlabel('Calendar Month')
    ax4.set_ylabel('WY Obs (cms)')
    ax4.set_xlabel('WY Sim (cms)')
    ax5.set_ylabel('Apr-Jul Obs (cms)')
    ax5.set_xlabel('Apr-Jul Sim (cms)')
    ax1.legend(loc='upper right')
    ax2.legend().remove()
    ax3.legend(loc='upper right')
    ax4.legend()
    ax1.set_title('Streamflow', fontsize='medium',weight='semibold')

    # --- save plot
    print("save plot")
    fig.savefig(ofile, dpi=100)
    # Release the figure; otherwise one open figure accumulates per level.
    plt.close(fig)

---0---
correlations (AJ, WY):  0.7311148751974688 0.7484421119251399
plot layout
plot monthly
plot daily calibration period
plot monthly long term averages
plot scatter for water year mean flow
plot scatter for for spring runoff period
other plot details
save plot
---1a---
correlations (AJ, WY):  0.6820894559752355 0.7015571683076651
plot layout
plot monthly
plot daily calibration period
plot monthly long term averages
plot scatter for water year mean flow
plot scatter for for spring runoff period
other plot details
save plot
---1b---
correlations (AJ, WY):  0.7511194054681285 0.726461543114851
plot layout
plot monthly
plot daily calibration period
plot monthly long term averages
plot scatter for water year mean flow
plot scatter for for spring runoff period
other plot details
save plot
---1c---
correlations (AJ, WY):  0.593809360068252 0.6731032961873309
plot layout
plot monthly
plot daily calibration period
plot monthly long term averages
plot scatter for water year mean flow
plot s

#### 3. Plot only the hydrograph

In [None]:
# NOTE(review): despite the section title, this cell still draws the full
# five-panel diagnostic figure and writes to the same
# <level>_hydrograph_calib.png path as the previous section, overwriting it.

# --- read observed flow (cfs or cms) ---
# The observations do not depend on the complexity level, so read them once.
df_obs = pd.read_csv(obsFile, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                     parse_dates=True, infer_datetime_format=True)
df_obs.columns = ['obs']

# convert obs from cfs to cms (1 m^3 = 35.3147 ft^3)
if obs_unit == 'cfs':
    df_obs = df_obs/35.3147
df_obs_eval = df_obs.truncate(before=statStartDate, after=statEndDate)

# --- figure geometry ---
width  = 6.5  # in inches
height = 9.0
lwd    = 0.8  # line thickness

for complexity_level in levelArray:

    print('---%s---'%(complexity_level))
    best_run_dir = os.path.join(output_baseDir,complexity_level+'_'+domain_basename)
    ofile = os.path.join(plot_output_dir,'%s_hydrograph_calib.png'%(complexity_level))

    simVarName = 'IRFroutedRunoff'
    simFile = os.path.join(best_run_dir, route_outFilePrefix+'.mizuRoute.nc')

    # --- read simulated flow ---
    # The context manager closes the NetCDF file once the values are in memory.
    with xr.open_dataset(simFile) as ds:
        time = ds['time'].values
        sim  = ds[simVarName][:,(q_seg_index-1)].values #(time, segments)
    df_sim = pd.DataFrame({'sim':sim},index = time)
    df_sim.index = pd.to_datetime(df_sim.index)

    # --- combine daily sim & obs timeseries df based on time index ---
    # Column order must be [sim, obs]: the legend names, the line colours
    # and the scatter axis labels below are all assigned by position.
    df_sim_eval = df_sim.truncate(before=statStartDate, after=statEndDate)
    df_merge = pd.concat([df_sim_eval, df_obs_eval], axis=1)
    df_merge = df_merge.dropna()
    df_merge.index = pd.to_datetime(df_merge.index)

    # --- make dataframes ---
    # Both names refer to the same evaluation-window frame, so the
    # "all yrs" and "calib" series coincide in this notebook.
    df_final          = df_merge  # combine daily sim & obs timeseries
    df_final_calib    = df_merge  # daily, calib. period only

    df_final_WY       = df_final.resample('AS-OCT').mean()  # resampled to annual mean starting in October
    df_final_calib_WY = df_final_calib.resample('AS-OCT').mean()

    # Apr-Jul flows only, averaged per water year
    df_final_AJ       = df_final[(df_final.index.month>=4) & (df_final.index.month<=7)].resample('AS-OCT').mean()
    df_final_calib_AJ = df_final_calib[(df_final_calib.index.month>=4) & (df_final_calib.index.month<=7)].resample('AS-OCT').mean()
    df_final_M        = df_final.resample('M').mean()[df_final.resample('M').count()>=28]  # only for months with at least 28 days
    df_final_MA       = df_final.groupby(df_final.index.month).mean()                     # monthly avg
    df_final_MA.columns   = ['Sim (all yrs)', 'Obs (all yrs)']
    df_final_calib_MA = df_final_calib.groupby(df_final_calib.index.month).mean()         # monthly avg, cal period
    df_final_calib_MA.columns = ['Sim (calib)','Obs (calib)']

    # --- calculate some statistics ---
    # Pearson correlation needs at least two points; otherwise skip it.
    if len(df_final_AJ) > 1:
        corr_AJ = stats.pearsonr(df_final_AJ.iloc[:,0], df_final_AJ.iloc[:,1])
        corr_WY = stats.pearsonr(df_final_calib_WY.iloc[:,0], df_final_calib_WY.iloc[:,1])
        print("correlations (AJ, WY): ", corr_AJ[0], corr_WY[0])
    else:
        corr_AJ = [None, None]
        corr_WY = [None, None]

    # --- make plot ---
    # plot layout: three full-width time-series rows and two scatter panels
    print("plot layout")
    fig = plt.figure(figsize=(width,height))

    AX = gridspec.GridSpec(4,2)
    AX.update(wspace = 0.5, hspace = 0.3)
    ax1 = fig.add_subplot(AX[0,:])
    ax2 = fig.add_subplot(AX[1,:])
    ax3 = fig.add_subplot(AX[2,:])
    ax4 = fig.add_subplot(AX[3,0])
    ax5 = fig.add_subplot(AX[3,1])

    # plot monthly (sim = red, obs = black in every line panel)
    print("plot monthly")
    df_final_M.plot(ax=ax1, color=['red','black'], linewidth=lwd)

    # plot daily calibration period
    print("plot daily calibration period")
    df_final_calib.plot(ax=ax2, color=['red','black'], linewidth=lwd)

    # plot monthly long term averages for period
    print("plot monthly long term averages")
    df_final_calib_MA.plot(ax=ax3, color=['red','black'], linewidth=lwd)
    df_final_MA.plot(ax=ax3, color=['red','black'], linewidth=lwd, linestyle=':')

    # plot scatter for water year mean flow (x = sim, y = obs)
    print("plot scatter for water year mean flow")
    axmax = df_final_WY.max().max()
    ax4.scatter(df_final_WY.iloc[:,0], df_final_WY.iloc[:,1], c='black', s=5)
    ax4.scatter(df_final_calib_WY.iloc[:,0], df_final_calib_WY.iloc[:,1], c='red', s=10, label='Calib')
    ax4.plot((0, axmax), (0, axmax), c='orange', linestyle=':')  # 1:1 line
    if corr_WY[0] is not None:
        ax4.annotate('corr: '+str(round(corr_WY[0], 3)), xy=(axmax*0.97, axmax*0.03), horizontalalignment='right')

    # plot scatter for spring runoff period (Apr-Jul) (x = sim, y = obs)
    print("plot scatter for for spring runoff period")
    axmax = df_final_AJ.max().max()
    ax5.scatter(df_final_AJ.iloc[:,0], df_final_AJ.iloc[:,1],c='black', s=5)
    ax5.scatter(df_final_calib_AJ.iloc[:,0], df_final_calib_AJ.iloc[:,1], c='red', s=10, label='Calib')
    ax5.plot((0, axmax), (0, axmax), c='orange', linestyle=':')  # 1:1 line
    if corr_AJ[0] is not None:
        ax5.annotate('corr: '+str(round(corr_AJ[0], 3)), xy=(axmax*0.97, axmax*0.03), horizontalalignment='right')

    # other plot details
    print("other plot details")
    # Zero-width lines are invisible; presumably kept so the x-range
    # includes both ends of the evaluation window - TODO confirm.
    ax1.axvline(x=statStartDate, color='grey', linewidth=0)
    ax1.axvline(x=statEndDate, color='grey', linewidth=0)
    ax1.axvspan(statStartDate, statEndDate, color='grey', alpha=0.2, label='Calib Period')
    ax1.set_ylabel('Flow, Monthly (cms)')
    ax2.set_ylabel('Flow, Daily (cms)')
    ax3.set_ylabel('Flow, Monthly (cms)')
    ax3.set_xlabel('Calendar Month')
    ax4.set_ylabel('WY Obs (cms)')
    ax4.set_xlabel('WY Sim (cms)')
    ax5.set_ylabel('Apr-Jul Obs (cms)')
    ax5.set_xlabel('Apr-Jul Sim (cms)')
    ax1.legend(loc='upper right')
    ax2.legend().remove()
    ax3.legend(loc='upper right')
    ax4.legend()
    ax1.set_title('Streamflow', fontsize='medium',weight='semibold')

    # --- save plot
    print("save plot")
    fig.savefig(ofile, dpi=100)
    # Release the figure; otherwise one open figure accumulates per level.
    plt.close(fig)