#### Calculate model performance evaluation metrics.

In [None]:
# import module
import os, parser
# import functions.utils as ut
import numpy as np
import datetime
import pandas as pd
import netCDF4 as nc
import argparse

In [None]:
def process_command_line():
    '''Parse the command line.

    Reads the arguments from sys.argv (argparse default).

    Returns:
        argparse.Namespace with one attribute, ``controlFile``: the path of
        the overall control file given as the single positional argument.
    '''
    # The local is deliberately not called ``parser`` so it does not shadow
    # the module of the same name imported at the top of the file.
    arg_parser = argparse.ArgumentParser(
        description='Script to calculate model evaluation statistics.')
    arg_parser.add_argument('controlFile', help='path of the overall control file.')
    return arg_parser.parse_args()

def get_modified_KGE(obs,sim):
    '''Return the modified KGE score of ``sim`` evaluated against ``obs``.

    The score is 1 minus the Euclidean distance from the ideal point (1, 1, 1)
    of three terms:
      * the correlation coefficient between sim and obs,
      * the ratio of the coefficients of variation (std/mean, sample std
        with ddof=1) of sim over obs,
      * the ratio of the means of sim over obs.

    No NaN handling is done here. Because the ratios are computed on Python
    floats, a zero mean (or a zero obs coefficient of variation) raises
    ZeroDivisionError.
    '''
    mean_sim = float(np.mean(sim))
    mean_obs = float(np.mean(obs))

    # coefficient of variation of each series
    cv_sim = float(np.std(sim, ddof=1)) / mean_sim
    cv_obs = float(np.std(obs, ddof=1)) / mean_obs

    corr = np.corrcoef(sim, obs)[0, 1]
    variability_ratio = cv_sim / cv_obs
    bias_ratio = mean_sim / mean_obs

    distance = np.sqrt((corr - 1)**2 + (variability_ratio - 1)**2 + (bias_ratio - 1)**2)
    return 1.0 - distance

def get_RMSE(obs,sim):
    '''Return the root-mean-square error of ``sim`` against ``obs``.

    NaN entries in the squared residuals are ignored (np.nanmean).
    '''
    residual = sim - obs
    mean_sq_err = np.nanmean(residual ** 2)
    return np.sqrt(mean_sq_err)

def get_mean_error(obs,sim):
    '''Return ``(bias_err, abs_err)`` for ``sim`` against ``obs``.

    bias_err is the mean of (sim - obs); abs_err is the mean of |sim - obs|.
    Both means ignore NaN entries (np.nanmean).
    '''
    residual = sim - obs
    return np.nanmean(residual), np.nanmean(np.abs(residual))

def get_month_mean_flow(obs,sim,sim_time):
    '''Average observed and simulated flow per calendar month.

    obs, sim : flow values, one per entry of ``sim_time``.
    sim_time : iterable of objects exposing a ``month`` attribute; it is also
               used as the index of the intermediate DataFrame.

    Returns ``(obs_month_mean, sim_month_mean)``: two DataFrames indexed by
    month, holding the NaN-ignoring mean of the 'obs' and of the 'sim' column
    respectively.
    '''
    frame = pd.DataFrame(
        {'sim': sim, 'obs': obs, 'month': [stamp.month for stamp in sim_time]},
        index=sim_time,
    )

    by_month = frame.groupby(['month'])
    sim_month_mean = by_month.aggregate({'sim': np.nanmean})
    obs_month_mean = by_month.aggregate({'obs': np.nanmean})
    return obs_month_mean, sim_month_mean

In [None]:
# Function to extract a given setting from the configuration file
def read_from_control(control_file, setting):
    '''Return the value of ``setting`` from a 'name | value  # comment' file.

    control_file : path of the control file to read.
    setting      : name of the setting, i.e. the text left of the first '|'.

    Returns the text between the first '|' and the first following '#'
    (if any), stripped of surrounding whitespace. The first matching line
    wins.

    Raises ValueError when no non-comment line defines ``setting``, instead
    of silently returning the value found on the last line of the file.
    '''
    with open(control_file) as contents:
        for line in contents:
            # Lines starting with '#' are comments and never define a setting.
            if line.startswith('#'):
                continue

            # Split once on '|': left part is the name, right part the value.
            name, separator, value = line.partition('|')
            if not separator or name.strip() != setting:
                continue

            # Drop a trailing comment (no-op without '#'), then whitespace.
            return value.split('#', 1)[0].strip()

    raise ValueError("setting '%s' not found in control file '%s'"
                     % (setting, control_file))
       
# Function to extract a given setting from the summa and mizuRoute manager/control files
def read_from_summa_mizuRoute_control(control_file, setting):
    '''Return the value of ``setting`` from a "name value ! comment" file.

    control_file : path of the manager/control file to read.
    setting      : leading text of the wanted line (matched with startswith
                   after stripping the line).

    The first line starting with ``setting`` is used: everything after the
    first '!' is dropped, the second whitespace-separated field onwards is
    taken as the value, and surrounding single quotes are removed.

    Raises ValueError when no line starts with ``setting`` (previously an
    UnboundLocalError), and IndexError when the matching line has no value.
    '''
    with open(control_file) as ff:
        for line in ff:
            line = line.strip()
            if not line.startswith(setting):
                continue

            # Strip before splitting: with maxsplit=1 the last field would
            # otherwise keep its trailing whitespace.
            without_comment = line.split('!', 1)[0].strip()
            return without_comment.split(None, 1)[1].strip("'")

    raise ValueError("setting '%s' not found in control file '%s'"
                     % (setting, control_file))

In [None]:
# read paths from control_file
# read_from_control is defined in this file; the `ut` module alias is not
# imported, so the helper is called directly.
control_file = '../control_active.txt'
root_path = read_from_control(control_file, 'root_path')
domain_name = read_from_control(control_file, 'domain_name')
domain_path = os.path.join(root_path, domain_name)

In [None]:
# read new hydrologic model path
# read_from_control is defined in this file; the `ut` module alias is not
# imported, so the helper is called directly.
model_dst_path = read_from_control(control_file, 'model_dst_path')
# 'default' means: use the 'model' folder inside the domain folder.
if model_dst_path == 'default':
    model_dst_path = os.path.join(domain_path, 'model')

In [None]:
# read summa and mizuRoute setting and filemanager/control files paths.
# read_from_control is defined in this file; the `ut` module alias is not
# imported, so the helper is called directly.
summa_setting_path = os.path.join(model_dst_path, 'settings/SUMMA')
summa_filemanager = read_from_control(control_file, 'summa_filemanager')
summa_filemanager = os.path.join(summa_setting_path, summa_filemanager)

mizuroute_setting_path = os.path.join(model_dst_path, 'settings/mizuRoute')
mizuroute_control = read_from_control(control_file, 'mizuroute_control')
mizuroute_control = os.path.join(mizuroute_setting_path, mizuroute_control)

In [None]:
# read calib path
# read_from_control is defined in this file; the `ut` module alias is not
# imported, so the helper is called directly.
calib_path = read_from_control(control_file, 'calib_path')
# 'default' means: use the 'calib' folder inside the domain folder.
if calib_path == 'default':
    calib_path = os.path.join(domain_path, 'calib')
calib_tpl_path = os.path.join(calib_path, 'tpl')

#### 1. Read input and output arguments

In [None]:
# (input) mizuRoute output file
# The two read_* helpers are defined in this file; the `ut` module alias is
# not imported, so they are called directly.
output_dir = read_from_summa_mizuRoute_control(mizuroute_control, '<output_dir>')
case_name = read_from_summa_mizuRoute_control(mizuroute_control, '<case_name>')
routeOutputFile = os.path.join(output_dir, case_name+'.nc')

# (input) segment id, observations, statistics relevant configs.
q_seg_index = int(read_from_control(control_file, 'q_seg_index')) # start from one.
obs_file = read_from_control(control_file, 'obs_file')
obs_unit = read_from_control(control_file, 'obs_unit')

stat_output = read_from_control(control_file, 'stat_output')
statStartDate = read_from_control(control_file, 'statStartDate')
statEndDate = read_from_control(control_file, 'statEndDate')

# others
q_vname = 'IRFroutedRunoff'
t_vname = 'time'
time_format='%Y-%m-%d'
# Turn the start/end date strings (YYYY-MM-DD) into datetime objects.
statStartDate = datetime.datetime.strptime(statStartDate,time_format)
statEndDate = datetime.datetime.strptime(statEndDate,time_format)


In [None]:
#!/usr/bin/env python
# coding: utf-8
# H. Liu, 2021/05/13


# main
if __name__ == '__main__':

    # Compute the evaluation statistics of the routed flow against the
    # observations and write them to a text file.
    #
    # Every input used here (routeOutputFile, q_seg_index, obs_file, obs_unit,
    # stat_output, statStartDate, statEndDate) has already been read from the
    # control file by the module-level statements earlier in this file. The
    # command-line parser only defines 'controlFile', so reading these values
    # from `args` (as was done before) raised AttributeError.

    q_vname = 'IRFroutedRunoff'
    t_vname = 'time'
    ofile = stat_output

    # --- read simulated flow (cms) ---
    # q_seg_index is one-based in the control file, hence the -1.
    with nc.Dataset(routeOutputFile) as ds:
        sim_irf = ds.variables[q_vname][:, q_seg_index-1]
        time_var = ds.variables[t_vname]
        # NOTE(review): depending on the netCDF4/cftime version, num2date may
        # return cftime objects instead of datetime.datetime -- confirm the
        # resulting index aligns with the parsed 'Date' index of the obs.
        sim_time = nc.num2date(time_var[:], time_var.units)
    df_sim = pd.DataFrame({'sim': sim_irf}, index=sim_time)

    # --- read observed flow (cfs or cms) ---
    # 'infer_datetime_format' is deprecated in pandas >= 2.0 and not needed.
    df_obs = pd.read_csv(obs_file, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                         parse_dates=True)
    df_obs.columns = ['obs']

    # convert obs from cfs to cms
    if obs_unit == 'cfs':
        df_obs = df_obs/35.3147

    # --- merge the two df based on time index ---
    # Keep only the evaluation period, then only time steps where both the
    # observation and the simulation are available.
    df_sim_eval = df_sim.truncate(before=statStartDate, after=statEndDate)
    df_obs_eval = df_obs.truncate(before=statStartDate, after=statEndDate)
    df_merge = pd.concat([df_obs_eval, df_sim_eval], axis=1).dropna()

    # --- calculate diagnostics ---
    obs_values = df_merge['obs'].values
    sim_values = df_merge['sim'].values
    kge = get_modified_KGE(obs=obs_values, sim=sim_values)
    rmse = get_RMSE(obs=obs_values, sim=sim_values)
    # get_mean_error and get_month_mean_flow are available in this file but
    # their results are currently not written to the output.

    # --- save ---
    # One statistic per line: value, a tab, then a '#'-prefixed label.
    with open(ofile, 'w') as fout:
        fout.write('%.6f' %kge + '\t#KGE\n')
        fout.write('%.6f' %rmse + '\t#RMSE (cms)\n')
