In [2]:
#!/usr/bin/env python
# coding: utf-8

# This code compares the best run of a parameter estimation process with the default (a priori) model run.
# The best run is the row of OstModel.txt with the minimum value in its second column.
# For both runs it plots the observed and simulated hydrographs and the GRU/HRU-mean SUMMA output variables,
# and it saves the ID of the best run to a text file.

# import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt

import os, sys, argparse, datetime
from glob import glob
import netCDF4 as nc
import numpy as np
import matplotlib.pyplot as plt 
import xarray as xr
import pandas as pd
from matplotlib.dates import DateFormatter

# Function to extract a given setting from the configuration file
def read_from_control(control_file, setting):
    """Return the value assigned to ``setting`` in a control file.

    The file is scanned line by line and the first line whose stripped text
    starts with ``setting`` is used. The value is the text between the first
    '|' and the first following '#' (if any), with surrounding whitespace
    removed.

    Parameters
    ----------
    control_file : str
        Path of the control file (e.g. 'control_active.txt').
    setting : str
        Name of the setting to look up; matched as a line prefix.

    Returns
    -------
    str
        The value of the setting.

    Raises
    ------
    ValueError
        If no line starts with ``setting``, or if the matching line does not
        contain a '|' separator.
    """
    with open(control_file) as ff:
        for line in ff:
            line = line.strip()
            if not line.startswith(setting):
                continue
            # Everything after the first '|' is the value plus an optional comment.
            _, separator, remainder = line.partition('|')
            if not separator:
                raise ValueError("Setting '%s' in %s has no '|' separator: %r"
                                 % (setting, control_file, line))
            # Drop the trailing '#' comment, if present.
            return remainder.split('#', 1)[0].strip()

    # Reaching this point means the setting is absent. Fail loudly instead of
    # silently parsing whatever line happened to be last in the file.
    raise ValueError("Setting '%s' not found in %s" % (setting, control_file))

def read_obs_flow(obs_file, StartDate, EndDate, unit=None):
    """Read observed streamflow from a CSV file into a single-column DataFrame.

    The CSV must have a 'Date' column, which becomes the (parsed) index, and
    exactly one data column, which is renamed to 'obs'. The strings "-99.0",
    "-999.0" and "-9999.0" are treated as missing values.

    Parameters
    ----------
    obs_file : str
        Path of the observation CSV file.
    StartDate, EndDate :
        First and last index values to keep (both inclusive, as in
        ``DataFrame.truncate``).
    unit : str, optional
        Unit of the observations. When 'cfs', the values are divided by
        35.3147 to convert them to cms. When omitted, the module-level
        ``obs_unit`` is used, which keeps existing three-argument calls
        working unchanged.

    Returns
    -------
    pandas.DataFrame
        Observations with a single column 'obs', limited to the given period.
    """
    # The deprecated 'infer_datetime_format' argument is intentionally not
    # passed: recent pandas versions warn about it and it is not needed to
    # parse the index.
    df_obs = pd.read_csv(obs_file, index_col='Date', na_values=["-99.0","-999.0","-9999.0"],
                         parse_dates=True)
    df_obs.columns = ['obs']

    if unit is None:
        # Fall back to the setting defined at module level by the main script.
        unit = obs_unit

    # convert obs from cfs to cms
    if unit == 'cfs':
        df_obs = df_obs/35.3147
    df_obs = df_obs.truncate(before=StartDate, after=EndDate)
    return df_obs

def read_route_output(route_outFile, StartDate, EndDate):
    """Read the routed runoff of one segment into a single-column DataFrame.

    The variable 'IRFroutedRunoff' is read from ``route_outFile`` and the
    column at position ``q_seg_index - 1`` (``q_seg_index`` is a module-level
    setting) is extracted. The result is indexed by the file's 'time' values
    and limited to the period StartDate..EndDate.

    Returns
    -------
    pandas.DataFrame
        Simulated flow with a single column 'sim'.
    """
    routed_var = 'IRFroutedRunoff'
    with xr.open_dataset(route_outFile) as ds:
        timestamps = ds['time'].values
        # the variable is indexed as (time, segments)
        segment_flow = ds[routed_var][:, (q_seg_index - 1)].values
        sim_frame = pd.DataFrame({'sim': segment_flow}, index=timestamps)
        sim_frame.index = pd.to_datetime(sim_frame.index)
    return sim_frame.truncate(before=StartDate, after=EndDate)

def read_summa_output(summa_outFile, StartDate, EndDate):
    """Read SUMMA output variables and average each one over its second axis.

    Every key of the dataset except 'time', 'hru', 'gru', 'hruId' and 'gruId'
    is treated as a state variable. Each variable is reduced with
    ``np.nanmean(..., axis=1)`` and stored as one column of a DataFrame
    indexed by time.

    Parameters
    ----------
    summa_outFile : str
        Path of the SUMMA netCDF output file.
    StartDate, EndDate :
        First and last index values to keep (both inclusive, as in
        ``DataFrame.truncate``).

    Returns
    -------
    df_state : pandas.DataFrame
        One column per state variable, limited to the given period, with rows
        containing any NaN removed.
    stateVar_num : int
        Number of state variables.
    stateVar_name_list : list of str
        Variable names, in the order of the DataFrame columns.
    stateVar_long_name_list : list of str
        'long_name' attribute of each variable.
    stateVar_units_list : list of str
        'units' attribute of each variable.
    """
    non_state_names = ('time', 'hru', 'gru', 'hruId', 'gruId')

    with xr.open_dataset(summa_outFile) as f:
        time = f['time'].values
        stateVar_name_list = [name for name in f.keys() if name not in non_state_names]
        stateVar_num = len(stateVar_name_list)

        # (1) read each state variable and calculate GRU/HRU mean value
        stateVar_long_name_list = []
        stateVar_units_list = []
        state_columns = {}
        for stateVarName in stateVar_name_list:
            stateVar = f[stateVarName]
            stateVar_long_name_list.append(stateVar.attrs['long_name'])
            stateVar_units_list.append(stateVar.attrs['units'])
            # assumes data are (time,hru) or (time,gru), per the original comment
            state_columns[stateVarName] = np.nanmean(stateVar.values, axis=1)

    # Build the frame in one go so it also exists (empty, but indexed by time)
    # when the file contains no state variables.
    df_state = pd.DataFrame(state_columns, index=time)
    # Convert the index of the state frame itself. The previous code converted
    # the index of an unrelated module-level 'df_sim' here.
    df_state.index = pd.to_datetime(df_state.index)

    # (2) truncate state dataframe based on time
    df_state = df_state.truncate(before=StartDate, after=EndDate)
    df_state = df_state.dropna()
    return df_state, stateVar_num, stateVar_name_list, stateVar_long_name_list, stateVar_units_list

# main
if __name__ == '__main__':
    # Compare the best calibrated run with the default (a priori) run:
    # find the best run, read observed/simulated flow and SUMMA outputs of both
    # runs, plot them side by side, and record the best run ID in a text file.

    # ----------------------------- Settings ------------------------------
    # calib inputs
    root_path = '/home/h294liu/project/proj/5_summaCalib'  # root path where parameter estimation will be stored.
    domain_name = 'BowAtBanff_LA_calib' #'BowAtBanff' #'BowAtBanff_LA_calib'

    calib_basename = 'GLUE' #SCE #GA #DDS #GLUE
    default_model_folder = 'BowAtBanff_LA' #'BowAtBanff_LA' #'BowAtBanff_default'
    default_model_output_folder = 'simulations_2010_2013'
    outFilePrefix = 'run1'

    calib_output_path = os.path.join(root_path, domain_name,calib_basename+'_summary')
    default_output_path = os.path.join(root_path, default_model_folder)

    # identify plot output path and file
    output_path = os.path.join(calib_output_path, 'analysis', '10_plot_state_compare')
    os.makedirs(output_path, exist_ok=True)

    ofile_fig = os.path.join(output_path, '%s_best_state.png'%(calib_basename))   # output plot figure
    ofile_txt = os.path.join(output_path, '%s_best_summary.txt'%(calib_basename)) # output best param information

    # --------------------------- End settings -----------------------------

    print('Identify best run.')
    # 1. identify run number of the best parameter set
    # ndmin=2 keeps 'data' two-dimensional even when the file holds a single run.
    data = np.loadtxt(os.path.join(calib_output_path, 'OstModel.txt'), delimiter='\t', usecols=[0,1],
                      skiprows=1, ndmin=2)
    # the best run is the row with the minimum value in the second column
    best_run_id = int(data[np.argmin(data[:,1]),0])

    # 2. identify summa and route simulation output from the best run and default run
    best_run_path = os.path.join(calib_output_path, 'runs', 'run'+str(best_run_id))
    summa_outFile = os.path.join(best_run_path, outFilePrefix+'_day.nc')
    route_outFile = os.path.join(best_run_path, outFilePrefix+'.mizuRoute.nc')

    summa_outFile_default = os.path.join(default_output_path, 'model', default_model_output_folder, outFilePrefix,
                                         'SUMMA', outFilePrefix+'_day.nc')
    route_outFile_default = os.path.join(default_output_path, 'model', default_model_output_folder, outFilePrefix,
                                         'mizuRoute', outFilePrefix+'.mizuRoute.nc')

    # 3. read control_active.txt for route segment id, observations, and time series configs.
    # NOTE: q_seg_index and obs_unit are read as module-level names by the reader functions.
    control_file = os.path.join(default_output_path, 'calib/control_active.txt')
    q_seg_index = int(read_from_control(control_file, 'q_seg_index')) # start from one.
    obs_file = read_from_control(control_file, 'obs_file')
    obs_unit = read_from_control(control_file, 'obs_unit')

    statStartDate = read_from_control(control_file, 'statStartDate')
    statEndDate = read_from_control(control_file, 'statEndDate')

    time_format='%Y-%m-%d'
    # skip the first three days to avoid very bad initial results
    statStartDate = datetime.datetime.strptime(statStartDate,time_format) + datetime.timedelta(days=3)
    statEndDate = datetime.datetime.strptime(statEndDate,time_format)

    print('Read outputs.')
    # 4. read best run model outputs
    # read observed streamflow
    df_obs = read_obs_flow(obs_file, statStartDate, statEndDate)
    # read route streamflow
    df_sim = read_route_output(route_outFile, statStartDate, statEndDate)
    # merge the two df based on time index
    df_sim_obs = pd.concat([df_obs, df_sim], axis=1)
    df_sim_obs = df_sim_obs.dropna()
    # read summa state outputs
    (df_state, stateVar_num, stateVar_name_list,
     stateVar_long_name_list, stateVar_units_list) = read_summa_output(summa_outFile, statStartDate, statEndDate)

    # 5. read default run model outputs
    # read route streamflow
    df_sim_default = read_route_output(route_outFile_default, statStartDate, statEndDate)
    # merge the two df based on time index
    df_sim_obs_default = pd.concat([df_obs, df_sim_default], axis=1)
    df_sim_obs_default = df_sim_obs_default.dropna()
    # read summa state outputs
    (df_state_default, stateVar_num_default, stateVar_name_list_default,
     stateVar_long_name_list_default, stateVar_units_list_default) = read_summa_output(
        summa_outFile_default, statStartDate, statEndDate)

    # 6. Plot
    print('Plot.')
    col_num = 4 #3
    var_plot_total = stateVar_num+1  # state (including forcing) + hydrograph
    row_num = int(np.ceil(var_plot_total/float(col_num)))
    dpi_value=80

    # Tick label sizes are read when the axes are created, so set them first.
    plt.rc('xtick',labelsize='medium')
    plt.rc('ytick',labelsize='medium')

    # squeeze=False keeps 'ax' two-dimensional even when there is only one row,
    # so that ax[iRow,iCol] indexing always works.
    fig, ax = plt.subplots(row_num,col_num, figsize=(5.5*col_num, 3.54*0.75*row_num), squeeze=False)
    fig.suptitle(domain_name, fontsize='large', fontweight='bold')

    # (1) plot hydrograph in the first panel
    ax_id = 0
    iRow, iCol = divmod(ax_id, col_num)
    hydro_ax = ax[iRow,iCol]

    df_sim_obs_default['obs'].plot(ax=hydro_ax, linewidth=0.75, markersize=0.0, color='black', alpha=0.6)
    df_sim_obs_default['sim'].plot(ax=hydro_ax, linewidth=0.75, markersize=0.0, color='blue', alpha=0.6)
    df_sim_obs['sim'].plot(ax=hydro_ax, linewidth=0.75, markersize=0.0, color='red', alpha=0.6)

    hydro_ax.legend(["Observed","A priori", "Calibrated"], loc='best', fontsize='medium')
    hydro_ax.set_title('('+chr(ord('a') + ax_id) +') ' + 'Hydrograph', fontsize='medium', fontweight='semibold')
    hydro_ax.set_ylabel('Flow (cms)', fontsize='medium')

    # (2) plot states, one panel per variable (blue = a priori, red = calibrated)
    for j in range(stateVar_num):
        # identify subplot variable info
        stateVarName = stateVar_name_list[j]
        stateVar_units = stateVar_units_list[j]

        # identify subplot ax (panel 0 is the hydrograph)
        ax_id = 1+j
        iRow, iCol = divmod(ax_id, col_num)
        state_ax = ax[iRow,iCol]

        # plot state variables
        df_state_default[stateVarName].plot(ax=state_ax, linewidth=0.75, markersize=0.0, color='blue', alpha=0.6)
        df_state[stateVarName].plot(ax=state_ax, linewidth=0.75, markersize=0.0, color='red', alpha=0.6)

        title = '('+chr(ord('a') + ax_id) +') ' + stateVarName.replace('scalar','').capitalize()
        state_ax.set_title(title, fontsize='medium', fontweight='semibold')
        state_ax.set_ylabel('('+stateVar_units+')', fontsize='medium')

    # (3) make extra subplots blank
    for ax_id in range(var_plot_total, row_num*col_num):
        iRow, iCol = divmod(ax_id, col_num)
        ax[iRow,iCol].axis('off')

    fig.tight_layout()
    fig.subplots_adjust(top=0.95)
    fig.savefig(ofile_fig, dpi=dpi_value)
    plt.close(fig)

    # 7. save best run information to ofile_txt
    # the context manager closes the file even if the write fails
    with open(ofile_txt, 'w') as f:
        f.write('Best run ID = %d\n'%(best_run_id))
    

Identify best run.
Read outputs.
Plot.
