In [84]:
#!/usr/bin/env python
# coding: utf-8

# This code plots the parameter trace during a parameter estimation process.
# Note: This code plots the incomplete trace of samples because it reads sampels from ostOutput.txt.
# Only the parameter sets that improve the objective function in comparison with the previous parameter set are plotted.

# import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt

import os, sys, argparse, datetime, shutil
from glob import glob
import netCDF4 as nc
import numpy as np
import matplotlib.pyplot as plt 
import xarray as xr
import pandas as pd
from matplotlib.dates import DateFormatter
from tqdm import tqdm

# Function to extract a given setting from the configuration file
def read_from_control(control_file, setting):
    
    # Open 'control_active.txt' and locate the line with setting
    with open(control_file) as ff:
        for line in ff:
            line = line.strip()
            if line.startswith(setting):
                break
    # Extract the setting's value
    substring = line.split('|',1)[1].split('#',1)[0].strip() 
    # Return this value    
    return substring

# Function to extract a given setting from the summa and mizuRoute manager/control files
def read_from_summa_route_control(control_file, setting):

    # Open fileManager.txt or route_control and locate the line with setting
    with open(control_file) as ff:
        for line in ff:
            line = line.strip()
            if line.startswith(setting):
                break
    # Extract the setting's value
    substring = line.split('!',1)[0].strip().split(None,1)[1].strip("'")
    # Return this value    
    return substring

def is_number(s):
    try:
        float(s)
        return True 
    except (ValueError,AttributeError):
        return False 
                    
# Function to extract the param default values and bounds from basinParam and localParam.txt.
def read_basinParam_localParam(filename):
    param_names = []
    param_default = []
    param_min = []
    param_max =[]
    with open (filename, 'r') as f:
        for line in f:
            line=line.strip()
            if line and not line.startswith('!') and not line.startswith("'"):
                splits=line.split('|')
                if isinstance(splits[0].strip(), str):
                    param_names.append(splits[0].strip())
                    param_default.append(str_to_float(splits[1].strip()))
                    param_min.append(str_to_float(splits[2].strip()))
                    param_max.append(str_to_float(splits[3].strip()))
    return param_names, param_default, param_min, param_max

# Function to convert data from Fortran format to scientific format.
def str_to_float(data_str):
    if 'd' in data_str:
        x = data_str.split('d')[0]+'e'+data_str.split('d')[1]
        return float(x)
    else:
        return float(data_str)
    
# main
if __name__ == '__main__':
    
    # ----------------------------- Settings ------------------------------        
    # inputs
    calib_output_path = '/home/h294liu/project/proj/5_summaCalib/BowAtBanff_LA_calib/GLUE_summary'
    control_file = os.path.join(calib_output_path, '6_collect_run_records/control_active.txt')         
    param_sample_path = os.path.join(calib_output_path, '7_collect_param_samples')
    default_model_output_path ='/home/h294liu/project/proj/5_summaCalib/BowAtBanff_LA_default/model/simulations_2010_2013'    
    
    calib_basename = 'GLUE' #SCE #GA #DDS #GLUE  
        
    # identify plot output path and file
    output_path = os.path.join(calib_output_path, '8_plot_param_vs_iteration')
    if not os.path.exists(output_path):
        os.makedirs(output_path)
       
    # --------------------------- End settings -----------------------------        

    ##########################################################################
    # PART 1. GRU and HRU indepedent part
    # --- 1.  Read interested param name list from control file  
    param_names = read_from_control(control_file, 'object_parameters')
    param_names = param_names.split(',')
    param_names = [x.strip() for x in param_names]    
    Nparam = len(param_names)

    # --- 2. Read parameter names and ranges from default model.
    root_path = read_from_control(control_file, 'root_path')
    domain_name = read_from_control(control_file, 'domain_name')
    domain_path = os.path.join(root_path, domain_name)

    # hydrologic model path, settings, fileManager.txt, trialParam.nc.
    model_dst_path = read_from_control(control_file, 'model_dst_path')
    if model_dst_path == 'default':
        model_dst_path = os.path.join(domain_path, 'model')

    summa_settings_relpath = read_from_control(control_file, 'summa_settings_relpath')
    summa_settings_path = os.path.join(model_dst_path, summa_settings_relpath)

    summa_filemanager = read_from_control(control_file, 'summa_filemanager')
    summa_filemanager = os.path.join(summa_settings_path, summa_filemanager)
      
    trialParamFile = read_from_summa_route_control(summa_filemanager, 'trialParamFile')
    trialParamFile_priori = trialParamFile.split('.nc')[0] + '.priori.nc' # a priori param file
    trialParamFile_priori = os.path.join(summa_settings_path, trialParamFile_priori)   

    # (1) read parameter names from summa_parameter_bounds.txt
    df_param_range = pd.read_csv(os.path.join(domain_path, 'calib', 'summa_parameter_bounds.txt'),
                                 sep=',',index_col=[0],usecols=[0,2,3],header=0,names=['variable','lower','upper'])
    
    # (2) read parameter ranges from localParam.txt and basinParam.txt.
    basinParam = read_from_summa_route_control(summa_filemanager, 'globalGruParamFile')
    localParam = read_from_summa_route_control(summa_filemanager, 'globalHruParamFile')

    basinParam = os.path.join(summa_settings_path, basinParam)
    localParam = os.path.join(summa_settings_path, localParam)

    basin_param_names, basin_param_default, basin_param_min, basin_param_max = read_basinParam_localParam(basinParam)    
    local_param_names, local_param_default, local_param_min, local_param_max = read_basinParam_localParam(localParam)

    df_basin_param_range = pd.DataFrame({'lower': basin_param_min, 'upper':basin_param_max}, index=basin_param_names)
    df_local_param_range = pd.DataFrame({'lower': local_param_min, 'upper':local_param_max}, index=local_param_names)
 
    # --- 2. Read default model performance.
    obj_default = np.loadtxt(os.path.join(default_model_output_path,'trial_stats.txt'), delimiter='#',usecols=[0])
    obj_default = obj_default[0] * (-1) # negative KGE

    
    ##########################################################################
    # PART 2. GRU and HRU depedent part
    
#     iGru,iHru = 0,0         # index starts from zero
    for iGru in [0,3,4]:#range(1):
        print('GRU%d'%(iGru+1))
        
        # initialize
        iHru = iGru
        ofile_fig = os.path.join(output_path, '%s_param_vs_iteration_GRU%d_HRU%d.png'%(calib_basename,iGru+1,iHru+1))       # output plot figure
        ofile_txt = os.path.join(output_path, '%s_best_sample_GRU%d_HRU%d.txt'%(calib_basename,iGru+1,iHru+1))       # output best param information
        ofile_param_txt = os.path.join(output_path, '%s_all_samples_GRU%d_HRU%d.txt'%(calib_basename,iGru+1,iHru+1)) # output all param information

        # 1. Read param samples of the given GRU and HRU.     
        print('--- Read param samples.')
        iGru_paramFile = os.path.join(param_sample_path,'GRU%d_param_samples.txt'%(iGru+1))
        iHru_paramFile = os.path.join(param_sample_path,'HRU%d_param_samples.txt'%(iGru+1))

        df_gru_params = pd.read_csv(iGru_paramFile, sep='\t',index_col=False)
        df_hru_params = pd.read_csv(iHru_paramFile, sep='\t',index_col=False)

        # Concatenating dataframes without duplicates
        df_params = pd.concat([df_gru_params, df_hru_params],axis=1)
        df_params = df_params.loc[:,~df_params.columns.duplicated()] # shape(nSamples, nParams)    

        # 2. Save param samples of the given GRU and HRU.     
        print('--- Save parameter samples.')
        # identify the iteration/runthe best parameter sets
        best_indices = df_params['obj.function'].idxmin()
        df_params.loc[best_indices].to_csv(ofile_txt, sep='\t',index=True)
        df_params.to_csv(ofile_param_txt, sep='\t',header=True, index=False)    

        # 3. Read a priori param values.
        print('--- Read a priori param values.')
        priori_param_values = np.zeros((Nparam,)) #store the a priori param values in array
        f    = xr.open_dataset(trialParamFile_priori)   
        for iParam in range(Nparam):
            iParam_name = param_names[iParam]
            iParam_dims = f[iParam_name].dims 

            if 'gru' in iParam_dims:
                priori_param_values[iParam] = f[iParam_name].values[iGru]
            elif 'hru' in iParam_dims:
                priori_param_values[iParam] = f[iParam_name].values[iHru]
        df_priori = pd.DataFrame(data=priori_param_values, index=param_names, columns=['priori'])     

        # 3. Plot param samples of the given GRU and HRU.    
        print('--- Plot parameter traces.')
        col_num = 4        
        row_num = int(np.ceil((Nparam+2)/float(col_num)))

        fig, ax = plt.subplots(row_num,col_num, figsize=(4.0*col_num, 4.0*0.75*row_num))#, constrained_layout=True)
#         fig.suptitle('GRUId = %d, HRUId = %d'%(iGru+1,iHru+1), fontsize='large', fontweight='bold')

        dpi_value=80
        ms_sample = 0.5  #1 # marker size for samples
        ms_highlight = 4 # marker size for highlight points   

        for i in range(row_num):
            for j in range(col_num):

                subplot_count = i*col_num + j
                param_index = subplot_count-1

                if subplot_count <= Nparam: 

                    if i==0 and j==0: 
                        # plot obj function 
                        ax[i,j].plot(df_params['Run'], df_params['obj.function'], color='g',marker='^',
                                     linewidth=0.0, markersize=ms_sample) 
                        fig_count = i*col_num + j
                        title_str = '('+chr(ord('a') + subplot_count) +') ' + 'Objective function'   
                        y_label = '-KGE'

                        # plot a priori and best obj functions
                        ax[i,j].plot(df_params['Run'].iloc[0], obj_default, 
                                     'D', markerfacecolor="none", markeredgecolor="k", markersize=ms_highlight); #darkorange
                        ax[i,j].plot(df_params['Run'].iloc[best_indices], df_params.loc[best_indices,'obj.function'], 
                                     's', markerfacecolor="none", markeredgecolor="red", markersize=ms_highlight);

                    else:
                        # plot parameters
                        param_name = param_names[param_index]
                        ax[i,j].plot(df_params['Run'], df_params[param_name], color='blue',marker='o',
                                     linewidth=0.0, markersize=ms_sample)
                        title_str = '('+chr(ord('a') + subplot_count) +') ' + param_name
                        y_label = 'Parameter value' 

                        # plot the a priori and best points 
                        ax[i,j].plot(df_params['Run'].iloc[0], df_priori.loc[param_name,'priori'], 
                                     'D', markerfacecolor="none", markeredgecolor="k", markersize=ms_highlight);

                        ax[i,j].plot(df_params['Run'].iloc[best_indices], df_params[param_name].iloc[best_indices],
                                     's', markerfacecolor="none", markeredgecolor="red", markersize=ms_highlight);

                        # ylimit, axis, label, title
                        if param_name in df_param_range.index:
                            df = df_param_range.copy()
                        elif param_name in df_local_param_range.index:
                            df = df_local_param_range.copy()
                        elif param_name in df_basin_param_range.index:
                            df = df_basin_param_range.copy()
                        param_min = df.loc[param_name, 'lower']
                        param_max = df.loc[param_name, 'upper']
                        ax[i,j].set_ylim([param_min, param_max])

                    ax[i,j].set_title(title_str, fontsize='medium', fontweight='semibold')
                    ax[i,j].set_xlabel('Iterations', fontsize='medium')
                    ax[i,j].set_ylabel(y_label, fontsize='medium')

                # blank subplots           
                else: 
                    # plot legend
                    if subplot_count == Nparam+1: 
                        ax[i,j].set_frame_on(False)
                        ax[i,j].get_xaxis().set_visible(False)
                        ax[i,j].get_yaxis().set_visible(False)
                        ax[i,j].plot(np.nan, np.nan, '^', markerfacecolor="green", markeredgecolor='none', label = 'Objective function')
                        ax[i,j].plot(np.nan, np.nan, 'o', markerfacecolor="blue", markeredgecolor='none', label = 'Parameter sample')
                        ax[i,j].plot(np.nan, np.nan, 'D', markerfacecolor="none", markeredgecolor="k", markersize=ms_highlight, label='A priori value') #darkorange
                        ax[i,j].plot(np.nan, np.nan, 's', markerfacecolor="none", markeredgecolor="red", markersize=ms_highlight, label='Best value')
                        ax[i,j].legend(loc = 'center left')
                    else:
                        ax[i,j].axis('off')

        plt.rc('xtick',labelsize='medium')
        plt.rc('ytick',labelsize='medium') 
        plt.tight_layout()
        plt.savefig(ofile_fig, dpi=dpi_value,bbox_inches='tight')
        plt.close(fig)          
  

GRU1
--- Read param samples.
--- Save parameter samples.
--- Read a priori param values.
--- Plot parameter traces.
GRU4
--- Read param samples.
--- Save parameter samples.
--- Read a priori param values.
--- Plot parameter traces.
GRU5
--- Read param samples.
--- Save parameter samples.
--- Read a priori param values.
--- Plot parameter traces.


In [82]:
col_num = 4        
row_num = int(np.ceil((Nparam+1)/float(col_num)))
row_num

4

In [41]:
df_local_param_range.loc[param_name,'lower'],df_local_param_range.loc[param_name,'upper']

(0.75, 0.75)

In [49]:
idx = local_param_names.index('heightCanopyTop')
idx
local_param_default[idx], local_param_min[idx], local_param_max[idx]

(20.0, 0.05, 100.0)

In [64]:
data = np.asarray([local_param_min, local_param_max]).reshape((len(local_param_min),2))
df_local_param_range = pd.DataFrame({'lower': local_param_min, 'upper':local_param_max}, index=local_param_names)
df_local_param_range

Unnamed: 0,lower,upper
upperBoundHead,-100.000,-0.010
lowerBoundHead,-100.000,-0.010
upperBoundTheta,0.102,0.368
lowerBoundTheta,0.102,0.368
upperBoundTemp,270.160,280.160
...,...,...
zmaxLayer4_lower,1.000,1.000
zmaxLayer1_upper,0.030,0.030
zmaxLayer2_upper,0.150,0.150
zmaxLayer3_upper,0.300,0.300


In [65]:
df_local_param_range.loc['heightCanopyTop','lower'],df_local_param_range.loc['heightCanopyTop','upper']

(0.05, 100.0)

In [66]:
df_local_param_range.loc['heightCanopyTop',:],df_local_param_range.loc['heightCanopyBottom',:]

(lower      0.05
 upper    100.00
 Name: heightCanopyTop, dtype: float64,
 lower    0.0
 upper    5.0
 Name: heightCanopyBottom, dtype: float64)

In [61]:
a=np.asarray((local_param_names,local_param_min, local_param_max))
a = a.reshape((len(local_param_names),3))
a

ValueError: cannot reshape array of size 1 into shape (159,3)