# Streamflow evaluation of calibrated wflow results

In [None]:
import numpy as np
import xarray as xr
import pandas as pd
import hydroeval

from glob import glob
from pathlib import Path

# Set Paths

In [None]:
# Project directories; every location below is derived from ROOT
ROOT = Path("/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/")
MODELS = ROOT / "wflow" / "data"                 # one sub-entry per basin (see glob below)
AUXDATA = ROOT / "aux_data"
OBSDIR = AUXDATA / "CAMELS-GB" / "data" / "timeseries"   # observation CSV files
OUTPUT = ROOT / "results" / "wflow_sbm" / "evaluation_period_calibrated"

# Set Config

In [None]:
# Basin IDs for wflow_sbm are the last path component of every entry found
# directly under MODELS, in sorted (string) order
basin_dirs = glob(f'{MODELS}/*')
basin_ids = sorted(entry.split('/')[-1] for entry in basin_dirs)


# Period (drop first year)
start_date, end_date = '2008-10-01', '2015-09-30'

# Define functions

In [None]:
def get_simulations(basin_id, start_date, end_date):
    """Load the simulated discharge of one basin for the evaluation period.

    Reads ``{MODELS}/{basin_id}/evaluation/output.csv`` with the ``time``
    column as (parsed) index, keeps the rows with
    ``start_date < time <= end_date`` and renames column ``Q_1`` to
    ``evaluation``.

    Parameters
    ----------
    basin_id : basin identifier used as directory name under ``MODELS``.
    start_date : exclusive lower bound of the selected period.
    end_date : inclusive upper bound of the selected period.

    Returns
    -------
    pandas.DataFrame
        The filtered simulation table.

    Raises
    ------
    IndexError
        If the simulation file does not exist (``glob`` returns no match).
    """
    # glob yields an empty list when the file is missing, so [0] raises
    sim_file = glob(f'{MODELS}/{basin_id}/evaluation/output.csv')[0]

    # Load simulation dataframe
    df = pd.read_csv(sim_file, parse_dates=True, index_col='time')

    # Select evaluation period (drop first year); note that start_date
    # itself is excluded by the strict comparison
    mask = (df.index > start_date) & (df.index <= end_date)
    df = df.loc[mask]

    # Rename the discharge column to the name used by the objective functions
    df = df.rename(columns={'Q_1': 'evaluation'})

    return df


def get_observations(basin_id, start_date, end_date):
    """Load the observed time series of one basin for the selected period.

    The first file in ``OBSDIR`` matching ``*_{basin_id}_*.csv`` is read with
    its ``date`` column as (parsed) index; rows with
    ``start_date < date <= end_date`` are returned.

    Raises ``IndexError`` when no observation file matches the pattern.
    """
    # First match of the observation file pattern
    matches = glob(f'{OBSDIR}/*_{basin_id}_*.csv')
    observations = pd.read_csv(matches[0], parse_dates=True, index_col='date')

    # Keep the requested period (start exclusive, end inclusive)
    dates = observations.index
    in_period = (dates > start_date) & (dates <= end_date)

    return observations.loc[in_period]
    
    
def calculate_objective_functions(basin_id, df_sim, df_obs):
    """Compute the objective functions for one basin and one period.

    Evaluates ``hydroeval.nse``, ``hydroeval.kge``, ``hydroeval.kgeprime`` and
    ``hydroeval.kgenp`` on ``df_sim['evaluation']`` against
    ``df_obs.discharge_vol`` and rounds every stored value to 4 decimals.

    Parameters
    ----------
    basin_id : identifier written to the ``basin_id`` column.
    df_sim : DataFrame with an ``evaluation`` column (simulated discharge).
    df_obs : DataFrame with a ``discharge_vol`` column (observed discharge).

    Returns
    -------
    pandas.DataFrame
        A single-row frame with columns ``basin_id``, ``nse``, ``kge_2009``,
        ``kge_2012``, ``kge_np``, ``kge_np_r``, ``kge_np_alpha`` and
        ``kge_np_beta``.
    """
    sim = df_sim['evaluation']
    obs = df_obs.discharge_vol

    # All evaluator calls use axis=1; the indexing below relies on the
    # resulting layout (nse[0], kge[0][i])
    nse = hydroeval.evaluator(hydroeval.nse, sim, obs, axis=1)
    kge_2009 = hydroeval.evaluator(hydroeval.kge, sim, obs, axis=1)
    kge_2012 = hydroeval.evaluator(hydroeval.kgeprime, sim, obs, axis=1)
    kge_np = hydroeval.evaluator(hydroeval.kgenp, sim, obs, axis=1)

    # NOTE(review): entries 1-3 of the kgenp result are stored as r, alpha
    # and beta -- presumably the KGE components; confirm against hydroeval docs
    return pd.DataFrame({
        'basin_id': [basin_id],
        'nse': [np.round(nse[0], 4)],
        'kge_2009': [np.round(kge_2009[0][0], 4)],
        'kge_2012': [np.round(kge_2012[0][0], 4)],
        'kge_np': [np.round(kge_np[0][0], 4)],
        'kge_np_r': [np.round(kge_np[0][1], 4)],
        'kge_np_alpha': [np.round(kge_np[0][2], 4)],
        'kge_np_beta': [np.round(kge_np[0][3], 4)],
    })

# Check if output exists

In [None]:
# Keep only the basins whose evaluation run produced a non-empty output.csv
df = pd.DataFrame()
basins = []
exists = []

for basin_id in basin_ids:
    basins.append(basin_id)

    # Check if file exists
    sim_file = Path(f'{MODELS}/{basin_id}/evaluation/output.csv')
    if not sim_file.is_file():
        exists.append(False)
        continue

    # Check if csv contains output; one data row is enough to decide, so the
    # whole file does not have to be parsed
    try:
        df_sim = pd.read_csv(sim_file, nrows=1)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse: treat it as "no output"
        # instead of aborting the whole check
        exists.append(False)
        continue

    exists.append(len(df_sim) > 0)

df['basin_id'] = basins
df['completed'] = exists
df = df.reset_index()
# astype(bool) keeps the selection a boolean mask even when no basin was found
df = df[df['completed'].astype(bool)]

# From here on basin_ids only holds basins with usable simulation output
basin_ids = df.basin_id.to_list()

# Calculate Objective functions

In [None]:
# For every basin: export the sim/obs series, score each water year and store
# the mean of the yearly scores
for basin_id in basin_ids:
    print(basin_id)

    # Get sim and obs timeseries
    df_sim = get_simulations(basin_id, start_date, end_date)
    df_obs = get_observations(basin_id, start_date, end_date)

    df_sim.to_csv(f'{OUTPUT}/simulations/{basin_id}_wflow_calibrated_evaluation_simulations.csv')
    df_obs.to_csv(f'{OUTPUT}/observations/{basin_id}_wflow_calibrated_evaluation_observations.csv')

    # Calculate objective function for each water year and take average.
    # A water year runs from 1 October of `year` to 30 September of `year + 1`.
    years = range(int(start_date[:4]), int(end_date[:4]))

    objective_dfs = []
    for year in years:
        start_year = f'{year}-10-01'
        end_year = f'{year+1}-09-30'

        # Select water year.
        # NOTE(review): the mask is built from the simulation index and applied
        # to the observations as well, which assumes both frames hold exactly
        # the same rows in the same order -- confirm for all basins.
        mask = (df_sim.index >= start_year) & (df_sim.index <= end_year)
        df_sim_year = df_sim.loc[mask]
        df_obs_year = df_obs.loc[mask]

        # Calculate objective function
        df_objective = calculate_objective_functions(basin_id, df_sim_year, df_obs_year)
        objective_dfs.append(df_objective)

    # Stack the water years as rows and average every metric column. This
    # avoids groupby(axis=1) (deprecated in pandas) and never tries to take
    # the mean of the non-numeric basin_id column.
    df_years = pd.concat(objective_dfs, ignore_index=True)
    df = df_years.drop(columns='basin_id').mean().to_frame().T
    df.insert(0, 'basin_id', basin_id)
    df.to_csv(f'{OUTPUT}/objective_functions/{basin_id}_wflow_calibrated_evaluation_objective_functions.csv', index=False)

# Create overview dataframe

In [None]:
# Collect the per-basin objective function files into one overview table
result_files = glob(f"{OUTPUT}/objective_functions/*_evaluation_objective_functions.csv")

# Output columns, in the order they are written
overview_columns = [
    'basin_id',
    'kge_np',
    'kge_np_r',
    'kge_np_alpha',
    'kge_np_beta',
    'kge_2009',
    'kge_2012',
    'nse',
]
collected = {column: [] for column in overview_columns}

for result_file in result_files:
    # Rank the rows by kge_np (descending) and keep the top one
    best = (
        pd.read_csv(result_file)
        .set_index('kge_np')
        .sort_index(ascending=False)
        .reset_index()
        .loc[0]
    )

    # basin_id is stored as an integer, the scores are copied unchanged
    collected['basin_id'].append(int(best['basin_id']))
    for column in overview_columns[1:]:
        collected[column].append(best[column])

# Create output dataframe
df_out = pd.DataFrame(collected)
basin_ids = collected['basin_id']

# Write output
df_out.to_csv(f'{ROOT}/results/wflow_sbm/wflow_calibrated_evaluation_objective_functions_overview.csv')