# Calculate objective function difference and determine if larger than sampling uncertainty average

In [10]:
from glob import glob
from pathlib import Path

import numpy as np
import xarray as xr
import pandas as pd

import pylab as plot
import matplotlib.pyplot as plt
import seaborn as sns

## Set Paths

In [11]:
# Set Paths
ROOT = Path("/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/")
AUXDATA = Path(f"{ROOT}/aux_data")
RESULTS = Path(f"{ROOT}/results/")

## Set Config

In [12]:
# Set GUMBOOT result files
wflow_calibrated_kge_csv = f'{RESULTS}/wflow_sbm/wflow_calibrated_kge_np_gumboot.csv'
wflow_uncalibrated_kge_csv = f'{RESULTS}/wflow_sbm/wflow_uncalibrated_kge_np_gumboot.csv'
pcrglob_kge_csv = f'{RESULTS}/pcr-globwb/pcr-globwb_kge_np_gumboot.csv'

wflow_calibrated_nse_csv = f'{RESULTS}/wflow_sbm/wflow_calibrated_nse_gumboot.csv'
wflow_uncalibrated_nse_csv = f'{RESULTS}/wflow_sbm/wflow_uncalibrated_nse_gumboot.csv'
pcrglob_nse_csv = f'{RESULTS}/pcr-globwb/pcr-globwb_nse_gumboot.csv'

# Set objective function result files
wflow_calibrated_objective_cvs = f"{RESULTS}/wflow_sbm/wflow_calibrated_evaluation_objective_functions_overview.csv"
wflow_uncalibrated_objective_cvs = f"{RESULTS}/wflow_sbm/wflow_uncalibrated_evaluation_objective_functions_overview.csv"
pcrglob_objective_csv = f"{RESULTS}/pcr-globwb/pcr-globwb_evaluation_period_objective_functions_overview.csv"

# Load available basin_IDs
df_basin_ids = pd.read_csv(f"{AUXDATA}/available_basin_ids_uncertainty.csv", index_col='basin_id')

## Load Results Dataframes

In [13]:
def load_result_dataframe(csv_file, df_basin_ids):
    
    # Load dataframes
    df = pd.read_csv(csv_file, index_col='basin_id')
    
    # Combine dataframes
    df = df_basin_ids.join(df)
    
    # Drop NaN values
    df = df[df['seJack'].notna()]
    
    return df

def load_objective_dataframe(csv_file, df_basin_ids):
    
    # Load dataframes
    df = pd.read_csv(csv_file, index_col='basin_id')
    df = df.drop(columns=['Unnamed: 0'])
    # Combine dataframes
    df = df_basin_ids.join(df)
    
    # Drop NaN values
    df = df[df['kge_np'].notna()]
    
    return df

In [14]:
# Load KGE-NP gumboot results
df_kge_wflow_calibrated = load_result_dataframe(wflow_calibrated_kge_csv, df_basin_ids)
df_kge_wflow_uncalibrated = load_result_dataframe(wflow_uncalibrated_kge_csv, df_basin_ids)
df_kge_pcrglob = load_result_dataframe(pcrglob_kge_csv, df_basin_ids)

# Load NSE gumboot results
df_nse_wflow_calibrated = load_result_dataframe(wflow_calibrated_nse_csv, df_basin_ids)
df_nse_wflow_uncalibrated = load_result_dataframe(wflow_uncalibrated_nse_csv, df_basin_ids)
df_nse_pcrglob = load_result_dataframe(pcrglob_nse_csv, df_basin_ids)

# Load objective function results
df_wflow_calibrated_objective = load_objective_dataframe(wflow_calibrated_objective_cvs, df_basin_ids)
df_wflow_uncalibrated_objective = load_objective_dataframe(wflow_uncalibrated_objective_cvs, df_basin_ids)
df_pcrglob_objective = load_objective_dataframe(pcrglob_objective_csv, df_basin_ids)

## Calculate tolerance interval

In [15]:
def calculate_tolerance(df):
    # Calculate tolerance interval 
    df['2xJack'] = df['seJack'] * 2
    df['2xBoot'] = df['seBoot'] * 2
    df['tolerance'] = df['p95'] - df['p05']

    df = df.sort_values(by=['2xJack'])
    
    return df

In [16]:
# Tolerance interval KGE-NP results
df_kge_wflow_calibrated = calculate_tolerance(df_kge_wflow_calibrated)
df_kge_wflow_uncalibrated = calculate_tolerance(df_kge_wflow_uncalibrated)
df_kge_pcrglob = calculate_tolerance(df_kge_pcrglob)

# Tolerance interval NSE results
df_nse_wflow_calibrated = calculate_tolerance(df_nse_wflow_calibrated)
df_nse_wflow_uncalibrated = calculate_tolerance(df_nse_wflow_uncalibrated)
df_nse_pcrglob = calculate_tolerance(df_nse_pcrglob)

## Calculate Average Sampling Uncertainty Tolerance Interval & Objective function difference

### Wflow calibrated & uncalibrated

In [26]:
# Create empty dataframe
df_wflow_comp = pd.DataFrame()

# Join Model A and Model B gumboot results
df_kge = df_kge_wflow_calibrated.join(df_kge_wflow_uncalibrated,lsuffix='_wflow_calibrated',rsuffix='_wflow_uncalibrated')
df_nse = df_nse_wflow_calibrated.join(df_nse_wflow_uncalibrated,lsuffix='_wflow_calibrated',rsuffix='_wflow_uncalibrated')

# Add average tolerance interval
df_wflow_comp['kge_average_tolerance'] = df_kge[['tolerance_wflow_calibrated', 'tolerance_wflow_uncalibrated']].mean(axis=1).to_list()
df_wflow_comp['nse_average_tolerance'] = df_nse[['tolerance_wflow_calibrated', 'tolerance_wflow_uncalibrated']].mean(axis=1).to_list()

# # Add basin_ids and set index
df_wflow_comp['basin_id'] = df_kge.index
df_wflow_comp = df_wflow_comp.set_index('basin_id')

# Join Model A and Model B objective function results
df = df_wflow_calibrated_objective.join(df_wflow_uncalibrated_objective,lsuffix='_wflow_calibrated',rsuffix='_wflow_uncalibrated')

# Add absolute objective function difference
df_kge_difference = (df['kge_np_wflow_calibrated'] - df['kge_np_wflow_uncalibrated']).abs()
df_kge_difference = df_kge_difference.to_frame('kge_difference')
df_nse_difference = (df['nse_wflow_calibrated'] - df['nse_wflow_uncalibrated']).abs()
df_nse_difference = df_nse_difference.to_frame('nse_difference')

# Construct dataframe
df_wflow_comp = df_wflow_comp.join(df_kge_difference)
df_wflow_comp = df_wflow_comp.join(df_nse_difference)

# Add sampling uncertainty larger than (LT) objective function difference
kge_sampling_LT_objective = []
nse_sampling_LT_objective = []

for index, row in df_wflow_comp.iterrows():
    
    if row.kge_average_tolerance > row.kge_difference:
        kge_sampling_LT_objective.append(True)
    else:
        kge_sampling_LT_objective.append(False)
    
    if row.nse_average_tolerance > row.kge_difference:
        nse_sampling_LT_objective.append(True)
    else:
        nse_sampling_LT_objective.append(False)

df_wflow_comp['kge_tolerance_LT_objective'] = kge_sampling_LT_objective    
df_wflow_comp['nse_tolerance_LT_objective'] = nse_sampling_LT_objective

df_wflow_comp.to_csv(f'{RESULTS}/sampling_uncertainty_overview_wflow_calibrated_uncalibrated.csv')

### wflow calibrated vs pcrglobwb

In [27]:
# Create empty dataframe
df_wflow_pcrglob_comp = pd.DataFrame()

# Join Model A and Model B gumboot results
df_kge = df_kge_wflow_calibrated.join(df_kge_pcrglob,lsuffix='_wflow_calibrated',rsuffix='_pcrglob')
df_nse = df_nse_wflow_calibrated.join(df_nse_pcrglob,lsuffix='_wflow_calibrated',rsuffix='_pcrglob')

# Add average tolerance interval
df_wflow_pcrglob_comp['kge_average_tolerance'] = df_kge[['tolerance_wflow_calibrated', 'tolerance_pcrglob']].mean(axis=1).to_list()
df_wflow_pcrglob_comp['nse_average_tolerance'] = df_nse[['tolerance_wflow_calibrated', 'tolerance_pcrglob']].mean(axis=1).to_list()

# # Add basin_ids and set index
df_wflow_pcrglob_comp['basin_id'] = df_kge.index
df_wflow_pcrglob_comp = df_wflow_pcrglob_comp.set_index('basin_id')

# Join Model A and Model B objective function results
df = df_wflow_calibrated_objective.join(df_pcrglob_objective,lsuffix='_wflow_calibrated',rsuffix='_pcrglob')

# Add absolute objective function difference
df_kge_difference = (df['kge_np_wflow_calibrated'] - df['kge_np_pcrglob']).abs()
df_kge_difference = df_kge_difference.to_frame('kge_difference')
df_nse_difference = (df['nse_wflow_calibrated'] - df['nse_pcrglob']).abs()
df_nse_difference = df_nse_difference.to_frame('nse_difference')

# Construct dataframe
df_wflow_pcrglob_comp = df_wflow_pcrglob_comp.join(df_kge_difference)
df_wflow_pcrglob_comp = df_wflow_pcrglob_comp.join(df_nse_difference)

# Add sampling uncertainty larger than (LT) objective function difference
kge_sampling_LT_objective = []
nse_sampling_LT_objective = []

for index, row in df_wflow_pcrglob_comp.iterrows():
    print(index)
    print(row.kge_average_tolerance, row.kge_difference)
    if row.kge_average_tolerance > row.kge_difference:
        print(True)
        kge_sampling_LT_objective.append(True)
    else:
        print(False)
        kge_sampling_LT_objective.append(False)
    
    if row.nse_average_tolerance > row.kge_difference:
        nse_sampling_LT_objective.append(True)
    else:
        nse_sampling_LT_objective.append(False)

df_wflow_pcrglob_comp['kge_tolerance_LT_objective'] = kge_sampling_LT_objective    
df_wflow_pcrglob_comp['nse_tolerance_LT_objective'] = nse_sampling_LT_objective

df_wflow_pcrglob_comp.to_csv(f'{RESULTS}/sampling_uncertainty_overview_wflow_calibrated_pcr-globwb.csv')

50008
0.07050463746805381 0.42154285714285733
False
50002
0.07066160008166011 0.4871428571428573
False
80005
0.05382263904171449 0.09264285714285711
False
45001
0.07923211579540698 0.4432000000000001
False
84030
0.057567745418385025 0.45838571428571434
False
72007
0.15566008666600395 1.5476
False
81004
0.071252760025018 0.35457142857142865
False
27034
0.08780868525277752 0.3308428571428571
False
57004
0.113593810644679 0.32768571428571436
False
74005
0.0593983897728035 0.6280999999999999
False
27043
0.07729449720117701 0.36477142857142864
False
80006
0.08297738449563545 0.7959999999999999
False
84017
0.059269840665330004 0.4038999999999999
False
27047
0.06242558818541849 1.412757142857143
False
50006
0.08364682530931897 0.32121428571428556
False
46003
0.06406841298442999 0.3711285714285716
False
81007
0.05100929705440425 0.585
False
66006
0.13931433183811898 0.19934285714285715
False
50007
0.15964410797677303 1.0343714285714285
False
57008
0.09555961777677652 0.2933142857142855
False
5

In [25]:
df_wflow_pcrglob_comp

Unnamed: 0_level_0,kge_average_tolerance,nse_average_tolerance,kge_difference,nse_difference,kge_tolerance_LT_objective,nse_tolerance_LT_objective
basin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
50008,0.070505,0.637226,0.421543,0.702671,False,True
50002,0.070662,0.062299,0.487143,0.700486,False,False
80005,0.053823,0.057222,0.092643,0.264243,False,False
45001,0.079232,0.117516,0.443200,0.563414,False,False
84030,0.057568,0.062703,0.458386,0.643800,False,False
...,...,...,...,...,...,...
38003,3.200172,80.367383,0.411500,252.725829,True,True
38017,5.428099,81.074911,2.243671,1305.713143,True,True
42008,3.947505,230.115677,1.606443,368.850214,True,True
26008,6.699543,368.034151,5.758857,1046.538971,True,True
