# Plot spatial sampling uncertainty results

In [116]:
import os
from glob import glob
from pathlib import Path

import numpy as np
import xarray as xr
import pandas as pd

import pylab as plot
import matplotlib.pyplot as plt
import seaborn as sns

## Set Paths

In [117]:
# Project directories: everything lives below the CAMELS-UK project root.
ROOT = Path("/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/")
AUXDATA = ROOT / "aux_data"  # auxiliary input data
RESULTS = ROOT / "results"  # model evaluation output

## Set Config

In [118]:
# Per-model result directories (string form, so the file names below can be
# appended with plain f-strings).
wflow_results_dir = f'{RESULTS}/wflow_sbm'
pcrglob_results_dir = f'{RESULTS}/pcr-globwb'

# GUMBOOT result files for the KGE-NP objective function
wflow_calibrated_kge_csv = f'{wflow_results_dir}/wflow_calibrated_kge_np_gumboot.csv'
wflow_uncalibrated_kge_csv = f'{wflow_results_dir}/wflow_uncalibrated_kge_np_gumboot.csv'
pcrglob_kge_csv = f'{pcrglob_results_dir}/pcr-globwb_kge_np_gumboot.csv'

# GUMBOOT result files for the NSE objective function
wflow_calibrated_nse_csv = f'{wflow_results_dir}/wflow_calibrated_nse_gumboot.csv'
wflow_uncalibrated_nse_csv = f'{wflow_results_dir}/wflow_uncalibrated_nse_gumboot.csv'
pcrglob_nse_csv = f'{pcrglob_results_dir}/pcr-globwb_nse_gumboot.csv'

# Objective function overview files (one per model run)
wflow_calibrated_objective_cvs = f"{wflow_results_dir}/wflow_calibrated_evaluation_objective_functions_overview.csv"
wflow_uncalibrated_objective_cvs = f"{wflow_results_dir}/wflow_uncalibrated_evaluation_objective_functions_overview.csv"
pcrglob_objective_csv = f"{pcrglob_results_dir}/pcr-globwb_evaluation_period_objective_functions_overview.csv"

# Basin IDs available for the uncertainty analysis, indexed by basin_id
df_basin_ids = pd.read_csv(
    f"{AUXDATA}/available_basin_ids_uncertainty.csv",
    index_col='basin_id',
)

## Load Results Dataframes

In [119]:
def load_result_dataframe(csv_file, df_basin_ids):
    """Load a gumboot result CSV restricted to the available basins.

    Parameters
    ----------
    csv_file : str or path-like
        CSV file with a ``basin_id`` column and a ``seJack`` column.
    df_basin_ids : pandas.DataFrame
        Frame indexed by basin ID; its index selects the basins to keep.

    Returns
    -------
    pandas.DataFrame
        ``df_basin_ids`` left-joined with the CSV contents, without the rows
        whose ``seJack`` value is missing.
    """
    gumboot = pd.read_csv(csv_file, index_col='basin_id')

    # Left join on the index: only basins listed in df_basin_ids survive
    merged = df_basin_ids.join(gumboot)

    # Basins without a seJack value carry no usable result
    return merged.dropna(subset=['seJack'])

def load_objective_dataframe(csv_file, df_basin_ids):
    """Load an objective-function overview CSV restricted to the available basins.

    Parameters
    ----------
    csv_file : str or path-like
        CSV file with ``basin_id``, ``Unnamed: 0`` and ``kge_np`` columns.
    df_basin_ids : pandas.DataFrame
        Frame indexed by basin ID; its index selects the basins to keep.

    Returns
    -------
    pandas.DataFrame
        ``df_basin_ids`` left-joined with the CSV contents (minus the
        ``Unnamed: 0`` column), without the rows whose ``kge_np`` is missing.

    Raises
    ------
    KeyError
        If the CSV has no ``Unnamed: 0`` column.
    """
    # 'Unnamed: 0' is presumably the unnamed index column written by a
    # previous DataFrame.to_csv call; it carries no information here.
    objectives = pd.read_csv(csv_file, index_col='basin_id').drop(columns=['Unnamed: 0'])

    # Left join on the index: only basins listed in df_basin_ids survive
    merged = df_basin_ids.join(objectives)

    return merged.dropna(subset=['kge_np'])

In [120]:
# KGE-NP gumboot results (calibrated wflow, uncalibrated wflow, PCR-GLOBWB)
df_kge_wflow_calibrated, df_kge_wflow_uncalibrated, df_kge_pcrglob = (
    load_result_dataframe(gumboot_csv, df_basin_ids)
    for gumboot_csv in (
        wflow_calibrated_kge_csv,
        wflow_uncalibrated_kge_csv,
        pcrglob_kge_csv,
    )
)

# NSE gumboot results, same model order
df_nse_wflow_calibrated, df_nse_wflow_uncalibrated, df_nse_pcrglob = (
    load_result_dataframe(gumboot_csv, df_basin_ids)
    for gumboot_csv in (
        wflow_calibrated_nse_csv,
        wflow_uncalibrated_nse_csv,
        pcrglob_nse_csv,
    )
)

# Objective function overviews, same model order
df_wflow_calibrated_objective, df_wflow_uncalibrated_objective, df_pcrglob_objective = (
    load_objective_dataframe(objective_csv, df_basin_ids)
    for objective_csv in (
        wflow_calibrated_objective_cvs,
        wflow_uncalibrated_objective_cvs,
        pcrglob_objective_csv,
    )
)

## Calculate tolerance interval

In [121]:
def calculate_tolerance(df):
    """Add tolerance-interval columns and sort by the doubled ``seJack``.

    The input frame is left untouched; a new frame is returned. (Writing the
    columns into ``df`` directly would modify the caller's object and, for a
    frame obtained by filtering another one, trigger pandas'
    SettingWithCopyWarning.)

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``seJack``, ``seBoot``, ``p95`` and ``p05``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with three extra columns, sorted ascending by
        ``2xJack``:

        * ``2xJack``    -- ``seJack`` multiplied by two
        * ``2xBoot``    -- ``seBoot`` multiplied by two
        * ``tolerance`` -- ``p95`` minus ``p05``
    """
    # The column names start with a digit, hence the dict unpacking
    with_tolerance = df.assign(**{
        '2xJack': df['seJack'] * 2,
        '2xBoot': df['seBoot'] * 2,
        'tolerance': df['p95'] - df['p05'],
    })

    return with_tolerance.sort_values(by=['2xJack'])

In [122]:
# Add the tolerance interval columns to the KGE-NP gumboot results
df_kge_wflow_calibrated, df_kge_wflow_uncalibrated, df_kge_pcrglob = (
    calculate_tolerance(gumboot_frame)
    for gumboot_frame in (
        df_kge_wflow_calibrated,
        df_kge_wflow_uncalibrated,
        df_kge_pcrglob,
    )
)

# Add the tolerance interval columns to the NSE gumboot results
df_nse_wflow_calibrated, df_nse_wflow_uncalibrated, df_nse_pcrglob = (
    calculate_tolerance(gumboot_frame)
    for gumboot_frame in (
        df_nse_wflow_calibrated,
        df_nse_wflow_uncalibrated,
        df_nse_pcrglob,
    )
)

## Calculate Average Sampling Uncertainty Tolerance Interval & Objective function difference

### Wflow calibrated & uncalibrated

In [123]:
# Compare calibrated and uncalibrated wflow_sbm per basin: is the average
# sampling-uncertainty tolerance interval of the two runs larger than the
# absolute difference between their objective function values?

# Join Model A and Model B gumboot results
df_kge = df_kge_wflow_calibrated.join(df_kge_wflow_uncalibrated, lsuffix='_wflow_calibrated', rsuffix='_wflow_uncalibrated')
df_nse = df_nse_wflow_calibrated.join(df_nse_wflow_uncalibrated, lsuffix='_wflow_calibrated', rsuffix='_wflow_uncalibrated')

# Average tolerance interval of both models, per basin.
# These stay index-aligned Series on purpose: df_kge and df_nse are each
# sorted by their own '2xJack' column, so their row orders differ and a
# positional (list) assignment would attach NSE values to the wrong basin_id.
tolerance_columns = ['tolerance_wflow_calibrated', 'tolerance_wflow_uncalibrated']
kge_average_tolerance = df_kge[tolerance_columns].mean(axis=1).rename('kge_average_tolerance')
nse_average_tolerance = df_nse[tolerance_columns].mean(axis=1).rename('nse_average_tolerance')

# Build the overview frame on the KGE basin index; NSE is matched by basin_id
df_wflow_comp = kge_average_tolerance.to_frame().join(nse_average_tolerance)
df_wflow_comp.index.name = 'basin_id'

# Join Model A and Model B objective function results
df = df_wflow_calibrated_objective.join(df_wflow_uncalibrated_objective, lsuffix='_wflow_calibrated', rsuffix='_wflow_uncalibrated')

# Add absolute objective function difference
df_kge_difference = (df['kge_np_wflow_calibrated'] - df['kge_np_wflow_uncalibrated']).abs().to_frame('kge_difference')
df_nse_difference = (df['nse_wflow_calibrated'] - df['nse_wflow_uncalibrated']).abs().to_frame('nse_difference')

# Construct dataframe
df_wflow_comp = df_wflow_comp.join(df_kge_difference)
df_wflow_comp = df_wflow_comp.join(df_nse_difference)

# Flag basins where the sampling uncertainty is larger than (LT) the objective
# function difference. Each metric is compared against its own difference;
# a comparison involving NaN evaluates to False.
kge_sampling_LT_objective = df_wflow_comp['kge_average_tolerance'] > df_wflow_comp['kge_difference']
nse_sampling_LT_objective = df_wflow_comp['nse_average_tolerance'] > df_wflow_comp['nse_difference']

df_wflow_comp['kge_sampling_LT_objective'] = kge_sampling_LT_objective
df_wflow_comp['nse_sampling_LT_objective'] = nse_sampling_LT_objective

df_wflow_comp.to_csv(f'{RESULTS}/sampling_uncertainty_overview_wflow_calibrated_uncalibrated.csv')

### Wflow calibrated & PCR-GLOBWB

In [124]:
# Compare calibrated wflow_sbm and PCR-GLOBWB per basin: is the average
# sampling-uncertainty tolerance interval of the two models larger than the
# absolute difference between their objective function values?

# Join Model A and Model B gumboot results
df_kge = df_kge_wflow_calibrated.join(df_kge_pcrglob, lsuffix='_wflow_calibrated', rsuffix='_pcrglob')
df_nse = df_nse_wflow_calibrated.join(df_nse_pcrglob, lsuffix='_wflow_calibrated', rsuffix='_pcrglob')

# Average tolerance interval of both models, per basin.
# These stay index-aligned Series on purpose: df_kge and df_nse are each
# sorted by their own '2xJack' column, so their row orders differ and a
# positional (list) assignment would attach NSE values to the wrong basin_id.
tolerance_columns = ['tolerance_wflow_calibrated', 'tolerance_pcrglob']
kge_average_tolerance = df_kge[tolerance_columns].mean(axis=1).rename('kge_average_tolerance')
nse_average_tolerance = df_nse[tolerance_columns].mean(axis=1).rename('nse_average_tolerance')

# Build the overview frame on the KGE basin index; NSE is matched by basin_id
df_wflow_pcrglob_comp = kge_average_tolerance.to_frame().join(nse_average_tolerance)
df_wflow_pcrglob_comp.index.name = 'basin_id'

# Join Model A and Model B objective function results.
# Model B is PCR-GLOBWB here, so its own objective frame must be joined
# (not the uncalibrated wflow frame).
# NOTE(review): assumes the PCR-GLOBWB overview also has an 'nse' column - confirm.
df = df_wflow_calibrated_objective.join(df_pcrglob_objective, lsuffix='_wflow_calibrated', rsuffix='_pcrglob')

# Add absolute objective function difference
df_kge_difference = (df['kge_np_wflow_calibrated'] - df['kge_np_pcrglob']).abs().to_frame('kge_difference')
df_nse_difference = (df['nse_wflow_calibrated'] - df['nse_pcrglob']).abs().to_frame('nse_difference')

# Construct dataframe
df_wflow_pcrglob_comp = df_wflow_pcrglob_comp.join(df_kge_difference)
df_wflow_pcrglob_comp = df_wflow_pcrglob_comp.join(df_nse_difference)

# Flag basins where the sampling uncertainty is larger than (LT) the objective
# function difference. The flags are computed from this comparison's own frame
# and each metric is compared against its own difference; a comparison
# involving NaN evaluates to False.
kge_sampling_LT_objective = df_wflow_pcrglob_comp['kge_average_tolerance'] > df_wflow_pcrglob_comp['kge_difference']
nse_sampling_LT_objective = df_wflow_pcrglob_comp['nse_average_tolerance'] > df_wflow_pcrglob_comp['nse_difference']

df_wflow_pcrglob_comp['kge_sampling_LT_objective'] = kge_sampling_LT_objective
df_wflow_pcrglob_comp['nse_sampling_LT_objective'] = nse_sampling_LT_objective

df_wflow_pcrglob_comp.to_csv(f'{RESULTS}/sampling_uncertainty_overview_wflow_calibrated_pcr-globwb.csv')