# Calculate model differences for all flow categories

In [None]:
import os
from glob import glob
from pathlib import Path

import numpy as np
import xarray as xr
import pandas as pd

import pylab as plot
import matplotlib.pyplot as plt
import seaborn as sns

# Set Paths

In [None]:
# Directory layout of the CAMELS-UK project; every location is derived from ROOT.
ROOT = Path("/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/")
AUXDATA = ROOT / "aux_data"            # auxiliary input data
RESULTS = ROOT / "results/"            # model results and derived products
OBSDIR = AUXDATA / "CAMELS-GB/data/timeseries/"  # CAMELS-GB time series folder

# Set Config

In [None]:
# Basin identifiers are the 'basin_id' index of the availability table.
basin_ids_csv = f"{AUXDATA}/available_basin_ids_uncertainty.csv"
df_basin_ids = pd.read_csv(basin_ids_csv, index_col='basin_id')
basin_ids = df_basin_ids.index.to_list()

# Flow categories, each mapped to a (lower, upper) percentile pair.
flow_categories = dict(
    low_flow=(5, 25),
    mean_flow=(25, 75),
    high_flow=(75, 95),
)

# Model simulation difference wflow calibrated & uncalibrated (default)

In [None]:
# Absolute difference between the calibrated and uncalibrated wflow_sbm
# simulations, written as one CSV per basin and flow category.
model_a_path = f'{RESULTS}/wflow_sbm/evaluation_period_calibrated/flow_categories/'
model_b_path = f'{RESULTS}/wflow_sbm/evaluation_period_uncalibrated/flow_categories/'

# Create the output folder up front so to_csv cannot fail on a missing directory.
output_dir = Path(f'{RESULTS}/model_differences/wflow_calibrated_uncalibrated')
output_dir.mkdir(parents=True, exist_ok=True)

for basin_id in basin_ids:
    print(basin_id)
    for category in flow_categories:
        # NOTE(review): this is a prefix match, so it can also pick up files of
        # basins whose id merely starts with `basin_id` - confirm the file naming.
        model_a_files = glob(f'{model_a_path}/{basin_id}*{category}*')

        for file_a in model_a_files:
            # Keep the id parsed from the file name in its own variable:
            # overwriting the loop variable `basin_id` would change the glob
            # pattern used for the remaining flow categories of this basin.
            file_basin_id = Path(file_a).name.split('_')[0]

            files_b = glob(f'{model_b_path}/{file_basin_id}*{category}*')
            if not files_b:
                # Fail with a clear message instead of a bare IndexError.
                raise FileNotFoundError(
                    f'No uncalibrated wflow_sbm file for basin {file_basin_id} '
                    f'and category {category} in {model_b_path}'
                )
            file_b = files_b[0]

            df_a = pd.read_csv(file_a, index_col='date')
            df_b = pd.read_csv(file_b, index_col='date')

            # Column names present in both frames receive the _a / _b suffixes.
            df_diff = df_a.join(df_b, lsuffix='_a', rsuffix='_b')
            df_diff['model_difference_wflow_calibrated_uncalibrated'] = (
                df_diff['evaluation_a'] - df_diff['evaluation_b']
            ).abs()
            df_diff.to_csv(
                output_dir
                / f'{file_basin_id}_model_simulation_difference_wflow_calibrated_uncalibrated_{category}.csv'
            )


# Model simulation difference wflow calibrated & PCR-GLOBWB

In [None]:
# Absolute difference between the calibrated wflow_sbm simulation and the
# PCR-GLOBWB simulation, written as one CSV per basin and flow category.
model_a_path = f'{RESULTS}/wflow_sbm/evaluation_period_calibrated/flow_categories/'
model_b_path = f'{RESULTS}/pcr-globwb/evaluation_period/flow_categories/'

# Create the output folder up front so to_csv cannot fail on a missing directory.
output_dir = Path(f'{RESULTS}/model_differences/wflow_pcr-globwb')
output_dir.mkdir(parents=True, exist_ok=True)

for basin_id in basin_ids:
    print(basin_id)
    for category in flow_categories:
        # NOTE(review): this is a prefix match, so it can also pick up files of
        # basins whose id merely starts with `basin_id` - confirm the file naming.
        model_a_files = glob(f'{model_a_path}/{basin_id}*{category}*')

        for file_a in model_a_files:
            # Keep the id parsed from the file name in its own variable:
            # overwriting the loop variable `basin_id` would change the glob
            # pattern used for the remaining flow categories of this basin.
            file_basin_id = Path(file_a).name.split('_')[0]

            files_b = glob(f'{model_b_path}/{file_basin_id}*{category}*')
            if not files_b:
                # Fail with a clear message instead of a bare IndexError.
                raise FileNotFoundError(
                    f'No PCR-GLOBWB file for basin {file_basin_id} '
                    f'and category {category} in {model_b_path}'
                )
            file_b = files_b[0]

            df_a = pd.read_csv(file_a, index_col='date')
            # The second file is indexed by whatever its first column is called.
            df_b = pd.read_csv(file_b)
            df_b = df_b.set_index(df_b.columns[0])

            # Suffixes only apply to column names present in both frames;
            # 'evaluation' and 'sim' are read unsuffixed, so they are presumably
            # unique to one file each - TODO confirm against the CSV headers.
            df_diff = df_a.join(df_b, lsuffix='_a', rsuffix='_b')
            df_diff['model_difference_wflow_calibrated_pcr-globwb'] = (
                df_diff['evaluation'] - df_diff['sim']
            ).abs()
            df_diff.to_csv(
                output_dir
                / f'{file_basin_id}_model_simulation_difference_wflow_calibrated_pcr-globwb_{category}.csv'
            )
