# Calculate model differences for all flow categories

In [7]:
import os
from glob import glob
from pathlib import Path

import numpy as np
import xarray as xr
import pandas as pd

import pylab as plot
import matplotlib.pyplot as plt
import seaborn as sns

# Set Paths

In [8]:
# Set Paths
ROOT = Path("/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/")
AUXDATA = Path(f"{ROOT}/aux_data")
RESULTS = Path(f"{ROOT}/results/")
OBSDIR = Path(f"{AUXDATA}/CAMELS-GB/data/timeseries/")

# Set Config

In [10]:
# Load available basin_IDs
df_basin_ids = pd.read_csv(f"{AUXDATA}/available_basin_ids_uncertainty.csv", index_col='basin_id')
basin_ids = df_basin_ids.index.to_list()
# Set flow categories based on percentiles
flow_categories = {'low_flow': (5, 25),
                   'mean_flow': (25, 75),
                   'high_flow': (75, 95)}

# Model simulation difference wflow calibrated & uncalibrated

In [14]:
model_a_path = f'{RESULTS}/wflow_sbm/evaluation_period_calibrated/flow_categories/'
model_b_path = f'{RESULTS}/wflow_sbm/evaluation_period_uncalibrated/flow_categories/'
basin_ids = [17018]
for basin_id in basin_ids:
    print(basin_id)
    for category in flow_categories:
        model_a_files = glob(f'{model_a_path}/{basin_id}*{category}*')

        for file_a in model_a_files:
            basin_id = file_a.split('/')[-1].split('_')[0]
            file_b = glob(f'{model_b_path}/{basin_id}*{category}*')[0]

            df_a = pd.read_csv(file_a, index_col='date')
            df_b = pd.read_csv(file_b, index_col='date')
            df_diff = df_a.join(df_b, lsuffix='_a',rsuffix='_b')
            df_diff['model_difference_wflow_calibrated_uncalibrated'] = (df_diff['evaluation_a'] - df_diff['evaluation_b']).abs()
            df_diff.to_csv(f'{RESULTS}/model_differences/wflow_calibrated_uncalibrated/{basin_id}_model_simulation_difference_wflow_calibrated_uncalibrated_{category}.csv')


17018


In [15]:
model_a_files

[]

# Model simulation difference wflow calibrated & PCR-GLOBWB

In [12]:
model_a_path = f'{RESULTS}/wflow_sbm/evaluation_period_calibrated/flow_categories/'
model_b_path = f'{RESULTS}/pcr-globwb/evaluation_period/flow_categories/'

for basin_id in basin_ids:
    print(basin_id)
    for category in flow_categories:
        model_a_files = glob(f'{model_a_path}/{basin_id}*{category}*')
    
        for file_a in model_a_files:
            basin_id = file_a.split('/')[-1].split('_')[0]
            file_b = glob(f'{model_b_path}/{basin_id}*{category}*')[0]

            df_a = pd.read_csv(file_a, index_col='date')
            df_b = pd.read_csv(file_b)
            df_b = df_b.set_index(df_b.iloc[:, 0].name)
            df_diff = df_a.join(df_b, lsuffix='_a',rsuffix='_b')
            df_diff['model_difference_wflow_calibrated_pcr-globwb'] = (df_diff['evaluation'] - df_diff['sim']).abs()
            df_diff.to_csv(f'{RESULTS}/model_differences/wflow_pcr-globwb/{basin_id}_model_simulation_difference_wflow_calibrated_pcr-globwb_{category}.csv')


1001
102001
11001
12001
12002
12005
12006
12007
12008
13004
13007
13008
14001
14005
15006
15007
15010
15011
15013
15014
15016
15021
15023
15024
15025
15030
16001
16003
16004
17015
17018
18008
18011
19001
21003
21008
21009
21012
21013
21017
21023
22001
22006
22009
23001
23004
23006
23007
23008
23011
24001
24003
24004
24005
25001
25006
25012
25020
25021
26003
26008
27001
27002
27003
27007
27009
27021
27023
27025
27029
27032
27034
27035
27041
27042
27043
27047
27049
27051
27064
27065
27071
27077
27080
27084
27087
27089
27090
28001
28003
28008
28009
28018
28022
28023
28026
28031
28033
28039
28040
28043
28046
28048
28055
28056
28060
28072
28074
28085
28091
28115
29002
29003
29009
30001
30004
30005
30011
3003
31002
31010
31021
32008
33018
33033
34002
34006
36003
37008
37009
37010
37018
38003
38004
38007
38012
38017
38018
38026
38030
39005
39010
39011
39012
39016
39019
39020
39021
39022
39025
39027
39028
39029
39034
39037
39042
39049
39056
39069
39078
39081
39095
39108
39142
39143
39144
40004