# Evaluation Streamflow analyses

In [1]:
import os
from glob import glob
from pathlib import Path

import numpy as np
import xarray as xr
import pandas as pd
import hydroeval

# Set Paths

In [2]:
# Set Paths
ROOT = Path("/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/")
MODELS = Path(f'{ROOT}/wflow/data/')
AUXDATA = Path(f"{ROOT}/aux_data")
OBSDIR = Path(f"{AUXDATA}/CAMELS-GB/data/timeseries/")
OUTPUT = Path(f"{ROOT}/results/wflow_sbm/evaluation/")

# Set Config

In [3]:
# Get available basin IDs wflow_sbm
basin_dirs = glob(f'{MODELS}/*')
basin_ids = [s.split('/')[-1] for s in basin_dirs]
basin_ids.sort()


# Period (drop first year)
start_date = '2008-10-01'
end_date   = '2015-09-30'

# Define functions

In [4]:
def get_simulations(basin_id, start_date, end_date):
    dataframes = []

    # Set simulation file
    sim_file = glob(f'{MODELS}/{basin_id}/evaluation/output.csv')[0]

    # Load simulation dataframe
    df = pd.read_csv(sim_file, parse_dates=True, index_col='time')

    # Select calibration period (drop first year)
    mask = (df.index > start_date) & (df.index <= end_date)
    df = df.loc[mask]

    # Rename column
    df = df.rename(columns={'Q_1': f'evaluation'})

    return df


def get_observations(basin_id, start_date, end_date):
    # Set observation file
    obs_file = glob(f'{OBSDIR}/*_{basin_id}_*.csv')[0]
    
    # Load observation dataframe
    df_obs = pd.read_csv(obs_file, parse_dates=True, index_col='date')
    
    # Select calibration period (drop first year)
    mask = (df_obs.index > start_date) & (df_obs.index <= end_date)
    df_obs = df_obs.loc[mask]
    
    return df_obs
    
    
def calculate_objective_functions(basin_id, df_sim, df_obs):
    
    # Create empty dataframe and lists
    df = pd.DataFrame()
    basin_ids = []
    ksathorfracs = []
    nse_values = []
    kge_2009_values = []
    kge_2012_values = []
    kge_np_values = []
    kge_np_r_values = []
    kge_np_alpha_values = []
    kge_np_beta_values = []

    # Calculate objective functions for each parameter value

    basin_ids.append(basin_id)

    # Calculate objective functions and round
    nse = hydroeval.evaluator(hydroeval.nse, df_sim[f'evaluation'], df_obs.discharge_vol, axis=1)
    nse_values.append(np.round(nse[0], 4))

    kge_2009 = hydroeval.evaluator(hydroeval.kge, df_sim[f'evaluation'], df_obs.discharge_vol, axis=1)
    kge_2009_values.append(np.round(kge_2009[0][0], 4))

    kge_2012 = hydroeval.evaluator(hydroeval.kgeprime, df_sim[f'evaluation'], df_obs.discharge_vol, axis=1)
    kge_2012_values.append(np.round(kge_2012[0][0], 4))    

    kge_np = hydroeval.evaluator(hydroeval.kgenp, df_sim[f'evaluation'], df_obs.discharge_vol, axis=1)
    kge_np_values.append(np.round(kge_np[0][0], 4))    
    kge_np_r_values.append(np.round(kge_np[0][1], 4))
    kge_np_alpha_values.append(np.round(kge_np[0][2], 4))
    kge_np_beta_values.append(np.round(kge_np[0][3], 4))
    
    df['basin_id'] = basin_ids
    df['nse'] = nse_values
    df['kge_2009'] = kge_2009_values
    df['kge_2012'] = kge_2012_values
    df['kge_np'] = kge_np_values
    df['kge_np_r'] = kge_np_r_values
    df['kge_np_alpha'] = kge_np_alpha_values
    df['kge_np_beta'] = kge_np_beta_values

    return df

# Check if output exists

In [5]:
df = pd.DataFrame()
basins = []
exists = []

for basin_id in basin_ids:
    basins.append(basin_id)

    # check if file exists
    sim_file = Path(f'{MODELS}/{basin_id}/evaluation/output.csv')
    if sim_file.is_file() is False:
        exists.append(False)
    else:
        df_sim = pd.read_csv(sim_file)
    
        # Check if csv containes output
        if len(df_sim) == 0:
            exists.append(False)
        else:
            exists.append(True)
        
df['basin_id'] = basins
df['completed'] = exists
df = df.reset_index()
df = df[df['completed'] == True]

basin_ids = df.basin_id.to_list()

In [6]:
len(basin_ids)

650

# Calculate Objective functions

In [7]:
for basin_id in basin_ids:
    print(basin_id)
    
    # Get sim and obs timeseries
    df_sim = get_simulations(basin_id, start_date, end_date)
    df_obs = get_observations(basin_id, start_date, end_date)
    
    df_sim.to_csv(f'{OUTPUT}/{basin_id}_evaluation_simulations.csv')
    df_obs.to_csv(f'{OUTPUT}/{basin_id}_evaluation_observations.csv', index=False)   
    
    # Prep GUMBOOT dataframes
    df_gumboot = df_sim.join(df_obs.discharge_vol)
    df_gumboot = df_gumboot.reset_index()
    df_gumboot = df_gumboot.rename(columns={'time':'date','evaluation':'sim', 'discharge_vol':'obs'})
    df_gumboot = df_gumboot.set_index('date')
    df_gumboot = df_gumboot[['obs','sim']]
    df_gumboot.to_csv(f'{OUTPUT}/{basin_id}_evaluation_simulations_gumboot.csv')  
    
    # Calculate objective function for each water year and take average
    years = list(range(int(start_date[:4]), int(end_date[:4])))
    
    objective_dfs = []
    for year in years:
        start_year = f'{year}-10-01'
        end_year = f'{year+1}-09-30'
        
        # Select water year
        mask = (df_sim.index >= start_year) & (df_sim.index <= end_year)
        df_sim_year = df_sim.loc[mask]
        df_obs_year = df_obs.loc[mask]

        # Calculate objective function
        df_objective = calculate_objective_functions(basin_id, df_sim_year, df_obs_year)
        objective_dfs.append(df_objective)
    
    # Merge water years objective values and take the mean value
    df = pd.concat(objective_dfs,axis=1)
    df = df.groupby(level=0,axis=1).mean()
    df = df.sort_values('kge_np', ascending=False)
    df['basin_id'] = [basin_id] * len(df)
    df.to_csv(f'{OUTPUT}/{basin_id}_evaluation_objective_functions.csv', index=False)

10003
1001
101002
101005
102001
106001
107001
11001
11003
11004
12001
12002
12005
12006
12007
12008
12009
13001
13004
13005
13007
13008
14001
14002
14005
15006
15007
15010
15011
15012
15013
15014
15016
15021
15023
15024
15025
15030
15039
16001
16003
16004
17001
17003
17004
17005
17015
18001
18002
18003
18008
18010


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


18011
18014
18017


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den
  return _methods._mea

18018
19001
19006
19010
19017
19020


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


20002
20003
20007


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


2001
2002
21003
21006
21008


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


21009
21011
21012


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


21013
21015
21016


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


21017
21018
21022
21023
21024
21026


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


21027
22001
22006
22007
22009
23001
23004
23006
23007
23008
23011
23016
24001
24003
24004
24005
25001
25003
25006
25012
25020
25021
25029
26003
26005
26006
26008
26009
27001
27002
27003
27006
27007
27009
27021
27023
27025
27026
27029
27030
27032
27034
27035
27038
27041
27042
27043
27047
27049
27051
27062
27064
27065
27071
27073
27077
27079
27080
27084
27087
27089
27090
28003
28008
28009
28012
28015
28018
28022
28023
28024
28026
28031
28033
28039
28040
28043
28044
28046
28048
28050
28052
28055
28056
28060
28066
28067
28072
28074
28080
28081
28082
28085
28091
28093
28115
28116
28117
29002
29003
29005
29009
30001
30004
30005
30011
30012
30014
30015
3003
31002
31006
31021
31023
31025
32003
32004
32006
32008
33006
33007
33012
33013
33014
33018
33019
33020


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


33021
33022
33023
33024
33026
33028
33029
33031
33032
33033
33034
33035
33039
33053
33054
33058
34002
34003
34004
34005
34006
34007
34008
34010
34011
34012
35003
35008
36003
36006
36009
36010
36011
36012
37001
37003
37005
37008
37009
37010
37011
37018
37019
37020
37031
37033
38001
38003
38004
38007
38012
38014
38017
38018
38021
38026
38029
38030
39001
39002
39004
39005
39006
39007
39008
39010
39011
39012
39014
39015
39016
39017
39019
39020
39021
39022
39023
39025
39027
39028
39029
39034
39035
39036


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


39037
39042
39049
39052
39054
39056
39061
39065
39069
39072
39078
39081
39087
39088
39095
39099
39101
39105
39108
39114
39120
39125
39127
39141
39142
39143
39144
40003
40004
40005
40006
40007
40009
4001
40010
40011
40012
40013


  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  r = r_num / r_den
  alpha = np.std(simulations, axis=0) / np.std(evaluation, dtype=np.float64)
  r = r_num / r_den
  gamma = ((np.std(simulations, axis=0, dtype=np.float64) / sim_mean)
  r = r_num / r_den


40016
40017
40018
40020
40021
40022
4003
40033
4005
4006
41001
41003
41004
41005
41006
41009
41011
41012
41013
41014
41016
41019
41020
41022
41023
41025
41027
41028
41029
42001
42003
42004
42006
42008
42010
42011
42016
42017
42024
42026
42027
43005
43006
43007
43008
43009
43014
43017
43018
43021
44001
44002
44006
44009
44011
45001
45003
45004
45005
45009
45012
46003
46005
46008
46014
47001
47004
47005
47008
47009
47011
47014
47018
48004
48005
48011
49001
49002
49004
50001
50002
50006
50007
50008
50011
5003
52004
52005
52006
52007
52009
52010
52011
52015
52016
53005
53006
53008
53009
53013
53018
53022
53023
53028
54001
54002
54004
54005
54007
54008
54011
54012
54015
54016
54017
54018
54020
54022
54024
54025


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den
  return _methods._mea

54027
54028
54029
54032
54034
54036
54038
54044
54048
54049
54052
54057
54060
54063


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


54080
54095
54096
55002
55007
55008
55013
55014


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den
  return _methods._mea

55016
55018
55021
55023
55025
55026
55029
55032
56001
56007
56012
56013
57004
57005
57006
57007
57008
57009
58001
58002
58006
58008
58009
58010
58012
59001
60002
60003
60005
60006
60010
60012
6007
6008
61001


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


61002
62001
63001
63004
64002


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


64006
65001
65004
65005
65007


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


65008
66001
66004
66006
66011
67001
67005
67006
67008
67015
67018
67025
67033


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  np.sum((evaluation - simulations) ** 2, axis=0, dtype=np.float64)
  ret = um.true_divide(
  r = r_num / r_den
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  beta = (np.sum(simulations, axis=0, dtype=np.float64)
  r = r_num / r_den
  r = r_num / r_den


68001
68003
68004
68005
68020
69002
69007
69012
69015
69017
69023
69027
69030
69032
69043
70002
7001
7003
7005
7006
71001
71004
71006
71008
71014
72002
72004
72005
72007
72009
72014
72015
73005
73009
73010
73011
73014
73015
74001
74005
74006
74007
75004
76003
76005
76007
76008
76011
76014
77002
77003
77004
78003
78004
78006
79002
79003
79004
79005
79006
80001
80003
80004
80005
80006
8004
8005
8006
8009
8011
8013
81002
81003
81004
81006
81007
82001
82003
83003
83004
83006
83007
83009
83010
83013
84001
84004
84005
84008
84009
84012
84013
84014
84016
84017
84019
84020
84022
84026
84030
85001
85002
85003
85004
88001
89002
89005
90003
9002
9003
9004
9005
92001
93001
94001
95001
96001
96002
96004
97002


# Create overview dataframe

In [13]:
# Load results and create overview dataframe
result_files = glob(f"{OUTPUT}/*_evaluation_objective_functions.csv")

# Create empty dataframe and lists
df_out = pd.DataFrame()
basin_ids = []
ls_kge_np = []
ls_kge_np_r = []
ls_kge_np_alpha = []
ls_kge_np_beta = []
ls_kge_2009 = []
ls_kge_2012 = []
ls_nse = []

for file in result_files:
    # Read results and rank descending (kge_np)
    df = pd.read_csv(file)
    df = df.set_index('kge_np')
    df = df.sort_index(ascending=False)
    df = df.reset_index()
    
    # Select first row
    df = df.loc[0]
    
    # Append results
    basin_ids.append(int(df['basin_id']))
    ls_kge_np.append(df['kge_np'])
    ls_kge_np_r.append(df['kge_np_r'])
    ls_kge_np_alpha.append(df['kge_np_alpha'])
    ls_kge_np_beta.append(df['kge_np_beta'])
    ls_kge_2009.append(df['kge_2009'])
    ls_kge_2012.append(df['kge_2012'])
    ls_nse.append(df['nse'])

# Create output dataframe
df_out['basin_id'] = basin_ids       
df_out['kge_np'] = ls_kge_np    
df_out['kge_np_r'] = ls_kge_np_r    
df_out['kge_np_alpha'] = ls_kge_np_alpha    
df_out['kge_np_beta'] = ls_kge_np_beta    
df_out['kge_2009'] = ls_kge_2009    
df_out['kge_2012'] = ls_kge_2012    
df_out['nse'] = ls_nse 

# Write output
df_out.to_csv(f'{ROOT}/results/wflow_sbm/evaluation_overview_wflow.csv')

# Calculate objective functions per flow category

In [42]:
def get_flow_category_simulations(basin_id, flow_category, start_date, end_date):
    dataframes = []

    # Set simulation file
    sim_file = glob(f'{ROOT}/results/categories/{category}/{basin_id}_model_difference.csv')[0]
    
    # Load simulation dataframe
    df = pd.read_csv(sim_file, parse_dates=True, index_col='time', usecols=['time','wflow_sim'])
    df = df.rename(columns={'wflow_sim':'sim'})
    
    
    
    # Select calibration period (drop first year)
    mask = (df.index > start_date) & (df.index <= end_date)
    df = df.loc[mask]

    # Rename column
    df = df.rename(columns={'Q_1': f'evaluation'})

    return df


def get_flow_category_observations(basin_id, start_date, end_date):
    # Set observation file
    obs_file = glob(f'{OBSDIR}/*_{basin_id}_*.csv')[0]
    
    # Load observation dataframe
    df_obs = pd.read_csv(obs_file, parse_dates=True, index_col='date')
    
    # Select calibration period (drop first year)
    mask = (df_obs.index > start_date) & (df_obs.index <= end_date)
    df_obs = df_obs.loc[mask]
    
    return df_obs

# Set flow categories

In [43]:
# Set flow categories based on percentiles
flow_categories = {'low_flow': (5, 25),
                   'mean_flow': (25, 75),
                   'high_flow': (75, 95)}

In [65]:
for basin_id in basin_ids:
    print(basin_id)
    for category in flow_categories:
        # Get sim and obs timeseries
        df_sim = get_flow_category_simulations(basin_id, category, start_date, end_date)
        df_obs = get_flow_category_observations(basin_id, start_date, end_date)
      
        # Prep GUMBOOT dataframes
        df_gumboot = df_sim.join(df_obs.discharge_vol)
        df_gumboot = df_gumboot.reset_index()
        df_gumboot = df_gumboot.rename(columns={'time':'date','evaluation':'sim', 'discharge_vol':'obs'})
        df_gumboot = df_gumboot.set_index('date')
        df_gumboot = df_gumboot[['obs','sim']]
        df_gumboot.to_csv(f'{OUTPUT}/categories/{category}/{basin_id}_evaluation_simulations_gumboot.csv')  
    
        # Calculate objective function for each water year and take average
        years = list(range(int(start_date[:4]), int(end_date[:4])))

        objective_dfs = []
        for year in years:
            start_year = f'{year}-10-01'
            end_year = f'{year+1}-09-30'

            # Select water year
            mask = (df_sim.index >= start_year) & (df_sim.index <= end_year)
            df_sim_year = df_sim.loc[mask]
            df_obs_year = df_obs.loc[mask]

            # Calculate objective function
            df_objective = calculate_objective_functions(basin_id, df_sim_year, df_obs_year)
            objective_dfs.append(df_objective)
    
        # Merge water years objective values and take the mean value
        df = pd.concat(objective_dfs,axis=1)
        df = df.groupby(level=0,axis=1).mean()
        df = df.sort_values('kge_np', ascending=False)
        df['basin_id'] = [basin_id] * len(df)
        df.to_csv(f'{OUTPUT}/categories/{category}/{basin_id}_evaluation_objective_functions.csv', index=False)

10003


IndexError: Boolean index has wrong length: 0 instead of 2555

In [67]:
df_sim

Unnamed: 0_level_0,sim
time,Unnamed: 1_level_1


# Create overview dataframe

In [21]:
for category in flow_categories:
    
    # Load results and create overview dataframe
    result_files = glob(f"{OUTPUT}/categories/{category}/*_evaluation_objective_functions.csv")

    # Create empty dataframe and lists
    df_out = pd.DataFrame()
    
    basin_ids = []
    categories = []
    ls_kge_np = []
    ls_kge_np_r = []
    ls_kge_np_alpha = []
    ls_kge_np_beta = []
    ls_kge_2009 = []
    ls_kge_2012 = []
    ls_nse = []

    for file in result_files:
        # Read results and rank descending (kge_np)
        df = pd.read_csv(file)
        df = df.set_index('kge_np')
        df = df.sort_index(ascending=False)
        df = df.reset_index()

        # Select first row
        df = df.loc[0]

        # Append results
        basin_ids.append(int(df['basin_id']))
        categories.append(category)
        ls_kge_np.append(df['kge_np'])
        ls_kge_np_r.append(df['kge_np_r'])
        ls_kge_np_alpha.append(df['kge_np_alpha'])
        ls_kge_np_beta.append(df['kge_np_beta'])
        ls_kge_2009.append(df['kge_2009'])
        ls_kge_2012.append(df['kge_2012'])
        ls_nse.append(df['nse'])

    # Create output dataframe
    df_out['basin_id'] = basin_ids
    df_out['flow_category'] = categories
    df_out['kge_np'] = ls_kge_np    
    df_out['kge_np_r'] = ls_kge_np_r    
    df_out['kge_np_alpha'] = ls_kge_np_alpha    
    df_out['kge_np_beta'] = ls_kge_np_beta    
    df_out['kge_2009'] = ls_kge_2009    
    df_out['kge_2012'] = ls_kge_2012    
    df_out['nse'] = ls_nse 

    # Write output
    df_out.to_csv(f'{ROOT}/results/wflow_sbm/evaluation_overview_{category}_wflow.csv')