# Prepare calibrated wflow simulation timeseries for the GUMBOOT package

In [None]:
import pandas as pd

from glob import glob
from pathlib import Path

## Set Paths

In [None]:
# Directory layout: every location below is derived from ROOT
ROOT = Path("/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/")
MODELS = ROOT / "wflow" / "data"            # per-basin wflow model directories
AUXDATA = ROOT / "aux_data"
OBSDIR = AUXDATA / "CAMELS-GB" / "data" / "timeseries"
OUTPUT = ROOT / "results" / "wflow_sbm" / "evaluation_period_calibrated"

## Config

In [None]:
# Basin IDs for wflow_sbm are the names of the entries found directly
# under MODELS, in sorted (string) order
basin_dirs = glob(f'{MODELS}/*')
basin_ids = sorted(s.rsplit('/', 1)[-1] for s in basin_dirs)

# Period bounds used to subset the observations (drop first year)
start_date, end_date = '2008-10-01', '2015-09-30'

In [None]:
# Determine which basins have a usable calibrated simulation.
# A basin counts as completed when <MODELS>/<basin_id>/evaluation/output.csv
# exists and holds at least one data row.
basins = []
exists = []

for basin_id in basin_ids:
    basins.append(basin_id)

    # Check if file exists
    sim_file = Path(MODELS, basin_id, 'evaluation', 'output.csv')
    if not sim_file.is_file():
        exists.append(False)
        continue

    # Check if csv contains output. A zero-byte file makes read_csv raise
    # EmptyDataError; treat that the same as "no output" instead of crashing.
    try:
        df_sim = pd.read_csv(sim_file)
    except pd.errors.EmptyDataError:
        exists.append(False)
        continue
    exists.append(len(df_sim) > 0)

# One row per basin with its completion flag; keep only completed basins.
df = pd.DataFrame({'basin_id': basins, 'completed': exists})
df = df.reset_index()
# astype(bool) keeps this a boolean mask even when the frame is empty
df = df[df['completed'].astype(bool)]

basin_ids = df.basin_id.to_list()

# Remove basin_ids that return nan values. Filtering (rather than
# list.remove) does not raise when one of them is already absent.
nan_basins = {'18017', '18018', '54038', '76011'}
basin_ids = [b for b in basin_ids if b not in nan_basins]

# Prepare files for the whole evaluation period

In [None]:
# Build one Gumboot input file per basin: simulation columns plus the
# observed discharge as 'obs', indexed by 'date'.
for i, basin_id in enumerate(basin_ids):
    print(i, end='\r')  # progress counter, overwritten in place

    # Simulations: parse the 'time' column and use it as the index
    df_sim = pd.read_csv(f'{OUTPUT}/simulations/{basin_id}_wflow_calibrated_evaluation_simulations.csv')
    df_sim = df_sim.assign(time=pd.to_datetime(df_sim['time'])).set_index('time')

    # Observations, indexed by their 'date' column
    df_obs = pd.read_csv(f'{OUTPUT}/observations/{basin_id}_wflow_calibrated_evaluation_observations.csv', parse_dates=True, index_col='date')

    # Keep the evaluation period only: start_date exclusive, end_date inclusive
    in_period = (df_obs.index > start_date) & (df_obs.index <= end_date)
    df_obs = df_obs.loc[in_period]

    # Attach observed discharge to the simulation rows and rename columns
    df_eval = (
        df_sim.join(df_obs['discharge_vol'])
        .reset_index()
        .rename(columns={'time':'date', 'discharge_vol':'obs'})
    )

    # An unnamed index comes back from reset_index() as 'index'
    if df_eval.columns[0] == 'index':
        df_eval = df_eval.rename(columns={'index':'date'})
    df_eval = df_eval.set_index('date')

    # Save Gumboot dataframe
    df_eval.to_csv(f'{OUTPUT}/gumboot/{basin_id}_gumboot_wflow_calibrated_evaluation_simulations.csv')

# Prepare files per flow category for the evaluation period

In [None]:
# Flow categories, each mapped to a (lower, upper) percentile pair.
# The category names are used as file-name suffixes below.
flow_categories = dict(
    low_flow=(5, 25),
    mean_flow=(25, 75),
    high_flow=(75, 95),
)

In [None]:
# Build one Gumboot input file per basin and flow category: simulation
# columns plus the observed discharge as 'obs', indexed by 'date'.
for i, basin_id in enumerate(basin_ids):
    print(i, end='\r')  # progress counter, overwritten in place

    for category in flow_categories:

        # Simulations for this category: parse 'date' and use it as the index
        df_sim = pd.read_csv(f'{OUTPUT}/flow_categories/{basin_id}_wflow_calibrated_evaluation_simulations_{category}.csv')
        df_sim = df_sim.assign(date=pd.to_datetime(df_sim['date'])).set_index('date')

        # Observations for this category, indexed by their 'date' column
        # NOTE(review): simulations are read from flow_categories/ but the
        # per-category observations from observations/ - confirm the location
        df_obs = pd.read_csv(f'{OUTPUT}/observations/{basin_id}_wflow_calibrated_evaluation_observations_{category}.csv', parse_dates=True, index_col='date')

        # Keep the evaluation period only: start_date exclusive, end_date inclusive
        in_period = (df_obs.index > start_date) & (df_obs.index <= end_date)
        df_obs = df_obs.loc[in_period]

        # Attach observed discharge to the simulation rows and rename it
        df_eval = (
            df_sim.join(df_obs['discharge_vol'])
            .reset_index()
            .rename(columns={'discharge_vol':'obs'})
        )

        # An unnamed index comes back from reset_index() as 'index'
        if df_eval.columns[0] == 'index':
            df_eval = df_eval.rename(columns={'index':'date'})
        df_eval = df_eval.set_index('date')

        # Save Gumboot dataframe
        df_eval.to_csv(f'{OUTPUT}/gumboot/{basin_id}_gumboot_wflow_calibrated_evaluation_simulations_{category}.csv')