# Prepare simulation timeseries for the GUMBOOT package

In [None]:
import pandas as pd

from glob import glob
from pathlib import Path

## Set Paths

In [None]:
# Project directory layout; every location below is derived from ROOT
ROOT = Path('/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/')

# Model directory and auxiliary data directory
MODELS = ROOT / 'pcr-globwb/'
AUXDATA = ROOT / 'aux_data/'

# CAMELS-GB timeseries directory inside the auxiliary data
OBSDIR = AUXDATA / "CAMELS-GB/data/timeseries/"

# Results directory for the evaluation period
OUTPUT = ROOT / 'results/pcr-globwb/evaluation_period/'

## Config

In [None]:
# Basin IDs are the 'gauge_id' index of the CAMELS-GB topographic attributes table
attributes_csv = f"{AUXDATA}/CAMELS-GB/data/CAMELS_GB_topographic_attributes.csv"
df_ids = pd.read_csv(attributes_csv, index_col='gauge_id')
basin_ids = df_ids.index.to_list()

# Drop the basins that return nan values
# (list.remove raises ValueError if an ID is not present)
for nan_basin_id in (18017, 18018, 54038, 76011):
    basin_ids.remove(nan_basin_id)

# Time period (drop first year)
start_date, end_date = '2008-01-01', '2015-09-30'

# Prepare files for whole evaluation period

In [None]:
# Build one Gumboot input CSV (columns: date, obs, sim) per basin for the
# whole evaluation period.

# Create the output folder up front so to_csv does not fail on a fresh
# results tree after the first basin has already been loaded.
gumboot_dir = Path(f'{OUTPUT}/gumboot')
gumboot_dir.mkdir(parents=True, exist_ok=True)

for i, basin_id in enumerate(basin_ids):
    # Progress counter, overwritten in place on the same output line
    print(i, end='\r')

    # Load simulation dataframe and index it by its 'time' column
    df_sim = pd.read_csv(f'{OUTPUT}/simulations/{basin_id}_pcr-globwb_evaluation_simulations.csv')
    df_sim['time'] = pd.to_datetime(df_sim['time'])
    df_sim = df_sim.set_index('time')

    # Load observation dataframe, indexed by its parsed 'date' column
    df_obs = pd.read_csv(f'{OUTPUT}/observations/{basin_id}_pcr-globwb_evaluation_observations.csv', parse_dates=True, index_col='date')

    # Select evaluation period (drop first year).
    # The lower bound is strict: start_date itself is excluded, end_date is included.
    mask = (df_obs.index > start_date) & (df_obs.index <= end_date)
    df_obs = df_obs.loc[mask]

    # Left-join the observed discharge onto the simulation time steps,
    # then rename columns to the 'date' / 'obs' / 'sim' layout
    df_eval = df_sim.join(df_obs.discharge_vol)
    df_eval = df_eval.reset_index()
    df_eval = df_eval.rename(columns={'time':'date', 'discharge_vol':'obs'})

    # reset_index() labels the column 'index' when the joined index has no name
    if df_eval.columns[0] == 'index':
        df_eval = df_eval.rename(columns={'index':'date'})
    df_eval = df_eval.set_index('date')
    df_eval = df_eval[['obs', 'sim']]

    # Save Gumboot dataframe
    df_eval.to_csv(gumboot_dir / f'{basin_id}_gumboot_pcr-globwb_evaluation_simulations.csv')

# Prepare files per flow category for the evaluation period

In [None]:
# Flow categories, each mapped to a (lower, upper) pair of percentiles
flow_categories = dict(
    low_flow=(5, 25),
    mean_flow=(25, 75),
    high_flow=(75, 95),
)

In [None]:
# Build one Gumboot input CSV per basin and per flow category.

# Create the output folder up front so to_csv does not fail on a fresh
# results tree after the first file has already been loaded.
gumboot_dir = Path(f'{OUTPUT}/gumboot')
gumboot_dir.mkdir(parents=True, exist_ok=True)

for basin_id in basin_ids:

    for category in flow_categories:
        # Progress message, overwritten in place on the same output line
        print(f'{basin_id}: {category}', end='\r')

        # Load simulation dataframe and index it by its 'date' column
        df_sim = pd.read_csv(f'{OUTPUT}/flow_categories/{basin_id}_pcr-globwb_evaluation_simulations_{category}.csv')
        df_sim['date'] = pd.to_datetime(df_sim['date'])
        df_sim = df_sim.set_index('date')

        # Load observation dataframe, indexed by its parsed 'date' column
        df_obs = pd.read_csv(f'{OUTPUT}/observations/{basin_id}_pcr-globwb_evaluation_observations_{category}.csv', parse_dates=True, index_col='date')

        # Select evaluation period (drop first year).
        # The lower bound is strict: start_date itself is excluded, end_date is included.
        mask = (df_obs.index > start_date) & (df_obs.index <= end_date)
        df_obs = df_obs.loc[mask]

        # Left-join the observed discharge onto the simulation time steps
        # and rename it to 'obs'
        df_eval = df_sim.join(df_obs.discharge_vol)
        df_eval = df_eval.reset_index()
        df_eval = df_eval.rename(columns={'discharge_vol':'obs'})

        # reset_index() labels the column 'index' when the joined index has no name
        if df_eval.columns[0] == 'index':
            df_eval = df_eval.rename(columns={'index':'date'})
        df_eval = df_eval.set_index('date')

        # Save Gumboot dataframe
        df_eval.to_csv(gumboot_dir / f'{basin_id}_gumboot_pcr-globwb_evaluation_simulations_{category}.csv')