# Calculate observation uncertainty in m3/s

In [None]:
import numpy as np
import pandas as pd

from glob import glob
from pathlib import Path

# Set Paths

In [None]:
# Set Paths
# Project root; every other directory below is derived from it with the
# pathlib "/" operator instead of round-tripping through f-strings.
ROOT = Path("/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/")
AUXDATA = ROOT / "aux_data"
RESULTS = ROOT / "results"
OBSDIR = AUXDATA / "CAMELS-GB" / "data" / "timeseries"

# Set Config

In [None]:
# Attribute table that carries the q5/q95 uncertainty bound columns
uncertainty_file = f"{AUXDATA}/CAMELS-GB/data/CAMELS_GB_hydrometry_attributes.csv"

# Read the table indexed by gauge_id and keep only the rows in which all four
# q5/q95 uncertainty bounds are present
df_uncertainty = pd.read_csv(uncertainty_file, index_col='gauge_id')
df_uncertainty = df_uncertainty.dropna(
    subset=[
        'q5_uncert_upper',
        'q5_uncert_lower',
        'q95_uncert_upper',
        'q95_uncert_lower',
    ]
)

# Basin IDs to process are the index values of the availability table
df_basin_ids = pd.read_csv(f"{AUXDATA}/available_basin_ids_uncertainty.csv", index_col='basin_id')
basin_ids = df_basin_ids.index.to_list()

# Basin 54038 returns nan values, so it is excluded from processing
basin_ids.remove(54038)

# Flow categories, each mapped to the pair of percentiles that bounds it
flow_categories = {
    'low_flow': (5, 25),
    'mean_flow': (25, 75),
    'high_flow': (75, 95),
}

In [None]:
# For every basin and flow category: derive an average uncertainty percentage
# from the uncertainty bounds at the category's two percentiles, apply it to
# the observed discharge, and write one CSV per basin/category.

# DataFrame.to_csv does not create missing directories, so make sure the
# output directory exists before the first write.
output_dir = RESULTS / "obs_flow_categories"
output_dir.mkdir(parents=True, exist_ok=True)

for basin_id in basin_ids:
    print(basin_id)

    # The uncertainty row depends only on the basin, so look it up once
    # rather than once per flow category.
    df = df_uncertainty.loc[basin_id]

    for category, (lower, upper) in flow_categories.items():
        # Load percentile based observations
        df_obs = pd.read_csv(f"{RESULTS}/pcr-globwb/evaluation_period/observations/{basin_id}_pcr-globwb_evaluation_observations_{category}.csv")

        # Calculate average uncertainty per category in percentage: first the
        # mean magnitude of the lower/upper bound at each bounding percentile,
        # then the mean of those two values.
        # NOTE(review): with the (5, 25), (25, 75), (75, 95) categories this
        # reads q25_* and q75_* columns, while the NaN filter applied to
        # df_uncertainty only covers the q5_* and q95_* columns -- confirm
        # that the q25/q75 columns exist in the attribute table.
        uncertainty_lower = (np.abs(df[f'q{lower}_uncert_lower']) + np.abs(df[f'q{lower}_uncert_upper'])) / 2
        uncertainty_upper = (np.abs(df[f'q{upper}_uncert_lower']) + np.abs(df[f'q{upper}_uncert_upper'])) / 2
        uncertainty_average = (uncertainty_lower + uncertainty_upper) / 2

        # Calculate observation uncertainty in m3/s
        df_obs['observation_uncertainty'] = df_obs.discharge_vol * (uncertainty_average / 100)
        # A scalar is broadcast to every row; no per-row list is needed.
        df_obs['uncertainty_percentage'] = uncertainty_average

        # Save output (default index=True is kept so the file layout is unchanged)
        df_obs.to_csv(output_dir / f"{category}_{basin_id}_observation_uncertainty_m3s.csv")