In [1]:
import numpy as np
import xarray as xr
import pandas as pd

from pathlib import Path
from glob import glob

In [2]:
# Directory layout: every location is derived from ROOT (an absolute path)
ROOT = Path("/gpfs/work1/0/wtrcycle/users/jaerts/camels_uk/")
AUXDATA = ROOT / "aux_data"
RESULTS = ROOT / "calibration_results" / "run_standard" / "wflow_sbm"
MODELS = ROOT / "wflow" / "data"
OBSDIR = AUXDATA / "CAMELS-GB" / "data" / "timeseries"
# NOTE: basin_id is assigned again in a later cell, before any data is loaded
basin_id = '89002'

# Time window handed to the loader functions
# (they keep rows with start_date < timestamp <= end_date)
start_date = '2008-01-01'
end_date   = '2015-09-30'

In [3]:
def get_station_location(basin_id):
    """Return the gauge coordinates of a basin as a ``(lat, lon)`` tuple.

    The coordinates come from the CAMELS-GB topographic attributes CSV
    located under ``AUXDATA``. ``basin_id`` is converted to ``int`` and used
    as a label into the table's ``gauge_id`` index; the ``gauge_lat`` and
    ``gauge_lon`` fields of that row are returned.
    """
    attributes_csv = f"{AUXDATA}/CAMELS-GB/data/CAMELS_GB_topographic_attributes.csv"
    attributes = pd.read_csv(attributes_csv, index_col='gauge_id')

    # Row describing the requested gauge
    station = attributes.loc[int(basin_id)]

    return (station.gauge_lat, station.gauge_lon)

def get_simulations_pcrglobwb(basin_id, start_date, end_date):
    """Load the PCR-GLOBWB simulated discharge at a basin's gauge location.

    Parameters
    ----------
    basin_id : str or int
        Basin identifier, forwarded to ``get_station_location``.
    start_date, end_date : str
        Bounds of the period to keep. Rows with
        ``start_date < timestamp <= end_date`` are retained (the start bound
        is exclusive, the end bound inclusive).

    Returns
    -------
    pandas.DataFrame
        The selected discharge time series in a single column named ``sim``.
    """
    # Local path only: deliberately not named MODELS, so the module-level
    # MODELS constant (used for the wflow output) is not shadowed here.
    pcrglobwb_dir = Path(f'{ROOT}/pcr-globwb/')
    sim_file = f"{pcrglobwb_dir}/uk/netcdf/discharge_dailyTot_output.nc"

    # Open the dataset in a context manager so the NetCDF file handle is
    # released; the station series is converted to a DataFrame before closing.
    with xr.open_dataset(sim_file) as ds_sim:
        # Get station location
        latlon = get_station_location(basin_id)

        # Extract the time series of the grid cell nearest to the gauge
        da_station = ds_sim.discharge.sel(lat=latlon[0], lon=latlon[1], method='nearest')

        # Convert to dataframe
        df_sim = da_station.to_dataframe()

    # Restrict to the requested period (start exclusive, end inclusive)
    mask = (df_sim.index > start_date) & (df_sim.index <= end_date)
    df_sim = df_sim.loc[mask]

    # Keep only the discharge values, under a model-neutral column name
    df_sim = df_sim.drop(columns=['lat', 'lon'])
    df_sim = df_sim.rename(columns={'discharge': 'sim'})

    return df_sim

def get_simulations_wflow(basin_id, start_date, end_date):
    """Load the wflow evaluation-run discharge for a basin.

    Parameters
    ----------
    basin_id : str or int
        Basin identifier; selects ``{MODELS}/{basin_id}/evaluation/output.csv``.
    start_date, end_date : str
        Bounds of the period to keep. Rows with
        ``start_date < timestamp <= end_date`` are retained (the start bound
        is exclusive, the end bound inclusive).

    Returns
    -------
    pandas.DataFrame
        The CSV contents indexed by ``time``, with column ``Q_1`` renamed to
        ``evaluation``.

    Raises
    ------
    IndexError
        If no file matches the expected output path.
    """
    # Set simulation file (first match of the glob pattern)
    sim_file = glob(f'{MODELS}/{basin_id}/evaluation/output.csv')[0]

    # Load simulation dataframe
    df = pd.read_csv(sim_file, parse_dates=True, index_col='time')

    # Restrict to the requested period (start exclusive, end inclusive)
    mask = (df.index > start_date) & (df.index <= end_date)
    df = df.loc[mask]

    # Rename column
    df = df.rename(columns={'Q_1': 'evaluation'})

    return df

def get_observations(basin_id, start_date, end_date):
    """Load the observation time series of a basin from ``OBSDIR``.

    The first CSV whose name matches ``*_{basin_id}_*.csv`` is read with the
    ``date`` column as a parsed index. Only rows with
    ``start_date < timestamp <= end_date`` are returned (start exclusive,
    end inclusive). An ``IndexError`` is raised when no file matches.
    """
    matches = glob(f'{OBSDIR}/*_{basin_id}_*.csv')
    obs_file = matches[0]

    observations = pd.read_csv(obs_file, parse_dates=True, index_col='date')

    # Boolean selector for the requested period
    in_period = (observations.index > start_date) & (observations.index <= end_date)

    return observations.loc[in_period]

In [4]:
basin_id = '23008'

# Simulated discharge from both models, same basin and period
df_pcrglob = get_simulations_pcrglobwb(basin_id, start_date, end_date)
df_wflow = get_simulations_wflow(basin_id, start_date, end_date)

# Observations; rows in which every column is NaN are discarded
df_obs = get_observations(basin_id, start_date, end_date).dropna(how='all')

# Calculate quantiles

In [5]:
percentiles = [5, 25, 50, 75, 95]

# One entry per percentile, filled in the loop below
p_wflow_ls, p_pcrglob_ls, p_obs_ls, model_uncertainties = [], [], [], []

for percentile in percentiles:
    # Streamflow value at this percentile for each source
    pcrglob = np.percentile(df_pcrglob['sim'], percentile)
    wflow = np.percentile(df_wflow['evaluation'], percentile)
    obs = np.percentile(df_obs['discharge_vol'].dropna(), percentile)

    # Absolute difference between the two models, in percent of the wflow value
    model_uncertainty = abs((pcrglob - wflow) / wflow * 100)

    p_pcrglob_ls.append(pcrglob)
    p_wflow_ls.append(wflow)
    p_obs_ls.append(obs)
    model_uncertainties.append(model_uncertainty)

In [6]:
# Summary table: one row per percentile, columns in display order
df = pd.DataFrame({
    'basin_id': [basin_id] * len(percentiles),
    'percentiles': percentiles,
    'wflow_streamflow_percentile': p_wflow_ls,
    'pcrglobwb_streamflow_percentile': p_pcrglob_ls,
    'observation_streamflow_percentile': p_obs_ls,
    'model_uncertainty': model_uncertainties,
})

In [7]:
# Display the per-percentile summary table
df

Unnamed: 0,basin_id,percentiles,wflow_streamflow_percentile,pcrglobwb_streamflow_percentile,observation_streamflow_percentile,model_uncertainty
0,23008,5,0.892552,1.26603,0.76,41.843861
1,23008,25,1.79743,3.669074,1.37,104.128877
2,23008,50,3.58203,7.386771,3.08,106.217417
3,23008,75,8.261567,11.856215,7.6,43.510497
4,23008,95,26.299509,21.191864,27.7,19.421067


In [16]:
# Select the low-flow rows: values at or below each model's own 5th percentile.
# The thresholds are taken from the computed percentile lists rather than
# typed in as rounded numbers, so they stay correct when basin_id or the
# time period changes.
p5_index = percentiles.index(5)
p5_wflow = df_wflow[df_wflow.evaluation <= p_wflow_ls[p5_index]]
p5_pcrglob = df_pcrglob[df_pcrglob.sim <= p_pcrglob_ls[p5_index]]

In [22]:
# Column-wise means of the two low-flow subsets
p5_wflow_mean, p5_pcrglob_mean = p5_wflow.mean(), p5_pcrglob.mean()

# wflow mean minus PCR-GLOBWB mean, as a NumPy array
sampling_mean = np.subtract(p5_wflow_mean.values, p5_pcrglob_mean.values)

In [23]:
# Difference between the wflow and PCR-GLOBWB low-flow means
sampling_mean

array([0.00085346])

In [21]:
# Mean of the wflow low-flow subset, as a NumPy array
p5_wflow_mean.values

array([0.763849])

# Plot flow duration curve

In [27]:
# Display the wflow simulation dataframe
df_wflow

Unnamed: 0_level_0,evaluation
time,Unnamed: 1_level_1
2008-01-02,0.000176
2008-01-03,0.000429
2008-01-04,0.024771
2008-01-05,0.044279
2008-01-06,0.046911
...,...
2015-09-26,0.045578
2015-09-27,0.045597
2015-09-28,0.045569
2015-09-29,0.045523


# Calculate percentage difference between quantiles