In [1]:
import pandas as pd
import hvplot.pandas
from pathlib import Path
import geopandas as gpd
import hvplot.xarray
import numpy as np

## Select the reservoir

In [7]:
RESERVOIR = '0810'
ALG_VERSION = '0.1' # remove temporal resampling

RESULTS_DIR = Path(f'../results/')
DATA_DIR = Path(f'../data/')

In [8]:
# read the bounding box of the study area
### all 100 reservoirs
val_pts = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/100-validation-reservoirs-grand-pts.geojson'))
val_polys = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/100-validation-reservoirs-grand-polys.geojson'))

selected_reservoirs = [
    '0078','0079','0193','0197','0214','0217','0340','0365','0484','0486',
    '0498','0503','0505','0507','0508','0523','0524','0529','0532','0535',
    '0549','0552','0569','0787','0803','0807','0810','0816','0819','0824',
    '0828','0830','0833','0930','0931','0935','0936','1078','1097','1134',
    '1135','1162','1284','1320','1385','1388','1392','1398','1400','1426',
    '1498','0502'
]
res_names = val_pts[['tmsos_id', 'name']].set_index('tmsos_id').to_dict()['name']

RESERVOIR_NAME = res_names[RESERVOIR]

val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]

nominal_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_SKM'].values[0]
nominal_area_poly = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_POLY'].values[0]
max_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_MAX'].values[0]
max_area = np.nan if max_area == -99 else max_area
min_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_MIN'].values[0]
min_area = 0 if min_area == -99 else min_area
area_rep = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_REP'].values[0]
dam_height = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['DAM_HGT_M'].values[0])
elev_msl = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['ELEV_MASL'].values[0])
depth = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['DEPTH_M'].values[0])
capacity = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['CAP_MCM'].values[0])


global_map = (
    val_res_pt.hvplot(
        geo=True, tiles='OSM'
    ) * val_res_pt[val_res_pt['tmsos_id'] == RESERVOIR].hvplot(
        geo=True, color='red', size=100, 
    )
).opts(
    title=f"Locations of validation reservoirs. {RESERVOIR_NAME}, highlighted in red"
)

global_map

In [9]:
(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR].hvplot(
    geo=True, tiles='OSM', shared_axes=False
)).opts(title=f"{RESERVOIR_NAME}")

In [10]:
srtm_extrapolated_dir = Path('../data/aec/srtm_extrapolated_storage/')

poly_deg = 2

aec_fp = srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}_storage.csv'
aec = pd.read_csv(aec_fp, parse_dates=True)
aec

Unnamed: 0,Elevation,CumArea,obs_or_extrapolated,Storage,Storage (mil. m3)
0,124.35,0.0,extrapolated,0.0,0.0
1,125.39,8.82,extrapolated,4586400.0,4.5864
2,126.42,26.06,extrapolated,31394600.0,31.3946
3,127.46,42.92,extrapolated,75062000.0,75.062
4,128.5,59.39,extrapolated,134848100.0,134.8481
5,129.54,75.47,extrapolated,209941700.0,209.9417
6,130.57,91.16,extrapolated,299453200.0,299.45315
7,131.61,106.46,extrapolated,402575200.0,402.57515
8,132.65,121.38,extrapolated,518652800.0,518.65275
9,133.69,135.91,extrapolated,646807400.0,646.80735


In [11]:
import holoviews as hv

# what is the reported capacity?
capacity_hv = hv.HLine(capacity).opts(color='red', ylim=(0, capacity + capacity*0.1), ylabel='capacity (Mil. m3)')
capacity_hv

## [storage] read insitu-data

In [12]:
import xarray as xr

ALG_VERSION = '0.1' # remove temporal resampling

def get_insitu_df(
        tmsos_id,
        val_polys,
        deltares_insitu_dir=Path('/tiger1/pdas47/tmsosPP/data/insitu/deltares'),
        rid_insitu_dir=Path('/tiger1/pdas47/tmsosPP/data/insitu/rid'),
        resops_insitu_dir=Path('/tiger1/pdas47/tmsosPP/data/insitu/resops'),
    ):
    row = val_polys[val_polys['tmsos_id']==tmsos_id]
    db = row['db'].values

    insitu_df = None

    if db == 'deltares':
        deltares_id = row['deltares_id'].values[0]
        
        fn = deltares_insitu_dir / f'{int(deltares_id):07}.csv'
        
        insitu_df = pd.read_csv(fn, parse_dates=['time'])
        insitu_df.sort_values('time', inplace=True)
        insitu_df['date'] = pd.to_datetime(insitu_df['time'].dt.date)
        insitu_df['observed area [km2]'] = insitu_df['area'] * 1e-6
        insitu_df['observed wse [m]'] = np.nan
        insitu_df['observed storage [Mm3]'] = np.nan
        insitu_df['db'] = 'deltares'
        insitu_df = insitu_df[['date', 'observed area [km2]', 'observed wse [m]', 'observed storage [Mm3]', 'db']]

        # calculate other columns
        calculated_wse = np.interp(insitu_df['observed area [km2]'], aec['CumArea'], aec['Elevation'])
        insitu_df['calculated wse [m]'] = calculated_wse
        
        calculated_storage = np.interp(insitu_df['observed area [km2]'], aec['CumArea'], aec['Storage'])
        insitu_df['calculated storage [m3]'] = calculated_storage
        insitu_df['calculated storage [Mm3]'] = calculated_storage * 1e-6

    if db == 'rid':
        rid_filename = row['rid_filename'].values[0]
        
        fn = rid_insitu_dir / rid_filename
        insitu_df = pd.read_csv(fn, parse_dates=['date'])
        insitu_df['observed area [km2]'] = np.nan
        insitu_df['observed wse [m]'] = insitu_df['water_level (m)']
        insitu_df['observed storage [Mm3]'] = insitu_df['storage (mil. m3)']
        insitu_df['db'] = 'rid'
        insitu_df = insitu_df[['date', 'observed area [km2]', 'observed wse [m]', 'observed storage [Mm3]', 'db']]

    if db == 'resops':
        resops_id = int(row['resops_id'].values[0])
        fn = resops_insitu_dir / f'ResOpsUS_{resops_id}.csv'

        insitu_df = pd.read_csv(fn, parse_dates=['date'])
        insitu_df['observed area [km2]'] = np.nan
        insitu_df['observed wse [m]'] = insitu_df['elevation']
        insitu_df['observed storage [Mm3]'] = insitu_df['storage']
        insitu_df['db'] = 'resops'
        insitu_df = insitu_df[['date', 'observed area [km2]', 'observed wse [m]', 'observed storage [Mm3]', 'db']]

    return insitu_df

insitu_df = get_insitu_df(
    RESERVOIR, val_polys
)
insitu_df

  insitu_df = pd.read_csv(fn, parse_dates=['date'])


Unnamed: 0,date,observed area [km2],observed wse [m],observed storage [Mm3],db
0,1980-01-01,,139.960458,1429.12,rid
1,1980-01-02,,139.944712,1425.20,rid
2,1980-01-03,,139.928934,1421.29,rid
3,1980-01-04,,139.913125,1417.38,rid
4,1980-01-05,,139.898076,1413.47,rid
...,...,...,...,...,...
16043,2024-03-24,,139.310000,1236.71,rid
16044,2024-03-25,,139.290000,1232.36,rid
16045,2024-03-26,,139.280000,1230.19,rid
16046,2024-03-27,,139.270000,1228.02,rid


In [13]:
print(capacity)
(capacity_hv * insitu_df.hvplot.scatter(x='date', y='calculated storage [Mm3]').opts(
    title=f'{RESERVOIR}: {RESERVOIR_NAME}. Storage (mil. m3)', ylabel='Storage', xlabel='Date'
))

1966.0


DataError: Supplied data does not contain specified dimensions, the following dimensions were not found: ['calculated storage [Mm3]']

PandasInterface expects tabular data, for more information on supported datatypes see https://holoviews.org/user_guide/Tabular_Datasets.html

In [18]:
# read tmsos data
VERSION = '0.1'
ALG = 'tmsos'

csv_save_fp = DATA_DIR / 'storage_change' / ALG / f'v{VERSION}' / f'{RESERVOIR}_{RESERVOIR_NAME.split(",")[0].replace(" ", "_")}_dels.csv'
csv_save_fp.parent.mkdir(parents=True, exist_ok=True)

tmsos_df = pd.read_csv(csv_save_fp, parse_dates=['date'])

In [19]:
VARIABLE_TO_COMPARE = 'storage'

import HydroErr as he

metrics = [
    'ME', 'MAE', 'rmse', 'NRMSE range', 
    'R^2', 'Pearson r', 'NSE', 'KGE 2012'
]

metrics_fn = [
    he.me, he.mae, he.rmse, he.nrmse_range, he.r_squared, he.pearson_r, he.nse, he.kge_2012, 
]

metrics_df_data = {
    'metric': [],
    'tmsos_id': [],
    'metric_value': [],
    'variable': []
}

ALG_NAME = 'tmsos'

##### TODO: CHANGE TO MORE GENERAL CODE LATER
mod = tmsos_df.set_index('date')['storage']
obs = insitu_df.set_index('date')['calculated storage [m3]'].rename('storage')

# some obs have multiple values for same date. take the mean
obs = obs.groupby('date').mean()

common_idx = mod.index.intersection(obs.index)

metric_values = []

metrics_df_data = {
    'metric': [],
    'tmsos_id': [],
    'metric_value': [],
    'variable': []
}

for metric_name, metric_fn in zip(metrics, metrics_fn):
    try:
        metric_value = metric_fn(
            mod.loc[common_idx], obs.loc[common_idx]
        )
    except Exception as e:
        print(f'Error: {e}')
        metric_value = np.nan

    metrics_df_data['metric'].append(metric_name)
    metrics_df_data['tmsos_id'].append(RESERVOIR)
    metrics_df_data['metric_value'].append(metric_value)
    metrics_df_data['variable'].append('storage')
    
    metrics_df = pd.DataFrame(metrics_df_data)
    metrics_df['algorithm'] = ALG_NAME

metrics_df

Unnamed: 0,metric,tmsos_id,metric_value,variable,algorithm
0,ME,217,12030210.0,storage,tmsos
1,MAE,217,39808140.0,storage,tmsos
2,rmse,217,54066560.0,storage,tmsos
3,NRMSE range,217,0.2011882,storage,tmsos
4,R^2,217,0.7261323,storage,tmsos
5,Pearson r,217,0.852134,storage,tmsos
6,NSE,217,0.5482769,storage,tmsos
7,KGE 2012,217,0.7721937,storage,tmsos


In [20]:
metrics_df[metrics_df['metric'] == 'rmse']

Unnamed: 0,metric,tmsos_id,metric_value,variable,algorithm
2,rmse,217,54066560.0,storage,tmsos


## [STORAGE CHANGE] Read insitu and satellite data

In [9]:
import xarray as xr

ALG_VERSION = '0.1' # remove temporal resampling

insitu_storage_change_fp = DATA_DIR / 'storage' / 'insitu' / f'v{ALG_VERSION}' / f'{RESERVOIR}_{RESERVOIR_NAME.split(",")[0].replace(" ", "_")}_dels.nc'
insitu_storage_change = xr.open_dataset(insitu_storage_change_fp)
insitu_storage_change

FileNotFoundError: [Errno 2] No such file or directory: '/tiger1/pdas47/tmsosPP/data/storage/insitu/v0.1/0217_Cernadilla_Dam_dels.nc'

In [6]:
insitu_storage_change.hvplot.scatter(
    x='date', y='storage_change', shared_axes=False
).opts(title=f"{RESERVOIR_NAME} storage change")

In [7]:
ALG_VERSION = '0.1' # remove temporal resampling


satellite_storage_change_fp = DATA_DIR / 'storage_change' / 'tmsos' / f'v{ALG_VERSION}' / f'{RESERVOIR}_{RESERVOIR_NAME.split(",")[0].replace(" ", "_")}_dels.nc'
satellite_storage_change = xr.open_dataset(satellite_storage_change_fp)
satellite_storage_change

In [8]:
def get_test_df(reservoir_ids):
    obss = []
    mods = []
    for reservoir_id in reservoir_ids:
        reservoir_name = res_names[reservoir_id]

        insitu_storage_change_fp = DATA_DIR / 'storage_change' / 'insitu' / f'v{ALG_VERSION}' / f'{reservoir_id}_{reservoir_name.split(",")[0].replace(" ", "_")}_dels.nc'
        insitu_storage_change = xr.open_dataset(insitu_storage_change_fp)

        satellite_storage_change_fp = DATA_DIR / 'storage_change' / 'tmsos' / f'v{ALG_VERSION}' / f'{reservoir_id}_{reservoir_name.split(",")[0].replace(" ", "_")}_dels.nc'
        satellite_storage_change = xr.open_dataset(satellite_storage_change_fp)

        common_idx = insitu_storage_change['date'].values
        common_idx = list(filter(lambda x: x in satellite_storage_change['date'].values, common_idx))

        obs = insitu_storage_change.sel(date=common_idx).to_pandas()
        mod = satellite_storage_change.sel(date=common_idx).to_pandas()

        # calculate storage_change_daily if not available
        if 'storage_change_daily' not in mod.columns:
            delt = mod.index.to_series().diff().dt.days
            mod['storage_change_daily'] = mod['storage_change'] / delt
        if 'storage_change_daily' not in obs.columns:
            delt = obs.index.to_series().diff().dt.days
            obs['storage_change_daily'] = obs['storage_change'] / delt

        obs['reservoir'] = reservoir_id
        mod['reservoir'] = reservoir_id

        obss.append(obs)
        mods.append(mod)
    
    obs_concat = pd.concat(obss)
    mod_concat = pd.concat(mods)
    return obs_concat, mod_concat

In [9]:
obs_df, mod_df = get_test_df(selected_reservoirs)
mod_df

Unnamed: 0_level_0,area,elevation,storage_change,storage_change_daily,reservoir
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-02-01,37.969032,92.011712,-5.700973e+05,,0505
2019-02-06,37.564507,91.936638,-1.410059e+06,-2.820117e+05,0505
2019-02-11,38.130842,92.041742,2.003844e+06,4.007688e+05,0505
2019-02-21,37.668325,91.955905,1.775949e+07,1.775949e+06,0505
2019-03-03,34.994938,91.360309,-9.065416e+06,-9.065416e+05,0505
...,...,...,...,...,...
2019-11-17,143.982359,105.876566,-5.239997e+06,-1.047999e+06,1385
2019-11-22,145.387759,106.613503,5.357085e+07,1.071417e+07,1385
2019-11-24,148.369896,108.112861,1.112298e+08,5.561489e+07,1385
2019-11-27,149.213136,108.525834,3.081046e+07,1.027015e+07,1385


In [14]:
import holoviews as hv
import panel as pn

# select = pn.widgets.Select(name='Reservoir', options=selected_reservoirs)

subplot_width = 400
subplot_height = 300

def get_reservoir_plots(reservoir_id):
    RESERVOIR_NAME = res_names[reservoir_id]
    storage_change_hv = mod_df[mod_df['reservoir'] == reservoir_id].hvplot.scatter(
        x='date', y='storage_change', shared_axes=False, color='blue'
    ).opts(title=f"{RESERVOIR_NAME} daily storage change", width=subplot_width, height=subplot_height) * obs_df[obs_df['reservoir'] == reservoir_id].hvplot.scatter(
        x='date', y='storage_change', shared_axes=False, color='red'
    ).opts(title=f"{RESERVOIR_NAME} daily storage change", width=subplot_width, height=subplot_height)

    area_hv = mod_df[mod_df['reservoir'] == reservoir_id].hvplot.scatter(
        x='date', y='area', shared_axes=False, color='blue'
    ).opts(title=f"{RESERVOIR_NAME} area", width=subplot_width, height=subplot_height) * obs_df[obs_df['reservoir'] == reservoir_id].hvplot.scatter(
        x='date', y='area', shared_axes=False, color='red'
    ).opts(title=f"{RESERVOIR_NAME} area", width=subplot_width, height=subplot_height)

    elevation_hv = mod_df[mod_df['reservoir'] == reservoir_id].hvplot.scatter(
        x='date', y='elevation', shared_axes=False, color='blue'
    ).opts(title=f"{RESERVOIR_NAME} elevation", width=subplot_width, height=subplot_height) * obs_df[obs_df['reservoir'] == reservoir_id].hvplot.scatter(
        x='date', y='elevation', shared_axes=False, color='red'
    ).opts(title=f"{RESERVOIR_NAME} elevation", width=subplot_width, height=subplot_height)

    storage_hv = mod_df[mod_df['reservoir'] == reservoir_id].hvplot.scatter(
        x='elevation', y='area', shared_axes=False, color='blue', 
    ).opts(title=f"{RESERVOIR_NAME} Elevation-Area Relationship", width=subplot_width, height=subplot_height) * obs_df[obs_df['reservoir'] == RESERVOIR].hvplot.scatter(
        x='elevation', y='area', shared_axes=False, color='red', 
    ).opts(title=f"{RESERVOIR_NAME} Elevation-Area Relationship", width=subplot_width, height=subplot_height)

    return (storage_change_hv + storage_hv + area_hv + elevation_hv).cols(2)

dmap = hv.DynamicMap(
    get_reservoir_plots, kdims=['reservoir']
).redim.values(
    reservoir = selected_reservoirs
)
hv_panel = pn.pane.HoloViews(dmap)

pn.Column(
    hv_panel.widget_box,
    hv_panel,
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'75007b5f-057e-45bf-ab16-33c997f619c1': {'version…

In [15]:
import hvplot.xarray

(obs_df.hvplot(
    x='date',
    y='storage_change_daily',
    # title=f"Storage Change (m3/d). {RESERVOIR_NAME}",
    ylabel='Storage change (m3/d)',
    xlabel='Time',
    color='blue',
    kind='scatter',
    label='In-Situ',
    by='reservoir',
    subplots=True,
    shared_axes=False,
) * mod_df.hvplot(
    x='date',
    y='storage_change_daily',
    # title=f"Storage Change (m3/d). {RESERVOIR_NAME}",
    ylabel='Storage change (m3/d)',
    xlabel='Time',
    color='red',
    kind='scatter',
    label='Satellite',
    by='reservoir',
    subplots=True,
    shared_axes=False,
)).cols(1)

In [15]:
# deltares_insitu_dir = Path('../data/insitu/deltares/')
# rid_insitu_dir = Path('../data/insitu/rid')
# resops_insitu_dir = Path('../data/insitu/resopsus')
# area_column = 'tmsos area [km2]'
# area_dir = Path('../data/area/tmsos/v0.1')

# def get_insitu_df(tmsos_id):
#     idx = val_polys['tmsos_id'].isin(selected_reservoirs)
#     subset = val_polys[idx]

#     row = subset[subset['tmsos_id']==tmsos_id]
#     db = row['db'].values
    
#     insitu_df = None

#     if db == 'deltares':
#         deltares_id = row['deltares_id'].values[0]
        
#         fn = deltares_insitu_dir / f'{int(deltares_id):07}.csv'
        
#         insitu_df = pd.read_csv(fn, parse_dates=['time'])
#         insitu_df.sort_values('time', inplace=True)
#         insitu_df['date'] = pd.to_datetime(insitu_df['time'].dt.date)
#         insitu_df['observed area [km2]'] = insitu_df['area'] * 1e-6
#         insitu_df['observed wse [m]'] = np.nan
#         insitu_df['observed storage [Mm3]'] = np.nan
#         insitu_df['db'] = 'deltares'
#         insitu_df = insitu_df[['date', 'observed area [km2]', 'observed wse [m]', 'observed storage [Mm3]', 'db']]

#     if db == 'rid':
#         rid_id = int(row['rid_id'].values[0])
#         name = row['name'].values[0]
        
#         fn = rid_insitu_dir / f'{rid_id}-{name}_Dam.csv'
#         insitu_df = pd.read_csv(fn, parse_dates=['date'])
#         insitu_df['observed area [km2]'] = np.nan
#         insitu_df['observed wse [m]'] = insitu_df['water_level (m)']
#         insitu_df['observed storage [Mm3]'] = insitu_df['storage (mil. m3)']
#         insitu_df['db'] = 'rid'
#         insitu_df = insitu_df[['date', 'observed area [km2]', 'observed wse [m]', 'observed storage [Mm3]', 'db']]

#     if db == 'resops':
#         resops_id = int(row['resops_id'].values[0])
#         fn = resops_insitu_dir / f'ResOpsUS_{resops_id}.csv'

#         insitu_df = pd.read_csv(fn, parse_dates=['date'])
#         insitu_df['observed area [km2]'] = np.nan
#         insitu_df['observed wse [m]'] = insitu_df['elevation']
#         insitu_df['observed storage [Mm3]'] = insitu_df['storage']
#         insitu_df['db'] = 'resops'
#         insitu_df = insitu_df[['date', 'observed area [km2]', 'observed wse [m]', 'observed storage [Mm3]', 'db']]

#     return insitu_df

# insitu_dfs = []
# sat_dfs = []
# test_dfs = []

# perf_dfs = []

# for reservoir in selected_reservoirs:
#     insitu_df = get_insitu_df(reservoir)

#     insitu_df['tmsos_id'] = reservoir
#     insitu_df.set_index(['tmsos_id', 'date'], inplace=True)
#     insitu_dfs.append(insitu_df)
    
#     sat_fn = Path(f'{area_dir}/{reservoir}.csv')
#     sat_df = pd.read_csv(sat_fn, parse_dates=['time'], dtype={'tmsos_id': str})
#     sat_df['date'] = pd.to_datetime(sat_df['time'].dt.date)
#     sat_df = sat_df.drop(['time'], axis=1)
#     sat_df.set_index(['tmsos_id', 'date'], inplace=True)
#     sat_dfs.append(sat_df)

# insitu_df = pd.concat(insitu_dfs)
# sat_df = pd.concat(sat_dfs)

In [16]:
# silence warnings

import os
import sys
sys.stderr = open(os.devnull, "w")  # silence stderr
# from sklearn.ensemble import RandomForestRegressor
# sys.stderr = sys.__stderr__  # unsilence stderr

In [17]:
# he.nrmse_range(
#     mod_df[mod_df['reservoir'] == RESERVOIR]['storage_change_daily'],
#     obs_df[obs_df['reservoir'] == RESERVOIR]['storage_change_daily']
# )

# from sklearn.metrics import mean_squared_error

# rms = mean_squared_error(
#     obs_df[obs_df['reservoir'] == RESERVOIR]['storage_change_daily'].fillna(0), 
#     mod_df[mod_df['reservoir'] == RESERVOIR]['storage_change_daily'].fillna(0), 
#     squared=False
# )

# rms

In [18]:
VARIABLE_TO_COMPARE = 'storage_change_daily'

# common_idx = insitu_storage_change['date'].values
# common_idx = list(filter(lambda x: x in satellite_storage_change['date'].values, common_idx))

import HydroErr as he

metrics = [
    'ME', 'MAE', 'rmse', 'NRMSE range', 
    'R^2', 'Pearson r', 'NSE', 'KGE 2012'
]

metrics_fn = [
    he.me, he.mae, he.rmse, he.nrmse_range, he.r_squared, he.pearson_r, he.nse, he.kge_2012, 
]

metrics_df_data = {
    'metric': [],
    'tmsos_id': [],
    'metric_value': [],
    'variable': []
}

ALG_NAME = 'tmsos'

metrics_dfs = []
for VARIABLE_TO_COMPARE in ['storage_change_daily', 'elevation', 'area']:
    for reservoir_id, gp in obs_df.groupby('reservoir'):
        print(f'{reservoir_id = }')
        for metric_name, metric_fn in zip(metrics, metrics_fn):
            obs = obs_df.loc[obs_df['reservoir'] == reservoir_id, VARIABLE_TO_COMPARE]
            mod = mod_df.loc[mod_df['reservoir'] == reservoir_id, VARIABLE_TO_COMPARE]

            try:
                metric_value = metric_fn(mod, obs)
            except Exception as e:
                print(f'Error: {e}')
                metric_value = np.nan
            metrics_df_data['metric'].append(metric_name)
            metrics_df_data['tmsos_id'].append(reservoir_id)
            metrics_df_data['metric_value'].append(metric_value)
            metrics_df_data['variable'].append(VARIABLE_TO_COMPARE)
    metrics_df = pd.DataFrame(metrics_df_data)
    metrics_df['algorithm'] = ALG_NAME

    metrics_dfs.append(metrics_df)

metrics_df_all = pd.concat(metrics_dfs)
metrics_df_all

reservoir_id = '0214'
reservoir_id = '0349'
reservoir_id = '0464'
reservoir_id = '0486'
reservoir_id = '0502'
reservoir_id = '0505'
reservoir_id = '0518'
reservoir_id = '0524'
Error: zero-size array to reduction operation maximum which has no identity
reservoir_id = '0810'
reservoir_id = '0830'
reservoir_id = '1078'
reservoir_id = '1284'
reservoir_id = '1385'
reservoir_id = '0214'
reservoir_id = '0349'
reservoir_id = '0464'
reservoir_id = '0486'
reservoir_id = '0502'
reservoir_id = '0505'
reservoir_id = '0518'
reservoir_id = '0524'
reservoir_id = '0810'
reservoir_id = '0830'
reservoir_id = '1078'
Error: zero-size array to reduction operation maximum which has no identity
reservoir_id = '1284'
Error: zero-size array to reduction operation maximum which has no identity
reservoir_id = '1385'
Error: zero-size array to reduction operation maximum which has no identity
reservoir_id = '0214'
reservoir_id = '0349'
reservoir_id = '0464'
reservoir_id = '0486'
reservoir_id = '0502'
reservoir_id =

Unnamed: 0,metric,tmsos_id,metric_value,variable,algorithm
0,ME,0214,-2.021262e+06,storage_change_daily,tmsos
1,MAE,0214,1.186399e+07,storage_change_daily,tmsos
2,rmse,0214,2.905685e+07,storage_change_daily,tmsos
3,NRMSE range,0214,9.138947e-02,storage_change_daily,tmsos
4,R^2,0214,3.376373e-05,storage_change_daily,tmsos
...,...,...,...,...,...
307,NRMSE range,1385,,area,tmsos
308,R^2,1385,,area,tmsos
309,Pearson r,1385,,area,tmsos
310,NSE,1385,,area,tmsos


In [64]:
variable = pn.widgets.Select(name='variable', options=['storage_change_daily', 'elevation', 'area'])
tmsos_id = pn.widgets.Select(name='tmsos_id', options=selected_reservoirs)

explanation = pn.pane.Markdown(
    """
    ## Performance Metrics

    This table shows the performance metrics of the algorithm compared to the in-situ data.
    """
)

def get_metrics_table(tmsos_id, variable):
    metrics_table = metrics_df_all[(metrics_df_all['tmsos_id'] == tmsos_id)&(metrics_df_all['variable'] == variable)]

    metrics_table = metrics_table[['metric', 'metric_value']].groupby('metric').aggregate(np.mean)

    # return metrics_table.reset_index()
    return metrics_table.reset_index().hvplot.table(
        columns=['tmsos_id', 'metric', 'metric_value', 'variable'],
        width=800, height=400
    )

# dmap = hv.DynamicMap(
#     get_metrics_table, kdims=['tmsos_id', 'variable']
# ).redim.values(
#     tmsos_id = selected_reservoirs, variable=['storage_change_daily', 'elevation', 'area']
# )
# hv_panel = pn.pane.HoloViews(dmap)

# hv_panel
interactive_plot = pn.bind(get_metrics_table, tmsos_id=tmsos_id, variable=variable)

pn.Row(
    pn.WidgetBox(
        tmsos_id, variable
    ),
    interactive_plot
)
# get_metrics_table('0505', 'storage_change_daily')

DataError: Supplied data does not contain specified dimensions, the following dimensions were not found: ['tmsos_id', 'variable']

PandasInterface expects tabular data, for more information on supported datatypes see http://holoviews.org/user_guide/Tabular_Datasets.html

In [27]:
metrics_df_all[metrics_df_all['variable'] == 'area'].hvplot.bar(
    y='metric_value', x='tmsos_id'
)

In [28]:
metrics_df_fp = RESULTS_DIR / 'metrics' / f'{RESERVOIR}_{RESERVOIR_NAME.split(",")[0].replace(" ", "_")}_{ALG_NAME}_v{ALG_VERSION}_{VARIABLE_TO_COMPARE}_metrics.csv'
metrics_df_fp.parent.mkdir(parents=True, exist_ok=True)
metrics_df_all.to_csv(metrics_df_fp, index=False)

In [29]:
metrics_df_all.hvplot.barh(
    subplots=True, by='metric', x='tmsos_id', y='metric_value',
    ylabel='Value', xlabel='Metric', rot=45, shared_axes=False
).cols(1)

## todo: error metrics

In [None]:
# all in one
for reservoir_id in selected_reservoirs:
    

In [None]:
from datetime import datetime

result_dir = Path('../data/results')
result_dir.mkdir(exist_ok=True)

d = datetime.today().strftime('%Y%m%d_%H%M%S')
save_dir = result_dir / f'{d}'
save_dir.mkdir(exist_ok=False)

# perf_df.to_csv(save_dir / 'performance.csv', index=False)