# Select reservoirs and study period
***

**Author:** Chus Casado Rodríguez<br>
**Date:** 30-09-2024<br>

**Introduction:**<br>
This notebook reads all the attributes and time series in the dataset and selects the reservoirs appropriate for testing the different reservoir routines. Several conditions need to be met for a reservoir to be selected:

1. It must contain observed time series of the variables `inflow`, `storage` and `outflow`.
2. The longest period without gaps in those three time series must span at least `min_years` years.
3. The bias between the observed outflow and inflow time series (ratio of mean outflow to mean inflow) must lie within 1 ± `tol_bias`.

In [1]:
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from pathlib import Path
import pickle

from lisfloodreservoirs.utils import DatasetConfig
from lisfloodreservoirs import read_attributes, read_timeseries
from lisfloodreservoirs.utils.timeseries import define_period

## Configuration

In [2]:
# variables that every selected reservoir must have observed
variables = ['inflow', 'storage', 'outflow']

# dataset configuration, read from a YAML file given as a relative path
cfg = DatasetConfig('config_dataset.yml')

# folder where the selection results are written (created if it does not exist)
PATH_OUT = cfg.PATH_RESOPS / cfg.VERSION / 'selection'
PATH_OUT.mkdir(parents=True, exist_ok=True)
print(f'Selected reservoirs and periods will be saved in:\n\t{PATH_OUT}\n')

Selected reservoirs and periods will be saved in:
	Z:\nahaUsers\casadje\datasets\reservoirs\ResOpsMX\v1.0\selection



## Data

### Attributes

In [3]:
# read the attribute tables; `reservoirs=None` presumably applies no filter (see `read_attributes`)
attributes = read_attributes(cfg.PATH_ATTRS, reservoirs=None)
print(f'{len(attributes)} reservoirs in the attribute tables')

# # keep only reservoirs with all observed variables
# mask = pd.concat([attributes[var.upper()] == 1 for var in variables], axis=1).all(axis=1)
# attributes = attributes[mask]
# attributes.sort_index(axis=0, inplace=True)
# print('{0} reservoirs include observed time series for all variables: {1}'.format(attributes.shape[0],
#                                                                                 ', '.join(variables)))

# NOTE!! The checks below (area and volume) were already done in notebook 1

# # keep reservoirs that comply with the catchment area and total storage conditions
# if cfg.MIN_AREA is not None:
#     mask_area = attributes.CATCH_SKM >= cfg.MIN_AREA
#     attributes = attributes[mask_area]
#     print('{0} reservoirs comply with the minimum catchment area: {1} km²'.format(attributes.shape[0],
#                                                                                            cfg.MIN_AREA))
# if cfg.MIN_VOL is not None:
#     mask_volume = attributes.CAP_MCM >= cfg.MIN_VOL
#     attributes = attributes[mask_volume]
#     print('{0} reservoirs comply with the minimum storage capacity: {1} hm3'.format(attributes.shape[0],
#                                                                                 cfg.MIN_VOL))

99 reservoirs in the attribute tables


### Time series

In [4]:
# load the time series of the reservoirs listed in the attribute tables
timeseries = read_timeseries(cfg.PATH_TS / 'csv', attributes.index)
print(f'{len(timeseries)} reservoirs with timeseries\n')

# keep only reservoirs whose time series contain all the required variables
n_variables = len(variables)
timeseries = {
    ID: ts
    for ID, ts in timeseries.items()
    if ts.columns.intersection(variables).size == n_variables
}
attributes = attributes.loc[list(timeseries)]
print(f'{len(timeseries)} reservoirs with timeseries or all variables\n')

# remove reservoirs with excessively low degree of regulation
if cfg.MIN_DOR is not None:
    # degree of regulation = storage capacity / mean annual inflow volume
    # NOTE(review): assumes CAP_MCM is in hm3 and inflow in m3/s -- confirm against the dataset
    dor_by_id = {}
    for grand_id, ts in timeseries.items():
        capacity = attributes.loc[grand_id, 'CAP_MCM'] * 1e6
        annual_inflow = ts.inflow.mean() * 365 * 24 * 3600
        dor_by_id[grand_id] = capacity / annual_inflow
    dor = pd.Series(dor_by_id, name='DOR')

    # apply the same mask to the attribute table and to the time series
    mask_dor = dor > cfg.MIN_DOR
    attributes = attributes[mask_dor]
    timeseries = {grand_id: ts for grand_id, ts in timeseries.items() if mask_dor[grand_id]}
    n_kept = attributes.shape[0]
    print('{0} reservoirs comply with the minimum degre of regulation: {1}'.format(n_kept, cfg.MIN_DOR))

99 reservoirs with timeseries

89 reservoirs with timeseries or all variables

87 reservoirs comply with the minimum degre of regulation: 0.08


## Selection

In [5]:
# bias: outflow/inflow ratio of every reservoir that passes the record-length check,
#       including those later discarded for excessive bias
# periods: study period of the selected reservoirs, keyed by the reservoir ID as a string
bias = {}
periods = {}
for grand_id, ts in tqdm(timeseries.items(), desc='select reservoirs', total=len(timeseries)):

    # select study period
    # NOTE(review): `define_period` presumably returns the limits of the longest gap-free
    # period, as described in the introduction -- confirm against its implementation
    start, end = define_period(ts[variables])
    # `pd.isna` detects NaN, NaT and None and accepts any datetime-like scalar,
    # whereas `np.isnan` raises a TypeError on `pd.Timestamp` or None
    if pd.isna(start) or pd.isna(end):
        print(f'{grand_id:>4} discarded for lack of records')
        continue
    # length of the study period in days
    duration = (end - start) / np.timedelta64(1, 'D')
    if duration >= cfg.MIN_YEARS * 365:
        # from here on, work only with the study period
        ts = ts.loc[start:end]
    else:
        print(f'{grand_id:>4} discarded for lack of records:\t{duration:.0f} days')
        continue

    # bias between inflow and outflow: ratio of the mean values over the study period
    bias[grand_id] = ts.outflow.mean() / ts.inflow.mean()
    if (1 - cfg.TOL_BIAS) <= bias[grand_id] <= (1 + cfg.TOL_BIAS):
        # save periods (lists with a single start/end date per reservoir)
        periods[str(grand_id)] = {
            'start_dates': [pd.Timestamp(start)],
            'end_dates': [pd.Timestamp(end)]
        }
    else:
        # a NaN or infinite ratio (e.g. zero mean inflow) also ends up here
        print(f'{grand_id:>4} discarded for excesive bias:\t{bias[grand_id]:.2f}')

print(f'\n{len(periods)} reservoirs selected')

select reservoirs:   0%|          | 0/87 [00:00<?, ?it/s]

 673 discarded for excesive bias:	0.63
 675 discarded for excesive bias:	0.58
 677 discarded for excesive bias:	0.36
 678 discarded for excesive bias:	0.58
 682 discarded for excesive bias:	0.51
 683 discarded for excesive bias:	0.53
 685 discarded for excesive bias:	0.69
 687 discarded for excesive bias:	0.69
1307 discarded for excesive bias:	1.34
1323 discarded for excesive bias:	0.67
1328 discarded for excesive bias:	0.61
1331 discarded for excesive bias:	0.42
1333 discarded for excesive bias:	0.01
1334 discarded for excesive bias:	0.46
1335 discarded for excesive bias:	0.57
1336 discarded for excesive bias:	0.61
1341 discarded for excesive bias:	0.64
1349 discarded for excesive bias:	0.00
1350 discarded for excesive bias:	0.33
1351 discarded for excesive bias:	0.43
1353 discarded for excesive bias:	0.36
1364 discarded for excesive bias:	0.58
1376 discarded for excesive bias:	0.51
1378 discarded for excesive bias:	0.45
1383 discarded for excesive bias:	0.68
1385 discarded for excesi

### Export

In [6]:
# export list of selected reservoirs: one ID per line
reservoirs_file = PATH_OUT / 'reservoirs.txt'
with reservoirs_file.open('w') as f:
    f.writelines(f'{grand_id}\n' for grand_id in periods)

In [7]:
# export selected study period: the `periods` dictionary is pickled as is
periods_file = PATH_OUT / 'periods.pkl'
with periods_file.open('wb') as handle:
    pickle.dump(periods, handle)