In [1]:
# import numpy as np
import pandas as pd
from pathlib import Path
from tqdm.auto import tqdm
# import matplotlib.pyplot as plt
import pickle
import logging

from inputs import read_reservoir_attributes, read_reservoir_data
from starfit import Starfit

from lisfloodreservoirs.utils.metrics import KGEmod, compute_performance
from lisfloodreservoirs import Config, read_attributes, read_timeseries

## Configuration 

In [2]:
# Location of the configuration file (.yml) that drives this run.
# NOTE(review): hard-coded absolute path on drive Z: (presumably a mapped network
# drive); the notebook only runs as-is on machines where that path exists.
config_file = 'Z:/nahaUsers/casadje/datasets/reservoirs/ResOpsUS/v2.0/results/starfit/config.yml'
# `cfg` supplies the paths read by the following cells:
# PATH_CALIB, PATH_DATA, PATH_DEF, RESERVOIRS_FILE and PERIODS_FILE.
cfg = Config(config_file)

### Logger

In [3]:
# root logger: every record of level INFO or above is emitted
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

# layout shared by the handlers: timestamp | level | logger name | message
log_format = logging.Formatter(
    fmt='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# # define log file (currently disabled)
# log_path = cfg.PATH_CALIB / 'logs'
# log_path.mkdir(exist_ok=True)
# log_file = log_path / '{0:%Y%m%d%H%M}_calibrate_{1}.log'.format(
#     datetime.now(),
#     '_'.join(config_file.split('.')[0].split('_')[1:]))

# attach handlers only the first time, so that re-running this cell in the
# same kernel does not duplicate every log line
if len(root_logger.handlers) == 0:
    # console handler
    c_handler = logging.StreamHandler()
    c_handler.setFormatter(log_format)
    root_logger.addHandler(c_handler)

    # # file handler (currently disabled)
    # f_handler = logging.FileHandler(log_file)
    # f_handler.setFormatter(log_format)
    # root_logger.addHandler(f_handler)

# named logger used by the remaining cells of this notebook
logger = logging.getLogger('run-starfit')

In [4]:
# report the output directory (cfg.PATH_CALIB), where the simulation loop writes its CSV files and plots
logger.info(f'Results will be saved in: {cfg.PATH_CALIB}')

2025-04-10 16:31:58 | INFO | run-starfit | Results will be saved in: Z:\nahaUsers\casadje\datasets\reservoirs\ResOpsUS\v2.0\results\starfit\calibration\bivariate


## Data

In [5]:
# list of reservoirs to be trained
# The file is read without a header row. `squeeze('columns')` collapses the
# single column into a Series even when the file lists only ONE reservoir; a
# bare `squeeze()` would reduce that 1x1 table to a scalar, so `reservoirs`
# would stop being a list (or `.tolist()` would fail for a string ID).
try:
    reservoirs = pd.read_csv(cfg.RESERVOIRS_FILE, header=None).squeeze('columns').tolist()
except IOError as e:
    logger.error(f'Failed to open {cfg.RESERVOIRS_FILE}: {e}')
    raise

# import all tables of attributes
# `attributes` is used below through `.shape` and, in the next cell, `.index`
# (presumably a DataFrame with one row per reservoir -- TODO confirm)
try:
    attributes = read_attributes(cfg.PATH_DATA / 'attributes', reservoirs)
except IOError as e:
    logger.error('Failed to read attribute tables from {0}: {1}'.format(cfg.PATH_DATA / 'attributes', e))
    raise
logger.info(f'{attributes.shape[0]} reservoirs in the attribute tables')

2025-04-10 16:31:58 | INFO | run-starfit | 123 reservoirs in the attribute tables


In [6]:
# training periods
try:
    with open(cfg.PERIODS_FILE, 'rb') as file:
        periods = pickle.load(file)
except IOError as e:
    logger.error(f'Failed to open {cfg.PERIODS_FILE}: {e}')
    raise

# read time series
try:
    timeseries = read_timeseries(cfg.PATH_DATA / 'time_series' / 'csv',
                                 attributes.index,
                                 periods)
except IOError as e:
    logger.error('Failed to read time series from {0}: {1}'.format(cfg.PATH_DATA / 'time_series' / 'csv', e))
    raise
logger.info(f'{len(timeseries)} reservoirs with timeseries')

2025-04-10 16:32:10 | INFO | run-starfit | 123 reservoirs with timeseries


## Run

In [16]:
# Simulate every reservoir with its fitted STARFIT model and export the results.
#
# For each reservoir the loop (1) loads the fitted storage and release models,
# (2) declares a `Starfit` reservoir, (3) simulates it with the observed inflow,
# and (4) exports the simulation, its performance and two plots to
# `cfg.PATH_CALIB`. Every step logs its failure instead of raising, so that one
# faulty reservoir does not abort the whole batch: a failure in steps 1-3 skips
# the reservoir, a failure in step 4 only skips that particular output.
for grand_id, obs in tqdm(timeseries.items(), desc='simulating reservoir'):

    logger.info(f'Simulating reservoir {grand_id}')
    # add day of the year to the observed time series
    # NOTE: the column is written into the DataFrame stored in `timeseries`
    obs['doy'] = obs.index.dayofyear

    # SIMULATE OPTIMIZED RESERVOIR

    # load fitted storage model
    # NOTE(security): `pickle.load` can run arbitrary code; only load trusted files
    try:
        with open(cfg.PATH_DEF.parent / 'NOR' / f'{grand_id}.pkl', 'rb') as file:
            model_storage = pickle.load(file)
        # bounds of the normal operating range (NOR)
        NOR = pd.DataFrame({
                'flood': model_storage["NOR upper bound"],
                'conservation': model_storage["NOR lower bound"]
            })
        # total capacity: MCM (per the key name) scaled by 1e6, presumably to m3
        Vtot = model_storage['capacity (MCM)'] * 1e6
        logger.debug('Storage model correctly loaded')
    except Exception as e:
        logger.error(f'Storage model could not be loaded: {e}')
        continue

    # load fitted release model
    try:
        with open(cfg.PATH_DEF.parent / 'release' / f'{grand_id}.pkl', 'rb') as file:
            model_release = pickle.load(file)
        # weekly volume (MCM/wk) to mean flow: 7 * 86400 seconds per week
        avg_inflow = model_release['mean inflow (MCM/wk)'] * 1e6 / (7 * 86400) # m3/s
        # release limits derived from the mean inflow
        # NOTE(review): assumes 'constraints' is a 2-element array supporting
        # element-wise arithmetic (e.g. numpy) -- TODO confirm
        Qmin, Qmax = (model_release['constraints'] + 1) * avg_inflow
        logger.debug('Release model correctly loaded')
    except Exception as e:
        logger.error(f'Release model could not be loaded: {e}')
        continue

    # declare reservoir
    try:
        res = Starfit(
            Vtot=Vtot,
            avg_inflow=avg_inflow,
            pars_Vf=NOR['flood'],
            pars_Vc=NOR['conservation'],
            pars_Qharm=model_release['harmonic parameters'],
            pars_Qresid=model_release['residual parameters'],
            Qmin=Qmin,
            Qmax=Qmax
        )
        # export calibrated parameters
        # TODO: `get_params()` not implemented in class `Starfit`
        # with open(cfg.PATH_CALIB / f'{grand_id}_optimal_parameters.yml', 'w') as file:
        #     yaml.dump(res.get_params(), file)
        logger.debug('Starfit class correctly declared')
    except Exception as e:
        logger.error(f'Starfit class could not be declared: {e}')
        continue

    # run simulation, starting from the first observed storage
    # Any exception (not only RuntimeError) is caught, as in the steps above:
    # e.g. a missing column or a failed CSV export must not stop the batch.
    try:
        sim = res.simulate(obs.inflow, obs.storage.iloc[0])
        sim.to_csv(cfg.PATH_CALIB / f'{grand_id}_simulation.csv', float_format='%.3f')
        logger.info('Simulation successfully finished')
    except Exception as e:
        logger.error(f'The simulation crashed: {e}')
        continue

    # ANALYSE RESULTS
    # The outputs below are independent from each other: a failure is logged
    # and the loop carries on with the next output.

    # performance
    try:
        performance_cal = compute_performance(obs, sim)
        performance_cal.to_csv(cfg.PATH_CALIB / f'{grand_id}_performance.csv', float_format='%.3f')
        logger.info(f'Performance of reservoir {grand_id} has been computed')
    except Exception as e:
        logger.error(f'The performance of reservoir {grand_id} could not be computed or exported: {e}')

    # scatter plot simulation vs observation
    try:
        res.scatter(
            sim,
            obs,
            norm=False,
            title=f'grand_id: {grand_id}',
            save=cfg.PATH_CALIB / f'{grand_id}_scatter.jpg',
        )
        logger.debug('Scatter plot of simulation successfully created')
    except Exception as e:
        logger.error(f'The scatter plot of reservoir {grand_id} could not be generated: {e}')

    # line plot simulation vs observation
    try:
        res.lineplot(
            {'starfit': sim},
            obs,
            Vlims=[res.Vtot],
            Qlims=[res.Qmin, res.Qmax],
            figsize=(12, 6),
            save=cfg.PATH_CALIB / f'{grand_id}_line.jpg',
        )
        logger.debug('Line plot of simulation successfully created')
    except Exception as e:
        logger.error(f'The line plot of reservoir {grand_id} could not be generated: {e}')

simulating reservoir:   0%|          | 0/123 [00:00<?, ?it/s]

2025-04-10 16:50:27 | INFO | run-starfit | Simulating reservoir 41
2025-04-10 16:50:29 | INFO | run-starfit | Simulation of the calibrated reservoir 41 successfully finished
2025-04-10 16:50:29 | INFO | run-starfit | Performance of reservoir 41 has been computed
2025-04-10 16:50:30 | INFO | run-starfit | Scatter plot of simulation from reservoir 41
2025-04-10 16:50:30 | INFO | run-starfit | Line plot of simulation from reservoir 41
2025-04-10 16:50:30 | INFO | run-starfit | Simulating reservoir 63
2025-04-10 16:50:34 | INFO | run-starfit | Simulation of the calibrated reservoir 63 successfully finished
2025-04-10 16:50:34 | INFO | run-starfit | Performance of reservoir 63 has been computed
2025-04-10 16:50:35 | INFO | run-starfit | Scatter plot of simulation from reservoir 63
2025-04-10 16:50:35 | INFO | run-starfit | Line plot of simulation from reservoir 63
2025-04-10 16:50:35 | INFO | run-starfit | Simulating reservoir 131
2025-04-10 16:50:36 | INFO | run-starfit | Simulation of the