In [1]:
import numpy as np
import pandas as pd
# from pathlib import Path
from tqdm.auto import tqdm
# import matplotlib.pyplot as plt
import pickle
import logging

from storage import fit_storage, create_storage_harmonic
from release import fit_release, create_release_harmonic, create_release_linear
from inputs import read_reservoir_attributes, read_reservoir_data#, rank_and_filter_data
from functions import plot_nor, plot_release, epiweek_to_date

# from lisfloodreservoirs.utils.metrics import KGEmod
from lisfloodreservoirs import Config, read_attributes, read_timeseries

## Configuration 

In [2]:
# Configuration file of the fitting run, given as an absolute Windows path on drive Z:.
# NOTE(review): hard-coded, machine-specific location — the notebook only runs where this
# drive is available; consider making the path configurable.
config_file = 'Z:/nahaUsers/casadje/datasets/reservoirs/ResOpsUS/v2.0/results/starfit/config.yml'
# `cfg` provides the locations read by the following cells
# (PATH_DEF, PATH_DATA, RESERVOIRS_FILE and PERIODS_FILE)
cfg = Config(config_file)

### Logger

In [3]:
# named logger used by the cells of this notebook
logger = logging.getLogger('fit-starfit')

# the root logger lets through records of level INFO and above
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

# layout of every record: timestamp | level | logger name | message
log_format = logging.Formatter(
    fmt='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# attach the console handler only if the root logger has none yet, so that
# re-running this cell in the same kernel does not duplicate every message
if len(root_logger.handlers) == 0:
    c_handler = logging.StreamHandler()
    c_handler.setFormatter(log_format)
    root_logger.addHandler(c_handler)

# Logging to a file is currently disabled. The snippet below would define the
# log file; its handler must be added together with the console handler above.
# log_path = cfg.PATH_CALIB / 'logs'
# log_path.mkdir(exist_ok=True)
# log_file = log_path / '{0:%Y%m%d%H%M}_calibrate_{1}.log'.format(
#     datetime.now(),
#     '_'.join(config_file.split('.')[0].split('_')[1:]))
# f_handler = logging.FileHandler(log_file)
# f_handler.setFormatter(log_format)
# root_logger.addHandler(f_handler)

In [4]:
# output folders, both siblings of the folder `cfg.PATH_DEF`:
#   NOR      -> fitted storage parameters
#   release  -> fitted release parameters
results_dir = cfg.PATH_DEF.parent
PATH_STORAGE = results_dir / 'NOR'
PATH_RELEASE = results_dir / 'release'

# create each folder (no error if it already exists) and report where results go
for label, folder in (('Storage', PATH_STORAGE), ('Release', PATH_RELEASE)):
    folder.mkdir(parents=True, exist_ok=True)
    logger.info(f'{label} fitted parameters will be saved in: {folder}')

2025-04-10 13:24:30 | INFO | fit-starfit | Storage fitted parameters will be saved in: Z:\nahaUsers\casadje\datasets\reservoirs\ResOpsUS\v2.0\results\starfit\NOR
2025-04-10 13:24:30 | INFO | fit-starfit | Release fitted parameters will be saved in: Z:\nahaUsers\casadje\datasets\reservoirs\ResOpsUS\v2.0\results\starfit\release


## Data

### Attributes

In [5]:
# list of reservoirs to be trained, read from a header-less CSV file
# (presumably one reservoir ID per row — confirm against the file)
try:
    # squeeze only the column axis: a plain `squeeze()` also collapses a
    # single-row file into a scalar, and `reservoirs` would no longer be a list
    reservoirs = pd.read_csv(cfg.RESERVOIRS_FILE, header=None).squeeze('columns').tolist()
except IOError as e:
    logger.error(f'Failed to open {cfg.RESERVOIRS_FILE}: {e}')
    raise

# import all tables of attributes for the selected reservoirs
path_attributes = cfg.PATH_DATA / 'attributes'
try:
    attributes = read_attributes(path_attributes, reservoirs)
except IOError as e:
    logger.error(f'Failed to read attribute tables from {path_attributes}: {e}')
    raise
logger.info(f'{attributes.shape[0]} reservoirs in the attribute tables')

2025-04-10 13:24:30 | INFO | fit-starfit | 123 reservoirs in the attribute tables


### Time series

In [20]:
from datetime import datetime

In [27]:
# NOTE(review): scratch cell with an out-of-sequence execution count. In the visible
# notebook this placeholder is only read by the ad-hoc check in the next cell.
log_file = 'asdf.log'

In [28]:
# NOTE(review): leftover debugging check; it only prints a marker when `log_file`
# is defined and nothing in the visible notebook depends on it.
if log_file is not None:
    print('hola')

hola


In [6]:
# factors applied to the raw columns (see the unit comments in the loop below)
TO_MILLIONS = 1e-6
SECONDS_PER_DAY = 86400

# training periods, stored as a pickle
# NOTE: unpickling can execute arbitrary code, so PERIODS_FILE must be a trusted file
try:
    with open(cfg.PERIODS_FILE, 'rb') as fh:
        periods = pickle.load(fh)
except IOError as e:
    logger.error(f'Failed to open {cfg.PERIODS_FILE}: {e}')
    raise

# read the time series of the reservoirs in the attribute table and add
# the columns used for fitting: storage `s`, inflow `i` and release `r`
path_timeseries = cfg.PATH_DATA / 'time_series' / 'csv'
try:
    timeseries = read_timeseries(path_timeseries, attributes.index, periods)
    for df in timeseries.values():
        df['s'] = df['storage'] * TO_MILLIONS  # MCM
        df[['i', 'r']] = df[['inflow', 'outflow']] * TO_MILLIONS * SECONDS_PER_DAY  # MCM/day
except IOError as e:
    logger.error(f'Failed to read time series from {path_timeseries}: {e}')
    raise
logger.info(f'{len(timeseries)} reservoirs with timeseries')

2025-04-10 13:24:54 | INFO | fit-starfit | 123 reservoirs with timeseries


## Fit reservoir

### Storage functions

In [7]:
# Fit the storage model (normal operating range, NOR) of every reservoir.
# Each attempt is a pair (minimum years of data, `n_points` passed to `fit_storage`);
# the attempts are tried in order until one produces a non-empty weekly storage.
storage_attempts = [(8, 3), (6, 2), (4, 2)]

# entries of the fitted model that are saved to disk
pars_to_export = ['capacity (MCM)', 'NOR upper bound', 'NOR lower bound']

for grand_id, obs in tqdm(timeseries.items()):

    # update reservoir capacity, if the maximum observation exceeds the value in the attributes
    # NOTE: this modifies `attributes` in place
    attributes.loc[grand_id, 'CAP_MCM'] = max(attributes.loc[grand_id, 'CAP_MCM'], obs.s.max())

    # fit storage model
    for years, n_points in storage_attempts:
        model_storage = fit_storage(
            grand_id,
            storage_daily=obs.s,
            attributes=attributes.loc[grand_id],
            min_days=years * 365,
            n_points=n_points,
        )
        if not model_storage['weekly storage'].empty:
            break
    else:
        # none of the attempts succeeded: report which reservoir is skipped
        logger.warning(f'The normal operating rules for reservoir {grand_id} could not be fitted')
        continue

    # export fitted parameters
    pars = {key: value for key, value in model_storage.items() if key in pars_to_export}
    with open(PATH_STORAGE / f'{grand_id}.pkl', 'wb') as file:
        pickle.dump(pars, file)

    # define normal operating range (NOR): upper (flood) and lower (conservation) bounds by epiweek
    NORup = create_storage_harmonic(model_storage['NOR upper bound'], name='flood').set_index('epiweek')
    NORdown = create_storage_harmonic(model_storage['NOR lower bound'], name='conservation').set_index('epiweek')
    NOR = pd.concat((NORup, NORdown), axis=1)

    # weekly time series of storage returned by the fit, plotted together with the NOR
    weekly_storage = model_storage['weekly storage']

    # plot model; `n_points` is the value of the attempt that succeeded
    plot_nor(
        weekly_storage,
        NOR,
        n_points=n_points,
        title='{0} - {1}'.format(grand_id, attributes.loc[grand_id, 'DAM_NAME']),
        save=PATH_STORAGE / f'{grand_id}.jpg'
    )

  0%|          | 0/123 [00:00<?, ?it/s]

2025-04-10 12:00:25 | INFO | storage | Fitting targets for dam 41: Ross
2025-04-10 12:00:25 | INFO | storage | Cutoff year set back to 1999
2025-04-10 12:00:25 | INFO | storage | Fitting targets for dam 63: Tieton
2025-04-10 12:00:25 | INFO | storage | Cutoff year set back to 1982
2025-04-10 12:00:26 | INFO | storage | Fitting targets for dam 131: Trinity
2025-04-10 12:00:26 | INFO | storage | Fitting targets for dam 131: Trinity
2025-04-10 12:00:26 | INFO | storage | Cutoff year set back to 1998
2025-04-10 12:00:27 | INFO | storage | Fitting targets for dam 132: Shasta
2025-04-10 12:00:27 | INFO | storage | Fitting targets for dam 132: Shasta
2025-04-10 12:00:27 | INFO | storage | Fitting targets for dam 132: Shasta
2025-04-10 12:00:27 | INFO | storage | Cutoff year set back to 2013
2025-04-10 12:00:27 | INFO | storage | Fitting targets for dam 133: Whiskeytown
2025-04-10 12:00:27 | INFO | storage | Fitting targets for dam 133: Whiskeytown
2025-04-10 12:00:27 | INFO | storage | Fittin

### Release function

In [18]:
# Fit the release model of every reservoir that has a fitted storage model, i.e. a
# `<GRanD ID>.pkl` file in PATH_STORAGE. Files whose name is not a number are ignored,
# and the IDs are sorted because the order returned by `glob` is not guaranteed.
ids = sorted(int(file.stem) for file in PATH_STORAGE.glob('*.pkl') if file.stem.isdigit())

# entries of the fitted model that are saved to disk
pars_to_export = ['mean inflow (MCM/wk)', 'harmonic parameters', 'residual parameters', 'constraints']

for grand_id in tqdm(ids):

    # a pickle left over from a previous run may belong to a reservoir that was not loaded
    if grand_id not in timeseries:
        logger.warning(f'No time series available for reservoir {grand_id}; release model skipped')
        continue
    obs = timeseries[grand_id]

    # update reservoir capacity, if the maximum observation exceeds the value in the attributes
    # NOTE: this modifies `attributes` in place
    attributes.loc[grand_id, 'CAP_MCM'] = max(attributes.loc[grand_id, 'CAP_MCM'], obs.s.max())

    # fit release model, relaxing the minimum record length (in years) if the fit fails
    model_release = None
    for years in [5, 4]:
        model_release = fit_release(
            grand_id,
            daily_ops=obs[['s', 'i', 'r']],
            attributes=attributes.loc[grand_id],
            NOR_path=PATH_STORAGE,
            cutoff_year=None,
            min_weeks=52 * years
        )
        # check for an empty result first, so that it is skipped below instead of failing here
        if model_release and pd.notna(model_release['mean inflow (MCM/wk)']):
            break

    if not model_release or all(np.isnan(model_release['harmonic parameters'])):
        logger.warning(f'The release model for reservoir {grand_id} could not be fitted')
        continue

    # export fitted parameters
    pars = {key: value for key, value in model_release.items() if key in pars_to_export}
    with open(PATH_RELEASE / f'{grand_id}.pkl', 'wb') as file:
        pickle.dump(pars, file)

    # extract info from the fitted release: average inflow, harmonic release and release constraints
    avg_inflow = model_release['mean inflow (MCM/wk)']
    release_harmonic = create_release_harmonic(model_release['harmonic parameters']).set_index('epiweek').squeeze()
    release_linear = create_release_linear(model_release['residual parameters'])
    Qmin, Qmax = model_release['constraints']
    weekly_release = model_release['weekly release'].set_index('epiweek')

    # plot model
    title = '{0} - {1}'.format(grand_id, attributes.loc[grand_id, 'DAM_NAME'])
    plot_release(weekly_release.r, avg_inflow, release_harmonic, release_linear, Qmin, Qmax, title=title,
                 save=PATH_RELEASE / f'{grand_id}.jpg')

  0%|          | 0/123 [00:00<?, ?it/s]

2025-04-10 13:28:18 | INFO | release | Fitting release function for dam 1006: Stockton Dam
2025-04-10 13:28:19 | INFO | release | Fitting release function for dam 1020: Copan Lake
2025-04-10 13:28:19 | INFO | release | Fitting release function for dam 1023: Kaw Lake
2025-04-10 13:28:19 | INFO | release | Fitting release function for dam 1026: Table Rock Dam
2025-04-10 13:28:20 | INFO | release | Fitting release function for dam 1027: Fort Supply Lake
2025-04-10 13:28:20 | INFO | release | Fitting release function for dam 1032: Oologah Lake
2025-04-10 13:28:20 | INFO | release | Fitting release function for dam 1033: Beaver
2025-04-10 13:28:21 | INFO | release | Fitting release function for dam 1036: Bull Shoals
2025-04-10 13:28:21 | INFO | release | Fitting release function for dam 1042: Norfork
2025-04-10 13:28:22 | INFO | release | Fitting release function for dam 1048: Keystone Lake
2025-04-10 13:28:22 | INFO | release | Fitting release function for dam 1053: Fort Gibson Lake
2025-0