In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import pickle
import logging

from storage import fit_storage, create_storage_harmonic
from release import fit_release, create_release_harmonic, create_release_linear
from inputs import read_reservoir_attributes, read_reservoir_data#, rank_and_filter_data
from functions import plot_nor, plot_release, epiweek_to_date

from lisfloodreservoirs.utils.metrics import KGEmod
from lisfloodreservoirs import Config, read_attributes, read_timeseries

## Configuration 

In [2]:
# location of the configuration file (config.yml) that drives the whole notebook
# NOTE(review): absolute path on a mapped network drive; it must be edited to run the notebook elsewhere
CONFIG_FILE = 'Z:/nahaUsers/casadje/datasets/reservoirs/ResOpsUS/results/starfit/config.yml'
cfg = Config(CONFIG_FILE)

### Logger

In [3]:
# create logger
# NOTE: `logging.getLogger` returns the very same object every time it is called with the same
# name, so re-running this cell used to attach one more handler per run and every message was
# printed several times. Handlers left over from a previous run are removed before adding a new one.
logger = logging.getLogger('fit-starfit')
logger.setLevel(logging.INFO)
logger.propagate = False  # records are not passed on to the handlers of ancestor loggers
for old_handler in list(logger.handlers):
    logger.removeHandler(old_handler)
    old_handler.close()
log_format = logging.Formatter('%(asctime)s | %(levelname)s | %(name)s | %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# log on screen
c_handler = logging.StreamHandler()
c_handler.setFormatter(log_format)
c_handler.setLevel(logging.INFO)
logger.addHandler(c_handler)
# log file: disabled
# NOTE(review): the snippet below relies on `datetime` and `args`, which are neither imported nor
# defined in the visible cells; both must be provided before enabling it
# # log file
# log_path = cfg.PATH_CALIB / 'logs'
# log_path.mkdir(exist_ok=True)
# log_file = log_path / '{0:%Y%m%d%H%M}_calibrate_{1}.log'.format(datetime.now(),
#                                                                '_'.join(args.config_file.split('.')[0].split('_')[1:]))
# f_handler = logging.FileHandler(log_file)
# f_handler.setFormatter(log_format)
# f_handler.setLevel(logging.INFO)
# logger.addHandler(f_handler)

In [4]:
# both output folders hang from the parent directory of `cfg.PATH_DEF`
results_dir = cfg.PATH_DEF.parent
PATH_STORAGE = results_dir / 'NOR'
PATH_RELEASE = results_dir / 'release'

# create the folders (if missing) and report where the fitted parameters will be written
PATH_STORAGE.mkdir(parents=True, exist_ok=True)
logger.info(f'Storage fitted parameters will be saved in: {PATH_STORAGE}')
PATH_RELEASE.mkdir(parents=True, exist_ok=True)
logger.info(f'Release fitted parameters will be saved in: {PATH_RELEASE}')

2024-07-30 17:45:55 | INFO | fit-starfit | Storage fitted parameters will be saved in: Z:\nahaUsers\casadje\datasets\reservoirs\ResOpsUS\results\starfit\NOR
2024-07-30 17:45:55 | INFO | fit-starfit | Release fitted parameters will be saved in: Z:\nahaUsers\casadje\datasets\reservoirs\ResOpsUS\results\starfit\release


In [5]:
# USRDATS_path = Path('Z:/nahaUsers/casadje/datasets/reservoirs/ResOpsUS/raw')
# GRanD_path = Path('Z:/nahaUsers/casadje/datasets/reservoirs/GRanD/v1_3')

# # grand_id = 753
# cutoff_year = 1982

## Data

### Attributes

In [6]:
# list of reservoirs to be trained
# The file is read without header and its single column is converted into a list.
# NOTE: `squeeze(axis='columns')` is used instead of a bare `squeeze()` because the latter also
# collapses the row axis: a file listing a single reservoir would become a scalar and
# `reservoirs` would end up being a number instead of a list.
try:
    reservoirs = pd.read_csv(cfg.RESERVOIRS_FILE, header=None).squeeze(axis='columns').tolist()
except IOError as e:
    logger.error(f'Failed to open {cfg.RESERVOIRS_FILE}: {e}')
    raise

# import all tables of attributes for the selected reservoirs
path_attributes = cfg.PATH_DATA / 'attributes'
try:
    attributes = read_attributes(path_attributes, reservoirs)
except IOError as e:
    logger.error(f'Failed to read attribute tables from {path_attributes}: {e}')
    raise
logger.info(f'{attributes.shape[0]} reservoirs in the attribute tables')

2024-07-30 17:45:55 | INFO | fit-starfit | 90 reservoirs in the attribute tables


### Time series

In [7]:
# training periods
# NOTE(review): unpickling can execute arbitrary code; `cfg.PERIODS_FILE` must come from a trusted source
try:
    with open(cfg.PERIODS_FILE, 'rb') as file:
        periods = pickle.load(file)
except IOError as e:
    logger.error(f'Failed to open {cfg.PERIODS_FILE}: {e}')
    raise

# read time series
path_timeseries = cfg.PATH_DATA / 'time_series' / 'csv'
try:
    timeseries = read_timeseries(path_timeseries, attributes.index, periods)
    # add (in place) the variables used by the fitting functions, in the units they expect
    for grand_id, obs in timeseries.items():
        obs['s'] = obs['storage'] * 1e-6 # MCM
        flows = obs[['inflow', 'outflow']] * 1e-6 * 86400 # MCM/day
        obs['i'] = flows['inflow']
        obs['r'] = flows['outflow']
except IOError as e:
    logger.error('Failed to read time series from {0}: {1}'.format(path_timeseries, e))
    raise
logger.info(f'{len(timeseries)} reservoirs with timeseries')

2024-07-30 17:45:59 | INFO | fit-starfit | 90 reservoirs with timeseries


## Data (legacy single-reservoir loading, disabled)

In [8]:
# # read reservoir attributes and extract storage capacity
# attributes = read_reservoir_attributes(GRanD_path, grand_id)
# Vtot = attributes.loc[grand_id, 'CAP_MCM']

In [9]:
# # read daily time series
# daily = (
#     read_reservoir_data(USRDATS_path, grand_id)
#     .assign(
#         i=lambda x: x['i_cumecs'] * 1e-6 * 86400,  # MCM/day
#         r=lambda x: x['r_cumecs'] * 1e-6 * 86400,  # MCM/day
#         year=lambda x: x.date.dt.year,
#         epiweek=lambda x: x.date.dt.isocalendar().week
#     )
#     .rename(columns={'s_MCM': 's'})
#     .loc[:, ['date', 's', 'i', 'r', 'year', 'epiweek']]
#     .query('year >= @cutoff_year')
#     .set_index('date')
#     )
# daily.epiweek = daily.epiweek.astype(int)

## Fit reservoir

### Storage functions

In [11]:
for grand_id, obs in tqdm(timeseries.items()):

    # update reservoir capacity, if the maximum observation exceeds the value reported in GRanD
    reported_capacity = attributes.loc[grand_id, 'CAP_MCM']
    attributes.loc[grand_id, 'CAP_MCM'] = max(reported_capacity, obs['s'].max())

    # fit storage model (the attributes are read after the capacity update above)
    reservoir_attrs = attributes.loc[grand_id]
    model_storage = fit_storage(grand_id, storage_daily=obs['s'], attributes=reservoir_attrs)

    # export fitted parameters: only a subset of the model output is pickled
    pars_to_export = ['capacity (MCM)', 'NOR upper bound', 'NOR lower bound']
    pars = dict(item for item in model_storage.items() if item[0] in pars_to_export)
    pkl_file = PATH_STORAGE / f'{grand_id}.pkl'
    with open(pkl_file, 'wb') as file:
        pickle.dump(pars, file)
    # pd.DataFrame({
    #             'flood': model_storage["NOR upper bound"],
    #             'conservation': model_storage["NOR lower bound"]
    #         }).to_csv(PATH_STORAGE / f'{grand_id}.csv', index=False)

    # define normal operating range (NOR): upper (flood) and lower (conservation) bounds side by side
    NORup = create_storage_harmonic(model_storage['NOR upper bound'], name='flood').set_index('epiweek')
    NORdown = create_storage_harmonic(model_storage['NOR lower bound'], name='conservation').set_index('epiweek')
    NOR = pd.concat([NORup, NORdown], axis='columns')

    # weekly time series of standardised storage combined with NOR
    weekly_storage = model_storage['weekly storage']

    # plot model and save the figure next to the pickled parameters
    title = '{0} - {1}'.format(grand_id, attributes.loc[grand_id, 'DAM_NAME'])
    plot_nor(weekly_storage, NOR, title=title, save=PATH_STORAGE / f'{grand_id}.jpg')

  0%|          | 0/90 [00:00<?, ?it/s]

Fitting targets for dam 41: Ross
Dam 41 cutoff year set back to 1999
Fitting targets for dam 63: Tieton
Dam 63 cutoff year set back to 1982
Fitting targets for dam 293: Fresno
Dam 293 cutoff year set back to 1989
Fitting targets for dam 300: Tiber Dike
Dam 300 cutoff year set back to 2007
Fitting targets for dam 307: Fort Peck Dam
Dam 307 cutoff year set back to 1982
Fitting targets for dam 319: Gibson
Dam 319 cutoff year set back to 1994
Fitting targets for dam 355: Yellowtail
Dam 355 cutoff year set back to 1982
Fitting targets for dam 362: Clark Canyon
Dam 362 cutoff year set back to 1982
Fitting targets for dam 364: Hebgen Dam
Dam 364 cutoff year set back to 1995
Fitting targets for dam 367: Mason
Dam 367 cutoff year set back to 1997
Fitting targets for dam 368: Lima
Dam 368 cutoff year set back to 2007
Fitting targets for dam 372: Unity
Dam 372 cutoff year set back to 1994
Fitting targets for dam 373: Buffalo Bill
Dam 373 cutoff year set back to 1982
Fitting targets for dam 384: J

### Release function

In [11]:
for grand_id, obs in tqdm(timeseries.items()):

    # update reservoir capacity, if the maximum observation exceeds the value reported in GRanD
    reported_capacity = attributes.loc[grand_id, 'CAP_MCM']
    attributes.loc[grand_id, 'CAP_MCM'] = max(reported_capacity, obs['s'].max())

    # fit release model (the attributes are read after the capacity update above);
    # PATH_STORAGE, where the storage cell saves the fitted NOR, is passed as `NOR_path`
    reservoir_attrs = attributes.loc[grand_id]
    daily_ops = obs[['s', 'i', 'r']]
    model_release = fit_release(grand_id,
                                daily_ops=daily_ops,
                                attributes=reservoir_attrs,
                                NOR_path=PATH_STORAGE,
                                cutoff_year=None)

    # export fitted parameters: only a subset of the model output is pickled
    pars_to_export = ['mean inflow (MCM/wk)', 'harmonic parameters', 'residual parameters', 'constraints']
    pars = dict(item for item in model_release.items() if item[0] in pars_to_export)
    pkl_file = PATH_RELEASE / f'{grand_id}.pkl'
    with open(pkl_file, 'wb') as file:
        pickle.dump(pars, file)

    # extract info from the fitted release: average inflow, harmonic release (standardised) and release constraints
    avg_inflow = model_release['mean inflow (MCM/wk)']
    harmonic_table = create_release_harmonic(model_release['harmonic parameters'])
    release_harmonic = harmonic_table.set_index('epiweek').squeeze()
    release_linear = create_release_linear(model_release['residual parameters'])
    Qmin, Qmax = model_release['constraints']
    weekly_release = model_release['weekly release'].set_index('epiweek')

    # plot model and save the figure next to the pickled parameters
    title = '{0} - {1}'.format(grand_id, attributes.loc[grand_id, 'DAM_NAME'])
    plot_release(weekly_release['r'],
                 avg_inflow,
                 release_harmonic,
                 release_linear,
                 Qmin,
                 Qmax,
                 title=title,
                 save=PATH_RELEASE / f'{grand_id}.jpg')

  0%|          | 0/90 [00:00<?, ?it/s]

Fitting release function for dam 41: Ross
Release residual model will be discarded; (release will be based harmonic function only)
Fitting release function for dam 63: Tieton
Release residual model will be discarded; (release will be based harmonic function only)
Fitting release function for dam 293: Fresno
Release residual model will be discarded; (release will be based harmonic function only)
Fitting release function for dam 300: Tiber Dike
Release residual model will be discarded; (release will be based harmonic function only)
Fitting release function for dam 307: Fort Peck Dam
Release residual model will be discarded; (release will be based harmonic function only)
Fitting release function for dam 319: Gibson
Fitting release function for dam 355: Yellowtail
Fitting release function for dam 362: Clark Canyon
Fitting release function for dam 364: Hebgen Dam
Fitting release function for dam 367: Mason
Release residual model will be discarded; (release will be based harmonic function on