# ResOpsPT: time series
***

**Author:** Chus Casado<br>
**Date:** 04-04-2025<br>

**Introduction:**<br>
This code creates the time series for the reservoirs in ResOpsPT. The time series include records from CONAGUA and simulations from GloFAS. The result is a time series that combines the observed data from CONAGUA with the simulation from GloFASv4. For each reservoir, these time series are exported in both CSV and NetCDF formats.

Records are cleaned to avoid errors:

* Outliers in the **storage** time series are filtered by comparison with a moving median (window 7 days). If the relative difference of a given storage value and the moving median exceeds a threshold, the value is removed. This procedure is encapsulated in the function `lisfloodreservoirs.utils.timeseries.clean_storage()`
* Outliers in the **inflow** time series are removed using two conditions: one based on the gradient, and the other using an estimated inflow based on the water balance. When both conditions are met, the value is removed. Since inflow time series cannot contain missing values when used in the reservoir simulation, a simple linear interpolation is used to fill in gaps up to 7 days. This procedure is encapsulated in the function `lisfloodreservoirs.utils.timeseries.clean_inflow()`.

**To do:**<br>
* [ ] 8 reservoirs that should be in GloFAS don't have time series.
* [ ] Plot time series
* [ ] Make sure that there aren't negative values in the time series, nor zeros in storage.
* [ ] Check the quality of the data by closing the mass balance when possible. <font color='steelblue'>I've used the mass balance to identify errors in the inflow time series (function `clean_inflow`).</font>.
* [ ] Fill in the inflow time series with the mass balance, if possible. <font color='steelblue'>I've filled in gaps in the inflow time series with linear interpolation up to 7-day gaps (function `clean_inflow`).</font>.

In [1]:
import os
os.environ['USE_PYGEOS'] = '0'
import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr
# from datetime import datetime, timedelta
from tqdm.auto import tqdm
from copy import deepcopy

from lisfloodreservoirs.utils import DatasetConfig
from lisfloodreservoirs import read_attributes
# from lisfloodreservoirs.utils.plots import plot_resops, reservoir_analysis, compare_flows
from lisfloodreservoirs.utils.timeseries import time_encoding, clean_storage, clean_inflow, quantile_mapping

from utils_pt import fit_reservoir_curve, plot_timeseries_PT

## Configuration

In [17]:
# read the dataset configuration (paths, study period, options) from the YAML file
cfg = DatasetConfig('config_dataset.yml')

# directory with the catchment meteorological statistics
PATH_METEO = cfg.PATH_RESOPS / 'ancillary' / 'catchstats'
# directory where the plots of the time series are saved;
# `parents=True` so that the cell also works when `cfg.PATH_TS` does not exist yet
PATH_PLOTS = cfg.PATH_TS / 'plots'
PATH_PLOTS.mkdir(parents=True, exist_ok=True)

print(f'Time series will be saved in {cfg.PATH_TS}')

Time series will be saved in Z:\nahaUsers\casadje\datasets\reservoirs\ResOpsPT\v1.0\time_series


## Data

### Reservoirs

In [3]:
# load reservoir shapefile
reservoirs = gpd.read_file(cfg.PATH_RESOPS / 'GIS' / 'reservoirs_HDMS+GRAND.shp')
# remove those without associated GRanD ID; the explicit copy makes sure that the
# following assignment modifies an independent table and not a filtered view
# (avoids pandas' SettingWithCopyWarning)
reservoirs = reservoirs[reservoirs.GRAND_ID.notnull()].copy()
# use the GRanD ID, cast to integer, as the index of the table
reservoirs['GRAND_ID'] = reservoirs['GRAND_ID'].astype(int)
reservoirs = reservoirs.set_index('GRAND_ID', drop=True)

print(f'{len(reservoirs)} stations selected')

17 stations selected


### Attributes


In [4]:
# read the attribute tables stored in `cfg.PATH_ATTRS` into a single table
# (presumably one row per reservoir, indexed by GRanD ID -- verify against `read_attributes`)
attributes = read_attributes(cfg.PATH_ATTRS)
print(f'{len(attributes)} reservoirs in the attribute tables')

17 reservoirs in the attribute tables


### Time series
#### HDMS

In [5]:
# Load, clean and trim the observed time series of every reservoir.
# The result is a dictionary {GRanD ID: DataFrame}; the start and end of each
# trimmed record are also stored in the table of attributes.
timeseries = {}
for grand_id, efas_id in tqdm(reservoirs.EFAS_ID.items(), total=len(reservoirs), desc='load timeseries'):

    # the files of observed time series are named after the EFAS ID
    file = cfg.PATH_OBS_TS / f'{efas_id}.csv'
    if not file.is_file():
        print(f'File not found: {file}')
        continue

    ts = pd.read_csv(file, parse_dates=['time'], index_col='time')

    # remove negative values
    ts[ts < 0] = np.nan

    # clean outliers in storage
    if 'storage' in ts.columns:
        ts.storage /= 1000 # convert to hm3
        # NOTE(review): the in-place cleaning only reaches `ts` if `ts.storage` shares
        # memory with the DataFrame column; confirm under pandas copy-on-write
        clean_storage(ts.storage, w=7, error_thr=.1, inplace=True)

    # clean inflow time series
    if 'inflow' in ts.columns:
        clean_inflow(
            ts.inflow,
            storage=ts.storage if 'storage' in ts.columns else None,
            # NOTE(review): the keyword is spelled `outlfow`; presumably it matches the
            # signature of `clean_inflow` -- verify before renaming it
            outlfow=ts.outflow if 'outflow' in ts.columns else None,
            grad_thr=1e4,
            balance_thr=5,
            int_method='linear',
            inplace=True
        )

    # first and last time steps with at least one valid value in any variable;
    # both are None when the file contains no valid record at all
    first_valid, last_valid = ts.first_valid_index(), ts.last_valid_index()
    if first_valid is None:
        print(f'No valid records in: {file}')
        continue

    # trim time series to the overlap between the available records and the study period
    start, end = max(cfg.START, first_valid), min(cfg.END, last_valid)
    attributes.loc[grand_id, ['TIME_SERIES_START', 'TIME_SERIES_END']] = start, end
    ts = ts.loc[start:end]

    # save
    timeseries[grand_id] = ts

load timeseries:   0%|          | 0/17 [00:00<?, ?it/s]

##### **Plot timeseries**

In [6]:
# plot the observed time series of every reservoir and save each figure as a JPG file
for grand_id, ts in tqdm(timeseries.items()):
    # variables that are not available for this reservoir are passed as None
    observed = {
        var: ts[var] if var in ts.columns else None
        for var in ['storage', 'elevation', 'inflow', 'outflow']
    }
    # values from the table of attributes passed to the plot as references
    dam_name = attributes.loc[grand_id, 'DAM_NAME']
    capacity = attributes.loc[grand_id, 'CAP_MCM']
    elevation = attributes.loc[grand_id, 'ELEV_MASL']
    title = '{0} - {1}'.format(grand_id, dam_name)
    plot_timeseries_PT(
        **observed,
        max_storage={'GRanD': capacity},
        max_elevation={'GRanD': elevation},
        title=title,
        save=PATH_PLOTS / f'{grand_id}.jpg'
    )

  0%|          | 0/17 [00:00<?, ?it/s]

In [7]:
# convert to xarray.Dataset
# The index of the observed tables is named 'time' (column read from the CSV files),
# whereas the simulated and meteorological datasets use 'date' as time dimension.
# The index is renamed here so that all datasets share the same dimension and the
# observations are aligned (and trimmed) with the rest when merging and exporting.
xarray_list = [
    xr.Dataset.from_dataframe(df.rename_axis(index='date')).assign_coords(GRAND_ID=key)
    for key, df in timeseries.items()
]
# stack the reservoirs along a new dimension 'GRAND_ID'
obs = xr.concat(xarray_list, dim='GRAND_ID')

#### EFAS

##### Inflow 

In [12]:
# import GloFAS simulation: open all the NetCDF files as one dataset, load it into
# memory, harmonise the names with the observed dataset and drop the ancillary
# variables/coordinates that may be present in the files
sim = (
    xr.open_mfdataset(cfg.PATH_SIM_TS.glob('*.nc'))
    .compute()
    .rename({'time': 'date', 'id': 'GRAND_ID', 'dis': 'inflow'})
    .drop_vars(['surface', 'latitude', 'longitude', 'lat', 'lon'], errors='ignore')
)
# shift the time stamps one day backwards
# (presumably the simulation labels the end of the time step -- TODO confirm)
sim['date'] = sim['date'] - pd.Timedelta(days=1)

##### Meteo

In [20]:
# load meteorological time series: every subdirectory corresponds to a variable, and
# only the field '{variable}_mean' of its NetCDF files is kept
path_meteo_areal = cfg.PATH_RESOPS / 'ancillary' / 'catchstats'
variables = [x.stem for x in path_meteo_areal.iterdir() if x.is_dir()]
areal_means = {}
for var in variables:
    files = f'{path_meteo_areal}/{var}/*.nc'
    areal_means[var] = xr.open_mfdataset(files)[f'{var}_mean'].compute()
meteo_areal = xr.Dataset(areal_means)
# shift the time stamps one day backwards
meteo_areal['time'] = meteo_areal['time'] - pd.Timedelta(days=1)

# keep only the catchments included in the table of attributes
IDs = list(attributes.index.intersection(meteo_areal.id.data))
meteo_areal = meteo_areal.sel(id=IDs)

# harmonise the names of dimensions and variables with the other datasets
new_names = {
    'id': 'GRAND_ID',
    'time': 'date',
    'e0': 'evapo_areal',
    'pr': 'precip_areal',
    'ta': 'temp_areal',
}
meteo_areal = meteo_areal.rename(new_names)

## Prepare dataset

### Convert units

In [21]:
# Add normalised versions (suffix '_norm') of the observed, simulated and
# meteorological variables, expressed as a fraction of the reservoir capacity.
if cfg.NORMALIZE:

    # reservoir attributes used to normalize the dataset
    area_sm = xr.DataArray.from_series(attributes.AREA_SKM) * 1e6 # m2
    capacity_cm = xr.DataArray.from_series(attributes.CAP_MCM) * 1e6 # m3
    catchment_sm = xr.DataArray.from_series(attributes.CATCH_SKM) * 1e6 # m2

    # Observed timeseries
    # -------------------
    # iterate over a snapshot of the variable names, since new variables are added
    # to the dataset inside the loop
    for var in list(obs.data_vars):
        # convert variables in hm3 to fraction of reservoir capacity [-]
        if var in ['storage', 'evaporation']:
            obs[f'{var}_norm'] = obs[var] * 1e6 / capacity_cm
        # convert variables in m3/s to fraction of reservoir capacity [-]
        # (the factor 24 * 3600 converts the flow into a volume per day)
        elif var in ['inflow', 'outflow']:
            obs[f'{var}_norm'] = obs[var] * 24 * 3600 / capacity_cm

    # Simulated timeseries
    # -------------------
    # the type of variable is identified by the prefix of its name (text before the first '_')
    for var in list(sim.data_vars):
        prefix = var.split('_')[0]
        # convert variables in hm3 to fraction of reservoir capacity [-]
        if prefix in ['storage']:
            sim[f'{var}_norm'] = sim[var] * 1e6 / capacity_cm
        # convert variables in m3/s to fraction of reservoir capacity [-]
        elif prefix in ['inflow', 'outflow']:
            sim[f'{var}_norm'] = sim[var] * 24 * 3600 / capacity_cm

    # Catchment meteorology
    # ---------------------
    # convert areal evaporation and precipitation from mm to fraction filled:
    # depth [mm] * catchment area [m2] * 1e-3 [m/mm] = volume [m3]
    for var in ['evapo', 'precip']:
        meteo_areal[f'{var}_areal_norm'] = meteo_areal[f'{var}_areal'] * catchment_sm * 1e-3 / capacity_cm

### Export

In [None]:
# Combine, for every reservoir, the observed, simulated and meteorological time
# series into a single dataset trimmed to the observed period.
# NOTE(review): the export itself (CSV and NetCDF) is currently commented out and the
# loop stops after the first reservoir processed (`break` at the end).
path_csv = cfg.PATH_TS / 'csv'
path_csv.mkdir(parents=True, exist_ok=True)
path_nc = cfg.PATH_TS / 'netcdf'
path_nc.mkdir(parents=True, exist_ok=True)

for grand_id in tqdm(attributes.index, desc='Exporting time series'):

    # skip the reservoirs without observed time series (e.g. file not found when
    # loading the records); selecting them would raise a KeyError
    if grand_id not in obs.GRAND_ID.data:
        continue

    # concatenate time series
    # (`drop_vars` replaces the deprecated `Dataset.drop`)
    ds = obs.sel(GRAND_ID=grand_id).drop_vars(['GRAND_ID'])
    if grand_id in sim.GRAND_ID.data:
        ds = xr.merge((ds, sim.sel(GRAND_ID=grand_id).drop_vars(['GRAND_ID'])))
    if grand_id in meteo_areal.GRAND_ID.data:
        ds = xr.merge((ds, meteo_areal.sel(GRAND_ID=grand_id).drop_vars(['GRAND_ID'])))

    # # delete empty variables
    # for var in list(ds.data_vars):
    #     if (ds[var].isnull().all()):
    #         del ds[var]
        
    # trim time series to the observed period
    start, end = attributes.loc[grand_id, ['TIME_SERIES_START', 'TIME_SERIES_END']]
    ds = ds.sel(date=slice(start, end))

#     # create time series of temporal attributes
#     ds['year'] = ds.date.dt.year
#     ds['month'] = ds.date.dt.month
#     ds['month_sin'], ds['month_cos'] = time_encoding(ds['month'], period=12)
#     ds['weekofyear'] = ds.date.dt.isocalendar().week
#     ds['woy_sin'], ds['woy_cos'] = time_encoding(ds['weekofyear'], period=52)
#     ds['dayofyear'] = ds.date.dt.dayofyear
#     ds['doy_sin'], ds['doy_cos'] = time_encoding(ds['dayofyear'], period=365)
#     ds['dayofweek'] = ds.date.dt.dayofweek
#     ds['dow_sin'], ds['dow_cos'] = time_encoding(ds['dayofweek'], period=6)

#     # export CSV
#     # ..........
#     ds.to_pandas().to_csv(path_csv / f'{grand_id}.csv')

#     # export NetCDF
#     # .............
#     ds.to_netcdf(path_nc / f'{grand_id}.nc')
    
    break