# GloFAS - combine time series
***

**Author:** Chus Casado Rodríguez<br>
**Date:** 22-09-2023<br>

**Introduction:**<br>
This notebook reads the NetCDF files corresponding to every year of the GloFAS long run that contain the time series for all the reservoirs in GloFAS.

The data is reexported in NetCDF files for each reservoir with the complete time series of the long run.

In [1]:
import os
import glob
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm.notebook import tqdm
import yaml

import warnings
warnings.filterwarnings('ignore')

import geopandas as gpd
from datetime import datetime

from metrics import KGEmod, ECDF

### Configuration

In [2]:
with open('config_extraction.yml', 'r', encoding='utf8') as ymlfile:
    cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)

path_res = Path(cfg['path_res'])
path_ts = Path(cfg['path_ts'])
path_out = Path(cfg['path_out'])
if path_out.exists() is False:
    path_out.mkdir()
var = cfg['var']

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'Z:\\nahaUsers\\casadje\\GloFASv4\\extraction\\dis_US'

In [None]:
path_datasets = Path('Z:/nahaUsers/casadje/datasets/')

### Rename variable of the NetCDF files

In [13]:
for file in glob.glob(f'{path_out}/*.nc'):
    da = xr.open_dataarray(file)
    da.close()
    # print(file)
    if da.name != 'rsfil':
        print(da.name)
        da.name = var
        da.to_netcdf(file.replace('\\extraction\\', '\\'))

None


### Combine time series

In [5]:
# load data
da = xr.open_mfdataset(f'{path_out}/*.nc')[var].compute()

In [6]:
# create NetCDF for each reservoir
path = Path(f'../data/reservoirs/GloFAS/long_run/{var}')
for id in da.ResID.data:
    da_id = da.sel(ResID=id)
    da_id.to_netcdf(path / f'{id:04}.nc')

## GloFAS vs ResOpsUS

### Reservoirs
#### GloFAS

In [None]:
# load shapefile of GloFAS reservoirs
glofas = gpd.read_file('../data/reservoirs/GloFAS/GloFAS_reservoirs.shp') 
glofas.set_index('ResID', drop=True, inplace=True)
glofas = glofas.loc[da.ResID.data]
glofas.GRAND_ID = glofas.GRAND_ID.astype(int)

glofas.shape

#### GRanD

In [None]:
# load GRanD data set
path_GRanD = path_datasets / 'reservoirs/GRanD/v1_3/'
grand = gpd.read_file(path_GRanD / 'grand_dams_v1_3.shp')
grand.set_index('GRAND_ID', drop=True, inplace=True)
grand = grand.replace(-99, np.nan)

# keep only reservoirs in ResOpsUS
mask = grand.index.intersection(glofas.GRAND_ID)
grand = grand.loc[mask]

grand.shape

#### ResOpsUS

In [None]:
path_ResOps = Path(path_datasets / 'reservoirs' / 'ResOpsUS')

series = {}
for ID in tqdm(glofas.GRAND_ID):
    # load timeseries
    series_id = pd.read_csv(path_ResOps / 'time_series_all' / f'ResOpsUS_{ID}.csv', parse_dates=True, index_col='date')
    series_id.columns.name = 'variable'
    # remove empty time series
    series_id.dropna(axis=1, how='all', inplace=True)
    # remove duplicated index
    series_id = series_id[~series_id.index.duplicated(keep='first')]
    # organize in a DataFrame by variable
    series[ID] = series_id

### Performance in reservoir filling

In [None]:
# plots will be saved in this path
path_plots = Path('GloFAS')
if path_plots.exists() is False:
    path_plots.mkdir()

In [None]:
glofas[f'KGE_{var}'] = np.nan
for id in da.ResID.data:

    fig, ax = plt.subplots(figsize=(12, 3))
    
    # GloFAS reservoir limits
    for col in ['clim', 'flim', 'nlim']:
        lim = glofas.loc[id, col]
        ax.axhline(lim, ls=':', c='k', lw=.5)
        # ax.text(datetime(2020, 1, 1), lim, col, horizontalalignment='right', verticalalignment='bottom')
    
    # GloFAS simulation
    sim = da.sel(ResID=id).to_pandas()
    ax.scatter(sim.index, sim, s=1, c='indianred', label='sim')
        
    # observation in ResOpsUS
    grand_id = glofas.loc[id, 'GRAND_ID']
    if 'storage' in series[grand_id].columns:
        obs1 = series[grand_id].storage / glofas.loc[id, 'stor']
        ax.plot(obs1, c='k', ls='--', lw=1, label='obs (norm. GloFAS)')
        obs2 = series[grand_id].storage / grand.loc[grand_id, 'CAP_MCM']
        ax.plot(obs2, c='steelblue', lw=1, label='obs (norm. GRanD)')

        ax.set(xlim=(datetime(1982, 1, 1), datetime(2020, 1, 1)),
               ylim=(-.02, None))
        
        # performance
        try:
            glofas.loc[id, f'KGE_{var}'] = max(KGEmod(obs1, sim)[0], KGEmod(obs2, sim)[0])
            ax.set_title("{0:>3}  {1} | KGE' = {2:.3f}".format(id, *glofas.loc[id, ['DAM_NAME', f'KGE_{var}']]))
        except:
            ax.set_title('{0:>3}  {1}'.format(id, glofas.loc[id, 'DAM_NAME']))
            pass
    else:
        ax.set_title('{0:>3}  {1}'.format(id, glofas.loc[id, 'DAM_NAME']))
    
    fig.legend(frameon=False, bbox_to_anchor=[.875, .2, .2, .4])
    
    plt.savefig(path_plots / f'{var}_{id:03}.jpg', dpi=300, bbox_inches='tight')

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cartopy.crs as ccrs
import cartopy.feature as cf
from typing import List, Tuple
        
        
def create_cmap(cmap: str, bounds: List, name: str = '', specify_color: Tuple = None):
    """Given the name of a colour map and the boundaries, it creates a discrete colour ramp for future plots
    
    Inputs:
    ------
    cmap:          string. Matplotlib's name of a colourmap. E.g. 'coolwarm', 'Blues'...
    bounds:        list. Values that define the limits of the discrete colour ramp
    name:          string. Optional. Name given to the colour ramp
    specify_color: tuple (position, color). It defines a specific color for a specific position in the colour scale. Position must be an integer, and color must be either a colour name or a tuple of 4 floats (red, gren, blue, transparency)
    
    Outputs:
    --------
    cmap:   List of colours
    norm:   List of boundaries
    """
    
    cmap = plt.get_cmap(cmap)
    cmaplist = [cmap(i) for i in range(cmap.N)]
    if specify_color is not None:
        cmaplist[specify_color[0]] = specify_color[1]
    cmap = mpl.colors.LinearSegmentedColormap.from_list(name, cmaplist, cmap.N)
    norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
    
    return cmap, norm

In [None]:
# config
cmap_kge, norm_kge = create_cmap('RdBu', [-100, -1, -.75, -.5, -.25 ,0, .25, .5, .75, 1])
proj = ccrs.PlateCarree()
fig, ax = plt.subplots(figsize=(20, 5), subplot_kw=dict(projection=proj))
ax.set_extent([-127, -67.5, 23.5, 55], crs=proj)

# background map
ax.add_feature(cf.NaturalEarthFeature('physical', 'land', '110m', edgecolor='face', facecolor='lightgray'), alpha=.5, zorder=0)
# scatter plot
sct = plt.scatter(glofas.geometry.x, glofas.geometry.y,
                  c=glofas[f'KGE_{var}'], cmap=cmap_kge, norm=norm_kge, edgecolor='w', lw=1, 
                  s=glofas.stor**.5,alpha=.9)
# setup: color bar, title...
cbar = plt.colorbar(sct, shrink=.666)
cbar.set_label("KGE'", rotation=0)
ax.text(.5, 1.05, 'GloFASv4 performance in simulating storage', horizontalalignment='center', verticalalignment='bottom', transform=ax.transAxes, fontsize=12)
ax.axis('off');

plt.savefig(path_plots / f'{var}_map_KGE.jpg', dpi=300, bbox_inches='tight')

In [None]:
ecdf = ECDF(glofas[f'KGE_{var}'], ylabel='KGE (-)', title='reservoir storage')

In [None]:
fig, ax = plt.subplots(figsize=(4, 4))
ax.scatter(glofas.stor, glofas[f'KGE_{var}'], s=5, alpha=.5)
ax.set(xlabel='storage (hm3)',
       ylabel='KGE (-)');

In [None]:
(glofas[f'KGE_{var}'] > .0).sum() / glofas.shape[0]

### Time series decomposition

In [None]:
rsfil = da.to_pandas()
rsfil = rsfil.loc['1982-01-01':,:]

In [None]:
rsfil

In [None]:
rsfil_m = rsfil.resample('M').mean()

rsfil_m.head()

In [None]:
# rsfil_y = rsfil.groupby(rsfil.index.year).mean()
rsfil_y = rsfil.resample('Y').mean()

rsfil_y.head()

In [None]:
rsfil_mm = rsfil_m.groupby(rsfil_m.index.month).mean()

In [None]:
id = glofas[f'KGE_{var}'].idxmax()
id

In [None]:
rsfil_subtracted = pd.merge(rsfil, rsfil_y, left_index=True, right_index=True, suffixes=('', '_mean'))

# Subtract the annual mean from the daily time series
rsfil_subtracted['value'] = rsfil_subtracted['value'] - rsfil_subtracted['value_mean']

In [None]:
rsfil_y = rsfil_y.reindex(rsfil.index, fill_value=)

In [None]:
rsfil_y

In [None]:
rsfil.groupby(rsfil.index.year).transform('mean')

In [None]:
ncols = 3
fig, ax = plt.subplots(figsize=(3 * ncols, 6), ncols=ncols, sharex=True)

ax[0].plot(rsfil_y[id], rsfil_y.index)
ax[0].set(xlim=(-.02, 1.02),
          ylim=(rsfil_y.index.max(), rsfil_y.index.min()));

ax[1].plot(rsfil_m[id], rsfil_m.index)
ax[1].set(xlim=(-.02, 1.02),
          ylim=(rsfil_m.index.max(), rsfil_m.index.min()));

ax[2].plot(rsfil_mm[id], rsfil_mm.index)
# ax[2].set(xlim=(-.02, 1.02),
#           ylim=(rsfil_m.index.max(), rsfil_m.index.min()));

In [None]:
rsfil_m.head(2)

In [None]:
rsfil_y.head(2)

In [None]:
rsfil_y.reindex(rsfil_m.index.year, method='ffill')

In [None]:
rsfil_y.index.to_period('M')

In [None]:
rsfil_y.index = rsfil_y.index.to_period('M')
rsfil_m.subtract(rsfil_y.loc[rsfil_m.index.year])