# Global Water Watch - ES
***

In [1]:
import os
from datetime import datetime
from pathlib import Path

import geopandas as gpd
import pandas as pd
from tqdm.auto import tqdm

from gwwapi.client import get_tiled_reservoirs, get_reservoir_ts
from gwwapi.utils import to_timeseries, to_geopandas, plot_reservoir_timeseries

from reservoirs_lshm import read_attributes, read_timeseries

## Config

In [2]:
# short code (used in file names) and long name (used to filter GDW) of the country
country = 'ES'
long_name = 'Spain'
# bounding box passed to the GWW query
extent = [-9.3, 36, 3.2, 43.5] # W, S, E, N

# root directory of the datasets. It defaults to the original location, but it can be
# overridden with the environment variable GWW_DATASETS_DIR so that the notebook
# also runs on machines where that absolute path does not exist
path_datasets = Path(os.environ.get('GWW_DATASETS_DIR', '/home/chus-casado/Datos'))

## Data

### Global Dam Watch

In [3]:
# folder with the GDW v1.0 shapefiles
path_gdw = path_datasets.joinpath('reservoirs', 'GDW', 'GDW_v1_0_shp')

# read the barriers shapefile, index it by 'GRAND_ID' and keep only the country of interest
gdw = (
    gpd.read_file(path_gdw / 'GDW_barriers_v1_0.shp')
    .set_index('GRAND_ID')
    .loc[lambda barriers: barriers.COUNTRY == long_name]
)
print(f'GDW contains {len(gdw)} reservoirs in {long_name}')

GDW contains 578 reservoirs in Spain


### ResOpsES

In [4]:
# root folder of ResOpsES v3.0
path_resops = path_datasets.joinpath('reservoirs', 'ResOpsES', 'v3.0')

# reservoir attributes, indexed by 'GRAND_ID'
resops_attrs = read_attributes(path_resops / 'attributes' / 'other', index_col='GRAND_ID')

# time series of the reservoirs listed in the attribute table
resops_ts = read_timeseries(
    path=path_resops / 'time_series' / 'csv',
    reservoirs=resops_attrs.index,
)

# rescale storage by 1e-6 (conversion to hm3) and keep only the variables of interest
variables = ['inflow', 'outflow', 'storage', 'elevation']
for grand_id, df in resops_ts.items():
    if 'storage' in df.columns:
        df['storage'] *= 1e-6
    # replacing the value of an existing key is safe while iterating over the items
    resops_ts[grand_id] = df[df.columns.intersection(variables)]

  0%|          | 0/251 [00:00<?, ?it/s]

File /home/chus-casado/Datos/reservoirs/ResOpsES/v3.0/time_series/csv/2797.csv doesn't exist
File /home/chus-casado/Datos/reservoirs/ResOpsES/v3.0/time_series/csv/3488.csv doesn't exist
File /home/chus-casado/Datos/reservoirs/ResOpsES/v3.0/time_series/csv/2924.csv doesn't exist
File /home/chus-casado/Datos/reservoirs/ResOpsES/v3.0/time_series/csv/2916.csv doesn't exist
File /home/chus-casado/Datos/reservoirs/ResOpsES/v3.0/time_series/csv/2915.csv doesn't exist
File /home/chus-casado/Datos/reservoirs/ResOpsES/v3.0/time_series/csv/3478.csv doesn't exist
File /home/chus-casado/Datos/reservoirs/ResOpsES/v3.0/time_series/csv/2920.csv doesn't exist
File /home/chus-casado/Datos/reservoirs/ResOpsES/v3.0/time_series/csv/2652.csv doesn't exist
File /home/chus-casado/Datos/reservoirs/ResOpsES/v3.0/time_series/csv/2926.csv doesn't exist
File /home/chus-casado/Datos/reservoirs/ResOpsES/v3.0/time_series/csv/2927.csv doesn't exist
File /home/chus-casado/Datos/reservoirs/ResOpsES/v3.0/time_series/csv/

In [None]:
# GRanD IDs present in both the GDW table and the ResOps time series
grand_ids = gdw.index.intersection(resops_ts.keys())

# trim both datasets to the common reservoirs
gdw, resops_ts = gdw.loc[grand_ids], {gid: resops_ts[gid] for gid in grand_ids}
print(f'{len(grand_ids)} are both in GDW and ResOps datasets')

### Global Water Watch

In [6]:
# folder structure of the Global Water Watch (GWW) data
path_gww = path_datasets.joinpath('reservoirs', 'GWW')
path_gww_gis = path_gww.joinpath('GIS')
path_gww_ts = path_gww.joinpath('time_series', 'raw')
path_gww_plots = path_gww_ts.joinpath('plots')

# create the plot folder (and, with it, the time series folder it is nested in)
path_gww_plots.mkdir(exist_ok=True, parents=True)

#### Attributes
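This cell retrieves every reservoir in the GWW database within the bounding box `extent`. The resulting list of JSON features is converted into a `geopandas.GeoDataFrame` of polygons, filtered to the reservoirs whose GRanD ID is in GDW, and cached as a shapefile so that later runs load it from disk instead of querying GWW again.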

In [7]:
# shapefile of GWW reservoirs
gww_shp = path_gww_gis / f'gww_{country}.shp'

if gww_shp.is_file():
    # import reservoirs
    gww_attrs = gpd.read_file(gww_shp).set_index('gww_id')
    print(f'{len(gww_attrs)} reservoirs loaded from {gww_shp}')
else:
    # get reservoir features from GWW in the US
    features = get_tiled_reservoirs(*extent)

    # convert to GeoPandas
    gww_attrs = to_geopandas(features)
    gww_attrs.drop_duplicates(inplace=True)
    gww_attrs.rename(columns={'source_name': 'source'}, inplace=True)
    print(f'{len(gww_attrs)} reservoirs within the extent')

    # find those in GRanD
    in_grand = gww_attrs.grand_id.isin(gdw.index)
    gww_attrs = gww_attrs[in_grand]
    print(f'{in_grand.sum()} of those reservoirs are in {long_name} and have a GRanD ID assigned')

    # export those with GRanD ID
    gww_attrs.to_file(gww_shp)
    print(f'Shapefile saved to {gww_shp}')

203 reservoirs loaded from /home/chus-casado/Datos/reservoirs/GWW/GIS/gww_ES.shp


#### Time series 
##### Post-processed monthly time series

 ```Python
# retrieve time series
gww_ts = client.get_reservoir_ts_monthly(
    reservoir_id=ID,
    start=datetime(1985, 1, 1),
    stop=datetime(2025, 7, 1)
)
gww_ts = pd.DataFrame.from_dict(gww_ts).set_index('t')
gww_ts.index = pd.to_datetime(gww_ts.index)
gww_ts.index.name = 'Timestamp'
gww_ts.drop(['name', 'unit'], axis=1, inplace=True)
gww_ts.rename(columns={'value': 'area_skm'}, inplace=True)
gww_ts.area_skm *= 1e-6
```

##### Raw time series
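This snippet extracts time series from the GWW database. It searches for the available time series of reservoir area and volume, stores them in a dictionary keyed by GRanD ID, and exports each one as a CSV file. Reservoirs whose CSV file already exists are read from disk instead of being downloaded again.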

In [None]:
# download/import time series
gww_ts = {}
for grand_id in tqdm(set(gww_attrs.grand_id)):
    try:
        gww_id = gww_attrs[gww_attrs.grand_id == grand_id].index.item()
    except:
        print(f'Multiple intances of GRanD ID {grand_id} in GWW')
        continue
    
    csv_file = path_gww_ts / f'{gww_id}.csv'
    if csv_file.is_file():
        # import time series from CSV
        gww_ts[grand_id] = pd.read_csv(csv_file, parse_dates=True, index_col=0)
        continue
    
    # download time series from GWW
    ts = []
    for variable in ['area', 'volume']:
        data = get_reservoir_ts(
            reservoir_id=gww_id,
            start=datetime(1975, 1, 1),
            stop=datetime(2025, 9, 30),
            variable=variable
        )
        if len(data) > 0:
            ts.append(to_timeseries(data, convert_units=True))
    if len(ts) == 0:
        print(f'No time series found for GRanD ID {grand_id}')
        continue
    gww_ts[grand_id] = pd.concat(ts, axis=1)
    
    # export as CSV
    gww_ts[grand_id].to_csv(csv_file, float_format='%.6f')

  0%|          | 0/202 [00:00<?, ?it/s]

Multiple intances of GRanD ID 2779 in GWW


## Analysis

In [9]:
# plot, for every reservoir with GWW time series, the GWW area/volume against the
# observed storage and the reference area/volume, and save the figure as JPG
for grand_id, ts in tqdm(gww_ts.items(), total=len(gww_ts)):
    gww_id = gww_attrs[gww_attrs.grand_id == grand_id].index.item()
    # a reservoir may be absent from `resops_ts` (the loading cell reports missing CSV
    # files); in that case plot without observed storage instead of raising a KeyError
    obs = resops_ts[grand_id] if grand_id in resops_ts else None
    plot_reservoir_timeseries(
        reservoir=gww_attrs.loc[[gww_id]],
        gww_area=ts.area_skm,
        gww_vol=ts.volume_mcm if 'volume_mcm' in ts else None,
        obs_vol=obs.storage if obs is not None and 'storage' in obs else None,
        ref_area=resops_attrs.loc[grand_id, 'AREA_MNL'],
        ref_vol=resops_attrs.loc[grand_id, 'VOL_MNL'],
        ref_label='SNCZI',
        title='GRanD {0} - {1} ({2})'.format(grand_id, *gdw.loc[grand_id, ['DAM_NAME', 'COUNTRY']]),
        # reuse the plot folder defined (and created) together with the other GWW paths
        save=path_gww_plots / f'{gww_id}.jpg'
    )

  0%|          | 0/201 [00:00<?, ?it/s]