# Global Water Watch - US
***

In [1]:
import pandas as pd
import geopandas as gpd

from pathlib import Path
from datetime import datetime
from tqdm.auto import tqdm

from gwwapi.client import get_tiled_reservoirs, get_reservoir_ts
from gwwapi.utils import to_timeseries, to_geopandas, plot_reservoir_timeseries

from lisfloodreservoirs import read_attributes, read_timeseries
from lisfloodreservoirs.utils.reservoir_curves import bin_data, fit_reservoir_curve, storage_from_elevation, area_from_elevation

## Config

In [2]:
# country code (used in output file names) and the country name as it is
# spelled in the `COUNTRY` field of GDW
country, long_name = 'US', 'United States'

# bounding box passed positionally to `get_tiled_reservoirs`
# NOTE(review): presumably [lon_min, lat_min, lon_max, lat_max] -- confirm
extent = [-125, 24, -66, 50]

# root directory of the local datasets
path_datasets = Path('/home', 'chus-casado', 'Datos')

## Data

### ResOpsUS

In [3]:
path_resops = path_datasets / 'reservoirs' / 'ResOpsUS' / 'v2.2'

# list of selected reservoirs (headerless text file)
# squeezing only the column axis keeps a Series even when the file holds a
# single row, so `tolist()` always returns a list and never a bare scalar
res_list = (
    pd.read_csv(path_resops / 'selection' / 'reservoirs.txt', header=None)
    .squeeze('columns')
    .tolist()
)

# load reservoir attributes
resops_attrs = read_attributes(
    path=path_resops / 'attributes',
    reservoirs=res_list,
    index_col='GRAND_ID'
)

# load time series
resops_ts = read_timeseries(
    path=path_resops / 'time_series' / 'csv',
    reservoirs=resops_attrs.index,
    variables=['inflow', 'outflow', 'storage', 'elevation']
)

# convert storage units to hm3
# NOTE(review): the 1e-6 factor assumes the files store storage in m3 -- confirm
for df in resops_ts.values():
    if 'storage' in df.columns:
        # item access (not attribute access) is the supported way to write a column
        df['storage'] *= 1e-6

  0%|          | 0/164 [00:00<?, ?it/s]

Time series for ID 55 is missing variables: {'elevation'}
Time series for ID 57 is missing variables: {'elevation'}
Time series for ID 60 is missing variables: {'elevation'}
Time series for ID 63 is missing variables: {'elevation'}
Time series for ID 131 is missing variables: {'elevation'}
Time series for ID 132 is missing variables: {'elevation'}
Time series for ID 133 is missing variables: {'elevation'}
Time series for ID 148 is missing variables: {'elevation'}
Time series for ID 180 is missing variables: {'elevation'}
Time series for ID 191 is missing variables: {'elevation'}
Time series for ID 193 is missing variables: {'elevation'}
Time series for ID 198 is missing variables: {'elevation'}
Time series for ID 214 is missing variables: {'elevation'}
Time series for ID 297 is missing variables: {'elevation'}
Time series for ID 361 is missing variables: {'elevation'}
Time series for ID 367 is missing variables: {'elevation'}
Time series for ID 372 is missing variables: {'elevation'}
T

### Global Dam Watch

In [4]:
# barriers of Global Dam Watch, indexed by GRanD ID and limited to the country of interest
path_gdw = path_datasets.joinpath('reservoirs', 'GDW', 'GDW_v1_0_shp')
gdw = gpd.read_file(path_gdw / 'GDW_barriers_v1_0.shp')
gdw = gdw.set_index('GRAND_ID')
gdw = gdw.loc[gdw.COUNTRY == long_name]
print(f'GDW contains {len(gdw)} reservoirs in {long_name}')

# keep only the reservoirs available in both GDW and ResOps
grand_ids = gdw.index.intersection(resops_ts.keys())
gdw = gdw.loc[grand_ids]
resops_ts = dict((gid, resops_ts[gid]) for gid in grand_ids)
print(f'{len(grand_ids)} are both in GDW and ResOps datasets')

GDW contains 4862 reservoirs in United States
164 are both in GDW and ResOps datasets


### Global Water Watch

In [5]:
# folder layout of the local copy of Global Water Watch
path_gww = path_datasets.joinpath('reservoirs', 'GWW')
path_gww_gis = path_gww.joinpath('GIS')
path_gww_ts = path_gww.joinpath('time_series', 'raw')
path_gww_plots = path_gww_ts.joinpath('plots')

# creating the plot folder also creates its parents, i.e. the time series folder
path_gww_plots.mkdir(parents=True, exist_ok=True)

#### Attributes
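This cell extracts all the reservoir instances in the GWW database within a bounding box. The resulting list of JSONs is then converted into a `geopandas.GeoDataFrame` of polygons, limited to the reservoirs whose GRanD ID is in GDW, and saved as a shapefile that is reused in later runs.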

In [6]:
# shapefile of GWW reservoirs
gww_shp = path_gww_gis / f'gww_{country}.shp'

if gww_shp.is_file():
    # import reservoirs
    gww_attrs = gpd.read_file(gww_shp).set_index('grand_id')
    print(f'{len(gww_attrs)} reservoirs loaded from {gww_shp}')
else:
    # get reservoir features from GWW in the US
    features = get_tiled_reservoirs(*extent)

    # convert to GeoPandas
    gww_attrs = to_geopandas(features)
    gww_attrs.drop_duplicates(inplace=True)
    gww_attrs.rename(columns={'source_name': 'source'}, inplace=True)
    print(f'{len(gww_attrs)} reservoirs within the extent')

    # find those in GRanD
    in_grand = gww_attrs.grand_id.isin(gdw.index)
    gww_attrs = gww_attrs[in_grand]
    print(f'{in_grand.sum()} of those reservoirs are in {long_name} and have a GRanD ID assigned')

    # export those with GRanD ID
    gww_attrs.to_file(gww_shp)
    print(f'Shapefile saved to {gww_shp}')

679 reservoirs loaded from /home/chus-casado/Datos/reservoirs/GWW/GIS/gww_US.shp


In [8]:
# keep only the reservoirs available in both GWW and ResOps
grand_ids = gww_attrs.index.intersection(resops_ts.keys())
gww_attrs = gww_attrs.loc[grand_ids]
resops_ts = dict((gid, resops_ts[gid]) for gid in grand_ids)
print(f'{len(grand_ids)} are both in GWW and ResOps datasets')

164 are both in GWW and ResOps datasets


#### Time series 
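This snippet extracts time series from the GWW database. For each reservoir, it searches for the available time series of area and volume, stores them in a dictionary keyed by GRanD ID, and exports them as one CSV file per reservoir. If the CSV file of a reservoir already exists, the time series is read from it instead of being downloaded again.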

In [11]:
# download/import time series
# period requested from GWW for the reservoirs that are not cached as CSV
ts_start, ts_stop = datetime(1975, 1, 1), datetime(2025, 9, 30)

gww_ts = {}
# `Index.unique()` yields each GRanD ID once, in order of appearance, so the
# processing order is reproducible (iterating a `set` gives an arbitrary order)
for grand_id in tqdm(gww_attrs.index.unique()):
    # selecting rows with a list always returns a Series, and `Series.item()`
    # raises ValueError unless it holds exactly one value; catching only that
    # error keeps unrelated failures (and Ctrl+C) from being reported as duplicates
    try:
        gww_id = gww_attrs.loc[[grand_id], 'gww_id'].item()
    except ValueError:
        print(f'Multiple intances of GRanD ID {grand_id} in GWW')
        continue

    csv_file = path_gww_ts / f'{gww_id}.csv'
    if csv_file.is_file():
        # import time series from CSV
        gww_ts[grand_id] = pd.read_csv(csv_file, parse_dates=True, index_col=0)
        continue

    # download time series from GWW
    ts = []
    for variable in ['area', 'volume']:
        data = get_reservoir_ts(
            reservoir_id=gww_id,
            start=ts_start,
            stop=ts_stop,
            variable=variable
        )
        if len(data) > 0:
            ts.append(to_timeseries(data, convert_units=True))
    if len(ts) == 0:
        print(f'No time series found for GRanD ID {grand_id}')
        continue
    # one column per variable found
    gww_ts[grand_id] = pd.concat(ts, axis=1)

    # export as CSV, so that the next run reads it instead of downloading
    gww_ts[grand_id].to_csv(csv_file, float_format='%.6f')

  0%|          | 0/164 [00:00<?, ?it/s]

Multiple intances of GRanD ID 1033 in GWW
Multiple intances of GRanD ID 597 in GWW
Multiple intances of GRanD ID 1207 in GWW
Multiple intances of GRanD ID 1796 in GWW
Multiple intances of GRanD ID 1872 in GWW
Multiple intances of GRanD ID 870 in GWW
Multiple intances of GRanD ID 989 in GWW


## Analysis

### Compare ResOps with GWW

In [14]:
# one figure per reservoir with a GWW time series, comparing it against the
# ResOps observed storage and the GDW reference area and capacity
for grand_id, ts in tqdm(gww_ts.items(), total=len(gww_ts)):
    gww_id = gww_attrs.loc[grand_id, 'gww_id'].item()

    # volume and observed storage are optional inputs of the plot
    if 'volume_mcm' in ts:
        gww_vol = ts.volume_mcm
    else:
        gww_vol = None
    obs = resops_ts[grand_id]
    if 'storage' in obs:
        obs_vol = obs.storage
    else:
        obs_vol = None

    dam_name, dam_country = gdw.loc[grand_id, ['DAM_NAME', 'COUNTRY']]

    plot_reservoir_timeseries(
        reservoir=gww_attrs.loc[[grand_id]],
        gww_area=ts.area_skm,
        gww_vol=gww_vol,
        obs_vol=obs_vol,
        ref_area=gdw.loc[grand_id, 'AREA_SKM'],
        ref_vol=gdw.loc[grand_id, 'CAP_MCM'],
        ref_label='GDW',
        title='GRanD {0} - {1} ({2})'.format(grand_id, dam_name, dam_country),
        # same folder as `path_gww_ts / 'plots'`, created in the paths cell
        save=path_gww_plots / f'{gww_id}.jpg'
    )

  0%|          | 0/157 [00:00<?, ?it/s]