# Global Water Watch - US
***

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from pathlib import Path
from datetime import datetime
from tqdm.auto import tqdm
import yaml

from gwwapi.client import get_tiled_reservoirs, get_reservoir_ts
from gwwapi.utils import to_timeseries, to_geopandas, plot_reservoir_timeseries

from lisfloodreservoirs import read_attributes, read_timeseries
from lisfloodreservoirs.utils.reservoir_curves import fit_reservoir_curve, storage_from_elevation, area_from_elevation

## Config

In [3]:
# Country of interest: short code (used in file names and messages) and the
# long name used to filter the COUNTRY column of the dam datasets
country, long_name = 'US', 'United States'

# Bounding box handed to `get_tiled_reservoirs`
# NOTE(review): presumably [lon_min, lat_min, lon_max, lat_max] — confirm against the GWW client
extent = [-125, 24, -66, 50]

# Root directory that holds all the input datasets
# NOTE(review): absolute, machine-specific path; adapt it when running elsewhere
path_datasets = Path('/home/chus-casado/Datos')

## Data

### GRanD

In [4]:
# GRanD v1.3 dams: read the shapefile, index it by GRAND_ID and keep only
# the records whose COUNTRY matches the country of interest
path_grand = path_datasets.joinpath('reservoirs', 'GRanD', 'v1_3')
grand = (
    gpd.read_file(path_grand / 'GRanD_dams_v1_3.shp')
    .set_index('GRAND_ID')
    .loc[lambda dams: dams['COUNTRY'] == long_name]
)
print(f'GRanD contains {len(grand)} reservoirs in {long_name}')

GRanD contains 1920 reservoirs in United States


### Global Dam Watch

In [5]:
# Global Dam Watch v1.0 barriers: read the shapefile, index it by GRAND_ID and
# keep only the records whose COUNTRY matches the country of interest
# NOTE(review): barriers without a GRanD counterpart may share a placeholder
# GRAND_ID, in which case this index is not unique — confirm
path_gdw = path_datasets.joinpath('reservoirs', 'GDW', 'GDW_v1_0_shp')
gdw = (
    gpd.read_file(path_gdw / 'GDW_barriers_v1_0.shp')
    .set_index('GRAND_ID')
    .loc[lambda barriers: barriers['COUNTRY'] == long_name]
)
print(f'GDW contains {len(gdw)} reservoirs in {long_name}')

GDW contains 4862 reservoirs in United States


### ResOpsUS

In [6]:
path_resops = path_datasets / 'reservoirs' / 'ResOpsUS' / 'v2.2'

# list of selected reservoirs
# `squeeze('columns')` always returns a Series, so `res_list` is a list even
# when the file contains a single ID (a bare `squeeze()` would give a scalar)
res_list = (
    pd.read_csv(path_resops / 'selection' / 'reservoirs.txt', header=None)
    .squeeze('columns')
    .tolist()
)

# load reservoir attributes
resops_attrs = read_attributes(
    path=path_resops / 'attributes',
    reservoirs=res_list,
    index_col='GRAND_ID'
)

# load time series
resops_ts = read_timeseries(
    path=path_resops / 'time_series' / 'csv',
    reservoirs=resops_attrs.index,
)

# keep only the variables of interest and rescale storage;
# only existing keys are reassigned, so the dictionary can be updated while iterating
resops_variables = ['inflow', 'outflow', 'storage', 'elevation']
for grand_id, df in resops_ts.items():
    # work on an explicit copy so the frame returned by the reader is not mutated
    df = df[df.columns.intersection(resops_variables)].copy()
    if 'storage' in df.columns:
        df['storage'] = df['storage'] * 1e-6 # convert to hm3
    resops_ts[grand_id] = df

  0%|          | 0/164 [00:00<?, ?it/s]

### Global Water Watch

In [7]:
# root directory of the Global Water Watch data (GIS layers and time series are stored below it)
path_gww = path_datasets.joinpath('reservoirs', 'GWW')

#### Attributes
This bit extracts all the reservoir instances in the GWW database within a bounding box. The resulting list of JSONs is then converted into a `geopandas.GeoDataFrame` of polygons.

In [8]:
# shapefile of GWW reservoirs (local cache of the reservoirs linked to a GRanD ID)
path_gww_gis = path_gww / 'GIS'
path_gww_gis.mkdir(parents=True, exist_ok=True)
gww_shp = path_gww_gis / f'gww_{country}.shp'

if gww_shp.is_file():
    # import reservoirs exported by a previous run
    gww_attrs = gpd.read_file(gww_shp).set_index('gww_id')
    print(f'{len(gww_attrs)} reservoirs loaded from {gww_shp}')
else:
    # get reservoir features from GWW in the US
    features = get_tiled_reservoirs(*extent)

    # convert to GeoPandas
    # NOTE(review): the cached branch indexes the table by 'gww_id'; this branch
    # assumes `to_geopandas` already returns it indexed that way — confirm
    gww_attrs = (
        to_geopandas(features)
        .drop_duplicates()
        .rename(columns={'source_name': 'source'})
    )
    print(f'{len(gww_attrs)} reservoirs in the {country}')

    # find those with GRanD ID (nullable integer, as some reservoirs have none)
    gww_attrs['grand_id'] = gww_attrs['grand_id'].astype('Int64')
    in_grand = gww_attrs['grand_id'].notnull()
    print(f'{in_grand.sum()} reservoirs have a GRanD ID assigned')

    # keep and export only those with GRanD ID, so that a fresh download leaves
    # the same set of reservoirs in memory as a later run that loads the shapefile
    gww_attrs = gww_attrs[in_grand]
    gww_attrs.to_file(gww_shp)
    print(f'Shapefile saved to {gww_shp}')

2076 reservoirs loaded from /home/chus-casado/Datos/reservoirs/GWW/GIS/gww_US.shp


#### Time series
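This snippet extracts time series from the GWW database. For every reservoir, it searches for the available time series of reservoir area and volume, stores them in a dictionary, and exports them as a CSV file. Reservoirs that were already exported are read from their CSV file instead of being downloaded again.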

In [9]:
path_gww_ts = path_gww / 'time_series'
path_gww_ts.mkdir(parents=True, exist_ok=True)

# download/import time series, keyed by GRanD ID
gww_ts = {}
grand_ids = resops_attrs.index.intersection(gww_attrs.grand_id)
for grand_id in tqdm(grand_ids):
    # a GRanD ID must map to exactly one GWW reservoir; test the number of
    # matches explicitly instead of hiding every possible error behind a bare `except`
    matches = gww_attrs[gww_attrs.grand_id == grand_id].index
    if len(matches) != 1:
        print(f'Multiple intances of GRanD ID {grand_id} in GWW')
        continue
    gww_id = matches.item()

    csv_file = path_gww_ts / f'{gww_id}.csv'
    if csv_file.is_file():
        # import time series from a CSV exported by a previous run
        gww_ts[grand_id] = pd.read_csv(csv_file, parse_dates=True, index_col=0)
        continue

    # download time series from GWW, one request per variable
    ts = []
    for variable in ['area', 'volume']:
        data = get_reservoir_ts(
            reservoir_id=gww_id,
            start=datetime(1975, 1, 1),
            stop=datetime(2025, 9, 30),
            variable=variable
        )
        # a variable with no records is skipped
        if len(data) > 0:
            ts.append(to_timeseries(data, convert_units=True))
    if len(ts) == 0:
        print(f'No time series found for GRanD ID {grand_id}')
        continue
    # one column per available variable
    gww_ts[grand_id] = pd.concat(ts, axis=1)

    # export as CSV
    gww_ts[grand_id].to_csv(csv_file, float_format='%.6f')

  0%|          | 0/164 [00:00<?, ?it/s]

Multiple intances of GRanD ID 597 in GWW
Multiple intances of GRanD ID 870 in GWW
Multiple intances of GRanD ID 989 in GWW
Multiple intances of GRanD ID 1033 in GWW
Multiple intances of GRanD ID 1207 in GWW
Multiple intances of GRanD ID 1796 in GWW
Multiple intances of GRanD ID 1872 in GWW


## Analysis

In [None]:
# directory where one figure per reservoir is saved
path_gww_plots = path_gww_ts / 'plots'
path_gww_plots.mkdir(parents=True, exist_ok=True)

# compare GWW time series against ResOpsUS storage and the GDW reference values
for grand_id, ts in tqdm(gww_ts.items(), total=len(gww_ts)):
    gww_id = gww_attrs[gww_attrs.grand_id == grand_id].index.item()
    # NOTE(review): `area_skm` and `storage` are accessed without a guard, so a
    # reservoir lacking either column stops the loop — confirm they are always present
    plot_reservoir_timeseries(
        reservoir=gww_attrs.loc[[gww_id]],
        gww_area=ts.area_skm,
        gww_vol=ts.volume_mcm if 'volume_mcm' in ts else None,
        obs_vol=resops_ts[grand_id].storage,
        ref_area=gdw.loc[grand_id, 'AREA_SKM'],
        ref_vol=gdw.loc[grand_id, 'CAP_MCM'],
        ref_label='GDW',
        # save into the directory created above, so both always point to the same place
        save=path_gww_plots / f'{gww_id}.jpg'
    )