In [48]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
from scipy.stats import sigmaclip, zscore
import warnings
import os
from scipy.signal import savgol_filter
from pathlib import Path
import geopandas as gpd
import hvplot.pandas
import geoviews as gv
import holoviews as hv

import sys
# Make the project's src directory importable so the local `tmsswot` package
# imported below can be resolved.
# NOTE(review): absolute, machine-specific path; the notebook will not import
# `tmsswot` on another machine unless this location exists there.
sys.path.append("/tiger1/pdas47/tmsosPP/src")

from tmsswot.tmsswot import calculate_storage

# Select the Bokeh plotting backend for HoloViews-based output.
hv.extension('bokeh')

In [49]:
# Reservoir to analyse, given as its tmsos_id (a zero-padded string).
RESERVOIR = '0810'

# Project folders for results and input data.
RESULTS_DIR = Path('/tiger1/pdas47/tmsosPP') / 'results'
DATA_DIR = Path('/tiger1/pdas47/tmsosPP') / 'data'

In [50]:
# Load the validation reservoirs: one point layer and one polygon layer,
# both keyed by 'tmsos_id'.
val_pts = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/validation-reservoirs-grand-pts.geojson'))
val_polys = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/validation-reservoirs-grand.geojson'))

# Every reservoir in the point layer is selected.
selected_reservoirs = val_pts['tmsos_id'].tolist()
# Lookup table: tmsos_id -> reservoir name.
res_names = val_pts.set_index('tmsos_id')['name'].to_dict()

RESERVOIR_NAME = res_names[RESERVOIR]

val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]

# Polygon record(s) of the selected reservoir; each attribute below is read
# from the first matching record.
reservoir_poly = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]

nominal_area = reservoir_poly['AREA_SKM'].values[0]
nominal_area_poly = reservoir_poly['AREA_POLY'].values[0]

# A value of -99 in AREA_MAX / AREA_MIN is replaced by NaN / 0 respectively.
max_area = reservoir_poly['AREA_MAX'].values[0]
if max_area == -99:
    max_area = np.nan
min_area = reservoir_poly['AREA_MIN'].values[0]
if min_area == -99:
    min_area = 0

area_rep = reservoir_poly['AREA_REP'].values[0]
dam_height = float(reservoir_poly['DAM_HGT_M'].values[0])
elev_msl = float(reservoir_poly['ELEV_MASL'].values[0])
depth = float(reservoir_poly['DEPTH_M'].values[0])
capacity = float(reservoir_poly['CAP_MCM'].values[0])
db = reservoir_poly['db'].values[0]

# Overview map: all validation reservoirs, with the selected one drawn in red on top.
all_points_layer = val_res_pt.hvplot(geo=True, tiles='OSM')
selected_point_layer = val_res_pt[val_res_pt['tmsos_id'] == RESERVOIR].hvplot(
    geo=True, color='red', size=100,
)
global_map = (all_points_layer * selected_point_layer).opts(
    title=f"Locations of validation reservoirs. {RESERVOIR_NAME}, highlighted in red"
)

global_map

In [51]:
# Outline of the selected reservoir drawn over OSM tiles.
selected_outline = val_res_poly.loc[val_res_poly['tmsos_id'] == RESERVOIR]
selected_outline.hvplot(
    geo=True, tiles='OSM', shared_axes=False
).opts(title=f"{RESERVOIR_NAME}")

# Read in GLWS data

In [52]:
# Point table of GLWS reservoirs; it carries both 'glws_id' and 'tmsos_id' columns.
glws_common_with_tmsos_ids_fn = (
    Path("/tiger1/pdas47/tmsosPP/data/validation-locations")
    / "glws_grand_validation_reservoirs_pts.geojson"
)
glws_reservoirs = gpd.read_file(glws_common_with_tmsos_ids_fn)
glws_reservoirs

Unnamed: 0,glws_id,glws_lakename,latitude,longitude,GRAND_ID,RES_NAME,DAM_NAME,ALT_NAME,RIVER,ALT_RIVER,...,name,rid_id,grand_id_1,rid_filepa,resops_id,rid_filena,tmsos_id,distance,COUNTRY_SH,geometry
0,2505,Clarence Cannon Dam,39.50750,-91.79270,961,Mark Twain Lake,Clarence Cannon Dam,,Salt,,...,"Mark Twain Lake, US",,,,,,0666,0.045701,US,POINT (-91.64848 39.52615)
1,3079,B. Everett Jordan Dam,35.74770,-79.02170,1796,B. Everett Jordan Lake,B. Everett Jordan Dam,,Haw River,,...,"B. Everett Jordan Lake, US",,,,,,0679,0.000773,US,POINT (-79.06867 35.65681)
2,3048,Cijara,39.33170,-4.88339,2811,,Cijara,,Guadiana,,...,"Cijara Dam, Sp",,,,,,0214,0.000406,Sp,POINT (-5.01457 39.37709)
3,1132,Sardar Sarovar,21.93360,74.06850,4734,,Sardar Sarovar,Narmada,Narmada,,...,"Sardar Sarovar Dam, In",,,,,,0527,0.003893,In,POINT (73.74868 21.83112)
4,2017,Supa,15.25370,74.46070,4776,,Supa,,Kalinadi,,...,"Supa Dam, In",,,,,,0565,0.001599,In,POINT (74.52562 15.2764)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,4375,Bang Lang,6.10821,101.29300,5170,,Bang Lang,,Pattani,,...,"Bang Lang Dam, Th",200604.0,5170.0,mekong_insitu/200604-Bang_Lang_Dam.csv,,200604-Bang_Lang_Dam.csv,0825,,Th,POINT (101.27135 6.15479)
176,775,Noi,15.02010,105.38400,5796,Noi,Sirindhorn,,Lam Dom Noi,,...,"Noi, Th",200212.0,5796.0,mekong_insitu/200212-Sirindhorn_Dam.csv,,200212-Sirindhorn_Dam.csv,0810,,Th,POINT (105.43054 15.20508)
177,1696,Richard B.Russell dam,34.13880,-82.70570,7308,Richard B. Russell Lake,Richard B. Russell Dam,Richard B. Russell Lake,Savannah River,,...,"Richard B. Russell Lake, US",,,,7308.0,,1426,,US,POINT (-82.60055 34.02548)
178,10468,Elk city lake,37.25010,-95.79910,7311,Elk City Lake,Elk City Lake,Elk City Lake,Elk River,,...,"Elk City Lake, US",,,,,,0738,,US,POINT (-95.78627 37.27593)


In [53]:
import re

# Folder holding one GLWS storage CSV per reservoir.
glws_storage_dir = Path("/tiger1/pdas47/tmsosPP/data/reservoir-storage-data/Global database of lake water storage GLWS time series v1.1/Reservoirs/Poly")
glws_paths = list(glws_storage_dir.glob("*csv"))
glws_names = [p.stem for p in glws_paths]

# File stems look like "ID<glws id><lake name>30m_GEE1992_2020_monthlyVolume_Poly".
# NOTE(review): the trailing '*' quantifies the final 'e' (regex, not glob,
# semantics); re.match only anchors at the start, so the suffix is not checked.
pattern = r"ID(\d+)([A-Za-z_\d]+)30m_GEE1992_2020_monthlyVolume*"


def parse_glws_id(name, pattern=pattern):
    """Return the integer GLWS ID encoded at the start of a file stem, or None if it does not match."""
    found = re.match(pattern, name)
    return int(found.group(1)) if found else None


# Parse every stem once and keep path, name and ID together, so that a file
# that does not follow the naming scheme is dropped from all three columns
# instead of only from the ID list (which would misalign or break the frame).
glws_records = [
    (p, stem, parse_glws_id(stem)) for p, stem in zip(glws_paths, glws_names)
]
glws_records = [rec for rec in glws_records if rec[2] is not None]
IDS = [rec[2] for rec in glws_records]

glws_path_df = pd.DataFrame(glws_records, columns=['path', 'name', 'ID'])
glws_path_df

Unnamed: 0,path,name,ID
0,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID6298Batman30m_GEE1992_2020_monthlyVolume_Poly,6298
1,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID255Han_shui30m_GEE1992_2020_monthlyVolume_Poly,255
2,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID10565Jacarei30m_GEE1992_2020_monthlyVolume_Poly,10565
3,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID174Kainji_Reservoir30m_GEE1992_2020_monthlyV...,174
4,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID4485Unknown30m_GEE1992_2020_monthlyVolume_Poly,4485
...,...,...,...
193,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID1805Shiroro30m_GEE1992_2020_monthlyVolume_Poly,1805
194,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID28Nasser30m_GEE1992_2020_monthlyVolume_Poly,28
195,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID614Arrow30m_GEE1992_2020_monthlyVolume_Poly,614
196,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID345Agua_Vermelha_reservoir30m_GEE1992_2020_m...,345


In [54]:
# GLWS IDs that appear both in the storage file names and in the GLWS reservoir table.
pd.Index(glws_path_df['ID'].values).intersection(pd.Index(glws_reservoirs['glws_id'].values))

Index([  562,  2490,   813,   555,  2052,  2468,  4110,  1237,   642,  1920,
        1526,   775,  4082,   579,   551,   439,   232,   355,  1328,  2505,
        1233,   678,   284,   546,  9087,   381,  1635,   552,  1547, 51955,
         378,  3984,  1906,  1757,  2343,  1247,  1144,   142, 10566,  3069,
        1609,  2445,  3772,  1062,  1882,  6385,  1890,   783,   614],
      dtype='int64')

In [55]:
# Inspect the sorted IDs parsed from the GLWS file names.
glws_path_df['ID'].sort_values()

65          18
44          26
194         28
162         29
81          32
        ...   
145    9900024
181    9900029
150    9900043
112    9920038
102    9920042
Name: ID, Length: 198, dtype: int64

In [56]:
# Inspect the sorted glws_id values of the GLWS reservoir table, for comparison with the file-name IDs.
glws_reservoirs['glws_id'].sort_values()

131        135
56         142
34         232
106        284
158        342
        ...   
134      51955
123    9900058
121    9910200
38     9920022
172    9920039
Name: glws_id, Length: 180, dtype: int32

In [57]:
# Keep only the two identifier columns, then attach the tmsos_id to every
# GLWS file whose ID is present in the reservoir table (inner join).
df = glws_reservoirs[['glws_id', 'tmsos_id']]
glws_tmsos = glws_path_df.merge(
    df,
    how='inner',
    left_on='ID',
    right_on='glws_id',
)
glws_tmsos

Unnamed: 0,path,name,ID,glws_id,tmsos_id
0,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID562Srisailam_reservoir30m_GEE1992_2020_month...,562,562,507
1,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID2490CherokeeSaddle30m_GEE1992_2020_monthlyVo...,2490,2490,1357
2,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID813Sirikit30m_GEE1992_2020_monthlyVolume_Poly,813,813,824
3,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID555Mead30m_GEE1992_2020_monthlyVolume_Poly,555,555,1078
4,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID2052Lam_Pao30m_GEE1992_2020_monthlyVolume_Poly,2052,2052,828
5,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID2468Berryessa30m_GEE1992_2020_monthlyVolume_...,2468,2468,913
6,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID4110PanchetHill30m_GEE1992_2020_monthlyVolum...,4110,4110,539
7,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID1237BullShoals30m_GEE1992_2020_monthlyVolume...,1237,1237,1205
8,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID642Vaaldam30m_GEE1992_2020_monthlyVolume_Poly,642,642,349
9,/tiger1/pdas47/tmsosPP/data/reservoir-storage-...,ID1920Cascade30m_GEE1992_2020_monthlyVolume_Poly,1920,1920,968


In [58]:
# Full path of the first matched GLWS file (row label 0).
glws_tmsos.loc[0, 'path']

PosixPath('/tiger1/pdas47/tmsosPP/data/reservoir-storage-data/Global database of lake water storage GLWS time series v1.1/Reservoirs/Poly/ID562Srisailam_reservoir30m_GEE1992_2020_monthlyVolume_Poly.csv')

In [59]:
# Reservoir names stored in the validation points table.
val_pts['name']

0            Martis Creek Lake, US
1           Lake Arrowhead Dam, US
2              Mark Twain Lake, US
3       Council Grove City Dam, US
4              Table Rock Lake, US
                   ...            
1286             Elk City Lake, US
1287             Elk City Lake, US
1288         Canyon Ferry Lake, US
1289            Tims Ford Lake, US
1290         Cordell Hull Lake, US
Name: name, Length: 1291, dtype: object

In [60]:
# Folder containing the AEV curve CSV files.
aev_dir = Path("/tiger1/pdas47/tmsosPP/data/aec") / "aev"

In [61]:
import rasterio
import matplotlib.pyplot as plt


def egm_96_to_08_conversion(raster_path, tmsosid, val_pts):
    """
    Read the raster value at the point location of one reservoir.

    The first row of `val_pts` whose 'tmsos_id' equals `tmsosid` supplies the
    point; its x/y coordinates are passed to the raster sampler unchanged
    (no reprojection is performed here, so the point coordinates must already
    be expressed in the raster's coordinate system).

    Args:
        raster_path (str): File path of the raster EGM96-EGM08.
        tmsosid (str): Identifier of the reservoir to look up in `val_pts`.
        val_pts (gpd.GeoDataFrame): Validation points with 'tmsos_id' and 'geometry' columns.

    Returns:
        float: Value of the raster's first band at the reservoir location.

    Raises:
        ValueError: If no row of `val_pts` matches `tmsosid`.
    """
    matches = val_pts[val_pts['tmsos_id'] == tmsosid]
    if matches.empty:
        raise ValueError(f"No location found for tmsosid: {tmsosid}")

    point = matches['geometry'].iloc[0]
    lon_lat = (point.x, point.y)

    # Sample while the dataset is open; one coordinate pair yields one array
    # holding a value per band.
    with rasterio.open(raster_path) as src:
        band_values = list(src.sample([lon_lat]))

    return band_values[0][0]

In [62]:
import rasterio
from rasterio.plot import show
from rasterio.windows import Window

def get_area_elevation_storage(aev, tmsosid, raster_path=None, area=None, elevation=None, convert_to_egm08=False):
    """
    Interpolate an AEV (Area-Elevation-Volume) curve at given elevations or areas.

    Exactly one of `area` / `elevation` must be supplied. The curve is sorted by
    the lookup column before interpolating, because `np.interp` requires
    increasing x-coordinates and silently returns meaningless values otherwise.
    Query values outside the curve's range are clamped to the curve's end
    values (the `np.interp` default); NaN queries yield NaN.

    Args:
        aev (pd.DataFrame): AEV curve with columns 'Elevation', 'CumArea', and 'Storage'.
        tmsosid (str): Reservoir identifier, used to locate the reservoir for the EGM offset lookup.
        raster_path (str, optional): File path of the raster EGM96-EGM08. Required if convert_to_egm08 is True.
        area (array-like, optional): Area values to look up. Defaults to None.
        elevation (array-like, optional): Elevation values to look up. Defaults to None.
        convert_to_egm08 (bool, optional): Whether to apply the EGM offset to the elevations. Defaults to False.

    Returns:
        dict: Arrays under the keys 'elevation', 'area', and 'storage'. When
        `convert_to_egm08` is True the returned elevations have the offset subtracted.

    Raises:
        ValueError: If neither or both of `area` / `elevation` are given, or if
            conversion is requested without `raster_path` / `tmsosid`.

    Notes:
        When `convert_to_egm08` is True this function reads the notebook-level
        `val_pts` table to locate the reservoir.
    """
    if area is None and elevation is None:
        raise ValueError("Either 'area' or 'elevation' must be provided")

    if area is not None and elevation is not None:
        raise ValueError("Only one of 'area' or 'elevation' should be provided")

    if convert_to_egm08:
        if raster_path is None or tmsosid is None:
            raise ValueError("raster_path and tmsosid must be provided for EGM96 to EGM08 conversion")
        egm96_to_08_offset = egm_96_to_08_conversion(raster_path, tmsosid, val_pts)
    else:
        egm96_to_08_offset = 0

    # NOTE(review): both branches below subtract the same offset, although one
    # applies it to the query elevations before the curve lookup and the other
    # to elevations read from the curve. Confirm the sign is intended in both.
    if elevation is not None:
        # Stable sort keeps the original row order among equal elevations.
        curve = aev.sort_values('Elevation', kind='stable')
        elevations = np.array(elevation)
        if convert_to_egm08:
            elevations = elevations - egm96_to_08_offset
        areas = np.interp(elevations, curve['Elevation'], curve['CumArea'])
        storages = np.interp(elevations, curve['Elevation'], curve['Storage'])
    else:
        curve = aev.sort_values('CumArea', kind='stable')
        areas = np.array(area)
        elevations = np.interp(areas, curve['CumArea'], curve['Elevation'])
        if convert_to_egm08:
            elevations = elevations - egm96_to_08_offset
        storages = np.interp(areas, curve['CumArea'], curve['Storage'])

    return {
        'elevation': elevations,
        'area': areas,
        'storage': storages
    }

In [63]:
dst_storage_dir = Path("/tiger1/pdas47/tmsosPP/data/storage/glws")

# All outputs go to the same folder, so create it once instead of per reservoir.
baseline_dir = dst_storage_dir / 'baseline_entire_timeperiod'
baseline_dir.mkdir(parents=True, exist_ok=True)

# NOTE(review): this raster path is passed to get_area_elevation_storage, but
# convert_to_egm08 is left at its default (False), so the raster is never read
# and no datum offset is applied. Confirm whether the conversion was intended.
geoid_offset_raster = "/tiger1/pdas47/tmsosPP/data/geoid/conversion_egm96_minus_rgm2008.tif"

# GLWS column name -> column name used in the exported storage CSV.
glws_column_map = {
    'Time': 'date',
    'rws': 'relative_storage',
    'Mean_Levels': 'elevation',
    'Cleaned_Area': 'area',
}

# Loop-local names are used on purpose: the notebook-level RESERVOIR and
# RESERVOIR_NAME selections are not overwritten, and the builtin `id` is not shadowed.
for _, row in glws_tmsos.iterrows():
    tmsos_id = row['tmsos_id']
    path = row['path']

    # Without an AEV curve no storage can be computed, so check before reading any data.
    aev_fn = aev_dir / f"{tmsos_id}.csv"
    if not aev_fn.exists():
        print('AEV not available for ', tmsos_id)
        continue
    aev = pd.read_csv(aev_fn, comment='#')

    # File-friendly reservoir name: spaces become underscores and everything
    # after the first comma is dropped.
    reservoir_name = (
        val_pts.loc[val_pts['tmsos_id'] == tmsos_id, 'name']
        .values[0]
        .replace(' ', '_')
        .split(',')[0]
    )
    dst_fn = baseline_dir / f"{tmsos_id}_{reservoir_name}_storage.csv"

    # 'Time' is parsed with the '%Y%j' format; the whole time series is kept (no date filter).
    glws_df = pd.read_csv(path, parse_dates=['Time'], date_format='%Y%j')
    glws_df = glws_df[list(glws_column_map)].rename(columns=glws_column_map)

    # Storage is looked up on the AEV curve from the GLWS elevations.
    storage = get_area_elevation_storage(
        aev, tmsos_id, geoid_offset_raster,
        elevation=glws_df['elevation']
    )['storage']
    glws_df['storage'] = storage

    print(glws_df)
    glws_df.to_csv(dst_fn, index=False)

          date  relative_storage   elevation      area       storage
0   1992-10-01               NaN         NaN       NaN           NaN
1   1992-11-01          0.000000  269.611342  510.6580  1.794389e+10
2   1992-12-01         -0.545581  268.481095  454.7610  1.739648e+10
3   1993-01-01         -1.251321  266.770000       NaN  1.658229e+10
4   1993-02-01         -1.735893  265.320461  298.4500  1.590634e+10
..         ...               ...         ...       ...           ...
331 2020-05-01         -2.578345  260.818289   75.7925  1.388829e+10
332 2020-06-01         -2.010477  264.317000       NaN  1.544584e+10
333 2020-07-01         -2.074862  264.051221  235.6790  1.532490e+10
334 2020-08-01         -0.015981  269.580000       NaN  1.792861e+10
335 2020-09-01          0.280522  270.146792  537.1390  1.820588e+10

[336 rows x 5 columns]
          date  relative_storage   elevation      area       storage
0   1992-10-01          0.000000  319.182041   73.5900  2.428572e+07
1   1992-1