# Get Area-Elevation Curve

In [1]:
import ee
ee.Initialize(project='tmospp')

In [2]:
# from rat.ee_utils.ee_aec_file_creator import aec_file_creator
import geopandas as gpd
from pathlib import Path
import hvplot.pandas
import pandas as pd
import holoviews as hv
import geoviews as gv
import numpy as np

hv.extension('bokeh')

### select the reservoir

In [4]:
start_date = '2022-01-01'
end_date = '2024-08-01'
RESERVOIR = '0217'
buffer_amt = 250 # meters. unlike other types of data, nadir altimetry data works better with a smaller buffer around the reservoir. Taking no buffer for now.
# possible_elevations_method = 'grand' #(60, 130)
possible_elevations_method = [160, 950]
DATA_DIR = Path('/tiger1/pdas47/tmsosPP/data')
poly_deg = 2

In [5]:
# read the bounding box of the study area
val_pts = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/subset-validation-reservoirs-grand-pts.geojson'))
val_polys = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/subset-validation-reservoirs-grand.geojson'))

selected_reservoirs = [
    '0078','0079','0193','0197','0214','0217','0340','0365','0484','0486',
    '0498','0503','0505','0507','0508','0523','0524','0529','0532','0535',
    '0549','0552','0569','0787','0803','0807','0810','0816','0819','0824',
    '0828','0830','0833','0930','0931','0935','0936','1078','1097','1134',
    '1135','1162','1284','1320','1385','1388','1392','1398','1400','1426',
    '1498','0502'
]
res_names = val_pts[['tmsos_id', 'name']].set_index('tmsos_id').to_dict()['name']


RESERVOIR_NAME = res_names[RESERVOIR]
print(f'{RESERVOIR}: {RESERVOIR_NAME}')

val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]

nominal_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_SKM'].values[0]
nominal_area_poly = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_POLY'].values[0]
max_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_MAX'].values[0]
max_area = np.nan if max_area == -99 else max_area
min_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_MIN'].values[0]
min_area = 0 if min_area == -99 else min_area
area_rep = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_REP'].values[0]
dam_height = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['DAM_HGT_M'].values[0])
elev_msl = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['ELEV_MASL'].values[0])
depth = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['DEPTH_M'].values[0])
capacity = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['CAP_MCM'].values[0])

global_map = (
    val_res_pt.hvplot(
        geo=True, tiles='OSM'
    ) * val_res_pt[val_res_pt['tmsos_id'] == RESERVOIR].hvplot(
        geo=True, color='red', size=100, 
    )
).opts(
    title=f"Locations of validation reservoirs. {RESERVOIR_NAME}, highlighted in red"
)

global_map

0217: Cernadilla Dam,  Sp


## Storage Calculation

In [6]:
# what is the reported capacity?
capacity_hv = hv.HLine(capacity).opts(color='red', ylim=(0, capacity + capacity*0.1), ylabel='capacity (Mil. m3)')
capacity_hv

In [7]:
srtm_extrapolated_dir = Path('../data/aec/srtm_extrapolated/')

In [8]:
val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]

aec_fp = srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}.csv'
aec_df = pd.read_csv(aec_fp)

from scipy.integrate import cumulative_trapezoid
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.cumulative_trapezoid.html#scipy.integrate.cumulative_trapezoid
## Cumulatively integrate y(x) using the composite trapezoidal rule.

elevation_normalized = (aec_df['Elevation'] - aec_df['Elevation'].min())

storage = cumulative_trapezoid(elevation_normalized, aec_df['CumArea'] * 1e6)
storage = np.insert(storage, 0, 0)

aec_df['Storage'] = storage
aec_df['Storage (mil. m3)'] = storage * 1e-6
aec_df

Unnamed: 0,Elevation,CumArea,obs_or_extrapolated,Storage,Storage (mil. m3)
0,871.24,0.00,extrapolated,0.0,0.00000
1,872.26,0.04,extrapolated,20400.0,0.02040
2,873.29,0.34,extrapolated,480900.0,0.48090
3,874.31,0.64,extrapolated,1248900.0,1.24890
4,875.34,0.93,extrapolated,2288550.0,2.28855
...,...,...,...,...,...
63,935.89,13.59,SRTM,384976650.0,384.97665
64,936.92,13.66,SRTM,389538200.0,389.53820
65,937.95,13.74,SRTM,394833800.0,394.83380
66,938.97,13.81,SRTM,399539200.0,399.53920


In [9]:
aec_df.hvplot(x='Elevation', y='CumArea').opts(height=400, width=500, title=f'{RESERVOIR}: {RESERVOIR_NAME}  [A-E]') + aec_df.hvplot(x='Elevation', y='Storage (mil. m3)', title=f'{RESERVOIR}: {RESERVOIR_NAME}  [S-E]').opts(height=400, width=500) * capacity_hv

In [13]:
## save aec in `srtm_extrapolated_storage`
srtm_extrapolated_dir = Path('../data/aec/srtm_extrapolated_storage/')
aec_fp = srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}_storage.csv'
aec_df.to_csv(aec_fp, index=False)

In [10]:
import xarray as xr
import hvplot.xarray
import numpy as np

alg_type = 'tmsos'
elevation_dir = Path('/tiger1/pdas47/tmsosPP/data/tmsos/')
elevation_fp = elevation_dir / f'{RESERVOIR}.csv'

data = {} # data to be used in the xarray
if alg_type == 'tmsos':
    tmsos_df = pd.read_csv(elevation_fp, parse_dates=['date'])
    data['area'] = tmsos_df['area']
    data['date'] = tmsos_df['date'] # add area and date from the tmsos data

reservoir_dynamics = pd.DataFrame(data).set_index('date').to_xarray() # convert to xarray

# compute elevation values using aec
if alg_type == 'tmsos':
    reservoir_dynamics['area'].attrs['alg_type'] = 'tmsos'
    reservoir_dynamics['area'].attrs['obs_imp'] = 'obs'
    reservoir_dynamics['area'].attrs['unit'] = 'km^2'

    elevation = np.interp(reservoir_dynamics['area'], aec_df['CumArea'], aec_df['Elevation'])
    elevation_da = xr.DataArray(data=elevation, coords=reservoir_dynamics.coords)

    storage = np.interp(reservoir_dynamics['area'], aec_df['CumArea'], aec_df['Storage'])
    storage_da = xr.DataArray(data=storage, coords=reservoir_dynamics.coords)

    reservoir_dynamics = reservoir_dynamics.assign(elevation = elevation_da)
    reservoir_dynamics = reservoir_dynamics.assign(storage = storage_da)

# compute storage change
if 'storage_change' not in list(reservoir_dynamics.variables):
    avg_A = (reservoir_dynamics['area'].isel(date=slice(0, -1)) + reservoir_dynamics['area'].isel(date=slice(1, None)))/2
    del_h = reservoir_dynamics['elevation'].diff(dim='date')
    del_s = xr.DataArray(0.5 * avg_A * del_h * 1e6, name='storage_change')
    reservoir_dynamics = reservoir_dynamics.assign(storage_change=del_s)

reservoir_dynamics

In [11]:
print(capacity)
(capacity_hv * (reservoir_dynamics*1e-6).hvplot.scatter(x='date', y='storage').opts(
    title=f'{RESERVOIR}: {RESERVOIR_NAME}. Storage (mil. m3)', ylabel='Storage', xlabel='Date'
))

255.4


In [19]:
reservoir_dynamics.hvplot.scatter(x='date', y='area').opts(
    title=f'{RESERVOIR}: {RESERVOIR_NAME}. Area (km2)'
) 
# + reservoir_dynamics.hvplot.scatter(x='date', y='elevation').opts(
#     title=f'{RESERVOIR}: {RESERVOIR_NAME}. Elevation (m)'
# ) + reservoir_dynamics.hvplot.scatter(
#     x='elevation', y='area', by='date'
# )).cols(1)

In [56]:
print(capacity)
reservoir_dynamics.hvplot.scatter(x='date', y='storage_change').opts(
    title=f'{RESERVOIR}: {RESERVOIR_NAME}. Storage Change (million m^3)'
)

929.0


In [57]:
satellite_reservoir_dynamics = reservoir_dynamics

In [58]:
VERSION = '0.1'
ALG = 'tmsos'

csv_save_fp = DATA_DIR / 'storage_change' / ALG / f'v{VERSION}' / f'{RESERVOIR}_{RESERVOIR_NAME.split(",")[0].replace(" ", "_")}_dels.csv'
csv_save_fp.parent.mkdir(parents=True, exist_ok=True)
satellite_reservoir_dynamics.to_pandas().to_csv(csv_save_fp)

In [59]:
nc_save_fp = csv_save_fp.with_suffix('.nc')
# save as netcdf
## add attributes

satellite_reservoir_dynamics.attrs['version'] = VERSION
satellite_reservoir_dynamics.to_netcdf(nc_save_fp)

# Read in-situ data and calculate storage change

## Select the reservoir

In [60]:
# RESERVOIR = '0505'
ALG_VERSION = 'v0.1.1' # remove temporal resampling

RESULTS_DIR = Path(f'/tiger1/pdas47/tmsosPP/results/')
DATA_DIR = Path(f'/tiger1/pdas47/tmsosPP/data')

In [61]:
# read the bounding box of the study area
val_pts = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/validation-reservoirs-grand-pts.geojson'))
val_polys = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/validation-reservoirs-grand.geojson'))


# selected_reservoirs = [
#     '0505', # dumboor. India
#     '0810', # sirindhorn, Thailand.
#     '0830', # Krasoew, Thailand.
#     '0502', # Bhakra dam, India.
#     '0518', # Bhadra, India.
#     '0349', # vaaldam, South Africa.
#     '0464', # Sterkspruit, South Africa.
#     '0214', # Cijara, Spain
#     '1498', # Toledo bend, US
#     '0936', # Arrow, Canada
#     '1078', # Lake Mead, US,
#     '0524', # Tungabhadra, India. Has both nadir and swath.
#     '0486', # Sriram Sagar, India. Has both nadir and swath.
#     '1284', # Sam Rayburn, US. Has both nadir and swath.
#     '1385', # Pickwick Landing, US. Has both nadir and swath.
#     '0193', # Giribaile, Sp. Has j3/s6 tracks
# ]
# res_names = {
#     '0505': 'Dumboor, In',
#     '0810': 'Sirindhorn, Th',
#     '0830': 'Krasoew, Th',
#     '0502': 'Bhakra, In',
#     '0518': 'Bhadra, In',
#     '0349': 'Vaaldam, SA',
#     '0464': 'Sterkspruit, SA',
#     '0214': 'Cijara, Sp',
#     '1498': 'Toledo Bend, US',
#     '0936': 'Arrow, Ca',
#     '1078': 'Lake Mead, US',
#     '0524': 'Tungabhadra, In',
#     '0486': 'Sriram Sagar, In',
#     '1284': 'Sam Rayburn, US',
#     '1385': 'Pickwick Landing, US',
#     '0193': 'Giribaile, Sp'
# }

RESERVOIR_NAME = res_names[RESERVOIR]

val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]

global_map = (
    val_res_pt.hvplot(
        geo=True, tiles='OSM'
    ) * val_res_pt[val_res_pt['tmsos_id'] == RESERVOIR].hvplot(
        geo=True, color='red', size=100, 
    )
).opts(
    title=f"Locations of validation reservoirs. {RESERVOIR_NAME}, highlighted in red"
)

global_map

In [62]:
(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR].hvplot(
    geo=True, tiles='OSM', shared_axes=False
)).opts(title=f"{RESERVOIR_NAME}")

## Read insitu and satellite data

In [63]:
import numpy as np

deltares_insitu_dir = Path('/tiger1/pdas47/tmsosPP/data/insitu/deltares/')
rid_insitu_dir = Path('/tiger1/pdas47/tmsosPP/data/insitu/rid')
resops_insitu_dir = Path('/tiger1/pdas47/tmsosPP/data/insitu/resopsus')
area_column = 'tmsos area [km2]'
area_dir = Path('/tiger1/pdas47/tmsosPP/data/area/tmsos/')

def get_insitu_df(tmsos_id):
    idx = val_polys['tmsos_id'].isin(selected_reservoirs)
    subset = val_polys[idx]

    row = subset[subset['tmsos_id']==tmsos_id]
    db = row['db'].values
    
    insitu_df = None

    if db == 'deltares':
        deltares_id = row['deltares_id'].values[0]
        
        fn = deltares_insitu_dir / f'{int(deltares_id):07}.csv'
        
        insitu_df = pd.read_csv(fn, parse_dates=['time'])
        insitu_df.sort_values('time', inplace=True)
        insitu_df['date'] = pd.to_datetime(insitu_df['time'].dt.date)
        insitu_df['observed area [km2]'] = insitu_df['area'] * 1e-6
        insitu_df['observed wse [m]'] = np.nan
        insitu_df['observed storage [Mm3]'] = np.nan
        insitu_df['db'] = 'deltares'
        insitu_df = insitu_df[['date', 'observed area [km2]', 'observed wse [m]', 'observed storage [Mm3]', 'db']]

    if db == 'rid':
        rid_id = int(row['rid_id'].values[0])
        name = row['name'].values[0]
        
        fn = rid_insitu_dir / f'{rid_id}-{name}_Dam.csv'
        insitu_df = pd.read_csv(fn, parse_dates=['date'])
        insitu_df['observed area [km2]'] = np.nan
        insitu_df['observed wse [m]'] = insitu_df['water_level (m)']
        insitu_df['observed storage [Mm3]'] = insitu_df['storage (mil. m3)']
        insitu_df['db'] = 'rid'
        insitu_df = insitu_df[['date', 'observed area [km2]', 'observed wse [m]', 'observed storage [Mm3]', 'db']]

    if db == 'resops':
        resops_id = int(row['resops_id'].values[0])
        fn = resops_insitu_dir / f'ResOpsUS_{resops_id}.csv'

        insitu_df = pd.read_csv(fn, parse_dates=['date'])
        insitu_df['observed area [km2]'] = np.nan
        insitu_df['observed wse [m]'] = insitu_df['elevation']
        insitu_df['observed storage [Mm3]'] = insitu_df['storage']
        insitu_df['db'] = 'resops'
        insitu_df = insitu_df[['date', 'observed area [km2]', 'observed wse [m]', 'observed storage [Mm3]', 'db']]

    return insitu_df

insitu_dfs = []
sat_dfs = []
test_dfs = []

perf_dfs = []

for reservoir in selected_reservoirs:
    insitu_df = get_insitu_df(reservoir)

    insitu_df['tmsos_id'] = reservoir
    insitu_df.set_index(['tmsos_id', 'date'], inplace=True)
    insitu_dfs.append(insitu_df)
    
    sat_fn = Path(f'{area_dir}/v{VERSION}/{reservoir}.csv')
    sat_df = pd.read_csv(sat_fn, parse_dates=['time'], dtype={'tmsos_id': str})
    sat_df['date'] = pd.to_datetime(sat_df['time'].dt.date)
    sat_df = sat_df.drop(['time'], axis=1)
    sat_df.set_index(['tmsos_id', 'date'], inplace=True)
    sat_dfs.append(sat_df)

insitu_df = pd.concat(insitu_dfs)
sat_df = pd.concat(sat_dfs)

FileNotFoundError: [Errno 2] No such file or directory: '/tiger1/pdas47/tmsosPP/data/area/tmsos/v0.1/0078.csv'

In [None]:
import xarray as xr

srtm_extrapolated_dir = Path('/tiger1/pdas47/tmsosPP/data/aec/srtm_extrapolated')
# poly_deg = 3
aec_fp = srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}.csv'
aec_df = pd.read_csv(aec_fp)
alg_type = 'insitu'

insitu_df_res = insitu_df.reset_index()
insitu_df_res = insitu_df_res[insitu_df_res['tmsos_id'] == RESERVOIR]
insitu_ds_res = insitu_df_res.set_index(['date']).to_xarray()

reservoir_dynamics = insitu_df_res.rename({
    'observed area [km2]': 'area',
    'observed wse [m]': 'wse',
    'observed storage [Mm3]': 'storage'
}, axis=1).drop_duplicates('date').set_index('date').to_xarray()

# compute values
if alg_type == 'insitu':
    reservoir_dynamics['area'].attrs['alg_type'] = 'insitu'
    reservoir_dynamics['area'].attrs['obs_imp'] = 'obs'
    reservoir_dynamics['area'].attrs['unit'] = 'km^2'

    elevation = np.interp(reservoir_dynamics['area'], aec_df['CumArea'], aec_df['Elevation'])
    elevation_da = xr.DataArray(data=elevation, coords=reservoir_dynamics.coords)
    reservoir_dynamics = reservoir_dynamics.assign(elevation = elevation_da)
# similarly, add other custom functions for other alg_types

if 'storage_change' not in list(reservoir_dynamics.variables):
    avg_A = (reservoir_dynamics['area'].isel(date=slice(0, -1)) + reservoir_dynamics['area'].isel(date=slice(1, None)))/2
    del_h = reservoir_dynamics['elevation'].diff(dim='date')
    del_s = xr.DataArray(0.5 * avg_A * del_h, name='storage_change')
    reservoir_dynamics = reservoir_dynamics.assign(storage_change=del_s)

reservoir_dynamics

In [None]:
insitu_df_res.hvplot(x='date', y='observed area [km2]', kind='scatter')

In [None]:
import hvplot.xarray
import holoviews as hv
hv.extension('bokeh')

reservoir_dynamics.hvplot(
    x='date', y='storage_change', label='insitu', color='red', kind='scatter'
).opts(
    title=f'{RESERVOIR}: {RESERVOIR_NAME}. comparing ∆S between insitu and satellite'
) * satellite_reservoir_dynamics.hvplot(
    x='date', y='storage_change', label='satellite (tmsos)', color='blue', alpha=0.6, kind='scatter'
).opts(
    xlim=tuple(pd.to_datetime(('2019-01-01', '2024-07-01')).values), 
    ylim=(-200, 200),
    ylabel='∆S (Mm^3)',
)

In [None]:

alg_type = 'tmsos'
elevation_dir = Path('/tiger1/pdas47/tmsosPP/data/tmsos/')
elevation_fp = elevation_dir / f'{RESERVOIR}.csv'

data = {}
if alg_type == 'tmsos':
    tmsos_df = pd.read_csv(elevation_fp, parse_dates=['date'])
    data['area'] = tmsos_df['area']
    data['date'] = tmsos_df['date']

reservoir_dynamics = pd.DataFrame(data).set_index('date').to_xarray()

# compute values
if alg_type == 'tmsos':
    reservoir_dynamics['area'].attrs['alg_type'] = 'tmsos'
    reservoir_dynamics['area'].attrs['obs_imp'] = 'obs'
    reservoir_dynamics['area'].attrs['unit'] = 'km^2'

    elevation = np.interp(reservoir_dynamics['area'], aec['CumArea'], aec['Elevation'])
    elevation_da = xr.DataArray(data=elevation, coords=reservoir_dynamics.coords)
    elevation_da.attrs['alg_type'] = 'tmsos'
    elevation_da.attrs['obs_imp'] = 'imp'
    elevation_da.attrs['unit'] = 'm'
    
    reservoir_dynamics = reservoir_dynamics.assign(elevation = elevation_da)

# similarly, add other custom functions for other alg_types
if 'storage_change' not in list(reservoir_dynamics.variables):
    avg_A = (reservoir_dynamics['area'] + reservoir_dynamics['area'].shift({'date':1}))/2
    del_h = reservoir_dynamics['elevation'] - reservoir_dynamics['elevation'].shift({'date':1})
    
    del_s = xr.DataArray(avg_A * del_h * 1e6, name='storage_change')
    del_s = xr.DataArray(del_s, name='storage_change')
    del_s.attrs['alg_type'] = 'tmsos'
    del_s.attrs['obs_imp'] = 'calc'
    del_s.attrs['unit'] = 'm^3'
    
    reservoir_dynamics = reservoir_dynamics.assign(storage_change=del_s)

reservoir_dynamics

In [None]:
reservoir_dynamics.storage_change.hvplot(kind='scatter', color='storage_change', cmap='RdYlGn')

## Save 

In [None]:
VERSION = '0.1'
ALG = 'insitu'

csv_save_fp = DATA_DIR / 'storage_change' / ALG / f'v{VERSION}' / f'{RESERVOIR}_{RESERVOIR_NAME.split(",")[0].replace(" ", "_")}_dels.csv'
csv_save_fp.parent.mkdir(parents=True, exist_ok=True)
reservoir_dynamics.to_pandas().to_csv(csv_save_fp)

In [None]:
nc_save_fp = csv_save_fp.with_suffix('.nc')
# save as netcdf
## add attributes

reservoir_dynamics.attrs['version'] = VERSION
reservoir_dynamics.to_netcdf(nc_save_fp)