In [1]:
import ee
ee.Initialize(project='tmospp')

In [2]:
# from rat.ee_utils.ee_aec_file_creator import aec_file_creator
import geopandas as gpd
from pathlib import Path
import hvplot.pandas
import pandas as pd
import holoviews as hv
import geoviews as gv
import numpy as np

hv.extension('bokeh')

## Select the reservoir

In [3]:
RESERVOIR = '0505'

In [4]:
# read the bounding box of the study area
val_pts = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/100-validation-reservoirs-grand-pts.geojson'))
val_polys = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/100-validation-reservoirs-grand-polys.geojson'))

selected_reservoirs = val_pts['tmsos_id'].tolist()  # select all 100 reservoirs
res_names = val_pts[['tmsos_id', 'name']].set_index('tmsos_id').to_dict()['name'] # dictionary that can be queried to get reservoir name

RESERVOIR_NAME = res_names[RESERVOIR]

val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]

# get reservoir properties from GRanD
nominal_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_SKM'].values[0]
nominal_area_poly = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_POLY'].values[0]
max_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_MAX'].values[0]
max_area = np.nan if max_area == -99 else max_area

min_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_MIN'].values[0]
min_area = 0 if min_area == -99 else min_area

area_rep = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_REP'].values[0]
dam_height = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['DAM_HGT_M'].values[0])
elev_msl = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['ELEV_MASL'].values[0])
depth = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['DEPTH_M'].values[0])
capacity = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['CAP_MCM'].values[0])


## Plot a map of the selected reservoirs
global_map = (
    val_res_pt.hvplot(
        geo=True, tiles='OSM'
    ) * val_res_pt[val_res_pt['tmsos_id'] == RESERVOIR].hvplot(
        geo=True, color='red', size=100, 
    )
).opts(
    title=f"Locations of validation reservoirs. {RESERVOIR_NAME}, highlighted in red"
)

print(
    f"Selected reservoir: {RESERVOIR}: {RESERVOIR_NAME}\n",
    f"{nominal_area = }\n",
    f"{nominal_area_poly = }\n",
    f"{max_area = }\n",
    f"{min_area = }\n",
    f"{area_rep = }\n",
    f"{dam_height = }\n",
    f"{elev_msl = }\n",
    f"{depth = }\n",
    f"{capacity = }\n",
)

global_map

Selected reservoir: 0505: Gumti Dam,  In
 nominal_area = 34.41
 nominal_area_poly = 34.41
 max_area = nan
 min_area = 0
 area_rep = -99.0
 dam_height = 30.0
 elev_msl = 101.0
 depth = 9.1
 capacity = 312.0



## Storage Calculation

In [5]:
# what is the reported capacity?
capacity_hv = hv.HLine(capacity).opts(color='red', ylim=(0, capacity + capacity*0.1), ylabel='capacity (Mil. m3)')
capacity_hv

In [7]:
srtm_extrapolated_dir = Path('/tiger1/pdas47/tmsosPP/data/aec/srtm_extrapolated/')

In [8]:
import numpy as np


if dam_height == -99:
    dam_height = np.nan
if elev_msl == -99:
    elev_msl = np.nan

aec_fp = Path(f'/tiger1/pdas47/tmsosPP/data/aec/srtm/{RESERVOIR}.csv')
aec = pd.read_csv(aec_fp)

# # if ~np.isnan(max_area): # if max area is available, then we infer the max height using that value
# if possible_elevations_method == 'grand':
#     if ~np.isnan(max_area) and ~np.isnan(dam_height):
#         max_height_inferred = aec[aec['CumArea'] < max_area].iloc[-1]['Elevation']
#         min_height_inferred = max_height_inferred - dam_height
#     elif ~np.isnan(elev_msl) and ~np.isnan(dam_height):
#         max_height_inferred = elev_msl + dam_height - depth
#         min_height_inferred = max_height_inferred - dam_height
#     else:
#         raise ValueError("Cannot infer min/max height either using max area or using dam height")
# elif isinstance(possible_elevations_method, list):
#     min_height_inferred = possible_elevations_method[0]
#     max_height_inferred = possible_elevations_method[1]
# else:
#     raise ValueError(f"Invalid value for possible_elevations_method: {possible_elevations_method}")

# print(f'Nominal area: {nominal_area} (poly: {nominal_area_poly}) km2, Dam height: {dam_height} m, Elevation: {elev_msl} m, Max. Area: {max_area} km2, Min. Area: {min_area} km2, Area Rep: {area_rep} km2, Max. Height Inferred: {max_height_inferred} m, Min. Height Inferred: {min_height_inferred} m')

# aec.hvplot(
#     x='Elevation', y='CumArea'
# ).opts(height=300, width=400) * \
# hv.HLine(nominal_area, name='f').opts(color='red') * \
# hv.HLine(max_area, name='f').opts(color='maroon') * \
# hv.HLine(area_rep, name='f').opts(color='pink') * \
# hv.HLine(min_area, name='f').opts(color='orange') * \
# hv.VLine(max_height_inferred, name='f').opts(color='blue') * \
# hv.VLine(min_height_inferred, name='f').opts(color='blue') * \
# hv.Text(max_height_inferred, nominal_area, f'Nominal area: {nominal_area}', halign='right', valign='top').opts(color='red')
# # hv.Text(max_height_inferred, nominal_area, f'Elevation: {elev_msl}\nDam Height: {dam_height}\nDam Crest Elevation: {elev_msl + dam_height:.1f} m', halign='left', valign='top', rotation=90).opts(color='blue', title=f'{RESERVOIR_NAME}') * \

In [9]:
# obs_aec_above_water = aec[aec['Elevation'] < max_height_inferred]
# obs_aec_above_water = obs_aec_above_water.sort_values('Elevation')
# obs_aec_above_water['CumArea_diff'] = obs_aec_above_water['CumArea'].diff()
# obs_aec_above_water['z_score'] = (obs_aec_above_water['CumArea_diff'] - obs_aec_above_water['CumArea'].mean()) / obs_aec_above_water['CumArea'].std()
# max_z_core_idx = obs_aec_above_water['z_score'].idxmax()
# obs_aec_above_water = obs_aec_above_water.loc[max_z_core_idx:, :]

# # if RESERVOIR in ('0349', '0214', '1498', '0524', '0502', '0518', '0524', '1284', '0193'):
# #     obs_aec_above_water = pd.concat([pd.DataFrame({'Elevation': [min_height_inferred], 'CumArea': [0]}), obs_aec_above_water])


# # obs_aec_above_water
# obs_aec_above_water.hvplot(
#     x='Elevation', y='CumArea'
# ).opts(height=300, width=400) * \
# hv.HLine(nominal_area, name='f').opts(color='red') * \
# hv.HLine(max_area, name='f').opts(color='maroon') * \
# hv.HLine(area_rep, name='f').opts(color='pink') * \
# hv.HLine(min_area, name='f').opts(color='orange') * \
# hv.VLine(max_height_inferred, name='f').opts(color='blue') * \
# hv.VLine(min_height_inferred, name='f').opts(color='blue') * \
# hv.Text(max_height_inferred, nominal_area, f'Elevation: {elev_msl}\nDam Height: {dam_height}\nDam Crest Elevation: {elev_msl + dam_height:.1f} m', halign='left', valign='top', rotation=90).opts(color='blue', title=f'{RESERVOIR_NAME}') * \
# hv.Text(max_height_inferred, nominal_area, f'Nominal area: {nominal_area}', halign='right', valign='top').opts(color='red')

Inspect elevations around the dam locations.

In [10]:
merit_dem_elevations = pd.read_csv("/tiger1/pdas47/tmsosPP/data/dam_bottom_elevation/MERIT_DEM_1000_m_around_dam_locations.csv", dtype={'tmsos_id': str}).drop(
    ['system:index', '.geo'], axis=1
)
merit_dem_elevations

Unnamed: 0,dem_max,dem_min,dem_p10,dem_p20,dem_p30,dem_p40,dem_p50,dem_p60,dem_p70,dem_p80,dem_p90,tmsos_id
0,1152.984375,544.676453,549.934118,559.715737,559.715737,559.715737,559.715737,566.207323,597.246944,634.115248,776.968689,0934
1,687.791748,421.781464,427.067238,428.776840,430.908008,439.065479,447.240622,467.203196,508.821503,564.903510,612.747530,0936
2,992.426514,574.042175,666.007751,731.159677,731.159677,731.159677,741.023829,767.305486,812.764949,881.010539,923.085576,0930
3,1018.599365,451.216797,497.632751,537.842663,541.596362,561.718200,571.139277,571.139277,573.224263,674.347518,769.986251,0931
4,1103.896851,585.429260,600.772171,610.631512,621.716917,625.957204,642.342234,653.633929,669.709038,766.351393,913.982068,0933
...,...,...,...,...,...,...,...,...,...,...,...,...
1286,384.168732,297.040649,315.272678,318.558746,322.761018,324.237232,328.204559,334.747821,339.816723,345.227660,354.280435,1359
1287,384.168732,297.040649,315.272678,318.558746,322.761018,324.237232,328.204559,334.747821,339.816723,345.227660,354.280435,0704
1288,115.723740,57.717018,69.120104,72.687027,75.083551,79.102731,84.357686,88.880953,93.856906,97.865666,102.847569,1362
1289,115.723740,57.717018,69.120104,72.687027,75.083551,79.102731,84.357686,88.880953,93.856906,97.865666,102.847569,0590


In [11]:
## try to plot interactively to rapidly visualize 

import panel as pn
import hvplot.pandas
pn.extension('bokeh')

# Create a reservoir selector dropdown menu
reservoir_selector = pn.widgets.IntSlider(name='Reservoir Selector', start=0, end=len(selected_reservoirs)-1, value=selected_reservoirs.index(RESERVOIR))

def get_aec(reservoir, max_height):
    aec_fp = Path(f'/tiger1/pdas47/tmsosPP/data/aec/srtm/{reservoir}.csv')
    aec = pd.read_csv(aec_fp)
    
    obs_aec_above_water = aec[aec['Elevation'] < max_height]
    obs_aec_above_water = obs_aec_above_water.sort_values('Elevation')
    obs_aec_above_water['CumArea_diff'] = obs_aec_above_water['CumArea'].diff()
    obs_aec_above_water['z_score'] = (obs_aec_above_water['CumArea_diff'] - obs_aec_above_water['CumArea'].mean()) / obs_aec_above_water['CumArea'].std()
    max_z_core_idx = obs_aec_above_water['z_score'].idxmax()
    obs_aec_above_water = obs_aec_above_water.loc[max_z_core_idx:, :]
    obs_aec_above_water = obs_aec_above_water[['Elevation', 'CumArea']]

    return obs_aec_above_water


def plot_elevations(index):
    reservoir = selected_reservoirs[index]
    elevations = merit_dem_elevations[merit_dem_elevations['tmsos_id'] == reservoir]
    min_elev = elevations['dem_min'].values[0]
    max_elev = elevations['dem_max'].values[0]
    elevation_percentiles = list(elevations[['dem_p10', 'dem_p20', 'dem_p30', 'dem_p40', 'dem_p50', 'dem_p60', 'dem_p70', 'dem_p80', 'dem_p90']].values.flatten())

    reservoir_name = res_names[reservoir]
    dam_height = float(val_res_poly[val_res_poly['tmsos_id'] == reservoir]['DAM_HGT_M'].values[0])

    rectangle_x = 0
    rectangle_y_start = min_elev
    rectangle_y_end = rectangle_y_start + dam_height

    rectangle = hv.Rectangles([(rectangle_x - 0.5, rectangle_y_start, rectangle_x + 0.5, rectangle_y_end)]).opts(
        color='green', alpha=0.5, line_width=2
    )

    elevation_percentiles = [min_elev] + elevation_percentiles + [max_elev]
    x_values = [0] + list(range(10, 100, 10)) + [100]

    return hv.Scatter((x_values, elevation_percentiles), 'Percentile', 'Elevation').opts(
        height=400, width=500, title=f'{reservoir}: {reservoir_name}\nElevation Percentiles within 1000 m of dam location\nDam Height: {dam_height} m',
        xlabel='Percentile', ylabel='Elevation (m)', size=10
    ) * hv.Curve((x_values, elevation_percentiles)) * rectangle

interactive_plot = pn.bind(plot_elevations, reservoir_selector)
pn.Column(
    reservoir_selector, interactive_plot
)



BokehModel(combine_events=True, render_bundle={'docs_json': {'31746716-e5fa-4971-ad67-792071040e1c': {'version…

Minimum elevation, or the 0th percentile elevation may be used as a proxy for the reservoir bottom.

Using the minimum elevation, calculate the storage of the reservoir and compare it with the reported capacity in GRanD.

In [12]:
elevations = merit_dem_elevations[merit_dem_elevations['tmsos_id'] == RESERVOIR]

min_elev = elevations['dem_min'].values[0]
dam_height = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['DAM_HGT_M'].values[0])
if dam_height == -99:
    print(f"Dam height unavailable. ")
    dam_height = np.nan

dam_bottom = min_elev
dam_top = min_elev + dam_height

print("Min max elevations: ", dam_bottom, dam_top)

# Interpolate the max_area corresponding to the dam_top using np.interp
max_area_interpolated = np.interp(dam_top, aec['Elevation'], aec['CumArea'])
min_area = 0
print("Min max areas: ", min_area, max_area_interpolated)

obs_aec_above_water = get_aec(RESERVOIR, dam_top)

# plot dam
rectangle_x = 0
rectangle_y_start = dam_bottom
rectangle_y_end = dam_top

dam_hv = hv.Rectangles([(rectangle_x - 0.5, rectangle_y_start, rectangle_x + 0.5, rectangle_y_end)]).opts(
    color='gray', alpha=0.5, line_width=2
)

obs_aec_above_water.hvplot(
    x='CumArea', y='Elevation', label='SRTM'
) * hv.HLine(dam_bottom) * hv.HLine(dam_top) * dam_hv

Min max elevations:  84.15647888183594 114.15647888183594
Min max areas:  0 77.25992408752441


In [32]:
# fit a 2°

from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, SplineTransformer

poly_deg = 2

pft = PolynomialFeatures(degree=poly_deg)
pft_pipeline = make_pipeline(
    pft,
    Ridge(alpha=1e-3),
)

prediction_range = np.linspace(
    dam_bottom, dam_top, int(dam_top - dam_bottom)
).reshape(-1, 1)


obs_aec_above_water = get_aec(RESERVOIR, dam_top)
obs_aec_and_dam_bottom = pd.concat([pd.DataFrame({'Elevation': [dam_bottom], 'CumArea': [0]}), obs_aec_above_water])

poly_predictions = pft_pipeline.fit(
    obs_aec_and_dam_bottom[['Elevation']], obs_aec_and_dam_bottom[['CumArea']]).predict(
    pd.DataFrame(prediction_range, columns=['Elevation'])
)

poly_pred_df = pd.DataFrame({
    'Elevation': prediction_range.flatten(),
    'CumArea': poly_predictions.flatten()
})

poly_pred_df

# PLOT
print("Min max elevations: ", dam_bottom, dam_top)
print("Min max areas: ", min_area, max_area_interpolated)


# plot dam
rectangle_x = 0
rectangle_y_start = dam_bottom
rectangle_y_end = dam_top

dam_hv = hv.Rectangles([(rectangle_x - 0.5, rectangle_y_start, rectangle_x + 0.5, rectangle_y_end)]).opts(
    color='gray', alpha=0.5, line_width=2
)

obs_aec_hv = obs_aec_and_dam_bottom.hvplot(
    x='CumArea', y='Elevation', label='SRTM + dam bottom added'
) 
poly_pred_hv = poly_pred_df.hvplot(
    x='CumArea', y='Elevation', label=f'{poly_deg} deg polynomial'
)

aec_hv = obs_aec_hv * poly_pred_hv * hv.HLine(dam_bottom).opts(color='gray') * hv.HLine(dam_top).opts(color='gray') * dam_hv
aec_hv = aec_hv.opts(
    title=f"AEC: {RESERVOIR}: {RESERVOIR_NAME}"
)
aec_hv

Min max elevations:  84.15647888183594 114.15647888183594
Min max areas:  0 77.25992408752441


In [33]:
from scipy.integrate import cumulative_trapezoid

def calculate_storage(aec_df):
    """
    Calculate the storage of a reservoir from its Area-Elevation Curve (AEC).

    Parameters:
    aec_df (pd.DataFrame): DataFrame containing 'Elevation' and 'CumArea' columns.

    Returns:
    pd.DataFrame: DataFrame with an additional 'Storage' column representing the storage in cubic meters.
    """
    elevation_normalized = (aec_df['Elevation'] - aec_df['Elevation'].min())

    # cumulative_trapezoid takes two parameters.
    # y = y-axis locations of points. these values will be integrated. 
    # x = x-axis locations of points, where each y value is sampled. Area.
    storage = cumulative_trapezoid(
        elevation_normalized, 
        aec_df['CumArea'] * 1e6
    )
    storage = np.insert(storage, 0, 0)

    aec_df['Storage'] = storage
    aec_df['Storage (mil. m3)'] = storage * 1e-6
    return aec_df

# Example usage:
storage_poly_pred_df = calculate_storage(poly_pred_df)
storage_poly_pred_df.head()

Unnamed: 0,Elevation,CumArea,Storage,Storage (mil. m3)
0,84.156479,2.301188,0.0,0.0
1,85.190962,7.240263,2554694.0,2.554694
2,86.225444,12.010458,9956721.0,9.956721
3,87.259927,16.611773,21856670.0,21.856674
4,88.29441,21.044208,37905140.0,37.905144


In [34]:
volume_at_dam_top = storage_poly_pred_df.iloc[-1]['Storage (mil. m3)']

print(f"TOTAL calculated storage if filled to brim: {volume_at_dam_top:.2f} mil. m3;")
print(f"Reported capacity: {capacity:.2f} Mil. m3")

# Interpolate to find the elevation corresponding to the reported capacity
elevation_at_capacity = np.interp(capacity, storage_poly_pred_df['Storage (mil. m3)'], storage_poly_pred_df['Elevation'])
area_at_capacity = np.interp(elevation_at_capacity, storage_poly_pred_df['Elevation'], storage_poly_pred_df['CumArea'])

print(f"Elevation at reported capacity: {elevation_at_capacity:.2f} m")
print(f"Area at reported capacity: {area_at_capacity:.2f} km²")

TOTAL calculated storage if filled to brim: 765.37 mil. m3;
Reported capacity: 312.00 Mil. m3
Elevation at reported capacity: 97.62 m
Area at reported capacity: 53.38 km²


In [35]:
aev_hv = capacity_hv * storage_poly_pred_df.hvplot(
    x='CumArea', y='Storage (mil. m3)'
).opts(xlabel='CumArea') * hv.VLine(0).opts(color='gray')

aev_hv = aev_hv.opts(
    title = f"AEV: {RESERVOIR} - {RESERVOIR_NAME}\nVolume if filled to dam top: {volume_at_dam_top:.2f} mil. m3\nCapacity: {capacity:.2f} mil. m3\nArea at capacity: {area_at_capacity:.2f} km2, Elevation at capacity: {elevation_at_capacity:.2f} m",
)
aev_hv

In [36]:
# save storage df
storage_poly_pred_df.to_csv(f'/tiger1/pdas47/tmsosPP/data/aec/aev_2deg_polynomial/{RESERVOIR}.csv', index=False)

# save aev plot
hv.save(aev_hv, f"/tiger1/pdas47/tmsosPP/results/figures/aev_2deg_polynomial/aev/{RESERVOIR}.png", fmt='png')
hv.save(aec_hv, f"/tiger1/pdas47/tmsosPP/results/figures/aev_2deg_polynomial/aec/{RESERVOIR}.png", fmt='png')

The geckodriver version (0.34.0) detected in PATH at /tiger1/pdas47/tmsosPP/.env/bin/geckodriver might not be compatible with the detected firefox version (126.0.1); currently, geckodriver 0.35.0 is recommended for firefox 126.*, so it is advised to delete the driver in PATH and retry


In [14]:
print(f"Interpolated max_area at dam_top ({dam_top:.2f} m): {max_area_interpolated:.2f} km²")

Interpolated max_area at dam_top (114.16 m): 77.26 km²


We know the reported capcaity of the reservoir. Let's call it $S$.
Let's assume that the function that best defines the AEC of the reservoir is a power function, which is monotonically increasing. With increasing Area, the Elevation must increase, and vice versa. The function can be of the form $E = aA^b$ where $E$ is the elevation, $A$ is the area, and $a$ and $b$ are constants. Let's call this function $AEC_{model}$.

We can calculate the storage of the reservoir using this AEC by integrating it. Let's call this calculated storage $s$.

The error in the reported capacity is given by $e = S - s$

Furthermore, we also have the observed AEC of the reservoir observed by SRTM. Let's call this observed part of the AEC $AEC_{obs}$

Since the AECs are arrays of values, we can find the error in the modeled and observed AEC by calculating the root mean squared error (RMSE) between the two. $RMSE = sqrt(MSE(AEC_{obs}, AEC_{model}))$

These two error can be minimized to get $a$ and $b$.

In [43]:
capacity

312.0

In [1]:
from scipy.optimize import minimize
from scipy.integrate import trapezoid

def s(params, E):
    a = params[0]
    b = params[1]
    c = params[2]
    
    A = a + (b * E) + (c * np.power(E, 2))

    # storage = trapezoid(
    #     E, 
    #     A
    # )

    return A

elevations = np.arange(dam_bottom, dam_top) # 1 km2 interval

params = [
    -954, 17.9, -0.0779
]
areas = s(
    params, elevations
)

hv.Scatter(
    (areas, elevations), 'area', 'elevation'
)
# f = lambda params, elevations: capacity - s(params, elevations)

# # capacity - s(x, 0.1, 1)
# res = minimize(
#     f, params, method='nelder-mead', args=(elevations), options={'disp': True}
# )

NameError: name 'np' is not defined

In [51]:
hv.Scatter(
    (areas, elevations), 'area', 'elevation'
) * aec.hvplot(x='CumArea', y='Elevation')

In [52]:
capacity

312.0

In [227]:
# params = res.x
params = [-954, 17.9, -0.0779]

areas = (params[0] + params[1] * elevations + params[2] * elevations**2) * 1e-6

aec_mod_without_zero_area.hvplot(
    x='Elevation', y='CumArea'
) * hv.Scatter((elevations, areas), 'Elevation', 'Area')

In [None]:
aec_mod_without_zero_area

## fill AEC using polynomial interpolation

In [41]:
aec_mod = aec_mod.reset_index(drop=True)[['Elevation', 'CumArea']]

aec_mod.head(2)

Unnamed: 0,Elevation,CumArea
0,92,40.375
1,93,42.115


In [42]:
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, SplineTransformer


pft = PolynomialFeatures(degree=poly_deg)
pft_pipeline = make_pipeline(
    pft,
    Ridge(alpha=1e-3),
)

In [72]:
prediction_range = np.linspace(
    min_height_inferred, max_height_inferred, int(max_height_inferred - min_height_inferred)
).reshape(-1, 1)
poly_predictions = pft_pipeline.fit(aec_mod[['Elevation']], aec_mod[['CumArea']]).predict(
    pd.DataFrame(prediction_range, columns=['Elevation'])
)

poly_pred_df = pd.DataFrame({
    'Elevation': prediction_range.flatten(),
    'CumArea': poly_predictions.flatten()
})
poly_pred_df['CumArea'] = poly_pred_df['CumArea'].clip(0)
if (poly_pred_df['CumArea'] == 0).sum() > 0:
    zero_elevation = poly_pred_df[poly_pred_df['CumArea'] == 0]['Elevation'].max()
else:
    zero_elevation = min_height_inferred
poly_pred_df = poly_pred_df[poly_pred_df['Elevation'] >= zero_elevation]
poly_pred_df.hvplot(x='Elevation', y='CumArea')

aec_mod_without_zero_area = aec_mod.copy()
aec_mod_without_zero_area = aec_mod_without_zero_area[aec_mod_without_zero_area['CumArea']!=0]
elevations = np.linspace(zero_elevation, max_height_inferred, int(max_height_inferred-zero_elevation))
areas = [
    np.interp(elevation, aec_mod_without_zero_area['Elevation'], aec_mod_without_zero_area['CumArea']) if np.logical_and(
        elevation >= aec_mod_without_zero_area['Elevation'].min(), elevation <= aec_mod_without_zero_area['Elevation'].max()
    ) else np.interp(elevation, poly_pred_df['Elevation'], poly_pred_df['CumArea']) for elevation in elevations
]
obs_extrapolated = [
    'SRTM' if np.logical_and(
        elevation >= aec_mod_without_zero_area['Elevation'].min(), elevation <= aec_mod_without_zero_area['Elevation'].max()
    ) else 'extrapolated' for elevation in elevations
]

extrapolated_aec = {
    'Elevation': elevations,
    'CumArea': areas,
    'obs_or_extrapolated': obs_extrapolated
}

# extrapolated_aec
extrapolated_aec = pd.DataFrame(extrapolated_aec)
if len(extrapolated_aec[extrapolated_aec['CumArea'] == 0]) == 0:
    zero_area_elev = extrapolated_aec.iloc[0]['Elevation']
else:
    zero_area_elev = extrapolated_aec.loc[extrapolated_aec[extrapolated_aec['CumArea'] == 0].idxmax()['Elevation']]['Elevation']


extrapolated_aec = extrapolated_aec[extrapolated_aec['Elevation'] >= zero_area_elev]

# remove any extrapolated values above the SRTM observed elevation
idx_max_extrapolated_value = extrapolated_aec[extrapolated_aec['obs_or_extrapolated'] == 'extrapolated'].idxmax()['Elevation']
# check if the previous value is SRTM, if yes, delete the extrapolated value
if extrapolated_aec.loc[idx_max_extrapolated_value-1, 'obs_or_extrapolated']:
    print("deleting extrapolated point above observed AEC")
    extrapolated_aec = extrapolated_aec.iloc[:-1]

# PLOT

min_inferred_elevation_hv = hv.HLine(min_height_inferred).opts(color='orange')
max_inferred_elevation_hv = hv.HLine(max_height_inferred).opts(color='orange')

extrapolated_aec.hvplot.scatter(
    x='CumArea', y='Elevation', by='obs_or_extrapolated'
).opts(
    height=400, width=500, title=f'{RESERVOIR}: {RESERVOIR_NAME}\nExtrapolated AEC',
    xlabel='Area (km2)', ylabel='Elevation (m)'
) * min_inferred_elevation_hv * max_inferred_elevation_hv

deleting extrapolated point above observed AEC


In [73]:
srtm_extrapolated_dir = Path('/tiger1/pdas47/tmsosPP/data/aec/srtm_extrapolated')

# poly_pred_df.round(2).to_csv(srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}.csv', index=False)
# extrapolated_aec.round(2).to_csv(srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}.csv', index=False)
# print(f"Saved at {srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}.csv'}")

In [83]:
val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]

# aec_fp = srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}.csv'
# aec_df = pd.read_csv(aec_fp)

from scipy.integrate import cumulative_trapezoid
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.cumulative_trapezoid.html#scipy.integrate.cumulative_trapezoid
## Cumulatively integrate y(x) using the composite trapezoidal rule.


aec_df = extrapolated_aec

elevation_normalized = (aec_df['Elevation'] - aec_df['Elevation'].min())

# cumulative_trapezoid takes two parameters.
# y = y-axis locations of points. these values will be integrated. 
# https://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png. Normalized Elevation.
# x = x-axis locations of points, where each y value is sampled. Area.
storage = cumulative_trapezoid(
    elevation_normalized, 
    aec_df['CumArea'] * 1e6
)
storage = np.insert(storage, 0, 0)

aec_df['Storage'] = storage
aec_df['Storage (mil. m3)'] = storage * 1e-6
aec_df

Unnamed: 0,Elevation,CumArea,obs_or_extrapolated,Storage,Storage (mil. m3)
0,79.387755,0.0,extrapolated,0.0,0.0
1,80.44335,3.496786,extrapolated,1845594.0,1.845594
2,81.498944,7.030604,extrapolated,7441014.0,7.441014
3,82.554539,10.498524,extrapolated,16592810.0,16.592807
4,83.610134,13.900545,extrapolated,29161850.0,29.16185
5,84.665728,17.236667,extrapolated,45009020.0,45.009018
6,85.721323,20.506891,extrapolated,63995190.0,63.995186
7,86.776918,23.711216,extrapolated,85981230.0,85.98123
8,87.832512,26.849642,extrapolated,110828000.0,110.828027
9,88.888107,29.92217,extrapolated,138396400.0,138.39645


In [85]:
aec_df.hvplot(x='CumArea', y='Elevation').opts(height=300, width=400, title=f'{RESERVOIR}: {RESERVOIR_NAME}  [A-E]', ylabel='Elevation (m)', xlabel='Area (km2)') \
+ (aec_df.hvplot(x='Elevation', y='Storage (mil. m3)', title=f'{RESERVOIR}: {RESERVOIR_NAME}  [S-E]').opts(height=300, width=400, ylabel='Storage (Million m3)', xlabel='Elevation (m)') * capacity_hv) \
+ (aec_df.hvplot(x='CumArea', y='Storage (mil. m3)', title=f'{RESERVOIR}: {RESERVOIR_NAME}  [S-A]').opts(height=300, width=400, ylabel='Storage (Million m3)', xlabel='Area (km2)') * capacity_hv)

In [13]:
# save aec in `srtm_extrapolated_storage`
srtm_extrapolated_dir = Path('/tiger1/pdas47/tmsosPP/data/aec/srtm_extrapolated_storage/')