In [3]:
import geopandas as gpd
from pathlib import Path
import hvplot.pandas
import pandas as pd
import holoviews as hv
import geoviews as gv
import numpy as np

hv.extension('bokeh')

<!-- ## Select the reservoir -->

In [4]:
RESERVOIR = '0484'

In [5]:
# read the bounding box of the study area
val_pts = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/100-validation-reservoirs-grand-pts.geojson'))
val_polys = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/100-validation-reservoirs-grand-polys.geojson'))

selected_reservoirs = val_pts['tmsos_id'].tolist()  # select all 100 reservoirs
res_names = val_pts[['tmsos_id', 'name']].set_index('tmsos_id').to_dict()['name'] # dictionary that can be queried to get reservoir name

RESERVOIR_NAME = res_names[RESERVOIR]

val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]

# get reservoir properties from GRanD
nominal_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_SKM'].values[0]
nominal_area_poly = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_POLY'].values[0]
max_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_MAX'].values[0]
max_area = np.nan if max_area == -99 else max_area

min_area = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_MIN'].values[0]
min_area = 0 if min_area == -99 else min_area

area_rep = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['AREA_REP'].values[0]
dam_height = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['DAM_HGT_M'].values[0])
elev_msl = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['ELEV_MASL'].values[0])
depth = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['DEPTH_M'].values[0])
capacity = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['CAP_MCM'].values[0])


## Plot a map of the selected reservoirs
global_map = (
    val_res_pt.hvplot(
        geo=True, tiles='OSM'
    ) * val_res_pt[val_res_pt['tmsos_id'] == RESERVOIR].hvplot(
        geo=True, color='red', size=100, 
    )
).opts(
    title=f"Locations of validation reservoirs. {RESERVOIR_NAME}, highlighted in red"
)

print(
    f"Selected reservoir: {RESERVOIR}: {RESERVOIR_NAME}\n",
    f"{nominal_area = }\n",
    f"{nominal_area_poly = }\n",
    f"{max_area = }\n",
    f"{min_area = }\n",
    f"{area_rep = }\n",
    f"{dam_height = }\n",
    f"{elev_msl = }\n",
    f"{depth = }\n",
    f"{capacity = }\n",
)

global_map

Selected reservoir: 0484: Ukal, In
 nominal_area = 370.3
 nominal_area_poly = 370.3
 max_area = nan
 min_area = 130.0
 area_rep = 520.0
 dam_height = 81.0
 elev_msl = 96.0
 depth = 23.0
 capacity = 8510.0



<!-- ## Storage Calculation -->

In [6]:
# what is the reported capacity?
capacity_hv = hv.HLine(capacity).opts(color='red', ylim=(0, capacity + capacity*0.1), ylabel='capacity (Mil. m3)')
capacity_hv

In [7]:
srtm_extrapolated_dir = Path('/tiger1/pdas47/tmsosPP/data/aec/srtm_extrapolated/')

In [8]:
import numpy as np


if dam_height == -99:
    dam_height = np.nan
if elev_msl == -99:
    elev_msl = np.nan

aec_fp = Path(f'/tiger1/pdas47/tmsosPP/data/aec/srtm/{RESERVOIR}.csv')
aec = pd.read_csv(aec_fp)

Inspect elevations around the dam locations.

In [9]:
merit_dem_elevations = pd.read_csv("/tiger1/pdas47/tmsosPP/data/dam_bottom_elevation/MERIT_DEM_1000_m_around_dam_locations.csv", dtype={'tmsos_id': str}).drop(
    ['system:index', '.geo'], axis=1
)
merit_dem_elevations.head()

Unnamed: 0,dem_max,dem_min,dem_p1,dem_p10,dem_p15,dem_p2,dem_p20,dem_p25,dem_p3,dem_p30,...,dem_p50,dem_p6,dem_p60,dem_p7,dem_p70,dem_p8,dem_p80,dem_p9,dem_p90,tmsos_id
0,1530.077271,541.943115,546.734673,550.019569,553.808512,546.734673,559.641687,559.641687,546.734673,559.641687,...,613.936004,550.019569,670.227516,550.019569,765.874281,550.019569,1033.159235,550.019569,1289.850638,934
1,1464.071289,421.065369,422.092841,428.898907,435.753357,422.092841,443.648814,467.132673,422.092841,515.48288,...,667.396393,428.898907,780.639482,428.898907,891.923305,428.898907,1035.497431,428.898907,1155.850551,936
2,1679.869873,573.878296,574.537821,731.259746,731.259746,579.384089,731.259746,740.009516,587.893527,771.838979,...,932.384094,644.621742,996.266236,667.734305,1100.172784,690.987614,1211.349972,724.447527,1340.358775,930
3,1715.137085,446.809845,451.951807,516.658084,539.237789,451.951807,563.073492,571.347979,460.044878,571.347979,...,684.126697,475.978048,724.98466,475.978048,811.882498,491.161026,1052.369246,499.790276,1331.61865,931
4,1172.023071,581.811462,589.83454,600.551163,618.154278,598.359481,626.191956,642.040255,600.551163,658.303761,...,757.818417,600.551163,854.034727,600.551163,954.043431,600.551163,1021.94579,600.551163,1085.982372,933


In [10]:
## try to plot interactively to rapidly visualize 

import panel as pn
import hvplot.pandas
pn.extension('bokeh')

# Create a reservoir selector dropdown menu
reservoir_selector = pn.widgets.IntSlider(name='Reservoir Selector', start=0, end=len(selected_reservoirs)-1, value=selected_reservoirs.index(RESERVOIR))

def get_aec(reservoir, max_height):
    aec_fp = Path(f'/tiger1/pdas47/tmsosPP/data/aec/srtm/{reservoir}.csv')
    aec = pd.read_csv(aec_fp)
    
    obs_aec_above_water = aec[aec['Elevation'] < max_height]
    obs_aec_above_water = obs_aec_above_water.sort_values('Elevation')
    obs_aec_above_water['CumArea_diff'] = obs_aec_above_water['CumArea'].diff()
    obs_aec_above_water['z_score'] = (obs_aec_above_water['CumArea_diff'] - obs_aec_above_water['CumArea'].mean()) / obs_aec_above_water['CumArea'].std()
    max_z_core_idx = obs_aec_above_water['z_score'].idxmax()
    obs_aec_above_water = obs_aec_above_water.loc[max_z_core_idx:, :]
    obs_aec_above_water = obs_aec_above_water[['Elevation', 'CumArea']]

    return obs_aec_above_water

def get_dam_elevations(
        reservoir_id, 
        percentile=1, 
        merit_dem_elevations_path="/tiger1/pdas47/tmsosPP/data/dam_bottom_elevation/MERIT_DEM_1000_m_around_dam_locations.csv"
    ):
    merit_dem_elevations = pd.read_csv(merit_dem_elevations_path, dtype={'tmsos_id': str}).drop(
        ['system:index', '.geo'], axis=1
    )
    elevations = merit_dem_elevations[merit_dem_elevations['tmsos_id'] == reservoir_id]
    dam_height = float(val_res_poly[val_res_poly['tmsos_id'] == reservoir_id]['DAM_HGT_M'].values[0])
    dam_bottom = elevations[f'dem_p{percentile}'].values[0]
    dam_top = dam_bottom + dam_height
    return dam_bottom, dam_top


def plot_elevations(index):
    reservoir = selected_reservoirs[index]
    dam_bottom, dam_top = get_dam_elevations(reservoir, percentile=1)
    elevations = merit_dem_elevations[merit_dem_elevations['tmsos_id'] == reservoir]
    min_elev = elevations['dem_min'].values[0]
    max_elev = elevations['dem_max'].values[0]
    elevation_percentile_cols = [
        'dem_p1', 'dem_p2', 'dem_p3', 'dem_p4', 'dem_p5', 'dem_p6', 'dem_p7', 'dem_p8', 'dem_p9', 
        'dem_p10', 'dem_p15', 'dem_p20', 'dem_p30', 'dem_p40', 'dem_p50', 'dem_p60', 'dem_p70', 
        'dem_p80', 'dem_p90'
    ]
    elevation_at_percentile = [min_elev] + list(elevations[elevation_percentile_cols].values.flatten()) + [max_elev]
    percentiles = [0] + [int(s.split('p')[-1]) for s in elevation_percentile_cols] + [100]

    reservoir_name = res_names[reservoir]
    dam_height = float(val_res_poly[val_res_poly['tmsos_id'] == reservoir]['DAM_HGT_M'].values[0])

    rectangle_x = 1  # 1st percentile
    rectangle_y_start = dam_bottom
    rectangle_y_end = rectangle_y_start + dam_height

    rectangle = hv.Rectangles([(rectangle_x - 0.5, rectangle_y_start, rectangle_x + 0.5, rectangle_y_end)]).opts(
        color='green', alpha=0.5, line_width=2
    )

    return hv.Scatter((percentiles, elevation_at_percentile), 'Percentile', 'Elevation').opts(
        height=400, width=500, title=f'{reservoir}: {reservoir_name}\nElevation Percentiles within 1000 m of dam location\nDam Height: {dam_height} m',
        xlabel='Percentile', ylabel='Elevation (m)', size=10
    ) * hv.Curve((percentiles, elevation_at_percentile)) * rectangle * hv.HLine(101) * hv.HLine(105)

interactive_plot = pn.bind(plot_elevations, reservoir_selector)
pn.Column(
    reservoir_selector, interactive_plot
)



BokehModel(combine_events=True, render_bundle={'docs_json': {'e3ff0799-fa80-433a-9060-7d6f0a993600': {'version…

The dam bottom elevation can be estimated by looking at a buffered region around the dam location.
The minimum elevation (0th percentile) within the region will represent the elevaiton of the downstream river.
However, the elevation of the river downstream may be lower than the elevation of reservoir bottom. Furthermore, the minimum value might be spurious in case of issues with the DEM data itself, due to height uncertainty.
Hence taking the 1st pecentile elevation as the reservoir bottom elevation may alleviate some of these issues. 

Using the minimum elevation, calculate the storage of the reservoir and compare it with the reported capacity in GRanD.

In [11]:
elevations = merit_dem_elevations[merit_dem_elevations['tmsos_id'] == RESERVOIR]

min_elev = elevations['dem_min'].values[0]
dam_height = float(val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['DAM_HGT_M'].values[0])
if dam_height == -99:
    print(f"Dam height unavailable. ")
    dam_height = np.nan

dam_bottom, dam_top = get_dam_elevations(RESERVOIR, percentile=1)

print("Min max elevations: ", dam_bottom, dam_top)

# Interpolate the max_area corresponding to the dam_top using np.interp
max_area_interpolated = np.interp(dam_top, aec['Elevation'], aec['CumArea'])
min_area = 0
print("Min max areas: ", min_area, max_area_interpolated)

obs_aec_above_water = get_aec(RESERVOIR, dam_top)

# plot dam
rectangle_x = 0
rectangle_y_start = dam_bottom
rectangle_y_end = dam_top

dam_hv = hv.Rectangles([(rectangle_x - 0.5, rectangle_y_start, rectangle_x + 0.5, rectangle_y_end)]).opts(
    color='gray', alpha=0.5, line_width=2
)

obs_aec_above_water.hvplot(
    x='CumArea', y='Elevation', label='SRTM'
) * hv.HLine(dam_bottom) * hv.HLine(dam_top) * dam_hv

Min max elevations:  51.84609422233591 132.84609422233592
Min max areas:  0 152.3360626133606


In [12]:
dam_bottom

51.84609422233591

In [13]:
from scipy.integrate import cumulative_trapezoid

def calculate_storage(aec_df):
    """
    Calculate the storage of a reservoir from its Area-Elevation Curve (AEC).

    Parameters:
    aec_df (pd.DataFrame): DataFrame containing 'Elevation' and 'CumArea' columns.

    Returns:
    pd.DataFrame: DataFrame with an additional 'Storage' column representing the storage in cubic meters.
    """
    elevation_normalized = (aec_df['Elevation'] - aec_df['Elevation'].min())

    # cumulative_trapezoid takes two parameters.
    # y = y-axis locations of points. these values will be integrated. 
    # x = x-axis locations of points, where each y value is sampled. Area.
    storage = cumulative_trapezoid(
        elevation_normalized, 
        aec_df['CumArea'] * 1e6
    )
    storage = np.insert(storage, 0, 0)

    aec_df['Storage'] = storage
    aec_df['Storage (mil. m3)'] = storage * 1e-6
    return aec_df


In [14]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# fit a 2° polynomial with constraints:
# (1) df/dx > 0
# (2) f(x) > 0 when x = 0

poly_deg = 2

obs_aec_above_water = get_aec(RESERVOIR, dam_top)
# obs_aec_and_dam_bottom = pd.concat([pd.DataFrame({'Elevation': [dam_bottom], 'CumArea': [0]}), obs_aec_above_water])

x = obs_aec_above_water['CumArea']
y = obs_aec_above_water['Elevation']

print("Dam bottom elevation: ", dam_bottom)
print("Dam top elevation: ", dam_top)
print("Dam capacity: ", capacity)

# Function to return predicted y-values from the polynomial
def predict_y(params, x):
    a0, a1, a2 = params  # Only three parameters for a 2-degree polynomial
    return a0 + a1 * x + a2 * x**2

# Objective function: residuals to minimize
def objective(params, x, y):
    predicted_y = predict_y(params, x)
    
    s = calculate_storage(pd.DataFrame({'CumArea': x, 'Elevation': predicted_y}))[['Storage (mil. m3)', 'Elevation']]
    storage_at_dam_top = np.interp([dam_top], s['Elevation'], s['Storage (mil. m3)'])

    return np.sum((predicted_y - y)**2)
    # return ((capacity - storage_at_dam_top)**2) + np.sum((predicted_y - y)**2)/1e6
    # return ((capacity - storage_at_dam_top)**2)

# Constraint 1: dy/dx > 0 -> derivative of the polynomial should be positive for all x
def constraint_derivative(params):
    # Sample points across the entire range of x (0 to np.max(x))
    x_sample = np.linspace(0, np.max(x), 100)
    
    # Evaluate the derivative at these sample points
    derivative_values = np.polyval(np.polyder(params[::-1]), x_sample)  # np.polyder gives the derivative coefficients

    # Return the minimum value of the derivative; it should be greater than 0
    return derivative_values

# Constraint 2: intercept should be within (dam_bottom, dam_bottom + 5)
def constraint_intercept(params):
    a0 = params[0]  # Intercept is the first parameter
    return a0 - dam_bottom, (dam_bottom + 5) - a0

# Initial guess for the parameters [a0, a1, a2, a3]
initial_guess = np.polyfit(x, y, 2)
initial_guess = initial_guess[::-1] # reverse order to convert to scipy's format

# initial_guess = [0.001, 0.001, 0.0001, 0.0001]
print("initial guess: ", initial_guess)

# Set up constraints
constraints = [
    {'type': 'ineq', 'fun': lambda params: constraint_derivative(params)},  # dy/dx > 0
    # {'type': 'ineq', 'fun': lambda params: constraint_derivative(params, x)},  # dy/dx > 0
    # {'type': 'ineq', 'fun': lambda params: constraint_intercept(params)} # intercept greater than 0
    {'type': 'ineq', 'fun': lambda params: constraint_intercept(params)[0]},
    {'type': 'ineq', 'fun': lambda params: constraint_intercept(params)[1]}  
]

# Perform minimization
result = minimize(objective, initial_guess, args=(x, y), constraints=constraints, options={'maxiter': 1000})

# Extract optimized parameters
a0_opt, a1_opt, a2_opt = result.x  # Only three parameters for a 2-degree polynomial
print(result)
print(f"Optimized polynomial: y = {a0_opt} + {a1_opt}*x + {a2_opt}*x^2")

Dam bottom elevation:  51.84609422233591
Dam top elevation:  132.84609422233592
Dam capacity:  8510.0
initial guess:  [ 3.53251889e+02 -4.55875598e+00  2.02245978e-02]
 message: Optimization terminated successfully
 success: True
  status: 0
     fun: 755.5507801857051
       x: [ 5.685e+01  1.440e-01  1.938e-03]
     nit: 12
     jac: [-3.796e+00  5.574e-01  7.944e+01]
    nfev: 62
    njev: 11
Optimized polynomial: y = 56.84609422233589 + 0.1439590198518478*x + 0.001938019138685874*x^2


In [15]:
x_pred = np.arange(0, max(x)) # area. 0-> max(area).
y_pred = predict_y(result.x, x_pred)

hv.Scatter((list(x_pred), list(y_pred)))

In [16]:
x_pred = np.arange(0, np.max(x), 0.25)
y_pred = predict_y(result.x, x_pred)

# Create a new DataFrame with the predicted x and y values
predicted_df = pd.DataFrame({
    'CumArea': x_pred, 
    'Elevation': y_pred
})

# Clip the y-values (PredictedElevation) between dam_top+10 and dam_bottom-10
predicted_df['ClippedElevation'] = predicted_df['Elevation'].clip(lower=dam_bottom - 10, upper=dam_top + 10)

predicted_storage_df = calculate_storage(predicted_df)
predicted_storage_df.head()

Unnamed: 0,CumArea,Elevation,ClippedElevation,Storage,Storage (mil. m3)
0,0.0,56.846094,56.846094,0.0,0.0
1,0.25,56.882205,56.882205,4513.860145,0.004514
2,0.5,56.918558,56.918558,18085.722129,0.018086
3,0.75,56.955154,56.955154,40776.149049,0.040776
4,1.0,56.991991,56.991991,72645.704005,0.072646


In [17]:
# PLOT AEC
rectangle_x = 0
rectangle_y_start = dam_bottom
rectangle_y_end = dam_top

dam_hv = hv.Rectangles([(rectangle_x - 0.5, rectangle_y_start, rectangle_x + 0.5, rectangle_y_end)]).opts(
    color='green', alpha=0.5, line_width=2
)

aec_hv = hv.Scatter(
    (predicted_df['CumArea'], predicted_df['ClippedElevation']), "Area", "Elevation", label='predicted Elevation'
) * hv.Scatter(
    (0, dam_bottom), label='estimated dam bottom'
) * hv.Scatter(
    (obs_aec_above_water['CumArea'], obs_aec_above_water['Elevation']), 'Area', 'Elevation', label='SRTM'
).opts(
    color='black', height=300, width=600, legend_position='right', show_grid=True, 
    title=f"AEC: {RESERVOIR}: {RESERVOIR_NAME}\nDam Height: {dam_height} m,\nEstimated Bottom Elevation: {dam_bottom:.2f} m"
) * dam_hv

aec_hv

In [18]:
volume_at_dam_top = predicted_storage_df.iloc[-1]['Storage (mil. m3)']
volume_at_dam_top

print(f"TOTAL calculated storage if filled to brim: {volume_at_dam_top:.2f} mil. m3;")
print(f"Reported capacity: {capacity:.2f} Mil. m3")

# Interpolate to find the elevation corresponding to the reported capacity
elevation_at_capacity = np.interp(capacity, predicted_storage_df['Storage (mil. m3)'], predicted_storage_df['Elevation'])
area_at_capacity = np.interp(elevation_at_capacity, predicted_storage_df['Elevation'], predicted_storage_df['CumArea'])

print(f"Elevation at reported capacity: {elevation_at_capacity:.2f} m")
print(f"Area at reported capacity: {area_at_capacity:.2f} km²")

TOTAL calculated storage if filled to brim: 3931.67 mil. m3;
Reported capacity: 8510.00 Mil. m3
Elevation at reported capacity: 123.50 m
Area at reported capacity: 152.00 km²


In [19]:
# Plot dam as a rectangle
rectangle_x = 0
rectangle_y_start = dam_bottom
rectangle_y_end = volume_at_dam_top

dam_hv = hv.Rectangles([(rectangle_x - 0.5, rectangle_y_start, rectangle_x + 0.5, rectangle_y_end)]).opts(
    color='gray', alpha=0.5, line_width=2
)
# Calculate max area at dam top as a separate variable
max_area_at_dam_top = np.interp(dam_top, predicted_storage_df['Elevation'], predicted_storage_df['CumArea'])

aev_hv = dam_hv * predicted_storage_df.hvplot(
    x='CumArea', y='Storage (mil. m3)', label='Predicted Storage'
).opts(
    show_grid=True, 
    ylim=(0, capacity + capacity * 0.1), 
    xlim=(0, max_area_at_dam_top),  # Set xlim as 0 to max area at dam top
    title=f"{RESERVOIR} - {RESERVOIR_NAME}: Storage, calculated and reported",
    xlabel='Cumulative Area (km²)',
    ylabel='Storage (Million m³)'
) * hv.HLine(capacity).opts(
    color='black', line_dash='dashed'
) * hv.Text(
    x=0, y=capacity, text=f'Reported Capacity: {capacity:.1f} Mil. m3', halign='left', valign='bottom'
) * hv.Text(
    x=0, y=volume_at_dam_top, text=f'Total Calculated Storage: {volume_at_dam_top:.1f} Mil. m3', halign='left', valign='top'
)

# Add another plot with elevation on the x-axis
elevation_plot = predicted_storage_df.hvplot(
    x='Elevation', y='Storage (mil. m3)', label='Storage vs Elevation'
).opts(
    show_grid=True,
    title=f"{RESERVOIR} - {RESERVOIR_NAME}: Storage vs Elevation",
    xlabel='Elevation (m)',
    ylabel='Storage (Million m³)',
    xlim=(min_elev, dam_top)
)

# Plot dam as a rectangle in the elevation-storage plot
rectangle_x = min_elev
rectangle_y_start = 0
rectangle_y_end = volume_at_dam_top

dam_storage_hv = hv.Rectangles([(rectangle_x - 0.5, rectangle_y_start, rectangle_x + 0.5, rectangle_y_end)]).opts(
    color='gray', alpha=0.5, line_width=2
)

elevation_plot = elevation_plot * dam_storage_hv

aev_hv = (aev_hv + elevation_plot).cols(1)

aev_hv

In [20]:
# save storage df
# Save the predicted storage dataframe with optimization result as comments
with open(f'/tiger1/pdas47/tmsosPP/data/aec/aev_2deg_polynomial_with_constraints/{RESERVOIR}.csv', 'w') as f:
    f.write("# Optimization result:\n")
    for line in str(result).split('\n'):
        f.write(f"# {line}\n")
    f.write("# Initial guess:\n")
    for line in str(initial_guess).split('\n'):
        f.write(f"# {line}\n")
    predicted_storage_df.to_csv(f, index=False)

# save aev plot
hv.save(aev_hv, f"/tiger1/pdas47/tmsosPP/results/figures/aev_2deg_polynomial_with_constraints/aev/{RESERVOIR}.png", fmt='png')
hv.save(aec_hv, f"/tiger1/pdas47/tmsosPP/results/figures/aev_2deg_polynomial_with_constraints/aec/{RESERVOIR}.png", fmt='png')

The geckodriver version (0.34.0) detected in PATH at /tiger1/pdas47/tmsosPP/.env/bin/geckodriver might not be compatible with the detected firefox version (126.0.1); currently, geckodriver 0.35.0 is recommended for firefox 126.*, so it is advised to delete the driver in PATH and retry


In [27]:
# # fit a 2° polynomial. by adding the dam bottom elevation as an extra point in the observed SRTM dem.

# from sklearn.linear_model import Ridge
# from sklearn.pipeline import make_pipeline
# from sklearn.preprocessing import PolynomialFeatures, SplineTransformer

# poly_deg = 2

# pft = PolynomialFeatures(degree=poly_deg)
# pft_pipeline = make_pipeline(
#     pft,
#     Ridge(alpha=1e-3),
# )

# prediction_range = np.linspace(
#     dam_bottom, dam_top, int(dam_top - dam_bottom)
# ).reshape(-1, 1)

# dam_bottom, dam_top = get_dam_elevations(RESERVOIR, percentile=1)

# obs_aec_above_water = get_aec(RESERVOIR, dam_top)
# obs_aec_and_dam_bottom = pd.concat([pd.DataFrame({'Elevation': [dam_bottom], 'CumArea': [0]}), obs_aec_above_water])

# poly_predictions = pft_pipeline.fit(
#     obs_aec_and_dam_bottom[['Elevation']], obs_aec_and_dam_bottom[['CumArea']]).predict(
#     pd.DataFrame(prediction_range, columns=['Elevation'])
# )

# poly_pred_df = pd.DataFrame({
#     'Elevation': prediction_range.flatten(),
#     'CumArea': poly_predictions.flatten()
# })

# poly_pred_df

# # PLOT
# print("Min max elevations: ", dam_bottom, dam_top)
# print("Min max areas: ", min_area, max_area_interpolated)


# # plot dam
# rectangle_x = 0
# rectangle_y_start = dam_bottom
# rectangle_y_end = dam_top

# dam_hv = hv.Rectangles([(rectangle_x - 0.5, rectangle_y_start, rectangle_x + 0.5, rectangle_y_end)]).opts(
#     color='gray', alpha=0.5, line_width=2
# )

# obs_aec_hv = obs_aec_and_dam_bottom.hvplot(
#     x='CumArea', y='Elevation', label='SRTM + dam bottom added'
# ) 
# poly_pred_hv = poly_pred_df.hvplot(
#     x='CumArea', y='Elevation', label=f'{poly_deg} deg polynomial'
# )

# aec_hv = obs_aec_hv * poly_pred_hv * hv.HLine(dam_bottom).opts(color='gray') * hv.HLine(dam_top).opts(color='gray') * dam_hv
# aec_hv = aec_hv.opts(
#     title=f"AEC: {RESERVOIR}: {RESERVOIR_NAME}"
# )
# aec_hv

In [28]:
# from scipy.integrate import cumulative_trapezoid

# def calculate_storage(aec_df):
#     """
#     Calculate the storage of a reservoir from its Area-Elevation Curve (AEC).

#     Parameters:
#     aec_df (pd.DataFrame): DataFrame containing 'Elevation' and 'CumArea' columns.

#     Returns:
#     pd.DataFrame: DataFrame with an additional 'Storage' column representing the storage in cubic meters.
#     """
#     elevation_normalized = (aec_df['Elevation'] - aec_df['Elevation'].min())

#     # cumulative_trapezoid takes two parameters.
#     # y = y-axis locations of points. these values will be integrated. 
#     # x = x-axis locations of points, where each y value is sampled. Area.
#     storage = cumulative_trapezoid(
#         elevation_normalized, 
#         aec_df['CumArea'] * 1e6
#     )
#     storage = np.insert(storage, 0, 0)

#     aec_df['Storage'] = storage
#     aec_df['Storage (mil. m3)'] = storage * 1e-6
#     return aec_df

# # Example usage:
# storage_poly_pred_df = calculate_storage(poly_pred_df)
# storage_poly_pred_df.head()

In [29]:
# volume_at_dam_top = storage_poly_pred_df.iloc[-1]['Storage (mil. m3)']

# print(f"TOTAL calculated storage if filled to brim: {volume_at_dam_top:.2f} mil. m3;")
# print(f"Reported capacity: {capacity:.2f} Mil. m3")

# # Interpolate to find the elevation corresponding to the reported capacity
# elevation_at_capacity = np.interp(capacity, storage_poly_pred_df['Storage (mil. m3)'], storage_poly_pred_df['Elevation'])
# area_at_capacity = np.interp(elevation_at_capacity, storage_poly_pred_df['Elevation'], storage_poly_pred_df['CumArea'])

# print(f"Elevation at reported capacity: {elevation_at_capacity:.2f} m")
# print(f"Area at reported capacity: {area_at_capacity:.2f} km²")

In [30]:
# # Plot dam as a rectangle
# rectangle_x = 0
# rectangle_y_start = dam_bottom
# rectangle_y_end = volume_at_dam_top

# dam_hv = hv.Rectangles([(rectangle_x - 0.5, rectangle_y_start, rectangle_x + 0.5, rectangle_y_end)]).opts(
#     color='gray', alpha=0.5, line_width=2
# )

# aev_hv = capacity_hv * storage_poly_pred_df.hvplot(
#     x='CumArea', y='Storage (mil. m3)'
# ).opts(xlabel='CumArea')

# aev_hv = dam_hv * aev_hv.opts(
#     title = f"AEV: {RESERVOIR} - {RESERVOIR_NAME}\nVolume if filled to dam top: {volume_at_dam_top:.2f} mil. m3\nCapacity: {capacity:.2f} mil. m3\nArea at capacity: {area_at_capacity:.2f} km2, Elevation at capacity: {elevation_at_capacity:.2f} m",
# )
# aev_hv

In [31]:
# aev_hv_mod = hv.HLine(7500).opts(color='red') * hv.HLine(5700) * storage_poly_pred_df.hvplot(
#     x='CumArea', y='Storage (mil. m3)'
# ).opts(xlabel='CumArea') * hv.VLine(0).opts(color='gray') * hv.VLine(118) * obs_aec_above_water.hvplot(x='CumArea', y='Elevation', kind='scatter')

# aev_hv_mod = aev_hv_mod.opts(
#     title = f"AEV: {RESERVOIR} - {RESERVOIR_NAME}\nVolume if filled to dam top: {volume_at_dam_top:.2f} mil. m3\nCapacity: {capacity:.2f} mil. m3\nArea at capacity: {area_at_capacity:.2f} km2, Elevation at capacity: {elevation_at_capacity:.2f} m",
# )
# aev_hv_mod

In [32]:
# # save storage df
# predicted_storage_df.to_csv(f'/tiger1/pdas47/tmsosPP/data/aec/aev_2deg_polynomial_with_constraints/{RESERVOIR}.csv', index=False)

# # save aev plot
# hv.save(aev_hv, f"/tiger1/pdas47/tmsosPP/results/figures/aev_2deg_polynomial_with_constraints/aev/{RESERVOIR}.png", fmt='png')
# hv.save(aec_hv, f"/tiger1/pdas47/tmsosPP/results/figures/aev_2deg_polynomial_with_constraints/aec/{RESERVOIR}.png", fmt='png')

In [33]:
# print(f"Interpolated max_area at dam_top ({dam_top:.2f} m): {max_area_interpolated:.2f} km²")

<!-- We know the reported capcaity of the reservoir. Let's call it $S$.
Let's assume that the function that best defines the AEC of the reservoir is a power function, which is monotonically increasing. With increasing Area, the Elevation must increase, and vice versa. The function can be of the form $E = aA^b$ where $E$ is the elevation, $A$ is the area, and $a$ and $b$ are constants. Let's call this function $AEC_{model}$.

We can calculate the storage of the reservoir using this AEC by integrating it. Let's call this calculated storage $s$.

The error in the reported capacity is given by $e = S - s$

Furthermore, we also have the observed AEC of the reservoir observed by SRTM. Let's call this observed part of the AEC $AEC_{obs}$

Since the AECs are arrays of values, we can find the error in the modeled and observed AEC by calculating the root mean squared error (RMSE) between the two. $RMSE = sqrt(MSE(AEC_{obs}, AEC_{model}))$

These two error can be minimized to get $a$ and $b$. -->

In [34]:
# capacity

In [35]:
# from scipy.optimize import minimize
# from scipy.integrate import trapezoid

# def s(params, E):
#     a = params[0]
#     b = params[1]
#     c = params[2]
    
#     A = a + (b * E) + (c * np.power(E, 2))

#     # storage = trapezoid(
#     #     E, 
#     #     A
#     # )

#     return A

# elevations = np.arange(dam_bottom, dam_top) # 1 km2 interval

# params = [
#     -954, 17.9, -0.0779
# ]
# areas = s(
#     params, elevations
# )

# hv.Scatter(
#     (areas, elevations), 'area', 'elevation'
# )
# # f = lambda params, elevations: capacity - s(params, elevations)

# # # capacity - s(x, 0.1, 1)
# # res = minimize(
# #     f, params, method='nelder-mead', args=(elevations), options={'disp': True}
# # )

In [36]:
# hv.Scatter(
#     (areas, elevations), 'area', 'elevation'
# ) * aec.hvplot(x='CumArea', y='Elevation')

In [37]:
# capacity

In [38]:
# # params = res.x
# params = [-954, 17.9, -0.0779]

# areas = (params[0] + params[1] * elevations + params[2] * elevations**2) * 1e-6

# aec_mod_without_zero_area.hvplot(
#     x='Elevation', y='CumArea'
# ) * hv.Scatter((elevations, areas), 'Elevation', 'Area')

In [39]:
# aec_mod_without_zero_area

<!-- ## fill AEC using polynomial interpolation -->

In [40]:
# aec_mod = aec_mod.reset_index(drop=True)[['Elevation', 'CumArea']]

# aec_mod.head(2)

In [41]:
# from sklearn.linear_model import Ridge
# from sklearn.pipeline import make_pipeline
# from sklearn.preprocessing import PolynomialFeatures, SplineTransformer


# pft = PolynomialFeatures(degree=poly_deg)
# pft_pipeline = make_pipeline(
#     pft,
#     Ridge(alpha=1e-3),
# )

In [42]:
# prediction_range = np.linspace(
#     min_height_inferred, max_height_inferred, int(max_height_inferred - min_height_inferred)
# ).reshape(-1, 1)
# poly_predictions = pft_pipeline.fit(aec_mod[['Elevation']], aec_mod[['CumArea']]).predict(
#     pd.DataFrame(prediction_range, columns=['Elevation'])
# )

# poly_pred_df = pd.DataFrame({
#     'Elevation': prediction_range.flatten(),
#     'CumArea': poly_predictions.flatten()
# })
# poly_pred_df['CumArea'] = poly_pred_df['CumArea'].clip(0)
# if (poly_pred_df['CumArea'] == 0).sum() > 0:
#     zero_elevation = poly_pred_df[poly_pred_df['CumArea'] == 0]['Elevation'].max()
# else:
#     zero_elevation = min_height_inferred
# poly_pred_df = poly_pred_df[poly_pred_df['Elevation'] >= zero_elevation]
# poly_pred_df.hvplot(x='Elevation', y='CumArea')

# aec_mod_without_zero_area = aec_mod.copy()
# aec_mod_without_zero_area = aec_mod_without_zero_area[aec_mod_without_zero_area['CumArea']!=0]
# elevations = np.linspace(zero_elevation, max_height_inferred, int(max_height_inferred-zero_elevation))
# areas = [
#     np.interp(elevation, aec_mod_without_zero_area['Elevation'], aec_mod_without_zero_area['CumArea']) if np.logical_and(
#         elevation >= aec_mod_without_zero_area['Elevation'].min(), elevation <= aec_mod_without_zero_area['Elevation'].max()
#     ) else np.interp(elevation, poly_pred_df['Elevation'], poly_pred_df['CumArea']) for elevation in elevations
# ]
# obs_extrapolated = [
#     'SRTM' if np.logical_and(
#         elevation >= aec_mod_without_zero_area['Elevation'].min(), elevation <= aec_mod_without_zero_area['Elevation'].max()
#     ) else 'extrapolated' for elevation in elevations
# ]

# extrapolated_aec = {
#     'Elevation': elevations,
#     'CumArea': areas,
#     'obs_or_extrapolated': obs_extrapolated
# }

# # extrapolated_aec
# extrapolated_aec = pd.DataFrame(extrapolated_aec)
# if len(extrapolated_aec[extrapolated_aec['CumArea'] == 0]) == 0:
#     zero_area_elev = extrapolated_aec.iloc[0]['Elevation']
# else:
#     zero_area_elev = extrapolated_aec.loc[extrapolated_aec[extrapolated_aec['CumArea'] == 0].idxmax()['Elevation']]['Elevation']


# extrapolated_aec = extrapolated_aec[extrapolated_aec['Elevation'] >= zero_area_elev]

# # remove any extrapolated values above the SRTM observed elevation
# idx_max_extrapolated_value = extrapolated_aec[extrapolated_aec['obs_or_extrapolated'] == 'extrapolated'].idxmax()['Elevation']
# # check if the previous value is SRTM, if yes, delete the extrapolated value
# if extrapolated_aec.loc[idx_max_extrapolated_value-1, 'obs_or_extrapolated']:
#     print("deleting extrapolated point above observed AEC")
#     extrapolated_aec = extrapolated_aec.iloc[:-1]

# # PLOT

# min_inferred_elevation_hv = hv.HLine(min_height_inferred).opts(color='orange')
# max_inferred_elevation_hv = hv.HLine(max_height_inferred).opts(color='orange')

# extrapolated_aec.hvplot.scatter(
#     x='CumArea', y='Elevation', by='obs_or_extrapolated'
# ).opts(
#     height=400, width=500, title=f'{RESERVOIR}: {RESERVOIR_NAME}\nExtrapolated AEC',
#     xlabel='Area (km2)', ylabel='Elevation (m)'
# ) * min_inferred_elevation_hv * max_inferred_elevation_hv

In [43]:
# srtm_extrapolated_dir = Path('/tiger1/pdas47/tmsosPP/data/aec/srtm_extrapolated')

# # poly_pred_df.round(2).to_csv(srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}.csv', index=False)
# # extrapolated_aec.round(2).to_csv(srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}.csv', index=False)
# # print(f"Saved at {srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}.csv'}")

In [44]:
# val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
# val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]

# # aec_fp = srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}.csv'
# # aec_df = pd.read_csv(aec_fp)

# from scipy.integrate import cumulative_trapezoid
# # https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.cumulative_trapezoid.html#scipy.integrate.cumulative_trapezoid
# ## Cumulatively integrate y(x) using the composite trapezoidal rule.


# aec_df = extrapolated_aec

# elevation_normalized = (aec_df['Elevation'] - aec_df['Elevation'].min())

# # cumulative_trapezoid takes two parameters.
# # y = y-axis locations of points. these values will be integrated. 
# # https://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png. Normalized Elevation.
# # x = x-axis locations of points, where each y value is sampled. Area.
# storage = cumulative_trapezoid(
#     elevation_normalized, 
#     aec_df['CumArea'] * 1e6
# )
# storage = np.insert(storage, 0, 0)

# aec_df['Storage'] = storage
# aec_df['Storage (mil. m3)'] = storage * 1e-6
# aec_df

In [45]:
# aec_df.hvplot(x='CumArea', y='Elevation').opts(height=300, width=400, title=f'{RESERVOIR}: {RESERVOIR_NAME}  [A-E]', ylabel='Elevation (m)', xlabel='Area (km2)') \
# + (aec_df.hvplot(x='Elevation', y='Storage (mil. m3)', title=f'{RESERVOIR}: {RESERVOIR_NAME}  [S-E]').opts(height=300, width=400, ylabel='Storage (Million m3)', xlabel='Elevation (m)') * capacity_hv) \
# + (aec_df.hvplot(x='CumArea', y='Storage (mil. m3)', title=f'{RESERVOIR}: {RESERVOIR_NAME}  [S-A]').opts(height=300, width=400, ylabel='Storage (Million m3)', xlabel='Area (km2)') * capacity_hv)

In [46]:
# # save aec in `srtm_extrapolated_storage`
# srtm_extrapolated_dir = Path('/tiger1/pdas47/tmsosPP/data/aec/srtm_extrapolated_storage/')