In [None]:
import xarray as xr
import hvplot.xarray
import hvplot.pandas
import holoviews as hv
import geoviews as gv
import geoviews.feature as gf
from pathlib import Path
import geopandas as gpd
import numpy as np
import pandas as pd

hv.extension('bokeh')

In [None]:
# Date window as ISO strings.
# NOTE(review): neither value is referenced by any other cell visible in this notebook — confirm they are still needed.
start_date = '2023-07-21'
end_date = '2024-10-30'

# Data directory, relative to the notebook's working directory.
DATA_DIR = Path('../data')

In [None]:
# Load the polygons from the validation-locations GeoJSON and keep a single
# row per GRAND_ID (the first occurrence wins).
val_polys = (
    gpd.read_file(Path('../data/validation-locations/2023-24-insitu-poly.geojson'))
    .drop_duplicates(subset='GRAND_ID')
)

In [None]:
# Open the 'gt1r' group of one processed ATL13 file for interactive inspection.
# NOTE(review): `ds` is not used by any later cell visible here and is never closed.
fp = Path("../data/icesat2/raw/271707654/processed_ATL13_20221110080859_07731701_006_01.nc")
ds = xr.open_dataset(fp, group='gt1r')

In [None]:
import h5py 

def process_icesat2(fn, save_dir=Path("../data/icesat2/processed"), buffer_deg=0.005):
    """
    Extract the water-surface records of one ICESat-2 file, tag each record with the
    reservoir polygon it falls in, and write one CSV per matched reservoir.

    The six beam groups (gt1l ... gt3r) are read either with h5py (``.h5`` files) or with
    xarray (any other suffix). Records are spatially joined against the module-level
    ``val_polys`` GeoDataFrame after buffering its geometries by ``buffer_deg``.

    Args:
        fn (str or Path): Path of the ICESat-2 file to process.
        save_dir (Path, optional): Root output directory. A CSV named ``<fn.stem>.csv`` is
            written to ``save_dir/<tmsos_id>/`` for every reservoir with at least one record.
        buffer_deg (float, optional): Distance passed to ``GeoSeries.buffer`` for the
            polygons, in the units of ``val_polys``' CRS. Defaults to 0.005.

    Returns:
        pd.DataFrame: All records read from the file with ``delta_time`` renamed to ``time``
        and a ``tmsos_id`` column (NaN where the record is outside every buffered polygon).
        An empty frame with those columns is returned when no beam holds data.
    """
    fn = Path(fn)
    save_dir = Path(save_dir)
    beams = ['gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r']
    dfs = []

    if fn.suffix == '.h5':
        h5_columns = ['segment_lat', 'segment_lon', 'ht_ortho', 'ht_water_surf', 'delta_time']
        # Context manager guarantees the HDF5 handle is released even if a read fails.
        with h5py.File(fn, mode='r') as f:
            for beam in beams:
                if beam not in f or 'segment_lat' not in f[beam].keys():
                    print(f"Data in {beam} doesn't exist - {fn}")
                    continue
                # NOTE(review): delta_time is stored exactly as read from the file here,
                # whereas xarray may decode it in the other branch — confirm that the
                # downstream pd.to_datetime call handles both representations.
                dfs.append(pd.DataFrame({col: f[beam][col][:] for col in h5_columns}))
    else:
        nc_columns = ['segment_lat', 'segment_lon', 'delta_time', 'ht_ortho', 'ht_water_surf']
        for beam in beams:
            try:
                with xr.open_dataset(fn, group=beam) as beam_ds:
                    if len(beam_ds) == 0:  # group exists but holds no data variables
                        continue
                    beam_df = beam_ds[nc_columns].to_dataframe().reset_index()
            except Exception as err:
                # Best effort: a missing or unreadable beam must not abort the whole file.
                print(f"ERROR: {beam}, {fn}: {err}")
                continue
            dfs.append(beam_df)

    if not dfs:
        print(f"No beam data found in {fn}")
        return pd.DataFrame(
            columns=['segment_lat', 'segment_lon', 'time', 'ht_ortho', 'ht_water_surf', 'tmsos_id']
        )

    # A fresh unique index is required: the join result is aligned back onto df by label.
    df = pd.concat(dfs, ignore_index=True)

    # Create a GeoDataFrame from the processed data
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.segment_lon, df.segment_lat), crs="EPSG:4326")

    val_polys_buffered = val_polys.copy()
    val_polys_buffered['geometry'] = val_polys_buffered['geometry'].buffer(buffer_deg)

    # Perform spatial join to find which polygon each point falls inside
    joined = gpd.sjoin(gdf, val_polys_buffered, how="left", predicate='within')

    # A point inside several overlapping buffers appears once per polygon in `joined`;
    # keep the first match so exactly one tmsos_id maps back onto each record.
    first_match = joined.loc[~joined.index.duplicated(keep='first'), 'tmsos_id']
    df = df.assign(tmsos_id=first_match).rename(columns={'delta_time': 'time'})

    # groupby drops NaN keys, i.e. records that matched no polygon are not written.
    for tmsos_id, reservoir_rows in df.groupby('tmsos_id'):
        save_fp = save_dir / str(tmsos_id) / f"{fn.stem}.csv"
        save_fp.parent.mkdir(parents=True, exist_ok=True)
        reservoir_rows.to_csv(save_fp, index=False)

    return df

# Run the processor on a single file as a check; the result is a pandas DataFrame (despite the name).
gdf = process_icesat2("../data/icesat2/raw/277739526/processed_ATL13_20220818202945_08851601_006_01.nc")

In [None]:
# Display the frame returned by process_icesat2 for the sample file.
gdf

In [None]:
# Quick-look interactive plot of the sample file's records using hvplot defaults.
gdf.hvplot()

In [None]:
from tqdm.notebook import tqdm

In [None]:
# Process every raw file that is not yet listed in the tracker file, appending each
# file to the tracker as soon as it has been processed so an interrupted run can resume.
DFS = []

tracker_fp = Path("../data/icesat2/processed.txt")
# Read the tracker once; a missing tracker simply means nothing has been processed yet.
already_processed = set(tracker_fp.read_text().splitlines()) if tracker_fp.exists() else set()

for d in tqdm(list(Path("../data/icesat2/raw").glob('*'))):
    for fn in d.glob("*"):
        if str(fn) in already_processed:
            print(f"Skipping {fn}")
            continue

        print(f"Processing {fn}")
        df = process_icesat2(fn)

        # Open, write and close per file so the entry is flushed to disk immediately.
        with open(tracker_fp, 'a') as tracker:
            tracker.write(str(fn) + '\n')
        already_processed.add(str(fn))

        DFS.append(df)

# calculate storages

In [None]:
# tmsos_id (as a string) of the reservoir examined in the following cells.
RESERVOIR = '1078'

In [None]:
# Point and polygon layers from the validation-locations directory.
val_pts = gpd.read_file(Path('../data/validation-locations/2023-24-insitu-pts.geojson'))
# NOTE(review): this reload replaces the GRAND_ID-deduplicated `val_polys` built in an
# earlier cell, and process_icesat2 reads `val_polys` as a global — confirm this is intended.
val_polys = gpd.read_file(Path('../data/validation-locations/2023-24-insitu-poly.geojson'))

# Every tmsos_id present in the point layer.
selected_reservoirs = val_pts['tmsos_id'].tolist()

# Lookup table: tmsos_id -> reservoir name.
res_names = val_pts.set_index('tmsos_id')['name'].to_dict()

RESERVOIR_NAME = res_names[RESERVOIR]

In [None]:
# Directory holding the curve CSVs; files are named <reservoir id>.csv.
aec_dir = Path('../data/aec/aev')

aec_fp = aec_dir / f'{RESERVOIR}.csv'
# comment='#': anything after a '#' on a line is ignored by the CSV parser.
aec = pd.read_csv(aec_fp, parse_dates=True, comment='#')
aec

In [None]:
def egm_96_to_08_conversion(raster_path, tmsosid, val_pts):
    """
    Samples the value within a raster file at the location corresponding to the given tmsosid.

    The point's x/y coordinates are passed to the raster unchanged, so the raster must be
    in the same coordinate reference system as ``val_pts``.

    Args:
        raster_path (str): File path of the raster EGM96-EGM08.
        tmsosid (str): The tmsosid to get the latitude and longitude values.
        val_pts (gpd.GeoDataFrame): GeoDataFrame containing the validation points with 'tmsos_id' and 'geometry' columns.

    Returns:
        The value of the raster's first band at the location of the first row of
        ``val_pts`` whose 'tmsos_id' equals ``tmsosid``.

    Raises:
        ValueError: If no row of ``val_pts`` has the given tmsosid.
    """
    # Get the geometry for the given tmsosid
    location = val_pts[val_pts['tmsos_id'] == tmsosid]
    if location.empty:
        raise ValueError(f"No location found for tmsosid: {tmsosid}")

    geometry = location.iloc[0]['geometry']
    longitude, latitude = geometry.x, geometry.y

    # rasterio is not imported anywhere else in this notebook; importing it here keeps
    # the function usable on a fresh kernel.
    import rasterio

    with rasterio.open(raster_path) as src:
        # No reprojection is performed: the coordinates are sampled as given.
        coords = [(longitude, latitude)]

        # src.sample yields one array of band values per coordinate; take band 1 of the only point
        sampled_value = list(src.sample(coords))[0][0]

    return sampled_value


def get_area_elevation_storage(aev, tmsosid=None, raster_path=None, area=None, elevation=None, convert_to_egm08=False):
    """
    Interpolates the AEV (Area-Elevation-Volume) curve to return the elevation, area, and storage for an array of values.
    Optionally applies the EGM96-EGM08 raster offset to the elevations.

    Note: a later cell in this notebook defines a function with the same name, which
    replaces this one once that cell has run.

    Args:
        aev (pd.DataFrame): DataFrame containing the AEV curve with columns 'Elevation', 'CumArea', and 'Storage'.
            Rows may be in any order; they are sorted by the lookup column before interpolating.
        tmsosid (str, optional): The tmsosid used to locate the reservoir for the offset lookup.
            Required if convert_to_egm08 is True. Defaults to None.
        raster_path (str, optional): File path of the raster EGM96-EGM08. Required if convert_to_egm08 is True.
        area (array-like, optional): Array of area values for interpolation. Defaults to None.
        elevation (array-like, optional): Array of elevation values for interpolation. Defaults to None.
        convert_to_egm08 (bool, optional): Whether to subtract the sampled raster offset from the elevations.
            Reads the module-level ``val_pts`` for the reservoir location. Defaults to False.

    Returns:
        dict: A dictionary containing arrays of the interpolated 'elevation', 'area', and 'storage'.

    Raises:
        ValueError: If neither or both of 'area' and 'elevation' are given, or if the
            conversion is requested without raster_path and tmsosid.
    """
    if area is None and elevation is None:
        raise ValueError("Either 'area' or 'elevation' must be provided")

    if area is not None and elevation is not None:
        raise ValueError("Only one of 'area' or 'elevation' should be provided")

    if convert_to_egm08:
        if raster_path is None or tmsosid is None:
            raise ValueError("raster_path and tmsosid must be provided for EGM96 to EGM08 conversion")
        egm96_to_08_offset = egm_96_to_08_conversion(raster_path, tmsosid, val_pts)
    else:
        egm96_to_08_offset = 0

    # np.interp silently returns wrong values unless its x-coordinates are increasing,
    # so order the curve by whichever column is used for the lookup.
    lookup_col = 'Elevation' if elevation is not None else 'CumArea'
    curve = aev.sort_values(lookup_col, kind='stable')

    # NOTE(review): the offset is subtracted before the curve lookup in the elevation
    # branch but after it in the area branch — confirm both directions are intended.
    if elevation is not None:
        elevations = np.array(elevation)
        if convert_to_egm08:
            elevations = elevations - egm96_to_08_offset
        areas = np.interp(elevations, curve['Elevation'], curve['CumArea'])
        storages = np.interp(elevations, curve['Elevation'], curve['Storage'])
    else:
        areas = np.array(area)
        elevations = np.interp(areas, curve['CumArea'], curve['Elevation'])
        if convert_to_egm08:
            elevations = elevations - egm96_to_08_offset
        storages = np.interp(areas, curve['CumArea'], curve['Storage'])

    return {
        'elevation': elevations,
        'area': areas,
        'storage': storages
    }

In [None]:
def get_area_elevation_storage(aev, tmsosid=None, raster_path=None, area=None, elevation=None, convert_to_egm08=False):
    """
    Interpolates the AEV (Area-Elevation-Volume) curve to return the elevation, area, and storage for an array of values.
    Optionally applies the EGM96-EGM08 raster offset to the elevations.

    Args:
        aev (pd.DataFrame): DataFrame containing the AEV curve with columns 'Elevation', 'CumArea', and 'Storage'.
            Rows may be in any order; they are sorted by the lookup column before interpolating.
        tmsosid (str, optional): The tmsosid used to locate the reservoir for the offset lookup.
            Required if convert_to_egm08 is True. Defaults to None.
        raster_path (str, optional): File path of the raster EGM96-EGM08. Required if convert_to_egm08 is True.
        area (array-like, optional): Array of area values for interpolation. Defaults to None.
        elevation (array-like, optional): Array of elevation values for interpolation. Defaults to None.
        convert_to_egm08 (bool, optional): Whether to subtract the sampled raster offset from the elevations.
            Reads the module-level ``val_pts`` for the reservoir location. Defaults to False.

    Returns:
        dict: A dictionary containing arrays of the interpolated 'elevation', 'area', and 'storage'.

    Raises:
        ValueError: If neither or both of 'area' and 'elevation' are given, or if the
            conversion is requested without raster_path and tmsosid.
    """
    if area is None and elevation is None:
        raise ValueError("Either 'area' or 'elevation' must be provided")

    if area is not None and elevation is not None:
        raise ValueError("Only one of 'area' or 'elevation' should be provided")

    if convert_to_egm08:
        if raster_path is None or tmsosid is None:
            raise ValueError("raster_path and tmsosid must be provided for EGM96 to EGM08 conversion")
        egm96_to_08_offset = egm_96_to_08_conversion(raster_path, tmsosid, val_pts)
    else:
        egm96_to_08_offset = 0

    # np.interp silently returns wrong values unless its x-coordinates are increasing,
    # so order the curve by whichever column is used for the lookup.
    lookup_col = 'Elevation' if elevation is not None else 'CumArea'
    curve = aev.sort_values(lookup_col, kind='stable')

    # NOTE(review): the offset is subtracted before the curve lookup in the elevation
    # branch but after it in the area branch — confirm both directions are intended.
    if elevation is not None:
        elevations = np.array(elevation)
        if convert_to_egm08:
            elevations = elevations - egm96_to_08_offset
        areas = np.interp(elevations, curve['Elevation'], curve['CumArea'])
        storages = np.interp(elevations, curve['Elevation'], curve['Storage'])
    else:
        areas = np.array(area)
        elevations = np.interp(areas, curve['CumArea'], curve['Elevation'])
        if convert_to_egm08:
            elevations = elevations - egm96_to_08_offset
        storages = np.interp(areas, curve['CumArea'], curve['Storage'])

    return {
        'elevation': elevations,
        'area': areas,
        'storage': storages
    }

In [None]:
def read_elevations(f):
    """
    Read one per-file CSV and parse its 'time' column to datetimes.

    Returns the DataFrame, or None (after printing the reason) when the file cannot be
    read or its 'time' column is missing or unparseable.
    """
    try:
        df = pd.read_csv(f, dtype={'tmsos_id': str})
        df['time'] = pd.to_datetime(df['time'])
    except Exception as err:
        print(f"Skipping {f}: {err}")
        return None
    return df


# For every reservoir directory: take the median ht_ortho per calendar date and
# translate it to area and storage through that reservoir's curve file.
processed_dir = Path("../data/icesat2/processed")
storage_dir = Path("../data/storage/icesat2/median_elevations/")
storage_dir.mkdir(parents=True, exist_ok=True)

# Only directories are reservoirs; stray files in processed_dir are ignored.
directories = [d for d in processed_dir.glob("*") if d.is_dir()]
print(len(directories))

for d in directories:
    # The directory name is the tmsos_id (d.stem would truncate a name containing a dot).
    RESERVOIR = d.name
    if RESERVOIR not in res_names:
        print(f"{RESERVOIR}: no entry in res_names, skipping")
        continue
    RESERVOIR_NAME = res_names[RESERVOIR]
    print(RESERVOIR, RESERVOIR_NAME)

    aec_fp = aec_dir / f'{RESERVOIR}.csv'
    if not aec_fp.exists():
        continue
    aec = pd.read_csv(aec_fp, parse_dates=True, comment='#')

    frames = [frame for frame in (read_elevations(f) for f in d.glob("*")) if frame is not None]
    if not frames:
        print(f"{RESERVOIR}: no readable elevation files, skipping")
        continue

    df = pd.concat(frames)
    # .copy() so the 'date' column is added to an independent frame, not a view of the filter.
    df = df[df['time'] > pd.to_datetime('2023-01-01')].copy()
    df['date'] = df['time'].dt.date

    avg_elevation = df.groupby('date').agg({
        'ht_ortho': 'median'
    })

    aes = get_area_elevation_storage(aec, elevation=avg_elevation['ht_ortho'])

    storage_df = avg_elevation.rename(columns={'ht_ortho': 'elevation'})
    storage_df['storage'] = aes['storage']
    storage_df['area'] = aes['area']

    save_fp = storage_dir / f"{RESERVOIR}_{RESERVOIR_NAME.split(',')[0].replace(' ', '_')}_storage.csv"
    storage_df.to_csv(save_fp)