In [1]:
# Last revised:  2024.05.07 - MAC

# Step 1B - Adjusting the run geopackages

In [2]:
import h5py
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point, LineString, Polygon, MultiPolygon, box, mapping, shape
import pandas as pd
import numpy as np

from shapely.geometry import MultiLineString, LineString
from shapely.ops import linemerge

from shapely.ops import nearest_points
from shapely.geometry import Point

In [3]:
# Inputs:
# NOTE(review): these are absolute, machine-specific Windows paths -- adjust before running elsewhere.
hdf_file_path = r'E:\HECRAS_2D_12070205\BLE_12070205_Engineering_Models\Engineering Models\Hydraulic Models\RAS_Submittal\LBSG_501\Input\BLE_LBSG_501.p02.hdf'
# NOTE(review): gpkg_nextgen_path is not referenced by any later cell visible in this notebook.
gpkg_nextgen_path = r'E:\ras2fim-2d\nextgen-test\nextgen_12.gpkg'

# Texas wide limiting discharge values (CSV)
str_limiting_discharge_csv = r'E:\ras2fim-2d\input_gis\LimitingDischarge.csv'

# --- Model hydrofabric computed from 01_cwe_ras2D_nextgen_hydrofabric_gpkg_20240502
str_hydrofabric_path = r'E:\sample_2d_output\model_hydrofabric.gpkg'

# Read the stream flowpath lines (layer '01_stream_lines') from the model hydrofabric GeoPackage
gdf_flowpaths = gpd.read_file(str_hydrofabric_path, layer='01_stream_lines')

In [4]:
# ------------------------
def fn_get_group_names(hdf5_file_path, group_path):
    """
    List the sub-groups that sit directly beneath a group of an HDF5 file.

    Parameters:
    hdf5_file_path (str): Location of the HDF5 file on disk.
    group_path (str): HDF5 path of the parent group to inspect.

    Returns:
    list or None: Names of the members of ``group_path`` that are HDF5 Group objects.
                  None when ``group_path`` is absent from the file, or when any exception
                  is raised while reading (a message is printed in both cases).
    """
    try:
        with h5py.File(hdf5_file_path, 'r') as hdf_file:
            # Guard clause: nothing to list when the parent group is missing
            if group_path not in hdf_file:
                print(f"Group '{group_path}' not found in the HDF5 file.")
                return None

            parent_group = hdf_file[group_path]

            # Keep only members that are groups (datasets are skipped)
            list_subgroup_names = []
            for member_name in parent_group:
                if isinstance(parent_group[member_name], h5py.Group):
                    list_subgroup_names.append(member_name)

            return list_subgroup_names
    except Exception as e:
        # Best-effort: report the problem and signal failure with None
        print(f"An error occurred: {e}")
        return None
# ------------------------

In [5]:
# ------------------------
def get_gdf_of_2d_area(hdf_file_path):
    """
    Build a one-row GeoDataFrame holding the perimeter polygon of the first 2D flow area
    found in a HEC-RAS HDF5 file.

    Parameters:
    hdf_file_path (str): The file path to the HDF5 file.

    Returns:
    GeoDataFrame or None: One row with the perimeter polygon plus the columns
                          'area_2d_name', 'hdf_path', 'terrain_path' and 'prj_wkt_ras_model'.
                          None when no 2D area could be listed (group missing, file unreadable,
                          or the group contains no sub-groups).
    """
    # HDF5 group that holds one sub-group per 2D flow area
    str_hdf_geom_path = '/Geometry/2D Flow Areas/'

    # Get names of HDF5 Group objects in the specified group
    list_group_names = fn_get_group_names(hdf_file_path, str_hdf_geom_path)

    # fn_get_group_names returns None when the group is missing or the file could not be read;
    # treat that the same as "no areas" instead of failing on len(None)
    if not list_group_names:
        print('Error: No 2D areas found')
        return None

    if len(list_group_names) > 1:
        print('Multiple 2D areas found -- Using first area:', list_group_names[0])

    str_area_name = list_group_names[0]
    str_perimeter_points = str_hdf_geom_path + str_area_name + '/' + 'Perimeter'

    # Read everything needed from the file in a single open
    with h5py.File(hdf_file_path, 'r') as hdf_file:
        arr_perim_points = hdf_file[str_perimeter_points][:]

        # Extract the projection (stored as a WKT string on the file's root attributes)
        projection_wkt = hdf_file.attrs['Projection'].decode('utf-8')

        str_terrain_filename = hdf_file['/Geometry/'].attrs['Terrain Filename'].decode('utf-8')

    # Convert the array of perimeter points into a Polygon
    shp_polygon_geom = Polygon(arr_perim_points)

    # Create a GeoDataFrame
    gdf_2d_area_polygon = gpd.GeoDataFrame(index=[0], crs=projection_wkt, geometry=[shp_polygon_geom])

    gdf_2d_area_polygon['area_2d_name'] = str_area_name
    gdf_2d_area_polygon['hdf_path'] = hdf_file_path
    gdf_2d_area_polygon['terrain_path'] = str_terrain_filename
    gdf_2d_area_polygon['prj_wkt_ras_model'] = projection_wkt

    return gdf_2d_area_polygon
# ------------------------

In [6]:
# +++++++++++++++++++++++++++++
def fn_create_cell_gdf(hdf5_file_path, str_hdf_folder_2darea):
    """
    Create a GeoDataFrame of cells from HDF5 data.

    Parameters:
    hdf5_file_path (str): The file path to the HDF5 file.
    str_hdf_folder_2darea (str): The folder containing 2D area data within the HDF5 file
                                 (must end with '/', the dataset names are appended to it).

    Returns:
    GeoDataFrame: A GeoDataFrame containing polygons representing cells,
                  constructed from the face point coordinates extracted from the HDF5 file.
                  Row i corresponds to row i of 'Cells FacePoint Indexes'. A row with fewer
                  than three valid face points yields an empty polygon so that the row order
                  stays aligned with the cell index.
    """
    # Location of the face point coordinates in the HDF5 file
    str_facepoint_coords = str_hdf_folder_2darea + 'FacePoints Coordinate'

    # Location of the indices of the face points making up each cell
    str_cells_facepoint_indexes = str_hdf_folder_2darea + 'Cells FacePoint Indexes'

    # Read everything needed in a single open of the file
    with h5py.File(hdf5_file_path, 'r') as hdf_file:
        # First two columns are used as X and Y
        arr_facepoint_xy = hdf_file[str_facepoint_coords][:][:, :2]

        # One row per cell; -1 marks an unused slot in the row
        arr_cell_facepoint_idx = np.asarray(hdf_file[str_cells_facepoint_indexes][:], dtype=np.int64)

        # Extract the projection
        projection_wkt = hdf_file.attrs['Projection'].decode('utf-8')

    list_cell_polygons = []

    for arr_row in arr_cell_facepoint_idx:
        # Look up all of the cell's vertices at once (NumPy fancy indexing) instead of
        # one DataFrame.loc call per vertex
        arr_ring_xy = arr_facepoint_xy[arr_row[arr_row != -1]]

        if len(arr_ring_xy) < 3:
            # Not enough vertices for a ring -- keep a placeholder to preserve row alignment
            list_cell_polygons.append(Polygon())
            continue

        # shapely closes the ring implicitly, so the first point need not be repeated
        list_cell_polygons.append(Polygon(arr_ring_xy))

    # Create a GeoDataFrame
    gdf_cells = gpd.GeoDataFrame(geometry=list_cell_polygons, crs=projection_wkt)

    return gdf_cells
# +++++++++++++++++++++++++++++

In [7]:
%%time
print('Extracting HEC-RAS computational cell polygons...')

# Specify the HDF5 file path and group path
str_hdf_2darea_root_folder = '/Geometry/2D Flow Areas/'

# Get names of HDF5 Group objects in the specified group
# NOTE(review): fn_get_group_names returns None when the group is missing or the file cannot be
# read; the [0] lookups below would then raise a TypeError.
list_group_names = fn_get_group_names(hdf_file_path, str_hdf_2darea_root_folder)

# Only the first 2D flow area is processed; any additional areas are ignored
str_hdf_folder_2darea = str_hdf_2darea_root_folder + list_group_names[0] + '/'
gdf_cells = fn_create_cell_gdf(hdf_file_path, str_hdf_folder_2darea)

print(f' -- Number of cells in {list_group_names[0]}: {len(gdf_cells)}')

Extracting HEC-RAS computational cell polygons...
 -- Number of cells in 1207020501: 220317
CPU times: total: 22.1 s
Wall time: 22.1 s


In [8]:
# Add a new column 'idx' to gdf_cells with index values
# (kept as a regular column so it is carried through the later overlay and can be used
#  to look up per-cell values such as Manning's n)
gdf_cells['idx'] = gdf_cells.index

In [9]:
# Add a new column 'idx' to gdf_cells with index values
# (repeats the assignment made in the previous cell; re-running it gives the same result)
gdf_cells['idx'] = gdf_cells.index

# Get cell's Mannings at center point
str_cell_center_mannings = str_hdf_folder_2darea + "Cells Center Manning's n"

# Open the HDF5 file
with h5py.File(hdf_file_path, 'r') as hdf_file:
    # Read the whole dataset into memory; it is later indexed by the cells' 'idx' values
    arr_cell_mannings = hdf_file[str_cell_center_mannings][:]
    
# Reproject gdf_flowpaths to the CRS of gdf_cells
gdf_flowpaths_local_crs = gdf_flowpaths.to_crs(gdf_cells.crs)

# Keep only the 'geometry' and 'id' columns
gdf_flowpaths_local_crs_lean = gdf_flowpaths_local_crs[['geometry', 'id']]

In [10]:
%%time
# about 30 seconds on test grid

print("Determining stream thalweg Manning's 'n'...")

# Intersect the cells and the stream centerlines
# (each output row is the piece of one flowpath that lies inside one cell, carrying 'id' and 'idx')
gdf_stream_segments_per_cell = gpd.overlay(gdf_flowpaths_local_crs_lean, gdf_cells, how="intersection")

# compute the length for each segment
gdf_stream_segments_per_cell['length'] = gdf_stream_segments_per_cell['geometry'].length

# Set Manning's coefficient for each segment based on 'idx' field
# (arr_cell_mannings is indexed by the cell's row number stored in 'idx')
gdf_stream_segments_per_cell['mannings'] = gdf_stream_segments_per_cell['idx'].map(lambda idx: arr_cell_mannings[int(idx)])

# Calculate Manning's x length for each segment
gdf_stream_segments_per_cell['mannings_x_len'] = gdf_stream_segments_per_cell['mannings'] * gdf_stream_segments_per_cell['length']

# Summary values by stream segment

# Create a pandas dataframe
df = gdf_stream_segments_per_cell[['id', 'length', 'mannings_x_len']].copy()

# Group by 'id' and sum the 'length' and 'mannings_x_len'
df_stream_summary = df.groupby('id').agg({'length': 'sum', 'mannings_x_len': 'sum'}).reset_index()

# Rename columns for clarity
df_stream_summary.columns = ['id', 'total_length', 'total_mannings_x_len']

# Compute reach averaged Mannings 'n' values (three decimal points)
# This is a length-weighted mean: sum(n * length) / sum(length) per flowpath 'id'
df_stream_summary['manning']= round(df_stream_summary['total_mannings_x_len'] / df_stream_summary['total_length'],3)

# Filter down to just the values required
df_stream_summary_lean = df_stream_summary[['id', 'manning']]

Determining stream thalweg Manning's 'n'...
CPU times: total: 27.7 s
Wall time: 27.7 s


In [11]:
# Perform the left join operation
# Every flowpath is kept; flowpaths whose 'id' is absent from df_stream_summary_lean
# (no overlap with any cell) get NaN in 'manning'
gdf_streams_w_mannings = pd.merge(gdf_flowpaths, df_stream_summary_lean, on='id', how='left')

In [12]:
gdf_streams_w_mannings.explore()

In [13]:
# *************
# For now (2024.05.07 - the upper flow is 30% of the limiting discharge)
flt_q_ratio = 0.30
# *************

In [14]:
# ------------------
def fn_calculate_travel_time(row, flt_assumed_hydraulic_radius=1.0):
    """
    Estimate the "low flow travel time" (hours) along one flowpath using Manning's equation.

    Velocity is taken as V = (1/n) * Rh^(2/3) * So^(1/2), so the travel time is
    length * n / (Rh^(2/3) * So^(1/2)).

    Parameters:
    row (pandas.Series or mapping): Must provide 'manning' (Manning's n),
                                    'lengthkm' (flowpath length in kilometers) and
                                    'So' (slope, used as m/m).
    flt_assumed_hydraulic_radius (float): Hydraulic radius assumed for the low flow estimate.
                                          Defaults to 1.0 (a conservative estimate is needed).
                                          NOTE(review): presumably meters, to match the
                                          meter-based length -- confirm.

    Returns:
    float: Travel time in hours, rounded to two decimals.

    Note: a zero or negative 'So' is not guarded against here; the caller is expected to
    supply a positive slope.
    """
    flt_mannings = row['manning']
    flt_length_m = row['lengthkm'] * 1000
    flt_slope_m_per_m = row['So']

    # Exact 2/3 exponent (rather than the 0.667 approximation) so that a
    # non-default hydraulic radius is handled accurately
    flt_rh_term = flt_assumed_hydraulic_radius ** (2.0 / 3.0)

    flt_time_sec = (flt_length_m * flt_mannings) / (flt_rh_term * (flt_slope_m_per_m ** 0.5))
    flt_time_hr = round(flt_time_sec / 3600, 2)
    return flt_time_hr
# ------------------

In [15]:
# This binds a second name to the same object (no copy is made), so columns added to
# gdf_streams in later cells also appear on gdf_streams_w_mannings
gdf_streams = gdf_streams_w_mannings

In [16]:
# Load the CSV file into a NumPy array
# NOTE(review): no skip_header is passed, so a header row (if the CSV has one) would be read as NaN -- confirm the file layout
data = np.genfromtxt(str_limiting_discharge_csv, delimiter=',')
# Sort the rows by the first column so it is ascending, as np.interp requires of its x-coordinates
data = data[data[:,0].argsort()]

# Drainage area converted from square kilometers to square miles
gdf_streams['da_sq_mile'] = gdf_streams['tot_drainage_areasqkm'] * 0.386102

# Perform linear interpolation for each row in df and round the result to the nearest integer
# (column 0 of the CSV is matched against the drainage area, column 1 supplies the limiting discharge)
gdf_streams['q_limiting'] = gdf_streams['da_sq_mile'].apply(lambda x: round(np.interp(x, data[:,0], data[:,1])))

# Upper flow limit is ratio of limiting discharge == flt_q_ratio
gdf_streams['q_upper_limit'] = round(gdf_streams['q_limiting'] * flt_q_ratio,0)

# Low flow travel time estimate (hours) per flowpath; needs the 'manning', 'lengthkm' and 'So' columns
gdf_streams['travel_time_hr'] = gdf_streams.apply(fn_calculate_travel_time, axis=1)

In [17]:
#gdf_streams

In [18]:
# Dissolve the travel paths (flowpaths) by 'mainstem' attribute
gdf_mainstems = gdf_streams.dissolve(by='mainstem')

# Reset the index
# (dissolve moves 'mainstem' into the index; this turns it back into a column)
gdf_mainstems.reset_index(inplace=True)

# Keep only the 'mainstem' and 'geometry' columns
gdf_mainstems = gdf_mainstems[['mainstem', 'geometry']]

# Merge the parts of each dissolved MultiLineString end-to-end where they connect
for idx, row in gdf_mainstems.iterrows():
    geom = row['geometry']
    if isinstance(geom, MultiLineString):
        # linemerge may still return a MultiLineString when the parts do not all connect
        merged_line = linemerge(geom)
        if merged_line.is_empty:
            pass
        else:
            # Replace the MultiLineString with the merged LineString
            gdf_mainstems.at[idx, 'geometry'] = merged_line

In [19]:
gdf_mainstems.explore()

In [20]:
# Filter rows where rl_NHDWaterbodyComID is not null
# (these are the flowpaths treated as waterbody reaches in the following cells)
gdf_waterbody_flowpaths = gdf_flowpaths[gdf_flowpaths['rl_NHDWaterbodyComID'].notnull()]

# Dissolve the travel paths by 'mainstem' attribute
gdf_waterbody_flowpaths_disolve = gdf_waterbody_flowpaths.dissolve(by='mainstem')

# Reset the index
# (dissolve moves 'mainstem' into the index; this turns it back into a column)
gdf_waterbody_flowpaths_disolve.reset_index(inplace=True)

# Keep only the 'mainstem' and 'geometry' columns
gdf_waterbody_flowpaths_disolve = gdf_waterbody_flowpaths_disolve[['mainstem', 'geometry']]

# Merge the parts of each dissolved waterbody MultiLineString end-to-end where they connect
for idx, row in gdf_waterbody_flowpaths_disolve.iterrows():
    geom = row['geometry']
    if isinstance(geom, MultiLineString):
        # linemerge may still return a MultiLineString when the parts do not all connect
        merged_line = linemerge(geom)
        if merged_line.is_empty:
            pass
        else:
            # Replace the MultiLineString with the merged LineString
            gdf_waterbody_flowpaths_disolve.at[idx, 'geometry'] = merged_line

In [21]:
# --------
def fn_convert_to_list_of_linestrings(geometry):
    """
    Break a line geometry into a list of non-empty LineStrings.

    Parameters:
    geometry (shapely geometry or None): Typically the result of a difference operation.

    Returns:
    list: One LineString per part for a MultiLineString, a single-item list for a
          LineString, and an empty list for None, empty geometries and any other type.
    """
    # An empty result (whatever its type) means nothing is left of the line
    if geometry is None or geometry.is_empty:
        return []
    if isinstance(geometry, MultiLineString):
        return [LineString(part.coords) for part in geometry.geoms if not part.is_empty]
    if isinstance(geometry, LineString):
        return [geometry]
    return []
# --------

# Mainstems that get replaced, and the rows (one per remaining piece) that replace them.
# Collected first and assembled once, instead of growing a frame row by row inside the loop.
list_replaced_mainstems = []
list_new_rows = []

for index, row in gdf_waterbody_flowpaths_disolve.iterrows():

    # Extract the mainstem value
    mainstem_value = row['mainstem']

    # Extract the corresponding linestring from gdf_mainstems
    mainstem_linestring = gdf_mainstems[gdf_mainstems['mainstem'] == mainstem_value].geometry.iloc[0]

    # Remove the waterbody portion (from gdf_waterbody_flowpaths_disolve) from the mainstem line
    mainstem_differance = mainstem_linestring.difference(row.geometry)

    list_linestrings = fn_convert_to_list_of_linestrings(mainstem_differance)

    # When nothing remains of the mainstem, it is left in place unchanged
    if len(list_linestrings) > 0:
        list_replaced_mainstems.append(mainstem_value)
        for linestring in list_linestrings:
            list_new_rows.append({'mainstem': mainstem_value, 'geometry': linestring})

# Untouched mainstems keep their original order; the new pieces are appended after them
gdf_mainstems_kept = gdf_mainstems[~gdf_mainstems['mainstem'].isin(list_replaced_mainstems)]

if list_new_rows:
    gdf_mainstem_pieces = gpd.GeoDataFrame(list_new_rows, geometry='geometry', crs=gdf_mainstems.crs)
    gdf_mainstems_revised = pd.concat([gdf_mainstems_kept, gdf_mainstem_pieces], ignore_index=True)
else:
    # Independent frame (not an alias), so later column additions do not alter gdf_mainstems
    gdf_mainstems_revised = gdf_mainstems_kept.reset_index(drop=True)

  arr = construct_1d_object_array_from_listlike(values)
  arr = construct_1d_object_array_from_listlike(values)


In [22]:
#gdf_mainstems_revised

In [23]:
#gdf_streams

In [24]:
# Determine the lines in gdf_streams that intersect the line in gdf_mainstems_revised.iloc[0]
# They must have the same 'mainstem' value
# return a dataframe of the intersecting lines.

In [25]:
# For each mainstem (with waterbodies removed), determine the low flow travel time estimate for that mainstem run
list_travel_time_hr = []
# NOTE(review): list_upper_q_cfs is never filled; the append that used it is commented out below
list_upper_q_cfs = []

for index, row in gdf_mainstems_revised.iterrows():
    
    # Get the value of 'mainstem' for the current row of gdf_mainstems_revised
    mainstem_value = row['mainstem']

    # Select all rows from gdf_streams where 'mainstem' matches mainstem_value
    selected_rows = gdf_streams[gdf_streams['mainstem'] == mainstem_value]

    # Get the geometry from gdf_mainstems_revised
    mainstems_geometry = row['geometry']

    # Check which rows in selected_rows have geometries that are completely covered by mainstems_geometry
    # (flowpaths only partly on this run are excluded from the sum)
    covered_rows = selected_rows[selected_rows.geometry.apply(lambda x: x.covered_by(mainstems_geometry))]

    # Sort covered_rows by 'da_sq_mile' in ascending order
    sorted_covered_rows = covered_rows.sort_values(by='da_sq_mile', ascending=True)

    # Sum the 'travel_time_hr' column in sorted_covered_rows
    total_travel_time_hr = round(sorted_covered_rows['travel_time_hr'].sum(),2)
    
    # create a list of the travel times
    list_travel_time_hr.append(total_travel_time_hr)
    
    # upper limit flow rate (q) of the row with the smallest drainage area
    #list_upper_q_cfs.append(sorted_covered_rows.iloc[0]['q_upper_limit'])

In [26]:
# list_travel_time_hr holds one value per row of gdf_mainstems_revised, in row order
gdf_mainstems_revised['travel_time_hr'] = list_travel_time_hr
#gdf_mainstems_revised['q_upper_limit_cfs'] = list_upper_q_cfs

In [27]:
gdf_streams

Unnamed: 0,id,toid,mainstem,order,hydroseq,lengthkm,areasqkm,tot_drainage_areasqkm,has_divide,divide_id,...,So,ChSlp,within_2darea,network_group,geometry,manning,da_sq_mile,q_limiting,q_upper_limit,travel_time_hr
0,wb-2410251,nex-2410252,1884413.0,3.0,23858,3.611474,6.762601,105.379651,True,cat-2410251,...,0.004384,0.393827,True,0,"MULTILINESTRING ((-201974.170 865603.245, -201...",0.061,40.687294,159844,47953.0,0.92
1,wb-2410255,nex-2410256,1884413.0,3.0,23880,4.242299,12.443399,298.642500,True,cat-2410255,...,0.002474,0.335564,True,0,"MULTILINESTRING ((-191536.885 862432.472, -191...",0.060,115.306467,300272,90082.0,1.42
2,wb-2410258,nex-2410259,1884413.0,3.0,23891,3.307006,4.692601,402.482251,True,cat-2410258,...,0.002290,0.321687,True,0,"MULTILINESTRING ((-183575.970 855531.666, -183...",0.060,155.399202,359858,107957.0,1.15
3,wb-2410259,nex-2410260,1884413.0,3.0,23902,5.117748,9.213300,513.826651,True,cat-2410259,...,0.002406,0.310669,True,0,"MULTILINESTRING ((-183012.311 852973.757, -182...",0.060,198.389498,416970,125091.0,1.74
4,wb-2410260,nex-2410261,1884413.0,3.0,23906,4.738277,11.139302,543.650852,True,cat-2410260,...,0.002356,0.306763,True,0,"MULTILINESTRING ((-180670.016 849786.164, -180...",0.060,209.904681,431306,129392.0,1.63
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,wb-2410263,nex-2410264,1884413.0,3.0,23912,8.439995,34.848450,638.368652,True,cat-2410263,...,0.000820,0.298813,True,0,"MULTILINESTRING ((-171561.658 847291.965, -171...",0.059,246.475413,474553,142366.0,4.83
68,wb-2410264,nex-2410265,1884413.0,3.0,23913,1.725184,4.174200,642.542852,True,cat-2410264,...,0.004500,0.298149,True,0,"MULTILINESTRING ((-165132.694 844992.882, -165...",0.057,248.087080,476391,142917.0,0.41
69,wb-2410523,nex-2410487,1884821.0,1.0,23917,3.787704,3.903300,3.903300,True,cat-2410523,...,0.009520,0.641737,False,1,"MULTILINESTRING ((-204837.470 854732.676, -204...",0.070,1.507072,13937,4181.0,0.75
70,wb-2410509,nex-2410491,1884794.0,1.0,23936,3.767248,3.233250,3.233250,True,cat-2410509,...,0.010070,0.660385,False,2,"MULTILINESTRING ((-187569.358 849434.073, -187...",0.100,1.248364,11819,3546.0,1.04


In [28]:
gdf_mainstems_revised.explore()

In [29]:
#gdf_mainstems_revised

In [30]:
# mainstems need peak discharge

In [37]:
# Read the nextgen hydrofabric points
gdf_flow_points = gpd.read_file(str_hydrofabric_path, layer='02_flow_points')

# ---- limiting discharge per point ----
# Drainage area converted from square kilometers to square miles
gdf_flow_points['da_sq_mile'] = gdf_flow_points['tot_drainage_areasqkm'] * 0.386102

# Perform linear interpolation for each row and round the result to the nearest integer
gdf_flow_points['q_limiting'] = gdf_flow_points['da_sq_mile'].apply(lambda x: round(np.interp(x, data[:,0], data[:,1])))

# Upper flow limit is ratio of limiting discharge == flt_q_ratio
gdf_flow_points['q_upper_limit'] = round(gdf_flow_points['q_limiting'] * flt_q_ratio,0)
# ---- ----


# Add upper limit flow and starting cell index for each stabilizing run's flowpath
list_start_cell_index = []
list_q_upper_limit = []

# get the starting point's cell index for each stabilizing run
for idx, row in gdf_mainstems_revised.iterrows():
    mainstem = row['mainstem']
    linestring = row['geometry']

    # First vertex of the run (requires a single LineString geometry)
    start_point = Point(linestring.coords[0])

    # Only flow points on the same mainstem are candidates
    gdf_points_same_mainstem = gdf_flow_points[gdf_flow_points['mainstem'] == mainstem]

    if gdf_points_same_mainstem.empty:
        # No flow point on this mainstem -- keep the lists aligned with the rows
        list_start_cell_index.append(None)
        list_q_upper_limit.append(None)
        continue

    # Index label of the closest candidate. Taking the label from the filtered frame keeps the
    # match on this mainstem even when a point of another mainstem has identical coordinates.
    nearest_point_idx = gdf_points_same_mainstem.geometry.distance(start_point).idxmin()
    nearest_flow_point = gdf_flow_points.loc[nearest_point_idx]

    # append the starting cell index
    list_start_cell_index.append(int(nearest_flow_point['idx_start_cell']))

    # append the upper flow limit for that reach
    list_q_upper_limit.append(nearest_flow_point['q_upper_limit'])

gdf_mainstems_revised['idx_start_cell'] = list_start_cell_index
gdf_mainstems_revised['q_upper_limit'] = list_q_upper_limit

In [44]:
# NOTE(review): absolute, machine-specific output location -- adjust before running elsewhere
output_path = r'E:\sample_2d_output\model_hydrofabric_revised.gpkg'

# Read the 2d area
# (passed through unchanged from the input hydrofabric to the revised GeoPackage)
gdf_area = gpd.read_file(str_hydrofabric_path, layer='00_area_2d')

# Write GeoDataFrames to GeoPackage as separate layers
gdf_mainstems_revised.to_file(output_path, layer='03_flowpaths_stabilize', driver="GPKG")
gdf_flow_points.to_file(output_path, layer='02_flow_points', driver="GPKG")
gdf_streams.to_file(output_path, layer='01_stream_lines', driver="GPKG")
gdf_area.to_file(output_path, layer='00_area_2d', driver="GPKG")