In [1]:
# Create a geopackage of the stream network for the HEC-RAS model

In [2]:
import h5py

from shapely.geometry import Point, LineString, Polygon, MultiPolygon, box, mapping, shape
import geopandas as gpd
import pandas as pd
import networkx as nx

import os

In [3]:
# Inputs:
# HEC-RAS plan HDF file; the cells below read the 2D flow area perimeter, the
# projection and the computational-cell geometry from it.
hdf_file_path = r'E:\HECRAS_2D_12070205\BLE_12070205_Engineering_Models\Engineering Models\Hydraulic Models\RAS_Submittal\LBSG_501\Input\BLE_LBSG_501.p02.hdf'
# NextGen hydrofabric GeoPackage; the 'flowpaths', 'flowpath_attributes' and
# 'nexus' layers are read from it.
gpkg_nextgen_path = r'E:\ras2fim-2d\nextgen-test\nextgen_12.gpkg'

# Specify the path where you want to save the GeoPackage file
# NOTE(review): all three paths are absolute, machine-specific Windows paths --
# edit them before running this notebook on another machine.
output_path = r'E:\sample_2d_output\model_hydrofabric.gpkg'

In [4]:
# ------------------------
def fn_get_group_names(hdf5_file_path, group_path):
    """
    List the names of the HDF5 groups sitting directly beneath a group path.

    Parameters:
    hdf5_file_path (str): The file path to the HDF5 file.
    group_path (str): Path, inside the file, of the parent group to inspect.

    Returns:
    list or None: Names of the direct children of `group_path` that are
                  h5py.Group objects (datasets are left out).
                  None when `group_path` is absent from the file, or when any
                  exception is raised while reading; a message is printed in
                  both cases.
    """
    try:
        with h5py.File(hdf5_file_path, 'r') as hdf_file:
            # Guard clause: nothing to list when the parent group is missing
            if group_path not in hdf_file:
                print(f"Group '{group_path}' not found in the HDF5 file.")
                return None

            parent_group = hdf_file[group_path]

            # Keep only the children that are themselves groups
            list_child_groups = []
            for child_name in parent_group:
                if isinstance(parent_group[child_name], h5py.Group):
                    list_child_groups.append(child_name)

            return list_child_groups
    except Exception as e:
        # Best-effort helper: report the problem and hand back None
        print(f"An error occurred: {e}")
        return None
# ------------------------

In [5]:
# ------------------------
def get_gdf_of_2d_area(hdf_file_path):
    """
    Build a one-row GeoDataFrame holding the perimeter polygon of the model's 2D flow area.

    The first group found under '/Geometry/2D Flow Areas/' is used; a message
    is printed when more than one 2D area exists.

    Parameters:
    hdf_file_path (str): Path to the HEC-RAS HDF file.

    Returns:
    GeoDataFrame or None: One row whose geometry is the polygon built from the
                          area's 'Perimeter' dataset, with the CRS taken from
                          the file's 'Projection' attribute and the columns
                          'area_2d_name', 'hdf_path', 'terrain_path' and
                          'prj_wkt_ras_model'.
                          None when no 2D area can be found (a message is printed).
    """
    # Specify the HDF5 group path that holds the 2D flow areas
    str_hdf_geom_path = '/Geometry/2D Flow Areas/'

    # Get names of HDF5 Group objects in the specified group
    list_group_names = fn_get_group_names(hdf_file_path, str_hdf_geom_path)

    # fn_get_group_names returns None when the group is missing or the file
    # cannot be read; treat that exactly like an empty list instead of
    # crashing on len(None).
    if not list_group_names:
        print('Error: No 2D areas found')
        return None

    str_area_name = list_group_names[0]

    if len(list_group_names) > 1:
        print('Multiple 2D areas found -- Using first area:', str_area_name)

    str_perimeter_points = str_hdf_geom_path + str_area_name + '/' + 'Perimeter'

    # Open the HDF file once and pull everything that is needed from it
    with h5py.File(hdf_file_path, 'r') as hdf_file:
        # presumably an (n, 2) array of x/y perimeter vertices -- TODO confirm
        arr_perim_points = hdf_file[str_perimeter_points][:]

        # Extract the projection
        projection_wkt = hdf_file.attrs['Projection'].decode('utf-8')

        str_terrain_filename = hdf_file['/Geometry/'].attrs['Terrain Filename'].decode('utf-8')

    # Convert the array of perimeter points into a Polygon
    shp_polygon_geom = Polygon(arr_perim_points)

    # Create a GeoDataFrame
    gdf_2d_area_polygon = gpd.GeoDataFrame(index=[0], crs=projection_wkt, geometry=[shp_polygon_geom])

    gdf_2d_area_polygon['area_2d_name'] = str_area_name
    gdf_2d_area_polygon['hdf_path'] = hdf_file_path
    gdf_2d_area_polygon['terrain_path'] = str_terrain_filename
    gdf_2d_area_polygon['prj_wkt_ras_model'] = projection_wkt

    return gdf_2d_area_polygon
# ------------------------

In [6]:
# ------------------------
def fn_extract_flowpaths_gdf(gdf_2d_area_polygon, gpkg_nextgen_path):
    """
    Select the NextGen flowpaths that touch the 2D area and attach selected attributes.

    Parameters:
    gdf_2d_area_polygon (GeoDataFrame): 2D area polygon; only the first row's
                                        geometry is used.
    gpkg_nextgen_path (str): Path to the NextGen hydrofabric GeoPackage. The
                             'flowpaths' and 'flowpath_attributes' layers are read.

    Returns:
    GeoDataFrame: Flowpaths (in the CRS of the 'flowpaths' layer) that intersect
                  the 2D area polygon, left-joined on 'id' with the columns
                  'rl_gages', 'rl_NHDWaterbodyComID', 'So' and 'ChSlp', plus a
                  boolean 'within_2darea' column that is True when the line
                  lies entirely within the polygon.

    Raises:
    ValueError: If `gdf_2d_area_polygon` is None or has no rows.
    """
    if gdf_2d_area_polygon is None or len(gdf_2d_area_polygon) == 0:
        raise ValueError('A 2D area polygon is required to extract flowpaths')

    # From the 2d area, get the nextgen hydrofabric
    print('Reading Nextgen Hydrofabric ~5 seconds...')

    # Read the nextgen hydrofabric
    gdf_flowpaths = gpd.read_file(gpkg_nextgen_path, layer='flowpaths')

    # Work in the CRS of the hydrofabric
    gdf_2d_area_polygon_nextgen = gdf_2d_area_polygon.to_crs(gdf_flowpaths.crs)

    # Extract the first polygon
    shp_first_poly = gdf_2d_area_polygon_nextgen.geometry.iloc[0]

    # Cheap pre-filter: keep the lines that touch the polygon's bounding box
    bbox_geom = box(*shp_first_poly.bounds)
    gdf_ln_within_bbox = gdf_flowpaths[gdf_flowpaths.geometry.intersects(bbox_geom)]

    # Keep the stream lines that are within or intersect the 2d area.
    # A line that is within the polygon also intersects it, so a single
    # intersects() test selects exactly the same rows.
    gdf_ln_within_2d_area = gdf_ln_within_bbox[gdf_ln_within_bbox.geometry.intersects(shp_first_poly)]

    # Get a unique list of 'id' from gdf_ln_within_2d_area
    list_unique_ids = gdf_ln_within_2d_area['id'].unique().tolist()

    # Read the GeoPackage file to get the 'flowpath_attributes' table
    gdf_flowpath_attrib = gpd.read_file(gpkg_nextgen_path, layer='flowpath_attributes')

    # Select rows where 'id' is in list_unique_ids
    gdf_attrib_flowpath_2darea = gdf_flowpath_attrib[gdf_flowpath_attrib['id'].isin(list_unique_ids)]

    # Keep only the attribute columns that are carried forward
    columns_to_keep = ['id', 'rl_gages', 'rl_NHDWaterbodyComID', 'So', 'ChSlp']
    gdf_attrib_flowpath_2darea_subset = gdf_attrib_flowpath_2darea[columns_to_keep]

    # Perform left join on 'id' to add the selected attributes
    gdf_flowpath_with_attrib = gdf_ln_within_2d_area.merge(gdf_attrib_flowpath_2darea_subset, on='id', how='left')

    # Flag the lines that lie entirely within the polygon
    gdf_flowpath_with_attrib['within_2darea'] = gdf_flowpath_with_attrib.geometry.within(shp_first_poly)

    return gdf_flowpath_with_attrib
# ------------------------

In [7]:
# ------------------
def fn_create_upstream_flowpath_points(gdf_flowpath_with_attrib, gdf_2d_area_polygon_nextgen):
    """
    Create one point at the upstream end (first vertex) of every flowpath.

    Parameters:
    gdf_flowpath_with_attrib (GeoDataFrame): Flowpath lines with the columns
                                             'id', 'mainstem',
                                             'tot_drainage_areasqkm' and 'order'.
    gdf_2d_area_polygon_nextgen (GeoDataFrame): 2D area polygon in the same CRS
                                                as the flowpaths; only the
                                                first row's geometry is used.

    Returns:
    GeoDataFrame: One point per flowpath (CRS of the flowpaths) with the columns
                  'id', 'within_2darea' (True when the point lies within the
                  2D area polygon), 'mainstem', 'tot_drainage_areasqkm' and 'order'.
                  Flowpaths whose geometry is missing, empty, or not a
                  (Multi)LineString are skipped.
    """
    list_upstream_points = []
    list_id = []

    for geom, flowpath_id in zip(gdf_flowpath_with_attrib.geometry, gdf_flowpath_with_attrib['id']):
        if geom is None or geom.is_empty:
            continue

        if geom.geom_type == 'LineString':
            shp_first_line = geom
        elif geom.geom_type == 'MultiLineString':
            # Multi-part geometries must be accessed through .geoms; iterating
            # the geometry itself is deprecated in Shapely 1.8 and an error in 2.0.
            # The first part is taken as the upstream-most part so that exactly
            # one point (and one id) is produced per flowpath.
            shp_first_line = geom.geoms[0]
        else:
            continue

        if shp_first_line.is_empty:
            continue

        # The first vertex of the line is treated as its upstream end
        list_upstream_points.append(Point(shp_first_line.coords[0]))
        list_id.append(flowpath_id)

    # Create a new GeoDataFrame from the points
    gdf_upstream_points = gpd.GeoDataFrame(geometry=list_upstream_points, crs=gdf_flowpath_with_attrib.crs)

    # Add the id list to the gdf (same length and order as the points)
    gdf_upstream_points['id'] = list_id

    # Create a column that states if a start point is within the 2d area
    shp_2d_area = gdf_2d_area_polygon_nextgen.iloc[0].geometry
    gdf_upstream_points['within_2darea'] = gdf_upstream_points.geometry.within(shp_2d_area)

    # Add attributes to the points from the corresponding stream
    columns_to_keep = ['id', 'mainstem', 'tot_drainage_areasqkm', 'order']
    gdf_flowpath_for_join = gdf_flowpath_with_attrib[columns_to_keep]

    # Perform left join on 'id' to add the selected attributes
    gdf_upstream_points = gdf_upstream_points.merge(gdf_flowpath_for_join, on='id', how='left')

    return gdf_upstream_points
# ------------------

In [8]:
# +++++++++++++++++++++++++++++
def fn_create_cell_gdf(hdf5_file_path, str_hdf_folder_2darea):
    """
    Create a GeoDataFrame of cells from HDF5 data.

    Parameters:
    hdf5_file_path (str): The file path to the HDF5 file.
    str_hdf_folder_2darea (str): HDF5 path of the 2D area group, ending in '/'.
                                 The 'FacePoints Coordinate' and
                                 'Cells FacePoint Indexes' datasets are read from it.

    Returns:
    GeoDataFrame: One polygon per row of 'Cells FacePoint Indexes', in the same
                  order (so the GeoDataFrame index is the row number of the
                  cell), with the CRS taken from the file's 'Projection'
                  attribute. A row that references no face points yields an
                  empty polygon so that the row alignment is preserved.
    """
    # Location of the face point coordinates in the HDF5 file
    str_facepoint_coords = str_hdf_folder_2darea + 'FacePoints Coordinate'

    # Location of the indices of the face points making up each cell
    str_cells_facepoint_indexes = str_hdf_folder_2darea + 'Cells FacePoint Indexes'

    # Open the HDF5 file once and read everything that is needed
    with h5py.File(hdf5_file_path, 'r') as hdf_file:
        # X and Y are the first two columns of the coordinate dataset
        arr_facepoint_xy = hdf_file[str_facepoint_coords][:, :2]

        # One row per cell; unused slots are padded with -1
        arr_cells_facepoint_idx = hdf_file[str_cells_facepoint_indexes][:]

        # Extract the projection
        projection_wkt = hdf_file.attrs['Projection'].decode('utf-8')

    geometry = []

    for arr_row in arr_cells_facepoint_idx:
        # Drop the -1 padding, keeping the vertex order
        arr_valid_idx = arr_row[arr_row != -1]

        if len(arr_valid_idx) == 0:
            # No vertices at all: keep the row alignment with an empty polygon
            geometry.append(Polygon())
            continue

        # Direct array indexing replaces a per-vertex DataFrame lookup
        polygon_coords = arr_facepoint_xy[arr_valid_idx].tolist()

        # Connect to the first point to close the polygon
        polygon_coords.append(polygon_coords[0])
        geometry.append(Polygon(polygon_coords))

    # Create a GeoDataFrame
    gdf_cells = gpd.GeoDataFrame(geometry=geometry, crs=projection_wkt)

    return gdf_cells
# +++++++++++++++++++++++++++++

In [9]:
# ---------------
def fn_determine_starting_hceras_cells(hdf_file_path,
                                       gdf_upstream_points):
    """
    Find the HEC-RAS computational cell that contains each upstream point.

    Parameters:
    hdf_file_path (str): Path to the HEC-RAS HDF file.
    gdf_upstream_points (GeoDataFrame): Upstream points with a boolean
                                        'within_2darea' column. An
                                        'idx_start_cell' column is added to
                                        this frame in place.

    Returns:
    GeoDataFrame: Independent copy of the rows of `gdf_upstream_points` whose
                  'within_2darea' is True, including 'idx_start_cell' (index of
                  the containing cell in the cell GeoDataFrame, or None when no
                  cell contains the point).

    Raises:
    ValueError: If no 2D flow area group is found in the HDF file.
    """
    # get the HEC-RAS cell number for each upstream point

    print('Extracting HEC-RAS computational cell polygons...')

    # Specify the HDF5 group path that holds the 2D flow areas
    str_hdf_2darea_root_folder = '/Geometry/2D Flow Areas/'

    # Get names of HDF5 Group objects in the specified group
    list_group_names = fn_get_group_names(hdf_file_path, str_hdf_2darea_root_folder)

    # fn_get_group_names returns None on failure; fail with a clear message
    if not list_group_names:
        raise ValueError(f"No 2D flow areas found under '{str_hdf_2darea_root_folder}' in {hdf_file_path}")

    str_hdf_folder_2darea = str_hdf_2darea_root_folder + list_group_names[0] + '/'
    gdf_cells = fn_create_cell_gdf(hdf_file_path, str_hdf_folder_2darea)

    print(f' -- Number of cells in {list_group_names[0]}: {len(gdf_cells)}')

    # create points that are in the same projection as the HEC-RAS Model
    gdf_upstream_points_hecras_projection = gdf_upstream_points.to_crs(gdf_cells.crs)

    print(f'Determining starting cell for {len(gdf_upstream_points_hecras_projection)} points...')

    # Create a spatial index for the polygon GeoDataFrame
    sindex = gdf_cells.sindex

    # Index of the containing cell for each point, in point order
    cell_indices = []

    for point in gdf_upstream_points_hecras_projection.geometry:
        # Bounding-box candidates first, then the exact containment test
        possible_matches_index = list(sindex.intersection(point.bounds))
        possible_matches = gdf_cells.iloc[possible_matches_index]
        precise_matches = possible_matches[possible_matches.contains(point)]

        if len(precise_matches) > 0:
            cell_indices.append(precise_matches.index[0])
        else:
            # Point is not inside any cell
            cell_indices.append(None)

    # Add the cell indices as a new column to the point GeoDataFrame
    gdf_upstream_points['idx_start_cell'] = cell_indices

    # Keep only the points within the 2d area. The copy makes the result an
    # independent frame, so adding columns to it later does not trigger
    # pandas' SettingWithCopyWarning.
    gdf_upstream_points_inside_area = gdf_upstream_points[(gdf_upstream_points['within_2darea'] == True)].copy()

    return gdf_upstream_points_inside_area
# ---------------

In [10]:
# --------------------
def fn_find_most_downstream_node(graph, start_node):
    """
    Find the terminal node reached by following outgoing edges from `start_node`.

    The previous implementation returned max() of the visited node ids, i.e.
    the largest id value (lexicographic for string ids), which is unrelated to
    the position of a node in the network. This version returns a node that
    has no successors.

    Parameters:
    graph: Directed graph exposing successors(node) (e.g. networkx.DiGraph).
    start_node: Node to start from.

    Returns:
    The reachable node with no successors. When several such nodes are
    reachable, the one the most edges away from `start_node` (measured along
    the shortest route) is returned, the first one encountered winning ties.
    When no such node is reachable (every route ends in a cycle), the first
    node of the last level explored is returned.
    """
    visited = {start_node}
    frontier = [start_node]
    farthest_sink = None
    farthest_node = start_node

    # Breadth-first, level by level, so later levels are farther downstream
    while frontier:
        next_frontier = []
        sink_in_level = None

        for current_node in frontier:
            neighbors = list(graph.successors(current_node))  # outgoing edges

            if not neighbors and sink_in_level is None:
                sink_in_level = current_node

            for neighbor in neighbors:
                if neighbor not in visited:
                    visited.add(neighbor)
                    next_frontier.append(neighbor)

        if sink_in_level is not None:
            farthest_sink = sink_in_level

        farthest_node = frontier[0]
        frontier = next_frontier

    if farthest_sink is not None:
        return farthest_sink

    return farthest_node
# ----------------------

# ----------------
def fn_find_path_between_nodes(graph, start_node, end_node):
    """
    Depth-first search for a directed path from `start_node` to `end_node`.

    Parameters:
    graph: Directed graph exposing successors(node) (e.g. networkx.DiGraph).
    start_node: Node where the path begins.
    end_node: Node where the path must end.

    Returns:
    list or None: Nodes along the first path found, both ends included
                  ([start_node] when the two nodes are equal);
                  None when `end_node` cannot be reached.
    """
    seen_nodes = set()
    # Each entry pairs a node with the route that was followed to reach it
    pending = [(start_node, [start_node])]

    while len(pending) > 0:
        node, route = pending.pop()

        if node == end_node:
            return route

        if node in seen_nodes:
            continue

        seen_nodes.add(node)
        pending.extend((successor, [*route, successor]) for successor in graph.successors(node))

    return None
# ----------------

In [11]:
# ----------------
def fn_compute_travel_distance_per_point(gdf_flowpath_with_attrib, 
                                         gpkg_nextgen_path, 
                                         gdf_upstream_points):
    """
    Compute, for every upstream point, the flow distance and the average slope
    along the route from its flowpath to the downstream node of the network.

    A directed graph is built from the 'id' -> 'toid' links of the flowpaths
    and of the nexus points they drain to. For each point, the flowpaths on the
    route returned by fn_find_path_between_nodes are summed.

    Parameters:
    gdf_flowpath_with_attrib (GeoDataFrame): Flowpaths with the columns 'id',
                                             'toid', 'lengthkm', 'So',
                                             'mainstem' and 'tot_drainage_areasqkm'.
    gpkg_nextgen_path (str): Path to the NextGen hydrofabric GeoPackage; the
                             'nexus' layer is read.
    gdf_upstream_points (GeoDataFrame): Upstream points with an 'id' column
                                        holding the flowpath id. The result
                                        columns are added to this frame in place.

    Returns:
    GeoDataFrame: `gdf_upstream_points` with two added columns:
                  'flow_dist_km'  - sum of 'lengthkm' along the route (3 decimals)
                  'slope_average' - length-weighted mean of 'So' along the
                                    route, NaN when the route length is zero.
    """
    print('Compute travel distance per simulation...')

    # getting a list of nexus id's from the flowpaths 'toid'
    list_unique_nodes = gdf_flowpath_with_attrib['toid'].unique().tolist()

    # read in the hydrofabric's nexus
    gdf_nexus = gpd.read_file(gpkg_nextgen_path, layer='nexus')

    # filter to just nexus in list_unique_nodes
    gdf_nexus_on_streams = gdf_nexus[gdf_nexus['id'].isin(list_unique_nodes)]

    # Combine points and lines into a single GeoDataFrame - preparation for graph creation
    gdf_flow_network = gpd.GeoDataFrame(pd.concat([gdf_nexus_on_streams, gdf_flowpath_with_attrib], ignore_index=True),
                                        crs=gdf_flowpath_with_attrib.crs)

    # Create a directed graph
    G = nx.DiGraph()

    # Add edges to the graph based on 'id' and 'toid' fields
    edges = gdf_flow_network[['id', 'toid']].values.tolist()
    G.add_edges_from(edges)

    # Attach the additional columns to the nodes as attributes
    for node_id, mainstem, drainage_area in zip(gdf_flow_network['id'],
                                                gdf_flow_network['mainstem'],
                                                gdf_flow_network['tot_drainage_areasqkm']):
        G.nodes[node_id].update({'mainstem': mainstem,
                                 'tot_drainage_areasqkm': drainage_area})

    # -- Some Graph Statistics ---
    print('Hydrofabric Statistics:')
    # Number of nodes
    num_nodes = G.number_of_nodes()
    print("   Number of nodes:", num_nodes)

    # Number of edges
    num_edges = G.number_of_edges()
    print("   Number of edges:", num_edges)

    # Average degree (guarded: an empty network has no nodes to average over)
    if num_nodes > 0:
        avg_degree = round(sum(dict(G.degree()).values()) / num_nodes, 2)
    else:
        avg_degree = 0.0
    print("   Average degree:", avg_degree)

    # Calculate the number of weakly connected components
    num_components = nx.number_weakly_connected_components(G)
    print("   Number of weakly connected components:", num_components)
    # -- End Some Graph Statistics ---

    # -- Determine travel distance for each node in gdf_upstream_points
    list_travel_dist_flt = []
    list_avg_slope = []

    for str_start_node in gdf_upstream_points['id']:

        str_downstream_node = fn_find_most_downstream_node(G, str_start_node)

        # Create a list of nodes (flowpath and nexus) down the travel path;
        # fall back to the start node alone if no route is returned
        list_travel_path = fn_find_path_between_nodes(G, str_start_node, str_downstream_node) or [str_start_node]

        # get just the flowpaths from gdf_flowpath_with_attrib in list_travel_path
        mask_on_path = gdf_flowpath_with_attrib['id'].isin(list_travel_path)
        ser_length_km = gdf_flowpath_with_attrib.loc[mask_on_path, 'lengthkm']
        ser_slope = gdf_flowpath_with_attrib.loc[mask_on_path, 'So']

        # Sum of (length * slope) per flowpath, i.e. the vertical fall along the route
        flt_total_delta_z = round((ser_length_km * ser_slope).sum(), 5)

        # Sum the values in the 'lengthkm' column and round to three decimal places
        flt_total_length_km = round(ser_length_km.sum(), 3)

        # Average slope = total fall / total length (length-weighted mean of 'So')
        if flt_total_length_km > 0:
            flt_avg_slope = flt_total_delta_z / flt_total_length_km
        else:
            flt_avg_slope = float('nan')

        list_travel_dist_flt.append(flt_total_length_km)
        list_avg_slope.append(flt_avg_slope)

    gdf_upstream_points['flow_dist_km'] = list_travel_dist_flt
    gdf_upstream_points['slope_average'] = list_avg_slope

    return gdf_upstream_points
# ----------------

In [12]:
%%time
# 1) Perimeter polygon of the model's 2D flow area (in the HEC-RAS model projection)
gdf_2d_area_polygon = get_gdf_of_2d_area(hdf_file_path)
# 2) NextGen flowpaths that intersect that polygon, with selected attributes joined on 'id'
gdf_flowpath_with_attrib = fn_extract_flowpaths_gdf(gdf_2d_area_polygon, gpkg_nextgen_path)
# 3) The same polygon, reprojected to the CRS of the flowpaths
gdf_2d_area_polygon_nextgen = gdf_2d_area_polygon.to_crs(gdf_flowpath_with_attrib.crs)
# 4) One point at the upstream end of every flowpath
gdf_upstream_points = fn_create_upstream_flowpath_points(gdf_flowpath_with_attrib, gdf_2d_area_polygon_nextgen)

# 5) HEC-RAS cell containing each point; only the points inside the 2D area are kept
gdf_upstream_points = fn_determine_starting_hceras_cells(hdf_file_path,gdf_upstream_points)
# 6) Flow distance and average slope from each point down the stream network
gdf_upstream_points = fn_compute_travel_distance_per_point(gdf_flowpath_with_attrib, gpkg_nextgen_path, gdf_upstream_points)

Reading Nextgen Hydrofabric ~5 seconds...


  for geom in geom_items:


Extracting HEC-RAS computational cell polygons...
 -- Number of cells in 1207020501: 220317
Determining starting cell for 72 points...
Compute travel distance per simulation...
Hydrofabric Statistics:
   Number of nodes: 110
   Number of edges: 107
   Average degree: 1.95
   Number of weakly connected components: 3
CPU times: total: 43.1 s
Wall time: 43.8 s


In [13]:
# Reproject the 2D area polygon to the CRS of the flowpaths so that all three
# layers written below share one CRS
gdf_2d_area_polygon_nextgen = gdf_2d_area_polygon.to_crs(gdf_flowpath_with_attrib.crs)

# Write GeoDataFrames to GeoPackage as separate layers
# (layer names carry a numeric prefix: 00 = 2D area, 01 = stream lines, 02 = upstream points)
gdf_upstream_points.to_file(output_path, layer='02_flow_points', driver="GPKG")
gdf_flowpath_with_attrib.to_file(output_path, layer='01_stream_lines', driver="GPKG")
gdf_2d_area_polygon_nextgen.to_file(output_path, layer='00_area_2d', driver="GPKG")

In [14]:
# TODO - rl_gages could have multiple gages on that reach -- is this a problem?

# TODO - If stream crosses through 2darea polygon, but doesn't cross the boundary condition, then
# water can't flow out.  This stream and the upstream point should be removed.

In [1]:
# Travel time is a function of flow and average slope

import math
# determine the travel time down reach

# ------------
def fn_calculate_hydra_radius(flt_Q_cfs):
    """
    Estimate the hydraulic radius from flow with a logarithmic regression.

    The regression comes from an Rh vs flow derivation from a HEC-RAS 1D model
    of Wolf Creek near Fredericksburg, TX. That is a stream order of ~1; a
    higher stream order is likely to have a higher hydraulic radius, so this
    is hopefully a conservative estimate.

    Parameters:
    flt_Q_cfs (float): Flow in cfs. Must be greater than zero.

    Returns:
    float: 1.036 * ln(Q) - 2.9876. The value is negative for small flows
           (below roughly 18 cfs); callers must handle that.
           NOTE(review): the length unit of the result is not stated in the
           regression notes -- confirm.

    Raises:
    ValueError: If `flt_Q_cfs` is not greater than zero.
    """
    if flt_Q_cfs <= 0:
        raise ValueError(f'Flow must be greater than zero to estimate hydraulic radius (got {flt_Q_cfs})')

    flt_hydra_radius = 1.036 * math.log(flt_Q_cfs) - 2.9876
    return flt_hydra_radius
# ------------

# ------------
def fn_estimate_travel_time(flt_mannings_n, flt_length_km, flt_slope, flt_Q_cfs):
    """
    Estimate the travel time down a reach from Manning's equation.

    Velocity is taken as Rh^(2/3) * S^(1/2) / n, so
    time = length * n / (Rh^(2/3) * S^(1/2)).

    Parameters:
    flt_mannings_n (float): Manning's roughness coefficient.
    flt_length_km (float): Reach length in kilometers.
    flt_slope (float): Average slope in m/m. Must be greater than zero.
    flt_Q_cfs (float): Flow in cfs, used only to estimate the hydraulic radius.

    Returns:
    float: Travel time in hours, rounded to the nearest whole hour.

    Raises:
    ValueError: If `flt_slope` or `flt_Q_cfs` is not greater than zero.
    """
    # A zero slope divides by zero and a negative slope has no real square root
    if flt_slope <= 0:
        raise ValueError(f'Slope must be greater than zero to estimate travel time (got {flt_slope})')

    flt_Rh = round(fn_calculate_hydra_radius(flt_Q_cfs), 2)

    # Set minimum allowed hydraulic radius
    flt_min_Rh = 0.2

    # with regression, Rh could be negative
    if flt_Rh < flt_min_Rh:
        flt_Rh = flt_min_Rh

    # NOTE(review): the length is in meters and no unit conversion factor is
    # applied, so Rh is presumably expected in meters -- confirm against the
    # units of the regression.
    flt_length_m = flt_length_km * 1000

    flt_Rh_2_3 = flt_Rh ** (2.0 / 3.0)  # hydraulic radius^(2/3)
    flt_slope_1_2 = flt_slope ** 0.50  # square root of the slope

    # seconds -> hours
    flt_time_hr = round(((flt_length_m * flt_mannings_n)/(flt_Rh_2_3 * flt_slope_1_2))/3600,0)

    return flt_time_hr
# ------------

In [15]:
# Example: travel time down a 66.91 km reach at a very low flow.
# At 1 cfs the hydraulic-radius regression is negative, so
# fn_estimate_travel_time falls back to its minimum hydraulic radius.
flt_mannings_n = 0.07
flt_length_km = 66.91
flt_slope = 0.005 #this is in m/m
flt_Q_cfs = 1 # this is in cfs

flt_time_hr = fn_estimate_travel_time(flt_mannings_n, flt_length_km, flt_slope, flt_Q_cfs)

# travel time in hours
flt_time_hr

54.0

In [14]:
# Spot check of the hydraulic-radius regression at a flow of 100 cfs
fn_calculate_hydra_radius(100)

1.783356312683663