# HEC-RAS 2D HDF Data Analysis Notebook

This notebook demonstrates how to manipulate and analyze HEC-RAS 2D HDF data using the ras-commander library. It leverages the HdfBase, HdfUtils, HdfStruc, HdfMesh, HdfXsec, HdfBndry, HdfPlan, HdfResultsPlan, HdfResultsMesh, and HdfResultsXsec classes to streamline data extraction, processing, and visualization.


In [1]:
# Import required Libraries
import subprocess
import sys
import os
from pathlib import Path

def install_module(module_name):
    """Make sure ``module_name`` can be imported, installing it with pip if not.

    The name is first imported as-is; only when that raises ImportError is
    ``pip install -U <module_name>`` run with the current interpreter.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "-U", module_name]
    try:
        __import__(module_name)
    except ImportError:
        print(f"{module_name} not found. Installing...")
        subprocess.check_call(pip_command)

# List of modules to check and install if necessary.
# Each entry is used both as the import name and as the pip package name by install_module.
modules = ['h5py', 'numpy', 'requests', 'geopandas', 'matplotlib', 'pandas', 'pyproj', 'shapely', 'xarray', 'rasterio']
for module in modules:
    install_module(module)

# Import the rest of the required libraries
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import pyproj
from shapely.geometry import Point, LineString, Polygon
import xarray as xr
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as patches
from matplotlib.patches import ConnectionPatch
import logging
from pathlib import Path
import rasterio
from rasterio.plot import show


In [2]:
# Install ras-commander if you are not in a dev environment. 
#install_module('ras-commander')

In [None]:
## Importing ras-commander flexibly (from package or local dev copy)
import sys
from pathlib import Path

# Flexible imports to allow for development without installation 
#  ** Use this version with Jupyter Notebooks **
try:
    # Try to import from the installed package
    from ras_commander import (
        init_ras_project, HdfMesh, HdfBndry, HdfResultsMesh, RasExamples, RasPrj, RasPlan, RasCmdr, HdfUtils, HdfResultsPlan, HdfPlan, ras)
    from ras_commander.Decorators import standardize_input, log_call
    from ras_commander.LoggingConfig import setup_logging, get_logger
except ImportError:
    # If the import fails, add the parent directory to the Python path
    print("Using Local ras-commander library")
    import os
    current_file = Path(os.getcwd()).resolve()
    parent_directory = current_file.parent
    sys.path.append(str(parent_directory))
    
    # Now try to import again
    from ras_commander import (
        init_ras_project, HdfMesh, HdfBndry, HdfResultsMesh, RasExamples, RasPrj, RasPlan, RasCmdr, HdfUtils, HdfResultsPlan, HdfPlan, ras)
    from ras_commander.Decorators import standardize_input, log_call
    from ras_commander.LoggingConfig import setup_logging, get_logger

print("ras_commander imported successfully")

In [None]:
# Extract the Chippewa_2D example project and run plan 02 (only if its results HDF is missing)
# NOTE: the variables below are named "bald_eagle" but they refer to the Chippewa_2D project.

# Define the path to the Chippewa_2D project
current_dir = Path.cwd()  # Adjust if your notebook is in a different directory
bald_eagle_path = current_dir / "example_projects" / "Chippewa_2D"
import logging

# Check if Chippewa_2D.p02.hdf exists (so we don't have to re-run the simulation when re-running or debugging)
hdf_file = bald_eagle_path / "Chippewa_2D.p02.hdf"

if not hdf_file.exists():
    # Initialize RasExamples and extract the Chippewa_2D project
    # (presumably extracted under example_projects/ in the working directory - confirm against RasExamples)
    ras_examples = RasExamples()
    ras_examples.extract_project(["Chippewa_2D"])

    # Initialize custom Ras object
    bald_eagle = RasPrj()

    # Initialize the RAS project using the custom ras object
    bald_eagle = init_ras_project(bald_eagle_path, "6.6", ras_instance=bald_eagle)
    logging.info(f"Bald Eagle project initialized with folder: {bald_eagle.project_folder}")
    
    logging.info(f"Bald Eagle object id: {id(bald_eagle)}")
    
    # Define the plan number to execute
    plan_number = "02"

    # Update run flags for the project
    RasPlan.update_run_flags(
        plan_number,
        geometry_preprocessor=True,
        unsteady_flow_simulation=True,
        run_sediment=False,
        post_processor=True,
        floodplain_mapping=False,
        ras_object=bald_eagle
    )

    # Execute Plan 02 using RasCmdr; the return value is treated as a success flag below
    print(f"Executing Plan {plan_number} for the Bald Eagle Creek project...")
    success_bald_eagle = RasCmdr.compute_plan(plan_number, ras_object=bald_eagle)
    if success_bald_eagle:
        print(f"Plan {plan_number} executed successfully for Bald Eagle.\n")
    else:
        print(f"Plan {plan_number} execution failed for Bald Eagle.\n")
else:
    print("Chippewa_2D.p02.hdf already exists. Skipping project extraction and plan execution.")
    # Results already exist: only initialize the RAS project using the custom ras object
    bald_eagle = RasPrj()
    bald_eagle = init_ras_project(bald_eagle_path, "6.6", ras_instance=bald_eagle)
    plan_number = "02"

In [None]:
# Load Plan and Geometry Dataframes and find Plan and Geometry HDF Paths

# Folder inside the project folder; later cells write CSV files and plots into it
output_dir = bald_eagle_path / "detail_face_data_analysis"

# Create output directory if it doesn't exist
output_dir.mkdir(exist_ok=True)
print(f"Output directory created/verified at: {output_dir}")


# Display plan_df for bald_eagle project
print("Plan DataFrame for bald_eagle project:")
display(bald_eagle.plan_df)

# Display geom_df for bald_eagle project
print("\nGeometry DataFrame for bald_eagle project:")
display(bald_eagle.geom_df)

# Get the plan HDF path
plan_number = "02"  # Plan 02, the same plan number used in the previous cell
plan_hdf_path = bald_eagle.plan_df.loc[bald_eagle.plan_df['plan_number'] == plan_number, 'HDF_Results_Path'].values[0]

# Get the geometry file number from the plan DataFrame
# (.values[0] takes the first matching row and raises IndexError if no row matches)
geom_file = bald_eagle.plan_df.loc[bald_eagle.plan_df['plan_number'] == plan_number, 'Geom File'].values[0]
geom_number = geom_file[1:]  # Remove the 'g' prefix

# Get the geometry HDF path
geom_hdf_path = bald_eagle.geom_df.loc[bald_eagle.geom_df['geom_number'] == geom_number, 'hdf_path'].values[0]

print(f"\nPlan HDF path for Plan {plan_number}: {plan_hdf_path}")
print(f"Geometry HDF path for Plan {plan_number}: {geom_hdf_path}")

In [6]:
# Define the HDF input as a plan number (the cells below pass it as hdf_input)

plan_number = "02"  # Plan 02, the same plan number used in the previous cells


In [None]:
# Example: Extract runtime and compute time data
# The plan is identified by its plan number (hdf_input=plan_number) together with the project object.
print("\nExample 2: Extracting runtime and compute time data")
runtime_df = HdfResultsPlan.get_runtime_data(hdf_input=plan_number, ras_object=bald_eagle)
if runtime_df is not None:
    display(runtime_df)
else:
    print("No runtime data found.")

In [None]:
# For the geometry-related Hdf* functions called below (HdfPlan, HdfMesh), we will use geom_hdf_path
print(geom_hdf_path)

# NOTE(review): original note - for the example project, plan 02 is associated with geometry 09;
# confirm against the 'Geom File' value looked up from plan_df in the earlier cell.
# If you want to call the geometry by number, call the geometry functions with a number (presumably supported - verify)
# Otherwise, if you want to look up geometry hdf path by plan number, follow the logic in the previous code cells

In [9]:
# Set the projection to USA Contiguous Albers Equal Area Conic (USGS version)
# Note, we would usually call the projection function in HdfMesh but the projection is not set in this example project
projection = 'EPSG:5070'  # NAD83 / Conus Albers

In [None]:
# Use HdfPlan for geometry-related operations
# The geometry HDF path found earlier is passed directly.
print("\nExample: Extracting Base Geometry Attributes")
geom_attrs = HdfPlan.get_geometry_information(geom_hdf_path)

# The result is treated as a DataFrame here (.empty is checked before display)
if not geom_attrs.empty:
    # Display the DataFrame directly
    print("Base Geometry Attributes:")
    display(geom_attrs)
else:
    print("No base geometry attributes found.")


In [None]:
# Use HdfMesh for geometry-related operations
# List the names of the 2D flow areas found in the geometry HDF file
print("\nExample 3: Listing 2D Flow Area Names")
flow_area_names = HdfMesh.get_mesh_area_names(geom_hdf_path, ras_object=bald_eagle)
print("2D Flow Area Names:", flow_area_names)

In [None]:
# Example: Get 2D Flow Area Attributes (HdfMesh.get_mesh_area_attributes)
print("\nExample: Extracting 2D Flow Area Attributes")
flow_area_attributes = HdfMesh.get_mesh_area_attributes(geom_hdf_path)
display(flow_area_attributes)


In [None]:
# Example: Get 2D Flow Area Perimeter Polygons (mesh_areas)
print("\nExample: Extracting 2D Flow Area Perimeter Polygons")
mesh_areas = HdfMesh.get_mesh_areas(geom_hdf_path)  # stored only; this cell does not display the result

In [None]:
# Example: Extract mesh cell faces
print("\nExample: Extracting mesh cell faces")

# Get mesh cell faces for the geometry HDF file
mesh_cell_faces = HdfMesh.get_mesh_cell_faces(geom_hdf_path)

# Display the first few rows of the mesh cell faces GeoDataFrame
print("First few rows of mesh cell faces:")
display(mesh_cell_faces.head())

In [15]:
# Set the projection to USA Contiguous Albers Equal Area Conic (USGS version)
# Note, we would usually call the projection function in HdfMesh but the projection is not set in this example project
# (This repeats the value already assigned to `projection` in an earlier cell.)
projection = 'EPSG:5070'  # NAD83 / Conus Albers

In [None]:
# Example Function: Find the nearest cell face to a given point
# This provides enough basic information the face cell logic in the notebook

def find_nearest_cell_face(point, cell_faces_df):
    """
    Locate the cell face that lies closest to a point.

    Args:
        point (shapely.geometry.Point): Location to search from.
        cell_faces_df (GeoDataFrame): Cell face geometries with a 'face_id' column.

    Returns:
        tuple: (face_id of the closest face, distance from the point to that face).
    """
    # One distance per face, indexed like cell_faces_df
    face_distances = cell_faces_df.geometry.distance(point)

    # Index label of the smallest distance
    closest_label = face_distances.idxmin()

    return (
        cell_faces_df.loc[closest_label, 'face_id'],
        face_distances[closest_label],
    )

# Example usage: find and plot the cell face nearest to one hard-coded point
print("\nExample: Finding the nearest cell face to a given point")

# Create a sample point (you can replace this with any point of interest)
from shapely.geometry import Point
from geopandas import GeoDataFrame

# Create the sample point with the same CRS as mesh_cell_faces
# (the coordinates are presumably in the mesh CRS units - verify when changing the point)
sample_point = GeoDataFrame(
    {'geometry': [Point(1025677, 7853731)]}, 
    crs=mesh_cell_faces.crs
)

if not mesh_cell_faces.empty and not sample_point.empty:
    # Pass the bare shapely geometry, not the one-row GeoDataFrame, to the search function
    nearest_face_id, distance = find_nearest_cell_face(sample_point.geometry.iloc[0], mesh_cell_faces)
    print(f"Nearest cell face to point {sample_point.geometry.iloc[0].coords[0]}:")
    print(f"Face ID: {nearest_face_id}")
    print(f"Distance: {distance:.2f} units")

    # Visualize the result
    fig, ax = plt.subplots(figsize=(12, 8))
    
    # Plot all cell faces
    mesh_cell_faces.plot(ax=ax, color='blue', linewidth=0.5, alpha=0.5, label='Cell Faces')
    
    # Plot the sample point
    sample_point.plot(ax=ax, color='red', markersize=100, alpha=0.7, label='Sample Point')
    
    # Plot the nearest cell face (selected again by face_id)
    nearest_face = mesh_cell_faces[mesh_cell_faces['face_id'] == nearest_face_id]
    nearest_face.plot(ax=ax, color='green', linewidth=2, alpha=0.7, label='Nearest Face')
    
    # Set labels and title
    ax.set_xlabel('X Coordinate')
    ax.set_ylabel('Y Coordinate')
    ax.set_title('Nearest Cell Face to Sample Point')
    
    # Add legend and grid
    ax.legend()
    ax.grid(True)
    
    # Adjust layout and display
    plt.tight_layout()
    plt.show()
else:
    print("Unable to perform nearest cell face search due to missing data.")


In [None]:
# Example: Extract mesh cell faces and plot with profile lines
print("\nExample: Extracting mesh cell faces and plotting with profile lines")

# Get mesh cell faces (same call as in the earlier mesh cell faces cell)
mesh_cell_faces = HdfMesh.get_mesh_cell_faces(geom_hdf_path)

# Display the first few rows of the mesh cell faces DataFrame
print("First few rows of mesh cell faces:")
display(mesh_cell_faces.head())

# Load the GeoJSON file for profile lines (path is relative to the working directory)
geojson_path = Path(r'data/profile_lines_chippewa2D.geojson')  # Update with the correct path
profile_lines_gdf = gpd.read_file(geojson_path)

# Set the Coordinate Reference System (CRS) to EPSG:5070
# set_crs only (re)labels the CRS, replacing any CRS read from the file; coordinates are not reprojected
profile_lines_gdf = profile_lines_gdf.set_crs(epsg=5070, allow_override=True)

# Plot the mesh cell faces and profile lines together
fig, ax = plt.subplots(figsize=(12, 8))
mesh_cell_faces.plot(ax=ax, color='blue', alpha=0.5, edgecolor='k', label='Mesh Cell Faces')
profile_lines_gdf.plot(ax=ax, color='orange', linewidth=2, label='Profile Lines')

# Set labels and title
ax.set_xlabel('Easting')
ax.set_ylabel('Northing')
ax.set_title('Mesh Cell Faces and Profile Lines')

# Add grid and legend
ax.grid(True)
ax.legend()

# Adjust layout and display
plt.tight_layout()
plt.show()

In [None]:
# Example: Extracting mesh cell faces near profile lines
print("\nExample: Extracting mesh cell faces near profile lines")

# Get mesh cell faces using HdfMesh class
mesh_cell_faces = HdfMesh.get_mesh_cell_faces(geom_hdf_path)

# Display the first few rows of the mesh cell faces DataFrame
print("First few rows of mesh cell faces:")
display(mesh_cell_faces.head())

# Load the GeoJSON file for profile lines
geojson_path = Path(r'data/profile_lines_chippewa2D.geojson')  # Update with the correct path
profile_lines_gdf = gpd.read_file(geojson_path)

# Set the Coordinate Reference System (CRS) to EPSG:5070
profile_lines_gdf = profile_lines_gdf.set_crs(epsg=5070, allow_override=True)

# Initialize a dictionary to store faces near each profile line (keyed by profile name)
faces_near_profile_lines = {}

# Distance threshold, used as-is in the coordinate units of the geometries (no unit conversion is applied).
# It serves both as the profile segment length and as the maximum face-to-segment distance below.
# NOTE(review): the original comment described this as "10 ft converted to meters" - confirm the intended units.
distance_threshold = 10
angle_threshold = 60  # degrees

# Function to calculate the direction angle (in degrees, 0-360, measured from the +x axis) of a LineString (first to last vertex) or of a two-point coordinate pair.
def calculate_angle(line):
    """Return the direction of ``line`` in degrees, normalised to 0-360.

    ``line`` is either a LineString (its first and last vertices are used) or
    an indexable pair of coordinates ``[start, end]``. The angle is measured
    with arctan2 from the +x axis.
    """
    if isinstance(line, LineString):
        xs, ys = line.xy
        dx = xs[-1] - xs[0]
        dy = ys[-1] - ys[0]
    else:
        start, end = line[0], line[1]
        dx = end[0] - start[0]
        dy = end[1] - start[1]

    heading = np.degrees(np.arctan2(dy, dx))
    # arctan2 yields (-180, 180]; shift negative headings before wrapping
    if heading < 0:
        heading = heading + 360
    return heading % 360

# Function to break line into segments
def break_line_into_segments(line, segment_length):
    """Split a line into consecutive straight pieces of at most ``segment_length``.

    Break points are placed every ``segment_length`` along the line (measured
    from its start) plus one at the very end, so the last piece may be shorter.

    Args:
        line: Geometry exposing ``length`` and ``interpolate(distance)``.
        segment_length: Spacing between break points; must be positive.

    Returns:
        tuple: (segments, segment_angles) - two lists of equal length holding
        the two-point LineStrings and their direction angles from
        ``calculate_angle``. Both are empty for a zero-length line.

    Raises:
        ValueError: If ``segment_length`` is not positive.
    """
    if segment_length <= 0:
        raise ValueError("segment_length must be positive")

    total_length = line.length
    # A degenerate line has nothing to split; np.arange would be empty and
    # indexing its last element would raise IndexError.
    if total_length <= 0:
        return [], []

    distances = np.arange(0, total_length, segment_length)
    # Drop any value that floating-point rounding pushed to or past the end,
    # then close the sequence with the exact line length.
    distances = np.append(distances[distances < total_length], total_length)

    segments = []
    segment_angles = []
    for start_dist, end_dist in zip(distances[:-1], distances[1:]):
        point1 = line.interpolate(start_dist)
        point2 = line.interpolate(end_dist)
        segments.append(LineString([point1, point2]))
        segment_angles.append(calculate_angle([point1.coords[0], point2.coords[0]]))

    return segments, segment_angles

# Function to calculate angle difference accounting for 180 degree equivalence
def angle_difference(angle1, angle2):
    """Return the smallest difference between two angles, treating directions
    180 degrees apart as equivalent (result lies between 0 and 90)."""
    folded = abs(angle1 - angle2) % 180
    complement = 180 - folded
    return folded if folded <= complement else complement

# Function to order faces along profile line
def order_faces_along_profile(profile_line, faces_gdf):
    """Return the index labels of ``faces_gdf`` sorted by distance from the profile start.

    The distance is measured from the first vertex of ``profile_line`` to the
    first vertex of each face geometry (point-to-point, not along the line).
    """
    origin = Point(profile_line.coords[0])

    offsets = [
        (label, origin.distance(Point(row.geometry.coords[0])))
        for label, row in faces_gdf.iterrows()
    ]

    # sorted() is stable, so faces at equal distance keep their incoming order
    return [label for label, _ in sorted(offsets, key=lambda pair: pair[1])]

# Function to combine ordered faces into single linestring
def combine_faces_to_linestring(ordered_faces_gdf):
    """Chain the face geometries, in row order, into one LineString.

    Every vertex of the first face is kept; each later face contributes only
    its last vertex.
    """
    vertices = []
    for _, row in ordered_faces_gdf.iterrows():
        face_coords = row.geometry.coords
        if vertices:
            # Line already started: extend it with this face's end point only
            vertices.append(face_coords[-1])
        else:
            vertices.extend(list(face_coords))
    return LineString(vertices)

# Collect one record per profile line and build the profile-to-faceline GeoDataFrame
# once after the loop. Growing a GeoDataFrame with pd.concat inside the loop copies
# it on every iteration and starts from an empty frame, which recent pandas versions
# warn about.
profile_names = []
combined_geometries = []

# Iterate through each profile line
for index, profile_line in profile_lines_gdf.iterrows():
    profile_geom = profile_line.geometry
    profile_name = profile_line['Name']

    # Break profile line into segments (distance_threshold doubles as segment length)
    segments, segment_angles = break_line_into_segments(profile_geom, distance_threshold)

    # Index labels of faces that lie close to, and roughly parallel with, the profile line
    nearby_faces = set()

    # For each face, check distance to segments and angle difference
    for face_idx, face in mesh_cell_faces.iterrows():
        face_geom = face.geometry

        # Only LineString faces are considered
        if not isinstance(face_geom, LineString):
            continue

        face_angle = calculate_angle(face_geom)

        for segment, segment_angle in zip(segments, segment_angles):
            if (face_geom.distance(segment) <= distance_threshold
                    and angle_difference(face_angle, segment_angle) <= angle_threshold):
                nearby_faces.add(face_idx)
                break  # one matching segment is enough for this face

    # Convert the set of indices back to a GeoDataFrame
    nearby_faces_gdf = mesh_cell_faces.loc[list(nearby_faces)]

    # Order faces along profile line
    ordered_indices = order_faces_along_profile(profile_geom, nearby_faces_gdf)
    ordered_faces_gdf = nearby_faces_gdf.loc[ordered_indices]

    # Combine ordered faces into single linestring
    combined_linestring = combine_faces_to_linestring(ordered_faces_gdf)

    profile_names.append(profile_name)
    combined_geometries.append(combined_linestring)

    # Store the ordered faces in the dictionary
    faces_near_profile_lines[profile_name] = ordered_faces_gdf

# Final profile-to-faceline results: one row per profile line
profile_to_faceline = gpd.GeoDataFrame(
    {'profile_name': profile_names, 'geometry': combined_geometries},
    crs=profile_lines_gdf.crs,
)

# Plot the results
fig, ax = plt.subplots(figsize=(12, 8))

# Plot all mesh cell faces in light blue
mesh_cell_faces.plot(ax=ax, color='lightblue', alpha=0.3, edgecolor='k', label='All Mesh Faces')

# Plot selected faces for each profile line with numbers.
# Colours are cycled so that every profile line is drawn; zipping against the
# three-colour list would silently skip any profile line after the third.
colors = ['red', 'green', 'blue']
for position, (profile_name, faces) in enumerate(faces_near_profile_lines.items()):
    color = colors[position % len(colors)]
    if not faces.empty:
        faces.plot(ax=ax, color=color, alpha=0.6, label=f'Faces near {profile_name}')

        # Add numbers to faces (1-based position in the ordered face list, at each face midpoint)
        for i, (idx, face) in enumerate(faces.iterrows()):
            midpoint = face.geometry.interpolate(0.5, normalized=True)
            ax.text(midpoint.x, midpoint.y, str(i+1),
                   color=color, fontweight='bold', ha='center', va='center')

# Plot the combined linestrings
profile_to_faceline.plot(ax=ax, color='black', linewidth=2,
                        linestyle='--', label='Combined Face Lines')

# Set labels and title
ax.set_xlabel('Easting')
ax.set_ylabel('Northing')
ax.set_title('Mesh Cell Faces and Profile Lines\nNumbered in order along profile')

# Add grid and legend
ax.grid(True)
ax.legend()

# Adjust layout and display
plt.tight_layout()
plt.show()

# Display the results
print("\nOriginal ordered faces near profile lines:")
display(faces_near_profile_lines)

print("\nCombined profile-to-faceline results:")
display(profile_to_faceline)

-----

In [None]:
# Get face property tables for the geometry HDF file (this cell itself does no error handling)
face_property_tables = HdfMesh.get_mesh_face_property_tables(geom_hdf_path)
display(face_property_tables)

In [None]:
# Extract the face property table for Face ID 4 and plot it
import matplotlib.pyplot as plt

face_id = 4
perimeter_table = face_property_tables['Perimeter 1']
face_properties = perimeter_table[perimeter_table['Face ID'] == face_id]

# One panel per property column, each plotted against Z: (column name, line colour)
panel_specs = [
    ('Area', 'blue'),
    ('Wetted Perimeter', 'green'),
    ("Manning's n", 'red'),
]

# Create subplots arranged horizontally
fig, axs = plt.subplots(1, len(panel_specs), figsize=(18, 6))

for ax, (column, color) in zip(axs, panel_specs):
    ax.plot(face_properties['Z'], face_properties[column], marker='o', color=color, label=column)
    ax.set_title(f'Face ID {face_id}: Z vs {column}')
    ax.set_xlabel('Z')
    ax.set_ylabel(column)
    ax.grid(True)
    ax.legend()

plt.tight_layout()
plt.show()


In [None]:
# Get mesh timeseries output

# Get the 2D flow area names.
# NOTE: this rebinds `mesh_areas`, which an earlier cell assigned from HdfMesh.get_mesh_areas.
mesh_areas = HdfMesh.get_mesh_area_names(geom_hdf_path, ras_object=bald_eagle)

# Stop with a clear error when there is no mesh: every call below needs mesh_name,
# and continuing would only fail later with a NameError.
if not mesh_areas:
    raise ValueError("No mesh areas found in the geometry file.")

mesh_name = mesh_areas[0]  # Use the first 2D flow area name

timeseries_da = HdfResultsMesh.get_mesh_timeseries(plan_hdf_path, mesh_name, "Water Surface", ras_object=bald_eagle)
print(f"\nMesh Timeseries Output (Water Surface) for {mesh_name}:")
print(timeseries_da)

# Get mesh cells timeseries output
cells_timeseries_ds = HdfResultsMesh.get_mesh_cells_timeseries(plan_hdf_path, mesh_name, ras_object=bald_eagle)
print("\nMesh Cells Timeseries Output:")
print(cells_timeseries_ds)

# Get mesh faces timeseries output
faces_timeseries_ds = HdfResultsMesh.get_mesh_faces_timeseries(plan_hdf_path, mesh_name, ras_object=bald_eagle)
print("\nMesh Faces Timeseries Output:")
print(faces_timeseries_ds)


In [None]:
# Convert all face velocities and face flow values to positive

# Function to process and convert face data to positive values
def convert_to_positive_values(faces_timeseries_ds, cells_timeseries_ds):
    """
    Replace face velocities and face flows with their absolute values.

    The faces dataset is modified in place: 'face_velocity' and 'face_flow'
    are overwritten with absolute values, and a new 'velocity_direction'
    variable records +1 where the original velocity was >= 0 and -1 elsewhere.
    The cells dataset is passed through untouched.

    Args:
        faces_timeseries_ds (xarray.Dataset): Dataset containing face timeseries data
        cells_timeseries_ds (xarray.Dataset): Dataset containing cell timeseries data

    Returns:
        tuple: (faces_timeseries_ds, cells_timeseries_ds) - the same objects
        that were passed in.
    """
    signed_velocity = faces_timeseries_ds['face_velocity']
    signed_flow = faces_timeseries_ds['face_flow']

    # Capture the sign before the signed values are overwritten below
    direction = xr.where(signed_velocity >= 0, 1, -1)

    faces_timeseries_ds['face_velocity'] = abs(signed_velocity)
    faces_timeseries_ds['face_flow'] = abs(signed_flow)
    faces_timeseries_ds['velocity_direction'] = direction

    print("Conversion to positive values complete.")
    face_count = len(faces_timeseries_ds.face_id)
    print(f"Number of faces processed: {face_count}")

    return faces_timeseries_ds, cells_timeseries_ds

# Convert the values in our datasets.
# NOTE: convert_to_positive_values assigns into faces_timeseries_ds and returns the objects
# it was given, so the *_positive names below refer to the same datasets as the inputs.
faces_timeseries_ds_positive, cells_timeseries_ds_positive = convert_to_positive_values(
    faces_timeseries_ds, 
    cells_timeseries_ds
)



In [23]:
import pandas as pd
import numpy as np
import xarray as xr

# Function to process faces for a single profile line
def process_profile_line(profile_name, faces, cells_timeseries_ds, faces_timeseries_ds):
    """Build a long-format table of face velocity and flow for one profile line.

    Args:
        profile_name: Value written to the 'profile_name' column.
        faces: Table with a 'face_id' column listing the faces of this profile line.
        cells_timeseries_ds: Accepted for signature compatibility; not used here.
        faces_timeseries_ds: Dataset holding 'face_velocity' and 'face_flow',
            selectable by ``face_id``.

    Returns:
        DataFrame with the dataset's index levels as columns plus
        'face_velocity', 'face_flow', 'profile_name' and 'face_order'
        (0-based order of first appearance of each face within a time step).
    """
    selected_ids = faces['face_id'].tolist()

    def _order_of_first_appearance(ids):
        codes, _ = pd.factorize(ids)
        return codes

    # Restrict both variables to the faces of this profile line
    profile_ds = xr.Dataset({
        variable: faces_timeseries_ds[variable].sel(face_id=selected_ids)
        for variable in ('face_velocity', 'face_flow')
    })

    # Flatten to a dataframe for easier manipulation
    results_df = profile_ds.to_dataframe().reset_index()

    results_df['profile_name'] = profile_name
    results_df['face_order'] = (
        results_df.groupby('time')['face_id'].transform(_order_of_first_appearance)
    )

    return results_df


In [24]:
# Calculate Vave = Sum(Qn) / Sum(An) for each profile line,
# where Vave is the sum of the face flows divided by the sum of the face flow areas over all the faces in the profile line

# Then, save the results to CSV

In [None]:
# Process all profile lines
# NOTE: faces_timeseries_ds was modified in place by convert_to_positive_values in an earlier cell,
# so the velocities and flows extracted here are already absolute values.
all_results = []
for profile_name, faces in faces_near_profile_lines.items():
    profile_results = process_profile_line(profile_name, faces, cells_timeseries_ds, faces_timeseries_ds)
    all_results.append(profile_results)

# Combine results from all profile lines (pd.concat raises ValueError if all_results is empty)
combined_results_df = pd.concat(all_results, ignore_index=True)

# Display the first few rows of the combined results
print(combined_results_df.head())

-----

In [None]:
profile_time_series = {}

# Split the combined results into one time-series DataFrame per profile line
for profile_name in faces_near_profile_lines:
    # Select rows by the profile they were computed for. Selecting by face_id instead
    # would also pull in rows belonging to another profile line whenever a face is
    # shared between two lines, and those rows would then be mislabelled.
    profile_df = (
        combined_results_df
        .loc[combined_results_df['profile_name'] == profile_name]
        .reset_index(drop=True)
    )

    # Store in the dictionary
    profile_time_series[profile_name] = profile_df

    # Display a preview
    print(f"\nTime Series DataFrame for {profile_name}:")
    display(profile_df.head())

# Optionally, display all profile names
print("\nProfile Lines Processed:")
print(list(profile_time_series.keys()))


| Time       | face_id | face_velocity | face_flow   | profile_name   | face_order |
|------------|---------|---------------|-------------|----------------|------------|
| 2019-04-02 | 370     | 1.543974      | 961.118225  | Profile Line 1 | 0          |
| 2019-04-02 | 232     | 2.738194      | 5103.555176 | Profile Line 1 | 1          |
| 2019-04-02 | 747     | 3.109769      | 4777.513672 | Profile Line 1 | 2          |
| 2019-04-02 | 216     | 2.974400      | 5120.266113 | Profile Line 1 | 3          |
| 2019-04-02 | 184     | 0.924792      | 700.676697  | Profile Line 1 | 4          |  
  


| Time       | face_id | face_velocity | face_flow   | profile_name   | face_order |
|------------|---------|---------------|-------------|----------------|------------|
| 2019-04-02 | 52      | 0.000000      | 0.000000    | Profile Line 2 | 0          |
| 2019-04-02 | 92      | 0.000000      | 0.000000    | Profile Line 2 | 1          |
| 2019-04-02 | 548     | 1.018038      | 353.129822  | Profile Line 2 | 2          |
| 2019-04-02 | 691     | 2.106394      | 2195.409912 | Profile Line 2 | 3          |
| 2019-04-02 | 78      | 2.376904      | 3600.228760 | Profile Line 2 | 4          |  
  


| Time       | face_id | face_velocity | face_flow   | profile_name   | face_order |
|------------|---------|---------------|-------------|----------------|------------|
| 2019-04-02 | 532     | 0.000000      | 0.000000    | Profile Line 3 | 0          |
| 2019-04-02 | 341     | 0.000000      | 0.000000    | Profile Line 3 | 1          |
| 2019-04-02 | 349     | 1.962641      | 2601.644287 | Profile Line 3 | 2          |
| 2019-04-02 | 455     | 2.367594      | 4148.870605 | Profile Line 3 | 3          |
| 2019-04-02 | 469     | 2.515510      | 4458.292480 | Profile Line 3 | 4          |  
  
  
 
Profile Lines Processed:
['Profile Line 1', 'Profile Line 2', 'Profile Line 3']

In [None]:
# Stack the per-profile time series back into a single DataFrame with a fresh index
all_profiles_df = pd.concat(profile_time_series.values(), ignore_index=True)

# Display the combined dataframe
print("Combined Time Series DataFrame for All Profiles:")
display(all_profiles_df.head())



In [None]:
# Recursively explore the 2D Flow Areas structure in the geometry HDF file
import h5py

def print_hdf_structure(name, obj):
    """Print the path and type of an HDF5 object; for datasets also print
    shape, dtype and attributes. Has the (name, obj) signature that
    ``visititems`` calls back with."""
    print(f"\nPath: {name}")
    print(f"Type: {type(obj).__name__}")

    # Anything that is not a dataset gets only the two header lines
    if not isinstance(obj, h5py.Dataset):
        return

    print(f"Shape: {obj.shape}")
    print(f"Dtype: {obj.dtype}")
    print("Attributes:")
    for attr_name, attr_value in obj.attrs.items():
        print(f"  {attr_name}: {attr_value}")

def explore_flow_areas(file_path):
    """
    Walk the '/Geometry/2D Flow Areas' group of an HDF5 file and print every
    object in it with print_hdf_structure.

    Prints a message instead when the group is missing. Any exception raised
    while opening or walking the file is caught and reported with print.

    :param file_path: Path to the HDF5 file
    """
    group_path = '/Geometry/2D Flow Areas'
    try:
        with h5py.File(file_path, 'r') as hdf_file:
            if group_path not in hdf_file:
                print("2D Flow Areas group not found in geometry file")
                return
            hdf_file[group_path].visititems(print_hdf_structure)
    except Exception as e:
        print(f"Error exploring HDF file: {e}")

# Run the exploration on the geometry HDF file found earlier
print("\nExploring 2D Flow Areas structure in geometry file:")
print("HDF Base Path: /Geometry/2D Flow Areas ")
explore_flow_areas(geom_hdf_path)


In [None]:
# Check if we have the necessary variables (debugging aid: reports which names exist in this namespace)
print("Available variables:")
print("profile_time_series:", 'profile_time_series' in locals())
print("faces_near_profile_lines:", 'faces_near_profile_lines' in locals())
# NOTE(review): profile_averages is not assigned in any cell shown above - presumably a leftover; verify
print("profile_averages:", 'profile_averages' in locals())

# Look at the structure of profile_time_series
if 'profile_time_series' in locals():
    for name, df in profile_time_series.items():
        print(f"\nColumns in {name}:")
        print(df.columns.tolist())

In [30]:
def calculate_discharge_weighted_velocity(profile_df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate discharge-weighted average velocity for a profile line
    Vw = Sum(|Qi|*|Vi|)/Sum(|Qi|) where Qi is face flow and Vi is face velocity

    Args:
        profile_df: Rows of one profile line with 'time', 'face_flow' and
            'face_velocity' columns (one row per face and time step).

    Returns:
        DataFrame with columns 'time' and 'weighted_velocity', one row per
        distinct time in order of first appearance. The weighted velocity is
        NaN for a time step whose total absolute flow is zero. An empty input
        yields an empty frame that still has both columns.
    """
    print("Calculating discharge-weighted velocity...")
    print(f"Input DataFrame:\n{profile_df.head()}")

    abs_flow = profile_df['face_flow'].abs()
    abs_velocity = profile_df['face_velocity'].abs()

    # Sum the weights and the weighted velocities per time step in one grouped
    # pass instead of re-filtering the whole frame for every time step.
    per_time = (
        pd.DataFrame({
            'time': profile_df['time'],
            'flow': abs_flow,
            'flow_x_velocity': abs_flow * abs_velocity,
        })
        .groupby('time', sort=False)[['flow', 'flow_x_velocity']]
        .sum()
    )

    # Mask zero totals so the ratio is an explicit NaN rather than a 0/0 division
    total_flow = per_time['flow'].where(per_time['flow'] != 0)
    weighted_df = (
        (per_time['flow_x_velocity'] / total_flow)
        .rename('weighted_velocity')
        .reset_index()
    )

    print(f"Calculated weighted velocities:\n{weighted_df.head()}")
    return weighted_df


In [None]:
# Calculate the discharge-weighted velocity for each profile line and save it as a CSV file
for profile_name, profile_df in profile_time_series.items():
    print(f"\nProcessing profile: {profile_name}")

    # Calculate discharge-weighted velocity
    weighted_velocities = calculate_discharge_weighted_velocity(profile_df)
    
    print("Weighted velocities calculated.")
    display(weighted_velocities)
    
    # Convert time to datetime if it isn't already
    weighted_velocities['time'] = pd.to_datetime(weighted_velocities['time'])
    print("Converted time to datetime format.")

    # Get ordered faces for this profile (used here only to report how many there are)
    ordered_faces = faces_near_profile_lines[profile_name]
    print(f"Number of ordered faces: {len(ordered_faces)}")
    
    # Save dataframes in the output directory; the profile name is used verbatim in the file name
    output_file = output_dir / f"{profile_name}_discharge_weighted_velocity.csv"
    weighted_velocities.to_csv(output_file, index=False)
    print(f"Saved weighted velocities to {output_file}")

In [None]:
# Create plots comparing discharge-weighted velocity and simple average for each profile line

def _annotate_peak(ax, times, values, text_format, offset, **text_kwargs):
    """Label the maximum of ``values`` on ``ax``; do nothing when there is no valid value.

    ``times`` and ``values`` must share the same index. ``text_format`` is a
    str.format template that receives the peak value, and ``offset`` is the
    label offset in points.
    """
    if values.notna().sum() == 0:
        # idxmax has no meaningful answer for an empty or all-NaN series
        return
    peak_idx = values.idxmax()
    peak_value = values.loc[peak_idx]
    ax.annotate(text_format.format(peak_value),
                xy=(times.loc[peak_idx], peak_value),
                xytext=offset,
                textcoords='offset points',
                **text_kwargs)


for profile_name, profile_df in profile_time_series.items():

    print(f"\nGenerating comparison plot for profile: {profile_name}")

    # Calculate discharge-weighted velocity
    weighted_velocities = calculate_discharge_weighted_velocity(profile_df)
    weighted_velocities['time'] = pd.to_datetime(weighted_velocities['time'])

    # Calculate simple average velocity for each timestep
    simple_averages = profile_df.groupby('time')['face_velocity'].mean().reset_index()
    simple_averages['time'] = pd.to_datetime(simple_averages['time'])

    # Create figure for comparison plot
    fig, ax = plt.subplots(figsize=(16, 9))

    # Only the first face carries a legend label, so the legend gets a single face entry
    first_face_id = profile_df['face_id'].iloc[0] if not profile_df.empty else None

    # Plot individual face velocities with thin gray lines
    for face_id in profile_df['face_id'].unique():
        face_data = profile_df[profile_df['face_id'] == face_id]
        ax.plot(face_data['time'],
                face_data['face_velocity'],
                alpha=0.8,
                linewidth=0.3,
                color='gray',
                label=f'Face ID {face_id}' if face_id == first_face_id else "")

        # Annotate peak value for each face
        _annotate_peak(ax, face_data['time'], face_data['face_velocity'],
                       '{:.2f}', (10, 10), fontsize=8, alpha=0.5)

    # Plot discharge-weighted velocity
    ax.plot(weighted_velocities['time'],
            weighted_velocities['weighted_velocity'],
            color='red',
            alpha=1.0,
            linewidth=2,
            label='Discharge-Weighted Velocity')

    # Annotate peak weighted velocity
    _annotate_peak(ax, weighted_velocities['time'], weighted_velocities['weighted_velocity'],
                   'Peak Weighted: {:.2f}', (10, 10), color='red', fontweight='bold')

    # Plot simple average
    ax.plot(simple_averages['time'],
            simple_averages['face_velocity'],
            color='blue',
            alpha=0.5,
            linewidth=1,
            linestyle='--',
            label='Simple Average')

    # Annotate peak simple average
    _annotate_peak(ax, simple_averages['time'], simple_averages['face_velocity'],
                   'Peak Average: {:.2f}', (10, -10), color='blue', fontweight='bold')

    # Configure plot
    ax.set_title(f'Velocity Comparison - {profile_name}')
    ax.set_xlabel('Time')
    ax.set_ylabel('Velocity (ft/s)')
    ax.grid(True, alpha=0.3)

    # Place the legend outside the axes, to the right
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    # Leave room on the right for the legend
    fig.subplots_adjust(right=0.8)

    # Save plot to file
    plot_file = output_dir / f"{profile_name}_velocity_comparison.png"
    fig.savefig(plot_file, bbox_inches='tight', dpi=300)
    plt.show()

    # Release the figure so figures do not accumulate across loop iterations
    plt.close(fig)

    # Print detailed comparison
    print(f"\nVelocity Comparison for {profile_name}:")
    print(f"Number of faces: {profile_df['face_id'].nunique()}")
    print("\nDischarge-Weighted Velocity Statistics:")
    print(f"Mean: {weighted_velocities['weighted_velocity'].mean():.2f} ft/s")
    print(f"Max: {weighted_velocities['weighted_velocity'].max():.2f} ft/s")
    print(f"Min: {weighted_velocities['weighted_velocity'].min():.2f} ft/s")
    print("\nSimple Average Velocity Statistics:")
    print(f"Mean: {simple_averages['face_velocity'].mean():.2f} ft/s")
    print(f"Max: {simple_averages['face_velocity'].max():.2f} ft/s")
    print(f"Min: {simple_averages['face_velocity'].min():.2f} ft/s")