# HEC-RAS 2D HDF Data Analysis Notebook

This notebook demonstrates how to manipulate and analyze HEC-RAS 2D HDF data using the ras-commander library. It leverages the HdfBase, HdfUtils, HdfStruc, HdfMesh, HdfXsec, HdfBndry, HdfPlan, HdfResultsPlan, HdfResultsMesh, and HdfResultsXsec classes to streamline data extraction, processing, and visualization.


In [1]:
# Import required Libraries
import subprocess
import sys
import os
from pathlib import Path

def install_module(module_name):
    try:
        __import__(module_name)
    except ImportError:
        print(f"{module_name} not found. Installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", module_name])

# List of modules to check and install if necessary
modules = ['h5py', 'numpy', 'requests', 'geopandas', 'matplotlib', 'pandas', 'pyproj', 'shapely', 'xarray', 'rasterio']
for module in modules:
    install_module(module)

# Import the rest of the required libraries
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import pyproj
from shapely.geometry import Point, LineString, Polygon
import xarray as xr
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as patches
from matplotlib.patches import ConnectionPatch
import logging
from pathlib import Path
import rasterio
from rasterio.plot import show


In [2]:
# Install ras-commander if you are not in a dev environment. 
#install_module('ras-commander')

## Importing ras-commander flexibly (from package or local dev copy)

In [None]:
import sys
from pathlib import Path

# Flexible imports to allow for development without installation 
#  ** Use this version with Jupyter Notebooks **
try:
    # Try to import from the installed package
    from ras_commander import (
        init_ras_project, HdfMesh, HdfBndry, HdfResultsMesh, RasExamples, RasPrj, RasPlan, RasCmdr, HdfUtils, HdfResultsPlan, HdfPlan, ras)
    from ras_commander.Decorators import standardize_input, log_call
    from ras_commander.LoggingConfig import setup_logging, get_logger
except ImportError:
    # If the import fails, add the parent directory to the Python path
    import os
    current_file = Path(os.getcwd()).resolve()
    parent_directory = current_file.parent
    sys.path.append(str(parent_directory))
    
    # Now try to import again
    from ras_commander import (
        init_ras_project, HdfMesh, HdfBndry, HdfResultsMesh, RasExamples, RasPrj, RasPlan, RasCmdr, HdfUtils, HdfResultsPlan, HdfPlan, ras)
    from ras_commander.Decorators import standardize_input, log_call
    from ras_commander.LoggingConfig import setup_logging, get_logger

print("ras_commander imported successfully")

In [None]:
# Download the Chippewa_2D project from HEC and run plan 01

# Define the path to the Chippewa_2D project
current_dir = Path.cwd()  # Adjust if your notebook is in a different directory
bald_eagle_path = current_dir / "example_projects" / "Chippewa_2D"
import logging

# Check if Chippewa_2D.p02.hdf exists (so we don't have to re-run the simulation when re-running or debugging)
hdf_file = bald_eagle_path / "Chippewa_2D.p02.hdf"

if not hdf_file.exists():
    # Initialize RasExamples and extract the Chippewa_2D project
    ras_examples = RasExamples()
    ras_examples.extract_project(["Chippewa_2D"])

    # Initialize custom Ras object
    bald_eagle = RasPrj()

    # Initialize the RAS project using the custom ras object
    bald_eagle = init_ras_project(bald_eagle_path, "6.6", ras_instance=bald_eagle)
    logging.info(f"Bald Eagle project initialized with folder: {bald_eagle.project_folder}")
    
    logging.info(f"Bald Eagle object id: {id(bald_eagle)}")
    
    # Define the plan number to execute
    plan_number = "02"

    # Update run flags for the project
    RasPlan.update_run_flags(
        plan_number,
        geometry_preprocessor=True,
        unsteady_flow_simulation=True,
        run_sediment=False,
        post_processor=True,
        floodplain_mapping=False,
        ras_object=bald_eagle
    )

    # Execute Plan 02 using RasCmdr for Bald Eagle
    print(f"Executing Plan {plan_number} for the Bald Eagle Creek project...")
    success_bald_eagle = RasCmdr.compute_plan(plan_number, ras_object=bald_eagle)
    if success_bald_eagle:
        print(f"Plan {plan_number} executed successfully for Bald Eagle.\n")
    else:
        print(f"Plan {plan_number} execution failed for Bald Eagle.\n")
else:
    print("Chippewa_2D.p02.hdf already exists. Skipping project extraction and plan execution.")
    # Initialize the RAS project using the custom ras object
    bald_eagle = RasPrj()
    bald_eagle = init_ras_project(bald_eagle_path, "6.6", ras_instance=bald_eagle)
    plan_number = "02"

In [None]:
# Load Plan and Geometry Dataframes and find Plan and Geometry HDF Paths

# Display plan_df for bald_eagle project
print("Plan DataFrame for bald_eagle project:")
display(bald_eagle.plan_df)

# Display geom_df for bald_eagle project
print("\nGeometry DataFrame for bald_eagle project:")
display(bald_eagle.geom_df)

# Get the plan HDF path
plan_number = "02"  # Assuming we're using plan 01 as in the previous code
plan_hdf_path = bald_eagle.plan_df.loc[bald_eagle.plan_df['plan_number'] == plan_number, 'HDF_Results_Path'].values[0]

# Get the geometry file number from the plan DataFrame
geom_file = bald_eagle.plan_df.loc[bald_eagle.plan_df['plan_number'] == plan_number, 'Geom File'].values[0]
geom_number = geom_file[1:]  # Remove the 'g' prefix

# Get the geometry HDF path
geom_hdf_path = bald_eagle.geom_df.loc[bald_eagle.geom_df['geom_number'] == geom_number, 'hdf_path'].values[0]

print(f"\nPlan HDF path for Plan {plan_number}: {plan_hdf_path}")
print(f"Geometry HDF path for Plan {plan_number}: {geom_hdf_path}")

plan dataframe: 

| plan_number | full_path | Computation Interval | DSS File | Flow File | Friction Slope Method | Geom File | Mapping Interval | Plan Title       | Program Version | ... | Run WQNet | Short Identifier | Simulation Date                     | UNET D1 Cores | UNET Use Existing IB Tables | UNET 1D Methodology | UNET D2 SolverType | UNET D2 Name | HDF_Results_Path | Geom_File |
|-------------|-----------|----------------------|----------|-----------|-----------------------|-----------|------------------|------------------|-----------------|-----|-----------|------------------|-------------------------------------|----------------|-----------------------------|---------------------|--------------------|---------------|-------------------|-----------|
| 0           | 02        | c:\GH\ras-commander\examples\example_projects\... | 2MIN     | dss       | u04                   | 1         | g01              | 30MIN            | 100ft Sediment  | 6.40 | ...       | 0                | 100ft Sediment  | 02apr2019,0000,05may2019,2400 | 0              | -1                          | Finite Difference   | PARDISO (Direct)  | Perimeter 1  | c:\GH\ras-commander\examples\example_projects\... | c:\GH\ras-commander\examples\example_projects\... |

In [6]:
# Define the HDF input path as Plan Number

plan_number = "02"  # Assuming we're using plan 01 as in the previous code


RasHdfUtils
| Method Name | Description |
|-------------|-------------|
| get_attrs | Converts attributes from a HEC-RAS HDF file into a Python dictionary for a given attribute path |
| get_root_attrs | Returns attributes at root level of HEC-RAS HDF file |
| get_hdf_paths_with_properties | Gets all paths in the HDF file with their properties |
| get_group_attributes_as_df | Gets attributes of a group in the HDF file as a DataFrame |
| get_hdf_filename | Gets the HDF filename from various input types |
| get_runtime_data | Extracts runtime and compute time data from a single HDF file |


In [None]:
# Get HDF Paths with Properties (For Exploring HDF Files)
plan_number = "02"  # Assuming we're using plan 02 as in the previous code
hdf_paths_df = HdfUtils.get_hdf_paths_with_properties(plan_number, ras_object=bald_eagle)
display(hdf_paths_df.head())

In [None]:
# Example: Extract runtime and compute time data
print("\nExample 2: Extracting runtime and compute time data")
runtime_df = HdfResultsPlan.get_runtime_data(hdf_input=plan_number, ras_object=bald_eagle)
if runtime_df is not None:
    display(runtime_df)
else:
    print("No runtime data found.")

runtime_df:


| Plan Name       | File Name              | Simulation Start Time | Simulation End Time   | Simulation Duration (s) | Simulation Time (hr) | Completing Geometry (hr) | Preprocessing Geometry (hr) | Completing Event Conditions (hr) | Unsteady Flow Computations (hr) | Complete Process (hr) | Unsteady Flow Speed (hr/hr) | Complete Process Speed (hr/hr) |
|------------------|-----------------------|-----------------------|-----------------------|-------------------------|-----------------------|---------------------------|-------------------------------|----------------------------------|----------------------------------|------------------------|-------------------------------|----------------------------------|
| 0                | 100ft Sediment        | Chippewa_2D.p02.hdf   | 02Apr2019 00:00:00   | 06May2019 00:00:00     | 2937600.0             | 816.0                     | N/A                           | 0.000096                          | N/A                              | N/A                    | 0.040035                      | N/A                              | 20382.307025                     |

Table of all the functions in the RasGeomHdf class from the ras_commander/RasGeomHdf.py file:

| Function Name | Description |
|---------------|-------------|
| projection | Returns the projection of the RAS geometry as a pyproj.CRS object |
| get_geom_attrs | Returns base geometry attributes from a HEC-RAS HDF file |

| mesh_area_names | Returns a list of the 2D mesh area names of the RAS geometry |
| get_geom_2d_flow_area_attrs | Returns geometry 2d flow area attributes from a HEC-RAS HDF file |
| mesh_areas | Returns 2D flow area perimeter polygons |
| mesh_cell_polygons | Returns 2D flow mesh cell polygons |
| mesh_cell_points | Returns 2D flow mesh cell points |
| mesh_cell_faces | Returns 2D flow mesh cell faces |

| get_geom_structures_attrs | Returns geometry structures attributes from a HEC-RAS HDF file |




| bc_lines | Returns 2D mesh area boundary condition lines |
| breaklines | Returns 2D mesh area breaklines |



| refinement_regions | Returns 2D mesh area refinement regions |
| structures | Returns the model structures |
| reference_lines_names | Returns reference line names |
| reference_points_names | Returns reference point names |
| reference_lines | Returns the reference lines geometry and attributes |
| reference_points | Returns the reference points geometry and attributes |
| cross_sections | Returns the model 1D cross sections |
| river_reaches | Returns the model 1D river reach lines |
| cross_sections_elevations | Returns the model cross section elevation information |

In [None]:
# For all of the RasGeomHdf Class Functions, we will use geom_hdf_path
print(geom_hdf_path)

# For the example project, plan 02 is associated with geometry 09
# If you want to call the geometry by number, call RasHdfGeom functions with a number
# Otherwise, if you want to look up geometry hdf path by plan number, follow the logic in the previous code cells

In [10]:
# Set the  to USA Contiguous Albers Equal Area Conic (USGS version)
# Note, we would usually call the projection function in HdfMesh but the projection is not set in this example project
projection = 'EPSG:5070'  

In [None]:
# Use HdfPlan for geometry-related operations
print("\nExample: Extracting Base Geometry Attributes")
geom_attrs = HdfPlan.get_geom_attrs(geom_hdf_path, ras_object=bald_eagle)

if geom_attrs:
    # Convert the dictionary to a DataFrame for better display
    geom_attrs_df = pd.DataFrame([geom_attrs])
    
    # Display the DataFrame
    print("Base Geometry Attributes:")
    display(geom_attrs_df)
else:
    print("No base geometry attributes found.")


geom_attrs:

| Complete Geometry | Extents | Geometry Time       | Land Cover Date Last Modified | Land Cover File Date | Land Cover Filename               | Land Cover Layername | SI Units | Sediment Bed Material Date Last Modified | Sediment Bed Material File Date | Sediment Bed Material Filename         | Sediment Bed Material Layername | Terrain File Date       | Terrain Filename                          | Terrain Layername | Title      | Version                  |
|-------------------|---------|---------------------|-------------------------------|----------------------|-----------------------------------|----------------------|----------|------------------------------------------|-------------------------------|-----------------------------------------|----------------------------------|-------------------------|------------------------------------------|-------------------|------------|--------------------------|
| True              | [1024276.82827958, 1027738.095898, 7850027.060...] | 01Jun2023 17:10:40 | 25FEB2022 10:39:08          | 25FEB2022 10:39:08   | ..\Chippewa\Mannings_n.hdf       | Mannings_n          | False    | 13OCT2020 17:40:32                      | 13OCT2020 17:40:32          | ..\Chippewa\Sediment Materials.hdf     | Sediment Materials              | 02JUL2020 09:03:32    | .\External Dependencies\100ft.hdf       | 100ft             | 100ft_Mod  | 1.0.20 (20Sep2024)      |

In [None]:
# Use HdfMesh for geometry-related operations
print("\nExample 3: Listing 2D Flow Area Names")
flow_area_names = HdfMesh.mesh_area_names(geom_hdf_path, ras_object=bald_eagle)
print("2D Flow Area Names:", flow_area_names)

In [None]:
# Example: Get 2D Flow Area Attributes (get_geom_2d_flow_area_attrs)
print("\nExample: Extracting 2D Flow Area Attributes")
flow_area_attributes = HdfMesh.get_geom_2d_flow_area_attrs(geom_hdf_path, ras_object=bald_eagle)

if flow_area_attributes:
    # Convert the dictionary to a DataFrame for better display
    flow_area_df = pd.DataFrame([flow_area_attributes])
    
    # Display the DataFrame
    print("2D Flow Area Attributes:")
    display(flow_area_df)
    
    # Optionally, you can access specific attributes
    print("\nSpecific Attribute Examples:")
    print(f"Cell Average Size: {flow_area_attributes.get('Cell Average Size', 'N/A')}")
    print(f"Manning's n: {flow_area_attributes.get('Manning''s n', 'N/A')}")
    print(f"Terrain Filename: {flow_area_attributes.get('Terrain Filename', 'N/A')}")
else:
    print("No 2D Flow Area attributes found.")

# Note: This example assumes that get_geom_2d_flow_area_attrs returns a dictionary.
# If it returns a different format, you may need to adjust the code accordingly.


flow_area_attributes:

| Name             | Locked | Mann | Multiple Face Mann n | Composite LC | Cell Vol Tol | Cell Min Area Fraction | Face Profile Tol | Face Area Tol | Face Conv Ratio | Laminar Depth | Min Face Length Ratio | Spacing dx | Spacing dy | Shift dx | Shift dy | Cell Count |
|------------------|--------|------|-----------------------|--------------|---------------|------------------------|------------------|----------------|------------------|----------------|------------------------|------------|------------|----------|----------|------------|
| [b'Perimeter 1'] | [0]    | [0.06] | [1]                   | [1]          | [0.01]        | [0.01]                | [0.01]          | [0.01]        | [0.02]          | [0.2]          | [0.05]                | [600.0]    | [600.0]    | [nan]    | [nan]    | [354]      |


In [None]:
# Example: Get 2D Flow Area Perimeter Polygons (mesh_areas)
print("\nExample: Extracting 2D Flow Area Perimeter Polygons")
mesh_areas = HdfMesh.mesh_areas(geom_hdf_path, ras_object=bald_eagle)

In [None]:
# Plot the 2D Flow Area Perimeter Polygons
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(12, 8))
mesh_areas.plot(ax=ax, edgecolor='black', facecolor='none')

# Add labels for each polygon
for idx, row in mesh_areas.iterrows():
    centroid = row.geometry.centroid
    # Check if 'Name' column exists, otherwise use a default label
    label = row.get('Name', f'Area {idx}')
    ax.annotate(label, (centroid.x, centroid.y), ha='center', va='center')

plt.title('2D Flow Area Perimeter Polygons')
plt.xlabel('Easting')
plt.ylabel('Northing')
plt.tight_layout()
plt.show()

In [None]:
# Example: Extract mesh cell faces
print("\nExample: Extracting mesh cell faces")

# Get mesh cell faces
mesh_cell_faces = HdfMesh.mesh_cell_faces(geom_hdf_path, ras_object=bald_eagle)

# Display the first few rows of the mesh cell faces DataFrame
print("First few rows of mesh cell faces:")
display(mesh_cell_faces.head())

mesh_cell_faces:

| mesh_name   | face_id | geometry                                           |
|-------------|---------|----------------------------------------------------|
| Perimeter 1 | 0       | LINESTRING (1027231.594 7857846.138, 1026833.9... |
| Perimeter 1 | 1       | LINESTRING (1026833.966 7857797.923, 1026849.8... |
| Perimeter 1 | 2       | LINESTRING (1026849.886 7857613.488, 1027249.0... |
| Perimeter 1 | 3       | LINESTRING (1027249.03 7857618.591, 1027231.59... |
| Perimeter 1 | 4       | LINESTRING (1027231.594 7857846.138, 1027231.5... |

In [17]:
# Set the projection to USA Contiguous Albers Equal Area Conic (USGS version)
# Note, we would usually call the projection function in HdfMesh but the projection is not set in this example project
projection = 'EPSG:5070'  # NAD83 / Conus Albers

In [None]:
# Plot the mesh cell faces
fig, ax = plt.subplots(figsize=(12, 8))

# Set the projection for the plot
ax.set_aspect('equal', adjustable='datalim')
ax.set_title('Mesh Cell Faces', fontsize=16)
ax.set_xlabel('Easting', fontsize=12)
ax.set_ylabel('Northing', fontsize=12)

# Plot all cell faces using the specified projection
for _, row in mesh_cell_faces.iterrows():
    ax.plot(*row['geometry'].xy, color='blue', linewidth=0.5, alpha=0.5)

# Add a colorbar to show face IDs
scatter = ax.scatter(
    mesh_cell_faces.geometry.centroid.x,
    mesh_cell_faces.geometry.centroid.y,
    c=mesh_cell_faces['face_id'],
    cmap='viridis',
    s=1,
    alpha=0.5
)
plt.colorbar(scatter, label='Face ID')

plt.tight_layout()
plt.show()

# Calculate and display some statistics
print("\nMesh Cell Faces Statistics:")
print(f"Total number of cell faces: {len(mesh_cell_faces)}")
print(f"Number of unique meshes: {mesh_cell_faces['mesh_name'].nunique()}")


In [None]:
# Function to find the nearest cell face to a given point
def find_nearest_cell_face(point, cell_faces_df):
    """
    Find the nearest cell face to a given point.

    Args:
        point (shapely.geometry.Point): The input point.
        cell_faces_df (GeoDataFrame): DataFrame containing cell face linestrings.

    Returns:
        int: The face_id of the nearest cell face.
        float: The distance to the nearest cell face.
    """
    # Calculate distances from the input point to all cell faces
    distances = cell_faces_df.geometry.distance(point)

    # Find the index of the minimum distance
    nearest_index = distances.idxmin()

    # Get the face_id and distance of the nearest cell face
    nearest_face_id = cell_faces_df.loc[nearest_index, 'face_id']
    nearest_distance = distances[nearest_index]

    return nearest_face_id, nearest_distance

# Example usage
print("\nExample: Finding the nearest cell face to a given point")

# Create a sample point (you can replace this with any point of interest)
from shapely.geometry import Point
from geopandas import GeoDataFrame

# Create the sample point with the same CRS as mesh_cell_faces
sample_point = GeoDataFrame(
    {'geometry': [Point(1025677, 7853731)]}, 
    crs=mesh_cell_faces.crs
)

if not mesh_cell_faces.empty and not sample_point.empty:
    nearest_face_id, distance = find_nearest_cell_face(sample_point.geometry.iloc[0], mesh_cell_faces)
    print(f"Nearest cell face to point {sample_point.geometry.iloc[0].coords[0]}:")
    print(f"Face ID: {nearest_face_id}")
    print(f"Distance: {distance:.2f} units")

    # Visualize the result
    fig, ax = plt.subplots(figsize=(12, 8))
    
    # Plot all cell faces
    mesh_cell_faces.plot(ax=ax, color='blue', linewidth=0.5, alpha=0.5, label='Cell Faces')
    
    # Plot the sample point
    sample_point.plot(ax=ax, color='red', markersize=100, alpha=0.7, label='Sample Point')
    
    # Plot the nearest cell face
    nearest_face = mesh_cell_faces[mesh_cell_faces['face_id'] == nearest_face_id]
    nearest_face.plot(ax=ax, color='green', linewidth=2, alpha=0.7, label='Nearest Face')
    
    # Set labels and title
    ax.set_xlabel('X Coordinate')
    ax.set_ylabel('Y Coordinate')
    ax.set_title('Nearest Cell Face to Sample Point')
    
    # Add legend and grid
    ax.legend()
    ax.grid(True)
    
    # Adjust layout and display
    plt.tight_layout()
    plt.show()
else:
    print("Unable to perform nearest cell face search due to missing data.")


In [None]:
# Example: Extract Cell Polygons
print("\nExample 6: Extracting Cell Polygons")
cell_polygons_df = HdfMesh.mesh_cell_polygons(geom_hdf_path, ras_object=bald_eagle)
if not cell_polygons_df.empty:
    display(cell_polygons_df.head())
else:
    print("No Cell Polygons found.")

# Plot cell polygons
if not cell_polygons_df.empty:
    fig, ax = plt.subplots(figsize=(12, 8))
    
    # Plot cell polygons
    cell_polygons_df.plot(ax=ax, edgecolor='blue', facecolor='none')
    
    # Set labels and title
    ax.set_xlabel('X Coordinate')
    ax.set_ylabel('Y Coordinate')
    ax.set_title('2D Flow Area Cell Polygons')
    
    # Add grid
    ax.grid(True)
    
    # Adjust layout and display
    plt.tight_layout()
    plt.show()
else:
    print("No cell polygon data available for plotting.")

cell_polygons_df:
| mesh_name   | cell_id | geometry                             |
|-------------|---------|--------------------------------------|
| Perimeter 1 | 0       | POLYGON ((1026421.241 7857214.462, 1026297.261... |
| Perimeter 1 | 1       | POLYGON ((1026421.241 7857214.462, 1026454.941... |
| Perimeter 1 | 2       | POLYGON ((1026322.451 7856791.144, 1026329.012... |
| Perimeter 1 | 3       | POLYGON ((1026696.503 7856780.036, 1026591.883... |
| Perimeter 1 | 4       | POLYGON ((1026208.456 7855828.458, 1026126.864... |

In [None]:
# Example 5: Extract Cell Info
print("\nExample 5: Extracting Cell Info")
cell_info_df = HdfMesh.mesh_cell_points(geom_hdf_path, ras_object=bald_eagle)
if not cell_info_df.empty:
    display(cell_info_df.head())
else:
    print("No Cell Info found.")

# Plot cell centers
import matplotlib.pyplot as plt

if not cell_info_df.empty:
    fig, ax = plt.subplots(figsize=(12, 8))
    
    # Plot cell centers
    cell_info_df.plot(ax=ax, color='red', markersize=5)
    
    # Set labels and title
    ax.set_xlabel('X Coordinate')
    ax.set_ylabel('Y Coordinate')
    ax.set_title('2D Flow Area Cell Centers')
    
    # Add grid
    ax.grid(True)
    
    # Adjust layout and display
    plt.tight_layout()
    plt.show()
else:
    print("No cell data available for plotting.")


cell_info_df

Extracting Cell Info
| mesh_name   | cell_id | geometry                             |
|-------------|---------|--------------------------------------|
| Perimeter 1 | 0       | POINT (1026293.682 7857287.293)     |
| Perimeter 1 | 1       | POINT (1026398.126 7857069.407)     |
| Perimeter 1 | 2       | POINT (1026433.246 7856847.728)     |
| Perimeter 1 | 3       | POINT (1026605.884 7856865.385)     |
| Perimeter 1 | 4       | POINT (1026084.887 7855780.411)     |

In [None]:
# Provide function that will accept a geopandas point object and will find the nearest cell center
# Function to find the nearest cell center to a given point
def find_nearest_cell(point, cell_centers_df):
    """
    Find the nearest cell center to a given point.

    Args:
        point (shapely.geometry.Point): The input point.
        cell_centers_df (GeoDataFrame): DataFrame containing cell center points.

    Returns:
        int: The cell_id of the nearest cell.
        float: The distance to the nearest cell center.
    """
    # Calculate distances from the input point to all cell centers
    distances = cell_centers_df.geometry.distance(point)

    # Find the index of the minimum distance
    nearest_index = distances.idxmin()

    # Get the cell_id and distance of the nearest cell
    nearest_cell_id = cell_centers_df.loc[nearest_index, 'cell_id']
    nearest_distance = distances[nearest_index]

    return nearest_cell_id, nearest_distance

# Example usage
print("\nExample: Finding the nearest cell to a given point")

# Create a sample point (you can replace this with any point of interest)
from shapely.geometry import Point
from geopandas import GeoDataFrame

# Set the projection to USA Contiguous Albers Equal Area Conic (USGS version)
# Note, we would usually call the projection function in HdfMesh but the projection is not set in this example project
projection = 'EPSG:5070'  # NAD83 / Conus Albers

# Create the sample point with the correct CRS
print("Create Sample Point")
sample_point = GeoDataFrame({'geometry': [Point(1026614, 7854594)]}, crs=projection)


print("")
if not cell_info_df.empty and not sample_point.empty:
    
    nearest_cell_id, distance = find_nearest_cell(sample_point.geometry.iloc[0], cell_info_df)
    print(f"Nearest cell to point {sample_point.geometry.iloc[0].coords[0]}:")
    print(f"Cell ID: {nearest_cell_id}")
    print(f"Distance: {distance:.2f} units")

    # Visualize the result
    fig, ax = plt.subplots(figsize=(12, 8))
    
    # Plot all cell centers
    cell_info_df.plot(ax=ax, color='blue', markersize=5, alpha=0.5, label='Cell Centers')
    
    # Plot the sample point
    sample_point.plot(ax=ax, color='red', markersize=100, alpha=0.7, label='Sample Point')
    
    # Plot the nearest cell center
    nearest_cell = cell_info_df[cell_info_df['cell_id'] == nearest_cell_id]
    nearest_cell.plot(ax=ax, color='green', markersize=100, alpha=0.7, label='Nearest Cell')
    
    # Set labels and title
    ax.set_xlabel('X Coordinate')
    ax.set_ylabel('Y Coordinate')
    ax.set_title('Nearest Cell to Sample Point')
    
    # Add legend and grid
    ax.legend()
    ax.grid(True)
    
    # Adjust layout and display
    plt.tight_layout()
    plt.show()
else:
    print("Unable to perform nearest cell search due to missing data.")


In [None]:
# Example: Extract Boundary Condition Lines and Plot with 2D Flow Area Perimeter Polygons
print("\nExample 7: Extracting Boundary Condition Lines and Plotting with 2D Flow Area Perimeter Polygons")
bc_lines_df = HdfBndry.bc_lines(geom_hdf_path, ras_object=bald_eagle)
if not bc_lines_df.empty:
    display(bc_lines_df.head())
else:
    print("No Boundary Condition Lines found.")

# Plot if data exists
if not bc_lines_df.empty or not mesh_areas.empty:
    fig, ax = plt.subplots(figsize=(12, 8))
    
    # Plot 2D Flow Area Perimeter Polygons
    if not mesh_areas.empty:
        mesh_areas.plot(ax=ax, edgecolor='black', facecolor='none', alpha=0.7, label='2D Flow Area')
        
        # Add labels for each polygon
        for idx, row in mesh_areas.iterrows():
            centroid = row.geometry.centroid
            label = row.get('Name', f'Area {idx}')
            ax.annotate(label, (centroid.x, centroid.y), ha='center', va='center')
    
    # Plot boundary condition lines
    if not bc_lines_df.empty:
        bc_lines_df.plot(ax=ax, color='red', linewidth=2, label='Boundary Condition Lines')
    
    # Set labels and title
    ax.set_xlabel('Easting')
    ax.set_ylabel('Northing')
    ax.set_title('2D Flow Area Perimeter Polygons and Boundary Condition Lines')
    
    # Add grid and legend
    ax.grid(True)
    ax.legend()
    
    # Adjust layout and display
    plt.tight_layout()
    plt.show()
else:
    print("No data available for plotting.")

bc_lines_df:

| bc_line_id | name       | mesh_name   | type     | geometry                                                       |
|-------------|------------|-------------|----------|---------------------------------------------------------------|
| 0           | Upstream   | Perimeter 1 | External | LINESTRING (1027205.957 7858200.238, 1025994.9...)          |
| 1           | Downstream | Perimeter 1 | External | LINESTRING (1026484.007 7850421.804, 1024446.2...)          |

In [None]:
# Example: Extract Breaklines and Plot with 2D Flow Area Perimeter Polygons
print("\nExample 8: Extracting Breaklines and Plotting with 2D Flow Area Perimeter Polygons")
breaklines_df = HdfBndry.breaklines(geom_hdf_path, ras_object=bald_eagle)
if not breaklines_df.empty:
    display(breaklines_df.head())
else:
    print("No Breaklines found.")

# Plot breaklines and 2D Flow Area Perimeter Polygons if they exist
if not breaklines_df.empty or not mesh_areas.empty:
    fig, ax = plt.subplots(figsize=(12, 8))
    
    # Plot 2D Flow Area Perimeter Polygons
    if not mesh_areas.empty:
        mesh_areas.plot(ax=ax, edgecolor='black', facecolor='none', alpha=0.7, label='2D Flow Area')
        
        # Add labels for each polygon
        for idx, row in mesh_areas.iterrows():
            centroid = row.geometry.centroid
            label = row.get('Name', f'Area {idx}')
            ax.annotate(label, (centroid.x, centroid.y), ha='center', va='center')
    
    # Plot breaklines
    if not breaklines_df.empty:
        breaklines_df.plot(ax=ax, color='blue', linewidth=2, label='Breaklines')
    
    # Set labels and title
    ax.set_xlabel('Easting')
    ax.set_ylabel('Northing')
    ax.set_title('2D Flow Area Perimeter Polygons and Breaklines')
    
    # Add grid and legend
    ax.grid(True)
    ax.legend()
    
    # Adjust layout and display
    plt.tight_layout()
    plt.show()
else:
    print("No data available for plotting.")

In [None]:
# Load and plot data/profile_lines_chippewa2D.geojson 
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np

# Load the GeoJSON file
geojson_path = r'data/profile_lines_chippewa2D.geojson'  # Update with the correct path
profile_lines_gdf = gpd.read_file(geojson_path)

# Set the Coordinate Reference System (CRS) to EPSG:5070
profile_lines_gdf = profile_lines_gdf.set_crs(epsg=5070, allow_override=True)

# Check the Coordinate Reference System (CRS)
if profile_lines_gdf.crs is None:
    print("Warning: The GeoDataFrame does not have a defined CRS.")
else:
    print(f"CRS of the profile lines GeoDataFrame: {profile_lines_gdf.crs}")

# Plot the profile lines
fig, ax = plt.subplots(figsize=(12, 8))
profile_lines_gdf.plot(ax=ax, color='orange', linewidth=2, label='Profile Lines')

# Set labels and title
ax.set_xlabel('Easting')
ax.set_ylabel('Northing')
ax.set_title('Profile Lines')

# Add grid and legend
ax.grid(True)
ax.legend()

# Adjust layout and display
plt.tight_layout()
plt.show()

print(profile_lines_gdf)

profile_lines_gdf:

| Name            |                                           geometry                                           |
|-----------------|-----------------------------------------------------------------------------------------------|
| Profile Line 1  | LINESTRING (1027230.307 7853817.865, 1026824.3...)                                         |
| Profile Line 2  | LINESTRING (1027326.023 7857086.707, 1026860.8...)                                         |
| Profile Line 3  | LINESTRING (1026863.782 7851854.837, 1026156.6...)                                         |

In [None]:
# Example: Extract mesh cell faces and plot with profile lines
print("\nExample: Extracting mesh cell faces and plotting with profile lines")

# Get mesh cell faces
mesh_cell_faces = HdfMesh.mesh_cell_faces(geom_hdf_path, ras_object=bald_eagle)

# Display the first few rows of the mesh cell faces DataFrame
print("First few rows of mesh cell faces:")
display(mesh_cell_faces.head())

# Load the GeoJSON file for profile lines
geojson_path = r'data/profile_lines_chippewa2D.geojson'  # Update with the correct path
profile_lines_gdf = gpd.read_file(geojson_path)

# Set the Coordinate Reference System (CRS) to EPSG:5070
profile_lines_gdf = profile_lines_gdf.set_crs(epsg=5070, allow_override=True)

# Plot the mesh cell faces and profile lines together
fig, ax = plt.subplots(figsize=(12, 8))
mesh_cell_faces.plot(ax=ax, color='blue', alpha=0.5, edgecolor='k', label='Mesh Cell Faces')
profile_lines_gdf.plot(ax=ax, color='orange', linewidth=2, label='Profile Lines')

# Set labels and title
ax.set_xlabel('Easting')
ax.set_ylabel('Northing')
ax.set_title('Mesh Cell Faces and Profile Lines')

# Add grid and legend
ax.grid(True)
ax.legend()

# Adjust layout and display
plt.tight_layout()
plt.show()


Identifying Sets of Cell Faces for each Profile Line

- find all mesh faces whose points are within 10ft of the any profile line segment
- discard any mesh faces whose angle differs from the nearest profile line segment by more than 15 degrees
- For each profile line, provide mesh cell faces that meet the criteria.  Show a plot, with mesh cell faces and profile lines, with the selected set of mesh cell faces highlighted with a red dashed line.  Zoom level of plot should be slightly zoomed out from the profile line.  One list and one plot per profile line


In [None]:
# Example: Extracting mesh cell faces near profile lines
print("\nExample: Extracting mesh cell faces near profile lines")

# Get mesh cell faces
mesh_cell_faces = HdfMesh.mesh_cell_faces(geom_hdf_path, ras_object=bald_eagle)

# Display the first few rows of the mesh cell faces DataFrame
print("First few rows of mesh cell faces:")
display(mesh_cell_faces.head())

# Load the GeoJSON file for profile lines
geojson_path = r'data/profile_lines_chippewa2D.geojson'  # Update with the correct path
profile_lines_gdf = gpd.read_file(geojson_path)

# Set the Coordinate Reference System (CRS) to EPSG:5070
profile_lines_gdf = profile_lines_gdf.set_crs(epsg=5070, allow_override=True)

# Initialize a dictionary to store faces near each profile line
faces_near_profile_lines = {}

# Define distance threshold (10 ft converted to meters)
distance_threshold = 10
angle_threshold = 60  # degrees

# Function to calculate the smallest angle between two lines or line segments.
# The angle is calculated in degrees relative to the positive x-axis.
# If the input is a LineString object, the angle is computed using the 
# coordinates of the start and end points of the line.
# If the input is a list of two points, the angle is calculated 
# directly from those points.

def calculate_angle(line):
    if isinstance(line, LineString):
        x_diff = line.xy[0][-1] - line.xy[0][0]
        y_diff = line.xy[1][-1] - line.xy[1][0]
    else:
        x_diff = line[1][0] - line[0][0]
        y_diff = line[1][1] - line[0][1]
    
    angle = np.degrees(np.arctan2(y_diff, x_diff))
    return angle % 360 if angle >= 0 else (angle + 360) % 360

# Function to break line into segments
def break_line_into_segments(line, segment_length):
    segments = []
    segment_angles = []
    
    distances = np.arange(0, line.length, segment_length)
    if distances[-1] != line.length:
        distances = np.append(distances, line.length)
        
    for i in range(len(distances)-1):
        point1 = line.interpolate(distances[i])
        point2 = line.interpolate(distances[i+1])
        segment = LineString([point1, point2])
        segments.append(segment)
        segment_angles.append(calculate_angle([point1.coords[0], point2.coords[0]]))
        
    return segments, segment_angles

# Function to calculate angle difference accounting for 180 degree equivalence
def angle_difference(angle1, angle2):
    diff = abs(angle1 - angle2) % 180
    return min(diff, 180 - diff)

# Function to order faces along profile line
def order_faces_along_profile(profile_line, faces_gdf):
    # Get start point of profile line
    profile_start = Point(profile_line.coords[0])
    
    # Calculate distance from each face's start point to profile start
    faces_with_dist = []
    for idx, face in faces_gdf.iterrows():
        face_start = Point(face.geometry.coords[0])
        dist = profile_start.distance(face_start)
        faces_with_dist.append((idx, dist))
    
    # Sort faces by distance
    faces_with_dist.sort(key=lambda x: x[1])
    
    # Return ordered face indices
    return [x[0] for x in faces_with_dist]

# Function to combine ordered faces into single linestring
def combine_faces_to_linestring(ordered_faces_gdf):
    coords = []
    for _, face in ordered_faces_gdf.iterrows():
        if not coords:  # First face - add all coordinates
            coords.extend(list(face.geometry.coords))
        else:  # Subsequent faces - add only end coordinate
            coords.append(face.geometry.coords[-1])
    return LineString(coords)

# Initialize GeoDataFrame for final profile-to-faceline results
profile_to_faceline = gpd.GeoDataFrame(columns=['profile_name', 'geometry'], crs=profile_lines_gdf.crs)

# Iterate through each profile line
for index, profile_line in profile_lines_gdf.iterrows():
    profile_geom = profile_line.geometry
    
    # Break profile line into segments
    segments, segment_angles = break_line_into_segments(profile_geom, distance_threshold)
    
    # Initialize set to store nearby faces
    nearby_faces = set()
    
    # For each face, check distance to segments and angle difference
    for face_idx, face in mesh_cell_faces.iterrows():
        face_geom = face.geometry
        
        if isinstance(face_geom, LineString):
            face_angle = calculate_angle(face_geom)
            
            for segment, segment_angle in zip(segments, segment_angles):
                if face_geom.distance(segment) <= distance_threshold:
                    if angle_difference(face_angle, segment_angle) <= angle_threshold:
                        nearby_faces.add(face_idx)
                        break
    
    # Convert the set of indices back to a GeoDataFrame
    nearby_faces_gdf = mesh_cell_faces.loc[list(nearby_faces)]
    
    # Order faces along profile line
    ordered_indices = order_faces_along_profile(profile_geom, nearby_faces_gdf)
    ordered_faces_gdf = nearby_faces_gdf.loc[ordered_indices]
    
    # Combine ordered faces into single linestring
    combined_linestring = combine_faces_to_linestring(ordered_faces_gdf)
    
    # Add to profile_to_faceline GeoDataFrame
    new_row = gpd.GeoDataFrame({'profile_name': [profile_line['Name']], 
                               'geometry': [combined_linestring]}, 
                              crs=profile_lines_gdf.crs)
    profile_to_faceline = pd.concat([profile_to_faceline, new_row], ignore_index=True)
    
    # Store the ordered faces in the dictionary
    faces_near_profile_lines[profile_line['Name']] = ordered_faces_gdf

# Plot the results
fig, ax = plt.subplots(figsize=(12, 8))

# Plot all mesh cell faces in light blue
mesh_cell_faces.plot(ax=ax, color='lightblue', alpha=0.3, edgecolor='k', label='All Mesh Faces')

# Plot selected faces for each profile line with numbers
colors = ['red', 'green', 'blue']
for (profile_name, faces), color in zip(faces_near_profile_lines.items(), colors):
    if not faces.empty:
        faces.plot(ax=ax, color=color, alpha=0.6, label=f'Faces near {profile_name}')
        
        # Add numbers to faces
        for i, (idx, face) in enumerate(faces.iterrows()):
            midpoint = face.geometry.interpolate(0.5, normalized=True)
            ax.text(midpoint.x, midpoint.y, str(i+1), 
                   color=color, fontweight='bold', ha='center', va='center')

# Plot the combined linestrings
profile_to_faceline.plot(ax=ax, color='black', linewidth=2, 
                        linestyle='--', label='Combined Face Lines')

# Set labels and title
ax.set_xlabel('Easting')
ax.set_ylabel('Northing')
ax.set_title('Mesh Cell Faces and Profile Lines\nNumbered in order along profile')

# Add grid and legend
ax.grid(True)
ax.legend()

# Adjust layout and display
plt.tight_layout()
plt.show()

# Display the results
print("\nOriginal ordered faces near profile lines:")
display(faces_near_profile_lines)

print("\nCombined profile-to-faceline results:")
display(profile_to_faceline)

# Identify the adjacent mesh cell 

# Original ordered faces near profile lines:

faces_near_profile_lines

| Profile Line     | mesh_name   | face_id | geometry                                           |
|------------------|-------------|---------|----------------------------------------------------|
| Profile Line 1   | Perimeter 1 | 272     | LINESTRING (1027033.379 7853915.407, 1027236.5...) |
|                  | Perimeter 1 | 280     | LINESTRING (1026828.547 7854023.197, 1027033.3...) |
|                  | Perimeter 1 | 788     | LINESTRING (1026679.152 7854178.914, 1026828.5...) |
|                  | Perimeter 1 | 497     | LINESTRING (1026514.717 7854204.236, 1026318.9...) |
|                  | Perimeter 1 | 786     | LINESTRING (1026514.717 7854204.236, 1026679.1...) |
|                  | Perimeter 1 | 362     | LINESTRING (1026318.937 7854241.964, 1026124.1...) |
|                  | Perimeter 1 | 370     | LINESTRING (1026124.125 7854277.676, 1025914.9...) |
|                  | Perimeter 1 | 232     | LINESTRING (1025697.135 7854288.241, 1025914.9...) |
|                  | Perimeter 1 | 747     | LINESTRING (1025492.697 7854295.877, 1025310.0...) |
|                  | Perimeter 1 | 216     | LINESTRING (1025492.697 7854295.877, 1025697.1...) |
|                  | Perimeter 1 | 184     | LINESTRING (1025310.011 7854309.329, 1025120.2...) |
|                  | Perimeter 1 | 181     | LINESTRING (1025120.253 7854326.58, 1024848.02...) |
| Profile Line 2   | Perimeter 1 | 52      | LINESTRING (1027100.412 7857052.854, 1027350.3...) |
|                  | Perimeter 1 | 92      | LINESTRING (1026851.383 7857008.101, 1027100.4...) |
|                  | Perimeter 1 | 548     | LINESTRING (1026641.354 7856996.257, 1026851.3...) |
|                  | Perimeter 1 | 691     | LINESTRING (1026641.354 7856996.257, 1026502.8...) |
|                  | Perimeter 1 | 78      | LINESTRING (1026502.841 7856972.375, 1026329.0...) |
|                  | Perimeter 1 | 79      | LINESTRING (1026329.012 7856944.836, 1026128.8...) |
|                  | Perimeter 1 | 697     | LINESTRING (1025929.132 7856985.559, 1026128.8...) |
| Profile Line 3   | Perimeter 1 | 532     | LINESTRING (1026498.193 7851901.653, 1026838.2...) |
|                  | Perimeter 1 | 341     | LINESTRING (1026498.193 7851901.653, 1026163.9...) |
|                  | Perimeter 1 | 349     | LINESTRING (1026163.923 7851921.349, 1025938.7...) |
|                  | Perimeter 1 | 455     | LINESTRING (1025734.076 7851837.786, 1025938.7...) |
|                  | Perimeter 1 | 469     | LINESTRING (1025531.838 7851798.217, 1025734.0...) |
|                  | Perimeter 1 | 416     | LINESTRING (1025333.4 7851765.372, 1025531.838...) |
|                  | Perimeter 1 | 437     | LINESTRING (1025107.276 7851731.39, 1025333.4...) |
|                  | Perimeter 1 | 480     | LINESTRING (1024765.061 7851691.548, 1025107.2...) |

profile_to_faceline

| profile_name     | geometry                                           |
|------------------|----------------------------------------------------|
| Profile Line 1   | LINESTRING (1027033.379 7853915.407, 1027236.5...) |
| Profile Line 2   | LINESTRING (1027100.412 7857052.854, 1027350.3...) |
| Profile Line 3   | LINESTRING (1026498.193 7851901.653, 1026838.2...) |



-----

In [None]:
face_property_tables = HdfMesh.get_face_property_tables(geom_hdf_path)

display(face_property_tables)

face_property_tables: 

face_property_tables: 

|   Face ID |         Z |       Area |  Wetted Perimeter |  Manning's n |
|-----------|-----------|------------|-------------------|---------------|
|         0 | 683.783142 |    0.000000 |          0.000000 |      0.066800 |
|         1 | 683.983154 |   25.314476 |        311.063843 |      0.066800 |
|         2 | 684.140930 |   77.886810 |        355.364807 |      0.066002 |
|         3 | 684.189270 |   98.404495 |        368.926331 |      0.065757 |
|         4 | 684.579102 | 249.174759 |        400.563110 |      0.065312 |
| ...       | ...       | ...        | ...               | ...           |
|      5183 | 683.024048 | 1228.016968 |        475.787079 |      0.063346 |
|      5184 | 683.636292 |    0.000000 |          0.000000 |      0.075398 |
|      5185 | 683.836304 |   13.135144 |        199.787888 |      0.075398 |
|      5186 | 683.945923 |   45.552128 |        391.646118 |      0.075415 |
|      5187 | 683.949463 |   51.697250 |        397.835114 |      0.075416 |

[5188 rows x 5 columns]


In [None]:
import matplotlib.pyplot as plt

# Extract the face property table for the first Face ID
face_id = 4
face_properties = face_property_tables['Perimeter 1'][face_property_tables['Perimeter 1']['Face ID'] == face_id]

# Create subplots arranged horizontally
fig, axs = plt.subplots(1, 3, figsize=(18, 6))

# Plot Z vs Area
axs[0].plot(face_properties['Z'], face_properties['Area'], marker='o', color='blue', label='Area')
axs[0].set_title(f'Face ID {face_id}: Z vs Area')
axs[0].set_xlabel('Z')
axs[0].set_ylabel('Area')
axs[0].grid(True)
axs[0].legend()

# Plot Z vs Wetted Perimeter
axs[1].plot(face_properties['Z'], face_properties['Wetted Perimeter'], marker='o', color='green', label='Wetted Perimeter')
axs[1].set_title(f'Face ID {face_id}: Z vs Wetted Perimeter')
axs[1].set_xlabel('Z')
axs[1].set_ylabel('Wetted Perimeter')
axs[1].grid(True)
axs[1].legend()

# Plot Z vs Manning's n
axs[2].plot(face_properties['Z'], face_properties["Manning's n"], marker='o', color='red', label="Manning's n")
axs[2].set_title(f'Face ID {face_id}: Z vs Manning\'s n')
axs[2].set_xlabel('Z')
axs[2].set_ylabel("Manning's n")
axs[2].grid(True)
axs[2].legend()

plt.tight_layout()
plt.show()


In [None]:
# Get mesh timeseries output

# Get mesh areas from previous code cell
mesh_areas = HdfMesh.mesh_area_names(geom_hdf_path, ras_object=bald_eagle)

if mesh_areas:
    mesh_name = mesh_areas[0]  # Use the first 2D flow area name
    timeseries_da = HdfResultsMesh.mesh_timeseries_output(plan_hdf_path, mesh_name, "Water Surface", ras_object=bald_eagle)
    print(f"\nMesh Timeseries Output (Water Surface) for {mesh_name}:")
    print(timeseries_da)
else:
    print("No mesh areas found in the geometry file.")

# Get mesh cells timeseries output
cells_timeseries_ds = HdfResultsMesh.mesh_cells_timeseries_output(plan_hdf_path, mesh_name, ras_object=bald_eagle)
print("\nMesh Cells Timeseries Output:")
print(cells_timeseries_ds)

# Get mesh faces timeseries output
faces_timeseries_ds = HdfResultsMesh.mesh_faces_timeseries_output(plan_hdf_path, mesh_name, ras_object=bald_eagle)
print("\nMesh Faces Timeseries Output:")
print(faces_timeseries_ds)



## Face Property Tables

| Face ID |      Z      |      Area      | Wetted Perimeter | Manning's n |
|---------|-------------|----------------|------------------|-------------|
|    0    | 683.783142  |     0.000000   |      0.000000    |   0.066800  |
|    1    | 683.983154  |    25.314476   |    311.063843    |   0.066800  |
|    2    | 684.140930  |    77.886810   |    355.364807    |   0.066002  |
|    3    | 684.189270  |    98.404495   |    368.926331    |   0.065757  |
|    4    | 684.579102  |   249.174759   |    400.563110    |   0.065312  |
|   ...   |     ...     |       ...      |        ...       |     ...     |
|  5183   | 683.024048  |  1228.016968   |    475.787079    |   0.063346  |
|  5184   | 683.636292  |     0.000000   |      0.000000    |   0.075398  |
|  5185   | 683.836304  |    13.135144   |    199.787888    |   0.075398  |
|  5186   | 683.945923  |    45.552128   |    391.646118    |   0.075415  |
|  5187   | 683.949463  |    51.697250   |    397.835114    |   0.075416  |

**[5188 rows x 5 columns]**

## Cells Timeseries Dataset

| Mesh Name   | Size  | Dimensions                     | Coordinates                                                                 | Data Variables                                                                                     | Attributes                     |
|-------------|-------|--------------------------------|-----------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------|--------------------------------|
| Perimeter 1 | 13MB  | (time: 1633, cell_id: 433, face_id: 814) | * time (time) datetime64[ns] <br> * cell_id (cell_id) int32 <br> * face_id (face_id) int32 | Water Surface (time, cell_id) float32 <br> Face Velocity (time, face_id) float32 <br> Face Flow (time, face_id) float32 | mesh_name: Perimeter 1 <br> start_time: 2019-04-02 00:00:00 |

## Faces Timeseries Dataset

| Mesh Name   | Size  | Dimensions                     | Coordinates                                                                 | Data Variables                                                                                     | Attributes                     |
|-------------|-------|--------------------------------|-----------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------|--------------------------------|
| Perimeter 1 | 11MB  | (time: 1633, face_id: 814)    | * time (time) datetime64[ns] <br> * face_id (face_id) int32              | face_velocity (time, face_id) float32 <br> face_flow (time, face_id) float32                     | units: ft/s <br> mesh_name: Perimeter 1 <br> variable: Face Velocity |


Identifying Sets of Cell Faces for each Profile Line

- find all mesh faces whose points are within 10ft of the any profile line segment
- discard any mesh faces whose angle differs from the nearest profile line segment by more than 15 degrees
- For each profile line, provide mesh cell faces that meet the criteria.  Show a plot, with mesh cell faces and profile lines, with the selected set of mesh cell faces highlighted with a red dashed line.  Zoom level of plot should be slightly zoomed out from the profile line.  One list and one plot per profile line


Face Results: Velocity and Face Flow

    The Face Velocity and Face Flow for each face is provided in mesh_cells_timeseries_output


Face Property Table: 

    Faces Area Elevation Values:

    For each face, there is a table with Z vs Area, Wetted Perimeter, and Mannings N




Faces Minimum Elevation

    This will be useful when plotting a profile of the mesh cell faces, showing the variation and composite values for the string of cell faces
    because we don't have the terrain, we have to represent the profile using the length of each face, projected vertically from the water surface elevation to the minimum elevation.  



In [None]:
# Convert all face velocities and face flow values to positive

# Function to process and convert face data to positive values
def convert_to_positive_values(faces_timeseries_ds, cells_timeseries_ds):
    """
    Convert face velocities and flows to positive values while maintaining their relationships.
    
    Args:
        faces_timeseries_ds (xarray.Dataset): Dataset containing face timeseries data
        cells_timeseries_ds (xarray.Dataset): Dataset containing cell timeseries data
        
    Returns:
        xarray.Dataset: Modified dataset with positive values
    """
    # Get the face velocity and flow variables
    face_velocity = faces_timeseries_ds['face_velocity']
    face_flow = faces_timeseries_ds['face_flow']
    
    # Calculate the sign of the velocity to maintain flow direction relationships
    velocity_sign = xr.where(face_velocity >= 0, 1, -1)
    
    # Convert velocities and flows to absolute values while maintaining their relationship
    faces_timeseries_ds['face_velocity'] = abs(face_velocity)
    faces_timeseries_ds['face_flow'] = abs(face_flow)
    
    # Store the original sign as a new variable for reference
    faces_timeseries_ds['velocity_direction'] = velocity_sign
    
    print("Conversion to positive values complete.")
    print(f"Number of faces processed: {len(faces_timeseries_ds.face_id)}")
    
    return faces_timeseries_ds, cells_timeseries_ds

# Convert the values in our datasets
faces_timeseries_ds_positive, cells_timeseries_ds_positive = convert_to_positive_values(
    faces_timeseries_ds, 
    cells_timeseries_ds
)

# Print summary statistics to verify the conversion
print("\nSummary Statistics for Face Velocity:")
print(f"Min: {float(faces_timeseries_ds_positive.face_velocity.min()):.2f}")
print(f"Max: {float(faces_timeseries_ds_positive.face_velocity.max()):.2f}")
print(f"Mean: {float(faces_timeseries_ds_positive.face_velocity.mean()):.2f}")

print("\nSummary Statistics for Face Flow:")
print(f"Min: {float(faces_timeseries_ds_positive.face_flow.min()):.2f}")
print(f"Max: {float(faces_timeseries_ds_positive.face_flow.max()):.2f}")
print(f"Mean: {float(faces_timeseries_ds_positive.face_flow.mean()):.2f}")

# Create a quick visualization to verify the conversion
plt.figure(figsize=(12, 8))

# Plot face velocity
plt.subplot(2, 1, 1)
plt.plot(faces_timeseries_ds_positive['face_velocity'].time, faces_timeseries_ds_positive['face_velocity'].mean(dim='face_id'), label='Mean Face Velocity', color='blue')
plt.title('Mean Face Velocity Over Time')
plt.xlabel('Time')
plt.ylabel('Velocity (m/s)')
plt.legend()
plt.grid()

# Plot face flow
plt.subplot(2, 1, 2)
plt.plot(faces_timeseries_ds_positive['face_flow'].time, faces_timeseries_ds_positive['face_flow'].mean(dim='face_id'), label='Mean Face Flow', color='green')
plt.title('Mean Face Flow Over Time')
plt.xlabel('Time')
plt.ylabel('Flow (m³/s)')
plt.legend()
plt.grid()

plt.tight_layout()
plt.show()


In [33]:
import pandas as pd
import numpy as np
import xarray as xr

# Function to process faces for a single profile line
def process_profile_line(profile_name, faces, cells_timeseries_ds, faces_timeseries_ds):
    face_ids = faces['face_id'].tolist()
    
    # Extract relevant data for these faces
    face_velocities = faces_timeseries_ds['face_velocity'].sel(face_id=face_ids)
    face_flows = faces_timeseries_ds['face_flow'].sel(face_id=face_ids)
    
    # Create a new dataset with calculated results
    results_ds = xr.Dataset({
        'face_velocity': face_velocities,
        'face_flow': face_flows
    })
    
    # Convert to dataframe for easier manipulation
    results_df = results_ds.to_dataframe().reset_index()
    
    # Add profile name and face order
    results_df['profile_name'] = profile_name
    results_df['face_order'] = results_df.groupby('time')['face_id'].transform(lambda x: pd.factorize(x)[0])
    
    return results_df


In [34]:
# Calculate Vave = Sum Qn / Sum An for each profile line
# where Vave = the summation of face flow / flow area for all the faces in the profile line

# Then, save the results to CSV

In [None]:
# Process all profile lines
all_results = []
for profile_name, faces in faces_near_profile_lines.items():
    profile_results = process_profile_line(profile_name, faces, cells_timeseries_ds, faces_timeseries_ds)
    all_results.append(profile_results)

# Combine results from all profile lines
combined_results_df = pd.concat(all_results, ignore_index=True)

# Display the first few rows of the combined results
print(combined_results_df.head())

combined_results_df: 


|    time    | face_id | face_velocity |  face_flow  |   profile_name   | face_order |
|------------|---------|---------------|-------------|------------------|------------|
| 2019-04-02 |   370   |    1.543974   | 961.118225  | Profile Line 1   |     0      |
| 2019-04-02 |   232   |    2.738194   | 5103.555176 | Profile Line 1   |     1      |
| 2019-04-02 |   747   |    3.109769   | 4777.513672 | Profile Line 1   |     2      |
| 2019-04-02 |   216   |    2.974400   | 5120.266113 | Profile Line 1   |     3      |
| 2019-04-02 |   184   |    0.924792   | 700.676697  | Profile Line 1   |     4      |


-----

### Calculate Face Area

The **Face Area** can be calculated using the formula:

\[
\text{Face Area} = \frac{\text{Face Flow}}{\text{Face Velocity}}
\]

We'll add this as a new column to the `combined_results_df` dataframe.


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Define unique colors for each face_id
unique_face_ids = combined_results_df['face_id'].unique()
colors = plt.cm.viridis(np.linspace(0, 1, len(unique_face_ids)))

# Create a color mapping for face_ids
color_mapping = dict(zip(unique_face_ids, colors))

# Scatter plot for Face Velocity
plt.figure(figsize=(12, 6))
for face_id in unique_face_ids:
    face_data = combined_results_df[combined_results_df['face_id'] == face_id]
    plt.scatter(face_data['time'], face_data['face_velocity'].abs(), 
                label=f'Face ID {face_id}', color=color_mapping[face_id], s=1, linewidth=0.5)
plt.title('Face Velocity Time Series')
plt.xlabel('Time')
plt.ylabel('Face Velocity (m/s)')
plt.grid()
plt.legend()
plt.tight_layout()
plt.show()

# Scatter plot for Face Flow
plt.figure(figsize=(12, 6))
for face_id in unique_face_ids:
    face_data = combined_results_df[combined_results_df['face_id'] == face_id]
    plt.scatter(face_data['time'], face_data['face_flow'].abs(),
                label=f'Face ID {face_id}', color=color_mapping[face_id], s=1, linewidth=0.5)
plt.title('Face Flow Time Series')
plt.xlabel('Time')
plt.ylabel('Face Flow (m³/s)')
plt.grid()
plt.legend()
plt.tight_layout()
plt.show()


-----

### Create New Time Series DataFrame for Each Profile Line

We'll create a dictionary where each key is a profile line name and the value is its corresponding time series dataframe containing only the relevant face cells.



In [None]:
profile_time_series = {}

# Iterate through each profile line and extract its corresponding data
for profile_name, faces_gdf in faces_near_profile_lines.items():
    # Get the list of face_ids for this profile line
    face_ids = faces_gdf['face_id'].tolist()
    
    # Filter the combined_results_df for these face_ids
    profile_df = combined_results_df[combined_results_df['face_id'].isin(face_ids)].copy()
    
    # Add the profile name as a column
    profile_df['profile_name'] = profile_name
    
    # Reset index for cleanliness
    profile_df.reset_index(drop=True, inplace=True)
    
    # Store in the dictionary
    profile_time_series[profile_name] = profile_df
    
    # Display a preview
    print(f"\nTime Series DataFrame for {profile_name}:")
    display(profile_df.head())

# Optionally, display all profile names
print("\nProfile Lines Processed:")
print(list(profile_time_series.keys()))


| Time       | face_id | face_velocity | face_flow   | profile_name   | face_order |
|------------|---------|---------------|-------------|----------------|------------|
| 2019-04-02 | 370     | 1.543974      | 961.118225  | Profile Line 1 | 0          |
| 2019-04-02 | 232     | 2.738194      | 5103.555176 | Profile Line 1 | 1          |
| 2019-04-02 | 747     | 3.109769      | 4777.513672 | Profile Line 1 | 2          |
| 2019-04-02 | 216     | 2.974400      | 5120.266113 | Profile Line 1 | 3          |
| 2019-04-02 | 184     | 0.924792      | 700.676697  | Profile Line 1 | 4          |  
  


| Time       | face_id | face_velocity | face_flow   | profile_name   | face_order |
|------------|---------|---------------|-------------|----------------|------------|
| 2019-04-02 | 52      | 0.000000      | 0.000000    | Profile Line 2 | 0          |
| 2019-04-02 | 92      | 0.000000      | 0.000000    | Profile Line 2 | 1          |
| 2019-04-02 | 548     | 1.018038      | 353.129822  | Profile Line 2 | 2          |
| 2019-04-02 | 691     | 2.106394      | 2195.409912 | Profile Line 2 | 3          |
| 2019-04-02 | 78      | 2.376904      | 3600.228760 | Profile Line 2 | 4          |  
  


| Time       | face_id | face_velocity | face_flow   | profile_name   | face_order |
|------------|---------|---------------|-------------|----------------|------------|
| 2019-04-02 | 532     | 0.000000      | 0.000000    | Profile Line 3 | 0          |
| 2019-04-02 | 341     | 0.000000      | 0.000000    | Profile Line 3 | 1          |
| 2019-04-02 | 349     | 1.962641      | 2601.644287 | Profile Line 3 | 2          |
| 2019-04-02 | 455     | 2.367594      | 4148.870605 | Profile Line 3 | 3          |
| 2019-04-02 | 469     | 2.515510      | 4458.292480 | Profile Line 3 | 4          |  
  
  
 
Profile Lines Processed:
['Profile Line 1', 'Profile Line 2', 'Profile Line 3']

### Combine All Profiles into a Single DataFrame

This is useful for aggregated analysis or visualization.


In [None]:

all_profiles_df = pd.concat(profile_time_series.values(), ignore_index=True)

# Display the combined dataframe
print("Combined Time Series DataFrame for All Profiles:")
display(all_profiles_df.head())



all_profiles_df:

| time       | face_id | face_velocity | face_flow   | profile_name   | face_order |
|------------|---------|---------------|-------------|----------------|------------|
| 2019-04-02 | 370     | 1.543974      | 961.118225  | Profile Line 1 | 0          |
| 2019-04-02 | 232     | 2.738194      | 5103.555176 | Profile Line 1 | 1          |
| 2019-04-02 | 747     | 3.109769      | 4777.513672 | Profile Line 1 | 2          |
| 2019-04-02 | 216     | 2.974400      | 5120.266113 | Profile Line 1 | 3          |
| 2019-04-02 | 184     | 0.924792      | 700.676697  | Profile Line 1 | 4          |


### Visualize All WSELs Together

This helps in comparing the temporal variations across different profile lines.

plt.figure(figsize=(16, 8))

# Iterate through each profile line and plot WSEL
for profile_name, profile_df in profile_time_series.items():
    plt.plot(
        profile_df['time'],
        profile_df['wsel'],
        marker='o',
        label=profile_name
    )

plt.title('Water Surface Elevation (WSEL) Over Time for All Profile Lines')
plt.xlabel('Time')
plt.ylabel('WSEL (Z)')
plt.legend(title='Profile Lines')
plt.tight_layout()
plt.show()


In [None]:
# Recursively explore the 2D Flow Areas structure in the geometry HDF file
import h5py

def print_hdf_structure(name, obj):
    """Print information about HDF5 object"""
    print(f"\nPath: {name}")
    print(f"Type: {type(obj).__name__}")
    
    if isinstance(obj, h5py.Dataset):
        print(f"Shape: {obj.shape}")
        print(f"Dtype: {obj.dtype}")
        print("Attributes:")
        for key, value in obj.attrs.items():
            print(f"  {key}: {value}")

def explore_flow_areas(file_path):
    """
    Recursively explore and print 2D Flow Areas structure in HDF5 file
    
    :param file_path: Path to the HDF5 file
    """
    try:
        with h5py.File(file_path, 'r') as hdf_file:
            if '/Geometry/2D Flow Areas' in hdf_file:
                flow_areas_group = hdf_file['/Geometry/2D Flow Areas']
                flow_areas_group.visititems(print_hdf_structure)
            else:
                print("2D Flow Areas group not found in geometry file")
    except Exception as e:
        print(f"Error exploring HDF file: {e}")

print("\nExploring 2D Flow Areas structure in geometry file:")
print("HDF Base Path: /Geometry/2D Flow Areas ")
explore_flow_areas(geom_hdf_path)


# Extract Breakline as Reference Line

We can't use a profile line, because the mesh orientation may be quite different than the direction of flow.  

Instead, use a breakline - the one named "SayersDam" should work

We can find the information specific to faces: 






# Extract Composite Results for 2D at Profile Lines to simulate Reference Lines



In [None]:
# Calculate average velocity (Vave) for each profile line
def calculate_profile_averages(profile_df):
    """
    Calculate average velocity (Vave) for a profile line using Sum(Qn)/Sum(An)
    
    Args:
        profile_df: DataFrame containing face flow data for a profile line
    
    Returns:
        DataFrame with time series of average velocities
    """
    # Group by time to calculate sums for each timestep
    time_groups = profile_df.groupby('time').agg({
        'face_flow': 'sum'  # Sum of all face flows (Qn)
    })
    
    # Calculate Vave = Sum(Qn)/Sum(An) - Note: face_area is no longer used
    time_groups['vave'] = time_groups['face_flow']  # Vave is now just face_flow since face_area is removed
    
    return time_groups

# Calculate averages for each profile line
profile_averages = {}
for profile_name, profile_df in profile_time_series.items():
    averages = calculate_profile_averages(profile_df)
    profile_averages[profile_name] = averages
    
    # Print summary statistics
    print(f"\nSummary for {profile_name}:")
    print(f"Mean Vave: {averages['vave'].mean():.2f} ft/s")
    print(f"Mean Flow: {averages['face_flow'].mean():.2f} cfs")

# Combine all profile averages into a single DataFrame
combined_averages = pd.DataFrame()
for profile_name, averages in profile_averages.items():
    # Add profile name as a column
    profile_data = averages.copy()
    profile_data['profile_name'] = profile_name
    combined_averages = pd.concat([combined_averages, profile_data])

# Reset index to make time a column
combined_averages = combined_averages.reset_index()

# Save to CSV
output_file = 'profile_line_averages.csv'
combined_averages.to_csv(output_file, index=False)
print(f"\nResults saved to {output_file}")

# Create visualization of Vave over time for all profile lines
plt.figure(figsize=(12, 6))
for profile_name, averages in profile_averages.items():
    plt.plot(averages.index, averages['vave'], label=profile_name)

plt.title('Average Velocity (Vave) Over Time by Profile Line')
plt.xlabel('Time')
plt.ylabel('Velocity (ft/s)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Create separate plots for each profile line showing Vave and individual face velocities
for profile_name, profile_df in profile_time_series.items():
    # Calculate averages for this profile
    averages = profile_averages[profile_name]
    
    # Create figure
    plt.figure(figsize=(12, 6))
    
    # Get ordered faces for this profile from faces_near_profile_lines
    ordered_faces = faces_near_profile_lines[profile_name]
    
    # Plot individual face velocities
    for idx, (_, face_row) in enumerate(ordered_faces.iterrows(), 1):
        face_id = face_row['face_id']
        face_data = profile_df[profile_df['face_id'] == face_id]
        plt.plot(face_data['time'], 
                face_data['face_velocity'], 
                alpha=0.3, 
                linestyle='-',
                label=f'Face #{idx} (ID: {face_id})')
    
    # Plot average velocity
    plt.plot(averages.index, 
            averages['vave'], 
            color='red', 
            linewidth=2, 
            label='Average Velocity (Vave)')
    
    plt.title(f'Velocities Over Time - {profile_name}')
    plt.xlabel('Time')
    plt.ylabel('Velocity (ft/s)')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    
    # Adjust layout to prevent legend overlap
    plt.tight_layout()
    plt.show()
    
    # Print summary statistics for this profile
    print(f"\nSummary Statistics for {profile_name}:")
    print(f"Number of faces: {len(ordered_faces)}")
    print(f"Mean Vave: {averages['vave'].mean():.2f} ft/s")
    print(f"Max Vave: {averages['vave'].max():.2f} ft/s")
    print(f"Min Vave: {averages['vave'].min():.2f} ft/s")
    
    # Print face mapping
    print("\nFace Mapping:")
    for idx, (_, face_row) in enumerate(ordered_faces.iterrows(), 1):
        print(f"Face #{idx} = Face ID {face_row['face_id']}")

In [None]:
# Create separate plots for each profile line showing total flow and individual face flows
for profile_name, profile_df in profile_time_series.items():
    # Calculate averages for this profile
    averages = profile_averages[profile_name]
    
    # Create figure
    plt.figure(figsize=(12, 6))
    
    # Get ordered faces for this profile from faces_near_profile_lines
    ordered_faces = faces_near_profile_lines[profile_name]
    
    # Plot individual face flows
    for idx, (_, face_row) in enumerate(ordered_faces.iterrows(), 1):
        face_id = face_row['face_id']
        face_data = profile_df[profile_df['face_id'] == face_id]
        plt.plot(face_data['time'], 
                abs(face_data['face_flow']), 
                alpha=0.3, 
                linestyle='-',
                label=f'Face #{idx} (ID: {face_id})')
    
    # Calculate and plot total flow as sum of absolute face flows
    total_flows = []
    for time in averages.index:
        time_data = profile_df[profile_df['time'] == time]
        total_flow = abs(time_data['face_flow']).sum()
        total_flows.append(total_flow)
    
    plt.plot(averages.index, 
            total_flows, 
            color='red', 
            linewidth=2, 
            label='Total Flow')
    
    plt.title(f'Flow Over Time - {profile_name}')
    plt.xlabel('Time')
    plt.ylabel('Flow (cfs)')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    
    # Adjust layout to prevent legend overlap
    plt.tight_layout()
    plt.show()
    
    # Print summary statistics for this profile
    print(f"\nSummary Statistics for {profile_name}:")
    print(f"Number of faces: {len(ordered_faces)}")
    print(f"Mean Total Flow: {np.mean(total_flows):.2f} cfs")
    print(f"Max Total Flow: {np.max(total_flows):.2f} cfs") 
    print(f"Min Total Flow: {np.min(total_flows):.2f} cfs")
    
    # Print face mapping
    print("\nFace Mapping:")
    for idx, (_, face_row) in enumerate(ordered_faces.iterrows(), 1):
        print(f"Face #{idx} = Face ID {face_row['face_id']}")


In [None]:
# Create separate plots for each profile line showing face flow
for profile_name, profile_df in profile_time_series.items():
    # Calculate averages for this profile
    averages = profile_averages[profile_name]
    
    # Create figure
    plt.figure(figsize=(12, 6))
    
    # Get ordered faces for this profile from faces_near_profile_lines
    ordered_faces = faces_near_profile_lines[profile_name]
    
    # Plot individual face flow
    for idx, (_, face_row) in enumerate(ordered_faces.iterrows(), 1):
        face_id = face_row['face_id']
        face_data = profile_df[profile_df['face_id'] == face_id]
        plt.plot(face_data['time'], 
                abs(face_data['face_flow']), 
                alpha=0.3, 
                linestyle='-',
                label=f'Face #{idx} (ID: {face_id})')
    
    # Plot average flow
    plt.plot(averages.index, 
            abs(averages['face_flow']), 
            color='red', 
            linewidth=2, 
            label='Average Flow')
    
    plt.title(f'Flow Over Time - {profile_name}')
    plt.xlabel('Time')
    plt.ylabel('Flow (cfs)')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    
    # Adjust layout to prevent legend overlap
    plt.tight_layout()
    plt.show()
    
    # Print summary statistics for this profile
    print(f"\nSummary Statistics for {profile_name}:")
    print(f"Number of faces: {len(ordered_faces)}")
    print(f"Mean Flow: {abs(averages['face_flow']).mean():.2f} cfs")
    print(f"Max Flow: {abs(averages['face_flow']).max():.2f} cfs")
    print(f"Min Flow: {abs(averages['face_flow']).min():.2f} cfs")
    
    # Print face mapping
    print("\nFace Mapping:")
    for idx, (_, face_row) in enumerate(ordered_faces.iterrows(), 1):
        print(f"Face #{idx} = Face ID {face_row['face_id']}")

In [None]:
# Check if we have the necessary variables
print("Available variables:")
print("profile_time_series:", 'profile_time_series' in locals())
print("faces_near_profile_lines:", 'faces_near_profile_lines' in locals())
print("profile_averages:", 'profile_averages' in locals())

# Look at the structure of profile_time_series
if 'profile_time_series' in locals():
    for name, df in profile_time_series.items():
        print(f"\nColumns in {name}:")
        print(df.columns.tolist())

In [45]:
def calculate_discharge_weighted_velocity(profile_df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate discharge-weighted average velocity for a profile line
    Vw = Sum(|Qi|*Vi)/Sum(|Qi|) where Qi is face flow and Vi is face velocity
    """
    print("Calculating discharge-weighted velocity...")
    print(f"Input DataFrame:\n{profile_df.head()}")

    # Calculate weighted velocity for each timestep
    weighted_velocities = []
    for time in profile_df['time'].unique():
        time_data = profile_df[profile_df['time'] == time]
        abs_flows = np.abs(time_data['face_flow'])
        abs_velocities = np.abs(time_data['face_velocity'])
        weighted_vel = (abs_flows * abs_velocities).sum() / abs_flows.sum()
        weighted_velocities.append({
            'time': time,
            'weighted_velocity': weighted_vel
        })
    
    weighted_df = pd.DataFrame(weighted_velocities)
    print(f"Calculated weighted velocities:\n{weighted_df.head()}")
    return weighted_df


In [None]:
# Calculate for each profile line

for profile_name, profile_df in profile_time_series.items():
    print(f"\nProcessing profile: {profile_name}")

    # Calculate discharge-weighted velocity
    weighted_velocities = calculate_discharge_weighted_velocity(profile_df)
    
    print("Weighted velocities calculated.")
    display(weighted_velocities)
    
    # Convert time to datetime if it isn't already
    weighted_velocities['time'] = pd.to_datetime(weighted_velocities['time'])
    print("Converted time to datetime format.")

    # Get ordered faces for this profile
    ordered_faces = faces_near_profile_lines[profile_name]
    print(f"Number of ordered faces: {len(ordered_faces)}")
    
    # Save dataframes as profile_name + "_discharge_weighted_velocity.csv"
    # Save weighted velocities to CSV
    output_file = f"{profile_name}_discharge_weighted_velocity.csv"
    weighted_velocities.to_csv(output_file, index=False)
    print(f"Saved weighted velocities to {output_file}")

In [None]:
# Create scatter plots for discharge-weighted velocity timeseries for each profile line

for profile_name, profile_df in profile_time_series.items():
    
    print(f"\nGenerating scatter plot for profile: {profile_name}")
    
    # Calculate discharge-weighted velocity
    weighted_velocities = calculate_discharge_weighted_velocity(profile_df)
    
    # Convert time to datetime if it isn't already
    weighted_velocities['time'] = pd.to_datetime(weighted_velocities['time'])
    
    # Create figure for scatter plot
    plt.figure(figsize=(12, 6))
    plt.scatter(weighted_velocities['time'], 
                weighted_velocities['weighted_velocity'], 
                color='blue', 
                alpha=0.6, 
                label='Discharge-Weighted Velocity')
    
    # Configure plot
    plt.title(f'Discharge-Weighted Velocity Timeseries - {profile_name}')
    plt.xlabel('Time')
    plt.ylabel('Discharge-Weighted Velocity (ft/s)')
    plt.grid(True)
    plt.legend()
    
    # Show plot
    plt.tight_layout()
    plt.show()




In [None]:
# Create scatter plots for discharge-weighted velocity timeseries for each profile line

for profile_name, profile_df in profile_time_series.items():
    
    print(f"\nGenerating scatter plot for profile: {profile_name}")
    
    # Calculate discharge-weighted velocity
    weighted_velocities = calculate_discharge_weighted_velocity(profile_df)
    
    # Convert time to datetime if it isn't already
    weighted_velocities['time'] = pd.to_datetime(weighted_velocities['time'])
    
    # Create figure for scatter plot
    plt.figure(figsize=(12, 6))
    
    # Plot individual face velocities with thinner lines and no markers
    for face_id in profile_df['face_id'].unique():
        face_data = profile_df[profile_df['face_id'] == face_id]
        plt.plot(face_data['time'], 
                 face_data['face_velocity'], 
                 alpha=0.3, 
                 linewidth=1,  # Thinner line
                 label=f'Face ID {face_id}')
    
    # Plot discharge-weighted velocity with thinner line and no marker
    plt.plot(weighted_velocities['time'], 
             weighted_velocities['weighted_velocity'], 
             color='red', 
             alpha=0.8, 
             linewidth=2,  # Thinner line
             label='Discharge-Weighted Velocity')
    
    # Configure plot
    plt.title(f'Face Velocities and Discharge-Weighted Velocity Timeseries - {profile_name}')
    plt.xlabel('Time')
    plt.ylabel('Velocity (ft/s)')
    plt.grid(True)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    
    # Show plot
    plt.tight_layout()
    plt.show()
    
    # Print summary statistics
    print(f"\nSummary Statistics for {profile_name}:")
    print(f"Number of faces: {profile_df['face_id'].nunique()}")
    print(f"Mean Weighted Velocity: {weighted_velocities['weighted_velocity'].mean():.2f} ft/s")
    print(f"Max Weighted Velocity: {weighted_velocities['weighted_velocity'].max():.2f} ft/s")
    print(f"Min Weighted Velocity: {weighted_velocities['weighted_velocity'].min():.2f} ft/s")
    
    # Print face mapping
    print("\nFace Mapping:")
    for idx, face_id in enumerate(profile_df['face_id'].unique(), 1):
        print(f"Face #{idx} = Face ID {face_id}")