# HEC-RAS Pipes, Conduits, and Pump Stations HDF Data Analysis Notebook

This notebook demonstrates how to manipulate and analyze the new HEC-RAS Conduits, Pipes, and Pump Stations results using the ras-commander library. It leverages the HdfPipe and HdfPump classes to streamline data extraction, processing, and visualization.

## Package Installation and Environment Setup
Uncomment and run package installation commands if needed

In [None]:
# Install ras-commander from pip (uncomment the install line below if the package is missing).
# The %pip magic is preferred over !pip because it installs into the running kernel's environment.
#%pip install ras-commander
# This installs ras-commander and all dependencies

In [None]:
# Import all required modules
#from ras_commander import *  # Import all ras-commander modules

# Import the required libraries for this notebook
import h5py
import numpy as np
import pandas as pd
import requests
from tqdm import tqdm
import scipy
import xarray as xr
import geopandas as gpd
import matplotlib.pyplot as plt
from IPython import display
import psutil  # For getting system CPU info
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import subprocess
import sys
import os
import shutil
from datetime import datetime, timedelta
from pathlib import Path  # Ensure pathlib is imported for file operations
import pyproj
from shapely.geometry import Point, LineString, Polygon
import xarray as xr

In [None]:
# Development-friendly import of ras-commander.
#
# Resolution order:
#   1. The pip-installed `ras_commander` package.
#   2. If that raises ImportError, the PARENT of the current working directory is
#      appended to sys.path and the import is retried.
# Step 2 assumes the notebook is run from a subfolder of the ras-commander repository,
# so a user's local revisions can be tested without installing the package.

import sys
from pathlib import Path

# Flexible imports to allow for development without installation 
#  ** Use this version with Jupyter Notebooks **
try:
    # Try to import from the installed package
    from ras_commander import *
except ImportError:
    # If the import fails, add the parent directory to the Python path
    import os
    # Despite its name, `current_file` holds the working DIRECTORY (notebooks have no __file__)
    current_file = Path(os.getcwd()).resolve()
    rascmdr_directory = current_file.parent
    # Appended (not inserted), so entries already on sys.path keep priority
    sys.path.append(str(rascmdr_directory))
    print("Loading ras-commander from local dev copy")
    # Now try to import again; a second ImportError propagates and stops the cell
    from ras_commander import *
print("ras_commander imported successfully")


# Use Example Project or Load Your Own Project

In [None]:
# Prepare the Davis example project and make sure results exist for the chosen plan.
#
# Workflow:
#   1. Look for the plan's results HDF inside ./example_projects/Davis.
#   2. If it is missing: extract the example project, initialize it, and compute the plan.
#   3. If it is present: only initialize the project so the global `ras` object is populated.
# The existence check avoids re-running the simulation when re-running or debugging the notebook.
import logging

# Plan to run and analyze; later cells use this variable to look up the plan's HDF paths.
plan_number = "02"

# Project folder (relative to the notebook's working directory) and expected results file.
current_dir = Path.cwd()  # Adjust if your notebook is in a different directory
pipes_ex_path = current_dir / "example_projects" / "Davis"
hdf_file = pipes_ex_path / f"DavisStormSystem.p{plan_number}.hdf"

if not hdf_file.exists():
    # Extract the Davis example project
    RasExamples.extract_project(["Davis"])

    # Initialize the RAS project (Pipe Networks are only supported in versions 6.6 and above)
    init_ras_project(pipes_ex_path, "6.6")
    logging.info(f"Davis project initialized with folder: {ras.project_folder}")
    logging.info(f"Davis ras object id: {id(ras)}")

    # Update run flags for the project
    RasPlan.update_run_flags(
        plan_number,
        geometry_preprocessor=True,
        unsteady_flow_simulation=True,
        run_sediment=False,
        post_processor=True,
        floodplain_mapping=False
    )

    # Execute the plan; the outcome is reported but a failure does not stop the notebook
    print(f"Executing Plan {plan_number} for the Davis project...")
    success_pipes_ex = RasCmdr.compute_plan(plan_number)
    if success_pipes_ex:
        print(f"Plan {plan_number} executed successfully for the Davis project.\n")
    else:
        print(f"Plan {plan_number} execution failed for the Davis project.\n")
else:
    print(f"{hdf_file.name} already exists. Skipping project extraction and plan execution.")
    # Results are already on disk; only the project needs to be initialized
    init_ras_project(pipes_ex_path, "6.6")

###  OPTIONAL: Use your own project instead

your_project_path = Path(r"D:\yourprojectpath")

init_ras_project(your_project_path, "6.6")
plan_number = "01"  # Plan number to use for this notebook 



### If you use this cell (converted to a code cell), skip the previous cell or convert the previous cell to markdown
### NOTE: Ensure the HDF Results file was generated by HEC-RAS Version 6.6 or above (pipe networks are not supported in earlier versions)

# Explore Project Dataframes using 'ras' Object

In [None]:
# Show the plan table held by the project's `ras` object.
# The bare expression on the last line is rendered as the cell's output.
print("Plan DataFrame for the project:")
ras.plan_df

In [None]:
# Show the unsteady-flow table held by the project's `ras` object (rendered as cell output).
print("\nUnsteady DataFrame for the project:")
ras.unsteady_df

In [None]:
# Show the boundary-conditions table held by the project's `ras` object (rendered as cell output).
print("\nBoundary Conditions DataFrame for the project:")
ras.boundaries_df 

In [None]:
# Get HDF Results Entries (only present when results are present).
# If this comes back empty, presumably no plan has been computed yet - re-check the project setup cell.
ras.get_hdf_entries()

# Find Paths for Results and Geometry HDFs

In [None]:
# Look up the results-HDF path recorded in the plan table for `plan_number`.
# `.values[0]` takes the first matching row and raises IndexError when no plan matches.
is_selected_plan = ras.plan_df['plan_number'] == plan_number
plan_hdf_path = ras.plan_df.loc[is_selected_plan, 'HDF_Results_Path'].values[0]

In [None]:
# Display the results HDF path resolved for the selected plan
plan_hdf_path

In [None]:
# Alternate: build the geometry HDF path for the same plan, for extracting geometry
# elements from the geometry HDF. The plan table's 'Geom Path' entry gets '.hdf' appended.
is_selected_plan = ras.plan_df['plan_number'] == plan_number
geom_hdf_path = ras.plan_df.loc[is_selected_plan, 'Geom Path'].values[0] + '.hdf'

In [None]:
# Display the geometry HDF path built for the selected plan
geom_hdf_path

In [None]:
# Extract runtime and compute time data
# NOTE: `hdf_path` receives the plan number rather than a file path. ras-commander is
# expected to resolve a plan number to that plan's HDF file - confirm against the library docs.
print("\nExtracting runtime and compute time data")
runtime_df = HdfResultsPlan.get_runtime_data(hdf_path=plan_number)
runtime_df
 

-----

# 2D Models with Pipe Networks: HDF Data Extraction Examples

In [None]:
# Get pipe conduits
# NOTE: Here we use the plan number instead of the path variable. The library decorators
# ensure this maps correctly. `plan_number` is used (not a hard-coded "02") so this cell
# follows whichever plan/project was selected in the setup cell.
pipe_conduits_gdf = HdfPipe.get_pipe_conduits(plan_number)
print("\nPipe Conduits: pipe_conduits_gdf")
pipe_conduits_gdf

In [None]:
# Plot the pipe conduit linestrings, marking endpoints and labelling each conduit by name
import matplotlib.pyplot as plt

# Explicit figure/axes pair so every call below targets this plot
fig, ax = plt.subplots(figsize=(12, 9))

# Draw each conduit from the GeoDataFrame
for idx, row in pipe_conduits_gdf.iterrows():
    polyline = row['Polyline']

    # Extract coordinates from the linestring
    x_coords, y_coords = polyline.xy

    # Plot the linestring
    ax.plot(x_coords, y_coords, 'b-', linewidth=1, alpha=0.7)

    # Mark both endpoints with a small black 'x'
    ax.plot([x_coords[0], x_coords[-1]], [y_coords[0], y_coords[-1]],
            'x', color='black', markersize=4)

    # Label anchor: the point halfway ALONG the conduit. Averaging the two endpoints
    # can place the label off the line when the conduit bends.
    label_point = polyline.interpolate(0.5, normalized=True)

    # Add pipe name label at the anchor, oriented top-right
    ax.text(label_point.x, label_point.y, f'{row["Name"]}', fontsize=8,
            verticalalignment='bottom', horizontalalignment='left',
            rotation=45)  # 45 degree angle for top-right orientation

# Add title and labels
ax.set_title('Pipe Conduit Network Layout')
ax.set_xlabel('Easting')
ax.set_ylabel('Northing')

# Add grid
ax.grid(True, linestyle='--', alpha=0.6)

# Adjust layout to prevent label clipping
fig.tight_layout()

# Display the plot
plt.show()


In [None]:
# Plot the first terrain profiles (up to MAX_PROFILES), one figure per profile
import matplotlib.pyplot as plt

# Number of profiles to plot; slicing below tolerates networks with fewer conduits
MAX_PROFILES = 2

# Extract terrain profiles from the GeoDataFrame
terrain_profiles = pipe_conduits_gdf['Terrain_Profiles'].tolist()

# NOTE(review): the axis labels below say metres, while other cells in this notebook
# label elevations in feet - confirm the project's unit system.
for i, profile in enumerate(terrain_profiles[:MAX_PROFILES]):
    # An empty profile cannot be unzipped into x/y sequences, so skip it explicitly
    if profile is None or len(profile) == 0:
        print(f"Terrain Profile {i + 1} is empty; skipping.")
        continue

    # Unzip the profile into x and y coordinates
    x_coords, y_coords = zip(*profile)

    # Create a new figure for each profile
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(x_coords, y_coords, marker='o', linestyle='-', color='g', alpha=0.7)

    # Add title and labels
    ax.set_title(f'Terrain Profile {i + 1}')
    ax.set_xlabel('Distance along profile (m)')
    ax.set_ylabel('Elevation (m)')

    # Add grid
    ax.grid(True, linestyle='--', alpha=0.6)

    # Adjust layout to prevent label clipping
    fig.tight_layout()

    # Display the plot
    plt.show()


In [None]:
# Inspect the raw HDF contents of the Pipe Nodes geometry group (uncomment to run).
# NOTE(review): other cells call this helper as HdfBase.get_dataset_info - confirm the name below is still valid.
#HdfUtils.get_hdf5_dataset_info(plan_hdf_path, "/Geometry/Pipe Nodes/")

In [None]:
# Get pipe nodes from the plan results HDF.
# Later cells treat the result as a GeoDataFrame: they assign a CRS, read point
# coordinates from its geometry, and use its 'Invert Elevation' column.
pipe_nodes_gdf = HdfPipe.get_pipe_nodes(plan_hdf_path)
print("\nPipe Nodes:")
pipe_nodes_gdf

In [None]:
# Inspect the raw HDF contents of the Pipe Networks geometry group (uncomment to run).
# NOTE(review): other cells call this helper as HdfBase.get_dataset_info - confirm the name below is still valid.
#HdfUtils.get_hdf5_dataset_info(plan_hdf_path, "/Geometry/Pipe Networks/")

In [None]:
# Get pipe network data from the plan results HDF.
# Shown for inspection only; no other cell visible in this notebook reads `pipe_network_gdf`.
pipe_network_gdf = HdfPipe.get_pipe_network(plan_hdf_path)
print("\nPipe Network Data:")
pipe_network_gdf

In [None]:
# Get pump stations from the plan results HDF.
# Later cells plot this table on a map and read the first row's 'Name' to request timeseries.
pump_stations_gdf = HdfPump.get_pump_stations(plan_hdf_path)
print("\nPump Stations:")
pump_stations_gdf

In [None]:
# Get pump groups from the plan results HDF.
# Shown for inspection only; no other cell visible in this notebook reads `pump_groups_df`.
pump_groups_df = HdfPump.get_pump_groups(plan_hdf_path)
print("\nPump Groups:")
pump_groups_df

In [None]:
# Use HdfBase to extract the projection from the geometry HDF.
# The next cell only applies it when the value is truthy, so a missing projection is tolerated.
print("\nExtracting Projection from HDF")
projection = HdfBase.get_projection(hdf_path=geom_hdf_path)
print(f"Projection: {projection}")

In [None]:
# Assign the projection read from the geometry HDF to the pipe GeoDataFrames, then load
# the mesh-area perimeters and do the same. When no projection was found the frames are
# left untouched. Column listings are printed as a quick schema check.
if projection:
    for network_gdf in (pipe_conduits_gdf, pipe_nodes_gdf):
        # allow_override replaces any CRS the frame already carries
        network_gdf.set_crs(projection, inplace=True, allow_override=True)

for heading, network_gdf in (
    ("Pipe Conduits GeoDataFrame columns:", pipe_conduits_gdf),
    ("\nPipe Nodes GeoDataFrame columns:", pipe_nodes_gdf),
):
    print(heading)
    print(network_gdf.columns)

# Mesh-area perimeters come from the geometry HDF and are reused by the map cells below
perimeter_polygons = HdfMesh.get_mesh_areas(geom_hdf_path)
if projection:
    perimeter_polygons.set_crs(projection, inplace=True, allow_override=True)

print("\nPerimeter Polygons GeoDataFrame columns:")
print(perimeter_polygons.columns)

In [None]:
# Map of the pipe network: mesh cells (background), conduits, nodes colored by invert
# elevation with per-node elevation labels, and the 2D area perimeters.
import matplotlib.pyplot as plt
from shapely import wkt
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
import numpy as np
import pandas as pd

fig, ax = plt.subplots(figsize=(28, 20))

# Plot cell polygons with 50% transparency behind the pipe network
cell_polygons_df = HdfMesh.get_mesh_cell_polygons(geom_hdf_path)
if not cell_polygons_df.empty:
    cell_polygons_df.plot(ax=ax, edgecolor='lightgray', facecolor='lightgray', alpha=0.5)

# Plot pipe conduits - the Polyline column already contains LineString geometries
pipe_conduits_gdf.set_geometry('Polyline', inplace=True)

# Plot each pipe conduit individually to ensure all are shown
for idx, row in pipe_conduits_gdf.iterrows():
    ax.plot(*row.Polyline.xy, color='blue', linewidth=1)

# Create a colormap for node elevations
norm = plt.Normalize(pipe_nodes_gdf['Invert Elevation'].min(),
                     pipe_nodes_gdf['Invert Elevation'].max())
cmap = plt.cm.viridis

# Plot pipe nodes colored by invert elevation
scatter = ax.scatter(pipe_nodes_gdf.geometry.x, pipe_nodes_gdf.geometry.y,
                     c=pipe_nodes_gdf['Invert Elevation'],
                     cmap=cmap, norm=norm,
                     s=100)

# Add colorbar, attached explicitly to this axes rather than to pyplot's "current" axes
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label('Invert Elevation (ft)', rotation=270, labelpad=15)

# The column check does not change per row, so evaluate it once
has_drop_inlet = 'Drop Inlet Elevation' in pipe_nodes_gdf.columns

# Add combined labels for invert and drop inlet elevations
for idx, row in pipe_nodes_gdf.iterrows():
    label_text = ""  # Initialize label_text for each node
    # Add drop inlet elevation label if it exists and is not missing.
    # pd.notna also handles None/object values, on which np.isnan raises TypeError.
    if has_drop_inlet and pd.notna(row['Drop Inlet Elevation']):
        label_text += f"TOC: {row['Drop Inlet Elevation']:.2f}\n"
    label_text += f"INV: {row['Invert Elevation']:.2f}"

    ax.annotate(label_text,
                xy=(row.geometry.x, row.geometry.y),
                xytext=(-10, -10), textcoords='offset points',
                fontsize=8,
                bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.5'))

# Add perimeter polygons
if not perimeter_polygons.empty:
    perimeter_polygons.plot(ax=ax, edgecolor='black', facecolor='none')

# Create proxy artists for legend (the plotted artists above carry no labels of their own)
conduit_line = mlines.Line2D([], [], color='blue', label='Conduits')
node_point = mlines.Line2D([], [], color='blue', marker='o', linestyle='None',
                           markersize=10, label='Nodes')
perimeter = mpatches.Patch(facecolor='none', edgecolor='black',
                           label='Perimeter Polygons')

ax.set_title('Pipe Network with Node Elevations')

# Add legend with proxy artists
ax.legend(handles=[conduit_line, node_point, perimeter])

# Set aspect ratio to be equal and adjust limits
ax.set_aspect('equal', 'datalim')
ax.autoscale_view()

plt.show()


In [None]:
# Visualize pump stations on a map, with the 2D area perimeters for context
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

fig, ax = plt.subplots(figsize=(12, 8))
pump_stations_gdf.plot(ax=ax, color='green', markersize=50, label='Pump Station')

# Start the legend from whatever labelled artists the pump-station plot produced
legend_handles, _ = ax.get_legend_handles_labels()

# Add perimeter polygons.
# A `label=` on a geopandas polygon plot is typically not picked up by the legend
# (polygons are drawn as a patch collection), so a proxy patch supplies the entry,
# matching the approach used in the pipe-network map cell.
if not perimeter_polygons.empty:
    perimeter_polygons.plot(ax=ax, edgecolor='black', facecolor='none')
    legend_handles.append(
        mpatches.Patch(facecolor='none', edgecolor='black', label='Perimeter Polygons')
    )

ax.set_title('Pump Station Location')
ax.legend(handles=legend_handles)
plt.show()

In [None]:
# Pipe network timeseries: list the variable names this notebook treats as valid,
# then pull a preview of every pipe- or node-level variable.
valid_variables = [
    "Cell Courant", "Cell Water Surface", "Face Flow", "Face Velocity",
    "Face Water Surface", "Pipes/Pipe Flow DS", "Pipes/Pipe Flow US",
    "Pipes/Vel DS", "Pipes/Vel US", "Nodes/Depth", "Nodes/Drop Inlet Flow",
    "Nodes/Water Surface"
]

print("Valid variables for pipe network timeseries:")
print("\n".join(f"- {name}" for name in valid_variables))

# Keep only the variables stored under the "Pipes/" or "Nodes/" prefixes
pipe_variables = [name for name in valid_variables if name.startswith(("Pipes/", "Nodes/"))]

for variable in pipe_variables:
    try:
        pipe_timeseries = HdfPipe.get_pipe_network_timeseries(plan_hdf_path, variable=variable)
        print(f"\nPipe Network Timeseries ({variable}):")
        # Preview only, to keep the cell output short
        print(pipe_timeseries.head())
    except Exception as e:
        # Report the failure and continue with the remaining variables
        print(f"Error extracting {variable}: {e}")

### Pipe Network Timeseries Data Description

The `get_pipe_network_timeseries` function returns an xarray DataArray for each variable. Here's a general description of the data structure:

1. **Pipes/Pipe Flow DS and Pipes/Pipe Flow US**:
   - Dimensions: time, location (pipe IDs)
   - Units: ft^3/s (cubic feet per second)
   - Description: Represents the flow rate at the downstream (DS) and upstream (US) ends of pipes over time.

2. **Pipes/Vel DS and Pipes/Vel US**:
   - Dimensions: time, location (pipe IDs)
   - Units: ft/s (feet per second)
   - Description: Shows the velocity at the downstream (DS) and upstream (US) ends of pipes over time.

3. **Nodes/Depth**:
   - Dimensions: time, location (node IDs)
   - Units: ft (feet)
   - Description: Indicates the depth of water at each node over time.

4. **Nodes/Drop Inlet Flow**:
   - Dimensions: time, location (node IDs)
   - Units: cfs (cubic feet per second)
   - Description: Represents the flow rate through drop inlets at each node over time.

5. **Nodes/Water Surface**:
   - Dimensions: time, location (node IDs)
   - Units: ft (feet)
   - Description: Shows the water surface elevation at each node over time.

General notes:
- The 'time' dimension represents the simulation timesteps.
- The 'location' dimension represents either pipe IDs or node IDs, depending on the variable.
- The number of timesteps and locations may vary depending on the specific dataset and simulation setup.
- Negative values in flow variables may indicate reverse flow direction.



In [None]:
# Plot one timeseries per pipe/node variable at a pseudo-randomly chosen location
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import numpy as np
import random

# Fixed seed so Restart & Run All picks the same locations (and produces the same
# figures) every time; change the seed to sample different locations.
RANDOM_SEED = 42
location_rng = random.Random(RANDOM_SEED)  # private generator; global `random` state is untouched

# Define the variables we want to plot
variables = [
    "Pipes/Pipe Flow DS", "Pipes/Pipe Flow US", "Pipes/Vel DS", "Pipes/Vel US",
    "Nodes/Depth", "Nodes/Drop Inlet Flow", "Nodes/Water Surface"
]

# Create a separate plot for each variable
for variable in variables:
    try:
        # Get the data for the current variable
        data = HdfPipe.get_pipe_network_timeseries(plan_hdf_path, variable=variable)

        # Create a new figure
        fig, ax = plt.subplots(figsize=(12, 6))

        # Pick one location reproducibly
        random_location = location_rng.choice(data.location.values)

        # Determine if it's a pipe or node variable
        if variable.startswith("Pipes/"):
            location_type = "Conduit ID"
        else:
            location_type = "Node ID"

        # Plot the data for the selected location
        ax.plot(data.time, data.sel(location=random_location), label=f'{location_type} {random_location}')

        # Set the title and labels
        ax.set_title(f'{variable} Over Time ({location_type} {random_location})')
        ax.set_xlabel('Time')
        ax.set_ylabel(f'{variable} ({data.attrs["units"]})')

        # Format the x-axis to show dates nicely
        ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d %H:%M'))
        ax.tick_params(axis='x', labelrotation=45)

        # Add a legend
        ax.legend(title=location_type, loc='upper left')

        # Adjust the layout
        fig.tight_layout()

        # Show the plot
        plt.show()

    except Exception as e:
        # Report the failure and continue with the remaining variables
        print(f"Error plotting {variable}: {str(e)}")


In [None]:
# Get the pump station timeseries for the first pump station in the table
pump_station_name = pump_stations_gdf.iloc[0]['Name']  # Get the first pump station name
# Use the HdfPump.get_pump_station_timeseries method, selecting the station by name
pump_timeseries = HdfPump.get_pump_station_timeseries(plan_hdf_path, pump_station=pump_station_name)
print(f"\nPump Station Timeseries ({pump_station_name}):")
print(pump_timeseries)

In [None]:
# Use HdfBase.get_dataset_info to inspect the Pump Stations geometry group in the plan HDF
HdfBase.get_dataset_info(plan_hdf_path, "/Geometry/Pump Stations/")

In [None]:
# Plot every variable of the first pump station's timeseries on one axes.
# Imports are repeated here so the cell does not depend on an earlier plotting cell
# having brought DateFormatter into scope.
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter

# Extract the pump station timeseries data
pump_station_name = pump_stations_gdf.iloc[0]['Name']  # Get the first pump station name
pump_timeseries = HdfPump.get_pump_station_timeseries(plan_hdf_path, pump_station=pump_station_name)

# Print the pump station timeseries
print(f"\nPump Station Timeseries ({pump_station_name}):")
print(pump_timeseries)

# Create a new figure for plotting
fig, ax = plt.subplots(figsize=(12, 12))

# Variable names in stored order; the position is also used to look up the unit
variable_names = list(pump_timeseries.coords['variable'].values)

# Plot each variable in the timeseries
for variable_index, variable in enumerate(variable_names):
    data = pump_timeseries.sel(variable=variable)

    # Decode units to strings.
    # NOTE(review): assumes attrs["units"] is a sequence aligned with the variable
    # order whose entries hold a bytes unit at index 1 - confirm against the library.
    unit = pump_timeseries.attrs["units"][variable_index][1].decode('utf-8')

    # Check if the variable is 'Pumps on' to plot it differently
    if variable == 'Pumps on':
        # Plot with color based on the on/off status
        colors = ['green' if val > 0 else 'red' for val in data.values.flatten()]
        ax.scatter(pump_timeseries['time'], data, label=f'{variable} ({unit})', color=colors)
    else:
        ax.plot(pump_timeseries['time'], data, label=f'{variable} ({unit})')

        # Label the peak values. Plain scalars (not 0-d DataArrays) are handed to
        # matplotlib and to the format spec below.
        peak_index = int(data.argmax())
        peak_time = pump_timeseries['time'].values[peak_index]
        peak_value = float(data.max())
        ax.annotate(f'Peak: {peak_value:.2f}', xy=(peak_time, peak_value),
                    xytext=(peak_time, peak_value + 0.1 * peak_value),
                    arrowprops=dict(facecolor='black', arrowstyle='->'),
                    fontsize=10, color='black', ha='center')

# Set the title and labels
ax.set_title(f'Timeseries Data for Pump Station: {pump_station_name}')
ax.set_xlabel('Time')
ax.set_ylabel('Values')

# Format the x-axis to show dates nicely
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d %H:%M'))
ax.tick_params(axis='x', labelrotation=45)

# Add a legend
ax.legend(title='Variables', loc='upper left')

# Adjust the layout
fig.tight_layout()

# Show the plot
plt.show()


## Exploring HDF Datasets with HdfBase.get_dataset_info
This allows users to find HDF information that is not included in the ras-commander library.  Find the path in HDFView and set the group_path below to explore the HDF datasets and attributes.  Then, use the output to write your own function to extract the data.  

#### Use the HdfBase.get_dataset_info function to get Pipe Conduits data:
HdfBase.get_dataset_info(plan_hdf_path, "/Geometry/Pipe Conduits/")

For HDF datasets that are not supported by the RAS-Commander library, provide the dataset path to HdfBase.get_dataset_info, then give the output to an LLM along with the relevant HDF* class(es) to generate new functions that extend the library's coverage.