In [3]:
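# Uncomment to install the third-party dependencies into the kernel's environment
# (sqlite3 ships with the Python standard library and needs no install):
# %pip install pandas
# %pip install geopandas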

This block imports the required Python libraries:

pandas for handling tabular data (such as time series).
geopandas for working with geospatial data (GeoPackage format).
sqlite3 for connecting and interacting with an SQLite database.


In [4]:
# Import necessary libraries
import pandas as pd  # Pandas is used for handling tabular data efficiently
import geopandas as gpd  # Geopandas is used for handling geospatial data
import sqlite3  # SQLite3 is used to connect and interact with an SQLite database

This block loads three geospatial layers from a GeoPackage (.gpkg) file using geopandas:

DIVERSION_POINTS: Contains the locations of Points of Diversion (POD), where water is withdrawn.
event: Represents gage stations or hydrological monitoring points.
RESERVOIR_POINTS: Includes information about reservoirs and their operations.
Each layer is stored as a GeoDataFrame, allowing spatial analysis and attribute queries.

In [5]:
# Load the three geospatial layers used by this notebook from one GeoPackage.
# The file path is written once so that all reads stay pointed at the same file.
gpkg_path = 'data/enhanced_reference_14.gpkg'

# Points of Diversion (POD): locations where water is diverted
pod_layer = gpd.read_file(gpkg_path, layer='DIVERSION_POINTS')

# Event layer: gage locations or monitoring points
gage_layer = gpd.read_file(gpkg_path, layer='event')

# Reservoir layer: details about reservoirs and their operations
res_layer = gpd.read_file(gpkg_path, layer='RESERVOIR_POINTS')

DataSourceError: data/enhanced_reference_14.gpkg: No such file or directory

### Config

Set the HUC4 code, the path to the relational database, and the start and end dates of the simulation window.

In [6]:
# --- Notebook configuration ---------------------------------------------------
# HUC4 region code; it has to be the same code used when the relational DB was built.
huc4_code = "14"

# Location of the relational SQLite database for that region.
db_path = "data/relational_db_" + huc4_code + ".db"

# Simulation window for reservoir operations (edit as needed).
# Both values are parsed later with pd.to_datetime when the time series are filtered.
start_date, end_date = "10/1/2003", "10/1/2013"

This block establishes a connection to an SQLite database to query key identifiers. First, it retrieves the POI_TYPE_ID for "RESERVOIR" from the POI_TYPE table and the VARIABLE_IDs for "INFLOW", "OUTFLOW", and "STORAGE" from the VARIABLES table, all of which are needed for querying related records.

Next, the script iterates through each reservoir point in res_layer, extracting its SOURCE_COMID (hydrofabric segment ID) and using it to look up the corresponding POI_ID in the POI table. Once the POI_ID is found, it queries the POI_VALUES table to retrieve the historical reservoir operation data (inflows (CMS), outflows (CMS), and storage (MCM)) within the configured simulation window, which is then converted into a pandas.DataFrame for easy processing. The script also retrieves the POI_NATIVE_ID (a unique identifier) and the POI_FLOW_COMID (hydrofabric segment ID) from the POI table.

In each iteration, the script structures key variables for each point for integration into a water management model. These include the Reservoir’s unique ID, hydrofabric segment, and historical reservoir operation records. 

### <span style="color:red">There is a specified block in the code that can be used to implement MODEL parametrizing code.</span> 

In each iteration, use the variables listed in that block to pass the data into your model through its model-specific functions.



In [7]:
# Connect to the relational DB defined in the configuration cell
conn = sqlite3.connect(db_path)

# try/finally guarantees the connection is closed even when a lookup or a
# pandas operation raises part-way through the cell; otherwise a failed run
# would leave the database handle open in the kernel.
try:
    cursor = conn.cursor()

    # ------------------------------------------------------------------
    # 1. Look up the POI_TYPE_ID for reservoirs
    # ------------------------------------------------------------------
    cursor.execute("""
        SELECT POI_TYPE_ID
        FROM POI_TYPE
        WHERE POI_TYPE_NAME = 'RESERVOIR'
    """)
    poi_type_result = cursor.fetchone()

    if not poi_type_result:
        raise RuntimeError("No POI_TYPE_ID found for 'RESERVOIR' in the POI_TYPE table.")
    reservoir_poi_type_id = poi_type_result[0]

    # ------------------------------------------------------------------
    # 2. Look up VARIABLE_IDs for reservoir variables
    #    (change names if your VARIABLES table uses different labels)
    # ------------------------------------------------------------------
    variable_names = ["INFLOW", "OUTFLOW", "STORAGE"]
    variable_ids = {}

    for var_name in variable_names:
        cursor.execute("""
            SELECT VARIABLE_ID
            FROM VARIABLES
            WHERE VARIABLE_NAME = ?
        """, (var_name,))
        result = cursor.fetchone()
        if result:
            variable_ids[var_name] = result[0]
        else:
            # A missing variable is tolerated; only the variables that were
            # found are queried below.
            print(f"WARNING: No VARIABLE_ID found for '{var_name}' in VARIABLES")

    if not variable_ids:
        raise RuntimeError("No reservoir VARIABLE_IDs were found. Check the VARIABLES table.")

    # The simulation window is the same for every reservoir and variable, so
    # parse it once instead of inside the innermost loop.
    window_start = pd.to_datetime(start_date)
    window_end = pd.to_datetime(end_date)

    # ------------------------------------------------------------------
    # 3. Loop over reservoir points from the RESERVOIR_POINTS layer
    # ------------------------------------------------------------------
    for idx, row in res_layer.iterrows():
        # Hydrofabric COMID associated with this reservoir
        # NOTE: change 'SOURCE_COMID' to match your actual column name
        source_comid = row["SOURCE_COMID"]

        # Reservoir attributes from the geospatial layer
        reservoir_name     = row["DAM_NAME"]     # Designated reservoir name
        reservoir_capacity = row["CAP_MCM"]      # Max storage (million m³)
        reservoir_height   = row["DAM_HGT_M"]    # Dam height (m)
        reservoir_main_use = row["MAIN_USE"]     # Main purpose

        # ------------------------------------------------------------------
        # 3a. Look up the POI row for this reservoir. POI_ID, POI_NATIVE_ID and
        #     POI_FLOW_COMID are read in one query so the same row does not
        #     have to be fetched a second time by POI_ID later.
        # ------------------------------------------------------------------
        cursor.execute("""
            SELECT POI_ID, POI_NATIVE_ID, POI_FLOW_COMID
            FROM POI
            WHERE POI_FLOW_COMID = ? AND POI_TYPE_ID = ?
        """, (source_comid, reservoir_poi_type_id))
        poi_record = cursor.fetchone()

        if not poi_record:
            # No matching POI row for this reservoir – skip
            print(f"No POI row found for reservoir COMID {source_comid}")
            continue

        poiid, poi_native_id, segment_comid = poi_record

        # ------------------------------------------------------------------
        # 3b. Get reservoir time-series (INFLOW, OUTFLOW, STORAGE) from POI_VALUES
        # ------------------------------------------------------------------
        timeseries_dict = {}

        for var_name, var_id in variable_ids.items():
            cursor.execute("""
                SELECT LOCAL_DATE_TIME, DATA_VALUE
                FROM POI_VALUES
                WHERE POI_ID = ? AND VARIABLE_ID = ?
                ORDER BY LOCAL_DATE_TIME
            """, (poiid, var_id))
            records = cursor.fetchall()

            if not records:
                # No time series for this variable / reservoir
                continue

            df = pd.DataFrame(records, columns=["LocalDateTime", var_name])
            df["LocalDateTime"] = pd.to_datetime(df["LocalDateTime"])

            # Apply simulation window (both bounds inclusive)
            mask = (
                (df["LocalDateTime"] >= window_start) &
                (df["LocalDateTime"] <= window_end)
            )
            df = df.loc[mask]

            if df.empty:
                continue

            # Index by timestamp so the per-variable frames can be aligned below
            timeseries_dict[var_name] = df.set_index("LocalDateTime")

        if not timeseries_dict:
            print(f"No reservoir time-series found in window for COMID {source_comid}")
            continue

        # Combine all variable DataFrames on datetime index; the outer join keeps
        # timestamps that are present for only some of the variables (NaN elsewhere)
        timeseries = pd.concat(timeseries_dict.values(), axis=1, join="outer").reset_index()

        # Optional: small sanity print
        print(f"Reservoir {idx}: POI_NATIVE_ID={poi_native_id}, segment_comid={segment_comid}")
        print(timeseries.head())

        ##########################################################################
        # MODEL HOOK BLOCK – use these variables for your WMM / hydrologic model
        #
        # 1. `poi_native_id`     – Native ID of the reservoir point (POI_NATIVE_ID)
        # 2. `reservoir_name`    – Reservoir designated name
        # 3. `reservoir_capacity`– Max storage capacity (million cubic meters)
        # 4. `reservoir_height`  – Dam height (meters)
        # 5. `reservoir_main_use`– Main purpose (Irrigation, Hydroelectricity, etc.)
        # 6. `segment_comid`     – Hydrofabric segment ID where the reservoir is connected
        # 7. `timeseries`        – Pandas DataFrame with columns:
        #                            LocalDateTime, INFLOW, OUTFLOW, STORAGE
        #                          (a variable's column is absent when it has no
        #                           records inside the simulation window)
        #
        # -> Here is where you call your model-specific constructor / API:
        #
        #        reservoir_id       = poi_native_id,
        #        name               = reservoir_name,
        #        segment_comid      = segment_comid,
        #        capacity_mcm       = reservoir_capacity,
        #        dam_height_m       = reservoir_height,
        #        main_use           = reservoir_main_use,
        #        ts_df              = timeseries,
        #
        ##########################################################################
finally:
    # Close DB when finished (also runs when an error interrupts the cell)
    conn.close()

OperationalError: unable to open database file

This block generates a model-ready summary table of all reservoirs in the HUC4 region by joining attributes from the geospatial reservoir layer (RESERVOIR_POINTS) with the corresponding reservoir entries stored in the relational database. For each reservoir, we extract its persistent POI identifier, core physical characteristics, and the Hydrofabric segment COMID where it connects to the flow network.

The resulting table provides a compact representation of the key properties required for water-management modeling and can be directly used in workflow automation or WMM construction. Each row represents a single reservoir and includes:

1. POI_ID – The persistent native POI identifier assigned during database ingestion.

2. reservoir_name – Designated name of the reservoir or dam.

3. reservoir_capacity – Maximum storage capacity (in million cubic meters).

4. reservoir_height – Physical dam height (in meters).

5. reservoir_main_use – Primary purpose of the reservoir (e.g., irrigation, hydropower, water supply, flood control, recreation).

6. segment_comid – The Hydrofabric segment COMID where the reservoir is connected.

This table provides a clean, standardized input that aligns with the diversion summary format and supports consistent integration with water-management models.

In [8]:
# (Re)open a connection if needed
conn = sqlite3.connect(db_path)

# try/finally makes sure the connection is closed both after a normal run and
# when a lookup raises; without it this cell would leave the DB handle open.
try:
    cursor = conn.cursor()

    # 1. Get POI_TYPE_ID for reservoirs
    cursor.execute("""
        SELECT POI_TYPE_ID
        FROM POI_TYPE
        WHERE POI_TYPE_NAME = 'RESERVOIR'
    """)
    result = cursor.fetchone()
    if not result:
        raise RuntimeError("No POI_TYPE entry for 'RESERVOIR' found in POI_TYPE table.")
    reservoir_type_id = result[0]

    # 2. Build a summary table by joining RESERVOIR_POINTS to POI via COMID
    rows = []

    for idx, row in res_layer.iterrows():
        source_comid        = row["SOURCE_COMID"]
        reservoir_name      = row["DAM_NAME"]
        reservoir_capacity  = row["CAP_MCM"]
        reservoir_height    = row["DAM_HGT_M"]
        reservoir_main_use  = row["MAIN_USE"]

        # Find corresponding POI record for this reservoir
        cursor.execute("""
            SELECT POI_NATIVE_ID, POI_FLOW_COMID
            FROM POI
            WHERE POI_FLOW_COMID = ?
              AND POI_TYPE_ID = ?
        """, (source_comid, reservoir_type_id))
        poi_record = cursor.fetchone()

        if poi_record is None:
            # If there is no matching POI, skip (or log)
            print(f"No POI found for reservoir COMID {source_comid}")
            continue

        poi_native_id, segment_comid = poi_record

        rows.append({
            # 1. `POI_ID` - Native ID of the reservoir point (the POI_NATIVE_ID column)
            "POI_ID": poi_native_id,

            # 2. 'reservoir_name' - Reservoir designated name
            "reservoir_name": reservoir_name,

            # 3. 'reservoir_capacity' - Max storage capacity (million cubic meters)
            "reservoir_capacity": reservoir_capacity,

            # 4. 'reservoir_height' - Dam height (meters)
            "reservoir_height": reservoir_height,

            # 5. 'reservoir_main_use' - Main purpose
            "reservoir_main_use": reservoir_main_use,

            # 6. `segment_comid` - Hydrofabric segment ID where reservoir is connected
            "segment_comid": segment_comid,
        })
finally:
    conn.close()

# Passing the column list explicitly keeps the header in the exported CSV even
# when no reservoir matched a POI row (an empty `rows` list would otherwise
# produce a DataFrame with no columns). The order matches the dict keys above.
summary_columns = [
    "POI_ID",
    "reservoir_name",
    "reservoir_capacity",
    "reservoir_height",
    "reservoir_main_use",
    "segment_comid",
]
reservoirs_df = pd.DataFrame(rows, columns=summary_columns)

# 3. Export CSV
output_path = f"data/reservoirs_table_{huc4_code}.csv"
reservoirs_df.to_csv(output_path, index=False)

print(f"Exported reservoir summary table to: {output_path}")
print(reservoirs_df.head())

OperationalError: unable to open database file