In [6]:
# pip install pandas
# pip install geopandas

This block imports the required Python libraries:

pandas for handling tabular data (such as time series).
geopandas for working with geospatial data (GeoPackage format).
sqlite3 for connecting and interacting with an SQLite database.


In [7]:
# Import necessary libraries
import pandas as pd  # Pandas is used for handling tabular data efficiently
import geopandas as gpd  # Geopandas is used for handling geospatial data
import sqlite3  # SQLite3 is used to connect and interact with an SQLite database

This block loads three geospatial layers from a GeoPackage (.gpkg) file using geopandas:

DIVERSION_POINTS: Contains the locations of Points of Diversion (POD), where water is withdrawn.
event: Represents gage stations or hydrological monitoring points.
RESERVOIR_POINTS: Includes information about reservoirs and their operations.
Each layer is stored as a GeoDataFrame, allowing spatial analysis and attribute queries.

In [8]:
# Read geospatial layers from a GeoPackage file.
# The path is defined once so the three reads below always target the same file.
# NOTE(review): the '14' suffix presumably is the HUC4 code of the study area — confirm
gpkg_path = 'data/enhanced_reference_14.gpkg'

# POD (Point of Diversion) layer contains locations where water is diverted
pod_layer = gpd.read_file(gpkg_path, layer='DIVERSION_POINTS')

# Event layer contains gage locations or monitoring points
gage_layer = gpd.read_file(gpkg_path, layer='event')

# Reservoir operations layer contains details about reservoirs
res_layer = gpd.read_file(gpkg_path, layer='RESERVOIR_POINTS')

DataSourceError: data/enhanced_reference_14.gpkg: No such file or directory

Start and end dates for the data queries

In [9]:
# Time window for the time series data; both bounds are treated as inclusive
# when the series are filtered.
start_date, end_date = (
    '10/1/2003',  # start of the window
    '10/1/2013',  # end of the window
)

This block establishes a connection to an SQLite database to query key identifiers. First, it retrieves the POI_TYPE_ID for "POD" from the POI_TYPE table and the VARIABLE_ID for "DEMAND" from the VARIABLES table, both of which are crucial for querying related records.

Next, the script iterates through each diversion point in the pod_layer, extracting its SOURCE_COMID (hydrofabric segment ID) and using it to look up the corresponding POI_ID in the POI table. Once the POI_ID is found, it queries the POI_VALUES table to retrieve the historical demand time series, which is then converted into a pandas.DataFrame for easy processing and restricted to the start/end date window. Additionally, it fetches water rights information, including the allocation date and the legally permitted water withdrawal rate (CFS), from the POD_WATER_RIGHTS table. Finally, the script retrieves the POI_NATIVE_ID (a unique identifier) and the POI_FLOW_COMID (hydrofabric segment ID) from the POI table.

In each iteration, the script structures key variables for each point for integration into a water management model. These include the diversion point’s unique ID, hydrofabric segment, water rights information, and historical demand records. 

### <span style="color:red">There is a specified block in the code that can be used to implement MODEL parametrizing code.</span> 

Use the specified variables in each iteration to inject data to the model with model specific functions. 



In [10]:
# This HUC4 must match what you used when building the DB
huc4_code = '14'
db_path = f'data/relational_db_{huc4_code}.db'

# The simulation window is the same for every diversion, so parse it once here
# instead of on every loop iteration.
window_start = pd.to_datetime(start_date)
window_end = pd.to_datetime(end_date)

conn = sqlite3.connect(db_path)

# try/finally guarantees the connection is closed even when one of the lookups
# below raises, so a failed run does not leave the database open in the kernel.
try:
    cursor = conn.cursor()

    # ------------------------------------------------------------------
    # 1. Look up the POI_TYPE_ID for POD (Points of Diversion)
    # ------------------------------------------------------------------
    cursor.execute("""
        SELECT POI_TYPE_ID
        FROM POI_TYPE
        WHERE POI_TYPE_NAME = 'POD'
    """)
    poi_type_result = cursor.fetchone()

    if poi_type_result:
        diversion_poi_type_id = poi_type_result[0]
    else:
        raise RuntimeError("No POI_TYPE_ID found for 'POD' in the POI_TYPE table.")

    # ------------------------------------------------------------------
    # 2. Look up the VARIABLE_ID for the demand / diversion variable
    #    (change 'DEMAND' if you use a different VARIABLE_NAME)
    # ------------------------------------------------------------------
    cursor.execute("""
        SELECT VARIABLE_ID
        FROM VARIABLES
        WHERE VARIABLE_NAME = 'DEMAND'
    """)
    variable_id_result = cursor.fetchone()

    if variable_id_result:
        demand_variable_id = variable_id_result[0]
    else:
        # The message quotes the exact name used in the query above
        raise RuntimeError("No VARIABLE_ID found for 'DEMAND' in the VARIABLES table.")

    # ------------------------------------------------------------------
    # 3. Loop over diversion points (PODs) from the GeoPackage layer
    # ------------------------------------------------------------------
    for _idx, row in pod_layer.iterrows():
        # Hydrofabric COMID associated with this POD (from the DIVERSION_POINTS layer)
        source_comid = row['SOURCE_COMID']

        # ------------------------------------------------------------------
        # 3a. Look up the POI_ID for this COMID and POD POI_TYPE
        # ------------------------------------------------------------------
        # NOTE(review): fetchone() keeps only one POI per COMID. If several
        # diversions can share a flowline, the others are never processed here —
        # confirm whether that is intended.
        cursor.execute("""
            SELECT POI_ID
            FROM POI
            WHERE POI_FLOW_COMID = ? AND POI_TYPE_ID = ?
        """, (source_comid, diversion_poi_type_id))
        poiid_result = cursor.fetchone()

        if not poiid_result:
            # No matching POI row for this POD – skip it
            continue

        poiid = poiid_result[0]  # POI_ID of the matching row in the POI table

        # ------------------------------------------------------------------
        # 3b. Get time-series demand values from POI_VALUES
        # ------------------------------------------------------------------
        cursor.execute("""
            SELECT LOCAL_DATE_TIME, DATA_VALUE
            FROM POI_VALUES
            WHERE POI_ID = ? AND VARIABLE_ID = ?
            ORDER BY LOCAL_DATE_TIME
        """, (poiid, demand_variable_id))
        ts_records = cursor.fetchall()

        demand_timeseries = pd.DataFrame(
            ts_records,
            columns=['LOCAL_DATE_TIME', 'DiversionValue']
        )

        # Convert to datetime and apply the simulation start/end window
        if not demand_timeseries.empty:
            # errors='coerce' turns unparseable timestamps into NaT; those rows
            # fail both comparisons below and are therefore dropped by the mask.
            demand_timeseries['LOCAL_DATE_TIME'] = pd.to_datetime(
                demand_timeseries['LOCAL_DATE_TIME'],
                errors='coerce'
            )

            mask = (
                (demand_timeseries['LOCAL_DATE_TIME'] >= window_start) &
                (demand_timeseries['LOCAL_DATE_TIME'] <= window_end)
            )
            demand_timeseries = demand_timeseries.loc[mask].reset_index(drop=True)

        # ------------------------------------------------------------------
        # 3c. Get water-right attributes from POD_WATER_RIGHTS
        # ------------------------------------------------------------------
        cursor.execute("""
            SELECT WATER_RIGHT_ID, ALLOCATION_DATE, ALLOCATION_CFS
            FROM POD_WATER_RIGHTS
            WHERE POI_ID = ?
        """, (poiid,))
        waterrights_result = cursor.fetchall()

        # ------------------------------------------------------------------
        # 3d. Get the diversion's native ID and flow segment COMID from POI
        # ------------------------------------------------------------------
        cursor.execute("""
            SELECT POI_NATIVE_ID, POI_FLOW_COMID
            FROM POI
            WHERE POI_ID = ?
        """, (poiid,))
        diversion_record = cursor.fetchone()

        if diversion_record:
            diversion_ID = diversion_record[0]   # Native ID of the diversion (e.g., WDID)
            segment_comid = diversion_record[1]  # Hydrofabric segment COMID
        else:
            diversion_ID = None
            segment_comid = None

        # ------------------------------------------------------------------
        # 3e. (Optional) Print a quick summary for inspection
        # ------------------------------------------------------------------
        print()
        print(f"{diversion_ID} (COMID {segment_comid}):")

        for wr in waterrights_result:
            water_right_id = wr[0]
            allocation_date = wr[1]
            allocation_value_cfs = wr[2]
            print(f"  WR {water_right_id}: {allocation_date}, {allocation_value_cfs} cfs")

        # ------------------------------------------------------------------
        # 3f. PLACEHOLDER: hook this diversion into your WMM / hydrologic model
        # ------------------------------------------------------------------
        # At this point in each loop iteration you have:
        #   1. diversion_ID        – ID of the diversion point (POI_NATIVE_ID)
        #   2. segment_comid       – Hydrofabric segment ID where the diversion occurs
        #   3. waterrights_result  – list of (WATER_RIGHT_ID, ALLOCATION_DATE, ALLOCATION_CFS)
        #   4. demand_timeseries   – DataFrame with (LOCAL_DATE_TIME, DiversionValue)
        #                             within the [start_date, end_date] simulation window
        #
        # Use these variables to parameterize your model-specific diversion object
        # (create nodes/links, demand patterns, allocation rules, etc.).
finally:
    # Close the database connection when finished (also runs on errors)
    conn.close()

OperationalError: unable to open database file

This block extracts a summary of all diversion points (PODs) stored in the SQLite database and exports them as a standalone CSV file for reference or downstream analysis. Diversions are stored in the POI table, and each diversion is identified by the POI_TYPE entry with the name "POD".

The code performs the following steps:

1. Connects to the SQLite database generated during the data-building workflow.

2. Retrieves the POI_TYPE_ID for diversions so only POD-type points are selected.

3. Queries the POI table to extract key attributes for each diversion, including:

* diversion_id — the native identifier for the diversion (e.g., WDID).

* lat, long — geographic coordinates stored in the database.

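* comid — the Hydrofabric flowline COMID where the diversion withdraws water.

* allocation_cfs — the sum of ALLOCATION_CFS over the diversion's records in POD_WATER_RIGHTS (0 when the diversion has no water rights).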

4. Creates a Pandas DataFrame with these fields.

5. Exports the table as a CSV file to the data/ directory using a name that includes the HUC4 code (e.g., diversions_table_14.csv).

The table provides a convenient way to inspect all diversion points, validate their spatial attributes, or integrate them into external workflows such as visualization, geoprocessing, or companion water-management models.

In [11]:
# Build a one-row-per-diversion summary table from the database and export it as CSV.
# Relies on db_path and huc4_code defined in the previous cell.
conn = sqlite3.connect(db_path)

# try/finally ensures the connection is closed whether the queries succeed or
# raise; the original cell never closed it.
try:
    cursor = conn.cursor()

    # 1. Find POI_TYPE_ID for POD (Diversions)
    cursor.execute("""
        SELECT POI_TYPE_ID
        FROM POI_TYPE
        WHERE POI_TYPE_NAME = 'POD'
    """)
    result = cursor.fetchone()
    if result:
        diversion_type_id = result[0]
    else:
        raise RuntimeError("No POI_TYPE entry for 'POD' found.")

    # 2. Pull diversion records and cumulative allocation (cfs)
    # The LEFT JOIN keeps diversions that have no rows in POD_WATER_RIGHTS;
    # COALESCE reports their allocation as 0.0 instead of NULL.
    query = """
        SELECT
            p.POI_NATIVE_ID      AS diversion_id,
            p.POI_LAT            AS lat,
            p.POI_LON            AS long,
            p.POI_FLOW_COMID     AS comid,
            COALESCE(SUM(w.ALLOCATION_CFS), 0.0) AS allocation_cfs
        FROM POI p
        LEFT JOIN POD_WATER_RIGHTS w
            ON p.POI_ID = w.POI_ID
        WHERE p.POI_TYPE_ID = ?
        GROUP BY
            p.POI_ID,
            p.POI_NATIVE_ID,
            p.POI_LAT,
            p.POI_LON,
            p.POI_FLOW_COMID
    """

    diversions_df = pd.read_sql_query(query, conn, params=(diversion_type_id,))
finally:
    # The DataFrame is fully materialized by now, so the export below does not
    # need the database connection.
    conn.close()

# 3. Export CSV
output_path = f'data/diversions_table_{huc4_code}.csv'
diversions_df.to_csv(output_path, index=False)

print(f"Exported diversion table to: {output_path}")
print(diversions_df.head())

OperationalError: unable to open database file