In [1]:
# pip install pandas
# pip install geopandas

This block imports the required Python libraries:

pandas for handling tabular data (such as time series).
geopandas for working with geospatial data (GeoPackage format).
sqlite3 for connecting and interacting with an SQLite database.


In [1]:
# Import necessary libraries
import pandas as pd  # Pandas is used for handling tabular data efficiently
import geopandas as gpd  # Geopandas is used for handling geospatial data
import sqlite3  # SQLite3 is used to connect and interact with an SQLite database

This block loads three geospatial layers from a GeoPackage (.gpkg) file using geopandas:

POD_points: Contains the locations of Points of Diversion (POD), where water is withdrawn.
event: Represents gage stations or hydrological monitoring points.
ResOps_points: Includes information about reservoirs and their operations.
Each layer is stored as a GeoDataFrame, allowing spatial analysis and attribute queries.

In [2]:
# Read geospatial layers from a GeoPackage file.
# The path is defined once so all three layers are guaranteed to come from the
# same file, and relocating the data only requires a single edit.
GPKG_PATH = 'data/simple_enhanced_reference_14.gpkg'

# POD (Point of Diversion) layer contains locations where water is diverted
pod_layer = gpd.read_file(GPKG_PATH, layer='POD_points')

# Event layer contains gage locations or monitoring points
gage_layer = gpd.read_file(GPKG_PATH, layer='event')

# Reservoir operations layer contains details about reservoirs
res_layer = gpd.read_file(GPKG_PATH, layer='ResOps_points')

This block establishes a connection to an SQLite database to query key identifiers. First, it retrieves the POI_TypeID for "USGS_Gage" from the POI_Type table and the VariableID for "Gage_Flow" from the Variables table, both of which are required for querying the related records.

Next, the script iterates through each gage point in the gage_layer, extracting its Source_comid (hydrofabric segment ID) and using it to look up the corresponding POIID in the POI table. Once the POIID is found, it queries the POI_Values table to retrieve the historical Flow data (CFS), which is then converted into a pandas.DataFrame for easy processing. Finally, the script retrieves the POI_NativeID (a unique identifier) and the POI_Flow_ComID (hydrofabric segment ID) from the POI table.

In each iteration, the script structures key variables for each point for integration into a water management model. These include the Gage unique ID, hydrofabric segment, and historical Flow records. 

### <span style="color:red">There is a specified block in the code that can be used to implement MODEL parametrizing code.</span> 

Use the specified variables in each iteration to inject data into the model using model-specific functions.



In [5]:
# Establish connection to an SQLite database
conn = sqlite3.connect('data/optional_db.db')

# try/finally guarantees the connection is closed on every path, including
# when a lookup fails or code added to the MODEL block raises mid-loop.
try:
    cursor = conn.cursor()

    # Query the POI_Type table to retrieve POI_TypeID for 'USGS_Gage'
    cursor.execute("SELECT POI_TypeID FROM POI_Type WHERE POI_TypeName = 'USGS_Gage'")
    poi_type_result = cursor.fetchone()  # First matching row, or None if there is none

    # Raise instead of calling exit(): inside a notebook exit() shuts the kernel
    # down (losing every loaded variable) rather than just stopping this cell.
    if poi_type_result is None:
        raise LookupError("No POI_TypeID found for USGS_Gage. Check the POI_Type table.")
    poi_type_id = poi_type_result[0]  # Extract POI_TypeID

    # Query the Variables table to get VariableIDs for 'Gage_Flow'
    variable_names = ['Gage_Flow']
    variable_ids = {}

    for var_name in variable_names:
        cursor.execute("SELECT VariableID FROM Variables WHERE VariableName = ?", (var_name,))
        result = cursor.fetchone()
        if result is None:
            raise LookupError(f"No VariableID found for '{var_name}'. Check the Variables table.")
        variable_ids[var_name] = result[0]

    # Iterate over each row (gage) in the gage layer
    for index, row in gage_layer.iterrows():
        source_comid = row['Source_comid']

        cursor.execute("""
            SELECT POIID FROM POI 
            WHERE POI_Flow_ComID = ? AND POI_TypeID = ?
        """, (source_comid, poi_type_id))
        poiid_result = cursor.fetchone()

        # Gages with no matching USGS_Gage POI in the database are skipped.
        if poiid_result is None:
            continue
        poiid = poiid_result[0]

        # Dictionary to hold one datetime-indexed DataFrame per variable
        timeseries_dict = {}

        for var_name, var_id in variable_ids.items():
            cursor.execute("""
                SELECT LocalDateTime, DataValue FROM POI_Values 
                WHERE POIID = ? AND VariableID = ? 
                ORDER BY LocalDateTime
            """, (poiid, var_id))
            records = cursor.fetchall()
            # The value column is named after the variable so the columns stay
            # distinguishable once the per-variable frames are combined.
            timeseries_dict[var_name] = pd.DataFrame(
                records, columns=['LocalDateTime', var_name]
            ).set_index('LocalDateTime')

        # Combine all variable dataframes on the datetime index, then turn the
        # index back into a regular LocalDateTime column.
        timeseries = pd.concat(timeseries_dict.values(), axis=1, join='outer').reset_index()

        # Query POI to get Native ID and flow segment ID
        cursor.execute("""
            SELECT POI_NativeID, POI_Flow_ComID FROM POI
            WHERE POIID = ?
        """, (poiid,))
        POI_record = cursor.fetchone()

        POI_ID = POI_record[0]
        segment_comid = POI_record[1]

        print()
        print('POI_ID: ', POI_ID)
        print(timeseries.head())

        ##########################################################################
        #This block is for using the queried data in each iteration (Gages) to implement data in MODEL
        # 1. `POI_ID` - Native ID of the USGS gage (POI)
        # 2. `segment_comid` - Hydrofabric segment ID where the gage occurs
        # 3. `timeseries` - A Pandas DataFrame containing historical flow time series for this USGS gage with this column: LocalDateTime  Gage_Flow







        #########################################################################

finally:
    conn.close()  # Close the database connection
 


POI_ID:  09104500
               LocalDateTime  Gage_Flow
0  1940-10-01T00:00:00+00:00       10.0
1  1940-10-02T00:00:00+00:00        6.7
2  1940-10-03T00:00:00+00:00        7.2
3  1940-10-04T00:00:00+00:00        6.2
4  1940-10-05T00:00:00+00:00       13.0

POI_ID:  09101500
               LocalDateTime  Gage_Flow
0  1945-05-01T00:00:00+00:00       20.0
1  1945-05-02T00:00:00+00:00       20.0
2  1945-05-03T00:00:00+00:00       20.0
3  1945-05-04T00:00:00+00:00       20.0
4  1945-05-05T00:00:00+00:00       20.0

POI_ID:  09100500
               LocalDateTime  Gage_Flow
0  1945-06-01T00:00:00+00:00       36.0
1  1945-06-02T00:00:00+00:00       40.0
2  1945-06-03T00:00:00+00:00       42.0
3  1945-06-04T00:00:00+00:00       44.0
4  1945-06-05T00:00:00+00:00       47.0

POI_ID:  09106150
               LocalDateTime  Gage_Flow
0  1990-10-01T00:00:00+00:00      461.0
1  1990-10-02T00:00:00+00:00      448.0
2  1990-10-03T00:00:00+00:00      486.0
3  1990-10-04T00:00:00+00:00      559.0
4  1