In [5]:
# pip install pandas
# pip install geopandas

This block imports the required Python libraries:

pandas for handling tabular data (such as time series).
geopandas for working with geospatial data (GeoPackage format).
sqlite3 for connecting and interacting with an SQLite database.


In [6]:
# Import necessary libraries
import pandas as pd  # Pandas is used for handling tabular data efficiently
import geopandas as gpd  # Geopandas is used for handling geospatial data
import sqlite3  # SQLite3 is used to connect and interact with an SQLite database

This block loads three geospatial layers from a GeoPackage (.gpkg) file using geopandas:

POD_points: Contains the locations of Points of Diversion (POD), where water is withdrawn.
event: Represents gage stations or hydrological monitoring points.
ResOps_points: Includes information about reservoirs and their operations.
Each layer is stored as a GeoDataFrame, allowing spatial analysis and attribute queries.

In [7]:
# Read geospatial layers from a GeoPackage file.
# The path is defined once so all three layers are guaranteed to come from the
# same file and it only has to be changed in one place.
GPKG_PATH = 'data/simple_enhanced_reference_14.gpkg'

# POD (Point of Diversion) layer contains locations where water is diverted
pod_layer = gpd.read_file(GPKG_PATH, layer='POD_points')

# Event layer contains gage locations or monitoring points
gage_layer = gpd.read_file(GPKG_PATH, layer='event')

# Reservoir operations layer contains details about reservoirs
res_layer = gpd.read_file(GPKG_PATH, layer='ResOps_points')


add the start and end date for the simulation functionality 

This block establishes a connection to an SQLite database to query key identifiers. First, it retrieves the POI_TypeID for "Reservoir" from the POI_Type table and the VariableIDs for "Inflow", "Outflow", and "Storage" from the Variables table, all of which are required for querying the related records.

Next, the script iterates through each reservoir point in the res_layer, extracting its Source_comid (hydrofabric segment ID) and using it to look up the corresponding POIID in the POI table. Once the POIID is found, it queries the POI_Values table to retrieve the historical reservoir operation data (inflows (CMS), outflows (CMS), and storage (MCM)), which is then combined into a single pandas.DataFrame for easy processing. Next, the script retrieves the POI_NativeID (a unique identifier) and the POI_Flow_ComID (hydrofabric segment ID) from the POI table.

In each iteration, the script structures key variables for each point for integration into a water management model. These include the reservoir's unique ID, hydrofabric segment, and historical reservoir operation records.

### <span style="color:red">There is a specified block in the code that can be used to implement MODEL parametrizing code.</span> 

Use the specified variables in each iteration to inject data to the model with model specific functions. 



In [8]:
# Establish connection to an SQLite database
conn = sqlite3.connect('data/optional_db.db')

# try/finally guarantees the connection is closed even when a lookup fails or
# a query raises part-way through the loop.
try:
    cursor = conn.cursor()

    # Query the POI_Type table to retrieve POI_TypeID for 'Reservoir'
    cursor.execute("SELECT POI_TypeID FROM POI_Type WHERE POI_TypeName = 'Reservoir'")
    poi_type_result = cursor.fetchone()  # Fetch the first matching result

    # Without the type ID nothing below can be queried, so stop with an error.
    # An exception is raised instead of calling exit(): inside a notebook,
    # exit() asks the kernel to shut down rather than cleanly aborting the cell.
    if poi_type_result is None:
        raise LookupError("No POI_TypeID found for Reservoir. Check the POI_Type table.")
    poi_type_id = poi_type_result[0]  # Extract POI_TypeID

    # Query the Variables table to get VariableIDs for 'Inflow', 'Outflow', and 'Storage'
    variable_names = ['Inflow', 'Outflow', 'Storage']
    variable_ids = {}

    for var_name in variable_names:
        cursor.execute("SELECT VariableID FROM Variables WHERE VariableName = ?", (var_name,))
        result = cursor.fetchone()
        if result is None:
            raise LookupError(f"No VariableID found for '{var_name}'. Check the Variables table.")
        variable_ids[var_name] = result[0]

    # Iterate over each row (reservoir) in the reservoir operations layer
    for index, row in res_layer.iterrows():
        source_comid = row['Source_comid']
        reservoir_name = row['DAM_NAME']
        reservoir_capacity = row['CAP_MCM']
        reservoir_height = row['DAM_HGT_M']
        reservoir_main_use = row['MAIN_USE']

        # Find the reservoir POI attached to this hydrofabric segment
        cursor.execute("""
            SELECT POIID FROM POI 
            WHERE POI_Flow_ComID = ? AND POI_TypeID = ?
        """, (source_comid, poi_type_id))
        poiid_result = cursor.fetchone()

        # Reservoirs without a matching POI record are skipped silently,
        # as in the original workflow.
        if poiid_result is None:
            continue
        poiid = poiid_result[0]

        # One single-column frame per variable, each indexed by LocalDateTime
        timeseries_frames = []
        for var_name, var_id in variable_ids.items():
            cursor.execute("""
                SELECT LocalDateTime, DataValue FROM POI_Values 
                WHERE POIID = ? AND VariableID = ? 
                ORDER BY LocalDateTime
            """, (poiid, var_id))
            records = cursor.fetchall()
            frame = pd.DataFrame(records, columns=['LocalDateTime', var_name])
            timeseries_frames.append(frame.set_index('LocalDateTime'))

        # Combine all variable dataframes on the datetime index; the outer join
        # keeps every timestamp that appears for at least one variable.
        timeseries = pd.concat(timeseries_frames, axis=1, join='outer').reset_index()

        # Query POI to get Native ID and flow segment ID
        cursor.execute("""
            SELECT POI_NativeID, POI_Flow_ComID FROM POI
            WHERE POIID = ?
        """, (poiid,))
        POI_record = cursor.fetchone()

        POI_ID = POI_record[0]
        segment_comid = POI_record[1]

        print('POI_ID: ', POI_ID)
        print('Reservoir Name:', reservoir_name)
        print('Reservoir Capacity (MCM):', reservoir_capacity)
        print('Reservoir Height (m):', reservoir_height)
        print('Reservoir Main Use:', reservoir_main_use)
        print(timeseries.head())

        ##########################################################################
        #This block is for using the queried data in each iteration (Reservoirs) to implement data in MODEL
        # 1. `POI_ID` - Native ID of the reservoir point (POI)
        # 2. 'reservoir_name' - Reservoir designated name
        # 3. 'reservoir_capacity' = Maximum storage capacity of reservoir in million cubic meters
        # 4. 'reservoir_height' = Height of dam in meters
        # 5. 'reservoir_main_use' = Main purpose of reservoir: Irrigation; Hydroelectricity; Water supply; Flood control; Recreation; Navigation; Fisheries; Pollution control; Livestock; or Other
        # 6. `segment_comid` - Hydrofabric segment ID where the reservoir is connected
        # 7. `timeseries` - A Pandas DataFrame containing historical reservoir operation time series for this reservoir with these columns: LocalDateTime  Inflow  Outflow  Storage







        #########################################################################

finally:
    conn.close()  # Close the database connection
 

POI_ID:  CO01697
Reservoir Name: Vega
Reservoir Capacity (MCM): 41.1
Reservoir Height (m): 49
Reservoir Main Use: Irrigation
  LocalDateTime  Inflow  Outflow  Storage
0    1974-10-24     NaN      NaN    6.132
1    1974-10-25     NaN      NaN      NaN
2    1974-10-26     NaN      NaN      NaN
3    1974-10-27     NaN      NaN      NaN
4    1974-10-28     NaN      NaN      NaN
POI_ID:  CO00673
Reservoir Name: Homestake Dam
Reservoir Capacity (MCM): 56.6
Reservoir Height (m): 81
Reservoir Main Use: Water supply
  LocalDateTime Inflow Outflow  Storage
0    2005-04-18   None    None   20.228
1    2005-04-19   None    None   19.513
2    2005-04-20   None    None   18.839
3    2005-04-21   None    None   18.339
4    2005-04-22   None    None   17.994
POI_ID:  CO01664
Reservoir Name: Ruedi
Reservoir Capacity (MCM): 160.4
Reservoir Height (m): 88
Reservoir Main Use: Irrigation
  LocalDateTime  Inflow  Outflow  Storage
0    1969-04-01     NaN      NaN   59.350
1    1969-04-02     NaN      NaN   5