In [1]:
from io import BytesIO, StringIO
from pathlib import Path
import re
import sys
from typing import Optional

import pandas as pd
import requests

sys.path.append('../src')  # must run before the project-local `utils` imports below
from utils.duckdb_utils import connect_duckdb
from utils.duckdb_utils import run_sql_file
import utils.site_list as sl



In [2]:
# Read the code and name of every site whose agency code is 'BOR'.
with connect_duckdb() as con:
    result = con.execute("SELECT site_cd, site_nm FROM site WHERE agency_cd = 'BOR'").fetchall()

# Each row is a (site_cd, site_nm) pair; split the pairs into two parallel lists.
sites = [site_cd for site_cd, _site_nm in result]
site_names = [site_nm for _site_cd, site_nm in result]

# Parameter codes requested for each site, and the root of the per-site CSV URLs.
parameters = [17, 29, 42, 49]
base_url = "https://usbr.gov/uc/water/hydrodata/reservoir_data/"

print(sites)
print(site_names)


['917', '919', '913', '914', '915', '928', '1999', '2000', '2002', '2005']
['FLAMING GORGE RESERVOIR', 'LAKE POWELL', 'BLUE MESA RESERVOIR', 'MORROW POINT RESERVOIR', 'CRYSTAL RESERVOIR', 'STARVATION RESERVOIR', 'GRANBY RESERVOIR', 'GREEN MOUNTAIN RESERVOIR', 'RUEDI RESERVOIR', 'WILLIAMS FORK RESERVOIR']


In [None]:
# Snapshot the whole `parameter` table to parameters.csv in the working directory.
with connect_duckdb() as con:
    result = con.execute("SELECT * FROM parameter").df()

# The frame is already materialised, so the export does not need the open connection.
result.to_csv("parameters.csv", index=False)

In [None]:
# SQL text that sets the description of the parameter row with code 49 to
# 'Pool elevation'. This cell only defines the statement; nothing is executed here.
query = """
UPDATE parameter
SET parameter_dsc = 'Pool elevation'
WHERE parameter_cd = 49
"""

In [None]:
# Execute the statement currently held in `query` against the DuckDB database.
# NOTE(review): `query` is assigned in more than one cell of this notebook, so the
# statement that runs here depends on which of those cells was executed last.
with connect_duckdb() as con:
    result = con.execute(query)

In [None]:
# Keep only the first site so the download loop can be tried on a single site.
sites = sites[:1]

In [None]:
# Download one CSV per (site, parameter) pair from the USBR hydrodata service and
# stack everything into a single long table: date, value, site_cd, parameter_cd.
#
# Each file is fetched once and parsed from the response body; a failed or empty
# download is reported and skipped without aborting the remaining pairs.
all_data = []  # accumulated across ALL sites so earlier sites are not discarded
for site in sites:
    for pcode in parameters:
        url = f"{base_url}{site}/csv/{pcode}.csv"
        try:
            # A timeout keeps one unresponsive request from hanging the notebook.
            response = requests.get(url, timeout=30)
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data for site {site} with parameter {pcode}: {e}")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch data for site {site} with parameter {pcode}: HTTP {response.status_code}")
            continue

        try:
            # Parse the bytes already downloaded instead of requesting the URL again.
            df = pd.read_csv(
                BytesIO(response.content),
                header=0,
                names=['date', 'value'],
                parse_dates=['date'],
            )
        except pd.errors.EmptyDataError:
            # A 200 response with an empty body is treated the same as "no rows".
            df = pd.DataFrame(columns=['date', 'value'])

        if df.empty:
            print(f"No data found for site {site} with parameter {pcode}.")
            continue

        df['site_cd'] = site
        df['parameter_cd'] = str(pcode)
        all_data.append(df)

# Always define df_combined, so a run with no successful downloads leaves an empty
# frame rather than an undefined (or stale) variable.
if all_data:
    df_combined = pd.concat(all_data, ignore_index=True)
else:
    df_combined = pd.DataFrame(columns=['date', 'value', 'site_cd', 'parameter_cd'])

# https://usbr.gov/uc/water/hydrodata/reservoir_data/100010/csv/17.csv
# https://usbr.gov/uc/water/hydrodata/reservoir_data/100010/csv/49.csv
# https://usbr.gov/uc/water/hydrodata/reservoir_data/100089
# https://usbr.gov/uc/water/hydrodata/reservoir_data/917/csv/17.csv

In [3]:
# Maps column names of the USBR metadata CSV to the column names used in this
# notebook. Key order matters: it is also the column order of the frame that is
# later selected with `list(rename_map)`.
rename_map = {
    # identifiers
    'site_id': 'site_cd',
    'datatype_id': 'parameter_cd',
    # descriptive names
    'site_metadata.site_name': 'site_nm',
    'datatype_metadata.datatype_common_name': 'parameter_nm',
    'datatype_metadata.physical_quantity_name': 'parameter_type',
    'datatype_metadata.unit_name': 'units',
    # site location
    'site_metadata.lat': 'latitude_dd',
    'site_metadata.longi': 'longitude_dd',
    'site_metadata.elevation': 'elevation_m',
}

In [7]:
# Reservoir name fragments used to pick sites out of the metadata by name.
# NOTE(review): "Gramby" looks like a misspelling of "Granby" (which is also listed),
# and several entries are repeated; repeats only lengthen the regex. Confirm whether
# the list can be de-duplicated and whether every spelling matches the source data.
names = [
    "Gramby",
    "Green Mountain",
    "Ruedi",
    "Williams Fork",
    "Willow Creek",
    "Windy Gap",
    "Wolford",
    "Flaming Gorge",
    "Granby",
    "Green Mountain",
    "Ruedi",
    "Williams Fork",
    "Willow Creek",
    "Windy Gap",
    "Wolford Mountain",
    "Flaming Gorge",
    "Starvation",
    "Catamount",
    "Stagecoach",
    "Shadow Mountain",
    "Blue Mesa",
    "Crystal",
    "Morrow Point",
    "Ridgeway",
    "Powell",
]

parameters = [17, 29, 42, 49]

# One alternation of the literal names; escaping keeps each name from being read as regex syntax.
pattern = "|".join(map(re.escape, names))

print("Pattern:", pattern)

Pattern: Gramby|Green\ Mountain|Ruedi|Williams\ Fork|Willow\ Creek|Windy\ Gap|Wolford|Flaming\ Gorge|Granby|Green\ Mountain|Ruedi|Williams\ Fork|Willow\ Creek|Windy\ Gap|Wolford\ Mountain|Flaming\ Gorge|Starvation|Catamount|Stagecoach|Shadow\ Mountain|Blue\ Mesa|Crystal|Morrow\ Point|Ridgeway|Powell


In [None]:
# Build the parameter list and the site/parameter link table from the USBR
# hydrodata metadata catalogue.
#
# Outputs (written to the working directory):
#   usbr_parameters.csv      distinct parameters seen at the matched sites
#   usbr_site_parameter.csv  (site_parameter_id, site_id, parameter_id) link rows
# NOTE(review): a later cell reads "../artifacts/usbr_site_parameter.csv", which is
# not the path written here — confirm how the file is expected to get there.
df = pd.read_csv("https://www.usbr.gov/uc/water/hydrodata/reservoir_data/meta.csv")

# Keep rows whose site name contains any of the reservoir names (case-insensitive;
# rows with a missing name never match). Additional filters on
# "site_metadata.db_site_code" == "UC" and on `datatype_id` being in `parameters`
# were previously sketched here but left disabled; they remain off.
name_hit = df["site_metadata.site_name"].str.contains(pattern, case=False, na=False)
matches = df[name_hit].sort_values("site_metadata.site_name")

# Narrow to the columns of interest under the notebook's own names, with both
# codes as strings.
selected = matches[list(rename_map)].rename(columns=rename_map).reset_index(drop=True)
selected['parameter_cd'] = selected['parameter_cd'].astype(str)
selected['site_cd'] = selected['site_cd'].astype(str)
sites = selected.drop_duplicates(subset=['site_cd']).reset_index(drop=True)

ck = selected[['site_cd', 'site_nm', 'parameter_cd', 'parameter_nm', 'parameter_type', 'units']]
all_params = (
    ck[['parameter_cd', 'parameter_nm', 'parameter_type', 'units']]
    .drop_duplicates()
    .reset_index(drop=True)
    .sort_values(['parameter_type', 'parameter_nm'])
)
all_params.to_csv("usbr_parameters.csv", index=False)
params_list = all_params['parameter_cd'].astype(str).tolist()

print(params_list)

with connect_duckdb() as con:
    site_tbl = con.execute("SELECT site_id, site_cd FROM site").df()
    param_tbl = con.execute("SELECT parameter_id, parameter_cd FROM parameter").df()

# The codes in `selected` are strings; cast the database side as well so the merge
# keys always have the same dtype.
site_tbl['site_cd'] = site_tbl['site_cd'].astype(str)
param_tbl['parameter_cd'] = param_tbl['parameter_cd'].astype(str)

# Inner merges keep only pairs whose site AND parameter both exist in the database.
# Left merges would emit link rows with a missing site_id / parameter_id (and turn
# the id columns into floats), which cannot be loaded as valid links.
site_param = (
    selected[['site_cd', 'parameter_cd']]
    .drop_duplicates()
    .merge(site_tbl, on='site_cd', how='inner')
    .merge(param_tbl, on='parameter_cd', how='inner')
    .drop_duplicates()
    .reset_index(drop=True)
)
site_param['site_parameter_id'] = range(1, len(site_param) + 1)
site_param = site_param[['site_parameter_id', 'site_id', 'parameter_id']]
site_param.to_csv("usbr_site_parameter.csv", index=False)

['15', '47', '25', '29', '30', '31', '32', '33', '34', '1197', '1198', '39', '40', '43', '46', '42', '17', '89', '49']


In [17]:
# Remove any existing site_parameter table, then run the schema script
# (which is expected to define it again — TODO confirm against schema.sql).
schema_path = Path('../db/schema.sql')

with connect_duckdb() as con:
    con.execute("DROP TABLE IF EXISTS site_parameter;")

run_sql_file(schema_path)

In [18]:
# Load the prepared site/parameter link rows and append them to site_parameter.
usbr_site_param = pd.read_csv("../artifacts/usbr_site_parameter.csv")

# Take the clock reading once so created_ts and modified_ts are exactly equal on
# freshly inserted rows (two separate now() calls differ by a few microseconds).
load_ts = pd.Timestamp.now()
usbr_site_param['created_ts'] = load_ts
usbr_site_param['modified_ts'] = load_ts

with connect_duckdb() as con:
    # Expose the DataFrame to SQL under the name `site_param`.
    con.register('site_param', usbr_site_param)
    # SELECT * supplies columns in DataFrame order — assumes that order matches the
    # site_parameter table definition; TODO confirm against the schema.
    con.execute("INSERT INTO site_parameter SELECT * FROM site_param")

In [22]:
# Every (site, parameter) combination linked through site_parameter for sites
# whose agency code is 'BOR', with the site and parameter descriptive columns.
bor_site_parameter_sql = """
                         SELECT 
                            s.site_cd, s.site_nm, s.site_type, p.parameter_cd, p.parameter_nm, p.unit_nm, p.unit_cd 
                         FROM site s
                         INNER JOIN site_parameter sp ON s.site_id = sp.site_id
                         INNER JOIN parameter p ON sp.parameter_id = p.parameter_id
                         WHERE s.agency_cd = 'BOR'
                         """

with connect_duckdb() as con:
    result = con.execute(bor_site_parameter_sql).df()

In [None]:
# Assign surrogate ids and timestamps to the project's parameter list, then attach
# the new parameter_id to every (site_cd, parameter_cd) pair in `selected`.

# Work on a copy: adding columns to sl.params directly would modify the frame held
# by the imported module, so re-running the cell (or any other user of the module)
# would see the extra columns.
params = sl.params.copy()
params['parameter_id'] = range(1, len(params) + 1)

# One clock reading so create_ts and update_ts are identical for new rows.
stamp = pd.Timestamp.now()
params['create_ts'] = stamp
params['update_ts'] = stamp

# Full parameter rows. This selection used to be assigned to `selected_params` and
# then overwritten on the next line; it is kept under its own name so it is not lost.
# NOTE(review): presumably these are the rows meant for the `parameter` table — confirm.
parameter_rows = params[['parameter_id', 'parameter_cd', 'parameter_nm', 'parameter_dsc', 'unit_cd', 'unit_nm', 'create_ts', 'update_ts']]

# Code-to-id lookup used for the merge below.
selected_params = params[['parameter_id', 'parameter_cd']]

# Inner merge: pairs whose parameter code is not in the project list are dropped.
selected = selected[['site_cd', 'parameter_cd']].merge(selected_params, on='parameter_cd', how='inner')

In [None]:
# Resolve each site code in `selected` to its database site_id and build the
# site_parameter rows (one per distinct site/parameter pair).
with connect_duckdb() as con:
    site = con.execute("SELECT site_id, site_cd FROM SITE").df()

# Inner merge: pairs whose site code is not in the database are dropped.
site_param = (
    selected
    .merge(site, on='site_cd', how='inner')
    .drop_duplicates(subset=['site_cd', 'parameter_cd'])
    .reset_index(drop=True)
)
site_param['site_parameter_id'] = range(1, len(site_param) + 1)

# One clock reading so create_ts and update_ts are identical for new rows.
stamp = pd.Timestamp.now()
site_param['create_ts'] = stamp
site_param['update_ts'] = stamp

site_param = site_param[['site_parameter_id', 'site_id', 'parameter_id', 'create_ts', 'update_ts']]

In [None]:
# SQL text that appends every row of `staging_table` to the `parameter` table.
# SELECT * passes all staging columns through as-is — assumes the staging frame has
# the same column layout as `parameter`; TODO confirm before running.
query = "INSERT INTO parameter SELECT * FROM staging_table"

In [None]:
# Expose `selected` to SQL as `staging_table` and run the statement held in `query`.
# NOTE(review): `selected` carries site/parameter pairs at this point, while the
# most recent `query` inserts into `parameter` — confirm the intended staging frame.
with connect_duckdb() as con:
    try:
        con.register('staging_table', selected)
        con.execute(query)
    except Exception as exc:
        # Report the failure, then re-raise so the cell still stops with the error.
        print(f"❌ Error executing SQL: {exc}")
        raise

In [None]:

# The RISE API does not appear to be available at present, so this function is an untested placeholder.
def fetch_rise_timeseries(
        site_cd: str,
        parameter_cd: str,
        start_date: str,
        end_date: str,
        observed_modeled: str = "observed",
        base_url: str = "https://data.usbr.gov/rise/api/timeseries",
        format: str = "json",
        timeout: float = 30.0,
    ) -> Optional[pd.DataFrame]:
    """Fetch a time series for one site and parameter from the RISE API.

    NOTE(review): the endpoint, the query parameter names and the "timeSeries"
    response key have not been verified against a live service.

    Args:
        site_cd: Sent as the ``locationId`` query parameter.
        parameter_cd: Sent as the ``parameterId`` query parameter.
        start_date: Sent as ``startDate``.
        end_date: Sent as ``endDate``.
        observed_modeled: Sent as ``observedModeled``.
        base_url: Endpoint to query.
        format: Response format to request and parse: ``"json"`` or ``"csv"``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame built from the response, or ``None`` when the request fails,
        the JSON body cannot be decoded, or the JSON response has no records.

    Raises:
        ValueError: If ``format`` is neither ``"json"`` nor ``"csv"``. This is
            checked before any request is sent.
    """
    # Reject an unusable format up front instead of after a wasted round trip.
    if format not in ("json", "csv"):
        raise ValueError("Unsupported format. Use 'json' or 'csv'.")

    params = {
        "locationId": site_cd,
        "parameterId": parameter_cd,
        "startDate": start_date,
        "endDate": end_date,
        "observedModeled": observed_modeled,
        "format": format
    }

    headers = {
        "Accept": "application/vnd.api+json"
    }

    try:
        # The timeout keeps an unresponsive server from blocking indefinitely.
        response = requests.get(base_url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad responses
    except requests.RequestException as e:
        print(f"Error fetching data for site {site_cd} and parameter {parameter_cd}: {e}")
        return None

    if format == "csv":
        return pd.read_csv(StringIO(response.text))

    try:
        json_data = response.json()
    except ValueError as e:
        # JSON decode errors derive from ValueError; report them like any other
        # failed fetch rather than letting them escape.
        print(f"Error fetching data for site {site_cd} and parameter {parameter_cd}: {e}")
        return None

    records = json_data.get("timeSeries", [])
    if not records:
        print(f"No data found for site {site_cd} and parameter {parameter_cd}.")
        return None

    return pd.DataFrame(records)

In [None]:
# Example request: one site, one parameter, one quarter of data.
rise_request = {
    "site_cd": '2002',        # Example: Ruedi Reservoir
    "parameter_cd": '29',     # Example: Storage
    "start_date": "2024-10-01",
    "end_date": "2024-12-31",
}
df = fetch_rise_timeseries(**rise_request)

# The fetch returns None on failure or when there is no data, so only preview a real frame.
if df is not None:
    print(df.head())

In [None]:
# Reference link (CBRFC ESP graph, id=CAMC2, year=2025):
# https://www.cbrfc.noaa.gov/wsup/graph/espgraph_hc.html?year=2025&id=CAMC2#