In [None]:
import re
import sys
from io import StringIO
from typing import Optional

import pandas as pd
import requests

# Make the project's `src` package importable before the local import below.
sys.path.append('../src')
from utils.duckdb_utils import connect_duckdb



In [None]:
# Pull the (site_cd, site_nm) pairs for every site whose agency code is 'BOR'.
bor_site_query = "SELECT site_cd, site_nm FROM site WHERE agency_cd = 'BOR'"
with connect_duckdb() as con:
    result = con.execute(bor_site_query).fetchall()

# Split the two-column rows into parallel lists (same order as the query result).
sites = [site_cd for site_cd, _site_nm in result]
site_names = [site_nm for _site_cd, site_nm in result]

# Parameter codes requested per site, and the root of the per-site CSV endpoints.
parameters = [17, 29, 42, 49]
base_url = "https://usbr.gov/uc/water/hydrodata/reservoir_data/"

for listing in (sites, site_names):
    print(listing)


In [None]:
# Keep only the first site (still a list) so the cells below run on a single site.
# NOTE(review): presumably a development shortcut -- remove to process every site.
sites = sites[:1]

In [None]:
# Download one CSV per (site, parameter) pair and stack everything into a single
# long-format frame with columns: date, value, site_cd, parameter_cd.
#
# Example endpoints:
# https://usbr.gov/uc/water/hydrodata/reservoir_data/100010/csv/17.csv
# https://usbr.gov/uc/water/hydrodata/reservoir_data/100010/csv/49.csv
# https://usbr.gov/uc/water/hydrodata/reservoir_data/100089
# https://usbr.gov/uc/water/hydrodata/reservoir_data/917/csv/17.csv

# requests never times out by default; cap each request so one stalled
# download cannot hang the whole notebook.
REQUEST_TIMEOUT_S = 30

# Accumulated across ALL sites. (Resetting this inside the site loop meant that
# df_combined only ever held the data of the last site processed.)
all_data = []
for site in sites:
    for pcode in parameters:
        url = f"{base_url}{site}/csv/{pcode}.csv"
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data for site {site} with parameter {pcode}: {e}")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch data for site {site} with parameter {pcode}: HTTP {response.status_code}")
            continue

        try:
            # Parse the body we already downloaded instead of handing the URL to
            # pandas, which would fetch it a second time outside the
            # RequestException handler above.
            df = pd.read_csv(
                StringIO(response.text),
                header=0,                  # discard the file's own header row ...
                names=['date', 'value'],   # ... and use these column names instead
                parse_dates=['date'],
            )
        except (pd.errors.EmptyDataError, pd.errors.ParserError) as e:
            print(f"Could not parse data for site {site} with parameter {pcode}: {e}")
            continue

        if df.empty:
            print(f"No data found for site {site} with parameter {pcode}.")
            continue

        # Tag each row with its origin so the frames can be stacked safely.
        df['site_cd'] = site
        df['parameter_cd'] = str(pcode)
        all_data.append(df)

# Always define df_combined, even when nothing was fetched, so later cells do
# not pick up a stale value from a previous run or fail with a NameError.
if all_data:
    df_combined = pd.concat(all_data, ignore_index=True)
else:
    df_combined = pd.DataFrame(columns=['date', 'value', 'site_cd', 'parameter_cd'])

In [None]:
# Maps column names of the USBR metadata table to the names used in this
# notebook. Insertion order matters: it is also the column order used when
# the mapping's keys are used to select columns.
rename_map = {
    # identifiers
    'site_id': 'site_cd',
    'datatype_id': 'parameter_cd',
    # descriptive names
    'site_metadata.site_name': 'site_nm',
    'datatype_metadata.datatype_common_name': 'parameter_nm',
    'datatype_metadata.physical_quantity_name': 'alt_parameter_nm',
    'datatype_metadata.unit_name': 'units',
    # location
    'site_metadata.lat': 'latitude_dd',
    'site_metadata.longi': 'longitude_dd',
    'site_metadata.elevation': 'elevation_m',
}

In [None]:
# Reservoir name fragments used to select sites from the metadata table.
# Each entry is matched as a substring, so "Wolford" alone
# would also cover "Wolford Mountain"; both are kept for readability.
#
# Changes from the previous list:
#   * "Gramby" was a typo for "Granby" (already present) and has been dropped.
#   * Entries that appeared twice are listed once; duplicates add nothing to
#     an alternation pattern.
#   * "Ridgway" (the correct spelling of the reservoir) is added. The old
#     "Ridgeway" spelling is kept as well in case the metadata uses it.
names = [
    "Granby", "Green Mountain", "Ruedi", "Williams Fork", "Willow Creek",
    "Windy Gap", "Wolford", "Wolford Mountain",
    "Flaming Gorge", "Starvation", "Catamount", "Stagecoach",
    "Blue Mesa", "Crystal", "Morrow Point", "Ridgway", "Ridgeway",
    "Powell",
]

# Parameter (datatype) ids of interest.
parameters = [17, 29, 42, 49]

# Regex alternation of the escaped names: "name1|name2|...".
pattern = "|".join(re.escape(name) for name in names)

print("Pattern:", pattern)

In [None]:
# Load the full metadata table published alongside the reservoir CSVs.
df = pd.read_csv("https://www.usbr.gov/uc/water/hydrodata/reservoir_data/meta.csv")

# Row filters: name matches one of the reservoirs (case-insensitive, missing
# names count as no match), site code is "UC", and parameter is one we want.
_name_matches = df["site_metadata.site_name"].str.contains(pattern, case=False, na=False)
_is_uc_site = df["site_metadata.db_site_code"] == "UC"
_is_wanted_parameter = df["datatype_id"].isin(parameters)

matches = (
    df[_name_matches & _is_uc_site & _is_wanted_parameter]
    .sort_values("site_metadata.site_name")
)

# Keep only the mapped columns (in rename_map order) under their new names.
selected = (
    matches[list(rename_map.keys())]
    .rename(columns=rename_map)
    .reset_index(drop=True)
)

# One row per site: the first row seen for each site_cd.
sites = (
    selected
    .drop_duplicates(subset=['site_cd'])
    .reset_index(drop=True)
)

print(df.columns)


In [None]:

# NOTE: The RISE API does not appear to be available at the moment, so this function is a placeholder.
def fetch_rise_timeseries(
        site_cd: str,
        parameter_cd: str,
        start_date: str,
        end_date: str,
        observed_modeled: str = "observed",
        base_url: str = "https://data.usbr.gov/rise/api/timeseries",
        format: str = "json",
        timeout: float = 30.0
    ) -> Optional[pd.DataFrame]:
    """Fetch time series data from the RISE API for a given site and parameter.

    Args:
        site_cd: Sent to the API as the ``locationId`` query parameter.
        parameter_cd: Sent as ``parameterId``.
        start_date: Sent as ``startDate``.
        end_date: Sent as ``endDate``.
        observed_modeled: Sent as ``observedModeled``.
        base_url: Endpoint to query.
        format: Response format to request and parse: ``"json"`` or ``"csv"``.
        timeout: Seconds to wait for the server before giving up. requests
            has no default timeout, so without this a stalled connection
            would hang the notebook indefinitely.

    Returns:
        A DataFrame built from the response, or ``None`` when the request
        fails or a JSON response contains no records.

    Raises:
        ValueError: If ``format`` is neither ``"json"`` nor ``"csv"``. This is
            checked before any network traffic is generated.
    """
    # Reject bad arguments up front instead of after a wasted HTTP round trip.
    if format not in ("json", "csv"):
        raise ValueError("Unsupported format. Use 'json' or 'csv'.")

    params = {
        "locationId": site_cd,
        "parameterId": parameter_cd,
        "startDate": start_date,
        "endDate": end_date,
        "observedModeled": observed_modeled,
        "format": format
    }

    headers = {
        "Accept": "application/vnd.api+json"
    }
    try:
        response = requests.get(base_url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions

        if format == "csv":
            return pd.read_csv(StringIO(response.text))

        # NOTE(review): assumes the payload is an object with a top-level
        # "timeSeries" list -- confirm against the live API.
        records = response.json().get("timeSeries", [])

    except requests.RequestException as e:
        print(f"Error fetching data for site {site_cd} and parameter {parameter_cd}: {e}")
        return None

    if not records:
        print(f"No data found for site {site_cd} and parameter {parameter_cd}.")
        return None

    return pd.DataFrame(records)

In [None]:
# Example request. The first two arguments are site_cd ('2002', e.g. Ruedi
# Reservoir) and parameter_cd ('29', e.g. Storage); the last two are the
# start and end dates of the requested window.
df = fetch_rise_timeseries('2002', '29', "2024-10-01", "2024-12-31")

# The fetch returns None on failure or when no records come back.
if df is None:
    pass
else:
    print(df.head())

In [None]:
# Reference link (CBRFC ESP graph, id=CAMC2, year=2025):
# https://www.cbrfc.noaa.gov/wsup/graph/espgraph_hc.html?year=2025&id=CAMC2#