## Setup & Imports

In [34]:
# Import required libraries
import json
import warnings
from pathlib import Path
from typing import Optional

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from obspy.clients.fdsn import Client
from pygeohydro import WBD
from shapely.geometry import Point, box

warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

print("All libraries imported successfully.")

All libraries imported successfully.


## User Configuration

**⚠️ IMPORTANT: You must fill in the correct HUC codes for each basin.**

To find HUC codes:
1. Visit the [USGS WBD Viewer](https://hydro.nationalmap.gov/arcgis/rest/services/wbd/MapServer)
2. Search for your basin by name
3. Add the HUC codes (as strings) to the appropriate basin list below

You can also export/import this configuration as JSON (see cells below).

In [25]:
# ========== CONFIGURATION ==========
# All user-tunable settings live in this cell; later cells read these names.

# List of basin names to search for.
# Each entry is matched as a case-insensitive substring of the WBD "name" field.
basin_names = [
    "Skagit",
    "Nooksack",
    "Skykomish",
    "Snoqualmie",
    "Cedar",  # Cedar River watershed (not Cedar Creek)
    "Green",
    "Puyallup",
    "Carbon",
    "Nisqually",
    "Cowlitz",
]

# HUC level to query (8, 10, or 12)
huc_level = 10

# FDSN client for station queries
fdsn_provider = "IRIS"

# Bounding box for western Washington (minlon, minlat, maxlon, maxlat)
# This should encompass all basins of interest
bbox_wa = (-124.8, 45.5, -120.0, 49.0)

# Optional: Filter stations by elevation (meters)
# Set to None to disable filtering
elev_min = None  # Example: 0 (sea level)
elev_max = None  # Example: 3000 (3000 meters)

# Optional: Filter by specific networks
# Set to None to include all networks
network_filter = None  # Example: ["UW", "CC", "PB"]

# Output directory
output_dir = Path(".")
# parents=True so a nested path (e.g. Path("results/basins")) can be configured
# without first creating the intermediate directories by hand.
output_dir.mkdir(parents=True, exist_ok=True)

# Echo the effective settings so the run is self-documenting.
print("Configuration loaded:")
print(f"  HUC Level: {huc_level}")
print(f"  FDSN Provider: {fdsn_provider}")
print(f"  Bounding Box: {bbox_wa}")
print(f"  Elevation Filter: {elev_min} to {elev_max}")
print(f"  Network Filter: {network_filter}")
print(f"  Output Directory: {output_dir.absolute()}")

Configuration loaded:
  HUC Level: 10
  FDSN Provider: IRIS
  Bounding Box: (-124.8, 45.5, -120.0, 49.0)
  Elevation Filter: None to None
  Network Filter: None
  Output Directory: /Users/marinedenolle/GitHub/gaia-data-downloaders


## Step 1: Discover HUC Codes from Basin Names

Query the USGS WBD database to find HUC codes for each basin.

In [38]:
# Announce the search before any work is done.
n_basins = len(basin_names)
print("üîç Discovering HUC codes for basins...")
print(f"Searching for {n_basins} basins in western Washington\n")

# "huc<level>" doubles as the WBD layer name and the HUC-code column name.
huc_col = "huc" + str(huc_level)

# Search area as a shapely polygon built from the configured bounding box.
bbox_geom = box(*bbox_wa)

print(f"Using WBD layer: {huc_col}")

üîç Discovering HUC codes for basins...
Searching for 10 basins in western Washington

Using WBD layer: huc10


In [41]:

# Self-contained setup: rebuild the search polygon and layer name so this cell
# does not rely on the previous cell having been executed.
bbox_geom = box(*bbox_wa)
huc_col = "huc{}".format(huc_level)

# WBD client bound to the selected layer.
# NOTE(review): the recorded run of this cell ended with
# "InputTypeError: The crs argument should be of type a valid CRS" although no
# crs is passed here — presumably an environment issue (pyproj/PROJ data); confirm.
wbd = WBD(huc_col)


InputTypeError: The crs argument should be of type a valid CRS

In [None]:

# Store discovered HUC codes and geometries, keyed by basin name:
#   basin_to_hucs[name]    -> list of HUC codes (empty when nothing was found)
#   basin_geometries[name] -> GeoDataFrame of the matching WBD units
basin_to_hucs = {}
basin_geometries = {}

# Bind the client before the loop: if the setup cell failed and `wbd` does not
# exist, the NameError surfaces here once instead of being caught by the
# per-basin handler below and reported as an error for every basin.
wbd_client = wbd


def _name_filter_clause(name: str) -> str:
    """Build a case-insensitive substring filter on the WBD ``name`` field.

    Single quotes are doubled so that a name containing an apostrophe stays
    inside the SQL string literal instead of terminating it. For names without
    quotes the clause is unchanged from a plain interpolation.
    """
    escaped = name.upper().replace("'", "''")
    return f"UPPER(name) LIKE '%{escaped}%'"


for basin_name in basin_names:
    print(f"Searching for '{basin_name}'...")

    try:
        # Query WBD inside the bounding box, restricted by the name filter.
        # NOTE(review): the client may raise rather than return an empty frame
        # when nothing matches — confirm; such cases land in the except branch.
        sql_clause = _name_filter_clause(basin_name)
        basin_hucs = wbd_client.bygeom(bbox_geom, geo_crs=4326, sql_clause=sql_clause)

        if len(basin_hucs) > 0:
            # Ensure EPSG:4326: label the frame if it has no CRS, reproject otherwise.
            if basin_hucs.crs is None:
                basin_hucs = basin_hucs.set_crs("EPSG:4326")
            elif basin_hucs.crs.to_epsg() != 4326:
                basin_hucs = basin_hucs.to_crs("EPSG:4326")

            huc_codes = basin_hucs[huc_col].tolist()
            huc_names = basin_hucs['name'].tolist() if 'name' in basin_hucs.columns else []

            # Store results
            basin_to_hucs[basin_name] = huc_codes
            basin_geometries[basin_name] = basin_hucs

            # Show at most two matches to keep the cell output short.
            print(f"  ‚úì Found {len(huc_codes)} HUC(s)")
            for code, name in zip(huc_codes[:2], huc_names[:2]):
                print(f"    {code}: {name}")
            if len(huc_codes) > 2:
                print(f"    ... and {len(huc_codes) - 2} more")
        else:
            basin_to_hucs[basin_name] = []
            basin_geometries[basin_name] = gpd.GeoDataFrame()
            print("  ‚ö†Ô∏è No HUCs found")

    except Exception as e:
        # Best effort: record the basin as empty and continue with the next one.
        basin_to_hucs[basin_name] = []
        basin_geometries[basin_name] = gpd.GeoDataFrame()
        print(f"  ‚úó Error: {e}")

    print()

# Closing report: one status line per basin, followed by the grand total.
rule = "=" * 70

print(rule)
print("DISCOVERY SUMMARY")
print(rule)

total_hucs = 0
for label, found in basin_to_hucs.items():
    n_found = len(found)
    if n_found > 0:
        outcome = f"‚úì {n_found} HUC(s)"
    else:
        outcome = "‚ö†Ô∏è NOT FOUND"
    print(f"  {label:15s}: {outcome}")
    total_hucs += n_found

print(rule)
print(f"Total HUCs discovered: {total_hucs}")
print(rule)

InputTypeError: The crs argument should be of type a valid CRS