# USGS Stream Gage Site Metadata Downloader  
AUTH: Nathan T. Stevens  
ORG: Pacific Northwest Seismic Network  
LICENSE: GNU GPLv3  
PURPOSE: This notebook details how to get USGS surface water gage metadata (site information) from the WaterWatch dataservice. 

In [8]:
# Import dependencies
import pandas as pd
from pathlib import Path
import os

In [9]:
# USER PARAMETER BLOCK
# Define states to query and where to save the output
# Human readable list of USA state abbreviations
states = ['WA','OR']
# Where to save the file
PWD = Path().cwd()
SAVEDIR = PWD/'USGS_Stream_Gage'
# Create save path if it does not already exist
os.makedirs(SAVEDIR, exist_ok=True)

# END OF USER PARAMETER BLOCK


In [10]:
# Compose request URL for gauge site metadata
# Convert state list to URL format
states = ','.join(states).lower()
# Make request URL
URL = f'https://waterwatch.usgs.gov/download/?gt=map&mt=real&st={states}&dt=site&ht=&fmt=csv&mk=1'

In [11]:
# Submit request
df = pd.read_csv(URL, index_col='id')

In [12]:
# Parse `flowinfo` and `name` columns
_FI_HDRS = set(['Discharge (cfs)', 'Stage (ft)', 'Stage (adj) (ft)', 'Date', 'Length of record (years)', 'Class', '% normal(median) (%)', '% normal(mean) (%)', 'Status'])
holder = dict(zip(_FI_HDRS, [[] for _e in _FI_HDRS]))
idx = []
for id, row in df.iterrows():
    # Skip repeat header lines for multi-state query
    if id == 'id':
        continue
    # Catch station ID
    idx.append(id)

    # parse flowinfo
    fiparts = row.flowinfo.split(';')
    fi_claimed = set()
    for _e in fiparts:
        if 'Date' in _e:
            _k = _e[:4]
            _v = pd.Timestamp(_e[6:], tz='US/Pacific')
            holder[_k].append(_v)
            fi_claimed.add(_k)
            continue
        
        try:
            _k, _v = _e.split(':')
        except:
            breakpoint()
        _v = _v.strip()
        if _k in ['Class', 'Status']:
            pass
        else:
            _vp = _v.split(' ')
            try:
                _v = float(_vp[0])
            except:
                breakpoint()
            if _vp[1] != '%':
                _k += f' ({_vp[-1]})'
        holder[_k].append(_v)
        fi_claimed.add(_k)
    for _k, _v in holder.items():
        if len(_v) < len(idx):
            holder[_k].append(None)

dfp = pd.DataFrame(holder, index=idx)
df_parsed = df.join(dfp, how='left')
df_parsed.drop(columns=['flowinfo'], inplace=True)
df_parsed = df_parsed[df_parsed.lng != 'lng']

In [13]:
# Display parsed site information & save to disk
display(df_parsed)
df_parsed.to_csv(SAVEDIR/'usgs_gage_site_metadata.csv')

Unnamed: 0_level_0,name,lat,lng,class,url,huc_cd,Date,Status,Class,Discharge (cfs),% normal(median) (%),Stage (adj) (ft),% normal(mean) (%),Stage (ft),Length of record (years),Unnamed: 16_level_0,Unnamed: 17_level_0,Unnamed: 18_level_0
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
10352500,"USGS 10352500 MCDERMITT CK NR MCDERMITT, NV",41.96655720,-117.83181200,4,https://waterdata.usgs.gov/monitoring-location...,16040201,2025-12-15 12:00:00-08:00,,10-24,3.77,49.93,4547.23,28.54,2.23,74.0,,,
10387110,USGS 10387110 CHEWAUCAN RIVER AT MOUTH NEAR VA...,42.52208056,-120.24945000,0,https://waterdata.usgs.gov/monitoring-location...,171200060506,2025-12-15 12:00:00-08:00,,Not-ranked,,,8.52,,,8.52,,Not-ranked,2025-12-15 12:00:00-08:00
10387150,"USGS 10387150 LAKE ABERT NEAR VALLEY FALLS, OR",42.60350000,-120.18730560,0,https://waterdata.usgs.gov/monitoring-location...,17120006,2025-12-15 12:45:00-08:00,,Not-ranked,,,4253.30,,4253.30,,,,
10396000,USGS 10396000 DONNER UND BLITZEN RIVER NR FREN...,42.79083330,-118.86750000,5,https://waterdata.usgs.gov/monitoring-location...,17120003,2025-12-15 12:00:00-08:00,,25-75,50.50,120.24,4262.32,90.19,1.99,94.0,,,
11491450,"USGS 11491450 IRVING CREEK NEAR LENZ, OR",42.95166667,-121.45905560,0,https://waterdata.usgs.gov/monitoring-location...,18010201,2025-12-15 12:30:00-08:00,,Not-ranked,0.92,,4636.71,,19.71,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,,,
14378200,"USGS 14378200 ILLINOIS RIVER NEAR AGNESS, OR",42.54487778,-124.05191390,0,https://waterdata.usgs.gov/monitoring-location...,17100311,2025-12-15 12:45:00-08:00,,Not-ranked,698.00,21.61,116.69,8.23,4.69,21.0,,,
14378430,"USGS 14378430 ROGUE RIVER AT HWY 101 BRIDGE, A...",42.42888889,-124.41222220,0,https://waterdata.usgs.gov/monitoring-location...,17100310,2025-12-15 12:40:00-08:00,,Not-ranked,,,0.91,,2.91,,,,
14400000,"USGS 14400000 CHETCO RIVER NEAR BROOKINGS, OR",42.12344278,-124.18731070,3,https://waterdata.usgs.gov/monitoring-location...,17100312,2025-12-15 12:30:00-08:00,,<10,606.00,15.25,49.36,9.84,0.20,54.0,,,
444650123134500,USGS 444650123134500 LUCKIAMUTE RIVER NEAR PAR...,44.78051110,-123.22924720,0,https://waterdata.usgs.gov/monitoring-location...,17090003,2025-12-15 12:25:00-08:00,,Not-ranked,,,186.92,,186.92,,,,


In [None]:
from obspy.clients.fdsn import Client
from obspy import UTCDateTime
from geopy.distance import geodesic
import numpy as np

# Initialize FDSN client for IRIS
client = Client("IRIS")

# Get current time for active stations query
now = UTCDateTime()

# Query for UW and CC network stations
print("Fetching station inventory from UW and CC networks...")
inventory = client.get_stations(network="UW,CC", 
                                starttime=now - 86400,  # Active in last 24 hours
                                endtime=now,
                                level="station")

# Extract station information
stations_list = []
for network in inventory:
    for station in network:
        stations_list.append({
            'network': network.code,
            'station': station.code,
            'latitude': station.latitude,
            'longitude': station.longitude,
            'elevation': station.elevation,
            'start_date': station.start_date
        })

df_stations = pd.DataFrame(stations_list)
print(f"Found {len(df_stations)} active stations in UW and CC networks")

# Find stations within 10 km of river gages
matches = []
for gage_id, gage in df_parsed.iterrows():
    gage_lat = float(gage['lat'])
    gage_lon = float(gage['lng'])
    gage_coords = (gage_lat, gage_lon)
    
    for _, sta in df_stations.iterrows():
        sta_coords = (sta['latitude'], sta['longitude'])
        distance_km = geodesic(gage_coords, sta_coords).kilometers
        
        if distance_km <= 10:
            matches.append({
                'gage_id': gage_id,
                'gage_name': gage['name'],
                'gage_lat': gage_lat,
                'gage_lon': gage_lon,
                'network': sta['network'],
                'station': sta['station'],
                'sta_lat': sta['latitude'],
                'sta_lon': sta['longitude'],
                'distance_km': distance_km
            })

df_matches = pd.DataFrame(matches)
print(f"\nFound {len(df_matches)} station-gage pairs within 10 km")

# Display results
display(df_matches.sort_values('distance_km'))

# Save results
df_matches.to_csv(SAVEDIR/'seismic_stations_near_gages.csv', index=False)