### Download all historical water level data from the NOAA CO-OPS station at Cape Hatteras (station 8654467, USCG Station Hatteras)

In [2]:
import pandas as pd
import requests
import gzip
from io import StringIO
from pathlib import Path

# ----------------------------
# CO-OPS bulk download settings
# ----------------------------
STATION = "8654467"
BASE = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"

# CO-OPS 8654467 (USCG Station Hatteras) has a record beginning around 2010,
# so the download window opens at a practical date near that start.
START = pd.Timestamp(year=2010, month=4, day=1)
END = pd.Timestamp.today().normalize()  # midnight of the current local day

# Destination CSV for the stitched series
OUTCSV = f"COOPS_{STATION}_water_level_6min_MSL_metric_GMT.csv"

# ----------------------------
# Fetch one chunk (typically a month)
# ----------------------------
def fetch_chunk(beg, fin, product="water_level", datum="MSL",
                units="metric", time_zone="gmt", interval="6",
                station=None, retries=3, backoff=2.0):
    """Request one date range from the CO-OPS datagetter and parse the CSV reply.

    Parameters
    ----------
    beg, fin : datetime-like
        First and last day of the request. Only ``strftime("%Y%m%d")`` is
        used, so any time-of-day component is ignored.
    product, datum, units, time_zone, interval : str
        Forwarded unchanged as the query parameters of the same name.
    station : str, optional
        Station id to query. When omitted, the module-level ``STATION`` is
        looked up at call time.
    retries : int
        Extra attempts made after a connection error, a timeout, or an
        HTTP 5xx response. ``0`` disables retrying.
    backoff : float
        Delay in seconds before the first retry; doubled for each further one.

    Returns
    -------
    pandas.DataFrame or None
        The parsed CSV body, or ``None`` when the body is empty, starts with
        "error" (case-insensitive), or parses to a frame with no rows.

    Raises
    ------
    requests.HTTPError
        For a 4xx response (immediately) or a 5xx response that persists
        through every attempt.
    requests.ConnectionError, requests.Timeout
        When the final attempt still fails at the transport level.
    """
    import time  # local import: only needed for the retry back-off

    params = dict(
        product=product,
        application="bulk_download_script",
        begin_date=beg.strftime("%Y%m%d"),
        end_date=fin.strftime("%Y%m%d"),
        station=STATION if station is None else station,
        datum=datum,
        units=units,
        time_zone=time_zone,
        interval=interval,
        format="csv",
    )

    # A bulk run issues one request per month; without retries a single
    # transient failure would abort the whole download.
    attempts = max(int(retries), 0) + 1
    for attempt in range(attempts):
        is_last = attempt == attempts - 1
        try:
            r = requests.get(BASE, params=params, timeout=60)
        except (requests.ConnectionError, requests.Timeout):
            if is_last:
                raise
        else:
            # 4xx is a request problem and will not improve on retry;
            # on the last attempt let raise_for_status() report a 5xx.
            if r.status_code < 500 or is_last:
                break
        time.sleep(backoff * (2 ** attempt))

    r.raise_for_status()

    txt = r.text.strip()

    # CO-OPS sometimes returns errors as plain text or "Error: ..." lines
    # with a 200 status, so the body itself has to be inspected.
    if not txt or txt.lower().startswith("error"):
        return None

    # Parse CSV text
    df = pd.read_csv(StringIO(txt))

    return None if df.empty else df

# ----------------------------
# Download all months and stitch
# ----------------------------
chunks = []
skipped = []  # "YYYY-MM" labels of months for which fetch_chunk returned None

# Anchor on the first day of START's month: freq="MS" on its own begins at the
# first month start ON OR AFTER START, which would drop a leading partial month
# whenever START is not the 1st.
first_month = START.normalize().replace(day=1)

for month_start in pd.date_range(first_month, END, freq="MS"):
    # Clamp the request window to [START, END].
    beg = max(month_start, START)
    fin = min(month_start + pd.offsets.MonthEnd(0), END)
    if beg > fin:
        continue

    df = fetch_chunk(beg, fin)

    if df is None:
        skipped.append(month_start.strftime("%Y-%m"))
        continue

    # Column names sometimes include leading spaces. Strip them per chunk,
    # before concatenation, so differently padded headers still align into
    # the same columns instead of producing near-duplicates.
    chunks.append(df.rename(columns=str.strip))

if not chunks:
    raise RuntimeError("No data returned. Check station/product/datum/time_zone parameters.")

if skipped:
    # Make gaps visible instead of silently omitting whole months.
    print(f"No data returned for {len(skipped)} month(s): {', '.join(skipped)}")

wl = pd.concat(chunks, ignore_index=True)

# ----------------------------
# Normalize time column and numeric water level
# ----------------------------
# CO-OPS CSV headers seen for water_level at this station:
# "Date Time", "Water Level", "Sigma", "O or I (for verified)", "F", "R", "L", "Quality"

# Parse time
if "Date Time" not in wl.columns:
    raise RuntimeError(f"Expected 'Date Time' column, found: {wl.columns.tolist()}")

wl["Date Time"] = pd.to_datetime(wl["Date Time"], errors="coerce")

# Identify water level column (usually "Water Level"): exact name first,
# then fall back to the first column whose name contains both "water" and "level".
wl_col = next((c for c in wl.columns if c.lower() == "water level"), None)
if wl_col is None:
    wl_col = next(
        (c for c in wl.columns if "water" in c.lower() and "level" in c.lower()),
        None,
    )
if wl_col is None:
    raise RuntimeError(f"Could not find a water level column. Columns: {wl.columns.tolist()}")

wl[wl_col] = pd.to_numeric(wl[wl_col], errors="coerce")

# Rows whose timestamp or water level failed to parse are discarded.
wl = wl.dropna(subset=["Date Time", wl_col]).sort_values("Date Time").reset_index(drop=True)

# ----------------------------
# Save
# ----------------------------
wl.to_csv(OUTCSV, index=False)

print(f"Wrote {len(wl):,} rows to: {OUTCSV}")
print(wl.head())
print(wl.tail())


Wrote 1,364,334 rows to: COOPS_8654467_water_level_6min_MSL_metric_GMT.csv
            Date Time  Water Level  Sigma  O or I (for verified)  F  R  L  \
0 2010-04-27 18:00:00       -0.028  0.004                      0  0  0  0   
1 2010-04-27 18:06:00       -0.036  0.011                      0  0  0  0   
2 2010-04-27 18:12:00       -0.059  0.042                      0  0  0  0   
3 2010-04-27 18:18:00       -0.055  0.005                      0  0  0  0   
4 2010-04-27 18:24:00       -0.064  0.004                      0  0  0  0   

  Quality  
0       v  
1       v  
2       v  
3       v  
4       v  
                  Date Time  Water Level  Sigma  O or I (for verified)  F  R  \
1364329 2025-12-23 15:36:00       -0.035  0.004                      1  0  0   
1364330 2025-12-23 15:42:00       -0.036  0.022                      1  0  0   
1364331 2025-12-23 15:48:00       -0.039  0.011                      1  0  0   
1364332 2025-12-23 15:54:00       -0.041  0.003                      1

In [3]:
# Summary statistics (count, mean, min, quartiles, max, std) of the cleaned
# frame: a quick sanity check on the covered time span and water-level range.
wl.describe()

Unnamed: 0,Date Time,Water Level,Sigma,O or I (for verified),F,R,L
count,1364334,1364334.0,1364315.0,1364334.0,1364334.0,1364334.0,1364334.0
mean,2018-02-27 12:29:33.378660352,0.1351453,0.007098727,0.004757633,0.000263132,0.00631297,3.591496e-05
min,2010-04-27 18:00:00,-0.926,0.0,0.0,0.0,0.0,0.0
25%,2014-04-08 02:37:30,0.037,0.002,0.0,0.0,0.0,0.0
50%,2018-02-27 06:57:00,0.124,0.005,0.0,0.0,0.0,0.0
75%,2022-02-01 11:34:30,0.217,0.008,0.0,0.0,0.0,0.0
max,2025-12-23 16:00:00,1.846,0.888,1.0,1.0,1.0,1.0
std,,0.1486225,0.008250024,0.06881134,0.01621922,0.07920304,0.005992804
