In [1]:
# import pandas as pd

# # Check these values

# # Load the data (assuming it's tab-separated)
# df = pd.read_csv("../usgs_streamflow/11467000_streamflow_qc.txt", sep="\t", header=None, names=["gauge_id", "date", "discharge_cfs", "qualifier"])
# df['date'] = pd.to_datetime(df['date'])
# df.set_index('date', inplace=True)

# # Convert from cfs to mm/day (optional, if you want depth)
# # First you need basin area in km² from USGS or StreamStats
# area_sqmi = 1338
# area_km2 = area_sqmi * 2.58999
# cfs_to_mm_day = 0.0283168 * 86400 / (area_km2 * 1e6) * 1e3
# df["discharge_mm"] = df["discharge_cfs"] * cfs_to_mm_day

# display(df)

In [2]:
# q_mean = df["discharge_cfs"].mean()
# q5 = df["discharge_cfs"].quantile(0.05)
# q95 = df["discharge_cfs"].quantile(0.95)

# display(df)

In [3]:
# # Reorder or drop columns if needed
# df_to_save = df[["gauge_id", "discharge_cfs", "discharge_mm"]].copy()
# df_to_save.reset_index(inplace=True)  # move date back to column

# # Save as tab-separated .txt
# df_to_save.to_csv("11467000_streamflow_mm.txt", sep="\t", index=False)

In [4]:
# df_to_save

In [5]:
import pandas as pd
import numpy as np

# ---- Load your daily discharge data (already shown in your notebook)
df = pd.read_csv("../usgs_streamflow/11467000_streamflow_qc.txt", sep="\t", header=None, names=["gauge_id", "date", "discharge_cfs", "qualifier"])
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)

# ---- Convert to mm/day
area_sqmi = 1338
area_km2 = area_sqmi * 2.58999
cfs_to_mm_day = 0.0283168 * 86400 / (area_km2 * 1e6) * 1000
df["discharge_mm"] = df["discharge_cfs"] * cfs_to_mm_day

# ---- Compute hydrologic metrics
def compute_event_stats(series, threshold, condition):
    event_flags = condition(series, threshold)
    starts = (event_flags & ~event_flags.shift(1, fill_value=False)).sum()
    durations = []
    count = 0
    for val in event_flags:
        if val:
            count += 1
        elif count > 0:
            durations.append(count)
            count = 0
    if count > 0:
        durations.append(count)
    mean_duration = np.mean(durations) if durations else 0
    return starts, mean_duration

# Basic stats
q_mean = df["discharge_cfs"].mean()
q5 = df["discharge_cfs"].quantile(0.05)
q95 = df["discharge_cfs"].quantile(0.95)

# Runoff ratio (needs precip)
p_mean = 900  # placeholder in mm/year (adjust if you have actual)
runoff_mm_annual = df["discharge_mm"].resample("Y").sum().mean()
runoff_ratio = runoff_mm_annual / p_mean

# Slope of FDC
sorted_flow = df["discharge_cfs"].sort_values(ascending=False).reset_index(drop=True)
q33 = sorted_flow.iloc[int(0.33 * len(sorted_flow))]
q66 = sorted_flow.iloc[int(0.66 * len(sorted_flow))]
slope_fdc = (q33 - q66) / (0.66 - 0.33)

# Baseflow index (simplified: use 90th percentile of baseflow proxy)
# You may want to replace this with a proper filter (e.g. Lyne-Hollick)
baseflow_index = df["discharge_cfs"].rolling(7, center=True).min().sum() / df["discharge_cfs"].sum()

# Stream elasticity: %ΔQ / %ΔP (placeholder)
stream_elas = None  # needs both annual Q and P time series

# High/low flow event frequency/duration
high_thresh = df["discharge_cfs"].quantile(0.9)
low_thresh = df["discharge_cfs"].quantile(0.1)

high_freq, high_dur = compute_event_stats(df["discharge_cfs"], high_thresh, lambda s, t: s > t)
low_freq, low_dur = compute_event_stats(df["discharge_cfs"], low_thresh, lambda s, t: s < t)

# Zero-flow frequency
zero_q_freq = (df["discharge_cfs"] == 0).sum() / len(df)

# HFD mean
df["doy"] = df.index.dayofyear
hfd = df.groupby(df.index.year).apply(lambda x: (x["discharge_cfs"] * x["doy"]).sum() / x["discharge_cfs"].sum())
hfd_mean = hfd.mean()

# ---- Package into DataFrame
summary = {
    "gauge_id": 11467000,
    "q_mean": q_mean,
    "runoff_ratio": runoff_ratio,
    "slope_fdc": slope_fdc,
    "baseflow_index": baseflow_index,
    "stream_elas": stream_elas,
    "q5": q5,
    "q95": q95,
    "high_q_freq": high_freq,
    "high_q_dur": high_dur,
    "low_q_freq": low_freq,
    "low_q_dur": low_dur,
    "zero_q_freq": zero_q_freq,
    "hfd_mean": hfd_mean
}

summary_df = pd.DataFrame([summary])

# ---- Export to CSV with semicolon delimiter
summary_df.to_csv("camels_hydro_11467000.txt", sep=";", index=False)




  runoff_mm_annual = df["discharge_mm"].resample("Y").sum().mean()
