-
Notifications
You must be signed in to change notification settings - Fork 18
/
usgs_streamflow.py
52 lines (40 loc) · 1.83 KB
/
usgs_streamflow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import datetime
from pathlib import Path
import pandas as pd
from wsfr_read.config import DATA_ROOT
from wsfr_read.sites import read_metadata
# About: https://help.waterdata.usgs.gov/codes-and-parameters/parameters
# Look up: https://help.waterdata.usgs.gov/parameter_cd_nm
# Name of the daily mean discharge column as it appears in the raw CSV files.
# NOTE(review): "00060" is presumably the USGS parameter code for discharge (see the
# lookup link above) -- confirm against the USGS parameter table.
MEAN_DISCHARGE_RAW_COL = "00060_Mean"
# Human-readable name the raw column is renamed to before data is returned to callers.
MEAN_DISCHARGE_READABLE_COL = "discharge_cfs_mean"
def read_usgs_streamflow_data(
    site_id: str, issue_date: str | datetime.date | pd.Timestamp
) -> pd.DataFrame:
    """Load USGS daily mean streamflow observations available before a forecast issue date.

    The CSV file for the site and the issue date's forecast year is read, and only rows
    whose calendar date falls strictly before the issue date are kept.

    Args:
        site_id (str): Identifier for forecast site
        issue_date (str | datetime.date | pd.Timestamp): Date that forecast is being issued for

    Returns:
        pd.DataFrame: dataframe with columns ["datetime", "discharge_cfs_mean"]
    """
    issue_ts = pd.to_datetime(issue_date)
    raw = pd.read_csv(get_path_to_file(site_id, issue_ts), parse_dates=["datetime"])

    # Compare on calendar dates so any time-of-day component is ignored; the issue date
    # itself is excluded.
    before_issue = raw["datetime"].dt.date < issue_ts.date()
    selected = raw.loc[before_issue, ["datetime", MEAN_DISCHARGE_RAW_COL]]

    renamed = selected.rename(
        columns={MEAN_DISCHARGE_RAW_COL: MEAN_DISCHARGE_READABLE_COL}
    )
    return renamed.copy()
def get_path_to_file(site_id: str, issue_date: str | datetime.date | pd.Timestamp) -> Path:
    """Build the path to a site's streamflow CSV for the forecast year of an issue date.

    The forecast year is taken to be the calendar year of ``issue_date``. The resulting
    path has the form ``DATA_ROOT / "usgs_streamflow" / "FY<year>" / "<site_id>.csv"``;
    the file's existence is not checked here.

    Args:
        site_id (str): Identifier for forecast site
        issue_date (str | datetime.date | pd.Timestamp): Date that forecast is being issued for

    Returns:
        Path: path to CSV file

    Raises:
        ValueError: if ``site_id`` is not present in the index returned by
            ``read_metadata()``.
    """
    issue_ts = pd.to_datetime(issue_date)

    known_sites = read_metadata().index
    if site_id in known_sites:
        year_dir = f"FY{issue_ts.year}"
        return DATA_ROOT / "usgs_streamflow" / year_dir / f"{site_id}.csv"

    raise ValueError(f"Invalid site_id: {site_id}")