In [4]:
import csv
import io
import json
import os
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import requests

In [5]:
# Base folder that the data directory below is built from. The path is
# relative, so it is resolved against the working directory the kernel runs in
# (presumably it points at the project root — TODO confirm).
proj_dir = Path("../../../..")

In [6]:
# build the query url for the USBR PN daily-data service
def format_url(station_name: str, pcodes: list, start: datetime, end: datetime):
    """Builds the url of a USBR PN daily-data csv query.

    The station name and the pcodes are lower-cased (pcodes also have
    surrounding spaces removed). The ``year``/``month``/``day`` parameters
    appear twice in the query string: first with the values of ``start``,
    then with the values of ``end``.

    Args:
        station_name (str): The station name.
        pcodes (list): The list of pcodes (strings).
        start (datetime): The start date.
        end (datetime): The end date.
    Returns:
        url (str): The formatted url.
    """
    endpoint = "https://www.usbr.gov/pn-bin/daily.pl"
    station_part = f"?station={station_name.lower()}&format=csv"
    start_part = f"&year={start.year}&month={start.month}&day={start.day}"
    end_part = f"&year={end.year}&month={end.month}&day={end.day}"

    # one "&pcode=..." parameter per requested pcode, in the given order
    pcode_part = ""
    for code in pcodes:
        pcode_part += "&pcode=" + code.strip(" ").lower()

    return endpoint + station_part + start_part + end_part + pcode_part

In [7]:
# define a function to download the data for a station
def download_data(
    station_name: str,
    pcodes: list,
    start: datetime,
    end: datetime,
    path: str,
    timeout: float = 60.0,
):
    """Downloads the data for a station and saves it as a csv file.

    The response is parsed in memory, its first row (the header sent by the
    service) is replaced with ``["date"] + pcodes`` and the result is written
    to ``<path>/raw/usbr/<station_name>.csv``. The ``raw/usbr`` folder must
    already exist.

    Args:
        station_name (str): The station name.
        pcodes (list): The list of pcodes.
        start (datetime): The start date.
        end (datetime): The end date.
        path (str): The path to save the data.
        timeout (float): Seconds to wait for the server before giving up.
    Returns:
        None
    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never stored as if it were data.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # format the url
    url = format_url(station_name, pcodes, start, end)

    # download the data; without a timeout a stalled server would hang the run
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    # parse the csv text in memory (newline="" leaves line-ending handling to
    # the csv module) and drop the header row sent by the service
    rows = list(csv.reader(io.StringIO(r.text, newline="")))[1:]

    # define the column names
    column_names = ["date"] + list(pcodes)

    # write the data to a csv file; newline="" prevents the csv module's own
    # "\r\n" terminator from being translated again on Windows
    out_file = os.path.join(path, "raw/usbr", station_name + ".csv")
    with open(out_file, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(column_names)
        writer.writerows(rows)

    return None

In [8]:
# function to process the downloaded data
def postprocess_data(
    station_name: str,
    path: str,
    grand_id: str = None,
    pcodes: list = None,
    pcode_keys: dict = None,
):
    """Converts the raw csv of a station into a processed csv.

    Reads ``<path>/raw/usbr/<STATION_NAME>.csv`` (the name is upper-cased for
    reading). For every pcode that has an entry in ``pcode_keys`` a column
    named ``entry["column_name"]`` is created as
    ``raw * prod(entry["conversion_factors"]) + entry["constant"]``; the
    constant is optional. Pcodes without an entry are skipped. The result
    (``date`` plus the converted columns) is written to
    ``<path>/processed/USBR_<station_name>.csv``.

    Args:
        station_name (str): The station name.
        path (str): Folder containing the ``raw/usbr`` and ``processed`` folders.
        grand_id (str): Accepted for backward compatibility; the output file
            is currently named after ``station_name``, not ``grand_id``.
        pcodes (list): The pcodes (columns of the raw file) to convert.
            ``None`` is treated as an empty list.
        pcode_keys (dict): Maps a pcode to a dict with ``column_name``,
            ``conversion_factors`` and optionally ``constant``. It is only
            read, never modified. ``None`` is treated as an empty dict.
    Returns:
        None
    """
    pcodes = pcodes or []
    pcode_keys = pcode_keys or {}

    # read in the data
    df = pd.read_csv(
        os.path.join(path, "raw/usbr", "{}.csv".format(station_name.upper()))
    )

    new_df = pd.DataFrame()
    new_df["date"] = df["date"]

    # convert the data to the correct units
    for pcode in pcodes:
        if pcode not in pcode_keys:
            continue
        entry = pcode_keys[pcode]

        converted = df[pcode] * np.prod(entry["conversion_factors"])

        # .get() instead of try/except: a missing constant simply means "no
        # offset", and the caller's dict is left untouched
        constant = entry.get("constant")
        if constant:
            converted = converted + constant

        new_df[entry["column_name"]] = converted

    # save the data
    new_df.to_csv(
        os.path.join(path, "processed", "USBR_{}.csv".format(station_name)), index=False
    )

In [9]:
# station names: first column of the header-less pcodes.csv
# (earlier hard-coded list: ["crpo", 'prv', 'prvo', 'kee', 'cle', 'crao'],
#  with grand_ids [None, 91, '91_forebay', 55, 58, None])
station_names = pd.read_csv("pcodes.csv", header=None)[0]

# first and last day of the download period
start_date = datetime(1990, 1, 1)
end_date = datetime(2023, 10, 31)

In [10]:
# read the stations json file
with open("stations.json", "r") as f:
    stations_dict = json.load(f)

# folder that receives the raw downloads and the processed files
# (the working directory itself is not changed)
data_dir = proj_dir / "Data/InSituTemperature"

# create the folders that download_data and postprocess_data write into
for sub_dir in ("raw/usbr", "processed"):
    os.makedirs(os.path.join(data_dir, sub_dir), exist_ok=True)

# the pcode conversion table is the same for every station, so it is looked
# up once; a missing table fails here, before anything is downloaded
pcode_keys = stations_dict["pcode_keys"]

# download and postprocess the data for each station
for station_name in station_names:
    # the json file and the csv files are keyed by the upper-cased name;
    # a station missing from the json file raises a KeyError
    station_key = station_name.upper()
    station_info = stations_dict[station_key]

    # stations without a "pcodes" entry have nothing to request
    if "pcodes" not in station_info:
        continue
    pcodes = station_info["pcodes"]

    # download the data
    download_data(station_key, pcodes, start_date, end_date, data_dir)
    # postprocess the data
    postprocess_data(station_key, data_dir, pcodes=pcodes, pcode_keys=pcode_keys)