## Request USGS Streamflow Data.
This notebook is an example of how to use pydrology to download and resample USGS streamflow data. The USGS provides gage data from streams and rivers across the United States, which can be accessed on their website (https://waterdata.usgs.gov/nwis). An example of the monitoring data can be found here (https://waterdata.usgs.gov/monitoring-location/04234000/#parameterCode=00065&period=P7D) for Fall Creek in Ithaca, NY. The data provided at each location are generally a gage measurement in feet and a discharge value that is derived from a rating curve.

The general workflow for requesting USGS streamflow data is as follows:
1. Request the gage/discharge data for a particular site using the function call below.
2. Inspect the raw gage/discharge data for missing values and other issues.
3. Handle any data cleaning and then the data is ready for use!
4. (Optional) Downsample or upsample the data using the provided functions.
5. Save the Data Frame.

In [None]:
# Library imports.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path

# Local imports.
from pydrology.usgs.usgs_request import request_usgs_data
from pydrology import plotting
from pydrology import time_series

In [None]:
# Parameters for the request.
# ---------------------------
# Everything in this cell is a notebook-level setting that the later cells
# read: which gages to request, which parameter, the time window, and which
# cleaning steps to run.

# Gage IDs. Found on the USGS page for the specific monitoring location.
# `gage_dir` is a placeholder: point it at the directory that holds
# 'usgs_stations_ny.csv'. The output CSV files are written to it as well.
gage_dir = Path(r'Path to directory')
gage_ids_df = pd.read_csv(gage_dir / 'usgs_stations_ny.csv')
# The last character of every station number is dropped.
# NOTE(review): presumably the CSV stores a trailing non-digit character
# after each ID - confirm against the file before reusing with another list.
gage_id_list = [i[:-1] for i in gage_ids_df['station_number']]
print(gage_id_list)

# Parameter to get data for. 'discharge' or 'height'.
parameter = 'height'

# Start date in format yyyy-mm-dd, e.g. "2022-06-24".
start_date = "1970-01-01" 

# Local start time in format HH:MM:SS.mmm, e.g. "11:17:05.203".
start_time = "00:00:00.000" 

# End date in format yyyy-mm-dd, e.g. "2022-06-24".
end_date = "2023-01-02" 

# Local end time in format HH:MM:SS.mmm, e.g. "11:17:05.203".
end_time = "00:00:00.000" 

# Offset from GMT (+ or -) in format +/-HH:MM, e.g. "-04:00".
gmt_offset = "-05:00" 

# Data processing flags.
# -------------------------
# Each flag switches one step of the batch cell on or off.

# Plot the inspection of the stream data.
plot_data_inspection = False

# Convert missing data to NaN.
missing_to_nan = True

# Standardize datetimes to a consecutive series.
standardize_dates = True

# Interpolate NaN values.
interpolate_nan = True

# Resample data to a specific time interval (the interval is set in the batch cell).
resample_data = True

In [None]:
def save_usgs_dataframe(df, gage_id):
    """
    Saves a gage DataFrame as a CSV file in the notebook-level ``gage_dir``.

    The file is named ``<gage_id>_<first datetime>-<last datetime>.csv``, where
    the two datetimes are the first and last values (in row order) of the
    ``datetime`` column. The index is not written.

    :param df: DataFrame with a ``datetime`` column and at least one row.
    :param gage_id: Gage identifier used as the filename prefix.
    :raises IndexError: If ``df`` has no rows.
    """
    # Positional access reads only the two end points; building a list of the
    # whole column twice is unnecessary work on long records.
    first_stamp = df['datetime'].iloc[0]
    last_stamp = df['datetime'].iloc[-1]

    # NOTE(review): if the datetime values render with ':' characters, the
    # resulting name is not a valid filename on Windows - confirm the format.
    fname = f'{gage_id}_{first_stamp}-{last_stamp}.csv'
    df.to_csv(gage_dir / fname, index=False)
    

def request_full_timeframe(gage_id, parameter, start_date, start_time, end_date, end_time, gmt_offset):
    """
    Requests the full time frame of data in chunks.

    Not implemented yet: the draft of this function stopped at an unfinished
    ``for`` statement, which made the whole cell a syntax error. Until the
    chunking logic is written, calling it fails loudly instead.

    :param gage_id: Gage identifier to request.
    :param parameter: Parameter to request ('discharge' or 'height').
    :param start_date: Start date in format yyyy-mm-dd.
    :param start_time: Local start time in format HH:MM:SS.mmm.
    :param end_date: End date in format yyyy-mm-dd.
    :param end_time: Local end time in format HH:MM:SS.mmm.
    :param gmt_offset: Offset from GMT in format +/-HH:MM.
    :raises NotImplementedError: Always.
    """
    # TODO: implement the chunked request. The draft initialised its cursor
    # from end_date/end_time, so it presumably meant to walk backward from the
    # end of the range - chunk size and how chunks are merged are still open.
    raise NotImplementedError(
        'request_full_timeframe is not implemented yet; '
        'call request_usgs_data directly for the whole time frame.'
    )

In [None]:
# Batch request.
# For every gage in gage_id_list: request the data, optionally plot it for
# inspection, clean it according to the processing flags, and save the result
# as a CSV. A gage whose request fails or returns no rows is reported and
# skipped so that one bad station does not stop the batch.

# Column names and the marker used for missing values. These do not change
# between gages, so they are set once instead of on every iteration.
data_column_name = parameter
time_column_name = 'datetime'
missing_value = 'M'

# The same columns under the names used by the cleaning steps.
data_column = data_column_name
time_column = time_column_name

dt = 15  # Time step in minutes.
new_dt = 1440  # New time step in minutes (used when resampling).

for gage_id in gage_id_list:
    # Request the gage data as a DataFrame.
    try:
        gage_df = request_usgs_data(gage_id, parameter, start_date, start_time, end_date, end_time, gmt_offset)
    except Exception as e:
        # Deliberately broad: this is the batch boundary, and any failure for
        # one gage should only skip that gage. Say which one failed.
        print(f'Request failed for gage {gage_id}: {e}')
        continue

    # Nothing to clean or save; saving an empty frame would raise IndexError
    # when the filename is built and abort the remaining gages.
    if gage_df.empty:
        print(f'No data returned for gage {gage_id}; skipping.')
        continue

    # Print the head and tail.
    print(gage_df)

    # =============================
    # Inspect the data

    if plot_data_inspection:
        # Plot the valid, missing, and non-valid data as a bar chart.
        plotting.plot_missing_ratio(gage_df, data_column_name)

        # Plot the data as a time series.
        plotting.plot_data_timeseries(gage_df, data_column_name, time_column_name, missing_value=missing_value)

    # ==============================
    # Data Cleaning

    # ==============================
    # Missing data to NaN.
    if missing_to_nan:
        gage_df.replace(missing_value, np.nan, inplace=True)

    # ==============================
    # Standardize datetime to insert missing time stamps.
    if standardize_dates:
        gage_df = time_series.standardize_datetime(gage_df, time_column, data_column, dt)

    # ==============================
    # Interpolate NaN values.
    if interpolate_nan:
        gage_df = time_series.interpolate_time_series(gage_df, data_column, method='linear')

    # ==============================
    # Resample data and save.
    if resample_data:
        resample_gage_df = time_series.resample_data(gage_df, time_column, data_column, new_dt)
        # A bare `.head()` inside a loop displays nothing, so print it.
        print(resample_gage_df.head())
        save_usgs_dataframe(resample_gage_df, gage_id)
    else:
        save_usgs_dataframe(gage_df, gage_id)