## Request USGS Precipitation Data.
This notebook is an example of how to use pydrology to download and resample USGS precipitation data. The USGS provides gage data from rain gauges across the United States, which can be accessed on their website (https://waterdata.usgs.gov/nwis). An example of the monitoring data can be found here (https://waterdata.usgs.gov/monitoring-location/350110080502045/#parameterCode=00045&period=P7D) for a gauge in Mecklenburg, NC. The data provided at each location is generally a rain measurement in inches.

The general workflow for requesting USGS precipitation data is as follows:
1. Request the gage data for a particular site using the function call below.
2. Inspect the raw gage data for missing values and other issues.
3. Handle any data cleaning and then the data is ready for use!
4. (Optional) Downsample or upsample the data using the provided functions.
5. Save the Data Frame.

In [None]:
# Library imports.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Local imports.
from pydrology.usgs.usgs_prcp_request import request_usgs_prcp_data
from pydrology import plotting
from pydrology import time_series

## 1) Request USGS rain gauge data

In [None]:
# Parameters for the request.
# ---------------------------

# Gage ID. Found on the USGS page for the specific monitoring location.
gage_id = "350110080502045"  # Mecklenburg, North Carolina

# Start date in format yyyy-mm-dd, e.g. "2022-06-24".
start_date = "2016-10-01"

# Local start time in format HH:MM:SS.mmm, e.g. "11:17:05.203".
start_time = "00:00:00.000"

# End date in format yyyy-mm-dd, e.g. "2022-06-24".
end_date = "2016-11-01"

# Local end time in format HH:MM:SS.mmm, e.g. "11:17:05.203".
end_time = "00:00:00.000"

# Offset of local time from GMT (+ or -) in format +/-HH:MM, e.g. "-04:00".
# North Carolina is on Eastern Daylight Time (GMT-4) for this entire window
# (daylight saving time ended on 2016-11-06), which is also the offset the
# batch request in this notebook uses for the same dates.
gmt_offset = "-04:00"

In [None]:
# Download the gage record for the requested window as a DataFrame.
gage_df = request_usgs_prcp_data(
    gage_id,
    start_date,
    start_time,
    end_date,
    end_time,
    gmt_offset,
)

# Preview the first rows of the returned data.
gage_df.head()

## 2) Inspect the data.

In [None]:
# Marker used for missing readings, and the timestamp / value column names
# handed to the plotting helpers.
missing_value = 'M'
time_column_name = 'datetime'
data_column_name = 'precip'

# Bar chart of the valid, missing, and non-valid data.
plotting.plot_missing_ratio(gage_df, data_column_name)

# Time series of the data.
plotting.plot_data_timeseries(
    gage_df,
    data_column_name,
    time_column_name,
    missing_value=missing_value,
)

## 3) Data cleaning.

In [None]:
# Column names and time step shared by the cleaning steps below.
time_column = 'datetime'
data_column = 'precip'
dt = 15  # Time step, in minutes.

### Set Missing to NaN. 'M' => NaN

In [None]:
# Swap the missing-value marker for NaN, modifying the frame in place.
missing_value = 'M'
gage_df.replace(to_replace=missing_value, value=np.nan, inplace=True)

### Add NaN values to missing time steps.

In [None]:
# Standardize the record to the dt-minute time step; per the section heading,
# time steps absent from the record are added with NaN values.
gage_df = time_series.standardize_datetime(
    gage_df,
    time_column,
    data_column,
    dt,
)

### Interpolation of NaN values.

In [None]:
# Interpolate the NaN values in the data column using the linear method.
gage_df = time_series.interpolate_time_series(
    gage_df,
    data_column,
    method='linear',
)

## 4) Resampling data.

In [None]:
# Column names handed to the resampling helper.
time_column = 'datetime'
data_column = 'precip'

# New time step in minutes (1440 min = 24 h).
new_dt = 1440

resample_gage_df = time_series.resample_data(
    gage_df,
    time_column,
    data_column,
    new_dt,
)

# Preview the first rows of the resampled data.
resample_gage_df.head()

In [None]:
# Overlay the gage series and its resampled counterpart on a single axis.
fig, ax = plt.subplots()

# (frame, matplotlib format string, legend label) for each series to draw.
series_to_plot = (
    (gage_df, 'b-o', 'Orig. Data'),
    (resample_gage_df, 'rx', 'Clean/Resamp.'),
)
for frame, line_format, legend_label in series_to_plot:
    ax.plot(frame.datetime, frame[data_column], line_format, label=legend_label)

ax.legend()
# Rotate the x tick labels by 45 degrees.
ax.tick_params(axis='x', rotation=45)
plt.show()

## 5) Save Data Frame.

In [None]:
# Write the gage data to CSV, leaving out the DataFrame index.
output_csv = '~/Mecklenberg_Prcp.csv'
gage_df.to_csv(output_csv, index=False)


In [None]:
## Batch Request.

In [None]:
# Parameters for the request.
# ---------------------------

# List of gage IDs. Found on the USGS page for the specific monitoring location.
# Every ID is listed exactly once: a repeated ID would be requested twice and
# its rows would appear twice in the concatenated DataFrame.
gage_ids = [
    "354356078331845",
    "354525078382645",
    "352936076125245",
    "354528078372645",
    "354546078422045",
    "354606078412845",
    "354623078441345",
    "355020078465645",
    "355201078362845",
    "355339078405145",
    "355348078433545",
    "355852078572045",
    "355856078492945",
    "360143078540945",
    "360334078584145",
    "360419078543145",
    "350152079030745",
    "350258078512145",
    "350915078533245",
    "355511078570745",
    "355520079035845",
    "355631079025645",
    "360733079552145",
    "350627080410645",
    "350857080383245",
    "351028080385545",
    "351145080371945",
    "351218080331345",
    "351302080412701",
    "351455080374445",
    "351536080410645",
    "351540080430045",
    "351741080475045",
    "351812080445545",
    "351943080323145",
    "352000080414645",
    "352006080462845",
    "352135080462045",
    "352310080424845",
    "352432080473745",
    "352541080441745",
    "352718080484345",
    "352921080473245",
    "354057080362545",
    "354303080354645",
    "354822080521501",
    "354855080134201",
    "355037080393045",
    "355113080230345",
    "360000080444645",
    "360848080251845",
    "362416080334345",
    "350110080502045",
    "350128081000145",
    "350314080484945",
    "350324080551845",
    "350351080454145",
    "350359080521145",
    "350623080583801",
    "350624081023345",
    "350630080455845",
    "350635080513245",
    "350637080475645",
    "350646080432545",
    "350657080544945",
    "350815080460745",
    "350823080505345",
    "350842080572801",
    "350903081004545",
    "350947080524945",
    "351001080495845",
    "351023080435745",
    "351032080475245",
    "351104080521845",
    "351109080412145",
    "351124080581245",
    "351132080504145",
    "351132080562345",
    "351229080460245",
    "351229080480145",
    "351247080592745",
    "351320080502645",
    "351331080525945",
    "351412080541245",
    "351414080463245",
    "351452081055245",
    "351502080512045",
    "351553080562645",
    "351604080470845",
    "351633080493445",
    "351642080533445",
    "351753081011745",
    "351816080564345",
    "351822081140545",
    "351922080540345",
    "351928080515645",
    "351954080493445",
    "352003080591245",
    "352155080531145",
    "352224080500345",
    "352323080551645",
    "352440080505045",
    "352523080535545",
    "352555080574445",
    "352602081014745",
    "352750080523545",
    "353003080591745",
    "353014080524945",
    "353459081334545",
    "352536082333245",
]

# Request window for the batch download.
# Dates use the format yyyy-mm-dd (e.g. "2022-06-24").
start_date = "2016-10-01"
end_date = "2016-11-01"

# Local times use the format HH:MM:SS.mmm (e.g. "11:17:05.203").
start_time = "00:00:00.000"
end_time = "00:00:00.000"

# Hour offset from GMT (+ or -), format +/-HH:MM (e.g. "-04:00").
gmt_offset = "-04:00"

In [None]:
# Request the data for each gage ID. A failed request is reported and skipped
# so that one bad gage does not abort the whole batch.
gage_df_list = []
failed_gage_ids = []  # IDs whose request raised, kept for later inspection.
for gid in gage_ids:
    try:
        # Only the request itself sits inside the try, so errors from the
        # bookkeeping below are never mistaken for download failures.
        site_df = request_usgs_prcp_data(gid, start_date, start_time, end_date, end_time, gmt_offset,
                                         timeout=10)
    except Exception as e:
        print(e)
        print('Could not get data for {}'.format(gid))
        failed_gage_ids.append(gid)
    else:
        gage_df_list.append(site_df)

# pd.concat raises a bare "No objects to concatenate" ValueError on an empty
# list; fail with a message that names the actual problem instead.
if not gage_df_list:
    raise ValueError(
        'No gage data could be downloaded ({} request(s) failed); '
        'nothing to concatenate.'.format(len(failed_gage_ids))
    )

# Join the gage dfs.
gage_df_concat = pd.concat(gage_df_list)

In [None]:
# Preview the first five rows of the combined data.
gage_df_concat.head(n=5)

In [None]:
# Write the combined data to CSV, leaving out the DataFrame index.
# NOTE(review): "Path to Directory" looks like a placeholder to replace with a
# real folder before running - confirm.
batch_output_csv = '~/Path to Directory/NC_Prcp_20161001_20161101.csv'
gage_df_concat.to_csv(batch_output_csv, index=False)