# Step-by-step guide to retrieving data from USGS API

## Example: 
### Get streamflow data from a selected gage for a time period of interest and plot the time-series results

In [None]:
# Import libraries

%matplotlib inline
import pandas as pd
import requests
import json
from datetime import datetime
from collections import OrderedDict

In [None]:
# Enter desired data: every value a reader might want to change lives in this cell
gage       = "01646500"                               # USGS gage (sent as the 'sites' argument)

y0, m0, d0 = 2013, 4, 30                              # Start date (year, month, day)
y1, m1, d1 = 2014, 5, 10                              # End date (year, month, day)

parameter  = "00060"                                  # USGS parameter code to request
obser      = "StreamFlow"                             # Label used for the observed data
dformat    = "json"                                   # Response format asked of the API
url        = 'https://waterservices.usgs.gov/nwis/iv' # USGS API endpoint (HTTPS instead of plain HTTP)

In [None]:
# Build datetime objects (at midnight) marking both ends of the requested period
start = datetime(year=y0, month=m0, day=d0, hour=0)
stop = datetime(year=y1, month=m1, day=d1, hour=0)

# Render both ends as YYYY-MM-DD strings, the form handed to the USGS API
first, last = (moment.strftime('%Y-%m-%d') for moment in (start, stop))

In [None]:
# Query the USGS API for data

# Query-string arguments, kept in the order they will appear in the request URL
params = OrderedDict([('format', dformat), ('sites', gage), ('startDT', first),
                      ('endDT', last), ('parameterCD', parameter)])

# requests waits indefinitely by default; a timeout keeps the notebook from
# hanging if the service is unreachable (10 s to connect, 120 s between reads).
r = requests.get(url, params=params, timeout=(10, 120))

# Stop here with the HTTP status in the error message instead of failing later
# with a confusing JSON/KeyError when the service answers 4xx/5xx.
r.raise_for_status()

data = r.content.decode()   # response body as text
d = json.loads(data)        # parsed JSON response

# Only announce success once the response has been validated and parsed
print("Retrieved Data for USGS Gage: ", gage)

In [None]:
# Take a peek at the JSON output. Only the beginning is printed: the full
# response also holds every observation and would flood the notebook.
print(json.dumps(d, indent=2)[:2000])

In [None]:
# There is a lot of metadata we don't need here, let's dig deeper.
# Only the beginning is printed to keep the output readable.
print(json.dumps(d['value'], indent=2)[:2000])

In [None]:
# Still not where we need to be, let's go down another level.
# Only the beginning is printed to keep the output readable.
print(json.dumps(d['value']['timeSeries'], indent=2)[:2000])

In [None]:
# List the keys of the first time series in the JSON output, one per line:
series_list = d['value']['timeSeries']
mydict = dict(series_list[0])
for key in mydict.keys():
    print(key)

In [None]:
# Now, let's dig a little deeper into one of the keys, 'sourceInfo':
mydict['sourceInfo']

In [None]:
# Great, the station name sits under 'sourceInfo' -> 'siteName'; keep it in a
# variable so it can be reused later:
source_info = mydict['sourceInfo']
SiteName = source_info['siteName']
print(SiteName)

In [None]:
# After reviewing the JSON data structure, keep only the part we need:
# a copy of the 'value' list inside the first 'values' entry of the first time series.
first_series = d['value']['timeSeries'][0]
tseries = list(first_series['values'][0]['value'])

In [None]:
# Create a DataFrame, index it by local (wall-clock) time, and make the observations numeric
df = pd.DataFrame.from_dict(tseries)

# Assumes each 'dateTime' string is an ISO-8601 stamp that starts with
# 'YYYY-MM-DDTHH:MM:SS' and ends with a UTC offset such as '-04:00'.
# The first 19 characters are therefore the wall-clock time of the observation.
local_time = pd.to_datetime(df['dateTime'].str.slice(0, 19), format='%Y-%m-%dT%H:%M:%S')

# Parsing the complete string with utc=True applies whatever offset each row
# carries (any sign, any number of hours, mixed offsets across the period);
# tz_localize(None) then drops the timezone label and keeps the UTC clock time.
utc_time = pd.to_datetime(df['dateTime'], utc=True).dt.tz_localize(None)

# 'UTC Offset' is UTC time minus local time (4 hours for a '-04:00' stamp).
# It is derived from the parsed times rather than by picking a single character
# out of the string, which only worked for offsets of -01:00 to -09:00.
df['UTC Offset'] = utc_time - local_time

# Index by local time; .values drops the Series name so the index is unnamed
df.index = pd.DatetimeIndex(local_time.values)
df['value'] = pd.to_numeric(df['value'])

In [None]:
# Preview the first rows of the DataFrame
df.head()

In [None]:
# Get rid of unwanted data, rename observed data.
# `columns=` replaces the positional axis argument of `df.drop('dateTime', 1)`,
# which pandas 2.0 no longer accepts. errors='ignore' together with a
# non-mutating chain keeps the cell safe to re-run: columns that are already
# gone are skipped instead of raising KeyError.
df = (
    df.drop(columns=['dateTime', 'qualifiers', 'UTC Offset'], errors='ignore')
      .rename(columns={'value': obser})
)
df.head()

In [None]:
# Plot the results, use the SiteName as a title, and label both axes so the
# figure can be read on its own
ax = df.plot(grid=True, title=SiteName)
ax.set_xlabel('Date')
ax.set_ylabel(obser);   # trailing ';' keeps the returned label object out of the cell output