# Haw River Levels
Data is pulled from the [USGS Instantaneous Values REST API](https://waterservices.usgs.gov/rest/IV-Service.html).

In [1]:
import ipyleaflet as lf
import ipywidgets as widgets
import pandas as pd
import requests

In [16]:
# Request URL for the USGS instantaneous-values endpoint: JSON output for
# two site numbers over 2018-09-17..2018-09-19, restricted to parameter
# codes 00060 and 00065 (presumably discharge and gage height, going by
# the column names in the notebook output -- confirm against USGS docs).
url = '&'.join((
    'https://waterservices.usgs.gov/nwis/iv/?format=json',
    'sites=02096960,02096500',
    'startDT=2018-09-17',
    'endDT=2018-09-19',
    'parameterCd=00060,00065',
    'siteStatus=all',
))

def get_location_data(location):
    '''Extract site metadata and measurements from one timeSeries entry.

    Args:
        location: One element of the ``timeSeries`` array of the service
            response. The keys read here are ``values`` (first element's
            ``value`` records), ``variable`` and ``sourceInfo``.

    Returns:
        dict: Site metadata (name, code, network, projection, coordinates,
        measurement name, normalised description, first/last timestamp)
        plus ``metrics``, a DataFrame indexed by ``dateTime`` whose value
        column is renamed to the normalised description. A ``status``
        column is added only when the qualifiers arrive as lists.

    Raises:
        KeyError, IndexError: If the entry lacks the expected keys or
            contains no measurement records.
    '''

    # Time series array to time series dataframe
    metrics = pd.DataFrame.from_records(location['values'][0]['value'])

    # Set data types. Timestamps are parsed explicitly (a unit-less
    # 'datetime64' cast is rejected by current pandas), normalised to UTC
    # and stored timezone-naive so every series shares a comparable index.
    metrics['value'] = metrics['value'].astype('float64')
    metrics['dateTime'] = pd.to_datetime(
        metrics['dateTime'], utc=True).dt.tz_localize(None)

    # Time limits for the meta data
    start = metrics['dateTime'].min()
    end = metrics['dateTime'].max()

    # Set value name to metric description, e.g. 'Gage height, feet'
    # becomes 'gage_height_feet'
    description = location['variable']['variableDescription']
    description = description.replace(' ', '_').replace(',', '').lower()
    metrics = metrics.rename(columns={'value': description})

    # Set index to timescale
    metrics = metrics.set_index('dateTime')

    # Change array in qualifiers to string. The first row is read by
    # position: the index now holds timestamps, so an integer label
    # lookup would be ambiguous.
    qualifiers = metrics['qualifiers']
    if isinstance(qualifiers.iloc[0], list):
        # Keep the first qualifier of each row; an empty list maps to None
        metrics['status'] = [q[0] if q else None for q in qualifiers]

    # Dictionary of metadata with metrics dataframe
    source = location['sourceInfo']
    geo_info = source['geoLocation']['geogLocation']
    site_code = source['siteCode'][0]

    site_data = {
        # title() lower-cases the state suffix, so restore 'NC'
        'site_name': source['siteName'].title().replace('Nc', 'NC'),
        'site_code': site_code['value'],
        'network': site_code['network'],
        'projection': geo_info['srs'],
        'latitude': geo_info['latitude'],
        'longitude': geo_info['longitude'],
        'coordinates': (geo_info['latitude'],
                        geo_info['longitude']),
        'measurement': location['variable']['variableName'],
        'description': description,
        'start_datetime': start,
        'end_datetime': end,
        'metrics': metrics,
    }

    return site_data



In [17]:
# Future Main Loop

# Fetch the feed. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() surfaces an HTTP error before the body
# is parsed as JSON.
data = requests.get(url, timeout=30)
data.raise_for_status()
raw_data = data.json()
time_series = raw_data['value']['timeSeries']

# One entry per time series, keyed "<site name>-<metric description>"
# (lower-cased, spaces turned into underscores, commas removed).
dataset = {}
for location in time_series:
    site_data = get_location_data(location)
    unique_name = site_data['site_name'] + '-' + site_data['description']
    key_name = unique_name.lower().replace(' ', '_').replace(',', '')
    dataset[key_name] = site_data


In [39]:
# Collect every metric table belonging to the same site. The site part of
# the name is rebuilt from 'site_name' (same normalisation as the dataset
# keys) so a hyphen inside a site name cannot split it in the wrong place.
frames_by_location = {}
for key in dataset:
    site_name = dataset[key]['site_name']
    location = site_name.lower().replace(' ', '_').replace(',', '')

    # The raw qualifier lists are redundant once 'status' exists
    frame = dataset[key]['metrics'].drop(
        columns=['qualifiers'], errors='ignore')
    frames_by_location.setdefault(location, []).append(frame)

# Inner join the tables of each site on their shared timestamp index.
# Sites with a single metric have nothing to combine and are left out.
location_datasets = {}
for location, frames in frames_by_location.items():
    if len(frames) < 2:
        continue

    merged = frames[0]
    for frame in frames[1:]:
        # The newer table goes on the left and keeps its 'status'; the
        # accumulated side drops its own so the column is not duplicated.
        # 'status' is only present when qualifiers were lists, hence
        # errors='ignore'.
        merged = pd.merge(frame,
                          merged.drop(columns=['status'], errors='ignore'),
                          left_index=True,
                          right_index=True)

    # Add the combined data to the datasets
    location_datasets[location] = merged
location_datasets.keys()

match
match


dict_keys(['haw_river_at_haw_river_nc', 'haw_river_near_bynum_nc'])

In [41]:
# Step through the merged site tables; once the loop ends, `key` and `p`
# stay bound to the last entry.
for key, p in location_datasets.items():
    pass

# Summary statistics for that last table (shown as the cell output).
p.describe()

Unnamed: 0,gage_height_feet,discharge_cubic_feet_per_second
count,288.0,288.0
mean,13.303854,29906.736111
std,3.480894,16720.101271
min,7.52,5230.0
25%,9.545,10850.0
50%,13.96,31800.0
75%,16.5025,45600.0
max,17.62,51900.0
