# Haw River Levels
Data feed from [USGS REST API](https://waterservices.usgs.gov/rest/IV-Service.html)

In [1]:
import ipyleaflet as lf
import ipywidgets as widgets
import pandas as pd
import requests

In [56]:
# Request URL for the USGS instantaneous-values service, assembled from
# one fragment per query parameter so each can be edited on its own line.
url = ''.join([
    'https://waterservices.usgs.gov/nwis/iv/?format=json',
    '&sites=02096960,02096500',
    '&startDT=2018-09-17',
    '&endDT=2018-09-19',
    '&parameterCd=00060,00065',
    '&siteStatus=all',
])

def get_location_data(location):
    '''Extract site metadata and measurements from one timeSeries entry.

    Parameters
    ----------
    location : dict
        One element of the feed's ``timeSeries`` array. The function reads
        ``location['values'][0]['value']`` (a list of records carrying
        ``value``, ``dateTime`` and ``qualifiers``), ``location['variable']``
        and ``location['sourceInfo']``.

    Returns
    -------
    dict
        Site metadata (name, code, network, projection, coordinates,
        measurement name, description, first/last timestamp) plus, under
        ``'metrics'``, a dataframe indexed by ``dateTime`` whose value
        column is named after the normalised variable description.

    Raises
    ------
    KeyError
        If an expected key is missing from ``location`` or the series
        contains no records.
    '''

    # Time series array to time series dataframe
    metrics = pd.DataFrame.from_records(location['values'][0]['value'])

    # Set data types
    metrics['value'] = metrics['value'].astype('float64')
    # Parse as UTC, then drop the timezone so the index is naive UTC. This
    # matches the timestamps in the notebook output and avoids the unit-less
    # ``astype('datetime64')`` cast that newer pandas versions reject.
    metrics['dateTime'] = pd.to_datetime(
        metrics['dateTime'], utc=True).dt.tz_localize(None)

    # Add time limits to meta data
    start = metrics['dateTime'].min()
    end = metrics['dateTime'].max()

    # Set value name to metric description
    description = location['variable']['variableDescription']
    description = description.replace(' ', '_').replace(',', '').lower()
    metrics.rename(columns={'value': description}, inplace=True)

    # Set index to timescale
    metrics.set_index('dateTime', inplace=True)

    # Change array in qualifiers to string. ``.iloc`` is used because the
    # index is now datetime-based, so ``[0]`` would be a label lookup.
    if isinstance(metrics['qualifiers'].iloc[0], list):
        # Keep the first qualifier; an empty list yields None rather than
        # raising IndexError.
        metrics['status'] = [
            flags[0] if flags else None for flags in metrics['qualifiers']
        ]

    # Dictionary of metadata with metrics dataframe
    source = location['sourceInfo']
    geo_info = source['geoLocation']['geogLocation']

    site_data = {
        'site_name': source['siteName'].title().replace('Nc', 'NC'),
        'site_code': source['siteCode'][0]['value'],
        'network': source['siteCode'][0]['network'],
        'projection': geo_info['srs'],
        'latitude': geo_info['latitude'],
        'longitude': geo_info['longitude'],
        'coordinates': (geo_info['latitude'],
                        geo_info['longitude']),
        'measurement': location['variable']['variableName'],
        'description': description,
        'start_datetime': start,
        'end_datetime': end,
        'metrics': metrics,
    }

    return site_data


def join_metrics(previous, current):
    '''Merge two metric dataframes on their index.

    Columns that appear in both frames are taken from ``current`` only,
    and the ``qualifiers`` column of ``current`` is discarded, to keep the
    result small. Only index values present in both frames survive
    (``merge`` is called with its default join type).

    Returns the merged dataframe, with the columns of ``current`` first.
    '''

    # Names that occur in both frames
    overlap = [name for name in previous.columns if name in current.columns]

    # Trim each side before joining
    left = current.drop(columns=['qualifiers'])
    right = previous.drop(columns=overlap)

    # Join on the index of both frames
    return left.merge(right, left_index=True, right_index=True)

def combine_location_metrics(dataset):
    '''Join the metric dataframes that belong to the same location.

    Parameters
    ----------
    dataset : dict
        Maps ``"<location>-<metric>"`` keys to dictionaries that hold a
        dataframe under ``'metrics'``. The location is the part of the key
        before the first ``'-'``.

    Returns
    -------
    dict
        Maps each location that has at least two series to the dataframe
        produced by joining all of its series with ``join_metrics``.
        Locations with a single series are omitted.
    '''

    # Location -> key of the first series seen, waiting for a second one
    pending = {}
    location_datasets = {}

    for key in dataset:
        # Extract the location portion of the name
        # NOTE(review): a site name containing '-' would be cut short here.
        location = key.split('-')[0]

        if location in location_datasets:
            # Third and later series: extend the frame already merged so
            # earlier series are not lost.
            previous = location_datasets[location]
        elif location in pending:
            # Second series: pair it with the first one seen, wherever it
            # sits in the dictionary order.
            previous = dataset[pending[location]]['metrics']
        else:
            # First series for this location; nothing to join yet
            pending[location] = key
            continue

        # Combine the dataframes and store the result for the location
        current = dataset[key]['metrics']
        location_datasets[location] = join_metrics(previous, current)

    return location_datasets



In [57]:
# Future Main Loop

# Fetch the feed. The timeout stops the cell hanging on an unresponsive
# server, and raise_for_status() reports HTTP errors before JSON parsing.
data = requests.get(url, timeout=30)
data.raise_for_status()
raw_data = data.json()
time_series = raw_data['value']['timeSeries']

# One entry per time series, keyed by "<site name>-<description>" in
# lower case with spaces turned into underscores and commas removed.
dataset = {}
for location in time_series:
    site_data = get_location_data(location)
    unique_name = '-'.join((site_data['site_name'],
                            site_data['description']))
    key_name = unique_name.lower().replace(' ', '_').replace(',', '')
    dataset[key_name] = site_data

# NOTE: the name is spelled "locaton_data" because a later cell reads it
# under exactly that name.
locaton_data = combine_location_metrics(dataset)

In [53]:
# Per-location join of the metric dataframes, done through the shared
# helper rather than repeating its loop inline.
location_datasets = combine_location_metrics(dataset)
location_datasets.keys()

dict_keys(['haw_river_at_haw_river_nc', 'haw_river_near_bynum_nc'])

In [58]:
# Walk the combined frames; afterwards `key` and `p` hold the last
# location name and its dataframe.
for key, p in location_datasets.items():
    pass

# Show the dictionary built by the main-loop cell
locaton_data

{'haw_river_at_haw_river_nc':                      gage_height_feet status  discharge_cubic_feet_per_second
 dateTime                                                                     
 2018-09-17 04:00:00             10.06      P                           3860.0
 2018-09-17 04:15:00             10.41      P                           4080.0
 2018-09-17 04:30:00             10.78      P                           4310.0
 2018-09-17 04:45:00             11.13      P                           4540.0
 2018-09-17 05:00:00             11.48      P                           4770.0
 2018-09-17 05:15:00             11.82      P                           5030.0
 2018-09-17 05:30:00             12.16      P                           5290.0
 2018-09-17 05:45:00             12.50      P                           5570.0
 2018-09-17 06:00:00             12.80      P                           5820.0
 2018-09-17 06:15:00             13.11      P                           6080.0
 2018-09-17 06:30:00   

{'gage_height_feet', 'qualifiers', 'status'}