# Haw River Levels
Data is pulled from the [USGS Instantaneous Values REST API](https://waterservices.usgs.gov/rest/IV-Service.html).

In [1]:
import bqplot as bq
import ipyleaflet as lf
import ipywidgets as widgets
import pandas as pd
import requests

from bqplot import pyplot as plt



In [2]:
# Request URL for the USGS service, assembled from its query parameters:
# JSON output, two site numbers, a start/end date window, two parameter
# codes and no filtering on site status.
url = ''.join([
    'https://waterservices.usgs.gov/nwis/iv/?format=json',
    '&sites=02096960,02096500',
    '&startDT=2018-08-01',
    '&endDT=2018-10-01',
    '&parameterCd=00060,00065',
    '&siteStatus=all',
])

def get_location_data(location):
    '''Extracts location data from a timeSeries data array.

    Parameters:
        location: one element of the response's timeSeries list. It must
            provide location['values'][0]['value'] (a list of records
            with 'value' and 'dateTime', and usually 'qualifiers'),
            location['variable'] and location['sourceInfo'].

    Returns a dictionary of site data. Its 'metrics' entry is a
    dataframe of the metric for that location, indexed by dateTime,
    with the value column renamed to the normalised variable
    description.

    Raises KeyError if an expected field is missing from the input.'''

    # Time series array to time series dataframe
    metrics = pd.DataFrame.from_records(location['values'][0]['value'])

    # Set data types
    metrics['value'] = metrics['value'].astype('float64')
    # A unit-less astype('datetime64') is rejected by current pandas, so
    # parse explicitly. utc=True copes with timestamps carrying different
    # UTC offsets; the timezone is then dropped so the column stays a
    # naive datetime64, the dtype the original cast asked for.
    metrics['dateTime'] = pd.to_datetime(
        metrics['dateTime'], utc=True).dt.tz_localize(None)

    # Add time limits to meta data
    start = metrics['dateTime'].min()
    end = metrics['dateTime'].max()

    # Set value name to metric description
    description = location['variable']['variableDescription']
    description = description.replace(' ', '_').replace(',', '').lower()
    metrics = metrics.rename(columns={'value': description})

    # Set index to timescale
    metrics = metrics.set_index('dateTime')

    # Change array in qualifiers to string.
    # The index is now datetime based, so the first row has to be read by
    # position (.iloc) rather than with the label-style lookup [0].
    if ('qualifiers' in metrics.columns
            and isinstance(metrics['qualifiers'].iloc[0], list)):
        # Keep the first qualifier only; an empty or non-list entry
        # becomes None instead of raising.
        metrics['status'] = [
            q[0] if isinstance(q, list) and q else None
            for q in metrics['qualifiers']
        ]

    # Dictionary of metadata with metrics dataframe
    source = location['sourceInfo']
    geo_info = source['geoLocation']['geogLocation']
    site_code = source['siteCode'][0]

    site_data = {
        'site_name': source['siteName'].title().replace('Nc', 'NC'),
        'site_code': site_code['value'],
        'network': site_code['network'],
        'projection': geo_info['srs'],
        'latitude': geo_info['latitude'],
        'longitude': geo_info['longitude'],
        'coordinates': (geo_info['latitude'],
                        geo_info['longitude']),
        'measurement': location['variable']['variableName'],
        'description': description,
        'start_datetime': start,
        'end_datetime': end,
        'metrics': metrics,
    }

    return site_data


def join_metrics(previous, current):
    '''Inner join two dataframes on their indexes.

    Removes duplicate columns to reduce data size: any column present in
    both frames is kept from current only, and the raw 'qualifiers'
    column is dropped from whichever frame carries it.

    Parameters:
        previous: dataframe joined on the right.
        current: dataframe joined on the left; its columns come first in
            the result and win over same-named columns in previous.

    Returns merged dataframe holding only the index values found in both
    inputs.'''

    # Column names that would collide in the join
    current_columns = set(current.columns)

    # From previous, discard the duplicates plus any 'qualifiers' column
    unwanted = [
        column for column in previous.columns
        if column in current_columns or column == 'qualifiers'
    ]

    # errors='ignore' lets a frame without 'qualifiers' (for example an
    # already merged one) be passed in as current without a KeyError.
    left = current.drop(columns=['qualifiers'], errors='ignore')
    right = previous.drop(columns=unwanted)

    # Inner join the datasets on their indexes
    merged = pd.merge(left, right,
                      how='inner',
                      left_index=True,
                      right_index=True)

    return merged

def combine_location_metrics(dataset):
    '''Joins data based on location.

    Parameters:
        dataset: dictionary keyed by '<location>-<description>' whose
            values are dictionaries holding a 'metrics' dataframe.

    Returns a dictionary keyed by location (the part of the key before
    the first '-') holding the join of every metrics dataframe found for
    that location. A location that appears only once has nothing to be
    joined with and is left out of the result.'''

    # First dataframe seen for a location, waiting for a partner
    pending = {}
    location_datasets = {}

    for key, site in dataset.items():
        # Extract the location portion of the name
        location = key.split('-')[0]
        current = site['metrics']

        if location in location_datasets:
            # Third and later series: extend the frame already combined
            # so that earlier metrics are not overwritten.
            previous = location_datasets[location]
        elif location in pending:
            # Second series for this location, wherever the first one sat
            # in the dictionary order.
            previous = pending.pop(location)
        else:
            # First series for this location: remember it and move on
            pending[location] = current
            continue

        # Combine the dataframes and store the result
        location_datasets[location] = join_metrics(previous, current)

    return location_datasets



In [3]:
# Future Main Loop

# A timeout keeps the cell from hanging on an unresponsive server
data = requests.get(url, timeout=30)
if data.status_code != 200:
    # Nothing below can work without a successful response, so stop here.
    # status_code is an int and has to be converted before concatenation.
    raise RuntimeError(
        'Server returned ' + str(data.status_code) + ' exiting...')

print('data downloaded')

try:
    raw_data = data.json()
except ValueError as err:
    # The JSON decoding errors raised by Response.json() are ValueError
    # subclasses; without a parsed body the rest of the cell cannot run.
    raise RuntimeError('Response body could not be parsed as JSON') from err

print('Data Parsed')

time_series = raw_data['value']['timeSeries']

# One entry per (site, variable) series, keyed '<site_name>-<description>'
# in lower case with spaces turned into underscores and commas removed.
dataset = {}
for location in time_series:
    site_data = get_location_data(location)
    unique_name = site_data['site_name'] + '-' + site_data['description']
    key_name = unique_name.lower().replace(' ', '_').replace(',', '')
    dataset[key_name] = site_data

# Join the series that belong to the same location
location_data = combine_location_metrics(dataset)

data downloaded
Data Parsed


In [4]:
# future plot function

# Take the first combined location and pull its series out of the frame
fig_title = list(location_data)[0]
df = location_data[fig_title]
date = df.index.values

# Values equal to -999999.0 are swapped for 0 before being listed
depth = list(df['gage_height_feet'].replace(-999999.0, 0))
flow = list(df['discharge_cubic_feet_per_second'].replace(-999999.0, 0))

# Date scale with its axis
dt_x = bq.DateScale()
ax_x = bq.Axis(scale=dt_x)

# Linear scale with its vertical axis
sc_y = bq.LinearScale()
ax_y = bq.Axis(scale=sc_y, orientation='vertical')

# Depth against time; flow is prepared above but not drawn here
line = bq.Lines(
    x=date,
    y=depth,
    scales={'x': dt_x, 'y': sc_y},
)

plt.figure(marks=[line], axes=[ax_x, ax_y], title=fig_title)


Figure(axes=[Axis(scale=DateScale()), Axis(orientation='vertical', scale=LinearScale())], fig_margin={'top': 6…