## Interactive Plot for Santa Monica Curb Data

This notebook, built on seaborn and ipywidgets, lets the user pick a date and time range of interest in the Santa Monica curb data and generates the corresponding plot in near real time. It also provides an option to download the underlying data as a CSV file.

In [1]:
##Run this cell to import libraries required by the rest of the notebook

import numpy as np
import pandas as pd
import datetime as dt

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.cbook as cbook
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import seaborn as sns
#import mplcursors

import sys
import operator
import time

import fastparquet
import snappy

import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import Javascript

  from pandas.core.index import CategoricalIndex, RangeIndex, Index, MultiIndex


In [2]:
## This cell reads in the source data

#load the observations, order them by timestamp and discard rows with no Duration
alldays_timestamped = (
    pd.read_parquet('TimestampToSep11new.parquet')
    .sort_index(level='Timestamp')
    .dropna(subset=['Duration'])
)

In [3]:
##This cell defines the plotting functions

def showall(df):
    """Display ``df`` with no row or column truncation.

    Guarded by an assertion so that only frames with fewer than 5000 rows are
    rendered in full.
    """
    assert len(df.index) < 5000
    #lift both pandas display limits only for the duration of this call
    unlimited = ('display.max_rows', None, 'display.max_columns', None)
    with pd.option_context(*unlimited):
        display(df)

def _marker_size(duration_min):
    """Return the strip-plot marker size for a plot spanning ``duration_min`` minutes.

    Shorter windows get larger markers so sparse plots stay readable.
    """
    if duration_min < 2:
        return 7
    elif duration_min < 5:
        return 6
    elif duration_min < 15:
        return 5
    elif duration_min < 60:
        return 3
    else:
        return 2


def make_plot(df, palette, save):
    """Using a modified seaborn strip plot, plots space occupancy and bike lane blocking
    using the specified color palette.

    Parameters
    ----------
    df : DataFrame indexed by timestamp; reads the 'Vehicle Location',
        'Violator' and 'Begin Date' columns.
    palette : mapping of 'Violator' value -> color, passed to seaborn as-is.
    save : if truthy, also write the figure as a PNG under ``Figures/``.

    Returns
    -------
    The matplotlib Axes the plot was drawn on.

    Raises
    ------
    ValueError
        If no rows remain once the excluded locations are removed.
    """
    #remove locations besides spc 1-3, bike lane
    excluded = ['SB travel lane', 'NB right turn pocket', 'Both/Middle of Space 1 & 2']
    #order locations and create time column from index for x-axis; sort_values/assign
    #return new frames, so the caller's data is never modified
    filtered_df = (
        df[~df['Vehicle Location'].isin(excluded)]
        .sort_values(by=['Vehicle Location'])
        .assign(Time=lambda frame: frame.index)
    )
    if filtered_df.empty:
        raise ValueError("make_plot: no rows left to plot after removing excluded "
                         "'Vehicle Location' values")

    start = filtered_df.index.min().to_pydatetime()
    end = filtered_df.index.max().to_pydatetime()
    #total_seconds() keeps whole days; timedelta.seconds alone would drop them
    duration = (end - start).total_seconds() / 60
    #enforcement start/end times are fixed at 18:00 and 22:00 on the day of the first
    #record; seconds are zeroed so the lines sit exactly on the hour
    enf_start = start.replace(hour=18, minute=0, second=0, microsecond=0)
    enf_end = start.replace(hour=22, minute=0, second=0, microsecond=0)

    fig, ax = plt.subplots()
    #plot using Seaborn strip plot, set x-axis range, add line at enforcement start/end time
    #hue based on 'Violator' values, which include TNC/CNS status
    sns.stripplot(x="Time", y="Vehicle Location", hue='Violator', palette=palette,
                  data=filtered_df, size=_marker_size(duration), jitter=False, ax=ax)
    ax.set_xlim([start, end])
    ax.axvline(enf_start, label='Loading Zone Start', c='r')
    ax.axvline(enf_end, label='Loading Zone End', c='b')
    #move legend
    ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    #title plot with date; .iloc because the frame is indexed by timestamp, not position
    ax.set_title(filtered_df['Begin Date'].iloc[0])
    #format times on x-axis to readable '9:30PM' format
    #NOTE(review): '%-I' is a platform-specific strftime flag - confirm if Windows matters
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%-I:%M %p'))
    fig.autofmt_xdate()
    #Save to figures folder with start/end times in filename
    #NOTE(review): nothing here creates the 'Figures' folder - presumably it must already exist
    if save:
        fig.savefig('Figures/{}–{}.png'.format(start, end), dpi=600, format="png", bbox_inches='tight')
    plt.show()
    return ax

def plot(df, allday_df=alldays_timestamped, save=False):
    """Top-level plot function: applies the seaborn style, builds the color palette
    and hands off to make_plot.

    The palette is keyed on the sorted unique 'Violator' values of ``allday_df``
    (not of ``df``).
    """
    sns.set_style('white')
    #colors from xkcd colors https://xkcd.com/color/rgb/
    xkcd_names = [
        'black', 'scarlet', 'light red',
        'olive green', 'grass green', 'mint',
        'brick red', 'dark orange', 'pumpkin',
    ]
    #get/sort unique Violator types
    violator_types = allday_df['Violator'].unique()
    violator_types.sort()
    #pair each Violator type with its color value
    palette = {
        label: rgb
        for label, rgb in zip(violator_types, sns.xkcd_palette(xkcd_names))
    }
    #call to make actual plot
    make_plot(df, palette, save)

In [7]:
def download_df(df):
    """Generates downloadable CSV from input df.

    Adapted from
    https://stackoverflow.com/questions/31893930/download-csv-from-an-ipython-notebook
    Javascript allows for client-side CSV generation, avoids creating server-side CSV
    for each request.
    Tends to generate two duplicate downloads on Firefox, but one on Safari. Have yet
    to test with Chrome. Likely ipython/Jupyter/browser quirk.

    Parameters
    ----------
    df : DataFrame to export; written with ``to_csv(index=False)``.

    Returns
    -------
    IPython.display.Javascript that, when displayed, makes the browser download
    the CSV as 'CurbDataExport.csv'.
    """
    import json
    from IPython.display import Javascript

    #json.dumps yields a quoted, fully escaped string literal that is also valid
    #Javascript, so quotes, backslashes and line breaks in the data cannot break
    #(or inject code into) the generated script
    csv_literal = json.dumps(df.to_csv(index=False))
    js_download = """
    var csv = %s;

    var filename = 'CurbDataExport.csv';
    var blob = new Blob([csv], { type: 'text/csv;charset=utf-8;' });
    if (navigator.msSaveBlob) { // IE 10+
        navigator.msSaveBlob(blob, filename);
    } else {
        var link = document.createElement("a");
        if (link.download !== undefined) { // feature detection
            // Browsers that support HTML5 download attribute
            var url = URL.createObjectURL(blob);
            link.setAttribute("href", url);
            link.setAttribute("download", filename);
            link.style.visibility = 'hidden';
            document.body.appendChild(link);
            link.click();
            document.body.removeChild(link);
        }
    }
    """ % csv_literal
    return Javascript(js_download)

In [5]:
##This cell wraps the plot function to be used with interact
##It also adds functionality for time range selection and download generation

#widgets handed to interact: time-range slider, date dropdown, download selector
#the slider starts with a single placeholder option
time_widget = widgets.SelectionRangeSlider(
    options=[0],
    continuous_update=False,
    layout={'width': '400px'},
)
date_widget = widgets.Dropdown(
    options=alldays_timestamped['Begin Date'].unique(),
)
download_widget = widgets.ToggleButtons(
    options=['None', 'Summary Only', 'Detailed'],
    description='Generate Download?',
    disabled=False,
    # button_style accepts 'success', 'info', 'warning', 'danger' or ''
    button_style='',
    tooltips=[
        'Will not generate export CSV',
        'Generates export CSV with level of detail same as displayed summary',
        'Generates export CSV with full detail',
    ],
    style={'description_width': 'initial'},
)

def update(*args):
    """allows date selection to define time selection range

    Looks up the rows recorded on the date currently selected in ``date_widget``
    and offers their distinct minute-resolution times (e.g. '06:30PM') as the
    options of ``time_widget``. ``*args`` absorbs the change notification passed
    by the widget observer; it is not used.
    """
    index = alldays_timestamped[alldays_timestamped['Begin Date'] == date_widget.value].index
    minutes = index.strftime('%I:%M%p').unique()
    time_widget.options = list(minutes)

#react only to a change of the selected date, not to every trait of the dropdown
date_widget.observe(update, names='value')
#fill the slider for the initially selected date; the observer only fires on later
#changes, which would otherwise leave the placeholder option in place
update()

def disp_summary(filtered_df, keep, viol_correction):
    """Builds the summary table for a (subset) dataframe.
    Appears under plot created by iplot function.

    Parameters
    ----------
    filtered_df : DataFrame of observations; not modified.
    keep : passed to ``drop_duplicates`` ('first' or 'last') to choose which of
        several identical rows survives.
    viol_correction : if truthy, the last column of the top (longest-duration)
        row is overwritten with 'Violator'.

    Returns
    -------
    DataFrame sorted by 'Duration' (descending) and indexed by 'Vehicle Location'
    (which is also kept as a column). May be empty.
    """
    #Drop space 3 and minor locations to focus on Space 1/Space 2
    excluded_locations = ['Space 3', 'SB travel lane',
                          'NB right turn pocket', 'Both/Middle of Space 1 & 2']
    #Drop less useful columns to present clear summary
    dropped_columns = ['Begin Date', 'Vehicle Type', 'Vehicle Characteristics',
                       'Bikeway Users Displaced', 'LZ Space Avail',
                       'Occupied while idle?', 'CNS?', 'TNC?']
    #every step returns a new frame, so nothing is written back into a slice of the input
    top_durations = (
        filtered_df[~filtered_df['Vehicle Location'].isin(excluded_locations)]
        .drop(columns=dropped_columns)
        #keep value chosen by the caller, ensures accurate Violator display in df
        .drop_duplicates(keep=keep)
        .sort_values(by='Duration', ascending=False)
        .set_index('Vehicle Location', drop=False)
    )
    #manually correct edge case of violator spanning entire interval; skipped when no
    #Space 1/Space 2 rows remain, since there is then no top row to correct
    #NOTE(review): relies on the violator column being the last column - confirm
    if viol_correction and not top_durations.empty:
        top_durations.iloc[0, -1] = 'Violator'
    return top_durations

def iplot(Date, Time, df, enf_start, enf_end, download, display_rows):
    """Interactive wrapper for plot function.

    Filters ``df`` to the selected date and time range, draws the plot, displays
    the summary table and optionally triggers a CSV download.

    Parameters
    ----------
    Date : value matched against the 'Begin Date' column.
    Time : pair of '%I:%M%p' strings (e.g. ``('06:00PM', '09:30PM')``); only rows
        strictly after the first and strictly before the second are kept.
    df : timestamp-indexed DataFrame holding all observations.
    enf_start, enf_end : enforcement window bounds as ISO 'HH:MM:SS' strings.
    download : 'None', 'Summary Only' or 'Detailed'.
    display_rows : number of summary rows to display.

    Returns
    -------
    None. Problems with the selection or the data are reported with a printed
    message instead of an exception.
    """
    datefiltered = df[df['Begin Date'] == Date]
    #calculates enforcement duration based on fixed/variable args
    enf_end_time = dt.time.fromisoformat(enf_end)
    today = dt.date.today()
    enf_dur = (dt.datetime.combine(today, enf_end_time)
               - dt.datetime.combine(today, dt.time.fromisoformat(enf_start)))

    #parse the selected interval; the slider's placeholder value is not a string
    #and fails here rather than deeper in the function
    try:
        start_time = dt.datetime.strptime(Time[0], '%I:%M%p').time()
        end_time = dt.datetime.strptime(Time[1], '%I:%M%p').time()
    except (TypeError, ValueError, IndexError):
        print('Please select a valid time range.')
        return

    #filters df based on selected interval
    timefiltered = datefiltered[datefiltered.index.time > start_time]
    timefiltered = timefiltered[timefiltered.index.time < end_time]
    #zero-duration (or otherwise empty) selections: checked before any Duration
    #handling so they are not misreported as bad source data
    if timefiltered.shape[0] < 1:
        print('Please select a valid time range.')
        return

    #tests to sort so as to display accurate Violator value
    keep = 'last'
    viol_correction = False
    if end_time > enf_end_time:
        keep = 'first'
        longest_dur = None
        try:
            longest_dur = timefiltered['Duration'].max()
            to_td = dt.datetime.strptime(longest_dur, '%H:%M:%S')
        except (TypeError, ValueError):
            #invalid Duration values in source data (non-string or malformed)
            print('Invalid duration values, violator classification may be inaccurate')
            #NaN never compares equal to itself, so test with pd.isna
            if longest_dur is not None and pd.isna(longest_dur):
                print('Please select a valid time range.')
            return
        td = dt.timedelta(hours=to_td.hour, minutes=to_td.minute, seconds=to_td.second)
        if td > enf_dur:
            viol_correction = True

    plot(timefiltered)
    summarydf = disp_summary(timefiltered, keep, viol_correction)
    display(summarydf[:display_rows])

    #generate download, if requested
    if download == 'Summary Only':
        to_download = summarydf.sort_values(by='Begin Time')
    elif download == 'Detailed':
        to_download = timefiltered.drop_duplicates(keep=keep).sort_values(by='Begin Time')
    else:
        #'None' (or any unrecognised choice): nothing to export
        return
    display(download_df(to_download))

    return

In [6]:
##After running all cells above, run this cell to start the interactive visualization!
#Date, Time and download are driven by widgets; the remaining arguments are fixed
#the trailing semicolon suppresses the cell's text output
interact(
    iplot,
    Date=date_widget,
    Time=time_widget,
    df=fixed(alldays_timestamped),
    enf_start=fixed('18:00:00'),
    enf_end=fixed('22:00:00'),
    download=download_widget,
    display_rows=fixed(10),
);

interactive(children=(Dropdown(description='Date', options=('08/21/2019', '08/22/2019', '08/23/2019', '08/24/2…