In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import folium
from folium import plugins
#import matplotlib.pyplot as plt
#import seaborn as sns
#%matplotlib inline

import datetime as dt

import pysal as ps
from pysal.viz import mapclassify

import ipywidgets as widgets

# Shorthand for building MultiIndex slicers; used with .loc as idx[level0, level1, ...].
idx = pd.IndexSlice

In [2]:
#nbi:hide_in
#Patch for Chrome courtesy of : https://github.com/python-visualization/folium/issues/812#issuecomment-555238062

import base64


def _repr_html_(self, **kwargs):
    html = base64.b64encode(self.render(**kwargs).encode('utf8')).decode('utf8')
    onload = (
        'this.contentDocument.open();'
        'this.contentDocument.write(atob(this.getAttribute(\'data-html\')));'
        'this.contentDocument.close();'
    )
    if self.height is None:
        iframe = (
            '<div style="width:{width};">'
            '<div style="position:relative;width:100%;height:0;padding-bottom:{ratio};">'
            '<iframe src="about:blank" style="position:absolute;width:100%;height:100%;left:0;top:0;'
            'border:none !important;" '
            'data-html={html} onload="{onload}" '
            'allowfullscreen webkitallowfullscreen mozallowfullscreen>'
            '</iframe>'
            '</div></div>').format
        iframe = iframe(html=html, onload=onload, width=self.width, ratio=self.ratio)
    else:
        iframe = ('<iframe src="about:blank" width="{width}" height="{height}"'
                  'style="border:none !important;" '
                  'data-html={html} onload="{onload}" '
                  '"allowfullscreen" "webkitallowfullscreen" "mozallowfullscreen">'
                  '</iframe>').format
        iframe = iframe(html=html, onload=onload, width=self.width, height=self.height)
    return iframe

# Replace branca's Figure._repr_html_ with the patched renderer defined above.
folium.branca.element.Figure._repr_html_ = _repr_html_

In [21]:
def crowding_view(df, geo_df, st_hour, end_hour, routes):
    """Aggregate crowding observations per stop and attach them to the stop geometries.

    Parameters
    ----------
    df : pandas.DataFrame
        Observation counts indexed by a (route_id, stop_id, hour) MultiIndex,
        with one numeric column per occupancy status and, optionally, a
        ``route_id`` column used to list the routes serving each stop.
    geo_df : geopandas.GeoDataFrame
        Stop geometries indexed by stop_id.
    st_hour, end_hour : int
        First and last hour to include; both ends are inclusive.
    routes : sequence of str
        Route ids (first index level) to keep. If it contains 'All Routes',
        no route filter is applied.

    Returns
    -------
    geopandas.GeoDataFrame
        ``geo_df`` joined with the per-stop totals, a 'Percent Crowded' column
        and a 'Route(s) Serving Stop' column, restricted to stops that have a
        'Percent Crowded' value and reprojected to EPSG:4326.

    Raises
    ------
    KeyError
        If the data has neither a 'STANDING_ROOM_ONLY' nor a 'FULL' column.
    """
    route_label = 'Route(s) Serving Stop'

    # The route list is built from the unfiltered data, so it names every route
    # at a stop regardless of the current selection.
    try:
        routes_at_stop = (
            df['route_id']
            .groupby(level='stop_id')
            .agg(lambda ids: ', '.join(map(str, ids.unique())))
            .rename(route_label)
        )
    except (KeyError, ValueError) as err:
        # Best effort: without this information the map can still be drawn, so
        # report the problem and leave the field empty instead of failing later.
        print(f'Could not determine the routes serving each stop ({err!r}); '
              'leaving that field blank.')
        routes_at_stop = None

    # Boolean masks keep only the labels that exist. Label-based .loc raises
    # KeyError for absent labels on pandas >= 2.0, and the "Entire Day" range
    # asks for hour 24.
    keep = df.index.get_level_values(2).isin(np.arange(st_hour, end_hour + 1))
    if 'All Routes' not in routes:
        keep = keep & df.index.get_level_values(0).isin(list(routes))

    # numeric_only=True restricts the totals to the count columns on every
    # pandas version (string columns such as route_id would otherwise be
    # concatenated on pandas >= 2.0 and break the row-wise sum below).
    counts = df[keep].groupby(level='stop_id').sum(numeric_only=True)
    counts['Total Observations'] = counts.sum(axis=1)

    if 'STANDING_ROOM_ONLY' in counts.columns:
        counts['Percent Crowded'] = (
            counts['STANDING_ROOM_ONLY'] / counts['Total Observations']) * 100
    elif 'FULL' in counts.columns:
        counts['Percent Full'] = (counts['FULL'] / counts['Total Observations']) * 100
        counts['Percent Crowded'] = (
            ((counts['FEW_SEATS_AVAILABLE'] / counts['Total Observations']) * 100)
            .add(counts['Percent Full'], fill_value=0))
    else:
        raise KeyError("Expected a 'STANDING_ROOM_ONLY' or 'FULL' column in the crowding data")

    if routes_at_stop is None:
        counts[route_label] = np.nan
    else:
        counts = counts.join(routes_at_stop)

    return geo_df.join(counts).dropna(subset=['Percent Crowded']).to_crs('EPSG:4326')

In [6]:
%%capture
#nbi:hide_in
def add_choropleth(gdf, m, classifier):
    """Add a stop-level crowding choropleth, and MBTA rail lines if loaded, to ``m``.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Stops indexed by stop id with 'stop_name', 'Percent Crowded' and
        'Route(s) Serving Stop' columns. The frame is not modified.
    m : folium.Map
        Map that receives the layers.
    classifier : str
        Either 'Equal Interval (categories spaced 20% apart)' or
        'Quantiles (five categories, each containing the same number of observations)'.

    Raises
    ------
    ValueError
        If ``classifier`` is not one of the two supported labels. Any
        ValueError raised by mapclassify or folium also propagates, which the
        caller uses as the signal to fall back to Equal Interval.
    """
    friendly_name = 'Percentage of Trips that may be Crowded'

    # Build a renamed copy instead of renaming in place, so the caller's frame
    # keeps its original column names and can be passed in again.
    stops = (
        gdf.assign(**{'Percent Crowded': gdf['Percent Crowded'].round(2)})
        .rename(columns={'stop_name': 'Stop Name', 'Percent Crowded': friendly_name})
        .rename_axis('Stop ID')
    )

    if classifier == 'Equal Interval (categories spaced 20% apart)':
        # The upper edge is 101 so that a value of exactly 100 falls in the last bin.
        threshold_scale = [0, 20, 40, 60, 80, 101]
    elif classifier == 'Quantiles (five categories, each containing the same number of observations)':
        quantile_edges = mapclassify.Quantiles(stops[friendly_name], k=5).bins.tolist()
        # mapclassify returns upper bin edges only; prepend the minimum as the lower bound.
        threshold_scale = [stops[friendly_name].min()] + quantile_edges
    else:
        raise ValueError(f'Unknown classifier: {classifier!r}')

    stops_flat = stops.reset_index()
    choropleth = folium.Choropleth(
        geo_data=stops_flat.to_json(),
        data=stops_flat,
        columns=('Stop ID', friendly_name),
        key_on='feature.properties.Stop ID',
        nan_fill_color='red', fill_color='YlOrRd', fill_opacity=0.6, line_opacity=0.2,
        threshold_scale=threshold_scale, legend_name=friendly_name,
    )
    choropleth.add_to(m)

    choropleth.geojson.add_child(
        folium.features.GeoJsonTooltip(
            ['Stop ID', 'Stop Name', friendly_name, 'Route(s) Serving Stop']))

    # mass_train is a notebook-level global set by visualize_crowding; reading
    # it through globals() also copes with it not having been defined yet.
    rail_lines = globals().get('mass_train')
    if rail_lines is not None:
        mbta_colors = {'RED': '#f73219', 'ORANGE': '#ff8826', 'GREEN': '#0f8741',
                       'SILVER': '#708a8f', 'BLUE': '#094695'}

        def line_style(feature):
            # Lines missing from the colour table are drawn in neutral grey
            # rather than raising KeyError.
            colour = mbta_colors.get(feature['properties']['LINE'], '#808080')
            return {'fillColor': colour, 'color': colour, 'weight': 3, 'fillOpacity': 0.7}

        folium.GeoJson(
            rail_lines.to_crs('epsg:4326').to_json(),
            style_function=line_style,
        ).add_to(m)

    return

In [7]:
# Maps each 'Service Type' label offered in the UI to a (start_hour, end_hour) pair.
# crowding_view selects hours with np.arange(st_hour, end_hour+1), so the end hour is
# inclusive and adjacent periods share their boundary hour (e.g. hour 10 is part of
# both 'AM Peak' and 'Midday').
service_types = {'Entire Day': (0, 24), 'AM Peak': (7, 10), 'Midday': (10, 16), 
                 'PM Peak': (16, 19), 'Evening': (19, 24), 'Early AM': (0, 7)}

In [8]:
# Maps the agency label shown in the 'Agency' dropdown to the short key that
# visualize_crowding uses to locate that agency's data directory and files.
# The three Ventura County keys ('gold_ojai', 'simi', 'tsnd_oaks') are loaded from the
# combined 'vctc_intercity' dataset and then filtered by route.
happy_names = {'MBTA (Boston)': 'mbta',
             'Rock Region Metro (Little Rock)': 'rock_reg_metro',
             'Emery Go-Round (Emeryville)': 'emery_go_round',
             'Pullman Transit (Pullman, WA)': 'pullman_transit',
             'Gold Coast Transit & Ojai Trolley (Ventura County)': 'gold_ojai',
             'Simi Valley Transit (Ventura County)': 'simi',
             'Thousand Oaks Transit (Ventura County)': 'tsnd_oaks'}

### Which public transit routes seem crowded during the COVID-19 pandemic? 
*This visualization tool allows you to explore crowding data for Boston, Little Rock, Emeryville, Ventura County, Shreveport, Pullman (Washington), and the Middle Monongahela River Valley (near Pittsburgh, PA)*

1.  Select a region
2.  Select the entire system or particular routes to examine
3.  Select all day or particular weekday times to examine
4.  Select how you want to display the data: the Equal Interval option is useful for showing crowding in absolute terms, while the Quantiles option is useful for identifying the comparatively more crowded stops in your selection.
5.  For additional information, mouse over a stop to see its Stop ID, Stop Name, Percentage of Trips that may be Crowded, as well as the routes that serve that stop.

#### Notes
This tool uses GTFS-realtime data collected between September 10 and September 18 to show the share of service that is likely somewhat crowded at each stop along each route. If multiple routes are selected and they serve the same stop, the data shown for that stop will be an aggregation of all selected routes. 

*Data from the MBTA in Boston reflect a crowding standard based on social distancing (a vehicle is considered crowded even at less than its normal capacity). However, all other data are reported based on the vehicle's normal capacity. This means that a vehicle could be too full for social distancing, but not show as crowded in these data. Therefore, crowding levels shown for areas besides Boston likely underestimate actual crowding.*

*Boston Rail and BRT lines are shown for reference only; no crowding information is available for these routes.*

In [9]:
import os
from IPython.display import clear_output

# agencies = [x.split('/')[-1] for x in subdirs][1:]

def visualize_crowding(Agency):
    '''
    Load the processed crowding data for one agency and show the interactive map UI.

    Parameters
    ----------
    Agency : str
        Display name of the agency; must be a key of ``happy_names``.

    Side effects
    ------------
    * Reads ``<key>_filtered.parquet`` and ``<key>_geo.geojson`` from the
      matching sub-directory of ``data/processed/``.
    * Sets the module-level globals ``routes`` and ``mass_train``;
      ``add_choropleth`` reads ``mass_train`` to draw the MBTA rail lines.
    * Clears the current cell output and displays the route, service-type and
      classifier widgets together with the map output area.
    '''
    global routes, mass_train

    equal_interval = 'Equal Interval (categories spaced 20% apart)'
    quantiles = 'Quantiles (five categories, each containing the same number of observations)'

    agency = happy_names[Agency]
    clear_output(wait=True)
    print('Loading...')

    folder_path = 'data/processed/'
    # Map every sub-directory's base name to its path. The first os.walk entry
    # is folder_path itself and is skipped. os.path.basename is used instead of
    # splitting on '/' so the lookup does not depend on the OS path separator.
    subdirs = [path for path, _dirs, _files in os.walk(folder_path)][1:]
    agency_dir = {os.path.basename(path): path for path in subdirs}

    # The Ventura County agencies share one combined dataset and are told apart
    # by the route ids listed here.
    vc_rts = {'gold_ojai': ['Route 1', 'Route 10',
                   'Route 11', 'Route 15', 'Route 16', 'Route 17', 'Route 19', 'Route 2',
                   'Route 21', 'Route 23', 'Route 3', 'Route 4', 'Route 42', 'Route 43',
                   'Route 44', 'Route 5', 'Route 6', 'Route 7', 'Route 8', 'TA'],
              'simi': ['10', '20', '30'], 'tsnd_oaks': ['40', '41']}

    dataset = 'vctc_intercity' if agency in vc_rts else agency
    data_dir = agency_dir[dataset]
    agency_filtered = pd.read_parquet(os.path.join(data_dir, f'{dataset}_filtered.parquet'))
    agency_geo = (gpd.read_file(os.path.join(data_dir, f'{dataset}_geo.geojson'))
                  .set_index('stop_id', drop=True))
    if agency in vc_rts:
        agency_filtered = agency_filtered.loc[idx[vc_rts[agency],:]]

    routes = ['All Routes'] + list(agency_filtered.index.get_level_values(0).unique())

    # Rail geometry exists for the MBTA only; None tells add_choropleth to skip that layer.
    mass_train = None
    if agency == 'mbta':
        mass_train = gpd.read_file('./data/processed/mbta/mbta_rapid_transit/MBTA_ARC.shp')

    def interactive_map(serv_type, routes, classifier, df=agency_filtered, geo_df=agency_geo):
        """Rebuild and display the map for the current widget selection."""
        clear_output(wait=True)
        hour_range = service_types[serv_type]
        view = crowding_view(df, geo_df, hour_range[0], hour_range[1], routes)
        if view.empty:
            # With no stops the mean centroid is NaN and the map cannot be centred.
            print('No crowding data available for this selection.')
            return

        # Centre the map on the mean centroid of the selected stops.
        x = view['geometry'].apply(lambda polygon: polygon.centroid.x).mean()
        y = view['geometry'].apply(lambda polygon: polygon.centroid.y).mean()
        m = folium.Map([y, x], zoom_start = 13, tiles='Stamen Terrain')
        try:
            add_choropleth(view, m, classifier)
        except ValueError:
            if classifier == equal_interval:
                # Already using the fallback classifier: re-raise rather than
                # recursing forever.
                raise
            print('Data can not be displayed as Quantiles, displaying Equal Interval')
            # Start again from scratch: add_choropleth may already have altered
            # `view`, so it is recomputed rather than reused.
            interactive_map(serv_type, routes, classifier=equal_interval, df=df, geo_df=geo_df)
            return
        print('Loading map...')
        explanations = {quantiles: '\nThis classifier is best for for identifying the comparatively more crowded stops in your selection.',
                        equal_interval: '\nThis classifier is best for showing crowding in absolute terms.'}
        print(f'Using Classifier: {classifier}{explanations[classifier]}')

        display(m)

    service_widget = widgets.Select(
        options=list(service_types),
        value='Entire Day',
        description='Service Type',
        disabled=False
    )

    routes_widget = widgets.SelectMultiple(
        options=routes,
        value=[routes[0]],
        description='Routes',
        disabled=False
    )

    #nbi:hide_in
    classify_widget = widgets.RadioButtons(
        options=[quantiles, equal_interval],
        value=equal_interval,
        layout={'width': 'max-content'}, # If the items' names are long
        description='Classifier:',
        disabled=False
    )

    # interactive_output re-runs interactive_map whenever one of the widgets changes.
    w = widgets.interactive_output(
        interactive_map,
        {'serv_type': service_widget, 'routes': routes_widget,
         'classifier': classify_widget})
    ui = widgets.VBox([
        widgets.HBox([routes_widget, service_widget]),
        classify_widget])

    display(ui, w)
    

In [10]:
import warnings
# Suppresses all Python warnings from this point on.
# NOTE(review): this also hides deprecation warnings from pandas/folium -- confirm that is intended.
warnings.filterwarnings('ignore')

In [11]:
# NOTE(review): folder_path and subdirs are not used again in this cell;
# visualize_crowding builds its own copies.
folder_path = 'data/processed/'
subdirs = [x[0] for x in os.walk(folder_path)]
# The dropdown choices are the display names; visualize_crowding maps the chosen
# name back to its short key through happy_names.
agencies = happy_names.keys()
# interact() calls visualize_crowding with the selected 'Agency' value.
widgets.interact(visualize_crowding, Agency=agencies);

interactive(children=(Dropdown(description='Agency', options=('MBTA (Boston)', 'Rock Region Metro (Little Rock…