In [1]:
import geopandas as gpd
import partridge as ptg
# import modin.pandas as pd
import pandas as pd
idx = pd.IndexSlice
import numpy as np

import requests
from arcgis2geojson import arcgis2geojson

from geopandas.tools import sjoin
import folium
from folium.plugins import MarkerCluster
#from folium.element import IFrame
import shapely
#from shapely.geometry import Point
import pysal as ps
from pysal.viz import mapclassify

import os
import datetime as dt

from tqdm.auto import tqdm
tqdm.pandas(desc="interpolating stops...")

  from pandas import Panel


In [2]:
def diag_path(path):
    '''Diagnostic loader: print the busiest-date info for a feed, then load it.

    Prints whatever ``ptg.read_busiest_date(path)`` returns and hands back the
    feed produced by ``feed_from_path(path)``.
    '''
    busiest = ptg.read_busiest_date(path)
    print(busiest)
    loaded_feed = feed_from_path(path)
    return loaded_feed

In [3]:
def showall(df):
    '''Display ``df`` in full, with the row and column display limits lifted.'''
    unlimited = ('display.max_rows', None, 'display.max_columns', None)
    #the limits are only lifted inside this context; more options can be added to the tuple
    with pd.option_context(*unlimited):
        display(df)

In [4]:
#read feed at a different date if busiest date not in analysis range
#(e.g. post-COVID feed still includes pre-COVID info)
#keys are the last two '/'-separated components of the feed path joined with no
#separator (e.g. 'pasadena/20200806_gtfs.zip' -> 'pasadena20200806_gtfs.zip');
#values are the service ids to load instead of the busiest date's
serv_id_exceptions = {'pasadena20200806_gtfs.zip': [3]}

def feed_from_path(path):
    '''Using Partridge, read a (GeoPandas enabled) GTFS feed given a filepath.

    The feed is restricted to the service ids reported by
    ``ptg.read_busiest_date(path)``, unless ``serv_id_exceptions`` holds an
    override for this feed, in which case the override's service ids are used.

    Parameters
    ----------
    path : str
        '/'-separated path to the feed. Only the last two components are used
        to build the override key; a path without a directory component is
        accepted and keyed by the file name alone.

    Returns
    -------
    The object returned by ``ptg.load_geo_feed`` for the selected service ids.
    '''
    _date, service_ids = ptg.read_busiest_date(path)

    #check if feed has exception defined above, if so read that service id
    feed_id = ''.join(path.split('/')[-2:])
    service_ids = serv_id_exceptions.get(feed_id, service_ids)

    view = {
        'trips.txt': {'service_id': service_ids},
    }

    return ptg.load_geo_feed(path, view)

In [5]:
def feeds_from_files(folder_path):
    '''
    Load every feed found below ``folder_path``, which is expected to be laid
    out as folder_path/agency/yyyymmdd_gtfs.zip.

    Returns a nested dict: agency folder name -> {datetime parsed from the
    file name's leading yyyymmdd -> feed returned by feed_from_path}.
    Files whose name starts with '.' are skipped.
    '''
    #os.walk yields the root folder first; every directory after it is treated as an agency
    walked = [entry[0] for entry in os.walk(folder_path)]
    feeds = {}
    for subdir in walked[1:]:
        print(subdir)
        agency = subdir.split('/')[-1]
        agency_feeds = {}
        feeds[agency] = agency_feeds
        for feed in os.listdir(subdir):
            #skip hidden files
            if feed.startswith('.'):
                continue
            #the date is everything before the first underscore in the file name
            date = dt.datetime.strptime(feed.split('_')[0], '%Y%m%d')
            agency_feeds[date] = feed_from_path(subdir + '/' + feed)
    return feeds

In [6]:
#load every feed under the Bay Area folder into a nested dict: agency -> feed date -> feed
#(feeds_from_files prints each agency folder as it is read)
feeds_dict = feeds_from_files('./gtfs_feeds/bay/')

./gtfs_feeds/bay/SamTrans
./gtfs_feeds/bay/SFMTA
./gtfs_feeds/bay/Caltrain
./gtfs_feeds/bay/Sonoma County Transit
./gtfs_feeds/bay/VTA
./gtfs_feeds/bay/Golden Gate Transit
./gtfs_feeds/bay/Stanford
./gtfs_feeds/bay/Fairfield Suisun Transit
./gtfs_feeds/bay/WestCAT
./gtfs_feeds/bay/SMART (Marin&Sonoma)
./gtfs_feeds/bay/AC Transit
./gtfs_feeds/bay/Blue and Gold Fleet
./gtfs_feeds/bay/Mountain View
./gtfs_feeds/bay/BART
./gtfs_feeds/bay/Santa Cruz Metro
./gtfs_feeds/bay/Emery Go-Round
./gtfs_feeds/bay/Tideline Water Taxi
./gtfs_feeds/bay/Capitol Corridor
./gtfs_feeds/bay/Marin Transit
./gtfs_feeds/bay/Vacaville City Coach
./gtfs_feeds/bay/Mission Bay TMA
./gtfs_feeds/bay/County Connection
./gtfs_feeds/bay/SolTrans (Solano County)
./gtfs_feeds/bay/Petaluma Transit


In [7]:
feeds_dict

{'SamTrans': {datetime.datetime(2020, 2, 25, 0, 0): <partridge.gtfs.Feed at 0x1448649d0>,
  datetime.datetime(2020, 8, 18, 0, 0): <partridge.gtfs.Feed at 0x146519610>},
 'SFMTA': {datetime.datetime(2020, 2, 19, 0, 0): <partridge.gtfs.Feed at 0x1054c23d0>,
  datetime.datetime(2020, 8, 12, 0, 0): <partridge.gtfs.Feed at 0x1054c20d0>,
  datetime.datetime(2020, 9, 24, 0, 0): <partridge.gtfs.Feed at 0x146cd9a90>},
 'Caltrain': {datetime.datetime(2020, 9, 4, 0, 0): <partridge.gtfs.Feed at 0x14496c8b0>,
  datetime.datetime(2020, 7, 29, 0, 0): <partridge.gtfs.Feed at 0x144995430>,
  datetime.datetime(2020, 1, 10, 0, 0): <partridge.gtfs.Feed at 0x14496c280>},
 'Sonoma County Transit': {datetime.datetime(2020, 1, 1, 0, 0): <partridge.gtfs.Feed at 0x144967430>,
  datetime.datetime(2020, 8, 19, 0, 0): <partridge.gtfs.Feed at 0x1054c2a30>,
  datetime.datetime(2020, 10, 6, 0, 0): <partridge.gtfs.Feed at 0x144864d90>},
 'VTA': {datetime.datetime(2020, 3, 5, 0, 0): <partridge.gtfs.Feed at 0x144acd7f0>

In [8]:
def get_county_geog(county):
    '''Using TIGERweb API, get geographies for all tracts within a county.

    Parameters
    ----------
    county : str
        State code followed by county code: the first two characters are sent
        as STATE and the remainder as county in the query (e.g. '06001').

    Returns
    -------
    GeoDataFrame built from the returned features, with CRS set to EPSG:4326,
    a 'county' column holding ``county`` and BASENAME renamed to 'tract'.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    '''
    api_url = f'https://tigerweb.geo.census.gov/arcgis/rest/services/TIGERweb/tigerWMS_ACS2019/MapServer/8/query?f=json&outsr=4326&where=STATE={county[:2]}%20and%20county={county[2:]}'
    response = requests.get(api_url)
    #fail loudly on an HTTP error instead of trying to parse an error page as JSON
    response.raise_for_status()
    data = arcgis2geojson(response.json())
    gdf = gpd.GeoDataFrame.from_features(data['features'])
    gdf['county'] = county
    #the query asks for outsr=4326, so tag the result accordingly
    gdf.crs = 'EPSG:4326'
    gdf = gdf.rename(columns={'BASENAME':'tract'})
    return gdf

def get_region_geog(counties):
    '''Fetch the geographies of several counties and stack them into one frame.

    Parameters
    ----------
    counties : sequence of str
        County codes, each in the form accepted by get_county_geog.

    Returns
    -------
    The per-county frames concatenated row-wise, in the order given.

    Raises
    ------
    ValueError
        If ``counties`` is empty (there is nothing to concatenate).
    '''
    #one concat instead of repeated DataFrame.append (removed in pandas 2.0,
    #and each append copied everything accumulated so far)
    county_frames = [get_county_geog(county) for county in counties]
    return pd.concat(county_frames)

In [9]:
# sac_counties = ['06113', '06101', '06115', '06061', '06057', '06005', '06017', '06095']
# sac_tahoe = get_region_geog(sac_counties)
# sac_tahoe.to_file("./geographies/sac_tahoe.geojson", driver='GeoJSON')

In [10]:
# la_county = gpd.read_file('./geographies/la_county.geojson')

In [11]:
# sf_bay = gpd.read_file('./geographies/ne_bay.geojson').append(gpd.read_file('./geographies/sf_pen.geojson'))

In [14]:
#tract polygons for the Bay Area study region, read from a local GeoJSON file
#(columns shown below: tract, county, oldtract, geometry)
sf_bay = gpd.read_file('./geographies/bay_area.geojson', driver='GeoJSON')

In [31]:
sf_bay

Unnamed: 0,tract,county,oldtract,geometry
0,060014061,06001,4061,"POLYGON ((-122.23872 37.78358, -122.23872 37.7..."
1,060014333,06001,4333,"POLYGON ((-122.17066 37.70877, -122.17082 37.7..."
2,060014403.07,06001,4403.07,"POLYGON ((-122.06077 37.59868, -122.06105 37.5..."
3,060014338,06001,4338,"POLYGON ((-122.13004 37.69298, -122.13006 37.6..."
4,060014094,06001,4094,"POLYGON ((-122.19073 37.74581, -122.19069 37.7..."
...,...,...,...,...
1636,060816113,06081,6113,"POLYGON ((-122.23451 37.44452, -122.23438 37.4..."
1637,060816016.03,06081,6016.03,"POLYGON ((-122.46923 37.66382, -122.46923 37.6..."
1638,060816013,06081,6013,"POLYGON ((-122.47166 37.68326, -122.47167 37.6..."
1639,060816115,06081,6115,"POLYGON ((-122.20576 37.46493, -122.20575 37.4..."


In [16]:
#pick a single feed (agency + feed date) to inspect by hand
#NOTE(review): the feeds_dict listing shown above is truncated before AC Transit - confirm this date key exists
example_feed = feeds_dict['AC Transit'][dt.datetime(2019, 8, 9, 0, 0)]

In [17]:
## https://gist.github.com/csb19815/476335cb299ddb3d5a1a4b898424bb35

def service_hours(stop_times, time_range=None):
    '''Return the summed duration, in hours, of all trips in stop_times (filtered elsewhere).

    A trip's duration is its latest minus its earliest ``arrival_time``
    (seconds), so a trip with a single remaining row contributes 0.

    Parameters
    ----------
    stop_times : DataFrame with 'trip_id' and 'arrival_time' columns.
    time_range : optional (start, end) pair in the same units as
        ``arrival_time``; when given, only rows whose arrival time lies in the
        inclusive range are considered.

    Returns
    -------
    Total hours across trips, or 0 (after printing a message) if the
    per-trip aggregation fails.
    '''

    #support arbitrary time selections...
    if time_range:
        stop_times = (
            stop_times[(stop_times['arrival_time'] >= time_range[0])
            & (stop_times['arrival_time'] <= time_range[1])])
    try:
        trip_lengths = stop_times.groupby('trip_id').arrival_time.agg(['min', 'max'])
        hours_per_trip = (trip_lengths['max'] - trip_lengths['min']) / 60 / 60
        return hours_per_trip.sum()
    #best effort on bad data, but let KeyboardInterrupt/SystemExit through
    #so a long-running notebook cell can still be stopped
    except Exception:
        print('serv_hr_error, returning 0')
        return 0


In [18]:
example_feed.stop_times

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,timepoint
0,366735070,85260.0,85260.0,2481,1,0,0,1
1,366735070,85260.0,85260.0,5336,2,0,0,0
2,366735070,86460.0,86460.0,4173,3,0,0,0
3,366735070,86460.0,86460.0,4168,4,0,0,1
4,366735070,87120.0,87120.0,5310,5,1,0,1
...,...,...,...,...,...,...,...,...
282493,371781230,45000.0,45000.0,5873,5,0,0,0
282494,371781230,45120.0,45120.0,2497,6,0,0,0
282495,371781230,45240.0,45240.0,5334,7,0,0,1
282496,371781230,45360.0,45360.0,2512,8,0,0,0


In [19]:
def interpolate_stops(gdf, geo):
    '''
    Add interpolated tract-boundary arrival times to a group of stop times.

    When the next stop is in a different tract, estimate when it crosses tracts and add
    that arrival time to both tracts. Important because the next step is to group by tract
    and estimate service hours, service crossing tracts would be dropped without this interpolation.

    Parameters
    ----------
    gdf : frame of stop times with at least 'geometry', 'tract', 'arrival_time',
        'stop_sequence' and 'trip_id' columns. single_agency_df calls this once per
        trip_id group, sorted by stop_sequence. Helper columns are written onto it in place.
    geo : passed through unchanged to line_in_tracts as its tract_geos argument.

    Returns
    -------
    ``gdf`` itself, untouched, when it has fewer than 2 rows. Otherwise a new frame with
    columns 'tract', 'arrival_time', 'stop_sequence', 'trip_id': the original rows plus
    two rows (one per tract) for every tract crossing. The added rows carry only tract and
    arrival time; trip_id is then overwritten on every row with the first row's value.
    '''
    
    #can't interpolate on single-stop trip... (should fix LADOT bug...)
    if gdf.shape[0] < 2:
        return gdf
#     if gdf['geometry'].is_unique == False:
#         print('non-unique stops!')
#         return gdf
    
    #reduce every geometry to a point so .x/.y can be read below
    #NOTE(review): when called from single_agency_df the geometry appears to come from the
    #tract side of the spatial join, which would make this the tract centroid, not the stop - confirm
    gdf.loc[:, 'geometry'] = gdf.geometry.centroid
    #pair each row with the previous row's tract, point and arrival time
    gdf.loc[:, 'lasttract'] = gdf['tract'].shift()
    gdf.loc[:, 'lastgeo'] = gpd.GeoSeries(gdf['geometry'].shift())
    gdf.loc[:, 'lastarr'] = gdf['arrival_time'].shift()
    #first row has no predecessor, so it compares unequal and is never 'sametract'
    gdf.loc[:, 'sametract'] = gdf['tract'].eq(gdf['lasttract'])
    #debug
#     return gdf
    #reset in case stop_id not a unique index
    gdf = gdf.reset_index()
#     return gdf
    #create a line between every pair of stops (first row is skipped and stays NaN)
#     print('lining...', end='')
    gdf['line_last_stop'] = gdf.iloc[1:,:].apply(
    lambda x: shapely.geometry.LineString(
        [(x.lastgeo.x, x.lastgeo.y),
         (x.geometry.x, x.geometry.y)
        ]), axis=1)
#     print('lined!...', end='')

    #add column with arrival time at tract boundary
    
    #debugging aid: exposes the most recently processed frame at module level
    #NOTE(review): leftover debug state, overwritten on every call
    global _debug
    _debug = gdf
#     display(gdf)

    gdf['split_arr'] = gdf.apply(line_in_tracts, args=(geo,), axis = 1)
    #generate df with tract+arrival time rows to be appended
    records = gdf.apply(add_interpolated_rows, axis=1).dropna().values
    #NOTE(review): DataFrame.append inside a loop re-copies the accumulated frame on every
    #pass and is deprecated in newer pandas; a single pd.concat would be the usual replacement
    to_append = pd.DataFrame()
    for record in records:
        to_append = to_append.append(pd.DataFrame(record))
    #append interpolated rows to original stop times gdf
    merged_df = (gdf.append(to_append).
                 reset_index()[['tract', 'arrival_time', 'stop_sequence', 'trip_id']])
    #interpolated rows have no trip_id of their own; stamp every row with the first row's
    merged_df.loc[:, 'trip_id'] = merged_df['trip_id'][0]
    return merged_df

In [20]:
def line_in_tracts(df, tract_geos):
    '''Estimate the arrival time at the boundary between two stops' tracts.

    ``df`` is one row holding 'sametract', 'line_last_stop', 'lasttract',
    'tract', 'lastarr' and 'arrival_time'. ``tract_geos`` maps 'tract' to
    'geometry'. Returns None when both stops share a tract or there is no
    line to the previous stop; otherwise the previous arrival time plus the
    share of the travel time proportional to the line length inside the
    previous stop's tract.
    '''
    #debugging aid: keep the most recent row reachable at module level
    global _debug2
    _debug2 = df

    if df['sametract']:
        return None
    segment = df['line_last_stop']
    if np.all(np.isnan(segment)):
        return None

    def _polygon_for(tract_id):
        #first geometry registered for this tract
        return tract_geos[tract_geos['tract'] == tract_id]['geometry'].iloc[0]

    previous_polygon = _polygon_for(df['lasttract'])
    current_polygon = _polygon_for(df['tract'])

    #length inside a tract = whole line minus the part left after removing the tract
    inside_previous = segment.length - segment.difference(previous_polygon).length
    inside_current = segment.length - segment.difference(current_polygon).length
    covered = inside_previous + inside_current

    #shares are taken out of the 2-tract total only,
    ##note this ignores potential intermediate tracts
    if covered == 0:
        print('warning: 0 distance in tracts, not fully accurate')
        return df['lastarr']

    elapsed = df['arrival_time'] - df['lastarr']
    #first value is time spent in the previous tract, second in the current one
    shares = np.array([inside_previous/covered, inside_current/covered])
    arrival_splits = shares * elapsed
    return df['lastarr'] + arrival_splits[0]

In [21]:
def add_interpolated_rows(df):
    '''Generate dict with a single set of interpolated rows.

    ``df`` is one row with 'split_arr', 'tract' and 'lasttract'. When
    'split_arr' holds a boundary-crossing time, return a dict describing two
    rows (current tract and previous tract) sharing that arrival time;
    return None when 'split_arr' is None or NaN.
    '''
    split_arr = df['split_arr']
    #test for missing values explicitly: a crossing time of 0 (midnight) is
    #valid and must not be discarded just because it is falsy
    if split_arr is None or np.all(np.isnan(split_arr)):
        return None
    return {'tract': [df['tract'], df['lasttract']], 'arrival_time': split_arr}

In [22]:
feeds_dict

{'SamTrans': {datetime.datetime(2020, 2, 25, 0, 0): <partridge.gtfs.Feed at 0x1448649d0>,
  datetime.datetime(2020, 8, 18, 0, 0): <partridge.gtfs.Feed at 0x146519610>},
 'SFMTA': {datetime.datetime(2020, 2, 19, 0, 0): <partridge.gtfs.Feed at 0x1054c23d0>,
  datetime.datetime(2020, 8, 12, 0, 0): <partridge.gtfs.Feed at 0x1054c20d0>,
  datetime.datetime(2020, 9, 24, 0, 0): <partridge.gtfs.Feed at 0x146cd9a90>},
 'Caltrain': {datetime.datetime(2020, 9, 4, 0, 0): <partridge.gtfs.Feed at 0x14496c8b0>,
  datetime.datetime(2020, 7, 29, 0, 0): <partridge.gtfs.Feed at 0x144995430>,
  datetime.datetime(2020, 1, 10, 0, 0): <partridge.gtfs.Feed at 0x14496c280>},
 'Sonoma County Transit': {datetime.datetime(2020, 1, 1, 0, 0): <partridge.gtfs.Feed at 0x144967430>,
  datetime.datetime(2020, 8, 19, 0, 0): <partridge.gtfs.Feed at 0x1054c2a30>,
  datetime.datetime(2020, 10, 6, 0, 0): <partridge.gtfs.Feed at 0x144864d90>},
 'VTA': {datetime.datetime(2020, 3, 5, 0, 0): <partridge.gtfs.Feed at 0x144acd7f0>

In [23]:
# test_agencies = ['torrance', 'lacmta_rail', 'bbb', 'ccb']
#subset of the loaded feeds (same agency -> date -> feed structure as feeds_dict),
#limited to the agencies listed here
test_agencies = ['Santa Cruz Metro']
test_dict = {agency: feeds_dict[agency] for agency in test_agencies}

In [24]:
test_dict

{'Santa Cruz Metro': {datetime.datetime(2020, 9, 4, 0, 0): <partridge.gtfs.Feed at 0x144953400>,
  datetime.datetime(2020, 6, 24, 0, 0): <partridge.gtfs.Feed at 0x104520fd0>,
  datetime.datetime(2020, 2, 20, 0, 0): <partridge.gtfs.Feed at 0x145978d30>}}

In [25]:
def service_hours_by_geo(times_in_geo, serv_type):
    '''Service hours per tract within one named time-of-day window.

    ``serv_type`` is one of 'am_peak', 'midday', 'pm_peak', 'evening',
    'early_am'. Returns a one-column frame indexed by tract, the column
    being named '<serv_type>_vrh'.
    '''
    #GTFS times are measured in seconds since midnight, so 6*hour corresponds to 6:00AM
    hour = 60**2
    serv_times = dict(
        am_peak=(6*hour, 9*hour),
        midday=(9*hour, 15*hour),
        pm_peak=(15*hour, 19*hour),
        evening=(19*hour, 28*hour),
        early_am=(0*hour, 6*hour),
    )

    by_tract = times_in_geo.groupby("tract")
    window = serv_times[serv_type]
    hours_by_tract = by_tract.apply(service_hours, time_range=window)

    #the unnamed result becomes column 0 of the frame; give it its final name
    column_name = f'{serv_type}_vrh'
    return pd.DataFrame(hours_by_tract).rename(columns={0: column_name})

In [32]:
def single_agency_df(feed, geo):
    '''Calculate service hours by tract by service type for a single feed (agency+date).

    Parameters
    ----------
    feed : object exposing ``stops`` and ``stop_times`` frames. Missing
        arrival/departure times are filled in on ``feed.stop_times`` itself.
    geo : geometries with a 'tract' column, spatially joined against the stops.

    Returns
    -------
    Frame indexed by tract with one '<serv_type>_vrh' column per service
    type; the per-type results are stacked, so each tract appears once per
    service type with the other columns left empty.
    '''
    serv_types = ['am_peak', 'midday', 'pm_peak', 'evening', 'early_am']

    #interpolate between stop times if needed
    #NOTE(review): the interpolation runs over the whole table in its stored order,
    #not per trip - confirm stop_times is ordered by trip and stop sequence
    missing_arrivals = feed.stop_times['arrival_time'].isnull()
    if missing_arrivals.any():
        nulls = missing_arrivals.sum()
        print(f'Feed contains {nulls} null stop_times entries, interpolating...')
        feed.stop_times['arrival_time'] = feed.stop_times['arrival_time'].interpolate()
        feed.stop_times['departure_time'] = feed.stop_times['departure_time'].interpolate()

    def stops_in_geo(feed, geo):
        #attach a tract to every stop, keeping one row per stop_id
        joined = gpd.sjoin(geo, feed.stops,
                           how='inner', op='intersects')
        joined = joined.drop_duplicates(subset=['stop_id'])
        return joined.set_index('stop_id')

    times_in_geo = stops_in_geo(feed, geo).join(
        feed.stop_times.set_index('stop_id'), how='inner')

    #interpolate_stops is applied here once per trip (this is the slowest step);
    #trip_id is renamed because it also ends up in the index after the groupby
    times_in_geo = (times_in_geo.sort_values(by=['trip_id', 'stop_sequence'])
               .groupby('trip_id').progress_apply(interpolate_stops, geo=geo)
               .rename(columns={'trip_id':'trip_id2'}))

    #collect the per-window frames and concatenate once, instead of growing a
    #frame with DataFrame.append (removed in pandas 2.0)
    vrh_frames = []
    for serv_type in serv_types:
        print(serv_type)
        vrh_frames.append(service_hours_by_geo(times_in_geo, serv_type))
    return pd.concat(vrh_frames)

In [33]:
def service_hour_df(feeds_dict, geo):
    '''
    Given dictionary of feeds generated in previous step,
    generate final dataframe with VRH by tract, COVID status, and agency.
    Currently slow to run if feeds are large (expect 20-40 minutes for LA area).

    Parameters
    ----------
    feeds_dict : nested dict, agency -> feed date -> feed. For each agency the
        earliest-dated feed is treated as pre-COVID (covid=0) and the
        latest-dated feed as current (covid=1).
    geo : tract geometries, passed on to single_agency_df.

    Returns
    -------
    Frame indexed by (tract, covid, agency) with the per-service-type columns
    summed within each index combination, plus a 'total_vrh' row total.

    An agency whose processing raises an ordinary exception is reported and
    skipped; KeyboardInterrupt is not swallowed, so the run can be stopped.
    '''
    agency_frames = []

    for agency in list(feeds_dict.keys()):
        print(agency)
        agency_dict = feeds_dict[agency]
        pre_covid = agency_dict[min(agency_dict.keys())]
        pre_covid.stops.crs = 'EPSG:4326'
        current = agency_dict[max(agency_dict.keys())]
        current.stops.crs = 'EPSG:4326'
        try:
            print('pre-covid')
            ##TODO add some sort of progress print in interpolate_stops?
            pre_covid_hrs = single_agency_df(pre_covid, geo)
            pre_covid_hrs['covid'] = 0
            pre_covid_hrs = pre_covid_hrs.set_index('covid', append=True)
            print('covid')
            current_hrs = single_agency_df(current, geo)
            current_hrs['covid'] = 1
            current_hrs = current_hrs.set_index('covid', append=True)
        #skip to next agency if either of these fail (continuing to debug interpolation)...
        except Exception as err:
            print(f'agency error: {agency}')
            #say why, so the failure can actually be debugged
            print(f'  {type(err).__name__}: {err}')
            continue

        agency_hrs = pd.concat([pre_covid_hrs, current_hrs])
        agency_hrs['agency'] = agency
        agency_hrs = agency_hrs.set_index('agency', append=True)
        agency_frames.append(agency_hrs)

    #single concat instead of growing a frame with DataFrame.append;
    #with no successful agency this stays an empty frame, as before
    mdf = pd.concat(agency_frames) if agency_frames else pd.DataFrame()
    mdf = mdf.groupby(level=['tract', 'covid', 'agency']).sum()
    mdf['total_vrh'] = mdf.sum(axis=1)
    return mdf

In [34]:
#run the full pipeline for every loaded agency against the Bay Area tracts
#(prints the agency, the pre-covid/covid stage and each service type as it goes)
svc_bay = service_hour_df(feeds_dict, sf_bay)

SamTrans


  return _prepare_from_string(" ".join(pjargs))


pre-covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=1771.0, style=ProgressStyle(…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=1300.0, style=ProgressStyle(…


am_peak
midday
pm_peak
evening
early_am
SFMTA


  return _prepare_from_string(" ".join(pjargs))


pre-covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=11730.0, style=ProgressStyle…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=6501.0, style=ProgressStyle(…


am_peak
midday
pm_peak
evening
early_am
Caltrain
pre-covid


  return _prepare_from_string(" ".join(pjargs))


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=92.0, style=ProgressStyle(de…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=70.0, style=ProgressStyle(de…


am_peak
midday
pm_peak
evening
early_am
Sonoma County Transit


  return _prepare_from_string(" ".join(pjargs))


pre-covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=324.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=206.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
VTA


  return _prepare_from_string(" ".join(pjargs))


pre-covid
Feed contains 141785 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=4028.0, style=ProgressStyle(…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=2915.0, style=ProgressStyle(…


am_peak
midday
pm_peak
evening
early_am
Golden Gate Transit


  return _prepare_from_string(" ".join(pjargs))


pre-covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=548.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=219.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
Stanford


  return _prepare_from_string(" ".join(pjargs))


pre-covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=594.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=491.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
Fairfield Suisun Transit


  return _prepare_from_string(" ".join(pjargs))


pre-covid
Feed contains 3793 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=346.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid
Feed contains 4530 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=450.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
WestCAT


  return _prepare_from_string(" ".join(pjargs))


pre-covid
Feed contains 5718 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=836.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid
Feed contains 4276 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=462.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
SMART (Marin&Sonoma)
pre-covid


  return _prepare_from_string(" ".join(pjargs))


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=38.0, style=ProgressStyle(de…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=16.0, style=ProgressStyle(de…


am_peak
midday
pm_peak
evening
early_am
AC Transit


  return _prepare_from_string(" ".join(pjargs))


pre-covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=6904.0, style=ProgressStyle(…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=4912.0, style=ProgressStyle(…


am_peak
midday
pm_peak
evening
early_am
Blue and Gold Fleet
pre-covid


  return _prepare_from_string(" ".join(pjargs))


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=105.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=36.0, style=ProgressStyle(de…


am_peak
midday
pm_peak
evening
early_am
Mountain View
pre-covid


  return _prepare_from_string(" ".join(pjargs))


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=64.0, style=ProgressStyle(de…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=54.0, style=ProgressStyle(de…


am_peak
midday
pm_peak
evening
early_am
BART


  return _prepare_from_string(" ".join(pjargs))


pre-covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=1149.0, style=ProgressStyle(…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=754.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
Santa Cruz Metro


  return _prepare_from_string(" ".join(pjargs))


pre-covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=626.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=422.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
Emery Go-Round


  return _prepare_from_string(" ".join(pjargs))


pre-covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=243.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=134.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
Tideline Water Taxi
pre-covid


  return _prepare_from_string(" ".join(pjargs))


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=8.0, style=ProgressStyle(des…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=8.0, style=ProgressStyle(des…


am_peak
midday
pm_peak
evening
early_am
Capitol Corridor
pre-covid


  return _prepare_from_string(" ".join(pjargs))


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=69.0, style=ProgressStyle(de…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=49.0, style=ProgressStyle(de…


am_peak
midday
pm_peak
evening
early_am
Marin Transit


  return _prepare_from_string(" ".join(pjargs))


pre-covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=656.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=1195.0, style=ProgressStyle(…


am_peak
midday
pm_peak
evening
early_am
Vacaville City Coach


  return _prepare_from_string(" ".join(pjargs))


pre-covid
Feed contains 3796 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=204.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid
Feed contains 768 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=48.0, style=ProgressStyle(de…


am_peak
midday
pm_peak
evening
early_am
Mission Bay TMA


  return _prepare_from_string(" ".join(pjargs))


pre-covid
Feed contains 2 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=122.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid
Feed contains 2 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=113.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
County Connection


  return _prepare_from_string(" ".join(pjargs))


pre-covid
Feed contains 20242 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=1022.0, style=ProgressStyle(…


am_peak
midday
pm_peak
evening
early_am
covid
Feed contains 19685 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=975.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
SolTrans (Solano County)


  return _prepare_from_string(" ".join(pjargs))


pre-covid
Feed contains 7350 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=566.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid
Feed contains 4555 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=336.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
Petaluma Transit


  return _prepare_from_string(" ".join(pjargs))


pre-covid
Feed contains 2105 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=233.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am
covid
Feed contains 1878 null stop_times entries, interpolating...


HBox(children=(FloatProgress(value=0.0, description='interpolating stops...', max=215.0, style=ProgressStyle(d…


am_peak
midday
pm_peak
evening
early_am


In [35]:
svc_bay

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,am_peak_vrh,midday_vrh,pm_peak_vrh,evening_vrh,early_am_vrh,total_vrh
tract,covid,agency,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
060014001,0,AC Transit,0.633316,0.309715,0.384160,0.094617,0.034823,1.456631
060014001,1,AC Transit,0.086231,0.242748,0.156517,0.000000,0.000000,0.485496
060014002,0,AC Transit,2.135538,5.645216,5.880770,3.165281,0.469931,17.296735
060014002,1,AC Transit,1.573668,4.268017,3.209160,2.165814,0.500765,11.717424
060014003,0,AC Transit,3.992604,8.799828,7.453219,5.007804,1.348052,26.601508
...,...,...,...,...,...,...,...,...
060971542.02,1,Sonoma County Transit,1.458604,4.268573,0.745774,0.057755,0.000000,6.530706
060971543.02,0,Sonoma County Transit,0.677007,1.020459,0.510229,0.000000,0.000000,2.207695
060971543.02,1,Sonoma County Transit,0.340153,1.020459,0.340153,0.000000,0.000000,1.700765
060971543.04,0,Sonoma County Transit,0.149542,0.825992,0.177950,0.000000,0.000000,1.153485


In [37]:
#save the tract/covid/agency service-hours table to a parquet file
svc_bay.to_parquet('./processed_data/Bay_svc_Oct11.parquet')

## To-do:

### Feature Adds:
* _visualization along routes_
* geoid, not tracts?
* "megaregion" view?? --> likely requires automating feed collection...
    * also some sort of tiling, aggregation by city, etc...
    
### Architecture
* make compatible with Modin?
