In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import sqlalchemy
import folium
from folium import plugins
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import datetime as dt

import os

In [2]:
import folium
from folium.plugins import MarkerCluster
import pysal as ps
from pysal.viz import mapclassify

import ipywidgets as widgets

In [3]:
import partridge as ptg
idx = pd.IndexSlice

In [4]:
def positions_from_db(db_name, limit, offset=0, routes=['all']):
    '''Fetch weekday vehicle positions from the logging database, newest first.

    Parameters
    ----------
    db_name : str
        Name of the MySQL database (one per agency) to query.
    limit : int
        Maximum number of rows to return.
    offset : int, default 0
        Number of rows to skip before the first returned row (for paging).
    routes : sequence of str, default ['all']
        Route ids to keep. If the first element is 'all', no route filter is
        applied. The default list is never mutated.

    Returns
    -------
    (pandas.DataFrame, int)
        The selected rows, and the total row count of `vehicle_positions`
        (unfiltered: it ignores both the route and the weekday filter).

    Raises
    ------
    ValueError
        If `routes` is empty.
    '''
    cols = 'oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp'

    # NOTE(review): credentials should not live in the notebook. Environment
    # variables take precedence; the embedded defaults are kept only so that
    # existing runs keep working and should be removed (and rotated).
    aws_host = os.environ.get(
        'GTFS_RT_DB_HOST',
        'gtfs-rt-logging.clakglowlpps.us-west-2.rds.amazonaws.com')
    db_user = os.environ.get('GTFS_RT_DB_USER', 'halfempty')
    db_password = os.environ.get('GTFS_RT_DB_PASSWORD', 'tentoninety')
    eng = sqlalchemy.create_engine(
        f'mysql://{db_user}:{db_password}@{aws_host}:3306/{db_name}')

    # Coerce to int so nothing but a number can reach the SQL text.
    limit = int(limit)
    offset = int(offset)

    route_list = list(routes)
    if not route_list:
        raise ValueError('routes must contain at least one route id (or "all")')

    weekday_filter = 'weekday(timestamp) in (0,1,2,3,4)'
    params = None
    if route_list[0] != 'all':
        # Bound, expanding parameter: also correct for a single route, where
        # str(tuple(routes)) would render "('1',)" (invalid SQL), and safe for
        # route ids containing quotes.
        where = f'where route_id in :routes and {weekday_filter}'
        params = {'routes': [str(route) for route in route_list]}
    else:
        where = f'where {weekday_filter}'

    offset_clause = f'offset {offset}' if offset else ''

    # Many rows share a timestamp; `oid` is added as a tie-breaker so that
    # limit/offset paging is deterministic across batches.
    query = (f'select {cols} from vehicle_positions {where} '
             f'order by timestamp desc, oid desc limit {limit} {offset_clause}')
    print(query)

    statement = sqlalchemy.text(query)
    if params is not None:
        statement = statement.bindparams(
            sqlalchemy.bindparam('routes', expanding=True))
    df = pd.read_sql(statement, con=eng, params=params)

    with eng.connect() as conn:
        count = conn.execute(
            sqlalchemy.text('select count(*) from vehicle_positions')).scalar()
    return df, count


In [5]:
def gdf_from_positions_df(df):
    '''Turn a vehicle-positions DataFrame into a point GeoDataFrame (EPSG:4326).

    Rows with a missing latitude or longitude are dropped first; the remaining
    rows get a point geometry built from (longitude, latitude).
    '''
    located = df.dropna(subset=['position_latitude', 'position_longitude'])
    points = gpd.points_from_xy(located['position_longitude'],
                                located['position_latitude'])
    return gpd.GeoDataFrame(located, geometry=points, crs='EPSG:4326')

In [6]:
def feed_from_path(path):
    '''Read a GeoPandas-enabled GTFS feed from `path` using Partridge.

    Only trips belonging to the service ids that Partridge reports for the
    feed's busiest date are loaded.
    '''
    _busiest_date, busiest_service_ids = ptg.read_busiest_date(path)
    return ptg.load_geo_feed(
        path,
        {'trips.txt': {'service_id': busiest_service_ids}},
    )

In [7]:
# Load the MBTA GTFS feed from the local zip; feed_from_path keeps only the
# trips of the busiest date's service ids.
mbta_gtfs = feed_from_path('./data/mbta/08282020_gtfs.zip')

In [13]:
def _stops_served_by_trips(gtfs, trip_ids):
    '''Return stop_id, stop_name and geometry of every stop visited by `trip_ids`.'''
    visited = gtfs.stop_times.loc[
        gtfs.stop_times['trip_id'].isin(trip_ids), 'stop_id'].drop_duplicates()
    served = gtfs.stops[gtfs.stops['stop_id'].isin(visited)]
    return served[['stop_id', 'stop_name', 'geometry']]


def aggregate_by_stop(df, gtfs, buffer_distance=250, projected_crs='EPSG:3586'):
    '''Count occupancy reports per (stop, route, hour) for positions near stops.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Vehicle positions with `route_id`, `timestamp`, `occupancy_status` and
        a point geometry. It is not modified.
    gtfs : feed object
        Must expose `trips`, `routes`, `stop_times` and `stops` frames.
    buffer_distance : number, default 250
        Radius of the buffer drawn around each stop, in units of
        `projected_crs` (the original comment says 250 ft).
    projected_crs : str, default 'EPSG:3586'
        CRS used for buffering and the spatial join
        (https://spatialreference.org/ref/epsg/3586/).
        NOTE(review): this looks like a Massachusetts state-plane CRS in US
        feet, yet it is used for every agency; pass a locally appropriate CRS
        for feeds outside that area - confirm.

    Returns
    -------
    (pandas.DataFrame, geopandas.GeoDataFrame)
        Report counts indexed by (stop_id, route_id, hour) with one column per
        occupancy status, and the buffered stops indexed by stop_id.
    '''
    routes = df['route_id'].unique()

    # First assume the positions carry GTFS route_ids.
    trip_ids = gtfs.trips.loc[gtfs.trips['route_id'].isin(routes), 'trip_id']
    stops_with_crowding = _stops_served_by_trips(gtfs, trip_ids)

    if stops_with_crowding.shape[0] == 0:
        # Nothing matched: the positions presumably carry route_short_name
        # instead, so match trips through the routes table.
        rt_id_name = gtfs.routes[['route_id', 'route_short_name']].set_index('route_id')
        trips = gtfs.trips.set_index('route_id').join(rt_id_name)
        trip_ids = trips.loc[trips['route_short_name'].isin(routes), 'trip_id']
        stops_with_crowding = _stops_served_by_trips(gtfs, trip_ids)

    stops_projected = stops_with_crowding.to_crs(projected_crs)
    stops_projected['geometry'] = stops_projected['geometry'].buffer(buffer_distance)

    # Raw coordinates are redundant once the geometry exists; ignore them if
    # they were already removed upstream.
    df = (df.to_crs(projected_crs)
            .drop(columns=['position_latitude', 'position_longitude'],
                  errors='ignore'))

    display(stops_projected.head(5))
    display(df.head(5))

    # Left join keeps stops without any report (their rows get NaN and fall
    # out of the groupby below).
    # NOTE(review): newer geopandas releases rename `op` to `predicate`.
    join1 = gpd.sjoin(stops_projected, df, how='left', op='contains')
    join1['hour'] = join1['timestamp'].dt.hour

    counts = (join1.groupby(['stop_id', 'route_id', 'hour'])['occupancy_status']
                   .value_counts())
    # Pivot the innermost level (the occupancy status) into columns.
    testdf = counts.unstack(level=-1)

    geo_df = stops_projected.set_index('stop_id')

    return testdf, geo_df

In [8]:
def route_id_from_gtfs(df, gtfs):
    '''Replace `route_id` in `df` with the GTFS route_short_name of each trip.

    `df` is re-indexed by `trip_id` and joined to a trip_id -> route_short_name
    lookup built from `gtfs.trips` and `gtfs.routes`. Rows whose trip has no
    short name (unknown trip, or missing name) are dropped. The returned frame
    is indexed by `trip_id` and keeps the `route_short_name` column.
    '''
    short_name_by_route = gtfs.routes[['route_id', 'route_short_name']].set_index('route_id')
    trips_by_route = gtfs.trips[['trip_id', 'route_id']].set_index('route_id')
    short_name_by_trip = trips_by_route.join(short_name_by_route).set_index('trip_id')

    named = df.set_index('trip_id').join(short_name_by_trip)
    named = named.dropna(subset=['route_short_name'])
    return named.assign(route_id=named['route_short_name'])

In [28]:
def agg_positions_with_crowding(db_name, gtfs, batch_size=5*10**5, sample_size=100000):
    '''Aggregate occupancy reports per stop/route/hour for one agency database.

    A sample of the newest rows is used to find the routes whose
    `occupancy_status` varies (i.e. that carry real crowding data). The table
    is then read in batches, each batch is joined to buffered stops by
    `aggregate_by_stop`, and the per-batch counts are summed.

    Parameters
    ----------
    db_name : str
        Database to read (passed to `positions_from_db`).
    gtfs : feed object
        GTFS feed for the same agency.
    batch_size : int, default 500000
        Rows fetched per batch.
    sample_size : int, default 100000
        Rows fetched for the initial route detection.

    Returns
    -------
    (pandas.DataFrame, geopandas.GeoDataFrame) or None
        Summed counts and the buffered stops, or None when no route carries
        crowding data or no batch yields any row.
    '''
    # Debug hooks kept from the original: first and latest per-batch result.
    global db1, db2

    print('Agg runs!')
    # Fetch a subset and sense which routes provide crowding data.
    subset_allrt, row_count = positions_from_db(db_name, sample_size)

    if (subset_allrt['route_id'] == '').all():
        # The feed does not publish route_id; derive it from trip_id via GTFS.
        subset_allrt = route_id_from_gtfs(subset_allrt, gtfs)
        route_data = False
    else:
        route_data = True

    display(subset_allrt.head(5))

    # Routes where crowding info isn't all the same, i.e. actual data.
    values_by_rt = subset_allrt.groupby('route_id')['occupancy_status'].nunique().eq(1)
    print('values--->', values_by_rt)
    routes_with_crowding = values_by_rt[~values_by_rt].index
    print('rts_w_crowd--->', routes_with_crowding)
    print(len(routes_with_crowding))
    # An empty index would otherwise fail later on routes[0] in positions_from_db.
    if len(routes_with_crowding) == 0 or list(routes_with_crowding) == ['']:
        print('No actual crowding data!')
        return None

    aggregated_dfs = None
    geo_df = None
    # row_count is the unfiltered table size, so it is only an upper bound on
    # the number of batches; an empty fetch ends the loop early.
    for batch_offset in range(0, row_count, batch_size):
        if route_data:
            gdf_crowding, _count = positions_from_db(
                db_name, batch_size, offset=batch_offset,
                routes=routes_with_crowding)
            if gdf_crowding.shape[0] == 0:
                break
        else:
            gdf_crowding, _count = positions_from_db(
                db_name, batch_size, offset=batch_offset)
            if gdf_crowding.shape[0] == 0:
                break
            gdf_crowding = route_id_from_gtfs(gdf_crowding, gtfs)
            gdf_crowding = gdf_crowding[gdf_crowding['route_id'].isin(routes_with_crowding)]
            if gdf_crowding.shape[0] == 0:
                # This batch has no rows for the selected routes, but later
                # batches still may: skip it rather than stop.
                continue

        gdf_crowding = gdf_from_positions_df(gdf_crowding)
        aggregated_df, geo_df = aggregate_by_stop(gdf_crowding, gtfs)
        if aggregated_dfs is None:
            db1 = aggregated_df
            aggregated_dfs = aggregated_df
        else:
            db2 = aggregated_df
            aggregated_dfs = aggregated_dfs.add(aggregated_df, fill_value=0)

    if aggregated_dfs is None:
        print('No actual crowding data!')
        return None
    return aggregated_dfs, geo_df


In [14]:
# Aggregate crowding reports from the 'moorpark_city_transit' database against
# its GTFS feed: `gold` holds the counts, `gold_geo` the buffered stops.
gold, gold_geo = (agg_positions_with_crowding('moorpark_city_transit', feed_from_path('./data/to_process/moorpark_city_transit/gtfs.zip')))

Agg runs!
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 100000 


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,position_latitude,position_longitude,timestamp,route_short_name
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
139-1,623312,Route 5,07:06:00,20200918,3768,EMPTY,34.199,-119.178,2020-09-18 14:28:29,Route 5
139-1,623246,Route 5,07:06:00,20200918,3768,EMPTY,34.199,-119.178,2020-09-18 14:27:31,Route 5
139-1,623180,Route 5,07:06:00,20200918,3768,EMPTY,34.1989,-119.18,2020-09-18 14:26:39,Route 5
139-1,623115,Route 5,07:06:00,20200918,3768,EMPTY,34.1989,-119.181,2020-09-18 14:25:50,Route 5
139-1,623051,Route 5,07:06:00,20200918,3768,EMPTY,34.1976,-119.181,2020-09-18 14:25:00,Route 5


values---> route_id
1           False
10          False
2           False
20          False
30          False
40          False
41          False
50          False
52          False
60          False
70           True
77           True
80          False
80C          True
80X          True
81          False
81B         False
82           True
83          False
84           True
84U         False
85          False
86          False
87          False
88           True
99          False
Fillmore    False
KS           True
Piru        False
Route 1     False
Route 10    False
Route 11    False
Route 15    False
Route 16    False
Route 17    False
Route 19    False
Route 2     False
Route 21    False
Route 23    False
Route 3     False
Route 4     False
Route 42    False
Route 43    False
Route 44    False
Route 5     False
Route 6     False
Route 7     False
Route 8     False
TA          False
Name: occupancy_status, dtype: bool
rts_w_crowd---> Index(['1', '10', '2', '20', '30', '40', '41',

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,stop_id,stop_name,geometry
0,3287703,Wells Ctr.,"POLYGON ((-13126825.952 3963331.158, -13126827..."
1,3289176,Telephone & Wells,"POLYGON ((-13126719.838 3959542.060, -13126721..."
2,3289177,Telephone & Saticoy (Westbound),"POLYGON ((-13129302.574 3959375.582, -13129303..."
3,3289178,Telephone & Scandia,"POLYGON ((-13130102.580 3959310.061, -13130103..."
4,3289179,Telephone & Cachuma (Westbound),"POLYGON ((-13130835.218 3959289.067, -13130836..."


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,route_short_name,geometry
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
139-1,623312,Route 5,07:06:00,20200918,3768,EMPTY,2020-09-18 14:28:29,Route 5,POINT (-13150250.818 3937458.376)
139-1,623246,Route 5,07:06:00,20200918,3768,EMPTY,2020-09-18 14:27:31,Route 5,POINT (-13150250.818 3937458.376)
139-1,623180,Route 5,07:06:00,20200918,3768,EMPTY,2020-09-18 14:26:39,Route 5,POINT (-13150787.835 3937750.969)
139-1,623115,Route 5,07:06:00,20200918,3768,EMPTY,2020-09-18 14:25:50,Route 5,POINT (-13151046.600 3937912.842)
139-1,623051,Route 5,07:06:00,20200918,3768,EMPTY,2020-09-18 14:25:00,Route 5,POINT (-13151299.862 3937507.989)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 500000 offset 500000


Unnamed: 0,stop_id,stop_name,geometry
0,3287703,Wells Ctr.,"POLYGON ((-13126825.952 3963331.158, -13126827..."
1,3289176,Telephone & Wells,"POLYGON ((-13126719.838 3959542.060, -13126721..."
2,3289177,Telephone & Saticoy (Westbound),"POLYGON ((-13129302.574 3959375.582, -13129303..."
3,3289178,Telephone & Scandia,"POLYGON ((-13130102.580 3959310.061, -13130103..."
4,3289179,Telephone & Cachuma (Westbound),"POLYGON ((-13130835.218 3959289.067, -13130836..."


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,route_short_name,geometry
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
139-1,52774,Route 5,07:06:00,20200909,3738,EMPTY,2020-09-09 14:23:42,Route 5,POINT (-13150250.818 3937458.376)
139-1,52706,Route 5,07:06:00,20200909,3738,EMPTY,2020-09-09 14:22:51,Route 5,POINT (-13150529.068 3937589.100)
139-1,52638,Route 5,07:06:00,20200909,3738,EMPTY,2020-09-09 14:22:01,Route 5,POINT (-13151260.898 3937570.274)
139-1,52570,Route 5,07:06:00,20200909,3738,EMPTY,2020-09-09 14:21:12,Route 5,POINT (-13152056.677 3938024.776)
139-1,52502,Route 5,07:06:00,20200909,3738,EMPTY,2020-09-09 14:20:22,Route 5,POINT (-13153091.714 3938672.343)


In [15]:
gold

Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,MANY_SEATS_AVAILABLE,STANDING_ROOM_ONLY
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3284140,99,15.0,1.0,,
3284140,99,16.0,3.0,,
3284140,99,17.0,1.0,,
3284140,99,19.0,1.0,,
3284140,99,21.0,1.0,,
...,...,...,...,...,...
5946107,Route 10,22.0,7.0,9.0,
5946107,Route 10,23.0,11.0,2.0,
5946107,Route 11,17.0,,1.0,
5946107,Route 16,0.0,1.0,,


In [16]:
gold_geo

Unnamed: 0_level_0,stop_name,geometry
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1
3287703,Wells Ctr.,"POLYGON ((-13126825.952 3963331.158, -13126827..."
3289176,Telephone & Wells,"POLYGON ((-13126719.838 3959542.060, -13126721..."
3289177,Telephone & Saticoy (Westbound),"POLYGON ((-13129302.574 3959375.582, -13129303..."
3289178,Telephone & Scandia,"POLYGON ((-13130102.580 3959310.061, -13130103..."
3289179,Telephone & Cachuma (Westbound),"POLYGON ((-13130835.218 3959289.067, -13130836..."
...,...,...
3733280,Royal Avenue and Erringer Road,"POLYGON ((-13029468.973 3890415.438, -13029470..."
5622774,Erringer Rd and Royal,"POLYGON ((-13029338.175 3890752.598, -13029339..."
5657316,Los Angeles Avenue and Executive Way,"POLYGON ((-13030832.622 3894818.366, -13030833..."
5657317,Los Angeles Avenue and First Street,"POLYGON ((-13032040.213 3895565.384, -13032041..."


In [19]:
# Keep only the rows whose stop is served by the row's route according to the
# GTFS feed (the feed is read from disk again here).
filter_actual_stops(gold, feed_from_path('./data/to_process/moorpark_city_transit/gtfs.zip'))

Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,MANY_SEATS_AVAILABLE,STANDING_ROOM_ONLY
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3284140,99,15.0,1.0,,
3284140,99,16.0,3.0,,
3284140,99,17.0,1.0,,
3284140,99,19.0,1.0,,
3284140,99,21.0,1.0,,


Unnamed: 0,trip_id,route_id,service_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible
0,153-819,3580,0,To Leisure Village,,1,217605,9257,
1,153-830,3580,0,To Community Center,,0,217605,10901,
2,153-900,3580,0,To Leisure Village,,1,217605,10732,
3,153-920,3580,0,To Community Center,,0,217605,10900,
4,153-1000,3580,0,To Leisure Village,,1,217605,9257,


**** new style
stops_per_rt---> route_id
1                [3836292, 3836293, 3836294, 3836295, 3836296, ...
10               [5643933, 5644036, 5644085, 3772796, 3772797, ...
2                [3836288, 3836290, 3836291, 3837596, 3837597, ...
20               [5664552, 5670877, 3736638, 3733271, 3733272, ...
30               [3733289, 3733290, 3736696, 3772277, 3772278, ...
40               [3492868, 5894666, 3497013, 3510428, 3497014, ...
41               [3492868, 3495177, 3506072, 3495179, 3495180, ...
50               [4438837, 4438838, 4438839, 4436113, 4436535, ...
52               [4438837, 4438838, 4446580, 4446581, 4446582, ...
52X              [4439889, 4439892, 4436535, 4436113, 4438839, ...
60               [4439889, 4439890, 4461540, 4461539, 4461538, ...
70               [4438838, 4438837, 4466116, 4466117, 4466118, ...
77               [4477568, 4477569, 4471098, 4477570, 4466119, ...
80               [4481674, 4481675, 4481676, 4481677, 4481678, ...
80C              [448

Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,route_id,EMPTY,MANY_SEATS_AVAILABLE,STANDING_ROOM_ONLY
route_id,stop_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,3836284,0.0,1,77.0,12.0,
1,3836284,14.0,1,98.0,,
1,3836284,15.0,1,58.0,,
1,3836284,16.0,1,29.0,,
1,3836284,17.0,1,100.0,,
...,...,...,...,...,...,...
TA,3679173,18.0,TA,4.0,,
TA,3679173,20.0,TA,8.0,,
TA,3679173,21.0,TA,3.0,,
TA,3679173,22.0,TA,3.0,,


In [17]:
def in_stop_list(x):
    '''Keep the rows of one route's group whose stop is served by that route.

    `x` is a frame holding a single route (the route is read from its first
    `route_id` value) with a `stop_id` column. The allowed stops come from the
    module-level `stops_per_rt` Series (route_id -> stop ids), which must be
    defined before this is called; `filter_actual_stops` defines its own
    nested copy of this helper and does not use this one.

    Returns the filtered rows, or None (after printing a message) when
    `stops_per_rt` has no entry for the route.
    '''
    route = x['route_id'].iloc[0]
    try:
        test_list = list(stops_per_rt.loc[route])
    except KeyError:
        # Only a missing route is expected here; anything else (for example
        # `stops_per_rt` not being defined) is a real error and must surface
        # instead of being reported as "no stop data".
        print(f"no stop data for route {route}!")
        return None
    return x[x['stop_id'].isin(test_list)]

In [18]:
def filter_actual_stops(df, gtfs):
    '''Drop aggregated rows whose stop is not served by the row's route.

    Parameters
    ----------
    df : pandas.DataFrame
        Counts whose index has the levels `stop_id`, `route_id` and `hour`.
    gtfs : feed object
        Must expose `trips`, `routes` and `stop_times` frames.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, ordered by route, indexed by
        (route_id, stop_id, hour); `route_id` is also kept as a column.
        Routes with no stop list in the feed are reported with a message and
        dropped.
    '''
    # Debug hooks kept from the original for interactive inspection.
    global _debug0, _debug1, _debug2, _trips0, _trips1
    _debug0 = df

    display(df.head(5))
    display(gtfs.trips.head(5))

    df_routes = df.index.get_level_values('route_id')
    if not df_routes.isin(gtfs.trips['route_id']).all():
        # Some routes in df are not GTFS route_ids: treat df's route_id as the
        # GTFS route_short_name and relabel the trips accordingly.
        print('**** new style')
        gtfs_routes = gtfs.routes[['route_id', 'route_short_name']].set_index('route_id')
        trips = (gtfs.trips.set_index('route_id')
                           .join(gtfs_routes)
                           .reset_index(drop=True))
        _trips0 = trips
        trips = trips.rename(columns={'route_short_name': 'route_id'})
        _trips1 = trips
    else:
        trips = gtfs.trips

    # Only route_id is needed for the lookup (direction_id is optional in GTFS
    # and was never used).
    trip_routes = trips.set_index('trip_id')[['route_id']]
    stops_per_rt = (gtfs.stop_times.set_index('trip_id')
                        .join(trip_routes)
                        .groupby('route_id')['stop_id']
                        .unique())

    _debug1 = df

    reset = df.reset_index()
    print('stops_per_rt--->', stops_per_rt)

    # Walk the routes in sorted order, keeping the original row order inside
    # each route. An explicit loop avoids depending on whether groupby.apply
    # hands the grouping column to the callback.
    kept_labels = []
    for route, rows in reset.groupby('route_id'):
        if route not in stops_per_rt.index:
            print(f"no stop data for route {route}!")
            continue
        served = list(stops_per_rt.loc[route])
        kept_labels.extend(rows.index[rows['stop_id'].isin(served).to_numpy()])

    filtered = reset.loc[kept_labels]
    _debug2 = filtered

    return (filtered.set_index(['route_id', 'stop_id', 'hour'], drop=False)
                    .drop(columns=['stop_id', 'hour']))

In [151]:
marin_gtfs.trips

Unnamed: 0,trip_id,route_id,service_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible
0,35875,2303,1,Canal,,0,227563,12332,
1,35878,2303,1,San Rafael,,1,227563,13333,
2,35877,2303,1,Canal,,0,227563,12332,
3,35880,2303,1,San Rafael,,1,227563,13333,
4,35879,2303,1,Canal,,0,227563,12332,
...,...,...,...,...,...,...,...,...,...
1190,35074,2303,2,San Rafael,,1,227605,13333,
1191,35071,2303,2,Canal,,0,227605,12332,
1192,35076,2303,2,San Rafael,,1,227605,13333,
1193,35073,2303,2,Canal,,0,227605,12332,


In [154]:
marin_gtfs.routes[['route_id', 'route_short_name']]

Unnamed: 0,route_id,route_short_name
0,16,219
1,1237,61
2,1244,251
3,2298,23
4,2302,29
5,2303,35
6,2305,36
7,1207,68
8,2306,71X
9,1203,233


In [153]:
marin_gtfs.stop_times

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint
0,35875,86400.0,86400.0,4891560,0,,0,,0.000000,1.0
1,35875,86460.0,86460.0,2498718,1,,0,,493.617247,
2,35875,86700.0,86700.0,2505828,2,,0,,2376.959181,1.0
3,35875,86880.0,86880.0,2498852,3,,0,,2771.755190,1.0
4,35878,86880.0,86880.0,2498852,0,,0,,0.000000,1.0
...,...,...,...,...,...,...,...,...,...,...
27656,35078,85380.0,85380.0,2498870,4,,0,,1214.466766,
27657,35078,85500.0,85500.0,2498871,5,,0,,1538.517659,1.0
27658,35078,85740.0,85740.0,2498872,6,,0,,2387.378289,
27659,35078,85980.0,85980.0,71781,7,,0,,3087.167921,


In [67]:
mbta.reset_index()

occupancy_status,stop_id,route_id,hour,EMPTY,FEW_SEATS_AVAILABLE,FULL,MANY_SEATS_AVAILABLE
0,1,1,0.0,20.0,6.0,,42.0
1,1,1,1.0,21.0,2.0,,34.0
2,1,1,2.0,19.0,,,33.0
3,1,1,3.0,9.0,,,33.0
4,1,1,4.0,11.0,2.0,,26.0
...,...,...,...,...,...,...,...
282048,99991,Shuttle-Generic,12.0,1.0,,,
282049,99991,Shuttle-Generic,13.0,1.0,,,
282050,99991,Shuttle-Generic,16.0,2.0,,,
282051,99991,Shuttle-Generic,18.0,1.0,,,


In [68]:
# Drop MBTA rows whose stop is not on the row's route according to the feed.
# NOTE(review): `mbta` is not defined in the visible cells - confirm where it
# is created before relying on Restart & Run All.
mbta_filtered = filter_actual_stops(mbta, mbta_gtfs)

no stop data for route 62!
no stop data for route Shuttle-Generic!
no stop data for route Shuttle-Generic-Red!


In [69]:
mbta_filtered.loc[idx['1',:,:],:].index.get_level_values(1).unique()

Index(['1', '10003', '101', '10100', '10101', '102', '104', '10590', '106',
       '107', '108', '109', '110', '187', '188', '2', '57', '58', '59', '6',
       '62', '63', '64', '66', '67', '68', '69', '71', '72', '73', '74', '75',
       '77', '79', '80', '82', '83', '84', '854', '856', '87', '88', '89',
       '91', '93', '95', '97', '99'],
      dtype='object', name='stop_id')

In [70]:
# Cache the filtered MBTA counts on disk so later sessions can skip the
# database aggregation.
mbta_filtered.to_parquet('./data/mbta/mbta_filtered.parquet')

In [71]:
## NOTE: the data covers approximately Aug 17 to Sep 4.
## TODO: record the covered date range when fetching the dataframe (or derive it from the S3 files).

In [12]:
# Reload the cached filtered MBTA counts written by the to_parquet cell.
mbta_filtered = pd.read_parquet('./data/mbta/mbta_filtered.parquet')

In [13]:
mbta_filtered

Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,route_id,EMPTY,FEW_SEATS_AVAILABLE,FULL,MANY_SEATS_AVAILABLE
route_id,stop_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1,0.0,1,20.0,6.0,,42.0
1,1,1.0,1,21.0,2.0,,34.0
1,1,2.0,1,19.0,,,33.0
1,1,3.0,1,9.0,,,33.0
1,1,4.0,1,11.0,2.0,,26.0
...,...,...,...,...,...,...,...
94,63241,19.0,94,24.0,,,
94,63241,20.0,94,11.0,,,
94,63241,21.0,94,9.0,,,1.0
94,63241,22.0,94,11.0,,,


In [75]:
# Persist the MBTA stop geometries as GeoJSON.
# NOTE(review): `mbta_geo` is not defined in the visible cells before this
# point - confirm where it is created.
mbta_geo.to_file('./data/mbta/mbta_geo.geojson', driver="GeoJSON")

In [14]:
# Reload the stop geometries from GeoJSON and index them by stop_id.
mbta_geo = gpd.read_file('./data/mbta/mbta_geo.geojson').set_index('stop_id', drop=True)

In [26]:
def feeds_from_files(folder_path):
    '''Run the crowding pipeline for every agency folder under `folder_path`.

    Every directory found below `folder_path` is treated as an agency whose
    name is both the folder name and the database name. For each one the GTFS
    zip in the folder is loaded, positions are aggregated and filtered, and
    the results are written next to the feed as `<agency>_filtered.parquet`
    and `<agency>_geo.geojson`.

    Folders without a `.zip` file, and agencies for which the aggregation
    yields nothing, are reported and skipped. Returns None.
    '''
    subdirs = [entry[0] for entry in os.walk(folder_path)]
    # The first entry is folder_path itself.
    for subdir in subdirs[1:]:
        agency = os.path.basename(os.path.normpath(subdir))
        print(agency)

        zip_names = [name for name in os.listdir(subdir)
                     if not name.startswith('.') and name.endswith('.zip')]
        if not zip_names:
            # Without this guard `gtfs` would be unbound, or silently reuse
            # the previous agency's feed.
            print(f'No GTFS zip found for {agency}, skipping')
            continue
        # As before, the last listed zip wins; the others are no longer parsed.
        gtfs = feed_from_path(os.path.join(subdir, zip_names[-1]))

        result = agg_positions_with_crowding(agency, gtfs)
        if result is None:
            print(f'No apparent crowding data for {agency}')
            continue
        agency_df, agency_geo = result

        agency_filtered = filter_actual_stops(agency_df, gtfs)
        agency_filtered.to_parquet(os.path.join(subdir, f'{agency}_filtered.parquet'))
        agency_geo.to_file(os.path.join(subdir, f'{agency}_geo.geojson'), driver="GeoJSON")
    return

In [29]:
# Process every agency folder under ./data/to_process/ and write the parquet
# and GeoJSON outputs next to each feed.
feeds_from_files('./data/to_process/')

mbta
Agg runs!
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 100000 


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,position_latitude,position_longitude,timestamp
0,5195520,ADDED-1580543343,Red,19:12:00,20200918,R-5466AB2E,EMPTY,42.225,-71.0038,2020-09-18 23:59:26
1,5195521,45295471,Red,20:02:00,20200918,R-5466ADAF,EMPTY,42.2846,-71.0638,2020-09-18 23:59:26
2,5195522,45683708,15,19:35:00,20200918,y1775,MANY_SEATS_AVAILABLE,42.3038,-71.0708,2020-09-18 23:59:26
3,5195523,45684218,23,,20200918,y1854,MANY_SEATS_AVAILABLE,42.2836,-71.064,2020-09-18 23:59:26
4,5195524,45683592,10,19:30:00,20200918,y1826,MANY_SEATS_AVAILABLE,42.3377,-71.0721,2020-09-18 23:59:26


values---> route_id
1                         False
10                        False
100                        True
101                       False
104                       False
                          ...  
Orange                     True
Red                        True
Shuttle-Generic           False
Shuttle-Generic-Green      True
Shuttle-Generic-Orange    False
Name: occupancy_status, Length: 178, dtype: bool
rts_w_crowd---> Index(['1', '10', '101', '104', '106', '108', '109', '11', '110', '111',
       ...
       '89', '9', '90', '91', '92', '93', '95', '99', 'Shuttle-Generic',
       'Shuttle-Generic-Orange'],
      dtype='object', name='route_id', length=129)
129
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,5195442,45772193,354,19:33:00,20200918,y2079,MANY_SEATS_AVAILABLE,2020-09-18 23:59:26,POINT (756712.475 3004902.094)
1,5195443,45592050,83,19:50:00,20200918,y2027,MANY_SEATS_AVAILABLE,2020-09-18 23:59:26,POINT (759636.244 2966430.724)
2,5195444,45772195,354,,20200918,y1458,EMPTY,2020-09-18 23:59:26,POINT (775818.490 2956086.361)
3,5195445,45695463,57,19:45:00,20200918,y0836,FEW_SEATS_AVAILABLE,2020-09-18 23:59:26,POINT (752315.903 2953243.154)
4,5195446,45771858,109,19:38:00,20200918,y1952,MANY_SEATS_AVAILABLE,2020-09-18 23:59:26,POINT (773344.651 2969521.139)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,4442653,45710447,455,12:40:00,20200917.0,y0776,FULL,2020-09-17 16:53:11,POINT (797472.501 2990611.596)
1,4442654,45778515,108,12:37:00,20200917.0,y0424,EMPTY,2020-09-17 16:53:11,POINT (769404.426 2979778.447)
2,4442658,45403896,73,,,y4121,EMPTY,2020-09-17 16:53:11,POINT (758200.483 2961212.978)
3,4442659,45770836,110,12:28:00,20200917.0,y2084,FEW_SEATS_AVAILABLE,2020-09-17 16:53:11,POINT (790576.844 2974462.198)
4,4442661,45831216,240,,,y0757,MANY_SEATS_AVAILABLE,2020-09-17 16:53:11,POINT (780884.982 2928817.657)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,3691728,45710622,117,04:28:00,20200916,y0854,FULL,2020-09-16 08:41:13,POINT (780703.622 2962380.434)
1,3691729,45682801,66,04:45:00,20200916,y1908,EMPTY,2020-09-16 08:41:13,POINT (768655.106 2945190.357)
2,3691730,45524522,32,05:00:00,20200916,y1666,EMPTY,2020-09-16 08:41:13,POINT (761614.348 2935572.724)
3,3691731,45592955,77,05:23:00,20200916,y2044,EMPTY,2020-09-16 08:41:13,POINT (765746.023 2971233.019)
4,3691732,45523970,31,04:58:00,20200916,y1614,MANY_SEATS_AVAILABLE,2020-09-16 08:41:13,POINT (766174.157 2926738.531)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,2917557,45771460,111,15:27:00,20200914,y1996,EMPTY,2020-09-14 19:32:15,POINT (780028.302 2967369.507)
1,2917558,45522422,29,15:20:00,20200914,y1687,FEW_SEATS_AVAILABLE,2020-09-14 19:32:15,POINT (768322.242 2935786.566)
2,2917560,45593652,86,15:25:00,20200914,y2053,FEW_SEATS_AVAILABLE,2020-09-14 19:32:15,POINT (767053.881 2963258.237)
3,2917562,45772015,89,15:30:00,20200914,y2089,MANY_SEATS_AVAILABLE,2020-09-14 19:32:15,POINT (770690.409 2965499.048)
4,2917564,45523544,42,15:31:00,20200914,y1713,FEW_SEATS_AVAILABLE,2020-09-14 19:32:15,POINT (768276.719 2945152.072)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,1426983,45684548,47,06:30:00,20200911,y1783,MANY_SEATS_AVAILABLE,2020-09-11 10:58:29,POINT (773181.413 2948456.277)
1,1426984,45523083,50,06:55:00,20200911,y1713,MANY_SEATS_AVAILABLE,2020-09-11 10:58:29,POINT (756396.880 2922284.510)
2,1426985,45831010,240,06:30:00,20200911,y0811,MANY_SEATS_AVAILABLE,2020-09-11 10:58:29,POINT (780902.433 2875393.948)
3,1426986,45771082,93,,20200911,y2088,MANY_SEATS_AVAILABLE,2020-09-11 10:58:29,POINT (770665.022 2965170.934)
4,1426989,45682409,44,07:05:00,20200911,y1770,EMPTY,2020-09-11 10:58:29,POINT (767180.896 2948098.626)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,658423,45771582,111,,20200909.0,y1968,EMPTY,2020-09-09 21:02:18,POINT (775538.968 2957870.615)
1,658424,45695434,57,16:45:00,20200909.0,y0820,FEW_SEATS_AVAILABLE,2020-09-09 21:02:18,POINT (755529.902 2953876.308)
2,658425,45683062,66,16:31:00,20200909.0,y1828,FEW_SEATS_AVAILABLE,2020-09-09 21:02:18,POINT (755665.524 2953767.564)
3,658426,45772022,89,,20200909.0,y2054,MANY_SEATS_AVAILABLE,2020-09-09 21:02:18,POINT (770663.212 2965535.356)
4,658429,45771521,111,,,y2008,MANY_SEATS_AVAILABLE,2020-09-09 21:02:18,POINT (775512.508 2957761.150)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,FEW_SEATS_AVAILABLE,FULL,MANY_SEATS_AVAILABLE
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,0.0,8.0,,,31.0
1,1,1.0,11.0,,,39.0
1,1,2.0,9.0,,,27.0
1,1,3.0,2.0,2.0,,25.0
1,1,4.0,6.0,,,9.0


Unnamed: 0,route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,trip_route_type,route_pattern_id,bikes_allowed
0,1,FallWeekday,45683746,Harvard,,0,C01-10,10085,1,,1-_-0,1
1,1,FallWeekday,45683748,Harvard,,0,C01-10,10085,1,,1-_-0,1
2,1,FallWeekday,45683750,Harvard,,0,C01-16,10085,1,,1-_-0,1
3,1,FallWeekday,45683751,Harvard,,0,C01-16,10085,1,,1-_-0,1
4,1,FallWeekday,45683752,Harvard,,0,C01-12,10085,1,,1-_-0,1


**** new style
stops_per_rt---> route_id
1      [64, 1, 2, 6, 10003, 57, 58, 10590, 87, 88, 18...
10     [175, 143, 178, 176, 1395, 1396, 1397, 1398, 2...
100    [5271, 9318, 9319, 9045, 5215, 5274, 5275, 527...
101    [45003, 5002, 5031, 5032, 5290, 5291, 5292, 52...
104    [53270, 5289, 5342, 5343, 5344, 5345, 5347, 53...
                             ...                        
SL2    [30250, 30251, 31259, 31255, 31257, 31256, 746...
SL3    [74611, 74612, 74613, 74624, 7096, 74637, 7463...
SL4    [64, 3, 4, 5, 1787, 1788, 5093, 5095, 15095, 6...
SL5    [49001, 8279, 49002, 49003, 5098, 5100, 19402,...
SLW    [74614, 74615, 74616, 74617, 74611, 74612, 746...
Name: stop_id, Length: 168, dtype: object
no stop data for route 214216!
no stop data for route 2427!
no stop data for route 441442!
no stop data for route 62!
no stop data for route 627!
no stop data for route 708!
no stop data for route 747!
no stop data for route Shuttle-Generic!
no stop data for route Shuttle-Generic-Orange!


In [None]:
## TODO: clean this up, try re-running, and target 2-3 to debug...

In [213]:
_debug0

Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,MANY_SEATS_AVAILABLE
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1
1512445,233,14.0,1.0,
1512445,245,0.0,7.0,
1512445,245,1.0,12.0,
1512445,245,14.0,20.0,
1512445,245,15.0,4.0,1.0
...,...,...,...,...
71918,49,15.0,,1.0
71918,49,17.0,2.0,
71918,49,20.0,2.0,
71918,49,22.0,1.0,2.0


In [214]:
_debug1

Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,MANY_SEATS_AVAILABLE
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1
1512445,233,14.0,1.0,
1512445,245,0.0,7.0,
1512445,245,1.0,12.0,
1512445,245,14.0,20.0,
1512445,245,15.0,4.0,1.0
...,...,...,...,...
71918,49,15.0,,1.0
71918,49,17.0,2.0,
71918,49,20.0,2.0,
71918,49,22.0,1.0,2.0


In [215]:
_debug2

In [216]:
_trips0

Unnamed: 0,trip_id,service_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,route_short_name
0,228802,1,San Rafael,,0,227558,13499,,228
1,228803,1,Fairfax Manor,,1,227558,13498,,228
2,228804,1,San Rafael,,0,227574,13499,,228
3,228805,1,Fairfax Manor,,1,227574,13498,,228
4,228806,1,San Rafael,,0,227558,13499,,228
...,...,...,...,...,...,...,...,...,...
1190,61803,1,Mill Valley,,0,227583,15201,,61M
1191,61810,1,Stinson Beach,,1,227583,15206,,61M
1192,61809,1,Mill Valley,,0,227583,15201,,61M
1193,61814,1,Stinson Beach,,1,227583,15206,,61M


In [218]:
_trips1

Unnamed: 0,trip_id,service_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,route_id
0,228802,1,San Rafael,,0,227558,13499,,228
1,228803,1,Fairfax Manor,,1,227558,13498,,228
2,228804,1,San Rafael,,0,227574,13499,,228
3,228805,1,Fairfax Manor,,1,227574,13498,,228
4,228806,1,San Rafael,,0,227558,13499,,228
...,...,...,...,...,...,...,...,...,...
1190,61803,1,Mill Valley,,0,227583,15201,,61M
1191,61810,1,Stinson Beach,,1,227583,15206,,61M
1192,61809,1,Mill Valley,,0,227583,15201,,61M
1193,61814,1,Stinson Beach,,1,227583,15206,,61M


### Next steps
   * Work out the view logic and calculate the metric
   * Build the visualizer
