In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import sqlalchemy
import folium
from folium import plugins
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import datetime as dt

import os

In [2]:
import folium
from folium.plugins import MarkerCluster
import pysal as ps
from pysal.viz import mapclassify

import ipywidgets as widgets

In [3]:
import partridge as ptg
idx = pd.IndexSlice

In [4]:
def positions_from_db(db_name, limit, offset=0, routes=['all']):
    cols = 'oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp'
    aws_host='gtfs-rt-logging.clakglowlpps.us-west-2.rds.amazonaws.com'
    eng = sqlalchemy.create_engine(
        f'mysql://halfempty:tentoninety@{aws_host}:3306/{db_name}')
    
    ##timestamp in db is UTC, dict contains tz offsets for each agency
    ##brittle, rewrite better or change for DST...
    tz_utc_minus = {'mbta': 4, 'emery_go_round': 7, 'mmvta': 4,
                   'vctc_intercity': 7, 'pullman_transit': 7,
                   'rock_reg_metro': 5, 'shreveport': 5}
    
    where = ''
    sep = "'"
    
    if offset == 0:
        offset = ''
    else:
        offset = f'offset {offset}'
    if routes[0] != 'all':
        where = f"where route_id in {str(tuple(routes))} and weekday(timestamp) in (0,1,2,3,4)"
    else:
        where = 'where weekday(timestamp) in (0,1,2,3,4)'

    query = f'select {cols} from vehicle_positions {where} order by timestamp desc limit {limit} {offset}'
    print(query)
    df = pd.read_sql(query, con=eng)
    count = eng.execute('select count(*) from vehicle_positions').fetchall()[0][0]
    
    #apply tz correction
    df['timestamp'] = df['timestamp'] - dt.timedelta(hours=tz_utc_minus[db_name])
    return df, count


In [5]:
def gdf_from_positions_df(df):
    
    df = df.dropna(subset=['position_latitude', 'position_longitude'])
    geo = gpd.points_from_xy(df['position_longitude'], df['position_latitude'])
    gdf = gpd.GeoDataFrame(df, geometry=geo, crs='EPSG:4326')
    
    return gdf

In [6]:
def feed_from_path(path):
    '''Using Partridge, read a (GeoPandas enabled) GTFS feed given a filepath'''
    _date, service_ids = ptg.read_busiest_date(path)
    
    view = {
        'trips.txt': {'service_id': service_ids},
    }
    return ptg.load_geo_feed(path, view)

In [7]:
def aggregate_by_stop(df, gtfs):
    
    routes = df['route_id'].unique()
    
    trips_crowding_rts = gtfs.trips[gtfs.trips['route_id'].isin(routes)]['trip_id']
    stops_crowding_routes = gtfs.stop_times[gtfs.stop_times['trip_id'].isin(trips_crowding_rts)].drop_duplicates(subset=['stop_id'])['stop_id']
    stops_with_crowding = gtfs.stops[gtfs.stops['stop_id'].isin(stops_crowding_routes)]
    stops_with_crowding = stops_with_crowding[['stop_id', 'stop_name', 'geometry']]
    
    if stops_with_crowding.shape[0] == 0:
        rt_id_name = gtfs.routes[['route_id', 'route_short_name']].set_index('route_id')
        trips = gtfs.trips.set_index('route_id').join(rt_id_name)
        trips_crowding_rts = trips[trips['route_short_name'].isin(routes)]['trip_id']
        stops_crowding_routes = gtfs.stop_times[gtfs.stop_times['trip_id'].isin(trips_crowding_rts)].drop_duplicates(subset=['stop_id'])['stop_id']
        stops_with_crowding = gtfs.stops[gtfs.stops['stop_id'].isin(stops_crowding_routes)]
        stops_with_crowding = stops_with_crowding[['stop_id', 'stop_name', 'geometry']]
    
    #https://spatialreference.org/ref/epsg/3586/
    stops_projected = stops_with_crowding.to_crs('EPSG:3586')

    #250ft buffer
    stops_projected['geometry'] = stops_projected['geometry'].buffer(250)
#     return stops_projected

    df = df.to_crs('EPSG:3586')
    ##filter df smaller...
    try:
        df.drop(columns=['position_latitude',
                          'position_longitude'], inplace=True)
    except KeyError:
        pass
    
    display(stops_projected.head(5))
    display(df.head(5))

    join1 = gpd.sjoin(stops_projected, df, how='left', op='contains')
#     return join1
    join1['hour'] = join1['timestamp'].dt.hour
    grouped = join1.groupby(['stop_id', 'route_id', 'hour'])
    series = grouped['occupancy_status'].value_counts().transpose()
    testdf = (pd.DataFrame(series)
              .rename(columns={'occupancy_status':'reports'})
              .unstack(level=[3]))

    ##This is it!
    testdf = testdf.droplevel(0, axis=1)
    geo_df = stops_projected.set_index('stop_id')
    
    return testdf, geo_df

In [8]:
def route_id_from_gtfs(df, gtfs):
    trips = gtfs.trips[['trip_id', 'route_id']].set_index('route_id')
    rt_id_name = gtfs.routes[['route_id', 'route_short_name']].set_index('route_id')
    merge1 = (df.set_index('trip_id')
                .join(trips.join(rt_id_name)
                .set_index('trip_id'))
                 .dropna(subset=['route_short_name']))
    merge1['route_id'] = merge1['route_short_name']
    return merge1

In [9]:
def agg_positions_with_crowding(db_name, gtfs):
    print('Agg runs!')
    #fetch a subset and sense which routes provide crowding data
    subset_allrt, row_count = positions_from_db(db_name, 100000)
#     return subset_allrt

    if (subset_allrt['route_id'] == '').all():
        subset_allrt = route_id_from_gtfs(subset_allrt, gtfs)
        route_data = False
    else:
        route_data = True
        
    display(subset_allrt.head(5))
#     print(row_count)
#     return
    #routes where crowding info isn't all the same, i.e. actual data
    values_by_rt = subset_allrt.groupby('route_id')['occupancy_status'].nunique().eq(1)
    print('values--->',values_by_rt)
    routes_with_crowding = values_by_rt[values_by_rt == False].index
    print('rts_w_crowd--->',routes_with_crowding)
    print(len(routes_with_crowding))
    if list(routes_with_crowding) == ['']:
        print('No actual crowding data!')
        return
    
    ##TODO too small for MBTA but need memory for gtfs stops, implement batching every 10**6 on final.
    i = 0
    while i < row_count:
        if route_data:
            gdf_crowding, _count = positions_from_db(db_name, 5*10**5, 
                                         offset=i, routes=routes_with_crowding)
        else:
            gdf_crowding, _count = positions_from_db(db_name, 5*10**5, 
                                         offset=i,)
            gdf_crowding = route_id_from_gtfs(gdf_crowding, gtfs)
#             return gdf_crowding, routes_with_crowding
            gdf_crowding = gdf_crowding[gdf_crowding['route_id'].isin(routes_with_crowding)]
#             return gdf_crowding
        if gdf_crowding.shape[0] == 0:
            break
        gdf_crowding = gdf_from_positions_df(gdf_crowding)
        aggregated_df, geo_df = aggregate_by_stop(gdf_crowding, gtfs)
        if i == 0:
            global db1
            db1 = aggregated_df
            aggregated_dfs = aggregated_df
        else:
            global db2
            db2 = aggregated_df
            aggregated_dfs = aggregated_dfs.add(aggregated_df, fill_value=0)
        i += 5*10**5
    return aggregated_dfs, geo_df


In [10]:
# gold, gold_geo = (agg_positions_with_crowding('moorpark_city_transit', feed_from_path('./data/to_process/moorpark_city_transit/gtfs.zip')))

In [11]:
def in_stop_list(x):
    try:
        test_list = list(stops_per_rt.loc[x['route_id'].iloc[0]])
    except:
        print(f"no stop data for route {x['route_id'].iloc[0]}!")
        return
    return x[x['stop_id'].isin(test_list)]

In [12]:
def filter_actual_stops(df, gtfs):
    
    global _debug0
    _debug0 = df
    
    def in_stop_list(x):
        try:
            test_list = list(stops_per_rt.loc[x['route_id'].iloc[0]])
        except:
            print(f"no stop data for route {x['route_id'].iloc[0]}!")
            return
        return x[x['stop_id'].isin(test_list)]
    
    display(df.head(5))
    display(gtfs.trips.head(5))
    df_routes = df.index.get_level_values('route_id')
    if not df_routes.isin(gtfs.trips['route_id']).all():
        print('**** new style')
        gtfs_routes = gtfs.routes[['route_id', 'route_short_name']].set_index('route_id')
        trips = gtfs.trips.set_index('route_id').join(gtfs_routes)
        trips = trips.reset_index(drop=True)
        
        global _trips0
        _trips0 = trips
        
        trips = trips.rename(columns={'route_short_name':'route_id'})
        global _trips1
        _trips1 = trips
        
        trip_indexed = trips.set_index('trip_id')[['route_id', 'direction_id']]
        stops_per_rt = gtfs.stop_times.set_index('trip_id').join(trip_indexed).groupby('route_id')['stop_id'].unique()
    else:
        trip_indexed = gtfs.trips.set_index('trip_id')[['route_id', 'direction_id']]
        stops_per_rt = gtfs.stop_times.set_index('trip_id').join(trip_indexed).groupby('route_id')['stop_id'].unique()

        #     return stops_per_rt
    global _debug1
    _debug1 = df
    
    reset = df.reset_index()
    rt_group = reset.groupby('route_id')
    
    print('stops_per_rt--->', stops_per_rt)
    df = rt_group.apply(in_stop_list)
    global _debug2
    _debug2 = df
#     return df
    df = df.set_index('stop_id', append=True).set_index('hour', append=True).droplevel(1)
    
    return df

In [70]:
# mbta_filtered.to_parquet('./data/mbta/mbta_filtered.parquet')

In [71]:
##approx Aug 17 to Sep 4... 
##TODO implement timestamping in df fetch, or from S3 files...

In [12]:
# mbta_filtered = pd.read_parquet('./data/mbta/mbta_filtered.parquet')

In [75]:
# mbta_geo.to_file('./data/mbta/mbta_geo.geojson', driver="GeoJSON")

In [14]:
# mbta_geo = gpd.read_file('./data/mbta/mbta_geo.geojson').set_index('stop_id', drop=True)

In [13]:
def feeds_from_files(folder_path):
    '''
    '''
    feeds = {}
    subdirs = [x[0] for x in os.walk(folder_path)]
    for subdir in subdirs[1:]:
        agency = subdir.split('/')[-1]
        print(agency)
#         continue
#         feeds[agency] = {}
        for feed in os.listdir(subdir):
            if feed[0] == '.':
                continue
            if feed[-4:] != '.zip':
                continue
#             datestr = feed.split('_')[0]
#             date = dt.datetime.strptime(datestr,'%Y%m%d')
#             feeds[agency] = feed_from_path(subdir+'/'+feed)
            gtfs = feed_from_path(subdir+'/'+feed)
#         try:
        agency_df, agency_geo = agg_positions_with_crowding(agency, gtfs)
        agency_filtered = filter_actual_stops(agency_df, gtfs)
        agency_filtered.to_parquet(f'{subdir}/{agency}_filtered.parquet')
        agency_geo.to_file(f'{subdir}/{agency}_geo.geojson', driver="GeoJSON")
#         except:
#             print(f'No apparent crowding data for {agency}')
    return 

In [36]:
feeds_from_files('./data/processed/')

rock_reg_metro
Agg runs!
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 100000 


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,position_latitude,position_longitude,timestamp,route_short_name
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1051-Weekday,307599,2,15:24:00,20200918,3395,STANDING_ROOM_ONLY,34.7216,-92.2816,2020-09-18 15:38:28,2
1051-Weekday,307568,2,15:24:00,20200918,3395,STANDING_ROOM_ONLY,34.7217,-92.2853,2020-09-18 15:37:42,2
1051-Weekday,307537,2,15:24:00,20200918,3395,STANDING_ROOM_ONLY,34.7217,-92.2862,2020-09-18 15:36:55,2
1051-Weekday,307506,2,15:24:00,20200918,3395,STANDING_ROOM_ONLY,34.7249,-92.2861,2020-09-18 15:36:09,2
1051-Weekday,307475,2,15:24:00,20200918,3395,STANDING_ROOM_ONLY,34.7269,-92.2847,2020-09-18 15:35:23,2


values---> route_id
10    False
11    False
13    False
14    False
16    False
18    False
2     False
22    False
23    False
3     False
4     False
5     False
6     False
8     False
Name: occupancy_status, dtype: bool
rts_w_crowd---> Index(['10', '11', '13', '14', '16', '18', '2', '22', '23', '3', '4', '5', '6',
       '8'],
      dtype='object', name='route_id')
14
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 500000 


  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,stop_id,stop_name,geometry
0,1506992,Pulaski Tech South,"POLYGON ((-5630515.210 913929.759, -5630516.41..."
1,1506997,13328 Frontage Rd,"POLYGON ((-5632052.940 913610.556, -5632054.14..."
2,1506998,13325 Frontage Rd,"POLYGON ((-5632432.691 913291.802, -5632433.89..."
3,1520662,13001 Frontage Rd,"POLYGON ((-5627800.249 913998.481, -5627801.45..."
4,1506999,12819 Frontage Rd,"POLYGON ((-5627492.375 913986.838, -5627493.57..."


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,route_short_name,geometry
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1051-Weekday,307599,2,15:24:00,20200918,3395,STANDING_ROOM_ONLY,2020-09-18 15:38:28,2,POINT (-5578027.886 930151.677)
1051-Weekday,307568,2,15:24:00,20200918,3395,STANDING_ROOM_ONLY,2020-09-18 15:37:42,2,POINT (-5579106.926 930457.740)
1051-Weekday,307537,2,15:24:00,20200918,3395,STANDING_ROOM_ONLY,2020-09-18 15:36:55,2,POINT (-5579371.546 930523.532)
1051-Weekday,307506,2,15:24:00,20200918,3395,STANDING_ROOM_ONLY,2020-09-18 15:36:09,2,POINT (-5579058.819 931655.743)
1051-Weekday,307475,2,15:24:00,20200918,3395,STANDING_ROOM_ONLY,2020-09-18 15:35:23,2,POINT (-5578470.140 932265.605)


Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,MANY_SEATS_AVAILABLE,STANDING_ROOM_ONLY
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1421629,10,6,78.0,56.0,
1421629,10,7,130.0,31.0,
1421629,10,8,86.0,135.0,2.0
1421629,10,9,71.0,112.0,4.0
1421629,10,10,15.0,112.0,5.0


Unnamed: 0,trip_id,route_id,service_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible
0,47452-Weekday,1545,0,Outbound,,0,226399,11069,
1,50718-Weekday,1532,0,Inbound,,1,226383,11171,
2,50455-Weekday,2159,0,Southbound,,0,226401,14198,
3,48745-Weekday,1530,0,Inbound,,1,226394,13476,
4,51126-Weekday,1541,0,Inbound,,1,226377,14188,


**** new style
stops_per_rt---> route_id
10    [1423179, 1476985, 1470055, 1476986, 1476987, ...
11    [1423179, 1476985, 1482545, 1482546, 1482547, ...
13    [1423179, 1476985, 1482545, 1482546, 1493485, ...
14    [1423179, 1476985, 1482545, 1482546, 1493350, ...
16    [1502588, 1502598, 1502599, 1502600, 1502601, ...
18    [1502662, 1502663, 1502664, 1502665, 1502666, ...
2     [1421629, 1460385, 1460386, 1465568, 1465569, ...
22    [1650493, 1505927, 1505928, 1505929, 1505930, ...
23    [1421629, 1493535, 1493536, 1493537, 1493538, ...
3     [1423179, 1460385, 1460386, 1460387, 1471425, ...
4     [1423179, 1476985, 1470055, 1476986, 1476987, ...
5     [1423179, 1476985, 1482545, 1482546, 1482547, ...
6     [1421629, 1460385, 1460386, 1465568, 1465569, ...
8     [1490185, 1427192, 1482735, 1482736, 1482737, ...
Name: stop_id, dtype: object
mbta
Agg runs!
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitud

Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,position_latitude,position_longitude,timestamp
0,5195520,ADDED-1580543343,Red,19:12:00,20200918,R-5466AB2E,EMPTY,42.225,-71.0038,2020-09-18 19:59:26
1,5195521,45295471,Red,20:02:00,20200918,R-5466ADAF,EMPTY,42.2846,-71.0638,2020-09-18 19:59:26
2,5195522,45683708,15,19:35:00,20200918,y1775,MANY_SEATS_AVAILABLE,42.3038,-71.0708,2020-09-18 19:59:26
3,5195523,45684218,23,,20200918,y1854,MANY_SEATS_AVAILABLE,42.2836,-71.064,2020-09-18 19:59:26
4,5195524,45683592,10,19:30:00,20200918,y1826,MANY_SEATS_AVAILABLE,42.3377,-71.0721,2020-09-18 19:59:26


values---> route_id
1                         False
10                        False
100                        True
101                       False
104                       False
                          ...  
Orange                     True
Red                        True
Shuttle-Generic           False
Shuttle-Generic-Green      True
Shuttle-Generic-Orange    False
Name: occupancy_status, Length: 178, dtype: bool
rts_w_crowd---> Index(['1', '10', '101', '104', '106', '108', '109', '11', '110', '111',
       ...
       '89', '9', '90', '91', '92', '93', '95', '99', 'Shuttle-Generic',
       'Shuttle-Generic-Orange'],
      dtype='object', name='route_id', length=129)
129
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,5195442,45772193,354,19:33:00,20200918,y2079,MANY_SEATS_AVAILABLE,2020-09-18 19:59:26,POINT (756712.475 3004902.094)
1,5195443,45592050,83,19:50:00,20200918,y2027,MANY_SEATS_AVAILABLE,2020-09-18 19:59:26,POINT (759636.244 2966430.724)
2,5195444,45772195,354,,20200918,y1458,EMPTY,2020-09-18 19:59:26,POINT (775818.490 2956086.361)
3,5195445,45695463,57,19:45:00,20200918,y0836,FEW_SEATS_AVAILABLE,2020-09-18 19:59:26,POINT (752315.903 2953243.154)
4,5195446,45771858,109,19:38:00,20200918,y1952,MANY_SEATS_AVAILABLE,2020-09-18 19:59:26,POINT (773344.651 2969521.139)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,4442653,45710447,455,12:40:00,20200917.0,y0776,FULL,2020-09-17 12:53:11,POINT (797472.501 2990611.596)
1,4442654,45778515,108,12:37:00,20200917.0,y0424,EMPTY,2020-09-17 12:53:11,POINT (769404.426 2979778.447)
2,4442658,45403896,73,,,y4121,EMPTY,2020-09-17 12:53:11,POINT (758200.483 2961212.978)
3,4442659,45770836,110,12:28:00,20200917.0,y2084,FEW_SEATS_AVAILABLE,2020-09-17 12:53:11,POINT (790576.844 2974462.198)
4,4442661,45831216,240,,,y0757,MANY_SEATS_AVAILABLE,2020-09-17 12:53:11,POINT (780884.982 2928817.657)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,3691728,45710622,117,04:28:00,20200916,y0854,FULL,2020-09-16 04:41:13,POINT (780703.622 2962380.434)
1,3691729,45682801,66,04:45:00,20200916,y1908,EMPTY,2020-09-16 04:41:13,POINT (768655.106 2945190.357)
2,3691730,45524522,32,05:00:00,20200916,y1666,EMPTY,2020-09-16 04:41:13,POINT (761614.348 2935572.724)
3,3691731,45592955,77,05:23:00,20200916,y2044,EMPTY,2020-09-16 04:41:13,POINT (765746.023 2971233.019)
4,3691732,45523970,31,04:58:00,20200916,y1614,MANY_SEATS_AVAILABLE,2020-09-16 04:41:13,POINT (766174.157 2926738.531)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,2917557,45771460,111,15:27:00,20200914,y1996,EMPTY,2020-09-14 15:32:15,POINT (780028.302 2967369.507)
1,2917558,45522422,29,15:20:00,20200914,y1687,FEW_SEATS_AVAILABLE,2020-09-14 15:32:15,POINT (768322.242 2935786.566)
2,2917560,45593652,86,15:25:00,20200914,y2053,FEW_SEATS_AVAILABLE,2020-09-14 15:32:15,POINT (767053.881 2963258.237)
3,2917562,45772015,89,15:30:00,20200914,y2089,MANY_SEATS_AVAILABLE,2020-09-14 15:32:15,POINT (770690.409 2965499.048)
4,2917564,45523544,42,15:31:00,20200914,y1713,FEW_SEATS_AVAILABLE,2020-09-14 15:32:15,POINT (768276.719 2945152.072)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,1426983,45684548,47,06:30:00,20200911,y1783,MANY_SEATS_AVAILABLE,2020-09-11 06:58:29,POINT (773181.413 2948456.277)
1,1426984,45523083,50,06:55:00,20200911,y1713,MANY_SEATS_AVAILABLE,2020-09-11 06:58:29,POINT (756396.880 2922284.510)
2,1426985,45831010,240,06:30:00,20200911,y0811,MANY_SEATS_AVAILABLE,2020-09-11 06:58:29,POINT (780902.433 2875393.948)
3,1426986,45771082,93,,20200911,y2088,MANY_SEATS_AVAILABLE,2020-09-11 06:58:29,POINT (770665.022 2965170.934)
4,1426989,45682409,44,07:05:00,20200911,y1770,EMPTY,2020-09-11 06:58:29,POINT (767180.896 2948098.626)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0,stop_id,stop_name,geometry
0,1,Washington St opp Ruggles St,"POLYGON ((769238.539 2945905.168, 769237.335 2..."
1,10,Theo Glynn Way @ Newmarket Sq,"POLYGON ((773015.876 2945777.456, 773014.672 2..."
2,10000,Tremont St opp Temple Pl,"POLYGON ((774557.780 2954945.924, 774556.577 2..."
3,10003,Albany St opp Randall St,"POLYGON ((770999.551 2946144.896, 770998.347 2..."
4,10005,Albany St opp E Concord St,"POLYGON ((772333.606 2947400.089, 772332.402 2..."


Unnamed: 0,oid,trip_id,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,geometry
0,658423,45771582,111,,20200909.0,y1968,EMPTY,2020-09-09 17:02:18,POINT (775538.968 2957870.615)
1,658424,45695434,57,16:45:00,20200909.0,y0820,FEW_SEATS_AVAILABLE,2020-09-09 17:02:18,POINT (755529.902 2953876.308)
2,658425,45683062,66,16:31:00,20200909.0,y1828,FEW_SEATS_AVAILABLE,2020-09-09 17:02:18,POINT (755665.524 2953767.564)
3,658426,45772022,89,,20200909.0,y2054,MANY_SEATS_AVAILABLE,2020-09-09 17:02:18,POINT (770663.212 2965535.356)
4,658429,45771521,111,,,y2008,MANY_SEATS_AVAILABLE,2020-09-09 17:02:18,POINT (775512.508 2957761.150)


select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where route_id in ('1', '10', '101', '104', '106', '108', '109', '11', '110', '111', '112', '116', '117', '119', '120', '121', '134', '137', '14', '15', '16', '17', '18', '19', '201', '202', '21', '210', '211', '212', '214', '214216', '215', '216', '22', '220', '221', '222', '225', '226', '23', '230', '236', '238', '24', '240', '2427', '245', '26', '27', '29', '30', '31', '32', '33', '34E', '35', '350', '354', '36', '37', '38', '40', '41', '42', '424', '426', '429', '43', '435', '436', '439', '44', '441', '441442', '442', '45', '450', '455', '465', '47', '50', '501', '504', '51', '52', '553', '556', '558', '57', '57A', '60', '61', '62', '627', '64', '66', '67', '68', '69', '7', '70', '708', '71', '72', '73', '74', '747', '75', '77', '78', '79', '8', '80', '83', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '9

Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,FEW_SEATS_AVAILABLE,FULL,MANY_SEATS_AVAILABLE
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,0.0,6.0,,,9.0
1,1,1.0,,,,4.0
1,1,4.0,3.0,1.0,,5.0
1,1,5.0,12.0,2.0,,15.0
1,1,6.0,15.0,2.0,,19.0


Unnamed: 0,route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,trip_route_type,route_pattern_id,bikes_allowed
0,1,FallWeekday,45683746,Harvard,,0,C01-10,10085,1,,1-_-0,1
1,1,FallWeekday,45683748,Harvard,,0,C01-10,10085,1,,1-_-0,1
2,1,FallWeekday,45683750,Harvard,,0,C01-16,10085,1,,1-_-0,1
3,1,FallWeekday,45683751,Harvard,,0,C01-16,10085,1,,1-_-0,1
4,1,FallWeekday,45683752,Harvard,,0,C01-12,10085,1,,1-_-0,1


**** new style
stops_per_rt---> route_id
1      [64, 1, 2, 6, 10003, 57, 58, 10590, 87, 88, 18...
10     [175, 143, 178, 176, 1395, 1396, 1397, 1398, 2...
100    [5271, 9318, 9319, 9045, 5215, 5274, 5275, 527...
101    [45003, 5002, 5031, 5032, 5290, 5291, 5292, 52...
104    [53270, 5289, 5342, 5343, 5344, 5345, 5347, 53...
                             ...                        
SL2    [30250, 30251, 31259, 31255, 31257, 31256, 746...
SL3    [74611, 74612, 74613, 74624, 7096, 74637, 7463...
SL4    [64, 3, 4, 5, 1787, 1788, 5093, 5095, 15095, 6...
SL5    [49001, 8279, 49002, 49003, 5098, 5100, 19402,...
SLW    [74614, 74615, 74616, 74617, 74611, 74612, 746...
Name: stop_id, Length: 168, dtype: object
no stop data for route 214216!
no stop data for route 2427!
no stop data for route 441442!
no stop data for route 62!
no stop data for route 627!
no stop data for route 708!
no stop data for route 747!
no stop data for route Shuttle-Generic!
no stop data for route Shuttle-Generic-Orange!
e

Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,position_latitude,position_longitude,timestamp,route_short_name
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10_Trip1_SP_COVID,66871,Shellmound/Powell,06:30:00,20200918,2633,MANY_SEATS_AVAILABLE,37.8295,-122.267,2020-09-18 07:06:42,Shellmound/Powell
10_Trip1_SP_COVID,66865,Shellmound/Powell,06:30:00,20200918,2633,MANY_SEATS_AVAILABLE,37.8297,-122.269,2020-09-18 07:05:56,Shellmound/Powell
10_Trip1_SP_COVID,66859,Shellmound/Powell,06:30:00,20200918,2633,MANY_SEATS_AVAILABLE,37.8306,-122.274,2020-09-18 07:05:10,Shellmound/Powell
10_Trip1_SP_COVID,66853,Shellmound/Powell,06:30:00,20200918,2633,MANY_SEATS_AVAILABLE,37.8312,-122.279,2020-09-18 07:04:24,Shellmound/Powell
10_Trip1_SP_COVID,66847,Shellmound/Powell,06:30:00,20200918,2633,MANY_SEATS_AVAILABLE,37.8309,-122.28,2020-09-18 07:03:37,Shellmound/Powell


values---> route_id
Hollis               False
Shellmound/Powell    False
Name: occupancy_status, dtype: bool
rts_w_crowd---> Index(['Hollis', 'Shellmound/Powell'], dtype='object', name='route_id')
2
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 500000 


  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,stop_id,stop_name,geometry
0,2309331,MacArthur BART Station,"POLYGON ((-13194867.940 5552144.974, -13194869..."
1,2316832,Park Ave at Pixar,"POLYGON ((-13198177.070 5555825.103, -13198178..."
2,2316833,Hollis at 53rd,"POLYGON ((-13198403.453 5557918.863, -13198404..."
3,2318569,Horton at 59th (AMTRAK),"POLYGON ((-13198362.931 5559900.361, -13198364..."
4,2318570,Hollis at 65th,"POLYGON ((-13197126.520 5561803.138, -13197127..."


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,route_short_name,geometry
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10_Trip1_SP_COVID,66871,Shellmound/Powell,06:30:00,20200918,2633,MANY_SEATS_AVAILABLE,2020-09-18 07:06:42,Shellmound/Powell,POINT (-13195086.171 5552521.872)
10_Trip1_SP_COVID,66865,Shellmound/Powell,06:30:00,20200918,2633,MANY_SEATS_AVAILABLE,2020-09-18 07:05:56,Shellmound/Powell,POINT (-13195524.887 5552907.133)
10_Trip1_SP_COVID,66859,Shellmound/Powell,06:30:00,20200918,2633,MANY_SEATS_AVAILABLE,2020-09-18 07:05:10,Shellmound/Powell,POINT (-13196539.728 5553991.271)
10_Trip1_SP_COVID,66853,Shellmound/Powell,06:30:00,20200918,2633,MANY_SEATS_AVAILABLE,2020-09-18 07:04:24,Shellmound/Powell,POINT (-13197615.923 5554984.753)
10_Trip1_SP_COVID,66847,Shellmound/Powell,06:30:00,20200918,2633,MANY_SEATS_AVAILABLE,2020-09-18 07:03:37,Shellmound/Powell,POINT (-13197917.163 5555056.471)


Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,MANY_SEATS_AVAILABLE,STANDING_ROOM_ONLY
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2309331,Hollis,5,45.0,8.0,
2309331,Hollis,6,294.0,37.0,
2309331,Hollis,7,279.0,89.0,1.0
2309331,Hollis,8,252.0,104.0,1.0
2309331,Hollis,9,239.0,113.0,


Unnamed: 0,route_id,service_id,trip_id,trip_short_name,trip_headsign,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,trip_type,drt_max_travel_time,drt_avg_travel_time,drt_advance_book_min,drt_pickup_message,drt_drop_off_message,continuous_pickup_message,continuous_drop_off_message
0,4817,c_19355_b_25565_d_31,2_Trip8_H_COVID,2_Trip8_H_COVID,Hollis,0,2,p_9025,,,,1.0t+0.00,1.0t+0.00,0.0,,,,
1,4817,c_19355_b_25565_d_31,3_Trip8_H_COVID,3_Trip8_H_COVID,Hollis,0,3,p_9025,,,,1.0t+0.00,1.0t+0.00,0.0,,,,
2,4817,c_19355_b_25565_d_31,1_Trip10_H_COVID,1_Trip10_H_COVID,Hollis,0,1,p_9025,,,,1.0t+0.00,1.0t+0.00,0.0,,,,
3,4817,c_19355_b_25565_d_31,4_Trip5_H_COVID,4_Trip5_H_COVID,Hollis,0,4,p_9025,,,,1.0t+0.00,1.0t+0.00,0.0,,,,
4,4817,c_19355_b_25565_d_31,2_Trip9_H_COVID,2_Trip9_H_COVID,Hollis,0,2,p_9025,,,,1.0t+0.00,1.0t+0.00,0.0,,,,


**** new style
stops_per_rt---> route_id
Hollis               [2309331, 2319181, 2316832, 2319183, 2316833, ...
Shellmound/Powell    [2309331, 2318580, 2319195, 2319196, 2319197, ...
Name: stop_id, dtype: object
pullman_transit
Agg runs!
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 100000 


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,position_latitude,position_longitude,timestamp,route_short_name
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
RF1-1,58518,Blue,06:30:00,20200918,3472,EMPTY,46.7318,-117.182,2020-09-18 07:02:51,Blue
RF1-1,58514,Blue,06:30:00,20200918,3472,EMPTY,46.7309,-117.179,2020-09-18 07:02:05,Blue
RF1-1,58510,Blue,06:30:00,20200918,3472,EMPTY,46.7322,-117.175,2020-09-18 07:01:20,Blue
RF1-1,58506,Blue,06:30:00,20200918,3472,EMPTY,46.7344,-117.172,2020-09-18 07:00:34,Blue
RF1-1,58502,Blue,06:30:00,20200918,3472,EMPTY,46.7335,-117.167,2020-09-18 06:59:47,Blue


values---> route_id
Blue              False
Coug A Express    False
Crimson           False
Gray              False
Loop              False
Paradise          False
Silver            False
Wheat             False
Name: occupancy_status, dtype: bool
rts_w_crowd---> Index(['Blue', 'Coug A Express', 'Crimson', 'Gray', 'Loop', 'Paradise',
       'Silver', 'Wheat'],
      dtype='object', name='route_id')
8
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 500000 


  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,stop_id,stop_name,geometry
0,3053989,Merman at Pine Ridge,"POLYGON ((-10284421.373 7557321.979, -10284422..."
1,3053990,Westwood at Birch Hills,"POLYGON ((-10283679.879 7557140.534, -10283681..."
2,3053991,Westwood at Maple Valley,"POLYGON ((-10283505.307 7556180.383, -10283506..."
3,3053992,Northwood at Aspen Village,"POLYGON ((-10284480.222 7555890.242, -10284481..."
4,3053993,Terre View at Cougar Ridge,"POLYGON ((-10285694.067 7555813.686, -10285695..."


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,route_short_name,geometry
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
RF1-1,58518,Blue,06:30:00,20200918,3472,EMPTY,2020-09-18 07:02:51,Blue,POINT (-10294376.275 7556315.659)
RF1-1,58514,Blue,06:30:00,20200918,3472,EMPTY,2020-09-18 07:02:05,Blue,POINT (-10293895.275 7555647.339)
RF1-1,58510,Blue,06:30:00,20200918,3472,EMPTY,2020-09-18 07:01:20,Blue,POINT (-10292787.165 7555542.905)
RF1-1,58506,Blue,06:30:00,20200918,3472,EMPTY,2020-09-18 07:00:34,Blue,POINT (-10291727.409 7555850.084)
RF1-1,58502,Blue,06:30:00,20200918,3472,EMPTY,2020-09-18 06:59:47,Blue,POINT (-10290813.645 7554925.149)


Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,MANY_SEATS_AVAILABLE,STANDING_ROOM_ONLY
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3052090,Blue,6,68.0,,
3052090,Blue,7,106.0,21.0,
3052090,Blue,8,115.0,59.0,
3052090,Blue,9,95.0,66.0,
3052090,Blue,10,79.0,42.0,


Unnamed: 0,trip_id,route_id,service_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible
0,RF2-1,3138,0,Outbound,,0,227803,14948,
1,RF1-1,3135,0,Loop,,0,227802,14927,
2,RF3-1,3544,0,Loop,,0,227804,14589,
3,RF4-1,3137,0,Loop,,0,227805,14951,
4,RF33-1,3138,0,Outbound,,0,227810,14948,


**** new style
stops_per_rt---> route_id
Blue              [3052090, 3052091, 4560304, 3052099, 3052100, ...
Coug A Express    [3052108, 3052109, 4582127, 4582129, 3053995, ...
Crimson           [3052279, 3053451, 3053453, 3053454, 3053455, ...
Gray              [3053996, 3052120, 3052121, 3052123, 3052124, ...
Lentil            [3052128, 3052281, 3053997, 3053998, 3053999, ...
Loop              [3052090, 3052091, 3053181, 3053182, 3052119, ...
Paradise          [3052090, 3678509, 3052770, 3052771, 3052772, ...
Silver            [3052090, 3052276, 3052277, 3052278, 3052279, ...
Wheat             [3052128, 3052281, 3053997, 3053998, 3053999, ...
Name: stop_id, dtype: object
vctc_intercity
Agg runs!
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 100000 


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,position_latitude,position_longitude,timestamp,route_short_name
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
139-1,187457,Route 5,07:06:00,20200911,3768,EMPTY,34.199,-119.178,2020-09-11 07:23:31,Route 5
139-1,187393,Route 5,07:06:00,20200911,3768,EMPTY,34.199,-119.178,2020-09-11 07:22:42,Route 5
139-1,187329,Route 5,07:06:00,20200911,3768,EMPTY,34.199,-119.179,2020-09-11 07:21:51,Route 5
139-1,187265,Route 5,07:06:00,20200911,3768,EMPTY,34.198,-119.181,2020-09-11 07:20:52,Route 5
139-1,187201,Route 5,07:06:00,20200911,3768,EMPTY,34.1976,-119.181,2020-09-11 07:20:04,Route 5


values---> route_id
1           False
10          False
2            True
20          False
30          False
40          False
41          False
50           True
52          False
60          False
70           True
77          False
80          False
80C         False
80X          True
81          False
81B          True
82           True
83          False
84           True
84U         False
85           True
85C          True
86          False
87          False
88           True
99          False
Fillmore     True
Piru        False
Route 1     False
Route 10    False
Route 11    False
Route 15    False
Route 16    False
Route 17    False
Route 19    False
Route 2     False
Route 21    False
Route 23    False
Route 3     False
Route 4     False
Route 42    False
Route 43    False
Route 44    False
Route 5     False
Route 6     False
Route 7     False
Route 8     False
TA          False
Name: occupancy_status, dtype: bool
rts_w_crowd---> Index(['1', '10', '20', '30', '40', '41', '52'

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,stop_id,stop_name,geometry
0,3287703,Wells Ctr.,"POLYGON ((-13126825.952 3963331.158, -13126827..."
1,3289176,Telephone & Wells,"POLYGON ((-13126719.838 3959542.060, -13126721..."
2,3289177,Telephone & Saticoy (Westbound),"POLYGON ((-13129302.574 3959375.582, -13129303..."
3,3289178,Telephone & Scandia,"POLYGON ((-13130102.580 3959310.061, -13130103..."
4,3289179,Telephone & Cachuma (Westbound),"POLYGON ((-13130835.218 3959289.067, -13130836..."


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,route_short_name,geometry
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
139-1,187457,Route 5,07:06:00,20200911,3768,EMPTY,2020-09-11 07:23:31,Route 5,POINT (-13150250.818 3937458.376)
139-1,187393,Route 5,07:06:00,20200911,3768,EMPTY,2020-09-11 07:22:42,Route 5,POINT (-13150250.818 3937458.376)
139-1,187329,Route 5,07:06:00,20200911,3768,EMPTY,2020-09-11 07:21:51,Route 5,POINT (-13150509.587 3937620.242)
139-1,187265,Route 5,07:06:00,20200911,3768,EMPTY,2020-09-11 07:20:52,Route 5,POINT (-13151221.935 3937632.559)
139-1,187201,Route 5,07:06:00,20200911,3768,EMPTY,2020-09-11 07:20:04,Route 5,POINT (-13151299.862 3937507.989)


Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,MANY_SEATS_AVAILABLE,STANDING_ROOM_ONLY
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3284140,99,9.0,2.0,,
3284140,99,10.0,2.0,,
3284140,99,12.0,2.0,,
3284140,99,17.0,1.0,,
3284140,Route 1,4.0,5.0,,


Unnamed: 0,trip_id,route_id,service_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible
0,153-819,3580,0,To Leisure Village,,1,217605,9257,
1,153-830,3580,0,To Community Center,,0,217605,10901,
2,153-900,3580,0,To Leisure Village,,1,217605,10732,
3,153-920,3580,0,To Community Center,,0,217605,10900,
4,153-1000,3580,0,To Leisure Village,,1,217605,9257,


**** new style
stops_per_rt---> route_id
1                [3836292, 3836293, 3836294, 3836295, 3836296, ...
10               [5643933, 5644036, 5644085, 3772796, 3772797, ...
2                [3836288, 3836290, 3836291, 3837596, 3837597, ...
20               [5664552, 5670877, 3736638, 3733271, 3733272, ...
30               [3733289, 3733290, 3736696, 3772277, 3772278, ...
40               [3492868, 5894666, 3497013, 3510428, 3497014, ...
41               [3492868, 3495177, 3506072, 3495179, 3495180, ...
50               [4438837, 4438838, 4438839, 4436113, 4436535, ...
52               [4438837, 4438838, 4446580, 4446581, 4446582, ...
52X              [4439889, 4439892, 4436535, 4436113, 4438839, ...
60               [4439889, 4439890, 4461540, 4461539, 4461538, ...
70               [4438838, 4438837, 4466116, 4466117, 4466118, ...
77               [4477568, 4477569, 4471098, 4477570, 4466119, ...
80               [4481674, 4481675, 4481676, 4481677, 4481678, ...
80C              [448

Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,position_latitude,position_longitude,timestamp,route_short_name
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
100,207200,Alt 101,12:55:00,20200918,1407,MANY_SEATS_AVAILABLE,32.4999,-93.7543,2020-09-18 13:46:24,Alt 101
100,207181,Alt 101,12:55:00,20200918,1407,MANY_SEATS_AVAILABLE,32.4996,-93.7563,2020-09-18 13:45:37,Alt 101
100,207162,Alt 101,12:55:00,20200918,1407,MANY_SEATS_AVAILABLE,32.4996,-93.7563,2020-09-18 13:44:51,Alt 101
100,207143,Alt 101,12:55:00,20200918,1407,MANY_SEATS_AVAILABLE,32.4996,-93.7563,2020-09-18 13:44:04,Alt 101
100,207124,Alt 101,12:55:00,20200918,1407,MANY_SEATS_AVAILABLE,32.4996,-93.7563,2020-09-18 13:43:18,Alt 101


values---> route_id
Alt 101     False
Alt 102     False
Alt 103     False
Alt 104     False
Alt 105     False
Alt 106     False
Alt 107     False
Alt 108     False
Route 28    False
Name: occupancy_status, dtype: bool
rts_w_crowd---> Index(['Alt 101', 'Alt 102', 'Alt 103', 'Alt 104', 'Alt 105', 'Alt 106',
       'Alt 107', 'Alt 108', 'Route 28'],
      dtype='object', name='route_id')
9
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 500000 


  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,stop_id,stop_name,geometry
0,2424431,Dr. Martin Luther King Dr & David Raines,"POLYGON ((-6239051.513 273523.154, -6239052.71..."
1,5016724,Dr. Martin Luther King Dr & Hill St (Inbound),"POLYGON ((-6237710.401 273161.192, -6237711.60..."
2,2581480,Dr. Martin Luther King Dr & Audrey (Inbound),"POLYGON ((-6236415.463 272821.217, -6236416.66..."
3,3148465,Dr. Martin Luther King Dr & Linear St (Inbound),"POLYGON ((-6235141.094 272469.640, -6235142.29..."
4,2424338,Dr. Martin Luther King Dr & Legardy St (Inbound),"POLYGON ((-6233941.284 272148.780, -6233942.48..."


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,route_short_name,geometry
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
100,207200,Alt 101,12:55:00,20200918,1407,MANY_SEATS_AVAILABLE,2020-09-18 13:46:24,Alt 101,POINT (-6220901.534 251946.072)
100,207181,Alt 101,12:55:00,20200918,1407,MANY_SEATS_AVAILABLE,2020-09-18 13:45:37,Alt 101,POINT (-6221534.033 252000.429)
100,207162,Alt 101,12:55:00,20200918,1407,MANY_SEATS_AVAILABLE,2020-09-18 13:44:51,Alt 101,POINT (-6221534.033 252000.429)
100,207143,Alt 101,12:55:00,20200918,1407,MANY_SEATS_AVAILABLE,2020-09-18 13:44:04,Alt 101,POINT (-6221534.033 252000.429)
100,207124,Alt 101,12:55:00,20200918,1407,MANY_SEATS_AVAILABLE,2020-09-18 13:43:18,Alt 101,POINT (-6221534.033 252000.429)


Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,MANY_SEATS_AVAILABLE,STANDING_ROOM_ONLY
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2416145,Alt 104,0.0,5.0,,
2416145,Alt 104,5.0,17.0,1.0,
2416145,Alt 104,6.0,49.0,26.0,
2416145,Alt 104,7.0,24.0,11.0,
2416145,Alt 104,8.0,41.0,10.0,


Unnamed: 0,trip_id,route_id,service_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible
0,398,4301,0,Inbound,,1,225968,13140,
1,397,4299,0,Inbound,,1,225965,11729,
2,385,4295,0,Inbound,,1,225966,12787,
3,171,4295,0,Outbound,,0,225940,14881,
4,261,4298,0,,,0,225938,12952,


**** new style
stops_per_rt---> route_id
Alt 101     [3819509, 2426386, 5877745, 2426387, 5834207, ...
Alt 102     [3819509, 2423780, 2423781, 2423782, 2423783, ...
Alt 103     [3819509, 5338113, 2462692, 2462693, 2462694, ...
Alt 104     [3819509, 2461118, 3141367, 3142614, 2550434, ...
Alt 105     [3819509, 3351621, 2428581, 2430116, 2428582, ...
Alt 106     [3819509, 2425208, 2426551, 2426552, 2426553, ...
Alt 107     [3819509, 3351621, 2428581, 2430116, 2428582, ...
Alt 108     [2428602, 2428603, 2428604, 2428605, 2428606, ...
Route 28    [3819509, 4397135, 4310871, 4713220, 2822207, ...
Name: stop_id, dtype: object
mmvta
Agg runs!
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 100000 


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,position_latitude,position_longitude,timestamp,route_short_name
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,104771,ComA,04:25:00,20200918,772,EMPTY,40.4438,-79.9942,2020-09-18 06:13:40,ComA
1,104762,ComA,04:25:00,20200918,772,EMPTY,40.4436,-79.9955,2020-09-18 06:12:54,ComA
1,104753,ComA,04:25:00,20200918,772,EMPTY,40.4413,-79.9977,2020-09-18 06:12:08,ComA
1,104744,ComA,04:25:00,20200918,772,EMPTY,40.4394,-79.9991,2020-09-18 06:11:22,ComA
1,104736,ComA,04:25:00,20200918,772,EMPTY,40.438,-80.0,2020-09-18 06:10:36,ComA


values---> route_id
ComA       False
E3         False
Ex1        False
Ex2        False
L2         False
L3         False
Local 1    False
Name: occupancy_status, dtype: bool
rts_w_crowd---> Index(['ComA', 'E3', 'Ex1', 'Ex2', 'L2', 'L3', 'Local 1'], dtype='object', name='route_id')
7
select oid, trip_id, route_id, trip_start_time, trip_start_date, vehicle_id, occupancy_status, position_latitude, position_longitude, timestamp from vehicle_positions where weekday(timestamp) in (0,1,2,3,4) order by timestamp desc limit 500000 


  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,stop_id,stop_name,geometry
0,426264,MMVTA Bus Yard Departing,"POLYGON ((-1675457.509 2276282.351, -1675458.7..."
1,344310,Meldon @ 11th,"POLYGON ((-1675749.324 2277906.834, -1675750.5..."
2,321520,Meldon @ 9th (southbound),"POLYGON ((-1675707.892 2276793.351, -1675709.0..."
3,344311,Meldon @ 7th (southbound),"POLYGON ((-1675857.837 2275850.673, -1675859.0..."
4,344312,Meldon @ 6th (soutbound),"POLYGON ((-1675943.787 2275327.670, -1675944.9..."


Unnamed: 0_level_0,oid,route_id,trip_start_time,trip_start_date,vehicle_id,occupancy_status,timestamp,route_short_name,geometry
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,104771,ComA,04:25:00,20200918,772,EMPTY,2020-09-18 06:13:40,ComA,POINT (-1705222.315 2375600.037)
1,104762,ComA,04:25:00,20200918,772,EMPTY,2020-09-18 06:12:54,ComA,POINT (-1705589.767 2375563.497)
1,104753,ComA,04:25:00,20200918,772,EMPTY,2020-09-18 06:12:08,ComA,POINT (-1706282.721 2374790.295)
1,104744,ComA,04:25:00,20200918,772,EMPTY,2020-09-18 06:11:22,ComA,POINT (-1706739.535 2374140.014)
1,104736,ComA,04:25:00,20200918,772,EMPTY,2020-09-18 06:10:36,ComA,POINT (-1707039.687 2373657.220)


Unnamed: 0_level_0,Unnamed: 1_level_0,occupancy_status,EMPTY,MANY_SEATS_AVAILABLE,STANDING_ROOM_ONLY
stop_id,route_id,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1321285,L2,7.0,5.0,,
1321285,L2,9.0,4.0,,
1321285,L2,11.0,4.0,1.0,
1321285,L2,12.0,1.0,2.0,
1321285,L2,13.0,4.0,1.0,


Unnamed: 0,trip_id,route_id,service_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible
0,1,1212,0,To Pittsburgh,,0,224045,1671,
1,2,1212,0,To Pittsburgh,,0,224046,1671,
2,3,1212,0,To Pittsburgh,,0,224047,1671,
3,4,1212,0,To Pittsburgh,,0,224048,1671,
4,5,1212,0,To Pittsburgh,,0,224049,1671,


**** new style
stops_per_rt---> route_id
Cal C      [492534, 344829, 5241287, 344831, 344832, 3448...
ComA       [426264, 344310, 321520, 344311, 344312, 34431...
E3         [426264, 5238962, 4827668, 345120, 344636, 344...
Ex1        [426264, 344271, 344273, 344282, 344640, 34428...
Ex2        [426264, 344318, 344334, 345120, 344584, 34463...
L2         [446114, 344909, 345142, 344915, 344916, 34491...
L3         [345010, 5221260, 344555, 5988779, 5235765, 52...
Local 1    [345010, 5221260, 344555, 5221421, 5221422, 34...
Name: stop_id, dtype: object


In [None]:
## clean shit up, try running, target 2-3 to debug...

### next: 
   * view logic, calculate metric
   * visualizer
