In [3]:
import calitp
from calitp.tables import tbl
from siuba import *

import pandas as pd
import numpy as np
import geopandas as gpd
import fiona

### Plan

* start from all shapes for a single operator
* for each shape, assess for each hour of day x weekday/sat/sun (with day of week/time of day cols):
    * (can loop over weekday/sat/sun here at trip+stop_times join)
    * existing frequency at midpoint stop
    * current total runtime
* preserve route_id in main table
* (separately) calculate operator/routes/shapes in each Census tract
* can then join tracts to service info; characterize route service target geographically
    * generate additional frequencies/service hours/service miles for service target
* (optional) generate hypothetical trips table
* (optional) assign service hours/miles to tracts (not sure why we'd need this yet)

In [4]:
def get_operator_views(itp_id):
    '''Returns relevant views from the data warehouse for a single transit operator.

    Parameters
    ----------
    itp_id : int or str
        Cal-ITP operator ID. It is converted to ``int`` once on entry, so
        ``300`` and ``'300'`` behave identically in every query and in the
        SacRT special case below.

    Returns
    -------
    tuple
        ``(shapes, trips, stop_times, stops)`` where

        * ``shapes`` is a GeoDataFrame of the operator's shape points,
          projected to EPSG:6414 (meters);
        * ``trips`` is a DataFrame of bus/trolleybus trips (route_type 3 or 11)
          running on the selected service_id(s);
        * ``stop_times`` is a DataFrame of stop_times restricted to those trips,
          without the ``calitp_itp_id`` / ``calitp_extracted_at`` columns;
        * ``stops`` is a GeoDataFrame of stop_id/stop_lat/stop_lon, projected
          to EPSG:6414 (meters).

    Raises
    ------
    IndexError
        If the operator has no Wednesday service date in January-October from
        which to pick a reference day (not applicable to the SacRT hardcode).
    '''
    # Normalize once: the original cast in most filters but not in the stops
    # query or the SacRT comparisons, so a string ID took inconsistent paths.
    itp_id = int(itp_id)
    sacrt_itp_id = 273

    shapes = tbl.gtfs_schedule.shapes() >> filter(_.calitp_itp_id == itp_id) >> collect()
    shapes = gpd.GeoDataFrame(shapes,
                              geometry = gpd.points_from_xy(shapes.shape_pt_lon, shapes.shape_pt_lat),
                              crs = 'EPSG:4326').to_crs('EPSG:6414') ## https://epsg.io/6414 (meters)
    print('loaded shapes')

    if itp_id == sacrt_itp_id:
        service_ids = ['1'] ## temporary hardcode for SacRT
    else:
        daily_service = (tbl.views.gtfs_schedule_fact_daily_service()
                         >> filter(_.calitp_itp_id == itp_id) >> collect())
        if daily_service.empty:
            raise IndexError(f'no daily service rows found for calitp_itp_id {itp_id}')
        # Newest dates first, so the first remaining row is the most recent one.
        daily_service = daily_service >> arrange(-_.service_date)
        is_wednesday = daily_service['service_date'].apply(lambda x: x.weekday() == 2)
        # NOTE(review): month < 11 ignores the year, so any January-October
        # Wednesday qualifies -- confirm this is the intended cutoff.
        before_november = daily_service['service_date'].apply(lambda x: x.month < 11)
        candidates = daily_service[is_wednesday & before_november]
        if candidates.empty:
            # Same exception type the unguarded .iloc[0] used to raise, with a usable message.
            raise IndexError(
                f'no Wednesday service date in Oct or earlier for calitp_itp_id {itp_id}')
        ## pick most recent Wednesday from Oct or earlier
        wednesday = candidates[candidates['service_date'] == candidates['service_date'].iloc[0]]
        service_ids = wednesday.service_id

    bus_routes = (tbl.gtfs_schedule.routes()
                    >> filter(_.calitp_itp_id == itp_id)
                    >> filter(_.route_type.isin(['3', '11'])) ## bus and trolleybus
                    >> select(_.route_id) >> collect())
    print('loaded bus routes')

    trips = (tbl.gtfs_schedule.trips()
             >> filter(_.calitp_itp_id == itp_id)
             >> filter(_.service_id.isin(service_ids))
             >> filter(_.route_id.isin(bus_routes.route_id))
             >> collect())
    print('loaded trips')

    # All of the operator's stop_times are collected first and then narrowed to
    # the selected trips locally (order kept from the original implementation).
    stop_times = (tbl.gtfs_schedule.stop_times()
                  >> filter(_.calitp_itp_id == itp_id)
                  >> collect())
    stop_times =  (stop_times >> filter(_.trip_id.isin(trips.trip_id))
                  >> select(-_.calitp_itp_id, -_.calitp_extracted_at))
    print('loaded stop times')

    stops = (tbl.gtfs_schedule.stops()
             >> filter(_.calitp_itp_id == itp_id)
             >> select(_.stop_id, _.stop_lat, _.stop_lon)
             >> collect())
    stops = gpd.GeoDataFrame(stops,
                     geometry = gpd.points_from_xy(stops.stop_lon, stops.stop_lat),
                     crs = 'EPSG:4326').to_crs('EPSG:6414') ## https://epsg.io/6414 (meters)
    print('loaded stops')

    return shapes, trips, stop_times, stops

In [5]:
# Load the warehouse views for ITP ID 300.
# NOTE(review): presumably Big Blue Bus, going by the variable name -- confirm.
# The result is the tuple (shapes, trips, stop_times, stops).
bbb_views = get_operator_views(300)

loaded shapes
loaded bus routes
loaded trips
loaded stop times
loaded stops


In [7]:
# Index 1 of the returned tuple: the trips table.
bbb_views[1]

Unnamed: 0,calitp_itp_id,calitp_url_number,route_id,service_id,trip_id,shape_id,trip_headsign,trip_short_name,direction_id,block_id,wheelchair_accessible,bikes_allowed,calitp_extracted_at
0,300,0,3328,10,856000,25311,UCLA,,0,103558,0,0,2021-10-21
1,300,0,3328,10,855999,25311,UCLA,,0,103557,0,0,2021-10-21
2,300,0,3328,10,855982,25313,UCLA,,0,103554,0,0,2021-10-21
3,300,0,3328,10,855986,25313,UCLA,,0,103568,0,0,2021-10-21
4,300,0,3328,10,855987,25313,UCLA,,0,103556,0,0,2021-10-21
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1578,300,0,3349,77710,860261,25387,SMC Bundy Campus,,1,103857,0,0,2021-10-21
1579,300,0,3349,77710,860263,25387,SMC Bundy Campus,,1,103859,0,0,2021-10-21
1580,300,0,3349,77710,860273,25387,SMC Bundy Campus,,1,103857,0,0,2021-10-21
1581,300,0,3339,85510,859769,25357,Playa Vista,,1,104275,0,0,2021-10-21


In [6]:
# Index 0 of the returned tuple: the shapes GeoDataFrame (shape points in EPSG:6414).
bbb_views[0]

Unnamed: 0,calitp_itp_id,calitp_url_number,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled,calitp_extracted_at,geometry
0,300,0,25387,34.019639,-118.48052,12,0.7617,2021-10-21,POINT (140335.914 -442863.952)
1,300,0,25386,34.01539,-118.4618,66,3.2335,2021-10-21,POINT (142072.348 -443307.328)
2,300,0,25387,34.01645,-118.443499,96,5.8188,2021-10-21,POINT (143760.597 -443162.177)
3,300,0,25386,34.018,-118.44438,20,0.7332,2021-10-21,POINT (143676.413 -442991.602)
4,300,0,25386,34.021119,-118.45202,49,2.1315,2021-10-21,POINT (142965.200 -442657.224)
...,...,...,...,...,...,...,...,...,...
22213,300,0,25403,34.04798,-118.526739,114,2.6709,2021-10-21,POINT (136018.725 -439787.936)
22214,300,0,25403,34.03493,-118.519629,154,4.4796,2021-10-21,POINT (136697.704 -441225.138)
22215,300,0,25403,34.04878,-118.533319,77,1.9319,2021-10-21,POINT (135409.902 -439708.623)
22216,300,0,25403,34.044759,-118.543839,20,0.5947,2021-10-21,POINT (134445.581 -440169.584)


In [None]:
def find_midpoint_stop(shape, 