In [49]:
import calitp
from calitp.tables import tbl
from siuba import *

import pandas as pd
import numpy as np
import geopandas as gpd
import fiona
import datetime as dt

### Plan

* start from all shapes for a single operator
* for each shape, assess for each hour of day x weekday/sat/sun (with day of week/time of day cols):
    * (can loop over weekday/sat/sun here at trip+stop_times join)
    * existing frequency at midpoint stop
    * current total runtime
* preserve route_id in main table
* (separately) calculate operator/routes/shapes in each Census tract
* can then join tracts to service info; characterize route service target geographically
    * generate additional frequencies/service hours/service miles for service target
* (optional) generate hypothetical trips table
* (optional) assign service hours/miles to tracts (not sure why we'd need this yet)

In [3]:
def find_midpoint_stop(shape, trips, stop_times):
    '''Placeholder: not implemented yet.

    Accepts ``shape``, ``trips`` and ``stop_times`` but ignores all three and
    always returns ``None``.
    TODO: presumably meant to locate the midpoint stop of a shape (see the
    plan above) -- confirm the intended contract before implementing.
    '''
    return None

In [4]:
# Weekday sample (2021-10-07, a Thursday): in-service trips for calitp_itp_id 300.
# Tip: add `>> show_query()` before `collect()` to inspect the generated SQL.
weekday = (
    tbl.views.gtfs_schedule_fact_daily_trips()
    >> select(_.calitp_itp_id, _.service_date, _.trip_key, _.trip_id, _.is_in_service)
    >> filter(
        _.calitp_itp_id == 300,
        _.service_date == '2021-10-07',
        _.is_in_service == True,
    )
    >> collect()
)

In [5]:
# Saturday sample (2021-10-09): in-service trips for calitp_itp_id 300.
# Tip: add `>> show_query()` before `collect()` to inspect the generated SQL.
saturday = (
    tbl.views.gtfs_schedule_fact_daily_trips()
    >> select(_.calitp_itp_id, _.service_date, _.trip_key, _.trip_id, _.is_in_service)
    >> filter(
        _.calitp_itp_id == 300,
        _.service_date == '2021-10-09',
        _.is_in_service == True,
    )
    >> collect()
)

In [6]:
# Sunday sample (2021-10-10): in-service trips for calitp_itp_id 300.
# Tip: add `>> show_query()` before `collect()` to inspect the generated SQL.
sunday = (
    tbl.views.gtfs_schedule_fact_daily_trips()
    >> select(_.calitp_itp_id, _.service_date, _.trip_key, _.trip_id, _.is_in_service)
    >> filter(
        _.calitp_itp_id == 300,
        _.service_date == '2021-10-10',
        _.is_in_service == True,
    )
    >> collect()
)

In [7]:
# Preview the collected weekday trips (one row per in-service trip on 2021-10-07).
weekday

Unnamed: 0,calitp_itp_id,service_date,trip_key,trip_id,is_in_service
0,300,2021-10-07,-8005615262662976499,860326,True
1,300,2021-10-07,-2347950696713114201,860356,True
2,300,2021-10-07,5972313532268407090,860329,True
3,300,2021-10-07,-4389330616705953268,860325,True
4,300,2021-10-07,-5369901348763502615,859774,True
...,...,...,...,...,...
1479,300,2021-10-07,3547471617223319058,857155,True
1480,300,2021-10-07,7605641356338795082,857182,True
1481,300,2021-10-07,-8754426279522343550,857208,True
1482,300,2021-10-07,1602920764620440434,857148,True


In [8]:
# Preview the collected Saturday trips (one row per in-service trip on 2021-10-09).
saturday

Unnamed: 0,calitp_itp_id,service_date,trip_key,trip_id,is_in_service
0,300,2021-10-09,-5660938195740595097,857596,True
1,300,2021-10-09,-5934572765200396958,857620,True
2,300,2021-10-09,-2052654702648429925,857598,True
3,300,2021-10-09,8467432339565330534,857627,True
4,300,2021-10-09,-6502762296593102424,857632,True
...,...,...,...,...,...
865,300,2021-10-09,-326715567438086592,856928,True
866,300,2021-10-09,-8932879938120206269,856914,True
867,300,2021-10-09,-7676239828272945270,856883,True
868,300,2021-10-09,-3501791437994681494,856895,True


In [9]:
# Preview the collected Sunday trips (one row per in-service trip on 2021-10-10).
sunday

Unnamed: 0,calitp_itp_id,service_date,trip_key,trip_id,is_in_service
0,300,2021-10-10,6407675930774017823,857684,True
1,300,2021-10-10,7556948753016434442,857699,True
2,300,2021-10-10,1775399891315883904,857740,True
3,300,2021-10-10,-3725519161234949155,857710,True
4,300,2021-10-10,-2619155360884474825,857719,True
...,...,...,...,...,...
765,300,2021-10-10,6790961670397659471,856988,True
766,300,2021-10-10,8852637779826185595,858404,True
767,300,2021-10-10,-1997155012495959257,857057,True
768,300,2021-10-10,152020242831870601,857043,True


In [17]:
# Route/stop/stop_time keys for every stop event on the weekday trips.
# Deliberately left uncollected: the join to dim_stop_times and the collect()
# happen in a later cell, so this stays a lazy query.
weekday_st = (
    tbl.views.gtfs_schedule_data_feed_trip_stops()
    >> select(_.route_id, _.stop_time_key, _.stop_id, _.trip_key)
    >> filter(_.trip_key.isin(weekday.trip_key))
)

In [18]:
# Preview the (uncollected) weekday trip-stop query.
weekday_st

Unnamed: 0,route_id,stop_time_key,stop_id,trip_key
0,3330,1548626839242107096,1668,4459971720766894158
1,3330,3299383018333716763,539,3343321908651692874
2,3343,7802880374131013120,434,-3293551457937161370
3,3330,5409390141539688849,38,7552140708196582455
4,3345,5908392781944862054,62,-5486027458831834026


In [54]:
# trip_key -> shape_id lookup for calitp_itp_id 300, restricted to trip records
# whose extracted/deleted window covers the weekday sample date (2021-10-07).
# Not collected here; it is joined into the stop_times query later.
tbl_trips = (
    tbl.views.gtfs_schedule_dim_trips()
    >> filter(
        _.calitp_extracted_at <= '2021-10-07',
        _.calitp_deleted_at > '2021-10-07',
        _.calitp_itp_id == 300,
    )
    >> select(_.trip_key, _.shape_id)
)

In [55]:
# Build the working stop-event table for the weekday sample: stop_times columns,
# restricted (via the joins) to the weekday trip-stops and enriched with each
# trip's shape_id, then pulled into a local DataFrame by collect().
# NOTE(review): neither join passes `how=`; this presumably relies on the SQL
# backend defaulting to an inner join -- confirm.
df = (tbl.views.gtfs_schedule_dim_stop_times()
      >> select(_.trip_id, _.stop_sequence, _.departure_time, _.stop_time_key)
      # Earlier alternative to the join below, kept for reference:
      # >> filter(_.stop_time_key.isin(weekday_st.stop_time_key))
      >> join(_, weekday_st, on = 'stop_time_key')  # adds route_id, stop_id, trip_key
      >> join(_, tbl_trips, on = 'trip_key')  # adds shape_id
      >> collect())

In [56]:
# Preview the collected stop-event table (one row per stop time on a weekday trip).
df

Unnamed: 0,trip_id,stop_sequence,departure_time,stop_time_key,trip_key,route_id,stop_id,shape_id
0,857225,58,07:36:00,-3944156195350464396,-8975224383800726395,3334,1344,25334
1,857535,1,06:25:00,-2454847760193184470,507937176497115447,3336,21,25342
2,857575,1,06:00:00,-5876918209241522447,-7392505570042088157,3337,1512,25348
3,858129,1,11:56:00,2466202161753205595,-425288716733747710,3340,1350,25361
4,858145,2,14:06:19,5693876491150218852,337350927090638005,3340,140,25362
...,...,...,...,...,...,...,...,...
49923,860383,5,16:36:01,4022014887089468492,1328881323144156416,3352,829,25400
49924,860386,5,17:36:01,-536593660429621456,-3034720808708112526,3352,829,25400
49925,860388,6,18:23:26,9053079045269131423,6409280168477922283,3352,431,25400
49926,860361,6,09:32:26,8991050266734813722,75956977623262593,3352,431,25400


## Keep the middle stop for each shape

In [57]:
# Cast stop_id to a 64-bit integer column (overwrites the column in place).
df['stop_id'] = df['stop_id'].astype('int64')

In [58]:
# One "middle" stop_sequence per shape_id.
# Use the lower-interpolated 0.5 quantile instead of median(): median() can land
# between two observed values (even row counts, or non-consecutive stop_sequence
# numbering, which GTFS permits), and truncating that to int can yield a
# stop_sequence that does not exist -- the shape would then silently vanish from
# the later inner join. 'lower' always returns a value actually present in the
# group, and matches the old truncated median whenever sequences are consecutive.
middle_stops = (
    df
    >> group_by(_.shape_id)
    >> summarize(middle_stop = _.stop_sequence.quantile(0.5, interpolation = 'lower'))
)
# Keep an integer dtype so the value can be matched against df.stop_sequence.
middle_stops.middle_stop = middle_stops.middle_stop.astype('int64')

In [64]:
# Rename middle_stop -> stop_sequence so it can serve as a join key, then keep
# only the rows of df that sit at each shape's middle stop_sequence.
middle_st = (
    middle_stops
    >> select(_.stop_sequence == _.middle_stop, _.shape_id)
    >> inner_join(_, df, on = ['shape_id', 'stop_sequence'])
)

In [65]:
# Preview the stop events that fall on each shape's middle stop_sequence.
middle_st

Unnamed: 0,stop_sequence,shape_id,trip_id,departure_time,stop_time_key,trip_key,route_id,stop_id
0,16,25311,856000,05:28:00,-8306767555319805005,-6796483405910857216,3328,1637
1,16,25311,855999,06:13:00,1047345617436979040,5448889784642514610,3328,1637
2,21,25313,855980,22:39:51,1865866210386394891,-8693712239920243661,3328,375
3,21,25313,855987,20:33:25,-8931979372684468948,-8567572727272275352,3328,375
4,21,25313,855990,19:47:32,4042060323765503978,-6869890217226956032,3328,375
...,...,...,...,...,...,...,...,...
1479,4,25402,860355,07:02:00,180980672168379348,5716689547396310102,3352,621
1480,4,25402,860356,07:23:00,-5382019601827416985,-2347950696713114201,3352,621
1481,4,25402,860357,07:48:00,7223959197657317631,-8355402421519374223,3352,621
1482,17,25403,857560,15:14:01,6465188654978151506,-1415168282708859693,3336,1147


In [66]:
# Inspect a single departure_time value to see its raw format before parsing.
middle_st.departure_time.iloc[0]

'05:28:00'

In [67]:
def fix_gtfs_time(gtfs_timestring):
    '''Normalise a GTFS time string to zero-padded 24-hour ``HH:MM:SS`` form.

    GTFS lets the hour field reach or exceed 24 to mark a trip that continues
    past midnight of its service day. The hour is wrapped with modulo 24 (so
    ``'25:10:00'`` -> ``'01:10:00'`` and ``'49:15:30'`` -> ``'01:15:30'``) and
    is always zero-padded to two digits. The "later day" information is
    discarded. Minutes/seconds are passed through unchanged.

    Parameters
    ----------
    gtfs_timestring : str
        Colon-separated time string; surrounding whitespace is ignored.

    Returns
    -------
    str
        The time with the hour wrapped into 0-23 and zero-padded.

    Raises
    ------
    ValueError
        If the hour field is not an integer.
    '''
    hour_text, *remainder = gtfs_timestring.strip().split(':')
    # Modulo (rather than a single "- 24") also handles hours of 48 and above.
    wrapped_hour = int(hour_text) % 24
    return ':'.join([str(wrapped_hour).zfill(2), *remainder])

In [68]:
# Wrap any hour of 24 or more back into the 0-23 range so the strings can be parsed.
middle_st['departure_time'] = middle_st['departure_time'].apply(fix_gtfs_time)

In [69]:
# Parse each HH:MM:SS string into a datetime (the date part defaults to
# 1900-01-01), then pull out the hour of day for hourly bucketing.
middle_st['departure_dt'] = middle_st['departure_time'].apply(
    dt.datetime.strptime, args = ('%H:%M:%S',)
)
middle_st['departure_hour'] = middle_st['departure_dt'].apply(lambda stamp: stamp.hour)

#### Shape trip count by hour (TODO: expand to day-of-week and operator levels)

In [71]:
# Trips per shape per departure hour at the middle stop, largest counts first.
(
    middle_st
    >> count(_.shape_id, _.departure_hour, sort = True)
)

Unnamed: 0,shape_id,departure_hour,n
0,25315,18,7
1,25315,8,6
2,25314,13,6
3,25315,16,6
4,25315,15,6
...,...,...,...
554,25356,7,1
555,25357,14,1
556,25357,16,1
557,25359,17,1


In [53]:
# Look up the route record behind route_id '3328' for calitp_itp_id 300.
(
    tbl.views.gtfs_schedule_dim_routes()
    >> filter(_.calitp_itp_id == 300, _.route_id == '3328')
)

Unnamed: 0,route_key,calitp_itp_id,calitp_url_number,route_id,route_type,agency_id,route_short_name,route_long_name,route_desc,route_url,...,route_continuous_drop_off,agency_name,agency_url,agency_timezone,agency_lang,agency_phone,agency_fare_url,agency_email,calitp_extracted_at,calitp_deleted_at
0,5298754852876447831,300,0,3328,3,6216179,1,Main St & Santa Monica Blvd/UCLA,,http://bigbluebus.com/Routes-and-Schedules/Rou...,...,,Big Blue Bus,http://www.bigbluebus.com,America/Los_Angeles,en,310-451-5444,,,2021-07-27,2099-01-01


In [74]:
# Drill into shape 25315, which tops the hourly trip-count table above.
(
    middle_st
    >> filter(_.shape_id == '25315')
)

Unnamed: 0,stop_sequence,shape_id,trip_id,departure_time,stop_time_key,trip_key,route_id,stop_id,departure_dt,departure_hour
90,23,25315,856128,11:48:53,-8326571985010357471,5934560359711113263,3328,325,1900-01-01 11:48:53,11
91,23,25315,856146,08:46:53,-5578405365431413085,4693523290513956173,3328,325,1900-01-01 08:46:53,8
92,23,25315,856116,13:51:53,-2592193845754151739,-1814272882911542131,3328,325,1900-01-01 13:51:53,13
93,23,25315,856091,18:00:39,-7956451856912978944,7473260928576424592,3328,325,1900-01-01 18:00:39,18
94,23,25315,856087,18:38:15,6614307895849206963,5057111300636375037,3328,325,1900-01-01 18:38:15,18
...,...,...,...,...,...,...,...,...,...,...
161,23,25315,856100,16:32:28,4105401700056768686,6710153630310311938,3328,325,1900-01-01 16:32:28,16
162,23,25315,856151,07:55:53,3495629271157421505,8137427846320301694,3328,325,1900-01-01 07:55:53,7
163,23,25315,856095,17:22:28,8518523843135418333,4913408361454154644,3328,325,1900-01-01 17:22:28,17
164,23,25315,856104,15:51:53,-6054036106565546413,-4086913101323362626,3328,325,1900-01-01 15:51:53,15
