In [1]:
import calitp
from calitp.tables import tbl
from siuba import *

import pandas as pd
import numpy as np
import geopandas as gpd
import fiona
import datetime as dt

from utils import *

### Plan

* start from all shapes for a single operator
* for each shape, assess for each hour of day x weekday/sat/sun (with day of week/time of day cols):
    * (can loop over weekday/sat/sun here at trip+stop_times join)
    * existing frequency at midpoint stop
    * current total runtime
* preserve route_id in main table
* (separately) calculate operator/routes/shapes in each Census tract
* can then join tracts to service info; characterize route service target geographically
    * generate additional frequencies/service hours/service miles for service target
* (optional) generate hypothetical trips table
* (optional) assign service hours/miles to tracts (not sure why we'd need this yet)

In [54]:
# Reference service dates from the utils helper (dict keyed 'thurs'/'sat'/'sun').
# NOTE(review): the queries below hard-code 2021-10-07/09/10 rather than using
# these dates — confirm which set of dates is intended.
get_recent_dates()

{'thurs': '2021-10-28', 'sat': '2021-10-23', 'sun': '2021-10-24'}

In [3]:
# In-service trips for calitp_itp_id 300 on the sample Thursday (2021-10-07).
weekday = (
    tbl.views.gtfs_schedule_fact_daily_trips()
    >> select(_.calitp_itp_id, _.service_date, _.trip_key, _.trip_id, _.is_in_service)
    >> filter(
        _.calitp_itp_id == 300,
        _.service_date == '2021-10-07',
        _.is_in_service == True,
    )
    >> collect()
)

In [4]:
# In-service trips for calitp_itp_id 300 on the sample Saturday (2021-10-09).
saturday = (
    tbl.views.gtfs_schedule_fact_daily_trips()
    >> select(_.calitp_itp_id, _.service_date, _.trip_key, _.trip_id, _.is_in_service)
    >> filter(
        _.calitp_itp_id == 300,
        _.service_date == '2021-10-09',
        _.is_in_service == True,
    )
    >> collect()
)

In [5]:
# In-service trips for calitp_itp_id 300 on the sample Sunday (2021-10-10).
sunday = (
    tbl.views.gtfs_schedule_fact_daily_trips()
    >> select(_.calitp_itp_id, _.service_date, _.trip_key, _.trip_id, _.is_in_service)
    >> filter(
        _.calitp_itp_id == 300,
        _.service_date == '2021-10-10',
        _.is_in_service == True,
    )
    >> collect()
)

In [7]:
# Preview the Thursday (2021-10-07) in-service trips.
weekday

Unnamed: 0,calitp_itp_id,service_date,trip_key,trip_id,is_in_service
0,300,2021-10-07,-8005615262662976499,860326,True
1,300,2021-10-07,-2347950696713114201,860356,True
2,300,2021-10-07,5972313532268407090,860329,True
3,300,2021-10-07,-4389330616705953268,860325,True
4,300,2021-10-07,-5369901348763502615,859774,True
...,...,...,...,...,...
1479,300,2021-10-07,3547471617223319058,857155,True
1480,300,2021-10-07,7605641356338795082,857182,True
1481,300,2021-10-07,-8754426279522343550,857208,True
1482,300,2021-10-07,1602920764620440434,857148,True


In [8]:
# Preview the Saturday (2021-10-09) in-service trips.
saturday

Unnamed: 0,calitp_itp_id,service_date,trip_key,trip_id,is_in_service
0,300,2021-10-09,-5660938195740595097,857596,True
1,300,2021-10-09,-5934572765200396958,857620,True
2,300,2021-10-09,-2052654702648429925,857598,True
3,300,2021-10-09,8467432339565330534,857627,True
4,300,2021-10-09,-6502762296593102424,857632,True
...,...,...,...,...,...
865,300,2021-10-09,-326715567438086592,856928,True
866,300,2021-10-09,-8932879938120206269,856914,True
867,300,2021-10-09,-7676239828272945270,856883,True
868,300,2021-10-09,-3501791437994681494,856895,True


In [9]:
# Preview the Sunday (2021-10-10) in-service trips.
sunday

Unnamed: 0,calitp_itp_id,service_date,trip_key,trip_id,is_in_service
0,300,2021-10-10,6407675930774017823,857684,True
1,300,2021-10-10,7556948753016434442,857699,True
2,300,2021-10-10,1775399891315883904,857740,True
3,300,2021-10-10,-3725519161234949155,857710,True
4,300,2021-10-10,-2619155360884474825,857719,True
...,...,...,...,...,...
765,300,2021-10-10,6790961670397659471,856988,True
766,300,2021-10-10,8852637779826185595,858404,True
767,300,2021-10-10,-1997155012495959257,857057,True
768,300,2021-10-10,152020242831870601,857043,True


In [6]:
# Tag each day's trips with a day-type label so the frames can be stacked.
for trips_frame, day_label in ((weekday, 'thurs'), (saturday, 'sat'), (sunday, 'sun')):
    trips_frame['day'] = day_label

In [7]:
# Stack the three sample days into one trip lookup (trip_key, trip_id, day).
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; like append, it keeps each frame's original index.
day_cols = ['trip_key', 'trip_id', 'day']
all_days = pd.concat([weekday[day_cols], saturday[day_cols], sunday[day_cols]])

In [11]:
# Pull trip-stop rows for calitp_itp_id 300 whose extracted/deleted window
# contains 2021-10-07, then (in pandas, after collect) keep only the rows that
# belong to a trip in all_days.
trip_stops_300 = (
    tbl.views.gtfs_schedule_data_feed_trip_stops()
    >> filter(_.calitp_extracted_at <= '2021-10-07', _.calitp_deleted_at > '2021-10-07')
    >> filter(_.calitp_itp_id == 300)
    >> select(_.route_id, _.stop_time_key, _.stop_id, _.trip_key)
    >> collect()
)
all_days_st = trip_stops_300 >> inner_join(_, all_days, on='trip_key')

In [12]:
# Preview the trip-stop rows joined to the sampled trips (adds trip_id and day).
all_days_st

Unnamed: 0,route_id,stop_time_key,stop_id,trip_key,trip_id,day
0,3340,-5094104451106730267,1351,-7578898001636533005,858125,thurs
1,3340,5564129767278448566,714,-7578898001636533005,858125,thurs
2,3340,-2898319813463766166,1350,-7578898001636533005,858125,thurs
3,3340,1121811470406653228,1099,-7578898001636533005,858125,thurs
4,3340,-7879518862298263615,1515,-7578898001636533005,858125,thurs
...,...,...,...,...,...,...
113669,3348,7331054157126359625,1291,-4678264582198362009,858684,thurs
113670,3348,-7585669476700052117,1471,-4678264582198362009,858684,thurs
113671,3348,-8408769084880974232,1555,-4678264582198362009,858684,thurs
113672,3348,7038819420888377581,676,-4678264582198362009,858684,thurs


In [19]:
# trip_key -> shape_id lookup for calitp_itp_id 300, restricted to rows whose
# extracted/deleted window contains 2021-10-07.
tbl_trips = (
    tbl.views.gtfs_schedule_dim_trips()
    >> filter(
        _.calitp_extracted_at <= '2021-10-07',
        _.calitp_deleted_at > '2021-10-07',
        _.calitp_itp_id == 300,
    )
    >> select(_.trip_key, _.shape_id)
    >> collect()
)

In [20]:
# Stop times (sequence + departure time) for calitp_itp_id 300, restricted to
# rows whose extracted/deleted window contains 2021-10-07.
stop_times_300 = (
    tbl.views.gtfs_schedule_dim_stop_times()
    >> filter(
        _.calitp_extracted_at <= '2021-10-07',
        _.calitp_deleted_at > '2021-10-07',
        _.calitp_itp_id == 300,
    )
    >> select(_.trip_id, _.stop_sequence, _.departure_time, _.stop_time_key)
    >> collect()
)

# Attach route/stop/day info and then each trip's shape_id. Both sides of the
# first join carry trip_id, so the result has trip_id_x and trip_id_y.
df = (
    stop_times_300
    >> inner_join(_, all_days_st, on='stop_time_key')
    >> inner_join(_, tbl_trips, on='trip_key')
)

In [21]:
# Preview the combined stop_time / trip / shape table.
df

Unnamed: 0,trip_id_x,stop_sequence,departure_time,stop_time_key,route_id,stop_id,trip_key,trip_id_y,day,shape_id
0,855769,1,11:17:00,-6261241442056615353,3328,1305,3793445201881961523,855769,sat,25313
1,855769,8,11:22:40,6293861349682025743,3328,338,3793445201881961523,855769,sat,25313
2,855769,18,11:36:16,1371771584402430604,3328,1234,3793445201881961523,855769,sat,25313
3,855769,25,11:42:23,244869098712489678,3328,6,3793445201881961523,855769,sat,25313
4,855769,9,11:23:49,-3520867493137772856,3328,1664,3793445201881961523,855769,sat,25313
...,...,...,...,...,...,...,...,...,...,...
113669,860325,2,10:34:00,-330559371963288948,3352,34,-4389330616705953268,860325,thurs,25399
113670,860325,1,10:30:00,7098011521916498646,3352,1344,-4389330616705953268,860325,thurs,25399
113671,860325,5,11:13:25,-8489583631268933134,3352,1308,-4389330616705953268,860325,thurs,25399
113672,860325,4,10:49:00,5383280922326337962,3352,762,-4389330616705953268,860325,thurs,25399


## Keep the middle stop for each shape

In [22]:
# Store stop_id as an integer column.
df['stop_id'] = df['stop_id'].astype('int64')

In [23]:
# Median stop_sequence per shape_id, used as that shape's "middle" stop.
# The int64 cast truncates a fractional median (e.g. 10.5 -> 10).
middle_stops = (
    df
    >> group_by(_.shape_id)
    >> summarize(middle_stop=_.stop_sequence.median())
)
middle_stops['middle_stop'] = middle_stops['middle_stop'].astype('int64')

In [24]:
# Rename each shape's median sequence to stop_sequence, then keep only the rows
# of df that match on (shape_id, stop_sequence).
middle_st = (
    middle_stops
    >> select(_.stop_sequence == _.middle_stop, _.shape_id)
    >> inner_join(_, df, on=['shape_id', 'stop_sequence'])
)

In [25]:
# Preview the stop_time rows at each shape's middle stop.
middle_st

Unnamed: 0,stop_sequence,shape_id,trip_id_x,departure_time,stop_time_key,route_id,stop_id,trip_key,trip_id_y,day
0,16,25311,855999,06:13:00,1047345617436979040,3328,1637,5448889784642514610,855999,thurs
1,16,25311,856000,05:28:00,-8306767555319805005,3328,1637,-6796483405910857216,856000,thurs
2,21,25313,855769,11:38:40,-2551891055862821802,3328,375,3793445201881961523,855769,sat
3,21,25313,855887,15:48:47,-1646763458978052323,3328,375,8810119345934236926,855887,sun
4,21,25313,855736,17:01:13,175816359969469680,3328,375,1467389354707774587,855736,sat
...,...,...,...,...,...,...,...,...,...,...
3119,4,25402,860354,06:41:00,5816033093813157160,3352,621,-7850518669924244751,860354,thurs
3120,4,25402,860356,07:23:00,-5382019601827416985,3352,621,-2347950696713114201,860356,thurs
3121,4,25402,860358,08:14:00,2364837188864047471,3352,621,-155762065916788653,860358,thurs
3122,17,25403,857560,15:14:01,6465188654978151506,3336,1147,-1415168282708859693,857560,thurs


In [26]:
# Spot-check the raw departure_time value (an 'HH:MM:SS' string) before parsing.
middle_st.departure_time.iloc[0]

'06:13:00'

In [27]:
def fix_gtfs_time(gtfs_timestring):
    '''Reformat a GTFS timestamp to standard 24-hour clock time.

    GTFS lets the hour field reach 24 or more so that trips running past
    midnight stay attached to the service day they started on. This wraps the
    hour back into 0-23 (modulo 24, so values of 48 and above are handled too)
    and zero-pads it to two digits. The day roll-over itself is discarded.

    Parameters
    ----------
    gtfs_timestring : str
        Time such as '25:30:00'; surrounding whitespace is ignored.

    Returns
    -------
    str
        The stripped input unchanged when its hour is below 24, otherwise the
        same time with the hour wrapped, e.g. '25:30:00' -> '01:30:00'.

    Raises
    ------
    ValueError
        If the text before the first ':' is not an integer.
    '''
    cleaned = gtfs_timestring.strip()
    # Only the hour field needs rewriting; everything after the first ':' is
    # carried over verbatim.
    hour_text, sep, remainder = cleaned.partition(':')
    hour = int(hour_text)
    if hour < 24:
        return cleaned
    return f'{hour % 24:02d}{sep}{remainder}'

In [28]:
# Wrap any past-midnight GTFS hours back into the 0-23 range.
middle_st['departure_time'] = middle_st['departure_time'].map(fix_gtfs_time)

In [29]:
# Parse the clock time (strptime with no date fields defaults to 1900-01-01)
# and pull out the hour of day for bucketing.
middle_st['departure_dt'] = middle_st['departure_time'].map(
    lambda time_text: dt.datetime.strptime(time_text, '%H:%M:%S')
)
middle_st['departure_hour'] = middle_st['departure_dt'].map(lambda parsed: parsed.hour)

#### Shape trip count by hour — TODO: expand to day-of-week and operator levels

In [32]:
# Count middle-stop departures per shape, route, departure hour, and day label,
# sorted so the largest counts come first.
middle_st >> count(_.shape_id, _.route_id, _.departure_hour, _.day, sort = True)

Unnamed: 0,shape_id,route_id,departure_hour,day,n
0,25315,3328,18,thurs,7
1,25315,3328,17,thurs,6
2,25314,3328,16,thurs,6
3,25315,3328,8,thurs,6
4,25315,3328,9,thurs,6
...,...,...,...,...,...
1201,25346,3336,19,sun,1
1202,25346,3336,19,sat,1
1203,25346,3336,17,sun,1
1204,25346,3336,17,sat,1


In [53]:
# Look up route 3328 for calitp_itp_id 300 in the routes dimension table.
(
    tbl.views.gtfs_schedule_dim_routes()
    >> filter(_.calitp_itp_id == 300, _.route_id == '3328')
)

Unnamed: 0,route_key,calitp_itp_id,calitp_url_number,route_id,route_type,agency_id,route_short_name,route_long_name,route_desc,route_url,...,route_continuous_drop_off,agency_name,agency_url,agency_timezone,agency_lang,agency_phone,agency_fare_url,agency_email,calitp_extracted_at,calitp_deleted_at
0,5298754852876447831,300,0,3328,3,6216179,1,Main St & Santa Monica Blvd/UCLA,,http://bigbluebus.com/Routes-and-Schedules/Rou...,...,,Big Blue Bus,http://www.bigbluebus.com,America/Los_Angeles,en,310-451-5444,,,2021-07-27,2099-01-01


In [74]:
# Inspect the middle-stop rows for a single shape (25315).
middle_st >> filter(_.shape_id == '25315')

Unnamed: 0,stop_sequence,shape_id,trip_id,departure_time,stop_time_key,trip_key,route_id,stop_id,departure_dt,departure_hour
90,23,25315,856128,11:48:53,-8326571985010357471,5934560359711113263,3328,325,1900-01-01 11:48:53,11
91,23,25315,856146,08:46:53,-5578405365431413085,4693523290513956173,3328,325,1900-01-01 08:46:53,8
92,23,25315,856116,13:51:53,-2592193845754151739,-1814272882911542131,3328,325,1900-01-01 13:51:53,13
93,23,25315,856091,18:00:39,-7956451856912978944,7473260928576424592,3328,325,1900-01-01 18:00:39,18
94,23,25315,856087,18:38:15,6614307895849206963,5057111300636375037,3328,325,1900-01-01 18:38:15,18
...,...,...,...,...,...,...,...,...,...,...
161,23,25315,856100,16:32:28,4105401700056768686,6710153630310311938,3328,325,1900-01-01 16:32:28,16
162,23,25315,856151,07:55:53,3495629271157421505,8137427846320301694,3328,325,1900-01-01 07:55:53,7
163,23,25315,856095,17:22:28,8518523843135418333,4913408361454154644,3328,325,1900-01-01 17:22:28,17
164,23,25315,856104,15:51:53,-6054036106565546413,-4086913101323362626,3328,325,1900-01-01 15:51:53,15
