In [7]:
%run 0_data_model.ipynb

pk_str = ["calitp_itp_id", "calitp_url_number"]
pk_col = (_.calitp_itp_id, _.calitp_url_number)

DATE_START = "2021-04-01"
DATE_END = "2021-05-01"

## Table overview

* gtfs_schedule_trips
    * stops_and_times
    * schedule_daily

Tables used for questions:

* **Stop times by location type** - stops_and_times
* **Stops in service** - stops_and_times + (gtfs_schedule_trips + schedule_daily)

## Stop times by location type

It looks like stops are either not coded for location type (blank, which GTFS treats the same as type 0) or coded as type 0 (stop or platform). No type 1 (station) rows appear.

In [11]:
# Tally the rows of tbl_stops_and_times for each location_type value.
tbl_stops_and_times >> count(_.location_type)

Unnamed: 0,location_type,n
0,,5524733
1,0.0,4054292


## Stop sequences

In [30]:
from siuba.dply.vector import dense_rank

# Largest number of distinct stop positions found on any single trip.
#
# trip_id is only unique within a feed (the joins in this notebook key trips on
# the feed columns plus trip_id), so the ranking is grouped by feed + trip_id.
# Grouping on trip_id alone would rank same-named trips from different feeds
# together and could inflate stop_order.
(
    tbl_stops_and_times
    >> group_by(*pk_col, _.trip_id)
    >> mutate(
        # stop_sequence is cast to int; stop_order is the dense rank of
        # stop_sequence within each group, with missing values left missing.
        stop_sequence=_.stop_sequence.astype(int),
        stop_order=dense_rank(_.stop_sequence, na_option="keep"),
    )
    >> ungroup()
    >> summarize(max=_.stop_order.max())
)

Unnamed: 0,max
0,136


## Stops in service on a specific day

In [15]:
# Trips with the service and route they belong to, plus the feed key columns.
trips_with_service = tbl.gtfs_schedule_trips() >> select(
    _.trip_id, _.service_id, _.route_id, *pk_col
)

# Rows of the daily schedule whose service_date equals DATE_END.
service_on_date = tbl_schedule_daily >> filter(_.service_date == DATE_END)

# Stop times restricted (via inner joins) to trips whose service_id appears in
# the daily schedule for DATE_END. Both joins are keyed on the feed columns.
recent_trip_stops = (
    tbl_stops_and_times
    >> inner_join(_, trips_with_service, [*pk_str, "trip_id"])
    >> inner_join(_, service_on_date, [*pk_str, "service_id"])
)

In [25]:
# Counts the rows of recent_trip_stops, i.e. the number of stop *times*
# (one row per scheduled stop event), not the number of distinct stops.
recent_trip_stops >> count()

Unnamed: 0,n
0,7023803


In [21]:
# Stops that are being serviced: one row per unique combination of the feed
# key columns and stop_id, so a physical stop visited by many trips is only
# represented once.
recent_agency_stops = (
    recent_trip_stops
    >> distinct(*pk_col, _.stop_id)
)

In [22]:
# Total number of distinct (feed key, stop_id) rows in recent_agency_stops.
recent_agency_stops >> count()

Unnamed: 0,n
0,92222


In [23]:
# Number of distinct serviced stops per (calitp_itp_id, calitp_url_number).
recent_agency_stops >> count(*pk_col)

Unnamed: 0,calitp_itp_id,calitp_url_number,n
0,182,0,13283
1,142,0,5171
2,235,0,5171
3,4,0,4579
4,278,0,3427


## Stops out of service


In [19]:
# Displays tbl_stops_and_times for reference.
# NOTE(review): in the visible part of the notebook this section only previews
# the table; no out-of-service calculation follows -- confirm whether the
# analysis is still to be written.
tbl_stops_and_times

Unnamed: 0,calitp_itp_id,calitp_url_number,trip_id,stop_id,stop_sequence,arrival_time,departure_time,stop_headsign,pickup_type,drop_off_type,continuous_pickup,continuous_drop_off,shape_dist_traveled,timepoint,calitp_extracted_at_x,parent_station,stop_code,zone_id,stop_lat,stop_url,level_id,stop_timezone,stop_lon,stop_desc,calitp_extracted_at_y,wheelchair_boarding,platform_code,tts_stop_name,stop_name,location_type
0,111,0,781444,11064,68,16:10:00,16:10:00,,,,,,,,2021-04-16,,11064.0,0.0,38.688547,,,,-121.186693,Buses head NB,2021-04-16,,,,AMERICAN RIVER CANYON DR & GREY CANYON DR (NB),0.0
1,111,0,781449,11160,63,11:05:00,11:05:00,,,,,,,,2021-04-16,,11160.0,0.0,38.672938,,,,-121.202331,Buses head NB,2021-04-16,,,,MAIN AVE & MADISON AVE (NB),0.0
2,111,0,781375,11065,67,11:09:00,11:09:00,,,,,,,,2021-04-16,,11065.0,0.0,38.68488,,,,-121.18873,Buses head NB,2021-04-16,,,,AMERICAN RIVER CANYON DR & BOULDER CANYON WAY (NB),0.0
3,350,1,40090,79013,0,11:44:00,11:44:00,,0.0,0.0,,,0.0,1.0,2021-04-29,,79013.0,,37.598792,,,America/Los_Angeles,-122.065656,Terminal,2021-04-29,1.0,,,Union Landing Transit Center,0.0
4,2,0,BCT109 NB_SAT.T10,163,67,16:35:30,16:35:30,,,,,,,,2021-04-16,,,,33.930741,,,,-118.387028,,2021-04-16,,,,Imperial Hwy. / Nash St.,
