In [1]:
%run 0_data_model.ipynb

# Key columns shared by the joins and per-feed counts in this notebook.
# `pk_str` holds them as plain names (used in join `on` lists) ...
pk_str = ["calitp_itp_id", "calitp_url_number"]
# ... and `pk_col` holds the same columns as `_` column expressions
# (used in select / distinct / count), derived from `pk_str` so the two
# cannot drift apart.
pk_col = tuple(getattr(_, col_name) for col_name in pk_str)

# Analysis dates (ISO format). DATE_END is the service date used for the
# "stops in service" snapshot further down.
DATE_START, DATE_END = "2021-04-01", "2021-05-01"

## Table overview

* gtfs_schedule_trips
    * stops_and_times
    * schedule_daily

Tables used for questions:

* **Stop times by location type** - stops_and_times
* **Stops in service** - stops_and_times + (gtfs_schedule_trips + schedule_daily)

## Stop times by location type

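It looks like stops are either not coded for location type (blank, which GTFS treats the same as a regular stop/platform) or are explicitly coded as type 0 (stop or platform). No stations (type 1) appear in the data.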

In [2]:
# Tally stop-time rows for each location_type value; rows with a missing
# location_type are reported as their own group.
count(tbl_stops_and_times, _.location_type)

Unnamed: 0,location_type,n
0,,5524733
1,0.0,4054292


## Stop sequences

In [3]:
from siuba.dply.vector import dense_rank

# Longest trip, measured in number of distinct stop positions.
#
# Steps:
#   1. Group stop times per trip. trip_id is only unique within a feed, so the
#      feed key (pk_col) is part of the grouping -- the same key the joins in
#      this notebook use. Grouping on trip_id alone would rank together stop
#      times of unrelated trips from different feeds that happen to share an id.
#   2. Cast stop_sequence to int so ranking is numeric, then dense-rank it
#      within the trip (missing values stay missing via na_option="keep").
#   3. Drop the grouping and take the largest rank over all trips.
(
    tbl_stops_and_times
    >> group_by(*pk_col, _.trip_id)
    >> mutate(
        stop_sequence=_.stop_sequence.astype(int),
        stop_order=dense_rank(_.stop_sequence, na_option="keep"),
    )
    >> ungroup()
    >> summarize(max=_.stop_order.max())
)

Unnamed: 0,max
0,136


## Stops in service on a specific day

In [4]:
# Trips, trimmed to the columns needed to link a stop time to its service.
trip_service_keys = tbl.gtfs_schedule_trips() >> select(
    _.trip_id, _.service_id, _.route_id, *pk_col
)

# Rows of the daily schedule table whose service_date equals DATE_END.
service_on_snapshot_date = tbl_schedule_daily >> filter(_.service_date == DATE_END)

# Stop times -> their trip (matched on feed key + trip_id) -> kept only when
# the trip's service_id appears in the schedule for DATE_END.
recent_trip_stops = (
    tbl_stops_and_times
    >> inner_join(_, trip_service_keys, [*pk_str, "trip_id"])
    >> inner_join(_, service_on_snapshot_date, [*pk_str, "service_id"])
)

In [5]:
# Total rows in the joined table, i.e. the number of stop *times*
# (not the number of distinct stops).
count(recent_trip_stops)

Unnamed: 0,n
0,7023803


In [6]:
# Stops that are being serviced: reduce the stop times to one row per
# (feed key, stop_id), so a physical stop visited by many trips is only
# counted once.
recent_agency_stops = distinct(recent_trip_stops, *pk_col, _.stop_id)

In [7]:
# Number of distinct (feed, stop) pairs in service.
count(recent_agency_stops)

Unnamed: 0,n
0,92222


In [8]:
# Same tally, broken out per feed (calitp_itp_id + calitp_url_number).
count(recent_agency_stops, *pk_col)

Unnamed: 0,calitp_itp_id,calitp_url_number,n
0,182,0,13283
1,142,0,5171
2,235,0,5171
3,4,0,4579
4,278,0,3427


## Stops out of service


In [9]:
# Display the stops_and_times table as-is (no filtering or aggregation in this
# cell) to inspect which columns are available.
tbl_stops_and_times

Unnamed: 0,calitp_itp_id,calitp_url_number,trip_id,stop_id,stop_sequence,arrival_time,departure_time,stop_headsign,pickup_type,drop_off_type,continuous_pickup,continuous_drop_off,shape_dist_traveled,timepoint,calitp_extracted_at,stop_timezone,stop_code,stop_desc,stop_lat,stop_lon,stop_name,location_type,parent_station,level_id,zone_id,wheelchair_boarding,platform_code,stop_url,tts_stop_name,stop_sequence_rank
0,372,0,009e81cc-688d-4a96-9049-9b008afb4306,688a134e-8c33-4776-b276-d03da1dd587c,0,14:07:00,14:07:00,,0,0,,,0.0,,2021-04-16,,23358,"""Cache Creek Casino""",38.733355,-122.142352,Cache Creek Casino Resort,0,,,,,,,,1
1,372,0,009e81cc-688d-4a96-9049-9b008afb4306,4fb90529-df3d-4817-b289-7913bab8e803,100,14:16:00,14:16:00,,0,0,,,11067.33,,2021-04-16,,23526,,38.70734,-122.048604,Hwy. 16 at Rd. 85 (Capay) EB,0,,,,,,,,2
2,372,0,009e81cc-688d-4a96-9049-9b008afb4306,7b7a754a-71f2-4796-b7c9-c9bfda47d8e6,140,14:20:00,14:20:00,,0,0,,,14827.6,,2021-04-16,,23355,,38.692962,-122.016728,Yolo at Grafton (Esparto) SB,0,,,,,,,,3
3,372,0,009e81cc-688d-4a96-9049-9b008afb4306,14989d21-0e8e-43d1-93f9-fbd511ed63fd,168,14:26:00,14:26:00,,0,0,,,20391.6,,2021-04-16,,23353,"""""",38.679074,-121.968471,Railroad at Main (Madison),0,,,,,,,,4
4,372,0,009e81cc-688d-4a96-9049-9b008afb4306,ce57f7f5-4110-40e8-a062-fb58b27e5dfa,242,14:39:00,14:39:00,,0,0,,,35749.63,,2021-04-16,,23293,,38.675253,-121.801838,W. Lincoln at Rd 98 EB,0,,,,,,,,5
