# Amtrak Thruway Buses

Amtrak Thruway Buses do not have a `shape_id`, which means we can't get the `route` line geometry associated with them.

We can still find those trips through `stop_times`, and the stops they serve through `stops`.

In [1]:
import dask_geopandas as dg
import dask.dataframe as dd
import geopandas as gpd
import intake
import pandas as pd

from shared_utils import geography_utils, rt_dates, rt_utils

# Analysis parameters: the service date to pull, where the compiled cached
# views live, and the calitp_itp_id used below to select the Amtrak rows.
SELECTED_DATE = rt_dates.DATES["sep2022"]
COMPILED_CACHED_VIEWS = f"{rt_utils.GCS_FILE_PATH}compiled_cached_views/"
ITP_ID = 13

# Data catalog shared with the high-quality transit areas work; it is read
# later for the `ca_boundary` entry.
catalog = intake.open_catalog("../high_quality_transit_areas/*.yml")



In [2]:
# Value of `route_long_name` that identifies the Thruway bus trips.
thruway_bus = "Amtrak Thruway Connecting Service"

# Open the trips table for the selected date as a dask dataframe.
trips = dd.read_parquet(
    f"{COMPILED_CACHED_VIEWS}trips_{SELECTED_DATE}_all.parquet"
)

# Narrow to the single operator before materializing with .compute().
amtrak_trips = trips[trips["calitp_itp_id"] == ITP_ID].compute()

# Of that operator's trips, keep only the Thruway bus service.
amtrak_thruway = amtrak_trips.loc[
    amtrak_trips["route_long_name"] == thruway_bus
]

In [3]:
# Peek at two rows; note that `shape_id` is empty for these trips.
amtrak_thruway.head(n=2)

Unnamed: 0,calitp_itp_id,calitp_url_number,service_date,trip_key,trip_id,route_id,direction_id,shape_id,calitp_extracted_at,calitp_deleted_at,route_short_name,route_long_name,route_desc,route_type
0,13,0,2022-09-14,-1358039879891994037,86712853015,21313,0,,2022-07-19,2099-01-01,,Amtrak Thruway Connecting Service,,3
1,13,0,2022-09-14,4173850620104641197,88222853673,43,1,,2022-07-25,2099-01-01,,Amtrak Thruway Connecting Service,,3


In [4]:
# Open the route line geometries for the selected date.
routelines = dg.read_parquet(
    f"{COMPILED_CACHED_VIEWS}routelines_{SELECTED_DATE}_all.parquet"
)

amtrak_routes = routelines[routelines["calitp_itp_id"] == ITP_ID].compute()

# Try each candidate key in turn: does any route line share a value with the
# Thruway trips? An empty table for a key means no geometry matches on it.
for col in ("trip_id", "trip_key", "route_id"):
    print(f"column: {col}")
    matches_thruway = amtrak_routes[col].isin(amtrak_thruway[col])
    subset = amtrak_routes.loc[matches_thruway]
    display(subset)

column: trip_id


Unnamed: 0,calitp_itp_id,calitp_url_number,shape_id,geometry,service_date,trip_key,trip_id,route_id,direction_id,calitp_extracted_at,calitp_deleted_at,route_type,route_desc,route_long_name,route_short_name


column: trip_key


Unnamed: 0,calitp_itp_id,calitp_url_number,shape_id,geometry,service_date,trip_key,trip_id,route_id,direction_id,calitp_extracted_at,calitp_deleted_at,route_type,route_desc,route_long_name,route_short_name


column: route_id


Unnamed: 0,calitp_itp_id,calitp_url_number,shape_id,geometry,service_date,trip_key,trip_id,route_id,direction_id,calitp_extracted_at,calitp_deleted_at,route_type,route_desc,route_long_name,route_short_name


In [5]:
# Open the stop_times table for the selected date, then materialize only the
# rows belonging to the selected operator.
stop_times = dd.read_parquet(
    f"{COMPILED_CACHED_VIEWS}st_{SELECTED_DATE}_all.parquet"
)

amtrak_stop_times = stop_times[stop_times["calitp_itp_id"] == ITP_ID].compute()

In [6]:
# Open the stops table (read with dask_geopandas) for the selected date, then
# materialize only the rows belonging to the selected operator.
stops = dg.read_parquet(
    f"{COMPILED_CACHED_VIEWS}stops_{SELECTED_DATE}_all.parquet"
)

amtrak_stops = stops[stops["calitp_itp_id"] == ITP_ID].compute()

In [7]:
# Read the `ca_boundary` entry from the catalog to use as the clip mask.
ca = catalog.ca_boundary.read()

# GeoDataFrame.clip does not reproject its mask: when the two CRSs differ it
# only emits a warning and then compares raw coordinates. Align the boundary
# to the stops' CRS first. This is a no-op when they already agree, and is
# skipped when either side has no CRS set (to_crs would raise in that case).
ca_crs = getattr(ca, "crs", None)
if (
    ca_crs is not None
    and amtrak_stops.crs is not None
    and ca_crs != amtrak_stops.crs
):
    ca = ca.to_crs(amtrak_stops.crs)

# Keep only the stops that fall within the boundary.
ca_stops = amtrak_stops.clip(ca)

In [8]:
# A stop_time row is kept when it belongs to a Thruway trip AND its stop
# survived the boundary clip.
on_thruway_trip = amtrak_stop_times["trip_id"].isin(amtrak_thruway["trip_id"])
at_clipped_stop = amtrak_stop_times["stop_id"].isin(ca_stops["stop_id"])

amtrak_thruway_stop_times = amtrak_stop_times.loc[
    on_thruway_trip & at_clipped_stop
]

In [9]:
# Peek at two of the filtered stop_time rows.
amtrak_thruway_stop_times.head(n=2)

Unnamed: 0,calitp_itp_id,trip_id,stop_id,arrival_time,departure_time,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint,...,stop_time_key,calitp_deleted_at,stop_sequence,stop_time_continuous_pickup,stop_time_continuous_drop_off,stop_sequence_rank,arrival_ts,departure_ts,trip_key,departure_hour
2,13,32102848217,GAC,21:52:00,21:52:00,,0,0,,,...,-5652449305220166169,2099-01-01,3,,,3,78720.0,78720.0,3919119754903841310,21
4,13,39152851527,DBP,20:50:00,20:50:00,,1,0,,,...,3680785221064896487,2099-01-01,4,,,4,75000.0,75000.0,7492902480220650796,20
