# GTFS schedule tables demo

* Take a look at one route for one operator (route `507` from the `Sacramento Schedule` feed) and see what the `trips`, `shapes`, `stops`, and `stop_times` tables look like.

In [None]:
import geopandas as gpd
import pandas as pd

# Date string embedded in every parquet file name read below.
date = "2023-08-16"

# Common GCS prefix (with trailing slash) that the parquet file names are
# appended to.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/rt_delay/compiled_cached_views/"

In [None]:
# Load the trips table for the chosen date, then keep only route 507 of the
# "Sacramento Schedule" feed and the handful of columns used in this demo.
trips = (
    pd.read_parquet(f"{GCS_FILE_PATH}trips_{date}.parquet")
    .loc[
        lambda df: (df["name"] == "Sacramento Schedule")
        & (df["route_id"] == "507"),
        [
            "feed_key",
            "name",
            "trip_id",
            "route_id",
            "direction_id",
            "shape_array_key",
            "shape_id",
        ],
    ]
    .reset_index(drop=True)
)

In [None]:
# Feed identifier taken from the first filtered trip.
# NOTE(review): assumes every filtered trip shares one feed_key - confirm.
sac_feed = trips["feed_key"].iloc[0]
sac_feed

In [None]:
# Distinct trip_ids on the selected route; the count is displayed below.
sac_trips = trips["trip_id"].unique().tolist()
len(sac_trips)

In [None]:
# Distinct shape_array_key values referenced by the selected trips.
sac_shapes = trips["shape_array_key"].unique().tolist()
sac_shapes

In [None]:
# Load the route line geometries for the chosen date and keep only the
# shapes used by the selected trips.
shapes = (
    gpd.read_parquet(f"{GCS_FILE_PATH}routelines_{date}.parquet")
    .loc[
        lambda gdf: gdf["shape_array_key"].isin(sac_shapes),
        ["shape_array_key", "shape_id", "geometry"],
    ]
    .reset_index(drop=True)
)

In [None]:
# Load the stop_times table for the chosen date, keeping only the rows for
# the selected feed and trips and the columns shown in this demo.
stop_times = (
    pd.read_parquet(f"{GCS_FILE_PATH}st_{date}.parquet")
    .loc[
        lambda df: (df["feed_key"] == sac_feed)
        & (df["trip_id"].isin(sac_trips)),
        ["feed_key", "trip_id", "stop_id", "stop_sequence", "arrival_sec"],
    ]
    # Renumber rows from 0 so this subset is indexed the same way as the
    # trips, shapes, and stops subsets instead of keeping the sparse row
    # labels of the full table.
    .reset_index(drop=True)
)

In [None]:
# Distinct stop_ids visited by the selected trips; the count is displayed.
sac_stops = stop_times["stop_id"].unique().tolist()
len(sac_stops)

In [None]:
# Load the stop geometries for the chosen date and keep only the stops that
# belong to the selected feed and appear in the filtered stop_times.
stops = (
    gpd.read_parquet(f"{GCS_FILE_PATH}stops_{date}.parquet")
    .loc[
        lambda gdf: (gdf["feed_key"] == sac_feed)
        & (gdf["stop_id"].isin(sac_stops)),
        ["feed_key", "stop_id", "geometry"],
    ]
    .reset_index(drop=True)
)

In [None]:
# (rows, columns) of each filtered table, in the order:
# trips, stops, stop_times, shapes.
tuple(table.shape for table in (trips, stops, stop_times, shapes))

In [None]:
# Preview the first five filtered trips.
trips.head(n=5)

In [None]:
# Number of trips per direction_id value.
trips["direction_id"].value_counts()

In [None]:
# Number of trips that use each shape.
trips["shape_array_key"].value_counts()

In [None]:
# Interactive map of the first shape only, colored by shape_id.
shapes.iloc[:1].explore(
    column="shape_id",
    categorical=True,
    tiles="CartoDB Positron",
)

In [None]:
# Interactive map of the last shape only, colored by shape_id.
shapes.iloc[-1:].explore(
    column="shape_id",
    categorical=True,
    tiles="CartoDB Positron",
)

In [None]:
# Preview the first five filtered stops.
stops.head(n=5)

In [None]:
# Interactive map of every filtered stop, colored by stop_id.
stops.explore(column="stop_id", tiles="CartoDB Positron")

In [None]:
# First 20 stop_times rows once ordered by trip, then by stop_sequence
# within each trip.
stop_times.sort_values(by=["trip_id", "stop_sequence"]).iloc[:20]

In [None]:
# Attach each stop's geometry to its stop_times rows (joined on feed + stop),
# then show the result ordered by trip.
pd.merge(
    left=stop_times,
    right=stops,
    on=["feed_key", "stop_id"],
)[
    ["feed_key", "trip_id", "stop_id", "geometry"]
].sort_values(by="trip_id")

In [None]:
# Pair every trip with the line geometry of its shape, joining on both
# shape identifiers.
pd.merge(
    left=shapes,
    right=trips,
    on=["shape_array_key", "shape_id"],
)[
    ["feed_key", "shape_array_key", "shape_id", "trip_id", "geometry"]
]