In [3]:
%%sh
pip install gtfs-lite

Collecting gtfs-lite
  Using cached gtfs_lite-0.2.1-py3-none-any.whl.metadata (1.6 kB)
Using cached gtfs_lite-0.2.1-py3-none-any.whl (14 kB)
Installing collected packages: gtfs-lite
Successfully installed gtfs-lite-0.2.1


In [1]:
import datetime as dt
import pathlib

import geopandas as gpd
import google.auth
import numpy as np
import pandas as pd
from gtfslite import GTFS
from shared_utils import catalog_utils, gtfs_utils_v2, rt_dates

In [2]:
from retrospective_feed_generation import *
from warehouse_utils import *
from gtfs_utils import *

In [3]:
get_schedule_rt_stop_times_table

<function warehouse_utils.get_schedule_rt_stop_times_table(feed_key: str, service_date: datetime.date | str) -> pandas.core.frame.DataFrame>

### Get RT Data

In [4]:
CREDENTIALS, _ = google.auth.default()

# not used
def safe_read_geoparquet(*args, **kwargs):
    assert "storage_options" not in kwargs
    return gpd.read_parquet(
        *args, **kwargs, storage_options={"token": CREDENTIALS.token}
    )

In [5]:
SAMPLE_DATE_STR = rt_dates.DATES["feb2025"]
FEED_NAME = "Big Blue Bus Schedule"

In [6]:
SAMPLE_DATE_STR

'2025-02-12'

In [7]:
feed_key = (
    gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(
        selected_date=SAMPLE_DATE_STR, keep_cols=["name", "gtfs_dataset_key"]
    )
    .set_index("name")
    .at["Big Blue Bus Schedule", "gtfs_dataset_key"]
)
feed_key

'efbbd5293be71f7a5de0cf82b59febe1'

In [8]:
schedule_rt_stop_times_single_agency = get_schedule_rt_stop_times_table(
    feed_key,
    SAMPLE_DATE_STR
)

### Get schedule feed

In [13]:
#TODO: right now this was just a download based on the url in airtable
# Need to make it traceable instead
#GTFS_FEED_PARENT = "../conveyal_update/feeds_2025-04-16/socal/Big_Blue_Bus_Schedule_7a3f513c343b16a30c135ed7d332b6d6_gtfs.zip/"
#GTFS_FEED_GLOB = "*.zip"

GTFS_FEED_PARENT = "./feeds/"
GTFS_FEED_GLOB = "big_blue_bus_2025-02*.zip"

ARBITRARY_SERVICE_ID = "0"
GTFS_DATE_STRFTIME_CODE = "%Y%m%d"

In [14]:
feed_paths = pathlib.Path(GTFS_FEED_PARENT).glob(GTFS_FEED_GLOB)
feed_path = next(feed_paths)
assert next(feed_paths, None) is None, "Ambiguous Schedule Feed"
feed = GTFS.load_zip(feed_path)
feed_filtered = subset_schedule_feed_to_one_date(
    feed, dt.date.fromisoformat(SAMPLE_DATE_STR)
)

### Merge schedule / rt

In [15]:
output_feed = make_retrospective_feed_single_date(
    filtered_input_feed=feed_filtered,
    stop_times_table=schedule_rt_stop_times_single_agency,
    stop_times_desired_columns=[
        "trip_id",
        "arrival_time",
        "departure_time"
        "drop_off_type",
        "pickup_type",
        "stop_headsign",
        "stop_id",
        "stop_sequence",
    ]
)

In [16]:
output_feed.write_zip(f"output_feeds/bbb_test_{SAMPLE_DATE_STR}.zip")

### Dropped shapes and stops

In [17]:
print("Get dropped shapes by their frequency")
feed_filtered.trips.loc[
    ~feed_filtered.trips.shape_id.isin(output_feed.trips.shape_id.unique()), "shape_id"
].value_counts()

Get dropped shapes by their frequency


27158    80
27156    80
27132    78
27125    76
27137    75
         ..
27118     1
27138     1
27135     1
27124     1
27122     1
Name: shape_id, Length: 61, dtype: int64

In [18]:
print("Get dropped stops by the number of trips serving them in the original feed")
pd.DataFrame(
    feed_filtered.stop_times.loc[
        ~feed_filtered.stop_times.stop_id.isin(
            output_feed.stop_times.stop_id.unique()
        ),
        "stop_id",
    ]
    .value_counts()
    .rename("stop_count")
).merge(
    feed_filtered.stops.set_index("stop_id")["stop_name"],
    how="left",
    left_index=True,
    right_index=True,
)

Get dropped stops by the number of trips serving them in the original feed


Unnamed: 0,stop_count,stop_name
34,388,4TH SB & SANTA MONICA PLACE (Downtown SM Station)
283,309,4TH NB & PICO FS
303,304,WESTWOOD SB & WEYBURN NS
1344,303,7TH SB & OLYMPIC BLVD NS
8,285,UCLA HILGARD TERMINAL
...,...,...
1017,1,ROBERTSON NB & CASHIO NS
1092,1,AIRDROME EB & LIVONIA NS
1090,1,BAGLEY EB & MONTE MAR FS
86,1,WESTWOOD SB & WILSHIRE FS


### Sample Trip

In [19]:
output_feed.stop_times.loc[output_feed.stop_times["trip_id"] == "902110"].sort_values(
    "stop_sequence"
)

Unnamed: 0,arrival_time,pickup_type,stop_headsign,stop_id,stop_sequence,trip_id,departure_time


In [20]:
feed_filtered.stop_times.loc[
    feed_filtered.stop_times["trip_id"] == "902110"
].sort_values("stop_sequence")

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint
