In [27]:
import pandas as pd

In [29]:
# StopTimes
# trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,shape_dist_traveled
df_stop_times = pd.read_csv("gtfs-data/stop_times.txt")
# print(df_stop_times.head())

In [30]:
# Trips
# route_id,service_id,trip_id,direction_id,block_id,shape_id,direction,wheelchair_accessible,schd_trip_id
df_trips = pd.read_csv("gtfs-data/trips.txt")
# print(df_trips.head())

In [31]:
# Routes
# route_id,route_short_name,route_long_name,route_type,route_url,route_color,route_text_color
df_routes = pd.read_csv("gtfs-data/routes.txt")
# print(df_routes.head())

In [32]:
# Stops
# stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,location_type,parent_station,wheelchair_boarding
df_stops = pd.read_csv("gtfs-data/stops.txt")
# print(df_stops.head())

In [37]:
# RouteStops - {route_id : ordered stops}
merged = df_stop_times.merge(df_trips[['trip_id', 'route_id']], on='trip_id', how='left')
merged = merged.sort_values(by=['route_id', 'trip_id', 'stop_sequence'])

RouteStops = (
    merged.groupby('route_id')['stop_id']
    .apply(lambda stops: list(dict.fromkeys(stops)))
    .to_dict()
)

# print first 3 routes 
# for route, stops in list(RouteStops.items())[:3]:  
#     print(f"Route {route}: {stops[:10]} ... ({len(stops)} stops total)")

In [38]:
# RouteTrips - {route_id : trips}
RouteTrips = (
    df_trips.groupby('route_id')['trip_id']
    .apply(list)
    .to_dict()
)

# print first 3 routes
# for route, trips in list(RouteTrips.items())[:3]:  
#     print(f"Route {route}: {trips[:10]} ... ({len(trips)} trips total)")

In [39]:
# Convert GTFS time input to unix time
from datetime import datetime, timedelta

def gtfs_time_to_unix(time_str, date_str="2025-01-01"):
    h, m, s = map(int, time_str.split(":"))
    dt = datetime.strptime(date_str, "%Y-%m-%d") + timedelta(h, m, s)
    return int(dt.timestamp())

In [40]:
# Trips - {trip_id : {stops : {stop_id : arrival_time, departure_time}}}
df_stop_times = df_stop_times.sort_values(by=["trip_id", "stop_sequence"])

trip_info_columns = [col for col in df_trips.columns if col != "trip_id"]

Trips = (
    df_trips.set_index("trip_id")[trip_info_columns]
    .to_dict(orient="index")
)

for trip_id, group in df_stop_times.groupby("trip_id"):
    stops_dict = {}
    for _, row in group.iterrows():
        stop_id = int(row["stop_id"])
        arrival = row["arrival_time"]
        departure = row["departure_time"]

        stops_dict[stop_id] = {
            "arrival_time": gtfs_time_to_unix(arrival),
            "departure_time": gtfs_time_to_unix(departure)
        }

    if trip_id in Trips:
        Trips[trip_id]["stops"] = stops_dict
    else:
        Trips[trip_id] = {"stops": stops_dict}

for trip, trip_info in list(Trips.items())[:3]:
    print(f"\nTrip {trip}:")
    for k, v in trip_info.items():
        if k == "stops":
            # show first 3 stops (as dict preview)
            subset = dict(list(v.items())[:3])
            print(f"  {k}: {subset} ... ({len(v)} stops total)")
        else:
            print(f"  {k}: {v}")


Trip 6750027567020:
  route_id: X9
  service_id: 67501
  direction_id: 1
  block_id: 675000004127
  shape_id: 67508469
  direction: North
  wheelchair_accessible: 1
  schd_trip_id: 27567020
  stops: {6168: {'arrival_time': 1736143205, 'departure_time': 1736143205}, 14864: {'arrival_time': 1736143207, 'departure_time': 1736143207}, 6179: {'arrival_time': 1736143210, 'departure_time': 1736143210}} ... (32 stops total)

Trip 6750029005020:
  route_id: X9
  service_id: 67501
  direction_id: 1
  block_id: 675000002417
  shape_id: 67508470
  direction: North
  wheelchair_accessible: 1
  schd_trip_id: 29005020
  stops: {6131: {'arrival_time': 1736920849, 'departure_time': 1736920849}, 6147: {'arrival_time': 1736920850, 'departure_time': 1736920850}, 14781: {'arrival_time': 1736920852, 'departure_time': 1736920852}} ... (38 stops total)

Trip 6750030457020:
  route_id: X9
  service_id: 67501
  direction_id: 1
  block_id: 675000002797
  shape_id: 67508470
  direction: North
  wheelchair_access

In [41]:
# StopRoutes - {stop_id : routes}
merged = df_stop_times.merge(df_trips[['trip_id', 'route_id']], on='trip_id', how='left')

StopRoutes = (
    merged.groupby('stop_id')['route_id']
    .apply(lambda x: set(x.dropna()))
    .to_dict()
)

# print first 3 stops
# for stop, routes in list(StopRoutes.items())[:3]:
#     print(f"Stop {stop}: {routes}")

In [42]:
# Earliest trip in route r (route) that one can catch at stop p (next_stop)
def earliest_trip(route, next_stop):
    best_trip = None
    best_arr_time = float('inf')
    
    for trip in RouteTrips[route]:
        if next_stop not in Trips[trip]['stops']:
            continue
        arr_time = Trips[trip]['stops'][next_stop]['arrival_time']
        if arr_time < best_arr_time:
            best_arr_time = arr_time
            best_trip = trip
            
    return best_trip

In [49]:
def raptor(source_stop, dest_stop, departure_time, K):
    stop_arrival_times = {} 
    earliest_stop_arrival_times = {}
    
    # initializing the earliest arrival times to each stop to infinity
    for stop in StopRoutes.keys():
        stop_arrival_times[stop] = [float('inf')] * (K + 1) # {stops : []*(K+1)}
        earliest_stop_arrival_times[stop] = float('inf') # {stop : arrival time}
    
    stop_arrival_times[source_stop][0] = departure_time # set initial stop arrival time to be departure time for the first round
    
    # mark first stop
    marked_stops = []
    marked_stops.append(source_stop)
    
    Q = []
    for k in range(1, K + 1):
        Q.clear()
        
        # for each stop marked in the previous round
        for marked_stop in marked_stops:
            # iterate through all the routes that serve that stop
            for route in StopRoutes[marked_stop]:
                # make sure Q contains unique routes with earliest marked stop for that route
                filtered_route_stops = [t for t in Q if t[0] == route]
                if len(filtered_route_stops) == 0:
                    Q.append((route, marked_stop))
                    continue
                    
                (_, other_stop) = filtered_route_stops[0]
                if RouteStops[route].index(marked_stop) < RouteStops[route].index(other_stop):
                    Q.remove((route, other_stop))
                    Q.append((route, marked_stop))
                    
        # unmark all stops       
        marked_stops.clear()
        print(Q)

        # traverse each marked route and stop
        for (route, stop) in Q:
            current_trip = None
            
            stop_idx = RouteStops[route].index(stop)
            # traverse every possible next stop in the route after the current stop
            for next_stop in RouteStops[route][stop_idx:]:
                if current_trip is not None:
                    # set the curr_earliest_time to the minimum cached times for the next_stop or dest_stop               
                    curr_earliest_time = min(earliest_stop_arrival_times[next_stop], earliest_stop_arrival_times[dest_stop])
                    # if this trip contains next stop
                    if next_stop in Trips[current_trip]['stops']:
                        # update earliest_stop_arrival_times if it can be improved  
                        if Trips[current_trip]['stops'][next_stop]['arrival_time'] < curr_earliest_time:
                            stop_arrival_times[next_stop][k] = Trips[current_trip]['stops'][next_stop]['arrival_time']
                            earliest_stop_arrival_times[next_stop] = Trips[current_trip]['stops'][next_stop]['arrival_time']
                            marked_stops.append(next_stop)

                if current_trip is None:
                    # calculate the earliest trip in route that one can catch at next stop
                    current_trip = earliest_trip(route, next_stop, departure_time)
                elif next_stop in Trips[current_trip]['stops']:
                    # update current_trip to the earliest trip in route that one can catch at next stop
                    if stop_arrival_times[stop][k - 1] <= Trips[current_trip]['stops'][next_stop]['departure_time']:
                        current_trip = earliest_trip(route, next_stop, departure_time)

        if len(marked_stops) == 0:
            break        
    return

In [51]:
raptor(1, 636, 12, 5)

[('126', 1)]
[('126', 124), ('7', 15138)]
[('7', 235), ('57', 3856), ('126', 17074), ('157', 6691), ('60', 200), ('37', 18661), ('1', 18661), ('125', 18661), ('124', 18498), ('28', 14461), ('156', 14461), ('151', 14461), ('143', 73), ('147', 73), ('J14', 73), ('X4', 73), ('3', 73), ('26', 73), ('4', 73)]
[('57', 246), ('126', 48), ('9', 51), ('X9', 51), ('156', 14461), ('7', 74), ('56', 445), ('148', 1120), ('146', 1120), ('121', 593), ('120', 593), ('3', 593), ('37', 18661), ('124', 18498), ('54B', 18365), ('8', 6712), ('70', 2042), ('74', 6597), ('125', 5020), ('29', 754), ('65', 754), ('66', 14161), ('2', 759), ('60', 1106), ('147', 1106), ('X4', 1106), ('157', 1106), ('4', 1106), ('20', 450), ('6', 4870), ('63', 5202), ('15', 7176), ('192', 17646), ('135', 14061), ('77', 4987), ('143', 1072), ('151', 1072), ('134', 1072), ('73', 1410), ('136', 14058), ('62', 1880), ('24', 1880), ('36', 1880), ('22', 1880), ('152', 1163), ('80', 1165), ('155', 1757), ('N5', 17564), ('30', 7226), ('1