In [100]:
import pandas as pd

In [None]:
# StopTimes - one row per (trip, stop) visit of the GTFS feed.
# Columns:
# trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,shape_dist_traveled
df_stop_times = pd.read_csv("google_transit/stop_times.txt")
# Preview the first rows to sanity-check the load.
print(df_stop_times.head())

In [None]:
# Trips - one row per trip of the GTFS feed; links each trip_id to its route_id.
# Columns:
# route_id,service_id,trip_id,direction_id,block_id,shape_id,direction,wheelchair_accessible,schd_trip_id
df_trips = pd.read_csv("google_transit/trips.txt")
# Preview the first rows to sanity-check the load.
print(df_trips.head())

In [None]:
# Routes - one row per route of the GTFS feed.
# Columns:
# route_id,route_short_name,route_long_name,route_type,route_url,route_color,route_text_color
df_routes = pd.read_csv("google_transit/routes.txt")
# Preview the first rows to sanity-check the load.
print(df_routes.head())

In [None]:
# Stops - one row per stop of the GTFS feed.
# Columns:
# stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,location_type,parent_station,wheelchair_boarding
df_stops = pd.read_csv("google_transit/stops.txt")
# Preview the first rows to sanity-check the load.
print(df_stops.head())

In [None]:
# RouteStops - maps every route_id to the list of stop_ids its trips visit.
# Each stop appears once, at the position of its first occurrence once the
# rows are ordered by (route_id, trip_id, stop_sequence).
def _first_occurrences(stop_ids):
    """Return the distinct stop ids in order of first appearance."""
    return list(dict.fromkeys(stop_ids))


# Attach the route of every trip to its stop visits, then order the rows so
# that each route's trips are contiguous and each trip's stops are in sequence.
merged = (
    df_stop_times
    .merge(df_trips[['trip_id', 'route_id']], on='trip_id', how='left')
    .sort_values(by=['route_id', 'trip_id', 'stop_sequence'])
)

stops_by_route = merged.groupby('route_id')['stop_id']
RouteStops = stops_by_route.apply(_first_occurrences).to_dict()

# Preview: the first three routes with at most ten of their stops.
for route in list(RouteStops)[:3]:
    stops = RouteStops[route]
    print(f"Route {route}: {stops[:10]} ... ({len(stops)} stops total)")

In [None]:
# RouteTrips - maps every route_id to the list of trip_ids that run on it,
# in the order the trips appear in df_trips.
trip_ids_by_route = df_trips.groupby('route_id')['trip_id']
RouteTrips = trip_ids_by_route.apply(list).to_dict()

# Preview: the first three routes with at most ten of their trips.
for route in list(RouteTrips)[:3]:
    trips = RouteTrips[route]
    print(f"Route {route}: {trips[:10]} ... ({len(trips)} trips total)")

In [None]:
from datetime import datetime, timedelta

def gtfs_time_to_unix(time_str, date_str="2025-01-01"):
    """Convert an "H:M:S" time string into a Unix timestamp.

    The time is added as an offset to midnight of date_str, so hour values
    of 24 and above roll over into the following day(s).

    Parameters
    ----------
    time_str : str
        Three colon-separated integers: hours, minutes, seconds.
    date_str : str
        Day the offset is counted from, formatted as YYYY-MM-DD.

    Returns
    -------
    int
        Seconds since the epoch. The datetime is naive, so the value is
        interpreted in the local timezone of the machine running the code.

    Raises
    ------
    ValueError
        If time_str does not hold exactly three integers or date_str does
        not match the expected format.
    """
    hours, minutes, seconds = (int(part) for part in time_str.split(":"))
    service_day = datetime.strptime(date_str, "%Y-%m-%d")
    offset = timedelta(hours=hours, minutes=minutes, seconds=seconds)
    return int((service_day + offset).timestamp())

In [None]:
# Trips - maps every trip_id to its df_trips columns plus a "stops" entry:
#   Trips[trip_id]["stops"][stop_id] = {"arrival_time": ..., "departure_time": ...}
# with both times converted by gtfs_time_to_unix. If a trip visits the same
# stop more than once, the last visit in stop_sequence order wins.
df_stop_times = df_stop_times.sort_values(by=["trip_id", "stop_sequence"])

trip_info_columns = [col for col in df_trips.columns if col != "trip_id"]

Trips = (
    df_trips.set_index("trip_id")[trip_info_columns]
    .to_dict(orient="index")
)

for trip_id, group in df_stop_times.groupby("trip_id"):
    # Iterate over the three needed columns directly instead of
    # DataFrame.iterrows(), which builds a Series per row and is very slow
    # on a table the size of stop_times.
    stops_dict = {
        int(stop_id): {
            "arrival_time": gtfs_time_to_unix(arrival),
            "departure_time": gtfs_time_to_unix(departure),
        }
        for stop_id, arrival, departure in zip(
            group["stop_id"], group["arrival_time"], group["departure_time"]
        )
    }

    # Trips that appear in stop_times but not in df_trips still get an entry
    # holding only their stops.
    Trips.setdefault(trip_id, {})["stops"] = stops_dict

# Preview: the first three trips, truncating the stops mapping to three entries.
for trip, trip_info in list(Trips.items())[:3]:
    print(f"\nTrip {trip}:")
    for k, v in trip_info.items():
        if k == "stops":
            # show first 3 stops (as dict preview)
            subset = dict(list(v.items())[:3])
            print(f"  {k}: {subset} ... ({len(v)} stops total)")
        else:
            print(f"  {k}: {v}")

In [None]:
# StopRoutes - maps every stop_id to the set of route_ids serving it.
def _known_routes(route_ids):
    """Collect the non-missing route ids of one stop into a set."""
    return set(route_ids.dropna())


# Attach the route of every trip to its stop visits; visits whose trip is
# missing from df_trips get a missing route_id, which _known_routes drops.
trip_routes = df_trips[['trip_id', 'route_id']]
merged = df_stop_times.merge(trip_routes, on='trip_id', how='left')

routes_by_stop = merged.groupby('stop_id')['route_id']
StopRoutes = routes_by_stop.apply(_known_routes).to_dict()

# Preview: the first three stops and their routes.
for stop in list(StopRoutes)[:3]:
    routes = StopRoutes[stop]
    print(f"Stop {stop}: {routes}")

In [None]:
def earliest_trip(route, next_stop, departure_time):
    """Return the earliest trip of a route that can still be boarded at a stop.

    A trip qualifies when it serves next_stop and its departure_time there
    is not earlier than the given departure_time. Among qualifying trips the
    one with the smallest arrival_time at next_stop is returned; on ties the
    trip listed first in RouteTrips[route] wins.

    Parameters
    ----------
    route : route id, a key of RouteTrips.
    next_stop : stop id at which the trip is to be boarded.
    departure_time : earliest acceptable departure at next_stop, in the same
        unit as the times stored in Trips.

    Returns
    -------
    The trip id, or None when no trip of the route serves next_stop at or
    after departure_time.

    Raises
    ------
    KeyError
        If route is not a key of RouteTrips.
    """
    best_trip = None
    best_arr_time = float('inf')

    for trip in RouteTrips[route]:
        # Trips without any stop_times rows have no 'stops' entry; skip them
        # instead of failing with a KeyError.
        stop_times = Trips[trip].get('stops', {}).get(next_stop)
        if stop_times is None:
            continue
        # The trip has already left this stop: it cannot be boarded.
        if stop_times['departure_time'] < departure_time:
            continue
        arr_time = stop_times['arrival_time']
        if arr_time < best_arr_time:
            best_arr_time = arr_time
            best_trip = trip

    return best_trip

In [None]:
def raptor(source_stop, dest_stop, departure_time, K):
    """Round-based earliest-arrival search (RAPTOR) between two stops.

    Round k extends journeys made of k - 1 trips by one more trip: every
    route serving a stop improved in the previous round is scanned once,
    starting at the earliest such stop in RouteStops order. Vehicles are
    changed at shared stops only; walking transfers are not modelled.

    Parameters
    ----------
    source_stop : stop id, a key of StopRoutes.
    dest_stop : stop id, a key of StopRoutes.
    departure_time : earliest departure at source_stop, in the same unit as
        the arrival_time / departure_time values stored in Trips.
    K : maximum number of rounds, i.e. of trips in one journey.

    Returns
    -------
    list
        K + 1 entries. Entry k is the arrival time at dest_stop reached in
        round k (a journey of k trips) when that round improved on every
        earlier round, otherwise float('inf'). min() of the list is the
        earliest arrival found; an all-inf list means dest_stop was not
        reached within K trips.

    Raises
    ------
    KeyError
        If source_stop or dest_stop is not a key of StopRoutes.
    """
    for endpoint in (source_stop, dest_stop):
        if endpoint not in StopRoutes:
            raise KeyError(f"unknown stop id: {endpoint!r}")

    unreached = float('inf')
    # Per stop: arrival time found in each round (index = number of trips).
    stop_arrival_times = {stop: [unreached] * (K + 1) for stop in StopRoutes}
    # Per stop: best arrival time over all rounds so far, used for pruning.
    earliest_stop_arrival_times = dict.fromkeys(StopRoutes, unreached)

    stop_arrival_times[source_stop][0] = departure_time
    earliest_stop_arrival_times[source_stop] = departure_time

    # Stops whose arrival time improved in the previous round.
    marked_stops = {source_stop}

    for k in range(1, K + 1):
        # Routes to scan this round, each mapped to the earliest marked stop
        # along the route.
        routes_to_scan = {}
        for marked_stop in marked_stops:
            for route in StopRoutes[marked_stop]:
                route_stops = RouteStops[route]
                if (
                    route not in routes_to_scan
                    or route_stops.index(marked_stop)
                    < route_stops.index(routes_to_scan[route])
                ):
                    routes_to_scan[route] = marked_stop

        marked_stops = set()

        for route, first_stop in routes_to_scan.items():
            route_stops = RouteStops[route]
            current_trip = None

            for next_stop in route_stops[route_stops.index(first_stop):]:
                # Times of the trip we are riding at this stop, if it calls here.
                if current_trip is None:
                    times_here = None
                else:
                    times_here = Trips[current_trip]['stops'].get(next_stop)

                # Riding the current trip: does it improve the arrival here?
                # Arrivals later than the best known arrival at the
                # destination can never be part of a better journey.
                if times_here is not None:
                    arrival = times_here['arrival_time']
                    bound = min(
                        earliest_stop_arrival_times[next_stop],
                        earliest_stop_arrival_times[dest_stop],
                    )
                    if arrival < bound:
                        stop_arrival_times[next_stop][k] = arrival
                        earliest_stop_arrival_times[next_stop] = arrival
                        marked_stops.add(next_stop)

                # Boarding is only possible at stops reached with k - 1 trips.
                reached_at = stop_arrival_times[next_stop][k - 1]
                if reached_at == unreached:
                    continue

                # Board here when not riding yet, or when we were at this
                # stop early enough that an earlier trip may be catchable.
                if current_trip is None or (
                    times_here is not None
                    and reached_at <= times_here['departure_time']
                ):
                    # The previous-round arrival at this stop is passed as
                    # the departure_time argument of earliest_trip.
                    candidate = earliest_trip(route, next_stop, reached_at)
                    if candidate is not None:
                        current_trip = candidate

        # No stop improved: further rounds cannot change anything.
        if not marked_stops:
            break

    return stop_arrival_times[dest_stop]

In [None]:
# Example query: from stop 1 to stop 636 with at most 5 rounds.
# NOTE(review): departure_time is passed as 12, while the times stored in
# Trips come from gtfs_time_to_unix (Unix timestamps) - confirm the intended
# unit, e.g. whether gtfs_time_to_unix("12:00:00") was meant.
raptor(1, 636, 12, 5)