In [1]:
import pandas as pd
from datetime import datetime, timedelta

In [2]:
def is_reachable(T, S, c) -> bool:
    """Tell whether connection ``c`` can be used given the current scan state.

    Parameters
    ----------
    T : dict
        Trips already boarded, keyed by ``trip_id``.
    S : dict
        Earliest known time at which the rider is ready to depart from each
        stop, keyed by stop id.
    c : row-like object
        Connection exposing ``trip_id``, ``departure_stop`` and
        ``departure_time``.

    Returns
    -------
    bool
        True when the rider is already on the trip of ``c`` or is at the
        departure stop no later than the connection leaves.
    """
    if c.trip_id in T:
        # Already sitting in this vehicle: staying on board is always possible.
        return True
    # Inclusive comparison: the times in S already contain any transfer time,
    # so a connection leaving at exactly that instant can still be boarded.
    return c.departure_stop in S and S[c.departure_stop] <= c.departure_time

def connection_scan(source, target, start_time, footpaths, connections):
    """Run an earliest-arrival Connection Scan from ``source`` towards ``target``.

    Parameters
    ----------
    source, target : stop ids
        Start and destination stops.
    start_time : datetime
        Time at which the rider is at ``source``.
    footpaths : pandas.DataFrame
        Walking transfers with columns ``from_stop_id``, ``to_stop_id`` and
        ``transfer_time`` (minutes).
    connections : pandas.DataFrame
        Elementary connections with columns ``trip_id``, ``departure_stop``,
        ``departure_time``, ``arrival_stop`` and ``arrival_time``.  The scan
        stops early based on ``departure_time``, so the rows must already be
        sorted by that column.

    Returns
    -------
    dict
        Journey pointers ``J``: for every stop whose arrival time was improved
        by riding a trip, the tuple ``(first connection boarded on that trip,
        connection alighted from, transfer time of the final walk)``.
    """
    # Group the footpaths by origin once instead of filtering the whole frame
    # for every scanned connection.
    walks_from = {}
    for from_stop, to_stop, walk in zip(footpaths['from_stop_id'],
                                        footpaths['to_stop_id'],
                                        footpaths['transfer_time']):
        walks_from.setdefault(from_stop, []).append((to_stop, walk))

    S = {source: start_time}  # earliest ready-to-depart time per stop
    T = {}                    # trip_id -> first connection boarded on that trip
    J = {}                    # journey pointers, see docstring

    for to_stop, walk in walks_from.get(source, ()):
        S[to_stop] = start_time + timedelta(minutes=int(walk))

    for _, c in connections.iterrows():
        # Connections are sorted by departure, so nothing scanned from here on
        # can reach the target earlier than the time already known.
        if target in S and S[target] <= c.departure_time:
            break

        if not is_reachable(T, S, c):
            continue

        if c.trip_id not in T:
            T[c.trip_id] = c

        walks = walks_from.get(c.arrival_stop, ())
        # A stop without an explicit self-transfer is reached right when the
        # vehicle arrives; otherwise the listed self-transfer time applies.
        if all(to_stop != c.arrival_stop for to_stop, _ in walks):
            walks = [(c.arrival_stop, 0), *walks]

        for to_stop, walk in walks:
            arrival = c.arrival_time + timedelta(minutes=int(walk))
            # Relax the stop the footpath leads TO, not the stop it starts at.
            if to_stop not in S or arrival < S[to_stop]:
                S[to_stop] = arrival
                J[to_stop] = (T[c.trip_id], c, walk)
    return J

def get_journey(source, target, J, footpaths):
    """Rebuild the list of legs from ``source`` to ``target`` out of ``J``.

    Starting at ``target``, the journey pointers in ``J`` are followed
    backwards: each entry is appended as a leg and the walk continues from the
    ``departure_stop`` of the entry's first element.  When the chain ends, the
    first footpath row leading from ``source`` to the stop where it ended (if
    there is one) is placed in front of the legs.

    Returns the legs in travel order; the list is empty when nothing was found.
    """
    legs_backwards = []
    stop = target
    while stop in J:
        leg = J[stop]
        if leg is None:
            break
        legs_backwards.append(leg)
        # Continue from the stop where this leg's trip was boarded.
        stop = leg[0]['departure_stop']

    journey = legs_backwards[::-1]

    starts_at_source = footpaths['from_stop_id'] == source
    ends_at_stop = footpaths['to_stop_id'] == stop
    initial_walks = footpaths.loc[starts_at_source & ends_at_stop]
    if initial_walks.empty:
        return journey

    journey.insert(0, initial_walks.iloc[0])
    return journey

In [3]:
connections_df = pd.read_csv('../data/connections.csv')
transfers_df = pd.read_csv('../data/transfers.csv')

# Departure time of the query. The name keeps its existing spelling because the
# query cell further down refers to it as `statr_time`.
statr_time = datetime(year=2023, month=5, day=21, hour=6, minute=0)

# Calendar day used to turn the time strings of connections.csv into full
# datetimes. Derived from the query time so that the cell runs on a fresh
# kernel and the timetable lands on the same day as the query.
# NOTE(review): presumably the CSV stores plain time-of-day strings - confirm.
today = statr_time.date()

# transform_today_time(col) returns a row-wise function that prefixes the
# value of `col` with `today` and parses the result as an ISO datetime.
transform_today_time = lambda col: lambda row: datetime.fromisoformat(f'{today} {row[col]}')

connections_df['departure_time'] = connections_df.apply(transform_today_time('departure_time'), axis=1)
connections_df['arrival_time'] = connections_df.apply(transform_today_time('arrival_time'), axis=1)

In [4]:
# Small test from Zürich HB to Zürich, Sädlenweg
source = '8503000'  # Zürich HB
target = '8573711'  # Zürich, Sädlenweg

# connection_scan needs the connections ordered by departure time. A stable
# sort keeps the CSV order among connections that depart at the same time, so
# consecutive hops of one trip are not swapped.
# NOTE(review): this assumes the CSV lists each trip's connections in travel order - confirm.
connections_by_departure = connections_df.sort_values(by='departure_time', kind='mergesort')

J = connection_scan(source, target, statr_time, transfers_df, connections_by_departure)
# The suggested route is: 1. walk from Zürich HB (8503000) to Zürich, Bahnhofquai/HB (8587349)
#                         2. take the tram from Zürich, Bahnhofquai/HB (8587349) to Zürich Wiedikon, Bahnhof (8573710)
#                         3. change to another line from Zürich Wiedikon, Bahnhof (8573710) to Zürich, Sädlenweg (8573711)
print(get_journey(source, target, J, transfers_df))

[from_stop_id     8503000
to_stop_id       8587349
transfer_time        7.0
Name: 3078, dtype: object, (trip_id           66.TA.91-14-A-j22-1.7.H
departure_time        2023-05-21 06:11:00
arrival_time          2023-05-21 06:12:00
departure_stop                    8587349
arrival_stop                      8587348
route_desc                              T
Name: 1035123, dtype: object, trip_id           66.TA.91-14-A-j22-1.7.H
departure_time        2023-05-21 06:19:00
arrival_time          2023-05-21 06:21:00
departure_stop                    8591427
arrival_stop                      8573710
route_desc                              T
Name: 399712, dtype: object, 4.0), (trip_id           18.TA.96-182-4-j22-1.2.H
departure_time         2023-05-21 06:28:00
arrival_time           2023-05-21 06:30:00
departure_stop                     8573710
arrival_stop                       8591341
route_desc                               B
Name: 194137, dtype: object, trip_id           18.TA.96-182-4-j22-1.