In [1]:
import pandas as pd
from datetime import datetime, timedelta

In [None]:
# Download the Git LFS objects for the current checkout via the shell.
# NOTE(review): presumably this fetches the CSV files under ../data that are read later — confirm.
!git lfs pull

In [2]:
def connection_scan(source, target, start_time, connections, transfers):
    '''Scan connections by increasing departure time to label stops reachable from source.

    Args:
        source: stop_id of the initial stop
        target: stop_id of the destination stop
        start_time: initial datetime
        connections: df containing connections of all considered trips; the columns
            read are trip_id, departure_time, arrival_time, departure_stop and
            arrival_stop. Rows departing before start_time are ignored. The scan
            needs increasing departure_time, so the rows are stably sorted here
            when they do not already come in that order.
        transfers: df containing transfer time on foot; the columns read are
            from_stop_id, to_stop_id and transfer_time (minutes, truncated with int())

    Returns:
        J: a dictionary of triples (enter connection, exit connection, transfer time) for explored stops.
           Only stops whose arrival time was set by riding a connection get an entry;
           stops reached solely by the initial walk from source do not.
    '''
    # Index the footpaths by origin once, instead of masking the whole frame
    # for every reachable connection. Row order within an origin is preserved.
    footpaths = {}
    for origin, dest, minutes in zip(transfers['from_stop_id'],
                                     transfers['to_stop_id'],
                                     transfers['transfer_time']):
        footpaths.setdefault(origin, []).append((dest, minutes))

    S = {source: start_time}  # earliest known arrival time per stop
    T = {}                    # trip_id -> first connection of that trip we could board
    J = {}                    # journey pointers, see Returns

    # Initial walks from the source. Keep the earliest time per stop so that a
    # source -> source row (or a duplicate row) can never push a label later
    # than what is already known; the traveller is at `source` at `start_time`.
    for dest, minutes in footpaths.get(source, ()):
        reached = start_time + timedelta(minutes=int(minutes))
        if dest not in S or reached < S[dest]:
            S[dest] = reached

    valid_connections = connections[connections['departure_time'] >= start_time]
    # The early exit below is only sound on departure-ordered input.
    if not valid_connections['departure_time'].is_monotonic_increasing:
        valid_connections = valid_connections.sort_values(by='departure_time', kind='mergesort')

    for _, c in valid_connections.iterrows():
        # Every remaining connection departs no earlier than this one, so none
        # of them can improve the target once it is reached by this time.
        if target in S and S[target] <= c.departure_time:
            break

        # check if the connection is reachable
        # a connection is reachable if its trip has already been boarded or
        # the arrival time at its departure stop is not after its departure time
        already_on_trip = c.trip_id in T
        if not already_on_trip:
            if c.departure_stop not in S or S[c.departure_stop] > c.departure_time:
                continue
            T[c.trip_id] = c

        # update the arrival times of stops around the arrival stop reachable on foot
        # NOTE: the arrival stop itself is only labelled when `transfers` holds a
        # row from that stop to itself.
        for dest, minutes in footpaths.get(c.arrival_stop, ()):
            # if we've reached target, don't add transfer time
            if c.arrival_stop == target and dest == target:
                arrival_time = c.arrival_time
            else:
                arrival_time = c.arrival_time + timedelta(minutes=int(minutes))
            if dest not in S or arrival_time < S[dest]:
                S[dest] = arrival_time
                J[dest] = (T[c.trip_id], c, minutes)
    return J

def get_journey(source, target, J, transfers):
    '''Rebuild the journey to target by following the pointers in J backwards.

    Args:
        source: stop_id of the initial stop
        target: stop_id of the destination stop
        J: journey pointers; each value is a triple whose first item is the
           enter connection (indexable by 'departure_stop')
        transfers: df containing transfer time on foot

    Returns:
        journey: a list in travel order. It starts with the transfers row for the
        walk from source to the first boarding stop when such a row exists,
        followed by the J triples of each trip leg.
    '''
    legs_backwards = []
    stop = target
    while True:
        if stop not in J or J[stop] is None:
            break
        pointer = J[stop]
        legs_backwards.append(pointer)
        # continue from the stop where this leg was boarded
        stop = pointer[0]['departure_stop']

    legs = legs_backwards[::-1]

    # `stop` is now the earliest stop without a pointer; look for a walk to it
    starts_at_source = transfers['from_stop_id'] == source
    ends_at_stop = transfers['to_stop_id'] == stop
    initial_walks = transfers.loc[starts_at_source & ends_at_stop]
    if initial_walks.empty:
        return legs
    return [initial_walks.iloc[0]] + legs

In [3]:
# Timetable inputs, read from paths relative to the current working directory.
connections_df = pd.read_csv('../data/connections.csv')
transfers_df = pd.read_csv('../data/transfers.csv')

# NOTE(review): `statr_time` looks like a typo for `start_time`; the name is kept
# because the journey-search cell further down passes it to connection_scan.
statr_time = datetime(year=2023, month=5, day=21, hour=6, minute=0)
date = '2023-05-21'


def transform_today_time(col):
    '''Build a row-wise converter that places the value of row[col] on `date`.

    The returned callable formats "<date> <row[col]>" and parses the result with
    datetime.fromisoformat, so the column must hold text that completes an ISO
    datetime. `date` is looked up each time the callable runs.
    '''
    def parse_on_date(row):
        return datetime.fromisoformat(f'{date} {row[col]}')

    return parse_on_date


# The time columns are prefixed with `date` before parsing so that every
# timestamp lands on the queried day and compares directly with `statr_time`.
departure_parser = transform_today_time('departure_time')
connections_df['departure_time'] = connections_df.apply(departure_parser, axis='columns')
arrival_parser = transform_today_time('arrival_time')
connections_df['arrival_time'] = connections_df.apply(arrival_parser, axis='columns')

In [4]:
# Small test: Zürich HB to Zürich, Sädlenweg
source = '8503000' # Zürich HB
target = '8573711' # Zürich, Sädlenweg

# connection_scan is handed the connections ordered by departure time
J = connection_scan(
    source=source,
    target=target,
    start_time=statr_time,
    connections=connections_df.sort_values('departure_time'),
    transfers=transfers_df,
)
# The suggested route is:
#   1. walk from Zürich HB (8503000) to Zürich HB (8503000:0:31)
#   2. take the S-Bahn from Zürich HB (8503000:0:31) to Zürich Wiedikon (8503011:0:1)
#      and walk to Zürich Wiedikon, Bahnhof (8573710)
#   3. transfer to the bus from Zürich Wiedikon, Bahnhof (8573710) to Zürich, Sädlenweg (8573711)
print(get_journey(source, target, J, transfers_df))

[from_stop_id          8503000
to_stop_id       8503000:0:31
transfer_time             7.0
Name: 11395, dtype: object, (trip_id           434.TA.91-8-D-j22-1.208.R
departure_time          2023-05-21 06:07:00
arrival_time            2023-05-21 06:09:00
departure_stop                 8503000:0:31
arrival_stop                    8503011:0:1
route_desc                                S
Name: 464371, dtype: object, trip_id           434.TA.91-8-D-j22-1.208.R
departure_time          2023-05-21 06:07:00
arrival_time            2023-05-21 06:09:00
departure_stop                 8503000:0:31
arrival_stop                    8503011:0:1
route_desc                                S
Name: 464371, dtype: object, 4.0), (trip_id           120.TA.96-182-5-j22-1.2.H
departure_time          2023-05-21 06:18:00
arrival_time            2023-05-21 06:20:00
departure_stop                      8573710
arrival_stop                        8591341
route_desc                                B
Name: 1092181, dtype: o