In [7]:
%%local

# Scratch check: `|` builds a new set containing the union; `a` itself is not modified.
a = {1} 
a | {2}

{1, 2}

In [None]:
# This function assumes a (spark) dataframe with columns 'start_vertex', 'end_vertex', 'start_time', 'duration', 'route_id' 
# edges is a dataframe that represents the edges of the graph, 
# edges should be sorted by order of descending starting time
# walking_edges is a map vertex -> list(vertex), listing the nodes that can be reached on foot from a given node
# walking_edge_duration is a map (start_vertex, end_vertex) -> float, representing the duration of the walk (same unit as 'duration', i.e. minutes)
# end_time should be in the format 'HH:MM'
def latest_departure_paths(edges, end_stop, end_time, walking_edges, walking_edge_duration):
    """Collect, per vertex, the edges that still allow reaching ``end_stop`` by ``end_time``.

    Timetable edges are scanned from the latest departure to the earliest.
    An edge ``u -> v`` is kept when ``v`` already owns at least one outgoing
    edge (a ride, a walk, or the arrival sentinel of ``end_stop``) that can be
    caught after arriving at ``v``.

    Args:
        edges: Timetable with the columns ``start_vertex``, ``end_vertex``,
            ``start_time``, ``duration``, ``route_id`` as its first five
            columns, in that order. ``start_time`` and ``duration`` are
            compared against minutes since midnight. Either a Spark DataFrame
            (anything exposing ``toPandas``) or a pandas DataFrame; ordering
            does not matter because the rows are sorted here.
        end_stop: Destination vertex.
        end_time: Latest arrival time at ``end_stop``, formatted ``'HH:MM'``.
        walking_edges: Mapping ``vertex -> iterable of vertices`` that can be
            reached on foot from the key vertex.
        walking_edge_duration: Mapping ``(from_vertex, to_vertex) -> duration``
            of the corresponding walk, in minutes.

    Returns:
        dict mapping each vertex to a list of
        ``(departure_time, next_vertex, route_id, duration)`` tuples.
        ``end_stop`` carries the sentinel ``(end_time, end_stop, None, 0)``;
        walking edges use the route id ``'Walking'``.

    Raises:
        KeyError: if ``walking_edge_duration`` lacks a pair that is listed in
            ``walking_edges``.
    """
    transfer_time = 2  # at least 2 minutes to transfer
    max_walking_time = 10  # upper bound for two chained walking edges

    def time_to_minutes(timestamp):
        # 'HH:MM' -> minutes since midnight.
        return int(timestamp[:2]) * 60 + int(timestamp[3:5])

    def add_walking_edges_for_node(start_vertex, min_time, exclude_nodes):
        # Extend all_roads[start_vertex] with the walking edges that lead to a
        # vertex from which the journey can continue no earlier than min_time.
        if start_vertex in nodes_w_processed:
            return

        # Work on a copy; the entry in all_roads is replaced once at the end.
        next_edges = list(all_roads.get(start_vertex, []))
        exclude_nodes = exclude_nodes | {start_vertex}

        for end_vertex in walking_edges.get(start_vertex, []):
            # Never walk back to a vertex that is already part of the current
            # chain; without this guard a symmetric walking graph (A <-> B)
            # would recurse without end.
            if end_vertex in exclude_nodes:
                continue

            dur = walking_edge_duration[(start_vertex, end_vertex)]

            # Make sure the neighbour's own walking edges exist before they
            # are inspected below.
            add_walking_edges_for_node(end_vertex, min_time, exclude_nodes)

            for time_v, _next_v, route_id_v, dur_v in all_roads.get(end_vertex, []):
                if route_id_v == 'Walking':
                    # Walk followed by another walk: no transfer buffer, but
                    # the combined walking time is capped.
                    if dur_v + dur <= max_walking_time and time_v - dur >= min_time:
                        next_edges.append((time_v - dur, end_vertex, 'Walking', dur))
                elif time_v - dur - transfer_time >= min_time:
                    # Walk followed by a ride: arrive early enough to transfer.
                    next_edges.append((time_v - dur - transfer_time, end_vertex, 'Walking', dur))

        nodes_w_processed.add(start_vertex)
        # De-duplicate while keeping a deterministic (insertion) order.
        all_roads[start_vertex] = list(dict.fromkeys(next_edges))

    def edge_valid(u, v, time, dur, route_id):
        # Return the tuple to store for u when the edge (u, v, time, dur,
        # route_id) connects to something departing from v, otherwise None.
        add_walking_edges_for_node(v, time, {u})
        arrival = time + dur

        for time_v, _next_v, route_id_v, _dur_v in all_roads.get(v, []):
            # Staying on the same route or arriving at the destination needs
            # no transfer buffer.
            no_transfer = route_id == route_id_v or v == end_stop
            if (no_transfer and arrival <= time_v) or arrival + transfer_time <= time_v:
                return (time, v, route_id, dur)

        return None

    nodes_w_processed = set()

    end_time = time_to_minutes(end_time)
    start_time = end_time - 120  # look only at edges departing at most 2 hours before end_time

    # Sentinel "edge" at the destination so arrivals there can be validated.
    all_roads = {end_stop: [(end_time, end_stop, None, 0)]}

    if hasattr(edges, 'toPandas'):
        # Spark DataFrame: filter on the cluster before collecting.
        # NOTE(review): F is expected to be pyspark.sql.functions, imported
        # elsewhere in the notebook.
        edges = edges.filter(F.col('start_time').between(start_time, end_time)).toPandas()
    else:
        # Already a pandas DataFrame; Series.between is inclusive on both ends
        # by default, like the Spark call above.
        edges = edges[edges['start_time'].between(start_time, end_time)]

    edges = edges.sort_values(by=['start_time'], ascending=False).to_numpy()

    for row in edges:
        start_vertex, end_vertex, start, duration, route_id = row[:5]

        # start_vertex may gain an edge below, so the vertices listed as its
        # walking neighbours are un-marked and recomputed on demand.
        nodes_w_processed.difference_update(walking_edges.get(start_vertex, ()))

        update = edge_valid(start_vertex, end_vertex, start, duration, route_id)
        if update is not None:
            all_roads.setdefault(start_vertex, []).append(update)

    return all_roads