# Path Finder

## Setup Spark environment

In [1]:
%%local
import os
username = os.environ['JUPYTERHUB_USER']
get_ipython().run_cell_magic('configure', line="-f", cell='{ "name":"%s-final-path-finder", "executorMemory":"4G", "executorCores":4, "numExecutors":5, "driverMemory": "4G" }' % username)

ID,YARN Application ID,Kind,State,Spark UI,Driver log,User,Current session?
6358,application_1618324153128_5874,pyspark,idle,Link,Link,,
6406,application_1618324153128_5941,pyspark,idle,Link,Link,,
6462,application_1618324153128_6023,pyspark,idle,Link,Link,,
6533,application_1618324153128_6110,pyspark,busy,Link,Link,,
6556,application_1618324153128_6136,pyspark,busy,Link,Link,,
6574,application_1618324153128_6156,pyspark,idle,Link,Link,,
6575,application_1618324153128_6161,pyspark,idle,Link,Link,,
6576,application_1618324153128_6163,pyspark,idle,Link,Link,,
6620,application_1618324153128_6218,pyspark,idle,Link,Link,,
6632,application_1618324153128_6233,pyspark,idle,Link,Link,,


In [2]:
%%send_to_spark -i username -t str -n username

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log,User,Current session?
6672,application_1618324153128_6274,pyspark,idle,Link,Link,,✔


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

SparkSession available as 'spark'.


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Successfully passed 'username' as 'username' to Spark kernel

In [3]:
import pyspark.sql.functions as F
from pyspark.sql.types import *

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


## Import data from HDFS

In [4]:
routes = spark.read.csv('/data/sbb/csv/timetable/routes/2019/05/07/routes.csv', header=True )
stops = spark.read.orc('/data/sbb/orc/geostops')

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [5]:
walking_edges = spark.read.parquet('/user/%s/final/parquet/walking_edges' % username)
transport_edges = spark.read.parquet('/user/%s/final/parquet/transport_edges' % username)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

## Shortest paths

In [6]:
walking_edges = walking_edges.withColumn('value', F.struct(F.col('end_vertex'), F.col('duration')))\
                             .groupBy('start_vertex').agg({'value': 'collect_set'})\
                             .toPandas()\
                             .set_index('start_vertex')\
                             .to_dict()['collect_set(value)']

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [7]:
edges = transport_edges.filter('weekday == "monday"').cache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [8]:
# This function assumes a (spark) dataframe with columns 'start_vertex', 'end_vertex', 'start_time', 'duration', 'route_id' 
# edges is a dataframe that represents the edges of the graph, 
# end_time should be in the format 'HH:MM'
def latest_departure_paths(edges, end_stop, end_time, walking_edges):

    def edge_valid(u, v, time, dur, route_id): 
        next_edges = all_roads.get(v, [])
        
        # Did we reach the destination ?
        if v == end_stop and time + dur <= end_time: 
             return (time, v, route_id, dur)
        
        if u != end_stop and v != end_stop:
            for edge in next_edges[::-1]: # Traverse the list in reverse order because it's sorted by descending starting time
                time_v, next_v, route_id_v, dur_v = edge
                if (route_id == route_id_v and time + dur <= time_v) or (time + dur + transfer_time <= time_v): 
                    return (time, v, route_id, dur)            
            
            # Can we walk from v instead?
            for end_vertex, walking_duration in walking_edges.get(v, []):
                if end_vertex != u: # do not loop back to u
                    for next_next_edge in all_roads.get(end_vertex, []):
                        time_next = next_next_edge[0]
                        if time + dur <= time_next - walking_duration - transfer_time : # can we make in time to v in order to walk
                            return (time, v, route_id, dur)
                
        return None
                
    def time_to_minutes(timestamp):
        return int(timestamp[:2]) * 60 + int(timestamp[3:5])
    
    
    transfer_time = 2 # at least 2 minutes to transfer
    
    all_roads = {}
    end_time = time_to_minutes(end_time)
    start_time = end_time - 120 # look only at edges departing at most 2 hours before end_time
        
    edges = edges.filter(F.col('start_time').between(start_time, end_time)).toPandas().sort_values(by=['start_time'], ascending=False).to_numpy()

    for row in edges:
        start_vertex, end_vertex, start, duration, route_id = row[:5]
        update = edge_valid(start_vertex, end_vertex, start, duration, route_id)
        if update:
            if start_vertex in all_roads:    
                all_roads[start_vertex].append(update)
            else:
                all_roads[start_vertex] = [update]
                        
    return all_roads

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [13]:
from itertools import groupby
import numpy as np

# Journey reconstruction

# Returns a List of all possible edges that can be taken after edge 'edge'
def select_next(all_roads, walking_edges, edge, transfer_time=2):
    
    prev_time, node, prev_route_id, prev_duration = edge[:4]
    next_edges = all_roads.get(node, [])
    
    if prev_route_id == 'Walking': 
        # A walking edge encodes only one next trip at the node, stored as an index of the trip in all_roads[node] 
        return [next_edges[edge[-1]]]
    else:
        possible_edges = []
        # We may take a transport edge or a walking edge in this case
        next_edges = next_edges +  get_walking_edges(all_roads, walking_edges, node)
        min_departures = {}
        for edge_n in next_edges:
            t, v, route_id, dur = edge_n[:4]
            if route_id == 'Walking' and prev_time + prev_duration <= t:
                possible_edges.append(edge_n)
            elif prev_route_id == route_id and prev_time + prev_duration <= t:
                possible_edges.append(edge_n)
                if (route_id, v) in min_departures and  min_departures[(route_id, v)][0] > t :
                    min_departures[(route_id, v)] = edge_n
                elif route_id not in min_departures:
                    min_departures[(route_id, v)] = edge_n
            elif prev_route_id != route_id and prev_time + prev_duration + transfer_time <= t:
                possible_edges.append(edge_n)
#             possible_edges += min_departures.values()

        return possible_edges 

# Recursively iterate on all possible paths to the destination, and yield journeys as we find them 
def iterate_all_edges(all_roads, walking_edges, start_edges, end_stop, end_time, cur_journey):
    for edge in start_edges:
        t, v, route_id, dur = edge[:4]
        if v == end_stop and t + dur <= end_time: # have we reached the destination?
            yield cur_journey + [edge]
        elif v != end_stop:
            print(cur_journey + [edge])
            next_edges = select_next(all_roads, walking_edges, edge)
            
            for journey in iterate_all_edges(all_roads, walking_edges, next_edges, end_stop, end_time, cur_journey + [edge]):
                yield journey
                
# Compute the number of transfers as well as the total walking duration for a Journey
def get_journey_attributes(journey):
    walking_distance = sum([t[-2] for t in journey if t[2] == 'Walking'])
    transfers = len(set([t[2] for t in journey if t[2] != 'Walking']))
    return [walking_distance, transfers]

# Compute viable walks starting at node start_stop
# A walking edge u -> v holds information about the single trip to take from v
# This avoids unnecessary path computations
def get_walking_edges(all_roads, walking_edges, start_stop, transfer_time=2):
    possible_walking_edges = []
    for end_vertex, duration in walking_edges.get(start_stop, []):
        for index, edge in enumerate(all_roads.get(end_vertex, [])):
            time_next, node_next = edge[:2]
            if node_next != start_stop:
                possible_walking_edges.append((time_next - duration - transfer_time, end_vertex, 'Walking', duration, index))
    return possible_walking_edges
    
def generate_best_paths(all_roads, walking_edges, start_stop, end_stop, end_time):
    # add possible paths that start by walking
    start_edges = all_roads.get(start_stop, []) + get_walking_edges(all_roads, walking_edges, start_stop)
    if not start_edges: # not paths were found
        yield []
    else:
        # sort edges and group by start_time
        start_edges = sorted(start_edges, key=lambda x:x[0], reverse=True)
        start_edges = groupby(start_edges, key=lambda x:x[0])
        for start_time, edges in start_edges:
            # find all possible journeys that start at time start_time
            all_journeys = list(iterate_all_edges(all_roads, walking_edges, list(edges), end_stop, end_time, []))
            journey_attribs = np.array([get_journey_attributes(journey) for journey in all_journeys])
            sorted_indices = np.lexsort(journey_attribs.T)
            for i in sorted_indices:
                yield all_journeys[i]

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [14]:
# Processes a list of edges (Journey) into a list of segments
def journey_to_segments (journey, start_stop):
    
    grouped_journey = [(route_id, list(edges)) for (route_id, edges) in  groupby(journey, lambda x: x[2])]
    segments = []
    for index, (route_id, edges) in enumerate(grouped_journey):
        
        start_time_s = edges[0][0]
        
        if route_id == 'Walking' and index > 0:
            # Start walking as soon as you get to the previous stop
            start_time_s = segments[-1][4] + segments[-1][5]
             
        end_stop_s = edges[-1][1]
        duration_s = edges[-1][0] + edges[-1][3] - edges[0][0]
        
        start_stop_s = start_stop if index == 0 else segments[index - 1][2]
            
        stops_s = [start_stop_s] + [t[1] for t in edges]
                
        segments.append((index + 1, start_stop_s, end_stop_s, route_id, start_time_s, duration_s, stops_s))
            
    return segments

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [15]:
def journey_to_df(journey_list):
    arr = []
    journey_number = 0
    for journey in journey_list:
        journey_number += 1
        arr = arr + [(journey_number,) + seg for seg in journey]
    
    schema = StructType([
        StructField("journey_number", IntegerType(), True),
        StructField("segment_number", IntegerType(), True),
        StructField("start_vertex", StringType(), True),
        StructField("end_vertex", StringType(), True),
        StructField("route_id", StringType(), True),
        StructField("departure_time", StringType(), True),
        StructField("duration", StringType(), True),
        StructField("stop_seq", ArrayType(StringType()), True)
    ])
    
    journey_df = spark.createDataFrame(arr, schema)
    
    return journey_df

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [16]:
stop_id_to_stop = stops.select('stop_id', 'stop_name')
stop_id_to_stop_dict = stop_id_to_stop.toPandas().set_index('stop_id').to_dict()['stop_name']

route_id_to_route_name = routes.withColumn('route_name', F.concat(F.col('route_desc'), F.lit(' '), F.col('route_short_name')))\
                               .select('route_id', 'route_desc', 'route_short_name', 'route_name')
route_id_to_route_name_dict = route_id_to_route_name.toPandas().set_index('route_id').to_dict()['route_name']
route_id_to_route_name_dict['Walking'] = 'Walk'

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [17]:
def minutes_to_timestamp(minutes):
    return str(minutes // 60) + ':' + str(minutes % 60).zfill(2)

def pretty_print_journey(journey):
    journey_str = '\n'.join(["%s : %s\n|\n| %s (%s min)\n| %s\n|\n%s : %s\n" % (minutes_to_timestamp(time), 
                                                                             stop_id_to_stop_dict[start],
                                                                             route_id_to_route_name_dict[route_id],
                                                                             minutes_to_timestamp(duration),
                                                                             ' -> '.join([stop_id_to_stop_dict[s] for s in stops]) if route_id != 'Walking' else '',
                                                                             minutes_to_timestamp(time + duration),
                                                                             stop_id_to_stop_dict[end]) for _, start, end, route_id, time, duration, stops in journey])
    return journey_str

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [18]:
end_stop = '8591221'
import time
start = time.time()
all_roads = latest_departure_paths(edges, end_stop , '20:00', walking_edges)
end = time.time() 
print(end - start)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

5.22864198685

In [19]:
len(all_roads.keys())

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

1702

In [20]:
all_roads[all_roads.keys()[671]]

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

[(1147, u'8587799', u'79-736-j19-1', 5), (1144, u'8593523', u'79-736-j19-1', 2), (1132, u'8587799', u'79-736-j19-1', 5), (1129, u'8593523', u'79-736-j19-1', 2), (1117, u'8587799', u'79-736-j19-1', 5), (1114, u'8593523', u'79-736-j19-1', 2), (1112, u'8590620', u'26-764-j19-1', 5), (1111, u'8590620', u'26-764-j19-1', 5), (1102, u'8587799', u'79-736-j19-1', 5), (1099, u'8593523', u'79-736-j19-1', 2), (1087, u'8587799', u'79-736-j19-1', 5), (1084, u'8593523', u'79-736-j19-1', 2), (1082, u'8590620', u'26-764-j19-1', 5), (1081, u'8590620', u'26-764-j19-1', 5)]

In [21]:
start_stop = all_roads.keys()[671]
end_time = 20 * 60
generator = generate_best_paths(all_roads, walking_edges, start_stop, end_stop, end_time)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [22]:
journey_1 = next(generator)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

[(1147, u'8587799', u'79-736-j19-1', 5)]
[(1147, u'8587799', u'79-736-j19-1', 5), (1159, u'8573244', u'26-12-A-j19-1', 1)]
[(1147, u'8587799', u'79-736-j19-1', 5), (1159, u'8573244', u'26-12-A-j19-1', 1), (1160, u'8590762', u'26-12-A-j19-1', 1)]
[(1147, u'8587799', u'79-736-j19-1', 5), (1159, u'8573244', u'26-12-A-j19-1', 1), (1160, u'8590762', u'26-12-A-j19-1', 1), (1161, u'8590620', u'26-12-A-j19-1', 2)]
[(1147, u'8587799', u'79-736-j19-1', 5), (1159, u'8573244', u'26-12-A-j19-1', 1), (1160, u'8590762', u'26-12-A-j19-1', 1), (1161, u'8590620', u'26-12-A-j19-1', 2), (1163, u'8503310:0:2', 'Walking', 2, 0)]
[(1147, u'8587799', u'79-736-j19-1', 5), (1159, u'8573244', u'26-12-A-j19-1', 1), (1160, u'8590762', u'26-12-A-j19-1', 1), (1161, u'8590620', u'26-12-A-j19-1', 2), (1163, u'8503310:0:2', 'Walking', 2, 0), (1167, u'8503006:0:6', u'26-15-j19-1', 2)]
[(1147, u'8587799', u'79-736-j19-1', 5), (1159, u'8573244', u'26-12-A-j19-1', 1), (1160, u'8590762', u'26-12-A-j19-1', 1), (1161, u'85906

In [38]:
journey_1

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

[(1147, u'8587799', u'79-736-j19-1', 5), (1159, u'8573244', u'26-12-A-j19-1', 1), (1160, u'8590762', u'26-12-A-j19-1', 1), (1161, u'8590620', u'26-12-A-j19-1', 2), (1163, u'8503310:0:2', 'Walking', 2, 0), (1167, u'8503006:0:6', u'26-15-j19-1', 2), (1170, u'8503020:0:2', u'26-15-j19-1', 4), (1174, u'8503000:0:43/44', u'26-15-j19-1', 4), (1178, u'8587348', 'Walking', 3, 0), (1183, u'8588078', u'26-31-j19-1', 2), (1185, u'8591287', u'26-31-j19-1', 1), (1186, u'8591239', u'26-31-j19-1', 1), (1187, u'8591375', u'26-31-j19-1', 1), (1188, u'8530813', u'26-31-j19-1', 2), (1190, u'8591364', u'26-31-j19-1', 1), (1191, u'8530812', u'26-31-j19-1', 1), (1192, u'8591137', u'26-31-j19-1', 1), (1193, u'8591233', u'26-31-j19-1', 1), (1194, u'8591221', u'26-31-j19-1', 2)]

In [138]:
journey_1 == journey_2

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

False

In [139]:
generator = generate_best_paths(all_roads, walking_edges, start_stop, end_stop, end_time)
journeys = []
for i in range(20):
    journey = next(generator)
    for journey_o in journeys:
        assert len(journey) != len(journey_o) or\
                any([journey[i] != journey_o[i] for i in range(len(journey))])
    journeys.append(journey)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [137]:
print(pretty_print_journey(journey_to_segments(journey_1, start_stop)))

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

19:07 : Zürich Flughafen, Werkhof
|
| Bus 736 (0:05 min)
| Zürich Flughafen, Werkhof -> Kloten Balsberg, Bahnhof
|
19:12 : Kloten Balsberg, Bahnhof

19:19 : Kloten Balsberg, Bahnhof
|
| Tram 12 (0:04 min)
| Kloten Balsberg, Bahnhof -> Glattbrugg, Unterriet -> Rümlang, Bäuler -> Glattbrugg, Bahnhof
|
19:23 : Glattbrugg, Bahnhof

19:23 : Glattbrugg, Bahnhof
|
| Walk (0:02 min)
| 
|
19:25 : Glattbrugg

19:27 : Glattbrugg
|
| S-Bahn 15 (0:11 min)
| Glattbrugg -> Zürich Oerlikon -> Zürich Hardbrücke -> Zürich HB
|
19:38 : Zürich HB

19:38 : Zürich HB
|
| Walk (0:03 min)
| 
|
19:41 : Zürich, Bahnhofplatz/HB

19:43 : Zürich, Bahnhofplatz/HB
|
| Bus 31 (0:13 min)
| Zürich, Bahnhofplatz/HB -> Zürich, Central -> Zürich, Neumarkt -> Zürich, Kunsthaus -> Zürich, Sprecherstrasse -> Zürich Kreuzplatz -> Zürich, Signaustrasse -> Zürich Hegibachplatz -> Zürich, Freiestrasse -> Zürich, Klusplatz -> Zürich, Kapfstrasse
|
19:56 : Zürich, Kapfstrasse

In [123]:
print(pretty_print_journey(journey_to_segments(journey_2, start_stop)))

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

19:07 : Zürich Flughafen, Werkhof
|
| Bus 736 (0:05 min)
| Zürich Flughafen, Werkhof -> Kloten Balsberg, Bahnhof
|
19:12 : Kloten Balsberg, Bahnhof

19:19 : Kloten Balsberg, Bahnhof
|
| Tram 12 (0:04 min)
| Kloten Balsberg, Bahnhof -> Glattbrugg, Unterriet -> Rümlang, Bäuler -> Glattbrugg, Bahnhof
|
19:23 : Glattbrugg, Bahnhof

19:23 : Glattbrugg, Bahnhof
|
| Walk (0:02 min)
| 
|
19:25 : Glattbrugg

19:27 : Glattbrugg
|
| S-Bahn 15 (0:14 min)
| Glattbrugg -> Zürich Oerlikon -> Zürich Hardbrücke -> Zürich HB -> Zürich Stadelhofen
|
19:41 : Zürich Stadelhofen

19:41 : Zürich Stadelhofen
|
| Walk (0:01 min)
| 
|
19:42 : Zürich Stadelhofen FB

19:45 : Zürich Stadelhofen FB
|
| Tram 8 (0:01 min)
| Zürich Stadelhofen FB -> Zürich Kreuzplatz
|
19:46 : Zürich Kreuzplatz

19:50 : Zürich Kreuzplatz
|
| Bus 31 (0:06 min)
| Zürich Kreuzplatz -> Zürich, Signaustrasse -> Zürich Hegibachplatz -> Zürich, Freiestrasse -> Zürich, Klusplatz -> Zürich, Kapfstrasse
|
19:56 : Zürich, Kapfstrasse

## Generate 5 journeys

In [None]:
journey_list = []
generator = generate_best_paths(all_roads, walking_edges, all_roads.keys()[0], end_stop, 20 * 60)
for _ in range(5):
    journey = next(generator)
    journey_list.append(journey_to_segments(journey, all_roads.keys()[0]))

In [None]:
journey_df = journey_to_df(journey_list)

## Send to local

In [None]:
%%spark -o journey_df -n -1

In [None]:
%%local
journey_df['start_vertex'] = journey_df.start_vertex.astype(str)
journey_df['end_vertex'] = journey_df.end_vertex.astype(str)

In [None]:
%%spark -o stop_id_to_stop -n -1
stop_id_to_stop = stops.select('stop_id', 'stop_name')

In [None]:
%%spark -o stop_id_to_lon -n -1
stop_id_to_lon = stops.select('stop_id', 'stop_lon')

In [None]:
%%spark -o stop_id_to_lat -n -1
stop_id_to_lat = stops.select('stop_id', 'stop_lat')

In [None]:
%%spark -o route_id_to_route_name -n -1
route_id_to_route_name = routes.withColumn('route_name', F.concat(F.col('route_desc'), F.lit(' '), F.col('route_short_name')))\
                               .select('route_id', 'route_desc', 'route_short_name', 'route_name')

## Display results

In [None]:
%%local
stop_id_to_stop_dict = stop_id_to_stop.set_index('stop_id').to_dict()['stop_name']

stop_id_to_lon_dict = stop_id_to_lon.set_index('stop_id').to_dict()['stop_lon']
stop_id_to_lat_dict = stop_id_to_lat.set_index('stop_id').to_dict()['stop_lat']

route_id_to_route_name_dict = route_id_to_route_name.set_index('route_id').to_dict()['route_name']
route_id_to_route_name_dict['Walking'] = 'Walk'

In [None]:
%%local
def minutes_to_timestamp(minutes):
    if minutes < 60:
        return str(minutes)
    return str(minutes // 60) + ':' + str(minutes % 60).zfill(2)

In [None]:
%%local
def minutes_to_duration(minutes):
    if minutes < 60:
        return str(minutes) + ' min'
    return str(minutes // 60) + ' hr ' + str(minutes % 60) + ' min'

### Print paths

In [None]:
%%local
def pretty_print_journey(journey):
    journey_str = '\n'.join(["%s : %s\n|\n| %s (%s min)\n| %s\n|\n%s : %s\n" % (minutes_to_timestamp(time), 
                                                                             stop_id_to_stop_dict[start],
                                                                             route_id_to_route_name_dict[route_id],
                                                                             minutes_to_timestamp(duration),
                                                                             ' -> '.join([stop_id_to_stop_dict[s] for s in stops]) if route_id != 'Walking' else '',
                                                                             minutes_to_timestamp(time + duration),
                                                                             stop_id_to_stop_dict[end]) for _, start, end, route_id, time, duration, stops in journey])
    return journey_str

In [None]:
%%local
def journeys_to_str(df):
    if len(df) == 0:
        return "No possible path"
    
    n = df['journey_number'].unique()

    res = []
    for i in sorted(n):
        segment_list = sorted(df[df.journey_number == i].values[:,1:], key = lambda e: e[0])
        res.append(pretty_print_journey(segment_list))
    return '\n******************************\n'.join(res)

In [None]:
%%local
print(journeys_to_str(journey_df))

### Map visualization

In [None]:
%%local
import matplotlib.pyplot as plt

def get_cmap(n, name='hsv'):
    '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct 
    RGB color; the keyword argument name must be a standard mpl colormap name.'''
    return plt.cm.get_cmap(name, n)

In [None]:
%%local
import plotly.graph_objects as go
from matplotlib.colors import rgb2hex
from ipywidgets import interact

def map_plot():
    fig = go.Figure()

    fig.add_trace(go.Scattermapbox(
    ))

    fig.update_layout(
        autosize=True,
        hovermode='closest',
        mapbox=dict(
            style='carto-positron',
            # style='open-street-map',
            bearing=0,
            pitch=0,
            zoom=11
        ),
        height=900,
        width=1200
    )

    return fig

In [None]:
%%local

fig = map_plot()
stream_fig = go.FigureWidget(fig)
stream_data = stream_fig.data

@interact(Journey=journey_df.journey_number.unique())
def show_path(Journey):
    df=journey_df

    if len(df) == 0:
        return

    n = df['journey_number'].unique()

    journey = sorted(df[df.journey_number == Journey].values[:,1:], key = lambda e: e[0])        
    
    departure_time = journey[0][4]
    arrival_time = journey[-1][4] + journey[-1][5]
    
    stops_per_segment = [seg[-1] for seg in journey]
    
    stops_per_segment_no_duplicates = [stops_per_segment[0]] + [ss[1:] for ss in stops_per_segment[1:]]

    stops = [item for sublist in stops_per_segment_no_duplicates for item in sublist]
    
    lats = [stop_id_to_lat_dict[s] for s in stops]
    lons = [stop_id_to_lon_dict[s] for s in stops]
    
    # Reset figure and center figure
    with stream_fig.batch_update():
        stream_fig.data = stream_fig.data[:1]
        stream_fig.update_layout(mapbox_center=dict(lat = (max(lats)+min(lats))/2, lon = (max(lons)+min(lons))/2))
        stream_fig.update_layout(legend_traceorder="reversed", legend_valign='top')
        stream_fig.update_layout(title_text='%s (%s) -> %s (%s)' % (stop_id_to_stop_dict[stops[0]],
                                                                    minutes_to_timestamp(departure_time),
                                                                    stop_id_to_stop_dict[stops[-1]],
                                                                    minutes_to_timestamp(arrival_time)))
    
    
    stops_per_route = [(seg[3], seg[4], seg[5], seg[-1]) for seg in journey]
    
    for route_id, time, duration, stops in reversed(stops_per_route):
        stop_names = [stop_id_to_stop_dict[s] for s in stops]
        
        trace_name = '<b>' + route_id_to_route_name_dict[route_id] + '</b><br>%s %s<br>%s' % (stop_names[0],
                                                                                              minutes_to_timestamp(time),
                                                                                              minutes_to_duration(duration))
        
        if route_id != 'Walking':
            trace_name += ' (%d stops)' % (len(stops)-1)
        trace_name += '<br>%s %s' % (stop_names[-1], minutes_to_timestamp(time + duration))
        
        with stream_fig.batch_update():
            stream_fig.add_trace(go.Scattermapbox(
                lat=[stop_id_to_lat_dict[s] for s in stops],
                lon=[stop_id_to_lon_dict[s] for s in stops],
                mode='lines+markers',
                marker=go.scattermapbox.Marker(
                    size=6,
                ),
                opacity=1,
                hovertext=stop_names,
                name=trace_name,
                hoverinfo="text+name",
                hoverlabel_namelength=-1
            ))
        
stream_fig