#### Imports and configurations

In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import pickle
import numpy as np
import pandas as pd
import networkx as nx

from datetime import *
from helper_functions import *
from scipy.stats import poisson

# Robust Journey Planner - Results Visualization
## 1. Load saved files
We start by loading the files that we saved when we constructed the graph: the graph itself, the metadata of the stations, the lambdas and all the unique lines. Note that, as in the other Jupyter notebook, we only kept the function definitions that we considered meaningful. The other functions, such as the plotting functions or the time-translation functions, can be found in the file 'helper_functions.py'.

In [3]:
# Load the graph built by the graph-construction notebook.
# nx.read_gpickle was deprecated in networkx 2.6 and removed in 3.0; it was a
# thin wrapper around pickle.load, so unpickling directly is equivalent and
# works with every networkx version.
# NOTE(review): unpickling can execute arbitrary code -- only load files that
# come from a trusted source.
with open('Ressources/graph.pickle', 'rb') as graph_file:
    G = pickle.load(graph_file)
# Load the metadata of the stations and keep only the x, y and STOP_NAME columns
metadata = pd.read_pickle('Ressources/metadata')
metadata = metadata[['x', 'y', 'STOP_NAME']]
# Load the lambda table (looked up by get_lambda below)
lambdas = pd.read_pickle('Ressources/lambdas')
# Load the distinct lines
df_lines_unique = pd.read_pickle('Ressources/unique_lines')

## 2. Plot all the lines
To get a better insight on the transportation network of Zürich, we plot all the distinct lines.

In [29]:
# Plot all the distinct lines on the map
# (plot_all_lines is one of the plotting helpers star-imported from helper_functions.py)
plot_all_lines(df_lines_unique)

## 3. Get the optimal itinerary
### 3.1 Trip parameters
We start by asking the user to enter the trip parameters, i.e. the source and destination, the departure and arrival times, and the requirement - whether they would like to arrive as early as possible or leave as late as possible.

In [70]:
# Define parameters for the query
# Time window of the trip: create_subgraph keeps only the nodes whose time of
# day lies strictly between these two values, on the weekday of arrival_time.
departure_time = datetime(2019, 1, 14, 15, 0, 0)
arrival_time = datetime(2019, 1, 14, 17, 30, 0)
# Stop names, compared with the first field of every graph node
source = 'Zürich, Paradeplatz'
destination = 'Rüschlikon, Bodengasse'
# Minimum success probability an itinerary must reach to be returned.
# NOTE: this name is reassigned to 0 in the isochronous-map section; re-run
# this cell before re-running the itinerary cells.
prob_success = 0.9

### 3.2 Subgraph
Now that we have the trip information, we create a subgraph in order to reduce the computation time.

In [71]:
def create_subgraph(G, departure_time, arrival_time):
    """Return the subgraph of G whose nodes respect the time constraints.

    A node is kept when
      * its weekday field (node[1]) equals the weekday of `arrival_time`,
        converted with dt_weekday2pyspark_weekday, and
      * its time field (node[2]) lies strictly between the time of day of
        `departure_time` and the time of day of `arrival_time`.
    Only the time of day is compared; the dates of the two datetimes are
    otherwise ignored.

    Parameters
    ----------
    G : graph exposing `nodes()` and `subgraph()` (a networkx graph here)
    departure_time, arrival_time : datetime

    Returns
    -------
    The result of `G.subgraph(...)` on the selected nodes.
    """
    # None of these depend on the node, so compute them once instead of
    # once per node.
    weekday = dt_weekday2pyspark_weekday(datetime.weekday(arrival_time))
    earliest = departure_time.time()
    latest = arrival_time.time()
    nodes_time_constr = [node for node in G.nodes()
                         if node[1] == weekday and earliest < node[2] < latest]
    return G.subgraph(nodes_time_constr)

### 3.3 Get the itinerary
Now that we have everything in hand, we run Dijkstra's algorithm to obtain the shortest path from the source to the destination.

In [72]:
def get_sources_destinations(G, source, destination):
    """ Collect the candidate start and end nodes of a trip.

        Every node of G whose stop name (first field) equals `source` is a
        candidate start, and every node whose stop name equals `destination`
        is a candidate end. Both lists are returned sorted by the third
        field of the node (its time). """
    def node_time(candidate):
        return candidate[2]

    all_nodes = list(G.nodes())
    starts = [candidate for candidate in all_nodes if candidate[0] == source]
    ends = [candidate for candidate in all_nodes if candidate[0] == destination]
    starts.sort(key=node_time)
    ends.sort(key=node_time)
    return starts, ends

In [73]:
def build_weight_djikstra(source, destination):
    """ Build the custom Dijkstra weight function for this trip.

        The returned callable has the (from_node, to_node, edge_data)
        signature expected by networkx and returns the cost of an edge as
        int(trip) + int(walk) + int(wait), read from the edge data.

        `source` and `destination` are kept so that callers do not change,
        but they do not influence the weight. The previous version branched
        on "first station" / "last station" and then returned the very same
        sum in all three branches, so the branching was dropped. """

    def weight_djikstra(from_node, to_node, d):
        # Same cost for every edge, whichever stations it connects.
        return int(d['trip']) + int(d['walk']) + int(d['wait'])

    return weight_djikstra

#### 3.3.1 Uncertainty of a trip
After getting the shortest paths from a departure station to an arrival station, we need to compute the uncertainty of a trip. To do this, we use the lambdas table to estimate the probability of an arrival delay at each station where there is a connection. We calculate the probability of successfully making a connection using the Poisson cumulative distribution function: $P(\text{success}) = P(\text{delay} \leq \text{departure} - \text{arrival}) = \text{PoissonCDF}(\text{departure} - \text{arrival};\ \lambda)$, where the parameter estimates $\lambda$ are stored in the lambdas dataframe. As discussed, we consider the possible delays to be independent, hence the total success probability of a trip can be estimated by simply computing $P(\text{success}) = P(A_1) \times P(A_2) \times \dots \times P(A_n)$, where $A_i$ is the event that the $i$-th connection succeeds.

In [79]:
def get_lambda(node, weekday, lambdas):
    """ Look up the lambda parameter of the delay distribution for a node.

        The row of `lambdas` is selected on the node's stop name (node[0]),
        line id (node[3]) and line text (node[4]), on the `weekday` flag and
        on a rush-hour flag derived from the hour of node[2] (rush hours are
        7:00-8:59 and 17:00-17:59). The `lambda` value of the first matching
        row is returned; when no row matches, 0.01 is returned, which stands
        for "no delay". """
    hour = node[2].hour
    rush = (7 <= hour < 9) or (17 <= hour < 18)
    stop_name, line_id, line_text = node[0], node[3], node[4]
    # Restrict the table to the entries describing this node
    selector = ((lambdas.LINE_ID == line_id) &
                (lambdas.STOP_NAME == stop_name) &
                (lambdas.LINE_TEXT == line_text) &
                (lambdas.weekday == weekday) &
                (lambdas.rush == rush))
    matches = lambdas[selector]
    # Fall back to the "no delay" lambda when the table has no entry
    if len(matches) == 0:
        return 0.01
    return matches['lambda'].values[0]

Starting from the end vertex, we compute the probability of success by multiplying the current probability by the probability of success at the current connection.

In [80]:
def success_of_trip(dijkstra_path, arrival_time, lambdas):
    """ Return the probability that the whole path succeeds.

        The probability starts as the Poisson CDF of the slack (in seconds)
        between the last node's time and `arrival_time`. The path is then
        walked backwards and, every time the line id (node[3]) changes
        between two consecutive nodes, the probability is multiplied by the
        Poisson CDF of the `wait` attribute of the edge joining them.

        The edge attributes are read from the module-level graph `G`. """
    last_stop = dijkstra_path[-1]
    # Flag passed to get_lambda: true when the day index is strictly between 1 and 7
    is_weekday = 1 < last_stop[1] < 7
    slack = substract_times(arrival_time.time(), last_stop[2]).total_seconds()
    probability = poisson.cdf(slack, get_lambda(last_stop, is_weekday, lambdas))

    following = last_stop
    for stop in dijkstra_path[::-1]:
        # A different line id between two consecutive nodes marks a connection
        if stop[3] != following[3]:
            # The wait stored on the edge is the time available for the connection
            wait = G[stop][following]["wait"]
            probability *= poisson.cdf(wait, get_lambda(stop, is_weekday, lambdas))
        following = stop
    return probability

#### 3.3.2 Get the itinerary
We can finally combine everything together and get the best trip possible.

In [81]:
def best_itinerary(G, source, destination, arrival_time, departure_time, lambdas, prob_success):
    """ Return the best itinerary from `source` to `destination`.

        The candidate destination nodes are tried from the latest to the
        earliest. For each of them the shortest path from any source node is
        computed with Dijkstra's algorithm (edge weights come from
        build_weight_djikstra), and the first path whose success probability
        is at least `prob_success` is returned.

        Parameters
        ----------
        G : networkx graph, normally the subgraph from create_subgraph
        source, destination : stop names
        arrival_time : datetime, forwarded to success_of_trip
        departure_time : unused here, kept so that callers do not change
        lambdas : lambda table, forwarded to success_of_trip
        prob_success : minimum success probability required

        Returns
        -------
        (success, trip) where `trip` is the list of nodes of the path, or
        (0, -1) when no acceptable itinerary exists. """
    weight_func = build_weight_djikstra(source, destination)
    # Get the possible sources and destinations
    sources, destinations = get_sources_destinations(G, source, destination)
    # nx.multi_source_dijkstra raises ValueError on an empty collection of
    # sources, which the NetworkXNoPath handler below would not catch:
    # a stop that is absent from the graph simply means "no itinerary".
    if not sources or not destinations:
        return 0, -1
    for target in reversed(destinations):
        try:
            _, trip = nx.multi_source_dijkstra(G, sources, target, weight=weight_func)
        except nx.NetworkXNoPath:
            # This destination node cannot be reached, try an earlier one
            continue
        success = success_of_trip(trip, arrival_time, lambdas)
        if success >= prob_success:
            return success, trip
    return 0, -1

In [82]:
# Create the subgraph
# (restricted to the weekday of arrival_time and to the query's time window)
subgraph = create_subgraph(G, departure_time, arrival_time)
# Get the best itinerary and print it
# (best_itinerary returns (0, -1) when no itinerary reaches prob_success)
success, trip = best_itinerary(subgraph, source, destination, arrival_time, departure_time, lambdas, prob_success)
print_trip(success, trip)

Probability of success: 92.80%
Travel time: 0:28:00 
-----------------------------------------------------------------
Zürich, Paradeplatz      		16:49		Tram		
Zürich, Bürkliplatz      		16:52		Tram		
-----------------------------------------------------------------
Zürich, Bürkliplatz      		16:56		Bus		
Zürich, Rentenanstalt    		17:00		Bus		
Zürich, Schweizer Rück   		17:00		Bus		
Zürich, Schweizer Rück   		17:01		Bus		
Zürich, Sukkulentensammlu		17:02		Bus		
Zürich, Landiwiese       		17:03		Bus		
Zürich, Rote Fabrik      		17:05		Bus		
Zürich, Seerose          		17:06		Bus		
Zürich, Stadtgrenze      		17:07		Bus		
Kilchberg ZH, Paradiesstr		17:09		Bus		
Kilchberg ZH, Bendlikon  		17:10		Bus		
Kilchberg ZH, Schooren   		17:11		Bus		
Rüschlikon, Schlossstrass		17:12		Bus		
Rüschlikon, Schlossstrass		17:13		Bus		
Rüschlikon, Bodengasse   		17:13		Bus		
Rüschlikon, Bahnhof      		17:14		Bus		
Rüschlikon, Weidstrasse  		17:15		Bus		
Rüschlikon, Weidstrasse  		17:16		Bus		
Rüschlikon, B

### 3.4. Visualization
Now that we have computed the best itinerary, we visualize it on a map using Folium.

In [83]:
# We plot the itinerary
# NOTE: `trip` comes from the best_itinerary call above and is -1 when no
# itinerary was found.
plot_itinerary(trip, metadata)

## 4. Isochronous Map
As a last step, we found it interesting to plot the isochronous map for a given starting point. Note that in what follows, we do not take into account the success probability of the trip, since it would have slowed down the computation.

In [84]:
def isochronous_points(source, trip_lengths, prob_success, metadata, departure_time=None):
    """ Return the points that are reachable from the source in at most m
        minutes, for every m in `trip_lengths`.

        Parameters
        ----------
        source : stop name of the starting point
        trip_lengths : iterable of trip durations, in minutes
        prob_success : minimum success probability, forwarded to best_itinerary
        metadata : DataFrame read by position; the first three columns are
            used as (long, lat, stop name) -- with the notebook's metadata
            these are the x, y and STOP_NAME columns
        departure_time : datetime, optional. Start of every trip. Defaults to
            the current time, as before; pass a fixed value to make the
            result reproducible.

        Returns
        -------
        A list with one entry per trip length, each being the list of
        (lat, long) tuples of the stops for which best_itinerary found a trip.

        The graph and the lambda table are read from the module-level names
        `G` and `lambdas`. """
    if departure_time is None:
        departure_time = datetime.now()
    points = []
    for trip_l in trip_lengths:
        arrival_time = departure_time + timedelta(minutes=trip_l)
        subgraph = create_subgraph(G, departure_time, arrival_time)
        points_trip_l = []
        for _, row in metadata.iterrows():
            # Explicit positional access: plain row[2] on a label-indexed
            # Series relies on a fallback that pandas deprecated in 2.1.
            destination, lat, long = row.iloc[2], row.iloc[1], row.iloc[0]
            success, trip = best_itinerary(subgraph, source, destination, arrival_time,
                                           departure_time, lambdas, prob_success)
            # best_itinerary returns -1 in place of the trip when none was found
            if trip != -1:
                points_trip_l.append((lat, long))
        points.append(points_trip_l)
    return points

In [85]:
# Define parameters
# Trip lengths to test, in minutes (isochronous_points passes each one to timedelta(minutes=...))
trip_lengths = [10, 20, 30]
# With a threshold of 0, any itinerary whose success probability is >= 0 is accepted.
# NOTE: this overwrites the prob_success defined in section 3.1.
prob_success = 0

In [86]:
# Get the isochronous points
# (one list of (lat, long) tuples per entry of trip_lengths)
points_trip_l = isochronous_points(source, trip_lengths, prob_success, metadata)
# Plot the result
plot_isochronous(points_trip_l, trip_lengths, metadata)