## Routing performance evaluation

Compare the travel times computed by our routing engine with the travel times reported by Rejseplanen for the same origin–destination pairs.

In [1]:
from graph_ds import PyH3Graph
import h3.api.numpy_int as h3
import numpy as np
import pandas as pd
import requests
import time
import datetime
import tqdm

from lets_plot import *
LetsPlot.setup_html()


In [24]:
# Input data: the pre-processed OSM extract and the GTFS feed.
osm_path = "../resources/processed/København Kommune_processed.osm.pbf"
gtfs_path = "../resources/copenhagen/rejseplanen.zip"

# Two graphs with identical settings except for the enabled layers:
# one with every layer, one restricted to walking and transit.
graph_all = PyH3Graph(weight_options={}, k_ring=2, layers="all")
graph_no_bike = PyH3Graph(weight_options={}, k_ring=2, layers="walk+transit")

# Both graphs are populated from the same OSM and GTFS sources,
# graph_all first and graph_no_bike second.
for graph in (graph_all, graph_no_bike):
    graph.create(osm_path=osm_path, gtfs_paths=[gtfs_path])

processing osm pbf file: ../resources/processed/København Kommune_processed.osm.pbf
converted OSM file into 382287 edges
osm graph created with 326898 nodes in 2.119477 s
getting GTFS feed from ../resources/copenhagen/rejseplanen.zip
routes: 1631
gtfs graph created with 103525 nodes in 9.313319 s
merged gtfs graph into osm graph, now has 428651 nodes, took 80 ms
hash: 4736217103143947563
processing osm pbf file: ../resources/processed/København Kommune_processed.osm.pbf
converted OSM file into 328446 edges
osm graph created with 274679 nodes in 1.735842 s
getting GTFS feed from ../resources/copenhagen/rejseplanen.zip
routes: 1631
gtfs graph created with 103525 nodes in 9.021873 s
merged gtfs graph into osm graph, now has 376432 nodes, took 75 ms
hash: 11603647893462833771


In [4]:
def route_length(graph, start, end, hour_of_week):
    """Return the routing result for a single origin/destination pair.

    Calls ``graph.matrix_distance`` with one origin and one destination and
    returns the first entry stored under ``start`` in the result.

    Args:
        graph: Object exposing ``matrix_distance(origins, destinations, hour_of_week=...)``.
        start: Origin node; also the key used to look up the result.
        end: Destination node.
        hour_of_week: Forwarded unchanged to ``matrix_distance``.

    Returns:
        The first element of the result for ``start``.
        NOTE(review): presumably a travel time in minutes (it is later
        compared against Rejseplanen minutes) — confirm against graph_ds.
    """
    distances = graph.matrix_distance([start], [end], hour_of_week=hour_of_week)
    results_for_start = distances[start]
    return results_for_start[0]

def rejseplanen_route(start, end, timeout=30):
    """Ask the Rejseplanen proxy for a trip and return its duration in minutes.

    The duration is measured from the departure time of the first leg to the
    arrival time of the last leg of the first trip in the response.

    Args:
        start: Origin identifier, sent as the ``origin`` form field.
        end: Destination identifier, sent as the ``destination`` form field.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            single stalled request would block the caller indefinitely.

    Returns:
        Trip duration in minutes as a float.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status.
        KeyError, IndexError, ValueError: If the response does not have the
            expected ``TripList -> Trip -> Leg`` structure or time format.
    """
    url = "https://rejseplanen.hexagonal.workers.dev/"
    payload = {
        "origin": start,
        "destination": end,
    }
    response = requests.post(
        url,
        data=payload,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of failing later on a missing key.
    response.raise_for_status()

    trips = response.json()["TripList"]["Trip"]
    first_trip = trips[0] if isinstance(trips, list) else trips

    # NOTE(review): the upstream JSON appears to collapse one-element lists
    # into a plain object, so a trip with a single leg would arrive as a dict
    # — confirm against the API. Normalising keeps such trips in the sample.
    legs = first_trip["Leg"]
    if isinstance(legs, dict):
        legs = [legs]

    departure = datetime.datetime.strptime(legs[0]["Origin"]["time"], "%H:%M")
    arrival = datetime.datetime.strptime(legs[-1]["Destination"]["time"], "%H:%M")

    # Only clock times are compared. ``timedelta.seconds`` always lies in
    # [0, 86400), so a trip that crosses midnight still yields a positive
    # duration; trips of 24 hours or more would wrap around.
    return (arrival - departure).seconds / 60.0


# Whole hours since the Unix epoch, folded into a 168-hour week (0..167).
# The epoch (1970-01-01 00:00 UTC) was a Thursday, so 0 here means Thursday 00:00 UTC.
# NOTE(review): confirm this matches the hour_of_week convention (start day, time zone)
# expected by the graph's matrix_distance.
current_hour_of_week = int(time.time() // 3600) % (24 * 7)


In [5]:
# Parallel result lists: entry k of every list belongs to the same sampled pair.
origins = []
destinations = []
our_times = []
our_times_no_bike = []
rejseplanen_times = []
# (start, end, error) for every sampled pair that had to be skipped, so the
# number and cause of dropped samples can be inspected afterwards.
failed_pairs = []

for i in tqdm.tqdm(range(200)):
    # Both endpoints are drawn from the all-layers graph.
    # NOTE(review): such a node may not exist in graph_no_bike — verify how
    # matrix_distance reports that case.
    start = graph_all.get_random_node()
    end = graph_all.get_random_node()

    try:
        our_distance_all = route_length(graph_all, start, end, current_hour_of_week)
        our_distance_no_bike = route_length(graph_no_bike, start, end, current_hour_of_week)

        rejseplanen_distance = rejseplanen_route(start, end)
    except Exception as exc:
        # Skip pairs that cannot be evaluated, but do not catch
        # KeyboardInterrupt/SystemExit: the loop must stay interruptible.
        failed_pairs.append((start, end, repr(exc)))
        continue

    # Append only after all three lookups succeeded so the lists stay aligned.
    our_times.append(our_distance_all)
    our_times_no_bike.append(our_distance_no_bike)
    rejseplanen_times.append(rejseplanen_distance)
    origins.append(start)
    destinations.append(end)

100%|██████████| 200/200 [21:06<00:00,  6.33s/it]


In [6]:
# Keep only the samples for which the all-layers graph produced a finite result;
# each row also stores the signed difference to the Rejseplanen time.
valid_times = [
    [ours, ours_no_bike, theirs, ours - theirs, origin, destination]
    for ours, ours_no_bike, theirs, origin, destination in zip(
        our_times, our_times_no_bike, rejseplanen_times, origins, destinations
    )
    if ours is not None and ours != np.inf
]

# Sort by our travel time and expose the resulting rank as an explicit
# 'index' column (first column), which the plots use as the x position.
df = pd.DataFrame(
    valid_times,
    columns=["our", "our_no_bike", "rejseplanen", "difference", "start", "end"],
).sort_values(by='our').reset_index(drop=True)
df.insert(0, 'index', df.index)
df.head()

Unnamed: 0,index,our,our_no_bike,rejseplanen,difference,start,end
0,0,7.509395,10.67797,17.0,-9.490605,631049684127517695,631049684132699135
1,1,10.098155,10.130203,10.0,0.098155,631049690184718847,631049690157373439
2,2,14.606498,34.84267,38.0,-23.393502,631049690152393727,631049690193281535
3,3,15.804032,24.268133,23.0,-7.195968,631049684199237631,631049684140840959
4,4,17.127508,18.127508,18.0,-0.872492,631049684387677183,631049684450205183


In [2]:
# The evaluation results were exported once with the line below; later runs
# reload that file instead of repeating the slow sampling loop.
# df.to_csv("../resources/routing_eval.csv")
df = (
    pd.read_csv("../resources/routing_eval.csv")
    # signed difference between the walk+transit graph and Rejseplanen
    .assign(no_bike_difference=lambda frame: frame['our_no_bike'] - frame['rejseplanen'])
    # drop samples whose all-layers result is 90 or more
    .loc[lambda frame: frame['our'] < 90]
)

In [3]:
# Long format: one row per (sample, type), with the three time columns stacked
# into a single 'time' column and their origin recorded in 'type'.
df_melted = pd.melt(
    df,
    id_vars=['index', 'start', 'end'],
    value_vars=['our', 'our_no_bike', 'rejseplanen'],
    var_name='type',
    value_name='time',
)

# Per-type averages, ordered by mean time. 'mean' is the rounded value shown
# in the plot labels; 'index' (0, 4, 8, ...) is the x position of each label.
means = (
    df_melted.groupby('type')
    .mean()
    .sort_values(by='time')
    .reset_index()
    .assign(
        mean=lambda frame: frame['time'].round(),
        index=lambda frame: frame.index * 4,
    )
)

In [4]:
# One colour per routing source, shared by the bars (fill) and by the mean
# lines/labels (color). Passing the same `limits` to both scales pins the
# type -> colour mapping; without it each scale assigns colours in the order
# the types appear in its own data (df_melted vs. the time-sorted means), so
# the two legends only agreed as long as the means happened to sort a
# particular way.
type_order = ['our', 'our_no_bike', 'rejseplanen']
type_colors = ["#8071AD", "#5923FA", "#FAAE69"]

p1 = (
    ggplot(df_melted)
    # grouped bars: travel time per sample and routing source
    + geom_bar(aes(x='index', y='time', fill='type'), stat='identity', position='dodge', alpha=0.8, width=0.8)
    # black dots: difference between our result and Rejseplanen per sample
    + geom_point(aes(x='index', y='difference'), data=df, color='black', alpha=0.7)
    # dashed lines and labels: mean travel time per routing source
    + geom_hline(aes(color='type', yintercept='time'), data=means, linetype='dashed')
    + geom_label(aes(x='index', y='time', label='mean', color='type'), data=means, size=7, alpha=0.7)
    + scale_fill_manual(values=type_colors, limits=type_order)
    + scale_color_manual(values=type_colors, limits=type_order)
    + xlab('') + ylab('travel time in minutes')
    + ggsize(1400, 400)
)

p1.show()



In [9]:
# Mean difference to Rejseplanen for each graph variant; used for the
# vertical marker lines and for positioning/labelling the annotations.
mean_diff_all = df['difference'].mean()
mean_diff_no_bike = df['no_bike_difference'].mean()

# Each series uses one colour for its density, mean line and label:
# '#8071AD' for the all-layers graph, '#5923FA' for walk+transit.
# (The mean line of the all-layers series previously used the walk+transit
# colour, making both mean lines indistinguishable.)
# NOTE(review): `width=2` on geom_vline and `label=` on geom_density are kept
# as in the original; confirm lets-plot honours them (line thickness is
# normally controlled by `size`).
p2 = (
    ggplot(df)
    + geom_density(aes(x='difference'), fill='#8071AD', color='#8071AD', alpha=0.5, label="graph_ds")
    + geom_density(aes(x='no_bike_difference'), fill='#5923FA', color='#5923FA', alpha=0.5, label="no bike")
    + geom_vline(xintercept=mean_diff_all, color='#8071AD', width=2)
    + geom_vline(xintercept=mean_diff_no_bike, color='#5923FA', width=2)
    + geom_label(x=mean_diff_all - 12, y=0.05, label=f"All - Rejseplanen | µ={mean_diff_all:.2f}", color='#8071AD', size=7)
    + geom_label(x=mean_diff_no_bike + 16, y=0.05, label=f"Walk and Transit - Rejseplanen | µ={mean_diff_no_bike:.2f}", color='#5923FA', size=7)
    + xlab("difference in minutes")
    + ggsize(700, 400)
)

p2.show()

In [10]:
# Export both figures as PNG files. The cell output is the value returned by
# the last ggsave call, i.e. the path of the second image that was written.
ggsave(p1, "routing_eval.png")
ggsave(p2, "routing_eval_diff.png")

'/home/inkompotato/personal/n-minute-city/python/lets-plot-images/routing_eval_diff.png'

In [7]:
# Standard deviation of the difference to Rejseplanen, shown as a tuple:
# (all-layers graph, walk+transit graph).
df['difference'].std(), df['no_bike_difference'].std()

(9.202152479765529, 8.404218826256754)