## Routing performance evaluation

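Compare the travel times computed by our routing engine (`graph_ds`) with the travel times reported by Rejseplanen for the same randomly sampled origin/destination pairs.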

In [2]:
from graph_ds import PyH3Graph
import h3.api.numpy_int as h3
import numpy as np
import pandas as pd
import requests
import time
import datetime
import tqdm

from lets_plot import *
LetsPlot.setup_html()


In [3]:
# Input data: the pre-processed OSM extract and the Rejseplanen GTFS feed.
osm_path = "../resources/processed/København Kommune_processed.osm.pbf"
gtfs_path = "../resources/rejseplanen.zip"

# Two graph variants that differ only in the `layers` argument, so the
# evaluation below can compare routing with and without the bike layer.
graph_all = PyH3Graph(layers="all", k_ring=2, bike_penalty=1)
graph_no_bike = PyH3Graph(layers="walk+transit", k_ring=2, bike_penalty=1)

# Both graphs are built from the same OSM and GTFS inputs.
graph_all.create(gtfs_paths=[gtfs_path], osm_path=osm_path)
graph_no_bike.create(gtfs_paths=[gtfs_path], osm_path=osm_path)

processing osm pbf file: ../resources/processed/København Kommune_processed.osm.pbf
converted OSM file into 382287 edges
osm graph created with 326898 nodes in 2.5594974 s
getting GTFS feed from ../resources/rejseplanen.zip
routes: 1631
gtfs graph created with 103525 nodes in 10.016023 s
merged gtfs graph into osm graph, now has 428651 nodes, took 92 ms
hash: 4736217103143947563
processing osm pbf file: ../resources/processed/København Kommune_processed.osm.pbf
converted OSM file into 328446 edges
osm graph created with 274679 nodes in 1.7510283 s
getting GTFS feed from ../resources/rejseplanen.zip
routes: 1631
gtfs graph created with 103525 nodes in 9.67844 s
merged gtfs graph into osm graph, now has 376432 nodes, took 86 ms
hash: 11603647893462833771


In [4]:
def route_length(graph, start, end, hour_of_week):
    """Look up the routed cost between one origin and one destination.

    Calls ``graph.matrix_distance`` with single-element origin and
    destination lists and returns the first value stored under ``start``
    in the result.

    Args:
        graph: object exposing ``matrix_distance(origins, destinations, hour_of_week=...)``.
        start: origin node; also the key used to index the result.
        end: destination node.
        hour_of_week: forwarded unchanged to ``matrix_distance``.

    Returns:
        The first entry of the result row for ``start``.
    """
    distances = graph.matrix_distance(
        [start],
        [end],
        hour_of_week=hour_of_week,
    )
    row_for_start = distances[start]
    return row_for_start[0]

def rejseplanen_route(start, end):
    """Return the duration, in minutes, of the first trip Rejseplanen proposes.

    Posts ``start`` and ``end`` as the form fields ``origin`` and
    ``destination`` to the proxy endpoint and reads the first entry of
    ``TripList.Trip`` from the JSON reply. The duration is the difference
    between the ``time`` of the first leg's ``Origin`` and the ``time`` of
    the last leg's ``Destination``.

    Only the ``HH:MM`` clock times are used, so an arrival that is earlier
    on the clock than the departure is treated as arriving on the following
    day; trips of 24 hours or more cannot be represented.

    Args:
        start: origin identifier, sent unchanged in the ``origin`` field.
        end: destination identifier, sent unchanged in the ``destination`` field.

    Returns:
        float: trip duration in minutes.

    Raises:
        requests.RequestException: on timeout, connection failure or an
            HTTP error status.
        KeyError, IndexError, ValueError: if the reply does not have the
            expected structure or the times are not ``HH:MM``.
    """
    url = "https://rejseplanen.hexagonal.workers.dev/"
    payload = {
        "origin": start,
        "destination": end,
    }
    response = requests.post(
        url,
        data=payload,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        # Without a timeout a single stalled request blocks the caller forever.
        timeout=30,
    )
    # Fail with a clear HTTP error instead of an opaque JSON/KeyError later.
    response.raise_for_status()

    trips = response.json()["TripList"]["Trip"]
    # NOTE(review): defensive normalisation - some JSON APIs deliver a
    # one-element collection as a bare object instead of a list; confirm
    # whether this endpoint ever does so.
    if isinstance(trips, dict):
        trips = [trips]
    legs = trips[0]["Leg"]
    if isinstance(legs, dict):
        legs = [legs]

    departure = datetime.datetime.strptime(legs[0]["Origin"]["time"], "%H:%M")
    arrival = datetime.datetime.strptime(legs[-1]["Destination"]["time"], "%H:%M")

    # Wrap a negative difference (arrival after midnight) into the next day.
    seconds_per_day = 24 * 60 * 60
    elapsed_seconds = (arrival - departure).total_seconds() % seconds_per_day
    return elapsed_seconds / 60.0


# Whole hours elapsed since the Unix epoch, wrapped to one week (0..167).
# NOTE(review): this counts from the epoch in UTC, so hour 0 is not
# necessarily the start of the local week - confirm which convention
# `hour_of_week` expects.
current_hour_of_week = int((time.time() // 3600) % (7 * 24))


In [5]:
# Result columns; one entry is appended to each list per successful sample,
# so the five lists always have equal length.
origins = []
destinations = []
our_times = []
our_times_no_bike = []
rejseplanen_times = []
# (start, end, error) for every sampled pair that had to be skipped, so the
# drop-outs can be inspected instead of vanishing silently.
failed_samples = []

for i in tqdm.tqdm(range(100)):
    # Both endpoints are drawn from the graph that contains all layers.
    start = graph_all.get_random_node()
    end = graph_all.get_random_node()

    try:
        our_distance_all = route_length(graph_all, start, end, current_hour_of_week)
        our_distance_no_bike = route_length(graph_no_bike, start, end, current_hour_of_week)

        rejseplanen_distance = rejseplanen_route(start, end)
    except Exception as exc:
        # `Exception` (not a bare `except`) keeps Ctrl-C / kernel interrupt
        # working during this long-running loop.
        failed_samples.append((start, end, repr(exc)))
        continue

    our_times.append(our_distance_all)
    our_times_no_bike.append(our_distance_no_bike)
    rejseplanen_times.append(rejseplanen_distance)
    origins.append(start)
    destinations.append(end)

print(f"{len(failed_samples)} sampled pairs were skipped because a lookup failed")

100%|██████████| 100/100 [08:51<00:00,  5.31s/it]


In [6]:
# Keep only samples for which our full-graph router returned a usable number:
# `None` and non-finite values (inf, -inf, NaN) are dropped. The filter is
# keyed on `our` only; `our_no_bike` is kept as returned.
valid_times = [
    [our, our_nb, rejseplanen, our - rejseplanen, start, end]
    for our, our_nb, rejseplanen, start, end in zip(
        our_times, our_times_no_bike, rejseplanen_times, origins, destinations
    )
    if our is not None and np.isfinite(our)
]

# Sort by our travel time and renumber, so that `index` is the rank of each
# route and can serve as the x position in the plots.
df = (
    pd.DataFrame(
        valid_times,
        columns=["our", "our_no_bike", "rejseplanen", "difference", "start", "end"],
    )
    .sort_values(by="our")
    .reset_index(drop=True)
)
df.insert(0, "index", df.index)
df.head()

Unnamed: 0,index,our,our_no_bike,rejseplanen,difference,start,end
0,0,7.759631,13.784286,16.0,-8.240369,631049684141138431,631049684447774207
1,1,13.269032,26.403703,25.0,-11.730968,631049690208999423,631049690703468031
2,2,18.636828,20.177968,19.0,-0.363172,631049690230695423,631049690146066943
3,3,19.742992,19.898693,22.0,-2.257008,631049684408012287,631049684367006719
4,4,20.469649,22.326661,24.0,-3.530351,631049688122076159,631049688149414399


In [7]:
# Persist the evaluation and reload it, so the cells below work from the
# stored file. `index=False` keeps pandas from writing the row index, which
# would otherwise come back from `read_csv` as an extra "Unnamed: 0" column.
df.to_csv("../resources/routing_eval.csv", index=False)
df = pd.read_csv("../resources/routing_eval.csv")

# df['our_dj'] = df[['start', 'end']].apply(lambda x: graph.djikstra_path(x[0], x[1], current_hour_of_week), axis=1)

In [8]:
x = list(range(len(df)))

# Only routes for which our full-graph result is below 90 are plotted.
plot_df = df[df['our'] < 90]

# Overlaid semi-transparent bars per route (ranked by `index`), the
# difference our - rejseplanen as black points, and one dashed horizontal
# line per series at its mean.
(
    ggplot(plot_df)
    + geom_bar(aes(y='our', x='index'), fill='red', alpha=0.5, stat='identity', label='graph_ds')
    + geom_bar(aes(y='rejseplanen', x='index'), fill='blue', alpha=0.5, stat='identity', label='rejseplanen')
    + geom_bar(aes(y='our_no_bike', x='index'), fill='orange', alpha=0.5, stat='identity', label='graph_ds_no_bike')
    + geom_point(aes(y='difference', x='index'), color='black', size=2)
    + geom_hline(yintercept=np.mean(plot_df['our']), color='red', linetype='dashed')
    + geom_hline(yintercept=np.mean(plot_df['rejseplanen']), color='blue', linetype='dashed')
    + geom_hline(yintercept=np.mean(plot_df['our_no_bike']), color='orange', linetype='dashed')
    + ggtitle("calculated route length for random routes")
)



In [9]:
# Density of the three result columns over all rows of `df`.
(
    ggplot(df)
    + geom_density(aes(x='our'), color='red', alpha=0.5, label="graph_ds")
    + geom_density(aes(x='our_no_bike'), color='orange', alpha=0.5, label="no bike")
    + geom_density(aes(x='rejseplanen'), color='blue', alpha=0.5, label="rejseplanen")
    + ggtitle("Route length distribution (Copenhagen network, 100 random routes)")
)

In [12]:
# Route one fixed origin/destination pair on the full graph and print its cost.
path, dist = graph_all.dijkstra_path(631049688122076159, 631049684447774207, current_hour_of_week)
print(dist)

# One coordinate pair per cell on the path. Column 1 is drawn on the x axis
# and column 0 on the y axis (presumably longitude and latitude - confirm
# against the h3 API).
coords = np.array([h3.h3_to_geo(cell) for cell in path])

(
    ggplot()
    + geom_livemap()
    + geom_path(aes(x=coords[:,1], y=coords[:,0]))
)

astar from 631049688122076159 to 631049684447774207
56.387755163083604


[backtrace] found start node
