In [1]:
import pandas as pd
import plotly.graph_objects as go
import datetime
import numpy as np
import cProfile
import sys
import pickle
import random

from graph import TransportGraph
from forward_search import FCH

# Build transport graph

In [2]:
# City whose dataset is analysed: it selects the data/<CITY>/ input folder and prefixes the pickle file names.
CITY = 'kuopio'  # values mentioned so far: 'belfast', 'kuopio'

## Optional 
### Add inverted road connections

In [3]:
# Read the two ';'-separated input tables of the selected CITY:
# the day timetable connections and the walking links.
transport_connections = pd.read_csv(
    f'data/{CITY}/network_temporal_day.csv',
    sep=';',
)
walk_connections = pd.read_csv(
    f'data/{CITY}/network_walk.csv',
    sep=';',
)

In [4]:
# Discard connections that start and end at the same stop.
is_self_loop = transport_connections['from_stop_I'] == transport_connections['to_stop_I']
transport_connections = transport_connections[~is_self_loop]

In [5]:
# Append a copy of every walking link with its endpoints swapped, so that each
# link is available in both directions.
# NOTE(review): this cell overwrites its own input; running it twice doubles the table.
swapped_endpoints = {'from_stop_I': 'to_stop_I', 'to_stop_I': 'from_stop_I'}
df_walk_invert = walk_connections.rename(columns=swapped_endpoints)
walk_connections = pd.concat([walk_connections, df_walk_invert])

In [6]:
# Build the transport graph from the timetable connections and the walking links prepared above.
tg = TransportGraph(transport_connections=transport_connections, walk_connections=walk_connections)

In [7]:
# NOTE(review): scratch calculation; the value is only displayed and never assigned or reused.
1000/60

16.666666666666668

In [8]:
# Display the base graph's edge count, node count and timetable-size statistics.
tg.edges_cnt, tg.nodes_cnt, tg.timetable_stats

(8891,
 549,
 {'min_size': 0,
  'mean_size': 3.2843324710381285,
  'std_size': 15.760627889293113,
  'max_size': 306})

# Build CH graph

### Calculate Sparse-CH

In [9]:
%%time
# Profile the contraction-hierarchy build with just_buses=True.
# The profiled statement binds the result to `ch_tg`, which later cells read.
cProfile.run('ch_tg = tg.contraction_hierarchy(just_buses=True)')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 549/549 [00:03<00:00, 178.68it/s]

         20231346 function calls (17497975 primitive calls) in 10.583 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       20    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:100(acquire)
     10/3    0.000    0.000    0.020    0.007 <frozen importlib._bootstrap>:1022(_find_and_load)
     14/7    0.000    0.000    0.010    0.001 <frozen importlib._bootstrap>:1053(_handle_fromlist)
       20    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:125(release)
       10    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:165(__init__)
       10    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:169(__enter__)
       10    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:173(__exit__)
       20    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:179(_get_module_lock)
       10    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:




In [10]:
# Display edge count, node count and timetable-size statistics of the just_buses=True CH graph.
ch_tg.edges_cnt, ch_tg.nodes_cnt, ch_tg.timetable_stats

(16263,
 549,
 {'min_size': 0,
  'mean_size': 3.872471253766218,
  'std_size': 15.344007708222097,
  'max_size': 306})

### Calculate Full-CH

In [None]:
%%time
# Profile the contraction-hierarchy build with just_buses=False.
# The profiled statement binds the result to `ch_tg_optimal`, which later cells read.
cProfile.run('ch_tg_optimal = tg.contraction_hierarchy(just_buses=False)')

 58%|███████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                            | 316/549 [00:23<00:59,  3.91it/s]

In [None]:
# Display edge count, node count and timetable-size statistics of the just_buses=False CH graph.
ch_tg_optimal.edges_cnt, ch_tg_optimal.nodes_cnt, ch_tg_optimal.timetable_stats

# Precalculate Geometrical containers for FCH

In [None]:
%%time
# Precompute the geometrical containers on the full CH graph.
# NOTE(review): the return value is not captured; presumably the result is stored on the graph object — confirm.
ch_tg_optimal.geometrical_container()

In [None]:
# Persist the full CH graph so later sessions can skip the expensive build.
# The context manager guarantees the file is flushed and closed even if pickling fails.
with open(F'{CITY}_original.pkl', 'wb') as pickle_file:
    pickle.dump(ch_tg_optimal, pickle_file, pickle.HIGHEST_PROTOCOL)

In [None]:
# Persist the just_buses=True CH graph so later sessions can skip the build.
# The context manager guarantees the file is flushed and closed even if pickling fails.
with open(F'{CITY}_trunc.pkl', 'wb') as pickle_file:
    pickle.dump(ch_tg, pickle_file, pickle.HIGHEST_PROTOCOL)

In [7]:
# Reload the previously pickled CH graphs for the selected CITY. The file names follow
# the same '{CITY}_trunc.pkl' / '{CITY}_original.pkl' pattern the graphs were saved under.
# SECURITY: pickle.load can execute arbitrary code; only load files produced by this notebook.
with open(f'{CITY}_trunc.pkl', 'rb') as pickle_file:
    ch_tg = pickle.load(pickle_file)
with open(f'{CITY}_original.pkl', 'rb') as pickle_file:
    ch_tg_optimal = pickle.load(pickle_file)

# Pathfinding

In [None]:
# Display the smallest and largest departure timestamp in the transport connections;
# the random benchmark queries below draw their start times from this range.
transport_connections['dep_time_ut'].min(), transport_connections['dep_time_ut'].max()

In [None]:
# Hand-picked queries (start time, start node, end node) answered on the full CH graph
# with the default search options.
test_pairs = pd.DataFrame(
    [
        (1481514083, 330, 54),
        (1481519880, 345, 141),
        (1481518900, 1, 132),
        (1481514540, 116, 118),
        (1481519640, 1, 50),
        (1481515900, 1, 51),
        (1481515900, 1, 50),
        (1481571574, 146, 7),
    ],
    columns=['start_time', 'start_node', 'end_node'],
)

for index, row in test_pairs.iterrows():
    pathfinding = FCH(
        graph=ch_tg_optimal,
        start_time=row['start_time'],
        start_node=row['start_node'],
        end_node=row['end_node'],
    )
    path = pathfinding.shortest_path(60)

    # Travel time is the arrival time minus the requested start time, truncated to whole seconds.
    travel_time = datetime.timedelta(seconds=int(path['arrival'] - row['start_time']))
    print('Path duration', travel_time)

    # 'duration' is reported by the search itself, also truncated to whole seconds.
    search_time = datetime.timedelta(seconds=int(path['duration']))
    print('Calculation duration', search_time)
    print()

In [None]:
# The same hand-picked queries (start time, start node, end node), answered on the
# just_buses=True CH graph with geometrical containers switched off.
test_pairs = pd.DataFrame(
    [
        (1481514083, 330, 54),
        (1481519880, 345, 141),
        (1481518900, 1, 132),
        (1481514540, 116, 118),
        (1481519640, 1, 50),
        (1481515900, 1, 51),
        (1481515900, 1, 50),
        (1481571574, 146, 7),
    ],
    columns=['start_time', 'start_node', 'end_node'],
)

for index, row in test_pairs.iterrows():
    pathfinding = FCH(
        graph=ch_tg,
        start_time=row['start_time'],
        start_node=row['start_node'],
        end_node=row['end_node'],
    )
    path = pathfinding.shortest_path(60, geometrical_containers=False)

    # Travel time is the arrival time minus the requested start time, truncated to whole seconds.
    travel_time = datetime.timedelta(seconds=int(path['arrival'] - row['start_time']))
    print('Path duration', travel_time)

    # 'duration' is reported by the search itself, also truncated to whole seconds.
    search_time = datetime.timedelta(seconds=int(path['duration']))
    print('Calculation duration', search_time)
    print()

# Compare 2 solutions

In [None]:
# Compare the switching-graph search on the just_buses=True CH graph (`ch_tg`) with the
# reference search on the full CH graph (`ch_tg_optimal`, geometrical containers on)
# over N random queries, and check that both report the same arrival.
N = 1_000

# Loop-invariant inputs, computed once instead of once per generated query.
dep_time_min = int(transport_connections['dep_time_ut'].min())
dep_time_max = int(transport_connections['dep_time_ut'].max())
# random.sample() on a set is deprecated since Python 3.9 and rejected from 3.11;
# drawing from a list works whatever container the graph uses for its nodes.
node_pool = list(ch_tg.nodes)

# NOTE(review): no random seed is set, so every run benchmarks a different query set.
test_data = pd.DataFrame({
    'start_time': [random.randint(dep_time_min, dep_time_max) for _ in range(N)],
    'start_node': [random.choice(node_pool) for _ in range(N)],
    'end_node': [random.choice(node_pool) for _ in range(N)],
})

new_duration = []
optimal_duration = []
for index, row in test_data.iterrows():

    # new search
    pathfinding = FCH(graph=ch_tg,
                      start_time=row['start_time'],
                      start_node=row['start_node'],
                      end_node=row['end_node'])
    path = pathfinding.shortest_path(60, search_with_switching_graphs=True, geometrical_containers=False)
    # Only queries that produced a path contribute to the timing statistics.
    if path['path']:
        new_duration.append(path['duration'])
    new_arrival = path['arrival']

    # optimal_duration
    pathfinding = FCH(graph=ch_tg_optimal,
                      start_time=row['start_time'],
                      start_node=row['start_node'],
                      end_node=row['end_node'])
    path = pathfinding.shortest_path(60, search_with_switching_graphs=False, geometrical_containers=True)
    if path['path']:
        optimal_duration.append(path['duration'])
    optimal_arrival = path['arrival']

    # validate: both searches must agree; report the offending query when they do not
    assert new_arrival == optimal_arrival, (
        f'arrival mismatch for query {row.to_dict()}: '
        f'new={new_arrival}, optimal={optimal_arrival}')

In [None]:
# Mean, median and standard deviation of the search durations recorded for the full CH graph.
np.mean(optimal_duration), np.median(optimal_duration), np.std(optimal_duration)

In [None]:
# Mean, median and standard deviation of the search durations recorded for the switching-graph search.
np.mean(new_duration), np.median(new_duration), np.std(new_duration)

### Without geometrical containers

In [None]:
# Compare the switching-graph search on the just_buses=True CH graph (`ch_tg`) with the
# search on the full CH graph (`ch_tg_optimal`), this time with geometrical containers
# disabled on both sides, over N random queries; both must report the same arrival.
N = 1_000

# Loop-invariant inputs, computed once instead of once per generated query.
dep_time_min = int(transport_connections['dep_time_ut'].min())
dep_time_max = int(transport_connections['dep_time_ut'].max())
# random.sample() on a set is deprecated since Python 3.9 and rejected from 3.11;
# drawing from a list works whatever container the graph uses for its nodes.
node_pool = list(ch_tg.nodes)

# NOTE(review): no random seed is set, so every run benchmarks a different query set.
test_data = pd.DataFrame({
    'start_time': [random.randint(dep_time_min, dep_time_max) for _ in range(N)],
    'start_node': [random.choice(node_pool) for _ in range(N)],
    'end_node': [random.choice(node_pool) for _ in range(N)],
})

new_duration = []
optimal_duration = []
for index, row in test_data.iterrows():

    # new search
    pathfinding = FCH(graph=ch_tg,
                      start_time=row['start_time'],
                      start_node=row['start_node'],
                      end_node=row['end_node'])
    path = pathfinding.shortest_path(60, search_with_switching_graphs=True, geometrical_containers=False)
    # Only queries that produced a path contribute to the timing statistics.
    if path['path']:
        new_duration.append(path['duration'])
    new_arrival = path['arrival']

    # optimal_duration
    pathfinding = FCH(graph=ch_tg_optimal,
                      start_time=row['start_time'],
                      start_node=row['start_node'],
                      end_node=row['end_node'])
    path = pathfinding.shortest_path(60, search_with_switching_graphs=False, geometrical_containers=False)
    if path['path']:
        optimal_duration.append(path['duration'])
    optimal_arrival = path['arrival']

    # validate: both searches must agree; report the offending query when they do not
    assert new_arrival == optimal_arrival, (
        f'arrival mismatch for query {row.to_dict()}: '
        f'new={new_arrival}, optimal={optimal_arrival}')

In [None]:
# Mean, median and standard deviation of the full-CH search durations (geometrical containers off).
np.mean(optimal_duration), np.median(optimal_duration), np.std(optimal_duration)

In [None]:
# Mean, median and standard deviation of the switching-graph search durations from the same run.
np.mean(new_duration), np.median(new_duration), np.std(new_duration)

# Visualize path

In [8]:
%%time
pathfinding = FCH(graph=ch_tg,
                      start_time=1481519880,
                      start_node=345, 
                      end_node=141)
path = pathfinding.shortest_path(60)

CPU times: user 4.06 ms, sys: 1.18 ms, total: 5.24 ms
Wall time: 6.07 ms


In [9]:
# Load the stop metadata of the selected CITY and index it by stop id,
# giving {stop_I: {column: value, ...}} for quick lookups.
nodes = pd.read_csv(f'data/{CITY}/network_nodes.csv', sep=';')
nodes_by_stop = nodes.set_index('stop_I')
nodes_dict = nodes_by_stop.to_dict(orient='index')

In [12]:
# List every stop on the found path with its coordinates and name.
for station in path['path']:
    stop_info = nodes_dict[station]
    print(station, stop_info['lat'], stop_info['lon'], stop_info['name'])

345 62.894633 27.645134 Canthia E
114 62.891878 27.637245 Snellmania E
116 62.88952 27.632591 Teknia E
118 62.888191 27.630635 Technopolis E
141 62.88333 27.61765 Neulamäentie 5 L


In [13]:
# Draw the found path on a map, one trace per run of consecutive identical route labels.
# Indexing assumes path['routes'][j] labels the hop from path['path'][j] to path['path'][j + 1].
route_labels = path['routes']
path_stops = path['path']


def _segment_trace(first, last):
    """Return a map trace through path stops first..last (inclusive), named after the route at `first`."""
    stop_ids = [path_stops[j] for j in range(first, last + 1)]
    return go.Scattermapbox(
        mode="markers+lines",
        lon=[nodes_dict[stop]['lon'] for stop in stop_ids],
        lat=[nodes_dict[stop]['lat'] for stop in stop_ids],
        text=[(stop, nodes_dict[stop]['name']) for stop in stop_ids],
        name=route_labels[first],
        marker={'size': 10})


# Start from an empty figure so the cell still renders when the path has no hops
# (previously `fig` was left undefined in that case).
fig = go.Figure()
segment_start = 0
for boundary in range(1, len(route_labels) + 1):
    # A segment ends where the route label changes, or at the end of the path.
    is_last_hop = boundary == len(route_labels)
    if is_last_hop or route_labels[boundary] != route_labels[boundary - 1]:
        fig.add_trace(_segment_trace(segment_start, boundary))
        segment_start = boundary

# NOTE(review): recent Plotly documentation lists the "stamen-*" styles as requiring a
# Stadia Maps token — confirm the base map still renders in the target environment.
fig.update_layout(
    mapbox = {
        'style': "stamen-terrain",
        # The map is centred on the first row of the nodes table, not on the path itself.
        'center': {'lon': nodes['lon'].iloc[0], 'lat': nodes['lat'].iloc[0]},
        'zoom': 9},
    showlegend=True)

fig.show()