# graph of walking distances between all pubs/bars/cafes in Wien

create dataset of distances between all bar/pubs/cafes in Vienna which can be used to find optimized route for visiting one pub in each district (Bezirk) - inspired by https://www.seidlrallye.at/

In [1]:
import xml.etree.ElementTree as ET

# get nodes with info from osm file - file generated on https://overpass-turbo.eu/
def get_nodes(osm_filename):
    tree = ET.parse(osm_filename)
    root = tree.getroot()
    nodes = []
    for i in range(len(root)):
        node = {}
        #print(root[i].tag, root[i].attrib)
        if root[i].tag != "node":
            continue
        node["id"] = root[i].attrib["id"]
        node["lat"] = root[i].attrib["lat"]
        node["lon"] = root[i].attrib["lon"]
        for j in range(len(root[i])):
            attrib = root[i][j].attrib
            node[attrib["k"]] = attrib["v"]
        nodes.append(node)
    return nodes

In [2]:
# list of nodes for bar, pub, cafe amenity from https://overpass-turbo.eu/
nodes_bar = get_nodes('wien_bar.osm')
nodes_pub = get_nodes('wien_pub.osm')
nodes_cafe = get_nodes('wien_cafe.osm')
all_nodes = nodes_bar + nodes_pub + nodes_cafe

In [3]:
import pandas as pd
df = pd.DataFrame.from_records(all_nodes)
df_postcode = df[df['addr:postcode'].notna()] # leave only those with PLZ so we can identify Bezirk
df_postcode.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1041 entries, 0 to 1861
Columns: 227 entries, id to snack
dtypes: object(227)
memory usage: 1.8+ MB


In [4]:
# leave only those inside of Wien

df_w = pd.DataFrame()

for i in range(1,24):
    plz="1{:02d}0".format(i)
    df_w = pd.concat([df_w, df_postcode[df_postcode['addr:postcode']==plz]])
    
df_w.info()    

<class 'pandas.core.frame.DataFrame'>
Int64Index: 987 entries, 20 to 1725
Columns: 227 entries, id to snack
dtypes: object(227)
memory usage: 1.7+ MB


In [5]:
from math import isnan

# make node dict, remove empty fields
def get_clean_nodes_dict(df):
    nodes_dict = {}
    for row in df.to_dict(orient="records"):
        node = {k: row[k] for k in row if not (isinstance(row[k], float) and isnan(row[k]))}
        nodes_dict[row["id"]] = node
    return nodes_dict

In [6]:
nodes_dict = get_clean_nodes_dict(df_w)

# node id as int
nodes = [n for n in nodes_dict.values()]
for n in nodes:
    n['id'] = int(n['id'])

In [7]:
# get map of Wien as a graph

import osmnx as ox
import matplotlib.pyplot as plt
import networkx as nx
from IPython.display import IFrame

ox.config(log_console=True, use_cache=True)

place_name = "Vienna, Austria"
graph = ox.graph_from_place(place_name, network_type='walk')




In [8]:
%%time

# find nearest nodes in map based on coordinates
osm_node_dict = {}

for n in nodes:
    osm_node_dict[n['id']] = ox.nearest_nodes(graph, float(n['lon']), float(n['lat']))

CPU times: user 9min 46s, sys: 126 ms, total: 9min 46s
Wall time: 9min 47s


In [18]:
# create list of from-to nodes for which we want to calculate distances

osm_nodes = list(osm_node_dict.keys())
#osm_nodes = osm_nodes[:50]
from_to_distances_dict = {}

for id1 in range(len(osm_nodes)):
    for id2 in range(id1+1, len(osm_nodes)):
        id1_node = osm_nodes[id1]
        id2_node = osm_nodes[id2]
        from_to_distances_dict[(id1_node, id2_node)]=-1
        
work = list(from_to_distances_dict.keys())        

In [10]:
def compute_distance(nodes_ids):
    (from_node, to_node) = nodes_ids
    # shortest walking distance in meters
    distance = nx.shortest_path_length(graph, osm_node_dict[from_node], osm_node_dict[to_node], weight='length')
    return round(distance)

In [19]:
%%time

# get distances in parallel with progressbar
import multiprocessing
from multiprocessing import Pool
import tqdm

pool = Pool(multiprocessing.cpu_count())
distances = []
for distance in tqdm.tqdm(pool.imap(func=compute_distance, iterable=work), total=len(work)):
        distances.append(distance)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 486591/486591 [3:19:41<00:00, 40.61it/s]

CPU times: user 28min 3s, sys: 2min 42s, total: 30min 45s
Wall time: 3h 19min 41s





In [22]:
# combine work and results
for w, d in zip(work, distances):
    from_to_distances_dict[w]=d
    #print(w,d)

# create nice dict which can be exported as json
from_to_distances = []
for k, v in from_to_distances_dict.items():
    from_to_distance = {"from" : k[0], "to": k[1], 'distance' : v}
    from_to_distances.append(from_to_distance)

In [23]:
final_dict = {
    "nodes" : nodes,
    "edges" : from_to_distances
}

import json

# save the json file
with open("wien_bar-pub-cafe_walk_distances.json", "wt") as f:
    json.dump(final_dict, f, indent=4, ensure_ascii=False)

In [24]:
final_dict['edges'][:5]

[{'from': 566660740, 'to': 566660785, 'distance': 109},
 {'from': 566660740, 'to': 870980192, 'distance': 531},
 {'from': 566660740, 'to': 901058218, 'distance': 1184},
 {'from': 566660740, 'to': 956159131, 'distance': 946},
 {'from': 566660740, 'to': 956430135, 'distance': 644}]