In [176]:
import pandas as pd
import overpass
import osmium
import pickle
import shapely.wkb as wkblib
import geopandas as gpd
import os
import folium
import branca
import json
from shapely.strtree import STRtree
from rtree import index
from shapely.geometry import Point, LineString, Polygon
from overpass import API
from sys import exc_info

# Getting Way-ids from Node-ids

OSRM returns node-id information but not way-ids. The latter are useful to us because they carry `max speed` information, which we can use to infer traffic situations. The challenge is how to connect a node-id to a way-id; the possibilities analysed so far are:

* The Overpass API can return the ways closest to a node given as (lat, lon): [Reference](https://stackoverflow.com/questions/49944535/gps-to-way-id-matching-in-openstreetmaps) and [Reference2](https://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide#.22around.22_-_finding_something_near_something_else). It usually returns more than one *Way Id*, but it seems it could be used to extract the max speed. However, using one of the free servers, it's **too slow**, taking ~1 second per query. We could set up a local Overpass API server by installing Overpass and loading OSM data into a PostGIS database using [Osm2pgsql](https://wiki.openstreetmap.org/wiki/Osm2pgsql). However, that would be one extra server running an application.
* The same process but with [Mapbox Route-Annotator](https://github.com/mapbox/route-annotator). I haven't tried it yet, but it seems to provide this kind of result, as [this thread](https://github.com/Project-OSRM/osrm-backend/issues/5473) shows.
* Use the OSM API to retrieve the node-id to way-id relationship, like [here](https://help.openstreetmap.org/questions/19276/getting-way-id-from-its-nodes-id). This seems good but, as it uses a remote API, it could be subject to **limits on the number of calls**. It would be best to have this information available locally.
* Locally retrieve the information needed above from `.pbf` files with [pyosmium](https://docs.osmcode.org/pyosmium/latest/intro.html). It provides functionality to process this kind of OSM file and extract its elements, *i.e.*, nodes, ways and relations. The idea is that, if we manage to build a mapping `from node-id to way-id` together with its metadata, we would be able to look up this information locally.

## Overpass API Test

In [2]:
# OVERPASS API Test

# sample lat lons
# The literal below stores each sample point as [lon, lat] (x, y order).
locs = [[13.3709, 52.511582], [13.370935, 52.511495], [13.371089, 52.511118], [13.370534, 52.511023], [13.369092, 52.510785], [13.368213, 52.510637], [13.367225, 52.51047], [13.366241, 52.510303], [13.365261, 52.510138], [13.364288, 52.509976], [13.363238, 52.509806], [13.362312, 52.509797], [13.36137, 52.509818], [13.360447, 52.509839], [13.359551, 52.509866], [13.358561, 52.509903], [13.357576, 52.509904], [13.356591, 52.509846], [13.35558, 52.509759], [13.354612, 52.509675], [13.353614, 52.509598], [13.35269, 52.509652], [13.351723, 52.509703], [13.351658, 52.509698], [13.351651, 52.509698], [13.351651, 52.509698], [13.351651, 52.509698], [13.351247, 52.509667], [13.351701, 52.507865], [13.351849, 52.507144], [13.35194, 52.506447], [13.351857, 52.505827], [13.351611, 52.5052], [13.351165, 52.504597], [13.350724, 52.50405], [13.350334, 52.503632], [13.349591, 52.503095], [13.348905, 52.502719], [13.348216, 52.502395], [13.347481, 52.502031], [13.346909, 52.50163], [13.346656, 52.501425], [13.345665, 52.500639], [13.344568, 52.500303], [13.343555, 52.500203], [13.342811, 52.500151], [13.342745, 52.500147], [13.34245, 52.500133], [13.341416, 52.500024], [13.340411, 52.499888], [13.33937, 52.499728], [13.338208, 52.499525], [13.337167, 52.499371], [13.336297, 52.499328], [13.335804, 52.499351], [13.335765, 52.499356], [13.335589, 52.49938], [13.335025, 52.499145], [13.334343, 52.498559], [13.333567, 52.497966], [13.332765, 52.497437], [13.332016, 52.496986], [13.33142, 52.496629], [13.331094, 52.496448]]
# Swap every pair to [lat, lon]; the query loop further down formats
# element 0 and element 1 into the `around` filter in that order.
locs = [[lat, lon] for lon,lat in locs]
# Peek at the first two swapped pairs. Nothing is displayed because this is
# not the last expression of the cell.
locs[:2]
 
# Overpass client created with the library defaults.
api = API()

# Kept as a separate name because the original cell exposed it.
locations = locs

# For every sample point, ask Overpass for the ways within 5 units of it
# (the `around` filter takes radius, then the two coordinates).
for lat, lon in locations:
    try:
        response = api.Get('way(around:5,{0},{1});'.format(lat, lon))
    except Exception:
        # Best effort: report the failure and move on to the next point.
        # `Exception` (instead of a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate, so the cell can still be interrupted.
        print("Got error: {0}".format(exc_info()))
    else:
        print("For location with coordinates {0}, {1} found way IDs:".format(lat, lon))
        print(response)
        print('\n')

For location with coordinates 52.511582, 13.3709 found way IDs:
{"features": [{"geometry": null, "id": 7552152, "properties": {"abandoned:highway": "primary", "description": "historic, since 2006, former Entlastungsstra\u00dfe", "end_date": "2006", "name": "Entlastungsstra\u00dfe", "wikipedia": "de:Entlastungsstra\u00dfe"}, "type": "Feature"}, {"geometry": null, "id": 168934839, "properties": {"bicycle": "no", "foot": "no", "highway": "trunk", "horse": "no", "lanes": "3", "lit": "yes", "maxheight": "4", "maxspeed": "50", "motorroad": "yes", "name": "Tunnel Tiergarten Spreebogen", "oneway": "yes", "placement": "right_of:2", "postal_code": "10785", "surface": "asphalt", "toll": "no", "turn:lanes": "left|through|none"}, "type": "Feature"}], "type": "FeatureCollection"}


For location with coordinates 52.511495, 13.370935 found way IDs:
{"features": [{"geometry": null, "id": 7552152, "properties": {"abandoned:highway": "primary", "description": "historic, since 2006, former Entlastungsstra

For location with coordinates 52.509903, 13.358561 found way IDs:
{"features": [{"geometry": null, "id": 279549312, "properties": {"cycleway": "lane", "cycleway:right": "yes", "highway": "secondary", "lit": "yes", "maxspeed": "50", "name": "Tiergartenstra\u00dfe", "postal_code": "10785", "ref": "L 1137", "surface": "asphalt"}, "type": "Feature"}], "type": "FeatureCollection"}


For location with coordinates 52.509904, 13.357576 found way IDs:
{"features": [{"geometry": null, "id": 279549312, "properties": {"cycleway": "lane", "cycleway:right": "yes", "highway": "secondary", "lit": "yes", "maxspeed": "50", "name": "Tiergartenstra\u00dfe", "postal_code": "10785", "ref": "L 1137", "surface": "asphalt"}, "type": "Feature"}, {"geometry": null, "id": 378130462, "properties": {"area:highway": "secondary", "surface": "asphalt"}, "type": "Feature"}], "type": "FeatureCollection"}


For location with coordinates 52.509846, 13.356591 found way IDs:
{"features": [{"geometry": null, "id": 279549312,

For location with coordinates 52.509667, 13.351247 found way IDs:
{"features": [{"geometry": null, "id": 4615993, "properties": {"cycleway:right": "lane", "highway": "secondary", "lanes": "4", "lit": "yes", "maxspeed": "50", "name": "St\u00fclerstra\u00dfe", "oneway": "yes", "postal_code": "10785", "surface": "asphalt", "turn:lanes": "left|left|through|through"}, "type": "Feature"}, {"geometry": null, "id": 153373877, "properties": {"cycleway:right": "lane", "highway": "secondary", "lanes": "1", "lit": "yes", "maxspeed": "50", "name": "St\u00fclerstra\u00dfe", "oneway": "yes", "postal_code": "10787", "surface": "asphalt"}, "type": "Feature"}, {"geometry": null, "id": 153652996, "properties": {"cycleway:right": "track", "highway": "primary", "lanes": "4", "lit": "yes", "maxspeed": "50", "name": "Klingelh\u00f6ferstra\u00dfe", "oneway": "yes", "postal_code": "10785", "surface": "asphalt", "turn:lanes": "left|through|through|through"}, "type": "Feature"}, {"geometry": null, "id": 49833319

For location with coordinates 52.502031, 13.347481 found way IDs:
{"features": [{"geometry": null, "id": 153151222, "properties": {"cycleway:right": "track", "highway": "primary", "lit": "yes", "maxspeed": "50", "name": "An der Urania", "oneway": "yes", "postal_code": "10787", "surface": "asphalt"}, "type": "Feature"}, {"geometry": null, "id": 382360350, "properties": {"landuse": "residential"}, "type": "Feature"}], "type": "FeatureCollection"}


For location with coordinates 52.50163, 13.346909 found way IDs:
{"features": [{"geometry": null, "id": 153151222, "properties": {"cycleway:right": "track", "highway": "primary", "lit": "yes", "maxspeed": "50", "name": "An der Urania", "oneway": "yes", "postal_code": "10787", "surface": "asphalt"}, "type": "Feature"}], "type": "FeatureCollection"}


For location with coordinates 52.501425, 13.346656 found way IDs:
{"features": [{"geometry": null, "id": 153151222, "properties": {"cycleway:right": "track", "highway": "primary", "lit": "yes", "ma

For location with coordinates 52.499328, 13.336297 found way IDs:
{"features": [{"geometry": null, "id": 318889282, "properties": {"cycleway:right": "track", "highway": "secondary", "lanes": "4", "lit": "yes", "maxspeed": "50", "name": "Lietzenburger Stra\u00dfe", "note": "\u00dcbergeordnete Stra\u00dfe, Stufe II", "oneway": "yes", "postal_code": "10789", "surface": "asphalt", "turn:lanes": "left|left|through|through"}, "type": "Feature"}, {"geometry": null, "id": 318889286, "properties": {"cycleway:right": "track", "highway": "secondary", "lanes": "3", "lit": "yes", "maxspeed": "50", "name": "Lietzenburger Stra\u00dfe", "note": "\u00dcbergeordnete Stra\u00dfe, Stufe II", "oneway": "yes", "postal_code": "10789", "surface": "asphalt", "turn:lanes": "left|through|none"}, "type": "Feature"}], "type": "FeatureCollection"}


For location with coordinates 52.499351, 13.335804 found way IDs:
{"features": [{"geometry": null, "id": 318889282, "properties": {"cycleway:right": "track", "highway":

## Osmium Test

https://oslandia.com/en/2017/07/10/osm-tag-genome-how-are-osm-objects-tagged/

In [177]:
class OsmRouterExtractor(osmium.SimpleHandler):
    """Collect car-drivable ways from an OSM file.

    For every way whose ``highway`` tag is one of ``road_types`` a record
    ``[way_id, node_refs, linestring, length, name, maxspeed]`` is appended
    to ``ways_list``.

    Attributes:
        wkbfab: WKB factory used to turn a way into a linestring.
        ways_list: Accumulated way records (see above).
        node_list: Initialised empty; not filled by this class.
        counter: Initialised to 0; not updated by this class.
        road_types: ``highway`` values that are kept.
    """

    def __init__(self):
        osmium.SimpleHandler.__init__(self)
        self.wkbfab = osmium.geom.WKBFactory()
        self.ways_list = []
        self.node_list = []
        self.counter = 0
        self.road_types = ['motorway', 'trunk', 'primary', 'secondary', 'tertiary', 'road', 'residential',
                           'motorway_link', 'trunk_link', 'primary_link', 'secondary_link', 'tertiary_link']

    def process_way(self, elem):
        """Append a record for ``elem`` to ``ways_list`` if it is a road.

        Args:
            elem: The way handed over by the handler callback. Its ``tags``,
                ``nodes`` and ``id`` are read.
        """
        # elem.nodes returns a node list:
        # https://docs.osmcode.org/pyosmium/latest/ref_osm.html?highlight=noderef#osmium.osm.NodeRef

        # TagList can't be converted to dict automatically, see:
        # https://github.com/osmcode/pyosmium/issues/106
        tags = {tag.k: tag.v for tag in elem.tags}

        # Keep only car driving highways:
        # https://wiki.openstreetmap.org/wiki/Key:highway?uselang=en-GBs
        if tags.get('highway') not in self.road_types:
            return

        nodes = [n.ref for n in elem.nodes]
        wkb = self.wkbfab.create_linestring(elem)
        line = wkblib.loads(wkb, hex=True)

        self.ways_list.append([elem.id,
                               nodes,
                               line,
                               # Length in coordinate units, not metres.
                               line.length,
                               tags.get('name', ''),
                               # float('nan') is the same value as numpy's nan
                               # but needs no numpy import, which this
                               # notebook never performs.
                               tags.get('maxspeed', float('nan'))])

    def way(self, elem):
        """Handler callback for ways; delegates to ``process_way``."""
        self.process_way(elem)

In [178]:
%%time

if(not os.path.exists('ways_berlin.pickle')):
    print('File ways_berlin.pickle NOT found. Recreating ways from pbf file')
    h = OsmRouterExtractor()

    h.apply_file("berlin-latest.osm.pbf", locations=True)

    colnames = ['way_id', 'nodes', 'line', 'line_length', 'name', 'maxspeed']
    ways_df = gpd.GeoDataFrame(h.ways_list, columns=colnames, geometry='line').set_index('way_id')
    
    with open('ways_berlin.pickle', 'wb') as f:
        pickle.dump(ways_df, f)
else:
    print('File ways_berlin.pickle found. Importing it locally')
    with open('ways_berlin.pickle', 'rb') as f:
        ways_df = pickle.load(f) 
        
display(ways_df.head())
 

File ways_berlin.pickle NOT found. Recreating ways from pbf file


Unnamed: 0_level_0,nodes,line,line_length,name,maxspeed
way_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4045150,"[1234120411, 6375730236, 262876417, 262877047,...","LINESTRING (13.6051677 52.373418, 13.6055003 5...",0.007237,Waldstraße,50
4045194,"[21432547, 5986493261]","LINESTRING (13.5313455 52.4926226, 13.5315321 ...",0.001309,Ursula-Goetze-Straße,30
4045220,"[28394961, 553334770, 3886937914, 29808849, 29...","LINESTRING (13.5278334 52.4904272, 13.5263856 ...",0.006061,Hönower Straße,30
4045223,"[29808846, 21432567, 3889914802, 3889914958, 3...","LINESTRING (13.523904 52.4828118, 13.5244956 5...",0.006651,Gundelfinger Straße,30
4045243,"[1822620447, 2845478638, 2845478635, 390192714...","LINESTRING (13.454214 52.5157088, 13.4543909 5...",0.004787,Frankfurter Allee,50


CPU times: user 43.2 s, sys: 504 ms, total: 43.7 s
Wall time: 41.6 s


In [149]:
# TODO: Preprocess df to remove duplicate rows

# Identify street with same name
# If street name has only 1 speed, keep just 1. Maybe the longest linestring?

We can now use the lat/lon points from map matching to retrieve the maximum speed, but **it's too slow**

In [143]:
# 100 copies of one sample coordinate pair, as Point objects
# (100 = possible size of gps trace).
points = [Point(coords) for coords in [[13.3709, 52.511582]] * 100]

In [144]:
%%time
for p in points:
    min_distane_way_id = ways_df['line'].distance(p).idxmin() #168934839
display(ways_df.loc[min_distane_way_id])

CPU times: user 22.6 s, sys: 98.7 ms, total: 22.7 s
Wall time: 22.8 s


Using [STRtree](https://shapely.readthedocs.io/en/stable/manual.html#str-packed-r-tree) for spatial indexing makes it **super fast**

In [145]:
# Tag every linestring with its way id so a tree hit can be mapped back to
# its row in ways_df. Attaching metadata to geometries is discussed in:
# https://github.com/Toblerity/Shapely/issues/618
for way_id, way_line in zip(ways_df.index, ways_df['line']):
    way_line.name = way_id

# Spatial index over all way linestrings.
tree = STRtree(ways_df['line'])

In [147]:
%%time

points = [[13.3709, 52.511582]] * 100 # possible size of gps trace
points = [Point(p_) for p_ in points]

for p in points:
    way_response = tree.query(p)[0].name
display(ways_df.loc[way_response])

nodes       [26992494, 115774223, 411322982, 2021889550, 3...
line        LINESTRING (13.3700849 52.5135155, 13.3701683 ...
name                             Tunnel Tiergarten Spreebogen
maxspeed                                                   50
Name: 48859717, dtype: object

CPU times: user 92.3 ms, sys: 3.39 ms, total: 95.7 ms
Wall time: 94.9 ms


# Matching by pair <node - node = way>
    
Didn't work

In [11]:
# Break every way into its consecutive vertex pairs: one record per pair,
# carrying both node ids, both points, the two-point line and the way's
# id, name and maxspeed.
# NOTE(review): assumes the i-th vertex of 'line' belongs to the i-th entry
# of 'nodes'; confirm that the two sequences always have the same length.
segment_records = []
for way_id, way in ways_df.iterrows():
    lons, lats = way['line'].xy
    for pos in range(len(lons) - 1):
        start_point = Point(lons[pos], lats[pos])
        end_point = Point(lons[pos + 1], lats[pos + 1])
        segment_records.append([
            way['nodes'][pos],
            way['nodes'][pos + 1],
            start_point,
            end_point,
            LineString([start_point, end_point]),
            way_id,
            way['name'],
            way['maxspeed'],
        ])

nodes_df = pd.DataFrame(
    segment_records,
    columns=['node_first', 'node_second', 'geom_point_first', 'geom_point_second',
             'geom_line', 'way_id', 'way_name', 'way_max_speed'],
)

In [12]:
# Copy each row's metadata onto its segment geometry so a tree hit exposes
# the way it belongs to. The loop key is the row label of nodes_df.
for row_id, segment in nodes_df.iterrows():
    segment_line = segment['geom_line']
    segment_line.row_id = row_id
    segment_line.way_id = segment['way_id']
    segment_line.way_name = segment['way_name']
    segment_line.way_max_speed = segment['way_max_speed']

# Spatial index over the individual two-point segments.
node_tree = STRtree(nodes_df['geom_line'])

This doesn't work either: because I'm calculating distance, it finds the closest points, but what I need is the best `match` between the two linestrings.

In [13]:
# Load the trace segments to match.
# NOTE(review): 'temp.pickle' is not created anywhere in this notebook;
# confirm where it comes from before unpickling it.
with open('temp.pickle', 'rb') as segments_file:
    segments = pickle.load(segments_file)

# Way name of the first tree hit for every segment.
way_response = [node_tree.query(segment)[0].way_name for segment in segments]
way_response[:5]

['Tunnel Tiergarten Spreebogen',
 'Ben-Gurion-Straße',
 'Ben-Gurion-Straße',
 'Tiergartenstraße',
 'Tiergartenstraße']

# Buffered OSM Linestrings

In [102]:
# first with whole way
# Buffer radius, in the units of the way coordinates.
BUFFER_RADIUS = .0001

# One polygon per way: the outer ring of the buffered linestring, tagged
# with the way's maxspeed, id and name.
pols = []
for way_id, row in ways_df.iterrows():
    p = Polygon(row['line'].buffer(BUFFER_RADIUS).exterior.coords)
    p.maxspeed = row['maxspeed']
    p.way_id = way_id
    p.name = row['name']
    pols.append(p)

# Build on a copy so adding the 'geometry' column cannot touch ways_df.
ways_df2 = gpd.GeoDataFrame(ways_df.copy(), geometry=pols)

# Keep only rows whose maxspeed is a plain number. The explicit copy makes
# the int conversion below write to an independent frame rather than to a
# slice of the unfiltered one (avoids pandas' SettingWithCopyWarning).
has_numeric_speed = ways_df2['maxspeed'].astype(str).str.isnumeric()
ways_df2 = ways_df2.loc[has_numeric_speed].copy()
ways_df2['maxspeed'] = ways_df2['maxspeed'].astype(int)
display(ways_df2.head())

Unnamed: 0_level_0,nodes,line,line_length,name,maxspeed,geometry
way_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4045150,"[1234120411, 6375730236, 262876417, 262877047,...","LINESTRING (13.6051677 52.373418, 13.6055003 5...",0.007237,Waldstraße,50,"POLYGON ((13.60547638114791 52.37359699731509,..."
4045194,"[21432547, 5986493261]","LINESTRING (13.5313455 52.4926226, 13.5315321 ...",0.001309,Ursula-Goetze-Straße,30,"POLYGON ((13.53143312069079 52.49393285118758,..."
4045220,"[28394961, 553334770, 3886937914, 29808849, 29...","LINESTRING (13.5278334 52.4904272, 13.5263856 ...",0.006061,Hönower Straße,30,"POLYGON ((13.5264488447908 52.48916763971058, ..."
4045223,"[29808846, 21432567, 3889914802, 3889914958, 3...","LINESTRING (13.523904 52.4828118, 13.5244956 5...",0.006651,Gundelfinger Straße,30,"POLYGON ((13.52440013643137 52.48473817762692,..."
4045243,"[1822620447, 2845478638, 2845478635, 390192714...","LINESTRING (13.454214 52.5157088, 13.4543909 5...",0.004787,Frankfurter Allee,50,"POLYGON ((13.45440082383998 52.51579000704922,..."


In [14]:
%%time
for seg in segments:
    ways_df2.loc[ways_df2.contains(seg)]

CPU times: user 12.6 s, sys: 85.5 ms, total: 12.7 s
Wall time: 12.8 s


Building an [R-Tree](http://toblerity.org/rtree/) for the buffered polygons. 

* If the tree doesn't find a perfect `contains`, we perform the same thing with just 1 point.

<font color=green>It seems it works! :D</font>

In [103]:
idx = index.Index()

# Register the bounding box of every buffered way polygon under the row
# label of ways_df2, so index hits can be fed straight into .loc.
for way_id, way_polygon in zip(ways_df2.index, ways_df2['geometry']):
    idx.insert(way_id, way_polygon.bounds)

In [104]:
%%time
# Loop through each Shapely polygon
way_ids = []
for seg in segments:
    # Filter possible candidates using R-Tree
    idxs = idx.intersection(seg.bounds)
    # Now do actual intersection
    filter1 = ways_df2.loc[idxs].contains(seg)
    way_id = ways_df2.loc[filter1[filter1 == True].index]
    if(len(way_id) > 0):
        way_id = way_id['line_length'].idxmin()
    else:
        first_point = Point(seg.xy[0][1], seg.xy[1][1])
        idxs = idx.intersection(first_point.bounds)
        filter1 = ways_df2.loc[idxs].contains(first_point)
        way_id = ways_df2.loc[filter1[filter1 == True].index]['line_length'].idxmax()
    way_ids.append(way_id)

trace_speed = ways_df2.loc[way_ids]['maxspeed'].values
trace_speed[:5]    

CPU times: user 260 ms, sys: 5.92 ms, total: 266 ms
Wall time: 267 ms


In [154]:
# Per-segment speeds used to colour the map below.
# NOTE(review): 'example_speeds.pickle' is not produced by this notebook;
# confirm its origin before unpickling it.
with open('example_speeds.pickle', mode='rb') as speeds_file:
    speeds = pickle.load(speeds_file)

In [155]:
# Colour scale for the speeds, following
# https://github.com/python-visualization/folium/blob/master/examples/Colormaps.ipynb
speed_colors = ['red', 'yellow', 'blue', 'purple']
max_speed_cm = branca.colormap.LinearColormap(speed_colors, vmin=0, vmax=60, index=[10,20,50,60])
max_speed_cm.caption = 'Speed Km/h'

# Slightly buffered trace segments as GeoJSON, each styled by its speed.
# zip pairs features and speeds positionally and stops at the shorter one.
buffered_segments = gpd.GeoSeries(segments).buffer(.00005)
j = json.loads(buffered_segments.to_json())
for feature, speed in zip(j['features'], speeds):
    feature['properties']['style'] = {
        'color': max_speed_cm(speed),
        'fillOpacity': 1,
    }

# Map with the coloured trace and the colour legend on top.
map_ = folium.Map([52.521441,13.404057], zoom_start=12)
traces = folium.features.GeoJson(j)
map_.add_child(traces)
map_.add_child(max_speed_cm)
map_