In [1]:
import osmium
import shapely.wkb as wkblib
import numpy as np
import pandas as pd
import geopandas as gpd
from rtree import index
from shapely.geometry import Point, Polygon

In [2]:
# new-york-latest.osm.pbf

class RouteAnnotator(osmium.SimpleHandler):

    def __init__(self, pbf_path, bbox=None):
        """
        Parse an OSM extract and build the road GeoDataFrame (``self.df``).

        pbf_path: osm.pbf file path
        bbox: optional array[2] - [[lat, lon] bottom_left, [lat, lon] upper_right].
              When given, the dataframe is restricted to that box via
              ``apply_bbox`` right after it is built. ``None`` keeps every way.
        """
        osmium.SimpleHandler.__init__(self)

        # highway=* tag values accepted by way()
        self.ROAD_TYPES = ['motorway', 'trunk', 'primary', 'secondary', 'tertiary', 'road', 'residential', 'service',
                           'motorway_link', 'trunk_link', 'primary_link', 'secondary_link', 'tertiary_link']
        self.GEOMETRY_NAME = 'line'

        self.wkbfab = osmium.geom.WKBFactory()
        self.df = []        # raw rows appended by way(); turned into a GeoDataFrame by build_dataframe()
        self.r_tree = None  # filled by create_spatial_index()

        # Parse the file (rows are collected in self.df by way()), then tabulate them.
        self.apply_file(pbf_path, locations=True)
        self.build_dataframe()

        # The bbox argument used to be accepted but silently ignored.
        if bbox is not None:
            self.apply_bbox(bbox)
        
    def way(self, elem):
        """
        Way callback: append one raw row to ``self.df`` for every way whose
        ``highway`` tag is one of ``self.ROAD_TYPES``; other ways are skipped.
        """
        # TagList can't be converted to dict automatically, see:
        # https://github.com/osmcode/pyosmium/issues/106
        tag_map = {tag.k: tag.v for tag in elem.tags}

        # filter all types of car driving highways: https://wiki.openstreetmap.org/wiki/Key:highway?uselang=en-GBs
        if tag_map.get('highway') not in self.ROAD_TYPES:
            return

        def values_of(key):
            # every value stored under `key`, in tag order
            return [tag.v for tag in elem.tags if tag.k == key]

        # elem.nodes return a node list:
        # https://docs.osmcode.org/pyosmium/latest/ref_osm.html?highlight=noderef#osmium.osm.NodeRef
        node_refs = [node.ref for node in elem.nodes]
        geometry = wkblib.loads(self.wkbfab.create_linestring(elem), hex=True)

        name_values = values_of('name')
        speed_values = values_of('maxspeed')

        row = [
            elem.id,
            node_refs,
            geometry.length,
            name_values[0] if name_values else '',
            speed_values[0] if speed_values else np.nan,
            values_of('highway'),  # kept as a list; build_dataframe() takes the first entry
            geometry,
        ]
        self.df.append(row)
        
    def build_dataframe(self):
        """
        Turn the raw rows collected by ``way`` into a GeoDataFrame stored in ``self.df``.

        Steps:
          * ``highway_type`` is reduced from a one-element list to its first value;
          * ``maxspeed`` becomes an int column; any value that is not made only of
            decimal digits (missing values, or text such as "25 mph") becomes 0;
          * the frame becomes a GeoDataFrame whose geometry is the ``GEOMETRY_NAME``
            column, and ``reset_index()`` adds an ``index`` column;
          * ``highway_speed`` is left-joined from a per-highway-type default table
            (NaN for types missing from the table).
        """
        cols = ['way_id', 'nodes', 'line_length', 'name', 'maxspeed', 'highway_type', self.GEOMETRY_NAME]
        self.df = pd.DataFrame(self.df, columns=cols)
        self.df['highway_type'] = [e[0] for e in self.df['highway_type']]

        # isdecimal() (rather than isnumeric()) only accepts characters int() can parse,
        # and where() avoids writing a string into a column that may be all-NaN floats.
        speed_text = self.df['maxspeed'].astype(str)
        self.df['maxspeed'] = speed_text.where(speed_text.str.isdecimal(), '0').astype(int)

        self.df = gpd.GeoDataFrame(self.df, geometry=self.df[self.GEOMETRY_NAME]).reset_index()

        HIGHWAY_SPEED_LIMITS ={   # copied from https://github.com/Project-OSRM/osrm-backend/blob/master/profiles/car.lua
            'motorway':90,
            'motorway_link':45,
            'trunk':85,
            'trunk_link':40,
            'primary':65,
            'primary_link':30,
            'secondary':55,
            'secondary_link':25,
            'tertiary':40,
            'tertiary_link':20,
            'unclassified':25,
            'residential':25,
            'living_street':10,
            'service':15,
            'footway': 4,    # custom
            'path': 4,       #
            'pedestrian': 4, #
            'steps': 2,      #
            'track': 4,      #
            'piste': 4,      #
            'corridor': 4,   #
            'bridleway': 4,  #
            'razed': 4,      #
            'elevator': 0.2  #
        }

        speeds_df = pd.DataFrame(list(HIGHWAY_SPEED_LIMITS.items()),
                                 columns=['highway_type', 'highway_speed'])

        self.df = self.df.merge(speeds_df, how='left', on='highway_type')
        
#         self.df = self.df.drop('maxspeed', axis=1)
     
    def apply_bbox(self, bbox):
        """
        Keep only the rows of ``self.df`` whose geometry intersects a bounding box.

        bbox: either array[[lat, lon] bottom left, [lat, lon] upper right], or an
              already built shapely Polygon in (lon, lat) coordinate order.
        example:

        b_l = [40.498266, -74.270820]
        u_r = [40.915519, -73.680854]

        route_annotator.apply_bbox([b_l, u_r])
        """
        if not isinstance(bbox, Polygon):
            bottom_lat, left_lon = bbox[0][0], bbox[0][1]
            upper_lat, right_lon = bbox[1][0], bbox[1][1]
            # shapely expects (x, y) == (lon, lat)
            bbox = Polygon([[left_lon, bottom_lat],
                            [right_lon, bottom_lat],
                            [right_lon, upper_lat],
                            [left_lon, upper_lat]])

        self.df = self.df.loc[self.df.intersects(bbox)].copy()
        
    def create_spatial_index(self, buffer_size=.00005):
        """
        (Re)build ``self.r_tree`` from the current rows of ``self.df``.

        Each row's geometry is buffered by ``buffer_size`` (same units as the
        geometry coordinates) and the bounds of the buffered shape are inserted
        into the R-tree under the row's dataframe index label (not its way_id).

        buffer_size: buffer distance applied before taking the bounds.
        """
        self.r_tree = index.Index()

        # Only the bounds are indexed, so no per-row polygon has to be kept around.
        # The previous version also set custom attributes on shapely geometries,
        # which Shapely 2.x rejects.
        for row_label, geometry in self.df[self.GEOMETRY_NAME].items():
            self.r_tree.insert(row_label, geometry.buffer(buffer_size).bounds)
            
    # TODO: optmize removal by using r-tree and checking repetitive points only at neighborhood level
    def drop_duplicate_way_id_nodes(self):
        """
        Remove ways that repeat a directed node pair already used by another row.

        Rows are scanned in dataframe order. The first way that contains a given
        consecutive (node_pre, node_post) pair owns it; every later occurrence of
        the same pair is recorded as a duplicate. For each duplicate, the way with
        the smaller ``line_length`` (the first one on ties) is dropped from ``self.df``.

        Prints the number of duplicated pairs found.
        """
        pair_owner = {}   # (node_pre, node_post) -> way_id that used the pair first
        way_id_1 = []     # owner of the repeated pair
        way_id_2 = []     # way in which the pair was seen again

        for way_id, nodes in zip(self.df['way_id'], self.df['nodes']):
            for pair in zip(nodes, nodes[1:]):
                if pair in pair_owner:
                    way_id_1.append(pair_owner[pair])
                    way_id_2.append(way_id)
                else:
                    pair_owner[pair] = way_id

        print(f'number of ways if sharing nodes: {len(way_id_1)}')

        # retrieve, for every duplicate, the way_id that has the smaller length
        length_of = dict(zip(self.df['way_id'], self.df['line_length']))
        smaller_repetitive = {
            first if length_of[first] <= length_of[second] else second
            for first, second in zip(way_id_1, way_id_2)
        }

        # ...and remove it. The previous filter lacked the negation and therefore kept
        # ONLY the shorter duplicates, discarding every other way.
        self.df = self.df.loc[~self.df['way_id'].isin(list(smaller_repetitive))].copy()

#     def get_street_max_speed(self, segment):
#     # rank 7, segment LINESTRING (13.28866358846426 52.45759948794097, 13.28908503055573 52.45704031539945)
#     # fails because of lack of precision, check out here http://arthur-e.github.io/Wicket/sandbox-gmaps3.html
#     # Need mapmatch
#     # Filter possible candidates using R-Tree
#         idxs = list(self.r_tree.intersection(segment.bounds))
#         if (len(idxs) > 0):
#             # Now do actual intersection
#             filter1 = self.df.loc[idxs].contains(segment)
#             way_id = self.df.loc[filter1[filter1 == True].index]
#             if (len(way_id) > 0):
#                 way_id = way_id['line_length'].idxmin()
#                 return self.df.loc[way_id]['maxspeed']
#             else:
#                 first_point = Point(segment.xy[0][0], segment.xy[1][0])
#                 idxs = list(self.r_tree.intersection(first_point.bounds))
#                 if (len(idxs) > 0):
#                     filter1 = self.df.loc[idxs].contains(first_point)
#                     if (np.sum(filter1) > 0):
#                         way_id = self.df.loc[filter1[filter1 == True].index]['line_length'].idxmin()
#                         return self.df.loc[way_id]['maxspeed']

#                 second_point = Point(segment.xy[0][1], segment.xy[1][1])
#                 idxs = list(self.r_tree.intersection(second_point.bounds))
#                 if (len(idxs) > 0):
#                     filter1 = self.df.loc[idxs].contains(second_point)
#                     if (np.sum(filter1) > 0):
#                         way_id = self.df.loc[filter1[filter1 == True].index]['line_length'].idxmin()
#                         return self.df.loc[way_id]['maxspeed']
#         raise Exception(
#             f'Error mapping segment {segment} to street. Please check which segment caused it and evaluate usage of Map Matching')

    
    
#         def node_lookup(self):
            
#         def segment_lookup(self, points):
            
#         def way_lookup(self):
            


In [3]:
%%time

PBF_PATH = 'router/new-york-latest.osm.pbf'
print(f'loading {PBF_PATH}...')
osm_ann = RouteAnnotator(PBF_PATH)

print('filtering bbox')
b_l = [40.498266, -74.270820]   # NY
u_r = [40.915519, -73.680854]
# b_l = [63.351348, -24.751708] # Iceland
# u_r = [66.615533, -12.362366]
bbox = [b_l, u_r]
osm_ann.apply_bbox(bbox)

print('removing way ids with duplicated node ids pairs')
osm_ann.drop_duplicate_way_id_nodes()

print('creating spatial index')
osm_ann.create_spatial_index()

loading router/new-york-latest.osm.pbf...
filtering bbox
removing way ids with duplicated node ids pairs
number of ways if sharing nodes: 45
creating spatial index
CPU times: user 5min 11s, sys: 2.18 s, total: 5min 13s
Wall time: 5min 5s


# Attempt with OSMnx

In [1]:
import osmnx as ox
import numpy as np
import networkx as nx

from itertools import combinations
from shapely.geometry import Point, LineString, Polygon

In [10]:
import osmnx as ox
import numpy as np
import networkx as nx

from itertools import combinations

class RouteAnnotator():

    # TODO add different forms of network retrieval from OSMNx
    def __init__(self, place, network_type):
        """
        Download the street network for ``place`` and prepare speeds and lookups.

        place: place name passed to ``ox.graph_from_place`` (example - 'new york, usa')
        network_type: network type passed to ``ox.graph_from_place``
        """
        # Lookup tables and graph start out empty.
        self.segment_lookup_ = self.way_lookup_ = self.node_lookup_ = None
        self.G = None

        # Default speed per highway type, used by add_speeds() when an edge has no
        # purely numeric maxspeed.
        # TODO: Needs adjustment. See https://wiki.openstreetmap.org/wiki/New_York
        # copied from https://github.com/Project-OSRM/osrm-backend/blob/master/profiles/car.lua
        self.HIGHWAY_SPEED_LIMITS = dict(
            motorway=90,
            motorway_link=45,
            trunk=85,
            trunk_link=40,
            primary=65,
            primary_link=30,
            secondary=40,  # original: 55 - changed to NY where secondary = 25 mph ~= 40 kmh
            secondary_link=25,
            tertiary=40,
            tertiary_link=20,
            unclassified=25,
            residential=40,
            living_street=10,
            service=15,
            # custom values from here on
            footway=4,
            path=4,
            pedestrian=4,
            steps=2,
            track=4,
            piste=4,
            corridor=4,
            bridleway=4,
            razed=4,
            elevator=0.2,
        )

        self.G = ox.graph_from_place(place, network_type=network_type, simplify=False)
        self.add_speeds()
        self._build_lookups()

    def add_speeds(self):

        for u, v, k, data in self.G.edges(data=True, keys=True):
            if 'maxspeed' in data and type(data['maxspeed']) == str and data['maxspeed'].isdigit():
                continue
            else:
                if(type(data['highway']) == list): # sometimes data['highway'] comes with a list
                    cond = [elem in self.HIGHWAY_SPEED_LIMITS for elem in data['highway']]
                    highway_type = data['highway'][np.where(cond)[0][0]]
                else:
                    highway_type = data['highway']

                if(highway_type in self.HIGHWAY_SPEED_LIMITS):
                    speed = self.HIGHWAY_SPEED_LIMITS[highway_type]
                    data['maxspeed'] = speed

    def _build_lookups(self):
        # build segment lookup
        segment_lookup = {}
        segment_lengths = {}
        way2nodes = {}
        way2nodes_pair = {}
        way_lookup = {}
        way_segment_lengths = {}
        node_lookup = {}

        # build segment lookup
        for u, v, k, data in ra.G.edges(data=True, keys=True):

            if(type(data['osmid']) != list):
                way_ids = [data['osmid']]
            else:
                way_ids = data['osmid']

            for way in way_ids:
                if(way not in way2nodes.keys()):
                    way2nodes[way] = []
                    way2nodes_pair[way] = []
                    way_lookup[way] = data
                    way_segment_lengths[way] = []
                way2nodes[way].extend([u,v])                    # add all nodes associated to a way
                way2nodes_pair[way].append([u,v])               # add pair of nodes belonging to way id
                way_segment_lengths[way].append(data['length']) # collect way lengths to sum up afterwards

            if(u not in segment_lengths.keys()):                # store each node-node direct segment length.
                segment_lengths[u] = {}                         # NOT DOING ANYTHING WITH IT FOR NOW
            segment_lengths[u][v] = data['length']

        # 1st FOR, build node id sequence belonging to way_id
        # 2nd FOR, sum segments lengths and add node id list to way lookup
        final_node_sequence = {}
        for way_id, values in way2nodes_pair.items(): # key: way_id, values: pairs of node ids
            relations = {}
            for pair in values:                       # build dict - key: node_pre - value: node post
                relations[pair[0]] = pair[1]
            keys = relations.keys()
            values_ = relations.values()              # if a key (node_pre) doesn't exist in values
            begin_key = list(keys - values_)          # it means it has only origin = way initial node

            if(len(values) > 1 and len(begin_key) == 1): # if NOT cyclic sequence?
                begin_key = begin_key[0]
                node_sequence = [begin_key]
                val = relations[begin_key]
                try:
                    while(val not in node_sequence):  # run through `relations` finding the node sequence pair by pair
                        node_sequence.append(val)
                        count += 1
                        begin_key = val
                        val = relations[begin_key]
                except Exception as e:                # until a pair is not found in the dict anymore = exception
                    e                                 # do nothing in exception
                final_node_sequence[way_id] = node_sequence # and store node "ordered" list as a Way metadata
            else:
                final_node_sequence[way_id] = list(keys)[0] + list(values_)[0] # if way has only 1 node, store it as it is

        for key, value in way_lookup.items():
            way_lookup[key]['length'] = np.sum(way_segment_lengths[key])
            way_lookup[key]['node_sequence'] = final_node_sequence[key]

        # build dict: key1: node1, key2: node2, value: way_id between ALL pair of nodes id IN way
        nodes2way = {}
        for key, values in way2nodes.items():
            for pair in combinations(values,2):
                if(pair[0] not in nodes2way.keys()):
                    nodes2way[pair[0]] = {}
                nodes2way[pair[0]][pair[1]] = key

        # build dict key1: node_id, value: node_metadata
        for node in ra.G.nodes(data=True):
            node_lookup[node[0]] = node[1]


    def segment_lookup(self, node_id_list):
        if(type(node_id_list) == int):
            return self.segment_lookup_[node_id_list[i]][node_id_list[i+1]]
        else:
            ways_id = []
            i = 0
            while i < len(node_id_list) - 1:
                ways_id.append(self.segment_lookup_[node_id_list[i]][node_id_list[i+1]])
                i += 1
            return ways_id

    def way_lookup(self, way_id_list):
        if(type(way_id_list) == int):
            return self.way_lookup_[way_id_list]
        else:
            ways_lookup = []
            for way in way_id_list:
                ways_lookup.append(self.way_lookup_[way])
            return ways_lookup

    def node_lookup(self, node_id_list):
        if(type(node_id_list) == int):
            return self.node_lookup_[node_id_list]
        else:
            nodes_lookup = []
            for node in node_id_list:
                nodes_lookup.append(self.node_lookup_[node])
            return nodes_lookup

In [11]:
# Build the annotator for the New York driving network.
ra = RouteAnnotator(place='new york, usa', network_type='drive')

In [12]:
# NOTE: without parentheses this only displays the bound method object; call it
# with a way id (or a list of way ids) to retrieve the way metadata.
ra.way_lookup

<bound method RouteAnnotator.way_lookup of <__main__.RouteAnnotator object at 0x10d5d0f60>>