In [2]:
import pandas as pd
import numpy as np
import geojson
import geopandas
import osmnx as ox
import networkx as nx
from pathlib import Path
from sklearn.neighbors import BallTree
from datetime import datetime, timedelta
from geographiclib.geodesic import Geodesic
from shapely.geometry import Point, LineString

import warnings
warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')

RELEASE_PATH = Path('/private/data/mets10/release20221026_residential_unclassified')
COUNTER_PATH = Path('/private/data/mets10/loop_counters')

In [7]:
# Adopted from https://github.com/gboeing/osmnx/blob/main/osmnx/distance.py in order to return
# multiple candidate edges.
def get_k_nearest_edges(g, x, y, k=5, min_dist=40):
    EARTH_RADIUS_M = 6_371_009
    X = np.array(x)
    Y = np.array(y)
    
    geoms = ox.utils_graph.graph_to_gdfs(g, nodes=False)["geometry"]

    # interpolate points along edges to index with k-d tree or ball tree
    uvk_xy = list()
    for uvk, geom in zip(geoms.index, geoms.values):
        # Interpolate every ~5 meters
        uvk_xy.extend((uvk, xy) for xy in ox.utils_geo.interpolate_points(geom, 0.00005))
    labels, xy = zip(*uvk_xy)
    vertices = pd.DataFrame(xy, index=labels, columns=["x", "y"])
    
    search_k = k
    if k > 1:
        # If more than one result is desired the haversine query needs to select more candidates
        # as every edge will consist of many points of which many might be close.
        search_k = min(int((len(vertices) / len(geoms)) * 1.2) * k, len(vertices))

    if BallTree is None:  # pragma: no cover
        raise ImportError("scikit-learn must be installed to search an unprojected graph")
    # haversine requires lat, lng coords in radians
    vertices_rad = np.deg2rad(vertices[["y", "x"]])
    points_rad = np.deg2rad(np.array([Y, X]).T)
    dists, poss = BallTree(vertices_rad, metric="haversine").query(points_rad, k=search_k)
    dists = dists * EARTH_RADIUS_M  # convert radians -> meters
    nes = vertices.index.to_numpy()[poss]
    
    res_nes = []
    res_dists = []
    for ne, dist in zip(nes, dists):
        ne = pd.Series(ne)
        dist = pd.Series(dist)
        # Remove duplicates
        mask = ne.duplicated()
        ne = ne[~mask]
        dist = dist[~mask]
        # Remove points too far
        mask = dist > min_dist
        ne = ne[~mask]
        dist = dist[~mask]
        # Cap selection to k elements and add to the output.
        # BallTree query results are already sorted by default, so no need to do it here.
        ne = ne[:k]
        dist = dist[:k]
        res_nes.append(ne.tolist())
        res_dists.append(dist.tolist())

    return res_nes, res_dists


# In-notebook unit tests
def test_get_k_nearest_edges():
    g = nx.MultiDiGraph(crs="epsg:4326")
    g.add_nodes_from([(1, {'x': 0.1, 'y': 0.1}), (2, {'x': 0.18, 'y': 0.1}), (3, {'x': 0.14, 'y': 0.17})])
    g.add_edges_from([(1, 2), (1, 3), (2, 3)])
    
    print("get_k_nearest_edges(g, [0.12], [0.12], k=1, min_dist=4000)")
    nn, dd = get_k_nearest_edges(g, [0.12], [0.12], k=1, min_dist=4000)
    print((nn, dd))
    assert(nn == [[(1, 3, 0)]])
    np.testing.assert_almost_equal(dd, [[827.5247451916221]])
    
    print("get_k_nearest_edges(g, [0.12], [0.12], k=2, min_dist=8000)")
    nn, dd = get_k_nearest_edges(g, [0.12], [0.12], k=2, min_dist=8000)
    print((nn, dd))
    assert(nn == [[(1, 3, 0), (1, 2, 0)]])
    np.testing.assert_almost_equal(dd, [[827.5247451916221, 2223.9016744838271]])
    
    print("get_k_nearest_edges(g, [0.12], [0.12], k=3, min_dist=8000)")
    nn, dd = get_k_nearest_edges(g, [0.12], [0.12], k=3, min_dist=8000)
    print((nn, dd))
    assert(nn == [[(1, 3, 0), (1, 2, 0), (2, 3, 0)]])
    np.testing.assert_almost_equal(dd, [[827.5247451916221, 2223.9016744838271, 4689.289257479777]])
    
    print("get_k_nearest_edges(g, [0.12], [0.12], k=3, min_dist=4000)")
    nn, dd = get_k_nearest_edges(g, [0.12], [0.12], k=3, min_dist=4000)
    print((nn, dd))
    assert(nn == [[(1, 3, 0), (1, 2, 0)]])
    np.testing.assert_almost_equal(dd, [[827.5247451916221, 2223.9016744838271]])
    
    print("get_k_nearest_edges(g, [0.12], [0.12], k=3, min_dist=2000)")
    nn, dd = get_k_nearest_edges(g, [0.12], [0.12], k=3, min_dist=2000)
    print((nn, dd))
    assert(nn == [[(1, 3, 0)]])
    np.testing.assert_almost_equal(dd, [[827.5247451916221]])
    
    print("get_k_nearest_edges(g, [0.12, 0.18], [0.12, 0.12], k=3, min_dist=2000)")
    nn, dd = get_k_nearest_edges(g, [0.12, 0.18], [0.12, 0.12], k=3, min_dist=2000)
    print((nn, dd))
    assert(nn == [[(1, 3, 0)], [(2, 3, 0)]])
    np.testing.assert_almost_equal(dd, [[827.5247451916221], [1103.3630281000585]])


test_get_k_nearest_edges()

get_k_nearest_edges(g, [0.12], [0.12], k=1, min_dist=4000)
([[(1, 3, 0)]], [[827.5247451916221]])
get_k_nearest_edges(g, [0.12], [0.12], k=2, min_dist=8000)
([[(1, 3, 0), (1, 2, 0)]], [[827.5247451916221, 2223.901674483827]])
get_k_nearest_edges(g, [0.12], [0.12], k=3, min_dist=8000)


  ne = ne[:k]
  dist = dist[:k]
  ne = ne[:k]
  dist = dist[:k]


([[(1, 3, 0), (1, 2, 0), (2, 3, 0)]], [[827.5247451916221, 2223.901674483827, 4689.289257479777]])
get_k_nearest_edges(g, [0.12], [0.12], k=3, min_dist=4000)
([[(1, 3, 0), (1, 2, 0)]], [[827.5247451916221, 2223.901674483827]])
get_k_nearest_edges(g, [0.12], [0.12], k=3, min_dist=2000)


  ne = ne[:k]
  dist = dist[:k]


([[(1, 3, 0)]], [[827.5247451916221]])
get_k_nearest_edges(g, [0.12, 0.18], [0.12, 0.12], k=3, min_dist=2000)
([[(1, 3, 0)], [(2, 3, 0)]], [[827.5247451916221], [1103.3630281000585]])


In [42]:
def read_counter_data(counter_file):
    counter_df = pd.read_parquet(counter_file)
    if 'day' in counter_df.columns:
        counter_df['month'] = counter_df['day'].str[:7]
    else:
        counter_df['month'] = counter_df['time_bin'].str[:7]
    if 'heading' not in counter_df.columns:
        counter_df['heading'] = -1
    if 'name' not in counter_df.columns:
        counter_df['name'] = ''
    counter_df = counter_df[['id', 'lat', 'lon', 'month', 'heading', 'name']]
    return counter_df


def get_counter_locations(city):
    loop_counter_files = sorted(list((COUNTER_PATH / city).glob('**/counters_*.parquet')))
    cdfs = []
    for lcf in loop_counter_files:
        cdfs.append(read_counter_data(lcf))
    counter_df = pd.concat(cdfs)
    counter_locations_df = counter_df[['id', 'lat', 'lon', 'heading', 'name']].groupby(by=['id']).last().reset_index()
    return counter_locations_df


def get_bearing(lat1, lon1, lat2, lon2):
    return Geodesic.WGS84.Inverse(lat1, lon1, lat2, lon2)["azi1"] % 360


def bearing_diff(b1, b2):
    res= (b1-b2) % 360
    if res < 180:
        return res
    else:
        return (360-res)


def get_edge_geometry(g, edge, edge_data=None):
    if not edge_data:
        edge_data = g.edges[edge]
    if 'geometry' not in edge_data:
        n1, n2, ekey = edge
        mp1 = g.nodes[n1]
        mp2 = g.nodes[n2]
        line = LineString([Point(mp1['x'],mp1['y']), Point(mp2['x'],mp2['y'])])
        edge_data['geometry'] = line
    else:
        line = edge_data['geometry']
    return line


def find_nearest_ways(g, df):
    xs = df['lon'].tolist()
    ys = df['lat'].tolist()
    edges, dists = get_k_nearest_edges(g, xs, ys)
    headings = df['heading']
    names = df['name']
    
    way = []
    way_dist = []
    us = []
    vs = []
    for edge_candidates, dist_candidates, heading, counter_name in zip(edges, dists, headings, names):
        if not edge_candidates:
            way.append(-1)
            way_dist.append(-1)
            us.append(-1)
            vs.append(-1)
            continue
        best_edge = edge_candidates[0]
        best_dist = 1e7
        best_diff = best_dist + 360
        match_diff = best_diff
        for edge, dist in zip(edge_candidates, dist_candidates):
            # Check if there's a better edge than the closest
            ed = g.edges[edge]
            if counter_name and 'ref' in ed:
                # If there's a name compare them
                ref = ed['ref']
                if ref.lower() in counter_name.lower():
                    match_diff = 0
            if heading >= 0:
                # if there's a heading use it
                line = get_edge_geometry(g, edge)
                p1 = line.coords[0]
                p2 = line.coords[1]
                bearing = get_bearing(p1[1], p1[0], p2[1], p2[0])
                angle_diff = abs(bearing_diff(bearing, heading))
                match_diff = angle_diff + dist
            if match_diff < best_diff:
                best_edge = edge
                best_dist = dist
                best_diff = match_diff
        ed = g.edges[best_edge]
        way.append(ed['osmid'])
        way_dist.append(best_dist)
        u, v, key = best_edge
        us.append(u)
        vs.append(v)
    
    df['ways'] = edges
    df['way_dists'] = dists
    df['way'] = way
    df['way_dist'] = way_dist
    df['u'] = us
    df['v'] = vs
    return df


def save_matched_locations(df, city_path):
    gdf = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df.lon, df.lat))
    gdf['ways'] = gdf['ways'].astype(str)
    gdf['way_dists'] = gdf['way_dists'].astype(str)
    if gdf['id'].dtype == np.dtype('O'):
        gdf['id'] = gdf['id'].astype(str)
    gdf.to_parquet(city_path / 'counters_matched.parquet', compression="snappy")
    gdf.to_file(city_path / 'counters_matched.geojson')
    return gdf


def simplify_graph(g):
    if "simplified" not in g.graph or not g.graph["simplified"]:
        g = ox.simplify_graph(g)
        print(f"Simplified road graph {len(g.nodes)} nodes and {len(g.edges)} edges")
    return g

# Madrid Locations

In [10]:
MADRID_GRAPH_PATH = RELEASE_PATH / '2022' / 'road_graph' / 'madrid'
madrid_g = ox.load_graphml(MADRID_GRAPH_PATH / 'road_graph.graphml')
print(f'Road graph has {len(madrid_g.nodes)} nodes and {len(madrid_g.edges)} edges')

Road graph has 344387 nodes and 492470 edges


In [11]:
madrid_g = simplify_graph(madrid_g)

Simplified road graph 71757 nodes and 143410 edges


In [12]:
madrid_locations_df = get_counter_locations('madrid')
# id	lat	lon	heading	time_bin	type	volume	occupation	congestion_level	speed_avg
madrid_locations_df

Unnamed: 0,id,lat,lon,heading,name
0,1001,40.409729,-3.740786,62.428189,
1,1002,40.408030,-3.743760,66.768303,
2,1003,40.406824,-3.746834,67.775190,
3,1006,40.411894,-3.736324,69.955706,
4,1009,40.416234,-3.724909,68.506837,
...,...,...,...,...,...
4407,10916,40.375355,-3.655874,288.299315,
4408,10917,40.379500,-3.671742,56.643724,
4409,10918,40.379595,-3.670096,312.224245,
4410,10919,40.380650,-3.670044,234.875019,


In [19]:
# !!! This takes 10-15 minutes for the whole graph !!!
madrid_matched_df = find_nearest_ways(madrid_g, madrid_locations_df)
madrid_matched_df[madrid_matched_df['way'] != -1]

Unnamed: 0,id,lat,lon,heading,name,ways,way_dists,way,way_dist,u,v,geometry
0,1001,40.409729,-3.740786,62.428189,,"[(21702026, 315518315, 0), (499633486, 3155183...","[1.4433022167418321, 6.839383661400331, 12.080...",44618925,1.443302,21702026,315518315,POINT (-3.74079 40.40973)
1,1002,40.408030,-3.743760,66.768303,,"[(21702026, 315518315, 0), (21702026, 31551770...","[2.209060687018068, 8.790978137683027, 14.2283...",44618925,2.209061,21702026,315518315,POINT (-3.74376 40.40803)
2,1003,40.406824,-3.746834,67.775190,,"[(255883289, 21702026, 0), (1425927492, 255883...","[1.9701542307576267, 16.24507476725276, 28.663...",44618924,1.970154,255883289,21702026,POINT (-3.74683 40.40682)
3,1006,40.411894,-3.736324,69.955706,,"[(305399719, 305399717, 0), (315518315, 217020...","[2.76047591158687, 8.549680213918583, 18.87443...",28701928,2.760476,305399719,305399717,POINT (-3.73632 40.41189)
4,1009,40.416234,-3.724909,68.506837,,"[(315518315, 21702049, 0), (305399711, 3155192...","[1.1733859763193506, 4.8042663391643226, 12.68...","[4085376, 194757411]",1.173386,315518315,21702049,POINT (-3.72491 40.41623)
...,...,...,...,...,...,...,...,...,...,...,...,...
4407,10916,40.375355,-3.655874,288.299315,,"[(307712572, 316782662, 0), (316782661, 307712...","[2.098968282556398, 7.46277580928028, 38.47938...","[818687835, 72133557, 27875198, 261435166]",2.098968,307712572,316782662,POINT (-3.65587 40.37535)
4408,10917,40.379500,-3.671742,56.643724,,"[(306044017, 306043013, 0), (306043013, 306044...","[2.0372134883845705, 2.0372134883845705, 15.89...",172906541,2.037213,306044017,306043013,POINT (-3.67174 40.37950)
4409,10918,40.379595,-3.670096,312.224245,,"[(306043019, 307701118, 0), (307701118, 306043...","[0.909355212130055, 0.909355212130055]",27875174,0.909355,307701118,306043019,POINT (-3.67010 40.37960)
4410,10919,40.380650,-3.670044,234.875019,,"[(307700924, 2237921393, 0), (2237921393, 3077...","[2.3391319376665964, 2.3391319376665964, 17.70...",72136968,2.339132,2237921393,307700924,POINT (-3.67004 40.38065)


In [22]:
madrid_matched_df['way'] = madrid_matched_df['way'].astype(str)

In [23]:
save_matched_locations(madrid_matched_df, MADRID_GRAPH_PATH)

Unnamed: 0,id,lat,lon,heading,name,ways,way_dists,way,way_dist,u,v,geometry
0,1001,40.409729,-3.740786,62.428189,,"[(21702026, 315518315, 0), (499633486, 3155183...","[1.4433022167418321, 6.839383661400331, 12.080...",44618925,1.443302,21702026,315518315,POINT (-3.74079 40.40973)
1,1002,40.408030,-3.743760,66.768303,,"[(21702026, 315518315, 0), (21702026, 31551770...","[2.209060687018068, 8.790978137683027, 14.2283...",44618925,2.209061,21702026,315518315,POINT (-3.74376 40.40803)
2,1003,40.406824,-3.746834,67.775190,,"[(255883289, 21702026, 0), (1425927492, 255883...","[1.9701542307576267, 16.24507476725276, 28.663...",44618924,1.970154,255883289,21702026,POINT (-3.74683 40.40682)
3,1006,40.411894,-3.736324,69.955706,,"[(305399719, 305399717, 0), (315518315, 217020...","[2.76047591158687, 8.549680213918583, 18.87443...",28701928,2.760476,305399719,305399717,POINT (-3.73632 40.41189)
4,1009,40.416234,-3.724909,68.506837,,"[(315518315, 21702049, 0), (305399711, 3155192...","[1.1733859763193506, 4.8042663391643226, 12.68...","[4085376, 194757411]",1.173386,315518315,21702049,POINT (-3.72491 40.41623)
...,...,...,...,...,...,...,...,...,...,...,...,...
4407,10916,40.375355,-3.655874,288.299315,,"[(307712572, 316782662, 0), (316782661, 307712...","[2.098968282556398, 7.46277580928028, 38.47938...","[818687835, 72133557, 27875198, 261435166]",2.098968,307712572,316782662,POINT (-3.65587 40.37535)
4408,10917,40.379500,-3.671742,56.643724,,"[(306044017, 306043013, 0), (306043013, 306044...","[2.0372134883845705, 2.0372134883845705, 15.89...",172906541,2.037213,306044017,306043013,POINT (-3.67174 40.37950)
4409,10918,40.379595,-3.670096,312.224245,,"[(306043019, 307701118, 0), (307701118, 306043...","[0.909355212130055, 0.909355212130055]",27875174,0.909355,307701118,306043019,POINT (-3.67010 40.37960)
4410,10919,40.380650,-3.670044,234.875019,,"[(307700924, 2237921393, 0), (2237921393, 3077...","[2.3391319376665964, 2.3391319376665964, 17.70...",72136968,2.339132,2237921393,307700924,POINT (-3.67004 40.38065)


# London Locations

In [24]:
LONDON_GRAPH_PATH = RELEASE_PATH / '2022' / 'road_graph' / 'london'
london_g = ox.load_graphml(LONDON_GRAPH_PATH / 'road_graph.graphml')
print(f'Road graph has {len(london_g.nodes)} nodes and {len(london_g.edges)} edges')

Road graph has 566097 nodes and 1077400 edges


In [25]:
london_g = simplify_graph(london_g)

Simplified road graph 116304 nodes and 271117 edges


In [26]:
london_locations_df = get_counter_locations('london')
# day	time_bin	id	flow_15m	sat_bandings	det_no	num_det	detector_rate	ts	lat	lon
# id	name	lat	lon	heading	time_bin	volume	speed
london_locations_df

Unnamed: 0,id,lat,lon,heading,name
0,5,51.575617,0.283162,-1,M25/5764B
1,8,51.433749,-0.538796,-1,M25/4876A
2,14,51.408466,0.381381,-1,A2/8392M
3,24,51.520645,-2.049237,-1,M4/3479A
4,25,51.664773,-0.493692,-1,M25/5135B
...,...,...,...,...,...
5959,32/210,51.612580,-0.113407,-1,
5960,32/224,51.632029,-0.073554,-1,
5961,32/225,51.631973,-0.073398,-1,
5962,32/228,51.629826,-0.097257,-1,


In [27]:
# !!! This takes 10-15 minutes for the whole graph !!!
london_matched_df = find_nearest_ways(london_g, london_locations_df)
london_matched_df[london_matched_df['way'] != -1]

Unnamed: 0,id,lat,lon,heading,name,ways,way_dists,way,way_dist,u,v
5,28,51.268629,-0.166750,-1,M25/4490B,"[(1639045045, 202952, 0), (206223889, 16390449...","[1.7473350855586416, 22.314578360426612, 29.15...",23189325,1.747335,1639045045,202952
11,46,51.264317,-0.132305,-1,M25/4465A,"[(12079094, 202953, 0), (202952, 203009, 0)]","[2.453989539425953, 18.082068670467034]","[2714200, 108688595]",2.453990,12079094,202953
14,57,51.273244,0.063816,-1,M25/4322B,"[(203171, 170792507, 0), (1639108484, 203172, 0)]","[5.635538236864448, 20.39117466656207]","[10660889, 10660898, 16536383]",5.635538,203171,170792507
19,75,51.259332,-0.108098,-1,M25/4447B,"[(203009, 12079111, 0), (12079109, 2015848786,...","[3.425799135308956, 18.37990609198298]","[4794119, 226518825, 682852587, 151074097, 108...",3.425799,203009,12079111
21,80,51.258246,-0.053938,-1,M25/4409B,"[(203108, 170644582, 0), (170644470, 163910846...","[0.5905465516563868, 15.031632632080552]","[10503271, 16536183, 16536392, 16536296, 16536...",0.590547,203108,170644582
...,...,...,...,...,...,...,...,...,...,...,...
5959,32/210,51.612580,-0.113407,-1,,"[(196428, 1745361161, 0), (1745285962, 1745285...","[0.5058183806568601, 10.5399731372893, 16.9187...","[51180740, 108469928, 51249389, 404260437, 404...",0.505818,196428,1745361161
5960,32/224,51.632029,-0.073554,-1,,"[(5308552473, 6583849322, 0), (26001619, 10526...","[2.110936163160965, 11.648444596432814]","[8122037, 183043102, 682948503]",2.110936,5308552473,6583849322
5961,32/225,51.631973,-0.073398,-1,,"[(26001619, 1052641793, 0), (5308552473, 65838...","[0.7067135283972795, 13.211708987076507]","[775324853, 1656423]",0.706714,26001619,1052641793
5962,32/228,51.629826,-0.097257,-1,,"[(256630996, 8085242318, 0), (8085242318, 2566...","[2.094607763208028, 2.094607763208028, 13.0407...","[867401512, 867401516, 867401518, 867401459, 8...",2.094608,256630996,8085242318


In [28]:
london_matched_df['way'] = london_matched_df['way'].astype(str)

In [29]:
save_matched_locations(london_matched_df, LONDON_GRAPH_PATH)

Unnamed: 0,id,lat,lon,heading,name,ways,way_dists,way,way_dist,u,v,geometry
0,5,51.575617,0.283162,-1,M25/5764B,[],[],-1,-1.000000,-1,-1,POINT (0.28316 51.57562)
1,8,51.433749,-0.538796,-1,M25/4876A,[],[],-1,-1.000000,-1,-1,POINT (-0.53880 51.43375)
2,14,51.408466,0.381381,-1,A2/8392M,[],[],-1,-1.000000,-1,-1,POINT (0.38138 51.40847)
3,24,51.520645,-2.049237,-1,M4/3479A,[],[],-1,-1.000000,-1,-1,POINT (-2.04924 51.52064)
4,25,51.664773,-0.493692,-1,M25/5135B,[],[],-1,-1.000000,-1,-1,POINT (-0.49369 51.66477)
...,...,...,...,...,...,...,...,...,...,...,...,...
5959,32/210,51.612580,-0.113407,-1,,"[(196428, 1745361161, 0), (1745285962, 1745285...","[0.5058183806568601, 10.5399731372893, 16.9187...","[51180740, 108469928, 51249389, 404260437, 404...",0.505818,196428,1745361161,POINT (-0.11341 51.61258)
5960,32/224,51.632029,-0.073554,-1,,"[(5308552473, 6583849322, 0), (26001619, 10526...","[2.110936163160965, 11.648444596432814]","[8122037, 183043102, 682948503]",2.110936,5308552473,6583849322,POINT (-0.07355 51.63203)
5961,32/225,51.631973,-0.073398,-1,,"[(26001619, 1052641793, 0), (5308552473, 65838...","[0.7067135283972795, 13.211708987076507]","[775324853, 1656423]",0.706714,26001619,1052641793,POINT (-0.07340 51.63197)
5962,32/228,51.629826,-0.097257,-1,,"[(256630996, 8085242318, 0), (8085242318, 2566...","[2.094607763208028, 2.094607763208028, 13.0407...","[867401512, 867401516, 867401518, 867401459, 8...",2.094608,256630996,8085242318,POINT (-0.09726 51.62983)


# Berlin Locations

In [30]:
BERLIN_GRAPH_PATH = RELEASE_PATH / '2021' / 'road_graph' / 'berlin'
berlin_g = ox.load_graphml(BERLIN_GRAPH_PATH / 'road_graph.graphml')
print(f'Road graph has {len(berlin_g.nodes)} nodes and {len(berlin_g.edges)} edges')

Road graph has 239489 nodes and 439564 edges


In [31]:
berlin_g = simplify_graph(berlin_g)

Simplified road graph 34308 nodes and 88894 edges


In [32]:
berlin_locations_df = get_counter_locations('berlin')
berlin_locations_df

Unnamed: 0,id,lat,lon,heading,name
0,100101010000167,52.433868,13.192578,225,TE001_Det_HF1
1,100101010000268,52.433868,13.192578,225,TE001_Det_HF2
2,100101010000369,52.433813,13.192747,45,TE002_Det_HF1
3,100101010000470,52.433813,13.192747,45,TE002_Det_HF2
4,100101010000874,52.436642,13.261301,180,TE004_Det_HF1
...,...,...,...,...,...
542,100101010097975,52.457440,13.384196,0,TE583_Det_HF2
543,100101010099692,52.509232,13.301719,180,TE592_Det_HF1
544,100101010099793,52.509232,13.301719,180,TE592_Det_HF2
545,100101010099894,52.508531,13.302183,0,TE593_Det_HF1


In [33]:
# !!! This takes 10-15 minutes for the whole graph !!!
berlin_matched_df = find_nearest_ways(berlin_g, berlin_locations_df)
berlin_matched_df[berlin_matched_df['way'] != -1]

Unnamed: 0,id,lat,lon,heading,name,ways,way_dists,way,way_dist,u,v
0,100101010000167,52.433868,13.192578,225,TE001_Det_HF1,"[(484310, 27088394, 0), (27088393, 274115813, ...","[1.6619848854382284, 12.674320256897182, 13.87...","[81629728, 150912137]",1.661985,484310,27088394
1,100101010000268,52.433868,13.192578,225,TE001_Det_HF2,"[(484310, 27088394, 0), (27088393, 274115813, ...","[1.6619848854382284, 12.674320256897182, 13.87...","[81629728, 150912137]",1.661985,484310,27088394
2,100101010000369,52.433813,13.192747,45,TE002_Det_HF1,"[(27088393, 274115813, 0), (268076066, 2619099...","[0.7473601310109975, 5.883360599936063, 5.8833...","[4402002, 120233963, 233307349, 92093079]",0.747360,27088393,274115813
3,100101010000470,52.433813,13.192747,45,TE002_Det_HF2,"[(27088393, 274115813, 0), (268076066, 2619099...","[0.7473601310109975, 5.883360599936063, 5.8833...","[4402002, 120233963, 233307349, 92093079]",0.747360,27088393,274115813
4,100101010000874,52.436642,13.261301,180,TE004_Det_HF1,"[(150895819, 26162586, 0), (26905938, 26906132...","[0.6433814473254731, 9.59534207976239]","[43336764, 329983269]",0.643381,150895819,26162586
...,...,...,...,...,...,...,...,...,...,...,...
542,100101010097975,52.457440,13.384196,0,TE583_Det_HF2,"[(27556652, 29674292, 0), (29674290, 26876325,...","[6.561686063168479, 14.389146856550148]","[1067561756, 1067561757, 171364191]",6.561686,27556652,29674292
543,100101010099692,52.509232,13.301719,180,TE592_Det_HF1,"[(26736176, 26736175, 0), (26736157, 26736154,...","[6.177326915871354, 17.91124377691932]",724621035,6.177327,26736176,26736175
544,100101010099793,52.509232,13.301719,180,TE592_Det_HF2,"[(26736176, 26736175, 0), (26736157, 26736154,...","[6.177326915871354, 17.91124377691932]",724621035,6.177327,26736176,26736175
545,100101010099894,52.508531,13.302183,0,TE593_Det_HF1,"[(26736157, 26736154, 0), (26736176, 26736175,...","[6.249463950609168, 17.945597051981217]","[1056616525, 327805903]",6.249464,26736157,26736154


In [34]:
berlin_matched_df['way'] = berlin_matched_df['way'].astype(str)

In [35]:
save_matched_locations(berlin_matched_df, BERLIN_GRAPH_PATH)

Unnamed: 0,id,lat,lon,heading,name,ways,way_dists,way,way_dist,u,v,geometry
0,100101010000167,52.433868,13.192578,225,TE001_Det_HF1,"[(484310, 27088394, 0), (27088393, 274115813, ...","[1.6619848854382284, 12.674320256897182, 13.87...","[81629728, 150912137]",1.661985,484310,27088394,POINT (13.19258 52.43387)
1,100101010000268,52.433868,13.192578,225,TE001_Det_HF2,"[(484310, 27088394, 0), (27088393, 274115813, ...","[1.6619848854382284, 12.674320256897182, 13.87...","[81629728, 150912137]",1.661985,484310,27088394,POINT (13.19258 52.43387)
2,100101010000369,52.433813,13.192747,45,TE002_Det_HF1,"[(27088393, 274115813, 0), (268076066, 2619099...","[0.7473601310109975, 5.883360599936063, 5.8833...","[4402002, 120233963, 233307349, 92093079]",0.747360,27088393,274115813,POINT (13.19275 52.43381)
3,100101010000470,52.433813,13.192747,45,TE002_Det_HF2,"[(27088393, 274115813, 0), (268076066, 2619099...","[0.7473601310109975, 5.883360599936063, 5.8833...","[4402002, 120233963, 233307349, 92093079]",0.747360,27088393,274115813,POINT (13.19275 52.43381)
4,100101010000874,52.436642,13.261301,180,TE004_Det_HF1,"[(150895819, 26162586, 0), (26905938, 26906132...","[0.6433814473254731, 9.59534207976239]","[43336764, 329983269]",0.643381,150895819,26162586,POINT (13.26130 52.43664)
...,...,...,...,...,...,...,...,...,...,...,...,...
542,100101010097975,52.457440,13.384196,0,TE583_Det_HF2,"[(27556652, 29674292, 0), (29674290, 26876325,...","[6.561686063168479, 14.389146856550148]","[1067561756, 1067561757, 171364191]",6.561686,27556652,29674292,POINT (13.38420 52.45744)
543,100101010099692,52.509232,13.301719,180,TE592_Det_HF1,"[(26736176, 26736175, 0), (26736157, 26736154,...","[6.177326915871354, 17.91124377691932]",724621035,6.177327,26736176,26736175,POINT (13.30172 52.50923)
544,100101010099793,52.509232,13.301719,180,TE592_Det_HF2,"[(26736176, 26736175, 0), (26736157, 26736154,...","[6.177326915871354, 17.91124377691932]",724621035,6.177327,26736176,26736175,POINT (13.30172 52.50923)
545,100101010099894,52.508531,13.302183,0,TE593_Det_HF1,"[(26736157, 26736154, 0), (26736176, 26736175,...","[6.249463950609168, 17.945597051981217]","[1056616525, 327805903]",6.249464,26736157,26736154,POINT (13.30218 52.50853)


# Uber loop counter compare

scp -r moritz.neun@tu-fat-1.lan.iarai.ac.at:/iarai/public/t4c/data_pipeline/release20221028_historic_uber/road_graph .

scp -r moritz.neun@tu-fat-1.lan.iarai.ac.at:/iarai/public/t4c/data_pipeline/release20221028_historic_uber/speed_classes .

In [6]:
UBER_BASE = Path('/Users/neun/data/t4c/data_pipeline/validations_ext_uber/release20221028_historic_uber')
list(UBER_BASE.glob('*'))

[PosixPath('/Users/neun/data/t4c/data_pipeline/validations_ext_uber/release20221028_historic_uber/.DS_Store'),
 PosixPath('/Users/neun/data/t4c/data_pipeline/validations_ext_uber/release20221028_historic_uber/speed_classes'),
 PosixPath('/Users/neun/data/t4c/data_pipeline/validations_ext_uber/release20221028_historic_uber/road_graph')]

### Uber London

In [46]:
uber_london_edges_df = geopandas.read_parquet(UBER_BASE / 'road_graph' / 'london' / 'road_graph_freeflow.parquet')
uber_london_edges_df

Unnamed: 0,u,v,gkey,osmid,speed_kph,maxspeed,highway,oneway,lanes,tunnel,length_meters,geometry,intersecting_cells,free_flow_kph
0,78112,25508583,78112_25508583_129375498,129375498,40.4,,unclassified,,,,19.407060,"LINESTRING (-0.14579 51.52698, -0.14578 51.52680)","[(172, 223, 2, 0.0), (173, 223, 2, 1.0), (173,...",45.176471
1,78112,25508584,78112_25508584_129375498,129375498,40.4,,unclassified,,,,63.857249,"LINESTRING (-0.14579 51.52698, -0.14581 51.52755)","[(173, 223, 0, 0.041804563658226324), (172, 22...",45.176471
2,99936,2146383887,99936_2146383887_204647020,204647020,48.3,30 mph,unclassified,,,,12.645031,"LINESTRING (-0.15279 51.52361, -0.15268 51.52363)","[(176, 216, 0, 1.0)]",28.705882
3,99936,4544836433,99936_4544836433_233623258,233623258,48.3,30 mph,unclassified,,,,47.220818,"LINESTRING (-0.15279 51.52361, -0.15294 51.523...","[(175, 216, 1, 0.037851813122377106), (175, 21...",28.705882
4,99937,200047,99937_200047_4257261,4257261,48.3,30 mph,unclassified,,,,59.033945,"LINESTRING (-0.15202 51.52302, -0.15232 51.523...","[(176, 217, 3, 0.0), (176, 216, 1, 1.0), (176,...",29.647059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234303,6577730921,254761207,6577730921_254761207_358627524,358627524,46.2,,tertiary,,,,134.082828,"LINESTRING (-0.20947 51.52118, -0.20849 51.52188)","[(178, 159, 0, 0.4780438104955831), (178, 160,...",28.705882
234304,6577730921,6577730928,6577730921_6577730928_8227762,8227762,36.3,,residential,,,,51.325126,"LINESTRING (-0.20947 51.52118, -0.20935 51.521...","[(178, 160, 2, 0.0), (178, 159, 0, 1.0), (178,...",22.588235
234305,6577730928,2379175,6577730928_2379175_700435511,700435511,36.3,,residential,,,,57.576832,"LINESTRING (-0.20901 51.52114, -0.20906 51.521...","[(178, 160, 3, 0.0), (178, 159, 1, 1.0), (178,...",26.823529
234306,6577745911,36728211,6577745911_36728211_646234311,646234311,48.3,30 mph,primary,,,,305.243521,"LINESTRING (-0.23975 51.41129, -0.23904 51.411...","[(289, 130, 2, 0.38704878562390743), (288, 130...",48.941176


In [47]:
uber_london_nodes_df = geopandas.read_parquet(UBER_BASE / 'road_graph' / 'london' / 'road_graph_nodes.parquet')
uber_london_nodes_df

Unnamed: 0,node_id,x,y,geometry
0,78112,-0.145792,51.526976,POINT (-0.14579 51.52698)
1,99936,-0.152791,51.523611,POINT (-0.15279 51.52361)
2,99937,-0.152024,51.523018,POINT (-0.15202 51.52302)
3,99940,-0.155763,51.523085,POINT (-0.15576 51.52308)
4,101818,-0.148104,51.535179,POINT (-0.14810 51.53518)
...,...,...,...,...
140407,6577710674,-0.225660,51.505155,POINT (-0.22566 51.50515)
140408,6577710675,-0.227720,51.502322,POINT (-0.22772 51.50232)
140409,6577730921,-0.209469,51.521176,POINT (-0.20947 51.52118)
140410,6577730928,-0.209009,51.521139,POINT (-0.20901 51.52114)


In [49]:
uber_london_edges_df = uber_london_edges_df.set_index(['u', 'v', 'gkey'])

In [50]:
uber_london_edges_df.index.is_unique

True

In [38]:
uber_london_g = ox.graph_from_gdfs(uber_london_nodes_df, uber_london_edges_df)
uber_london_g

<networkx.classes.multidigraph.MultiDiGraph at 0x7fb111d8bc10>

In [39]:
print(f'Road graph has {len(uber_london_g.nodes)} nodes and {len(uber_london_g.edges)} edges')

Road graph has 279579 nodes and 234308 edges


In [40]:
london_locations_df = get_counter_locations('london')
# day	time_bin	id	flow_15m	sat_bandings	det_no	num_det	detector_rate	ts	lat	lon
# id	name	lat	lon	heading	time_bin	volume	speed
london_locations_df

Unnamed: 0,id,lat,lon,heading,name
0,5,51.575617,0.283162,-1.0,M25/5764B
1,8,51.433749,-0.538796,-1.0,M25/4876A
2,14,51.408466,0.381381,-1.0,A2/8392M
3,24,51.520645,-2.049237,-1.0,M4/3479A
4,25,51.664773,-0.493692,-1.0,M25/5135B
...,...,...,...,...,...
5912,32/210,51.612580,-0.113407,-1.0,
5913,32/224,51.632029,-0.073554,-1.0,
5914,32/225,51.631973,-0.073398,-1.0,
5915,32/228,51.629826,-0.097257,-1.0,


In [43]:
# !!! This takes 10-15 minutes for the whole graph !!!
uber_london_matched_df = find_nearest_ways(uber_london_g, london_locations_df)
uber_london_matched_df[uber_london_matched_df['way'] != -1]

  ne = ne[:k]
  dist = dist[:k]


Unnamed: 0,id,lat,lon,heading,name,ways,way_dists,way,way_dist,u,v
0,5,51.575617,0.283162,-1.0,M25/5764B,"[(246241383, 210969437, 246241383_210969437_22...","[1.6568537321059769, 16.015736358032573, 20.85...",22890035,10000000.0,246241383,210969437
1,8,51.433749,-0.538796,-1.0,M25/4876A,"[(1588846837, 1588846655, 1588846837_158884665...","[2.712841032414038, 23.267715270397552]",256060531,10000000.0,1588846837,1588846655
2,14,51.408466,0.381381,-1.0,A2/8392M,"[(945016995, 260108636, 945016995_260108636_42...",[30.615473857873027],42400295,10000000.0,945016995,260108636
4,25,51.664773,-0.493692,-1.0,M25/5135B,"[(1776487329, 13353263, 1776487329_13353263_42...","[2.137603239380767, 17.36775801843935]",4208528,10000000.0,1776487329,13353263
5,28,51.268629,-0.166750,-1.0,M25/4490B,"[(1639045045, 202952, 1639045045_202952_231893...","[1.0402302020833283, 22.314515722000312, 29.15...",23189325,10000000.0,1639045045,202952
...,...,...,...,...,...,...,...,...,...,...,...
5912,32/210,51.612580,-0.113407,-1.0,,"[(4065358677, 196424, 4065358677_196424_404260...","[1.5074836979383113, 10.46175080563993, 21.934...",404260442,10000000.0,4065358677,196424
5913,32/224,51.632029,-0.073554,-1.0,,"[(1439294240, 497161, 1439294240_497161_812203...","[1.8084786013917726, 11.618542945204045]",8122037,10000000.0,1439294240,497161
5914,32/225,51.631973,-0.073398,-1.0,,"[(4297211162, 1052641793, 4297211162_105264179...","[1.406376688201597, 13.290152730149995]",1656423,10000000.0,4297211162,1052641793
5915,32/228,51.629826,-0.097257,-1.0,,"[(256632553, 26026556, 256632553_26026556_8122...","[1.5865900035410672, 1.5865900035410672, 1.586...",8122359,10000000.0,256632553,26026556


In [44]:
uber_london_matched_df['way'] = uber_london_matched_df['way'].astype(str)

In [45]:
save_matched_locations(uber_london_matched_df, UBER_BASE / 'road_graph' / 'london')

Unnamed: 0,id,lat,lon,heading,name,ways,way_dists,way,way_dist,u,v,geometry
0,5,51.575617,0.283162,-1.0,M25/5764B,"[(246241383, 210969437, '246241383_210969437_2...","[1.6568537321059769, 16.015736358032573, 20.85...",22890035,10000000.0,246241383,210969437,POINT (0.28316 51.57562)
1,8,51.433749,-0.538796,-1.0,M25/4876A,"[(1588846837, 1588846655, '1588846837_15888466...","[2.712841032414038, 23.267715270397552]",256060531,10000000.0,1588846837,1588846655,POINT (-0.53880 51.43375)
2,14,51.408466,0.381381,-1.0,A2/8392M,"[(945016995, 260108636, '945016995_260108636_4...",[30.615473857873027],42400295,10000000.0,945016995,260108636,POINT (0.38138 51.40847)
3,24,51.520645,-2.049237,-1.0,M4/3479A,[],[],-1,-1.0,-1,-1,POINT (-2.04924 51.52064)
4,25,51.664773,-0.493692,-1.0,M25/5135B,"[(1776487329, 13353263, '1776487329_13353263_4...","[2.137603239380767, 17.36775801843935]",4208528,10000000.0,1776487329,13353263,POINT (-0.49369 51.66477)
...,...,...,...,...,...,...,...,...,...,...,...,...
5912,32/210,51.612580,-0.113407,-1.0,,"[(4065358677, 196424, '4065358677_196424_40426...","[1.5074836979383113, 10.46175080563993, 21.934...",404260442,10000000.0,4065358677,196424,POINT (-0.11341 51.61258)
5913,32/224,51.632029,-0.073554,-1.0,,"[(1439294240, 497161, '1439294240_497161_81220...","[1.8084786013917726, 11.618542945204045]",8122037,10000000.0,1439294240,497161,POINT (-0.07355 51.63203)
5914,32/225,51.631973,-0.073398,-1.0,,"[(4297211162, 1052641793, '4297211162_10526417...","[1.406376688201597, 13.290152730149995]",1656423,10000000.0,4297211162,1052641793,POINT (-0.07340 51.63197)
5915,32/228,51.629826,-0.097257,-1.0,,"[(256632553, 26026556, '256632553_26026556_812...","[1.5865900035410672, 1.5865900035410672, 1.586...",8122359,10000000.0,256632553,26026556,POINT (-0.09726 51.62983)


### Uber Berlin

In [51]:
uber_berlin_edges_df = geopandas.read_parquet(UBER_BASE / 'road_graph' / 'berlin' / 'road_graph_freeflow.parquet')
uber_berlin_edges_df

Unnamed: 0,u,v,gkey,osmid,speed_kph,maxspeed,highway,oneway,lanes,tunnel,length_meters,geometry,intersecting_cells,free_flow_kph
0,172546,530313,172546_530313_24295532,24295532,50.0,50,secondary,,,,46.759468,"LINESTRING (13.35225 52.56774, 13.35266 52.56784)","[(286, 163, 0, 1.0)]",48.156863
1,172558,172559,172558_172559_160130963,160130963,50.0,50,primary,,,,164.463017,"LINESTRING (13.33737 52.55878, 13.33725 52.558...","[(294, 147, 1, 0.7021122973989331), (295, 148,...",50.352941
2,172559,29688040,172559_29688040_160130963,160130963,50.0,50,primary,,,,153.966487,"LINESTRING (13.33607 52.55952, 13.33578 52.559...","[(293, 145, 1, 0.10916459105032425), (293, 146...",52.235294
3,172562,530351,172562_530351_4804203,4804203,50.0,50,primary_link,,,,23.283372,"LINESTRING (13.33066 52.56260, 13.33047 52.56253)","[(291, 141, 3, 1.0)]",25.176471
4,172564,530185,172564_530185_4432669,4432669,50.0,50,secondary,,,,44.345716,"LINESTRING (13.32771 52.56405, 13.32759 52.563...","[(289, 138, 3, 0.17365008160005327), (290, 138...",40.156863
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16274,6171409045,6171409044,6171409045_6171409044_71143837,71143837,50.0,50,secondary,,,,71.576136,"LINESTRING (13.39911 52.50819, 13.39949 52.50873)","[(345, 210, 0, 1.0)]",54.196078
16275,6172685050,26745400,6172685050_26745400_417807344,417807344,50.0,50,residential,,,,5.821343,"LINESTRING (13.36256 52.50585, 13.36258 52.50590)","[(348, 173, 0, 1.0)]",35.764706
16276,6172693100,26746095,6172693100_26746095_16943617,16943617,30.0,30,primary,,,,9.151821,"LINESTRING (13.36812 52.50590, 13.36809 52.50582)","[(348, 179, 3, 1.0)]",29.803922
16277,6173378369,426425009,6173378369_426425009_132105452,132105452,50.0,50,tertiary,,,,3.764184,"LINESTRING (13.49198 52.50117, 13.49202 52.50116)","[(352, 303, 2, 0.5120481927622961), (352, 302,...",39.058824


In [52]:
uber_berlin_nodes_df = geopandas.read_parquet(UBER_BASE / 'road_graph' / 'berlin' / 'road_graph_nodes.parquet')
uber_berlin_nodes_df

Unnamed: 0,node_id,x,y,geometry
0,172546,13.352255,52.567738,POINT (13.35225 52.56774)
1,172558,13.337368,52.558781,POINT (13.33737 52.55878)
2,172559,13.336074,52.559517,POINT (13.33607 52.55952)
3,172562,13.330664,52.562600,POINT (13.33066 52.56260)
4,172564,13.327708,52.564046,POINT (13.32771 52.56405)
...,...,...,...,...
12650,6171409045,13.399112,52.508193,POINT (13.39911 52.50819)
12651,6172685050,13.362564,52.505849,POINT (13.36256 52.50585)
12652,6172693100,13.368124,52.505898,POINT (13.36812 52.50590)
12653,6173378369,13.491984,52.501171,POINT (13.49198 52.50117)


In [54]:
uber_berlin_edges_df = uber_berlin_edges_df.set_index(['u', 'v', 'gkey'])
uber_berlin_edges_df.index.is_unique

True

In [55]:
uber_berlin_g = ox.graph_from_gdfs(uber_berlin_nodes_df, uber_berlin_edges_df)
print(f'Road graph has {len(uber_berlin_g.nodes)} nodes and {len(uber_berlin_g.edges)} edges')

Road graph has 25310 nodes and 16279 edges


In [56]:
berlin_locations_df = get_counter_locations('berlin')
berlin_locations_df

Unnamed: 0,id,lat,lon,heading,name
0,100101010000167,52.433868,13.192578,225,TE001_Det_HF1
1,100101010000268,52.433868,13.192578,225,TE001_Det_HF2
2,100101010000369,52.433813,13.192747,45,TE002_Det_HF1
3,100101010000470,52.433813,13.192747,45,TE002_Det_HF2
4,100101010000874,52.436642,13.261301,180,TE004_Det_HF1
...,...,...,...,...,...
542,100101010097975,52.457440,13.384196,0,TE583_Det_HF2
543,100101010099692,52.509232,13.301719,180,TE592_Det_HF1
544,100101010099793,52.509232,13.301719,180,TE592_Det_HF2
545,100101010099894,52.508531,13.302183,0,TE593_Det_HF1


In [57]:
# !!! This takes 2 minutes for the whole graph !!!
uber_berlin_matched_df = find_nearest_ways(uber_berlin_g, berlin_locations_df)
uber_berlin_matched_df[uber_berlin_matched_df['way'] != -1]

  ne = ne[:k]
  dist = dist[:k]


Unnamed: 0,id,lat,lon,heading,name,ways,way_dists,way,way_dist,u,v
0,100101010000167,52.433868,13.192578,225,TE001_Det_HF1,"[(268076575, 1827856523, 268076575_1827856523_...","[0.26406754832700025, 32.81005424259161]",150912137,0.264068,268076575,1827856523
1,100101010000268,52.433868,13.192578,225,TE001_Det_HF2,"[(268076575, 1827856523, 268076575_1827856523_...","[0.26406754832700025, 32.81005424259161]",150912137,0.264068,268076575,1827856523
2,100101010000369,52.433813,13.192747,45,TE002_Det_HF1,"[(268076575, 1827856523, 268076575_1827856523_...",[13.200678028254265],150912137,13.200678,268076575,1827856523
3,100101010000470,52.433813,13.192747,45,TE002_Det_HF2,"[(268076575, 1827856523, 268076575_1827856523_...",[13.200678028254265],150912137,13.200678,268076575,1827856523
6,100101010001076,52.435112,13.263106,270,TE005_Det_HF1,"[(2448173157, 26906129, 2448173157_26906129_30...","[2.016740674391748, 5.591400479496861, 27.4898...",30243829,2.016741,2448173157,26906129
...,...,...,...,...,...,...,...,...,...,...,...
542,100101010097975,52.457440,13.384196,0,TE583_Det_HF2,"[(27556652, 1824003295, 27556652_1824003295_32...","[5.336229322748185, 16.103532659073107]",327627452,5.336229,27556652,1824003295
543,100101010099692,52.509232,13.301719,180,TE592_Det_HF1,"[(26736176, 26736175, 26736176_26736175_316210...","[6.14839484607444, 17.77976719045062, 18.89012...",31621007,6.148395,26736176,26736175
544,100101010099793,52.509232,13.301719,180,TE592_Det_HF2,"[(26736176, 26736175, 26736176_26736175_316210...","[6.14839484607444, 17.77976719045062, 18.89012...",31621007,6.148395,26736176,26736175
545,100101010099894,52.508531,13.302183,0,TE593_Det_HF1,"[(26736157, 791127364, 26736157_791127364_3278...","[6.50455329146837, 17.950528439866044]",327805903,6.504553,26736157,791127364


In [58]:
uber_berlin_matched_df['way'] = uber_berlin_matched_df['way'].astype(str)

In [59]:
save_matched_locations(uber_berlin_matched_df, UBER_BASE / 'road_graph' / 'berlin')

Unnamed: 0,id,lat,lon,heading,name,ways,way_dists,way,way_dist,u,v,geometry
0,100101010000167,52.433868,13.192578,225,TE001_Det_HF1,"[(268076575, 1827856523, '268076575_1827856523...","[0.26406754832700025, 32.81005424259161]",150912137,0.264068,268076575,1827856523,POINT (13.19258 52.43387)
1,100101010000268,52.433868,13.192578,225,TE001_Det_HF2,"[(268076575, 1827856523, '268076575_1827856523...","[0.26406754832700025, 32.81005424259161]",150912137,0.264068,268076575,1827856523,POINT (13.19258 52.43387)
2,100101010000369,52.433813,13.192747,45,TE002_Det_HF1,"[(268076575, 1827856523, '268076575_1827856523...",[13.200678028254265],150912137,13.200678,268076575,1827856523,POINT (13.19275 52.43381)
3,100101010000470,52.433813,13.192747,45,TE002_Det_HF2,"[(268076575, 1827856523, '268076575_1827856523...",[13.200678028254265],150912137,13.200678,268076575,1827856523,POINT (13.19275 52.43381)
4,100101010000874,52.436642,13.261301,180,TE004_Det_HF1,[],[],-1,-1.000000,-1,-1,POINT (13.26130 52.43664)
...,...,...,...,...,...,...,...,...,...,...,...,...
542,100101010097975,52.457440,13.384196,0,TE583_Det_HF2,"[(27556652, 1824003295, '27556652_1824003295_3...","[5.336229322748185, 16.103532659073107]",327627452,5.336229,27556652,1824003295,POINT (13.38420 52.45744)
543,100101010099692,52.509232,13.301719,180,TE592_Det_HF1,"[(26736176, 26736175, '26736176_26736175_31621...","[6.14839484607444, 17.77976719045062, 18.89012...",31621007,6.148395,26736176,26736175,POINT (13.30172 52.50923)
544,100101010099793,52.509232,13.301719,180,TE592_Det_HF2,"[(26736176, 26736175, '26736176_26736175_31621...","[6.14839484607444, 17.77976719045062, 18.89012...",31621007,6.148395,26736176,26736175,POINT (13.30172 52.50923)
545,100101010099894,52.508531,13.302183,0,TE593_Det_HF1,"[(26736157, 791127364, '26736157_791127364_327...","[6.50455329146837, 17.950528439866044]",327805903,6.504553,26736157,791127364,POINT (13.30218 52.50853)
