In [1]:
import os
import pickle
import shapely
import numpy as np
import osmnx as ox
import networkx as nx
import geopandas as gpd
import pandas as pd
from datetime import date
import geonetworkx as gnx
from networkx.classes.filters import no_filter
from statistics import mode
from tqdm import tqdm
from shapely import LineString

tqdm.pandas()

DATA_PATH = './data'

In [2]:
# Get the OSM road network within Central Hungary together with the extra way tags
# needed later. The graph is downloaded unsimplified and simplified in a second step,
# passing the attributes of interest as `edge_attrs_differ`.
useful_tags = ['surface', 'lit', 'lanes:forward', 'lanes:backward']
# Register only the tags that are not configured yet, so that re-running this cell
# does not keep appending duplicates to the global osmnx settings.
ox.settings.useful_tags_way += [tag for tag in useful_tags if tag not in ox.settings.useful_tags_way]
graph = ox.graph_from_place('Central Hungary', simplify=False, retain_all=True,
                            custom_filter = '["highway"~"motorway|trunk|primary|secondary|tertiary|unclassified|residential"]')
graph = ox.simplification.simplify_graph(graph, edge_attrs_differ=['highway', 'maxspeed'] + useful_tags)

In [3]:
# Give every edge that has no 'geometry' attribute a straight two-point LineString
# running from its start node to its end node (node 'x'/'y' attributes).
for edge_id in graph.edges:
    edge_data = graph.edges[edge_id]
    if 'geometry' in edge_data.keys():
        continue
    start_node = graph.nodes[edge_id[0]]
    end_node = graph.nodes[edge_id[1]]
    edge_data['geometry'] = LineString([[start_node['x'], start_node['y']],
                                        [end_node['x'], end_node['y']]])

In [4]:
# Turn the road graph into a table with one row per edge: the edge id and its line geometry
graph = gnx.read_geograph_with_coordinates_attributes(graph)
graph_view = nx.graphviews.subgraph_view(graph, filter_node=no_filter, filter_edge=no_filter)
edges_as_lines = nx.get_edge_attributes(graph_view, graph.edges_geometry_key)
gdf_graph_edges = gpd.GeoDataFrame(
    {
        'gnx_edge_id': [edge_id for edge_id in edges_as_lines.keys()],
        'geometry': [line for line in edges_as_lines.values()],
    },
    crs='EPSG:4326',
)
print(gdf_graph_edges.shape)

(182960, 2)


In [5]:
# Geocode the Budapest administrative area to a polygon and expose its id column as 'osmid'
gdf_poly = ox.geocode_to_gdf('Budapest, Hungary').rename(columns={'osm_id': 'osmid'})

In [6]:
# Cut the Central Hungary road network down to Budapest by intersecting the edge
# geometries with the Budapest polygon
gdf_graph_edges = gpd.overlay(
    gdf_graph_edges,
    gdf_poly[['osmid', 'name', 'geometry']],
    how='intersection',
    keep_geom_type=True,
)
print(gdf_graph_edges.shape)

(63221, 4)


In [7]:
# Add relevant graph attributes to road network gdf
def filter_gnx_dict(d, keys):
    """Pick the entries of ``d`` listed in ``keys``.

    Returns a new dict with exactly the keys in ``keys`` (in that order); keys that
    are missing from ``d`` are filled with ``np.nan``.
    """
    picked = {}
    for k in keys:
        picked[k] = d[k] if k in d.keys() else np.nan
    return picked
attributes = ['highway', 'maxspeed', 'oneway', 'length', 'lit', 'surface']
# One attribute dict per row, looked up in the graph through the row's edge id
s = gdf_graph_edges['gnx_edge_id'].apply(
    lambda edge_id: filter_gnx_dict(graph_view.edges[edge_id], attributes)
)
edge_attr_frame = pd.DataFrame(s.to_list())
# Column-wise concat: both frames are aligned on their row index
gdf_graph_edges = pd.concat([gdf_graph_edges, edge_attr_frame], axis=1)
print(gdf_graph_edges.shape)

(63221, 10)


In [8]:
# Drop the redundant 'name' column (it contains a single value)
gdf_graph_edges = gdf_graph_edges.drop(columns=['name'])
print(gdf_graph_edges.shape)

(63221, 9)


In [9]:
# Find two-way roads whose two directions do not share the same gnx_edge_id key -
# 2 examples found in the Budapest road network for now.
# The displayed set holds the ids that are expected (either direction) but missing.
twoway_ids = gdf_graph_edges.loc[~gdf_graph_edges['oneway'], 'gnx_edge_id']
twoway_dir_set = set(twoway_ids.to_list())
# Normalise every id so that the smaller node id comes first
twoway_undir_set = {t if t[0] < t[1] else (t[1], t[0], t[2]) for t in twoway_ids.to_list()}
# Add the reversed counterpart of every normalised id
twoway_undir_dupl_set = twoway_undir_set | {(t[1], t[0], t[2]) for t in twoway_undir_set}
twoway_undir_dupl_set - twoway_dir_set

{(69182479, 4386858889, 0),
 (1380502040, 1380502054, 0),
 (1380502054, 1380502040, 1),
 (4386858889, 69182479, 1)}

In [10]:
# Show every edge that starts or ends at node 1380502054
gdf_graph_edges[gdf_graph_edges['gnx_edge_id'].apply(lambda t: 1380502054 in (t[0], t[1]))]

Unnamed: 0,gnx_edge_id,osmid,geometry,highway,maxspeed,oneway,length,lit,surface
46929,"(659534228, 1380502054, 0)",37244,"LINESTRING (19.13094 47.37885, 19.13066 47.37871)",unclassified,,False,26.053,,
51211,"(1380502040, 1380502054, 0)",37244,"LINESTRING (19.13031 47.37851, 19.13052 47.378...",unclassified,,True,85.126,,
51212,"(1380502040, 1380502054, 1)",37244,"LINESTRING (19.13031 47.37851, 19.13053 47.378...",unclassified,,False,34.41,,
51213,"(1380502054, 659534228, 0)",37244,"LINESTRING (19.13066 47.37871, 19.13094 47.37885)",unclassified,,False,26.053,,
51214,"(1380502054, 1380502040, 0)",37244,"LINESTRING (19.13066 47.37871, 19.13053 47.378...",unclassified,,False,34.41,,


In [11]:
# Swap the gnx_edge_id values of the two parallel edges 1380502040 -> 1380502054
# (key 0 <-> key 1). The string 'temp' is a placeholder so that the two rows can be
# told apart while their ids are exchanged.
gdf_graph_edges.loc[gdf_graph_edges['gnx_edge_id'] == (1380502040, 1380502054, 0), 'gnx_edge_id'] = 'temp'
filt = gdf_graph_edges['gnx_edge_id'] == (1380502040, 1380502054, 1)
# The tuple is wrapped in a one-element Series indexed by the matching row
# (presumably so pandas stores it as a single cell value - TODO confirm).
gdf_graph_edges.loc[filt, 'gnx_edge_id'] = pd.Series([(1380502040, 1380502054, 0)], index=filt[filt].index)
filt = gdf_graph_edges['gnx_edge_id'] == 'temp'
gdf_graph_edges.loc[filt, 'gnx_edge_id'] = pd.Series([(1380502040, 1380502054, 1)], index=filt[filt].index)

In [12]:
# Show every edge that starts or ends at node 69182479
gdf_graph_edges[gdf_graph_edges['gnx_edge_id'].apply(lambda t: 69182479 in (t[0], t[1]))]

Unnamed: 0,gnx_edge_id,osmid,geometry,highway,maxspeed,oneway,length,lit,surface
17618,"(69182479, 4386858889, 0)",37244,"LINESTRING (19.10761 47.59697, 19.10773 47.596...",residential,,True,502.291,,
17619,"(69182479, 4386858889, 1)",37244,"LINESTRING (19.10761 47.59697, 19.10842 47.597...",unclassified,,False,306.246,,
17620,"(69182479, 5956704802, 0)",37244,"LINESTRING (19.10761 47.59697, 19.10730 47.596...",unclassified,,False,104.021,,
58438,"(4386858889, 69182479, 0)",37244,"LINESTRING (19.11105 47.59844, 19.11063 47.598...",unclassified,,False,306.246,,
60280,"(5956704802, 69182479, 0)",37244,"LINESTRING (19.10656 47.59636, 19.10666 47.596...",unclassified,,False,104.021,,


In [13]:
# Swap the gnx_edge_id values of the two parallel edges 69182479 -> 4386858889
# (key 0 <-> key 1). The string 'temp' is a placeholder so that the two rows can be
# told apart while their ids are exchanged.
gdf_graph_edges.loc[gdf_graph_edges['gnx_edge_id'] == (69182479, 4386858889, 0), 'gnx_edge_id'] = 'temp'
filt = gdf_graph_edges['gnx_edge_id'] == (69182479, 4386858889, 1)
# The tuple is wrapped in a one-element Series indexed by the matching row
# (presumably so pandas stores it as a single cell value - TODO confirm).
gdf_graph_edges.loc[filt, 'gnx_edge_id'] = pd.Series([(69182479, 4386858889, 0)], index=filt[filt].index)
filt = gdf_graph_edges['gnx_edge_id'] == 'temp'
gdf_graph_edges.loc[filt, 'gnx_edge_id'] = pd.Series([(69182479, 4386858889, 1)], index=filt[filt].index)

In [14]:
# Treat a maxspeed of '0' as missing, both in the graph and in the edge table
zero_speed_rows = gdf_graph_edges['maxspeed'] == '0'
for k in gdf_graph_edges.loc[zero_speed_rows, 'gnx_edge_id']:
    del graph.edges[k]['maxspeed']
gdf_graph_edges.loc[zero_speed_rows, 'maxspeed'] = np.nan
print(gdf_graph_edges.shape)

(63221, 9)


In [15]:
# Aggregate surface attributes: the values in `paved_attrs` are relabelled to 'paved'
# and the values in `unpaved_attrs` to 'unpaved' further down in this cell.
paved_attrs = ['chipseal', 'cobblestone', 'compacted', 'concrete', 'sett', 'paving_stones']
unpaved_attrs = ['dirt', 'earth', 'fine_gravel', 'grass', 'gravel', 'ground', 'pebblestone', 'rock', 'sand']

def replace_graph_attributes(graph, attr, old, new):
    """Relabel an edge attribute value throughout ``graph``.

    Every edge whose ``attr`` attribute exists and equals ``old`` gets ``new``
    instead. The graph is modified in place and also returned.
    """
    for edge_id in graph.edges:
        edge_data = graph.edges[edge_id]
        if attr not in edge_data.keys():
            continue
        if edge_data[attr] == old:
            edge_data[attr] = new
    return graph

# Step 1: collapse detailed / multi-valued surface tags onto a single keyword.
# The mask is computed once per keyword and applied to the graph AND the edge table
# together, so both always receive the same label. (Previously the graph masks were
# all taken from the untouched table while the table itself was updated keyword by
# keyword, so a tag containing two keywords could end up with different labels in
# the graph and in the table.) The first matching keyword wins.
for keyword in ('cobblestone', 'concrete', 'grass'):
    keyword_rows = gdf_graph_edges['surface'].str.contains(keyword, na=False)
    for k in gdf_graph_edges.loc[keyword_rows, 'gnx_edge_id']:
        graph.edges[k]['surface'] = keyword
    gdf_graph_edges.loc[keyword_rows, 'surface'] = keyword

# Step 2: aggregate to 'paved' / 'unpaved', reusing the lists defined above instead
# of repeating them as literals.
for members, label in ((paved_attrs, 'paved'), (unpaved_attrs, 'unpaved')):
    member_rows = gdf_graph_edges['surface'].isin(members)
    for k in gdf_graph_edges.loc[member_rows, 'gnx_edge_id']:
        graph.edges[k]['surface'] = label
    gdf_graph_edges.loc[member_rows, 'surface'] = label

# Step 3: the graph also holds edges that are not in the edge table; relabel exact
# matches there as well.
for old in paved_attrs:
    graph = replace_graph_attributes(graph, 'surface', old, 'paved')
for old in unpaved_attrs:
    graph = replace_graph_attributes(graph, 'surface', old, 'unpaved')

print(gdf_graph_edges.shape)

(63221, 9)


In [16]:
# Find missing attributes for an edge as the most common value of the 1-hop neighbors of the same type
def find_attr_from_neighbors(graph, edge, attr, dt):
    """Guess ``attr`` for ``edge`` from nearby edges of the same highway type.

    Node pairs are collected with a breadth-first search of depth ``dt`` from both
    end nodes of ``edge`` (the edge's own node pair is excluded once per direction).
    For each pair the parallel edge with the lowest existing key is inspected; its
    ``attr`` value counts if it is set and the edge has the same 'highway' value as
    ``edge``.

    Returns the most common collected value, or None if nothing was collected.
    """
    origin, dest, _ = edge

    node_pairs = list(nx.bfs_edges(graph, source=origin, depth_limit=dt))
    node_pairs += list(nx.bfs_edges(graph, source=dest, depth_limit=dt))
    for own_pair in ((origin, dest), (dest, origin)):
        if own_pair in node_pairs:
            node_pairs.remove(own_pair)

    collected = []
    for pair in node_pairs:
        # bfs_edges yields (u, v) pairs without a key: take the first key that exists
        key = 0
        while pair + (key,) not in graph.edges.keys():
            key += 1
        candidate = graph.edges[pair + (key,)]
        if attr in candidate.keys() and candidate['highway'] == graph.edges[edge]['highway']:
            collected.append(candidate[attr])

    return mode(collected) if len(collected) else None


# Fill missing values from neighbouring edges. The search depth grows step by step;
# each depth is repeated so that values filled in one pass can be picked up by the
# edges that are still missing in the next pass.
for attr in ['maxspeed', 'lit', 'surface']:
    print(attr)
    print('NaNs before:', gdf_graph_edges[gdf_graph_edges[attr].isna()].shape[0])
    for dt in tqdm([1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 20, 20, 20, 1000]):
        for i in gdf_graph_edges[gdf_graph_edges[attr].isna()].index:
            edge = gdf_graph_edges.loc[i, 'gnx_edge_id']
            val = find_attr_from_neighbors(graph, edge, attr, dt)
            # Identity check: None is the "nothing found" marker of find_attr_from_neighbors
            if val is not None:
                # Keep the edge table and the graph in sync
                gdf_graph_edges.loc[i, attr] = val
                graph.edges[edge][attr] = val
    print('NaNs after:', gdf_graph_edges[gdf_graph_edges[attr].isna()].shape[0])
    print()
print(gdf_graph_edges.shape)

maxspeed
NaNs before: 19876


100%|██████████| 15/15 [01:02<00:00,  4.16s/it]


NaNs after: 165

lit
NaNs before: 28631


100%|██████████| 15/15 [02:35<00:00, 10.37s/it]


NaNs after: 153

surface
NaNs before: 14165


100%|██████████| 15/15 [00:21<00:00,  1.42s/it]

NaNs after: 88

(63221, 9)





In [17]:
# Complete missing attributes with the most common value from all road segments of the same type
def _most_common(values):
    """Return one most-common value of ``values`` (NaN if there is none).

    ``Series.mode`` returns every tied value; only the first one (modes come back
    sorted) is kept so that a single scalar - never an array - is written to a cell.
    """
    modes = values.mode()
    return modes.iloc[0] if len(modes) else np.nan


for attr in ['maxspeed', 'lit', 'surface']:
    print(attr)
    missing = gdf_graph_edges[attr].isna()
    print('NaNs before:', missing.sum())
    common_attr_per_highway = gdf_graph_edges.groupby('highway')[attr].agg(_most_common)
    # Look up the per-highway value for the rows that are still missing
    gdf_graph_edges.loc[missing, attr] = gdf_graph_edges.loc[missing, 'highway'].map(common_attr_per_highway)
    print('NaNs after:', gdf_graph_edges[attr].isna().sum())
    print()
print(gdf_graph_edges.shape)

maxspeed
NaNs before: 165
NaNs after: 0

lit
NaNs before: 153
NaNs after: 0

surface
NaNs before: 88
NaNs after: 0

(63221, 9)


In [18]:
# Extract link information to a separate column: a highway value containing '_'
# is flagged as link and reduced to the part before the first '_'
has_suffix = gdf_graph_edges['highway'].str.contains('_')
gdf_graph_edges['link'] = False
gdf_graph_edges.loc[has_suffix, 'link'] = True
highway_parts = gdf_graph_edges.highway.str.split('_')
gdf_graph_edges.loc[:, 'highway'] = highway_parts.apply(lambda parts: parts[0])
print(gdf_graph_edges.shape)

(63221, 10)


In [19]:
# Add geographical information as coordinates of linestring representative point.
# The reprojection and the representative points are computed once and reused for
# both columns.
# NOTE: despite the column names, the values are x/y coordinates in EPSG:23700,
# not longitude/latitude in the EPSG:4326 CRS of the edge table.
repr_points = gdf_graph_edges['geometry'].to_crs('EPSG:23700').representative_point()
gdf_graph_edges['repr_lon'] = repr_points.x
gdf_graph_edges['repr_lat'] = repr_points.y
print(gdf_graph_edges.shape)

(63221, 12)


In [20]:
# Convert CUT directed road network gdf back to graph

def node_name_logic(edge_ls, graph, eps=1e-3):
    """Derive FROMNODENO / TONODENO / KEY for one edge row.

    ``edge_ls`` is a row with a 'gnx_edge_id' tuple and a line geometry. For each
    end of the line, the node id from 'gnx_edge_id' is kept if the node's 'x'/'y'
    in ``graph`` lie within ``eps`` of the line's end point; otherwise a new id
    ``CUT_<row name>_<node id>`` is used. The row is modified and returned.
    """
    # (output column, position in gnx_edge_id, point index on the line)
    line_ends = (('FROMNODENO', 0, 0), ('TONODENO', 1, -1))
    for column, id_position, point_index in line_ends:
        node_no = int(edge_ls['gnx_edge_id'][id_position])
        osm_node = graph.nodes[node_no]
        end_point = shapely.get_point(edge_ls.geometry, point_index)
        if np.abs(osm_node['x'] - end_point.x) > eps or \
            np.abs(osm_node['y'] - end_point.y) > eps:
            # introduce new node id if non-existent in OSM road network
            node_no = f'CUT_{edge_ls.name}_{node_no}'
        edge_ls[column] = node_no

    edge_ls['KEY'] = int(edge_ls['gnx_edge_id'][2])

    return edge_ls


def derive_node_gdf_from_osm_lines(edges):
    """Build a node GeoDataFrame from the end points of the edge lines.

    ``edges`` needs 'FROMNODENO', 'TONODENO' and a line geometry. Two helper columns
    ('geom1', 'geom2': first and last point of each line) are added to ``edges``
    itself. Returns one row per node id, indexed by 'osmid', with the point geometry
    and its 'x' / 'y' coordinates; for an id seen with several points the first
    occurrence is kept.
    """
    edges["geom1"] = shapely.get_point(edges.geometry, 0)
    edges["geom2"] = shapely.get_point(edges.geometry, -1)

    start_points = edges.groupby(['FROMNODENO','geom1']).size().reset_index()
    end_points = edges.groupby(['TONODENO','geom2']).size().reset_index()

    # Stack start and end points into one (id, point, count) table
    stacked = np.concatenate((start_points.values, end_points.values), axis=0)
    node_table = pd.DataFrame(stacked, columns=['osmid', 'geometry', 'size'])
    nodes = gpd.GeoDataFrame(node_table.drop_duplicates(subset='osmid'), geometry='geometry')
    # 'size' is ambiguous as calculated above - size_in / size_out would be correct
    nodes = nodes.drop(columns=['size'])

    nodes['x'] = nodes.geometry.x
    nodes['y'] = nodes.geometry.y

    return nodes.set_index('osmid')


def create_edge_gdf_from_osm_lines(edges, attribute_cols):
    """Return the edge table indexed by (u, v, key).

    Copies 'FROMNODENO', 'TONODENO' and 'KEY' into new columns 'u', 'v' and 'key'
    on ``edges`` itself, then returns 'geometry' plus ``attribute_cols`` with
    ('u', 'v', 'key') as the index. ``attribute_cols`` must be a list.
    """
    for target, source in (('u', 'FROMNODENO'), ('v', 'TONODENO'), ('key', 'KEY')):
        edges[target] = edges[source]

    selected = ['u', 'v', 'key', 'geometry'] + attribute_cols
    return edges[selected].set_index(['u', 'v', 'key'])


def replace_node_ids_with_value(edge_id, to_replace, value):
    """Return ``edge_id`` as a tuple with ``to_replace`` swapped for ``value``.

    Only the first two components (the two node ids) are checked; any further
    component, such as the key, is passed through untouched.
    """
    parts = [*edge_id]
    for position in (0, 1):
        if parts[position] == to_replace:
            parts[position] = value
    return tuple(parts)


# Explode MultiLineStrings resulted by overlay to LineStrings
# (one row per LineString; the row index is renumbered afterwards)
gdf_graph_edges = gdf_graph_edges.explode(index_parts=False)
gdf_graph_edges.reset_index(inplace=True, drop=True)

# Attribute columns carried over to the rebuilt graph: every column except the first and the last.
# NOTE(review): with the column order at this point this appears to keep 'geometry'
# (which create_edge_gdf_from_osm_lines also selects explicitly) and to leave out the
# last column - confirm that this selection is intended.
attribute_cols = gdf_graph_edges.columns[1:-1].to_list()

# Derive FROMNODENO / TONODENO / KEY per row and rebuild gnx_edge_id from them
gdf_bp_edges = gdf_graph_edges.progress_apply(lambda row: node_name_logic(row, graph), axis=1)
gdf_graph_edges['gnx_edge_id'] = list(zip(gdf_bp_edges['FROMNODENO'], gdf_bp_edges['TONODENO'], gdf_bp_edges['KEY']))

gdf_nodes = derive_node_gdf_from_osm_lines(gdf_bp_edges)
gdf_edges = create_edge_gdf_from_osm_lines(gdf_bp_edges, attribute_cols=attribute_cols)

# Drop redundant nodes obtained by CUTTING - nodes with same coordinates, but different ids
# Each group is a tuple of 'CUT' node ids sharing identical column values; the first
# id is kept and every other id is replaced by it in both edge tables.
df = gdf_nodes[gdf_nodes.index.get_level_values('osmid').astype(str).str.contains('CUT')]
redundant_node_list = list(filter(lambda t: len(t)>1, df.groupby(list(df))['geometry'].apply(lambda x: tuple(x.index)).tolist()))
for group in tqdm(redundant_node_list):
    for node in group[1:]:
        gdf_nodes.drop(node, inplace=True)
        gdf_edges.index = list(map(lambda edge_id: replace_node_ids_with_value(edge_id, node, group[0]), gdf_edges.index.tolist()))
        gdf_graph_edges['gnx_edge_id'] = gdf_graph_edges['gnx_edge_id'].apply(lambda edge_id: replace_node_ids_with_value(edge_id, node, group[0]))

# Increase key - 3rd tuple component - of duplicated edge ids
# (repeated until no duplicate id is left)
# NOTE(review): the 'index' column used below presumably only exists because the loop
# above replaced the (u, v, key) index with a plain list of tuples - verify what
# happens when there are no redundant nodes and the loop never runs.
gdf_edges = gdf_edges.reset_index()
while gdf_edges[gdf_edges['index'].duplicated()].shape[0]:
    gdf_edges.loc[gdf_edges[gdf_edges['index'].duplicated()].index, 'index'] = gdf_edges.loc[gdf_edges[gdf_edges['index'].duplicated()].index, 'index'].apply(lambda row: (row[0], row[1], row[2]+1))
gdf_edges.set_index('index', inplace=True)
# Same key bump for the ids stored in the edge table
while gdf_graph_edges[gdf_graph_edges['gnx_edge_id'].duplicated()].shape[0]:
    gdf_graph_edges.loc[gdf_graph_edges[gdf_graph_edges['gnx_edge_id'].duplicated()].index, 'gnx_edge_id'] = gdf_graph_edges.loc[gdf_graph_edges[gdf_graph_edges['gnx_edge_id'].duplicated()].index, 'gnx_edge_id'].apply(lambda row: (row[0], row[1], row[2]+1))

# # Add osmid field to gdf_edges for later to_undirected conversion
# gdf_edges = gdf_edges.reset_index()
# gdf_edges['osmid'] = gdf_edges['index'].astype(str)
# gdf_edges = gdf_edges.set_index('index')

# Build the graph of the cut road network from the node and edge tables
graph_bp_edges = ox.convert.graph_from_gdfs(gdf_nodes, gdf_edges, graph_attrs=None)

print(gdf_graph_edges.shape)
print(gdf_bp_edges.shape)
print(len(graph_bp_edges.edges))

100%|██████████| 63599/63599 [00:53<00:00, 1193.53it/s]
100%|██████████| 155/155 [00:09<00:00, 15.55it/s]


(63599, 12)
(63599, 20)
63599


In [21]:
# Change order of FROM- and TONODE elements of gnx_edge_id for oneway streets so that the
# node that comes first in graph_bp_edges.nodes is also first in the id.
# NOTE(review): presumably this mirrors the (u, v) orientation that to_undirected
# produces in the next cell - confirm.

# Position of every node in the graph's node order, built once. The previous version
# rebuilt list(graph_bp_edges.nodes) up to four times for every single row.
node_position = {node: position for position, node in enumerate(graph_bp_edges.nodes)}


def change_edge_id_by_graph_node_order(row):
    """Return the edge id ``row`` with its two node ids in graph node order.

    ``row`` is a (from_node, to_node, key) tuple. If both nodes are in the graph and
    the from-node comes after the to-node in the node order, the two node ids are
    swapped; otherwise ``row`` is returned unchanged.
    """
    from_position = node_position.get(row[0])
    to_position = node_position.get(row[1])
    if from_position is not None and to_position is not None and from_position > to_position:
        return (row[1], row[0], row[2])
    return row


gdf_graph_edges.loc[gdf_graph_edges[gdf_graph_edges['oneway']].index, 'gnx_edge_id'] = gdf_graph_edges[gdf_graph_edges['oneway']]['gnx_edge_id'].progress_apply(change_edge_id_by_graph_node_order)

100%|██████████| 10159/10159 [00:13<00:00, 737.00it/s]


In [22]:
# Keep only the largest connected component (by number of edges) of the undirected road network
graph_u = ox.convert.to_undirected(graph_bp_edges)
S = [graph_u.subgraph(c).copy() for c in nx.connected_components(graph_u)]
max_comp = max(S, key=lambda component: len(component.edges))
max_comp_edges = list(max_comp.edges)

# Edge-table rows whose id exists in the whole undirected graph / in the largest component
in_undirected_graph = gdf_graph_edges['gnx_edge_id'].isin(list(graph_u.edges))
in_largest_component = gdf_graph_edges['gnx_edge_id'].isin(max_comp_edges)
gdf_graph_edges_whole_undir = gdf_graph_edges[in_undirected_graph].copy()
gdf_graph_edges_conn_undir = gdf_graph_edges[in_largest_component].copy()

print(len(graph_u.edges))
print(gdf_graph_edges_whole_undir.shape)
print(len(max_comp.edges))
print(gdf_graph_edges_conn_undir.shape)

36879
(36739, 12)
36647
(36513, 12)


In [23]:
# Convert gdfs to graph once more to get rid of remaining anomalies
def _rebuild_edges_via_graph(edges_subset):
    """Round-trip an edge table through a graph and back.

    A copy of ``edges_subset`` is split into node and edge tables, turned into a
    graph and converted back into an edge table whose 'gnx_edge_id' is rebuilt from
    the graph's (u, v, key); the 'u', 'v', 'key' and 'osmid' columns are dropped.
    Uses the notebook-level ``attribute_cols``.

    Returns ``(graph, edge_table)``.
    """
    work = edges_subset.copy()
    work['FROMNODENO'] = work['gnx_edge_id'].apply(lambda t: t[0])
    work['TONODENO'] = work['gnx_edge_id'].apply(lambda t: t[1])
    work['KEY'] = work['gnx_edge_id'].apply(lambda t: t[2])
    nodes = derive_node_gdf_from_osm_lines(work)
    edges = create_edge_gdf_from_osm_lines(work, attribute_cols=attribute_cols)
    rebuilt_graph = ox.convert.graph_from_gdfs(nodes, edges, graph_attrs=None)
    rebuilt_edges = ox.convert.graph_to_gdfs(rebuilt_graph, nodes=False)
    rebuilt_edges.reset_index(inplace=True)
    rebuilt_edges['gnx_edge_id'] = rebuilt_edges.apply(lambda row: (row['u'], row['v'], row['key']), axis=1)
    rebuilt_edges.drop(columns=['u', 'v', 'key', 'osmid'], inplace=True)
    return rebuilt_graph, rebuilt_edges


# The same pipeline is applied to the largest component and to the whole network.
# The intermediate tables now stay local to the helper instead of being left behind
# as notebook-level variables.
graph_conn, gdf_graph_edges_conn_undir = _rebuild_edges_via_graph(gdf_graph_edges_conn_undir)
graph_whole, gdf_graph_edges_whole_undir = _rebuild_edges_via_graph(gdf_graph_edges_whole_undir)

print(len(graph_conn.edges))
print(gdf_graph_edges_conn_undir.shape)
print(len(graph_whole.edges))
print(gdf_graph_edges_whole_undir.shape)

36484
(36484, 11)
36710
(36710, 11)


In [24]:
# Get the largest connected component as a directed graph
# The component's edge ids are undirected, so the reversed (v, u, key) ids are added
# to match edge-table rows in either direction.
reversed_edges = [(tup[1], tup[0], tup[2]) for tup in max_comp_edges]
max_comp_edges_dir = max_comp_edges + reversed_edges
in_component = gdf_graph_edges['gnx_edge_id'].isin(max_comp_edges_dir)
gdf_graph_edges_dir = gdf_graph_edges[in_component].copy()

# Split the id tuple into the three columns expected by the gdf helpers
for position, column in enumerate(('FROMNODENO', 'TONODENO', 'KEY')):
    gdf_graph_edges_dir[column] = gdf_graph_edges_dir['gnx_edge_id'].apply(lambda x, position=position: x[position])

gdf_nodes = derive_node_gdf_from_osm_lines(gdf_graph_edges_dir)
gdf_edges = create_edge_gdf_from_osm_lines(gdf_graph_edges_dir, attribute_cols=attribute_cols)

graph_bp_edges_dir = ox.convert.graph_from_gdfs(gdf_nodes, gdf_edges, graph_attrs=None)

gdf_dir = ox.convert.graph_to_gdfs(graph_bp_edges_dir, nodes=False)

print(len(graph_bp_edges_dir.edges))
print(gdf_dir.shape)

62889
(62889, 11)


In [25]:
# Save processed largest connected component road network as geojson
# The id tuples are stored as strings in the output file
gdf_graph_edges_conn_undir['gnx_edge_id'] = gdf_graph_edges_conn_undir['gnx_edge_id'].astype(str)
today = date.today().strftime('%d-%m-%Y')  # NOTE: not used in the file names written below
out_path = os.path.join(DATA_PATH, 'OSM_road_network')
# exist_ok avoids the separate existence check (and the race between check and create)
os.makedirs(out_path, exist_ok=True)
gdf_graph_edges_conn_undir.to_file(os.path.join(out_path, 'GDF_BP-network-conn-undir.json'), driver='GeoJSON')

In [26]:
# Save the processed whole (not only the largest connected component) undirected
# road network as geojson; the id tuples are stored as strings.
# Relies on `out_path` defined in the previous cell.
gdf_graph_edges_whole_undir['gnx_edge_id'] = gdf_graph_edges_whole_undir['gnx_edge_id'].astype(str)
gdf_graph_edges_whole_undir.to_file(os.path.join(out_path, f'GDF_BP-network-whole-undir.json'), driver='GeoJSON')

In [27]:
# Save the processed largest connected component road network as a pickled directed graph.
# NOTE: the file content is a pickle even though the name ends in '.json'; the name is
# kept so that existing readers still find the file. Only unpickle files you trust.
# The context manager makes sure the file handle is closed (it was left open before).
with open(os.path.join(out_path, 'GNX_BP-network-conn-dir.json'), 'wb') as out_file:
    pickle.dump(graph_bp_edges_dir, out_file)

In [28]:
# Save the processed whole (not only the largest connected component) road network
# as a pickled directed graph.
# NOTE: the file content is a pickle even though the name ends in '.json'; the name is
# kept so that existing readers still find the file. Only unpickle files you trust.
# The context manager makes sure the file handle is closed (it was left open before).
with open(os.path.join(out_path, 'GNX_BP-network-whole-dir.json'), 'wb') as out_file:
    pickle.dump(graph_bp_edges, out_file)