In [1]:
import os
import geopandas as gpd
import pickle
from datetime import date
from tqdm import tqdm

# Register tqdm's pandas integration; the `progress_apply` call used later in this notebook relies on it.
tqdm.pandas()

# Root folder of the input data; the paths below are built relative to it.
DATA_PATH = './data'

In [2]:
# Run date formatted as dd-mm-YYYY. Not referenced by the cells visible here;
# kept in case later cells use it.
today = date.today().strftime('%d-%m-%Y')
out_path = os.path.join(DATA_PATH, 'OSM_road_network')

# Edge table; its 'gnx_edge_id' and 'oneway' columns are used by the checks below.
gdf_graph_edges_conn_undir = gpd.read_file(os.path.join(out_path, 'GDF_BP-network-conn-undir.json'))

# NOTE(review): the file carries a .json extension but is read with pickle - confirm the naming is intended.
# pickle.load can execute arbitrary code, so only load files from a trusted source.
# The context manager closes the file handle as soon as the graph is loaded.
with open(os.path.join(out_path, 'GNX_BP-network-conn-dir.json'), 'rb') as graph_file:
    graph_bp_edges_dir = pickle.load(graph_file)

---

## All graph edges have all attributes - e.g. link attribute is there

In [3]:
# Show the attribute dict of one sample edge, addressed by its (from, to, key) id.
graph_bp_edges_dir.edges[(9515749, 502897904, 0)]

{'geometry': <LINESTRING (19.221 47.44, 19.22 47.44, 19.22 47.44, 19.22 47.44)>,
 'osmid': 37244,
 'highway': 'unclassified',
 'maxspeed': '50',
 'oneway': True,
 'length': 37.763000000000005,
 'lit': 'yes',
 'surface': 'asphalt',
 'link': False,
 'repr_lon': 663050.5423291762,
 'repr_lat': 232905.50863739836}

In [4]:
# Preview the first rows of the edge table to compare its columns with the graph edge attributes.
gdf_graph_edges_conn_undir.head()

Unnamed: 0,highway,maxspeed,oneway,length,lit,surface,link,repr_lon,repr_lat,gnx_edge_id,geometry
0,unclassified,50,True,37.763,yes,asphalt,False,663050.542329,232905.508637,"(9515749, 502897904, 0)","LINESTRING (19.22072 47.44008, 19.22047 47.439..."
1,unclassified,50,True,69.843,yes,asphalt,False,663032.114808,232916.285357,"(9515749, 304982587, 0)","LINESTRING (19.22072 47.44008, 19.22041 47.440..."
2,unclassified,50,True,36.095,yes,asphalt,False,663068.998673,232915.921782,"(9515749, 704478994, 0)","LINESTRING (19.22115 47.44022, 19.22072 47.44008)"
3,unclassified,50,True,10.286,yes,asphalt,False,663043.130528,232890.628392,"(502897904, 502897905, 0)","LINESTRING (19.22037 47.43986, 19.22036 47.43976)"
4,primary,50,True,55.457,yes,asphalt,False,663000.655257,232926.422113,"(502897904, 304982587, 0)","LINESTRING (19.22037 47.43986, 19.21981 47.44018)"


---

## All graph edges CAN BE and GET projected to gdf

### No duplicate edge ids in graph

In [5]:
# Materialise the graph's edge ids as a list so that any repeated id would still be counted.
graph_edges = list(graph_bp_edges_dir.edges)

In [6]:
# De-duplicated edge ids; also used for the membership check against the gdf ids.
graph_edges_set = set(graph_edges)

In [7]:
# Equal lengths before and after de-duplication mean no edge id occurs twice in the graph.
assert len(graph_edges) == len(graph_edges_set)

### All dir graph ids have undir gdf id pairs

In [8]:
def gnx_edge_id_str_to_tuple(s):
    """Parse the text form of an edge id back into a tuple.

    Parameters
    ----------
    s : str
        A tuple rendered as text, e.g. ``"(9515749, 502897904, 0)"``.

    Returns
    -------
    tuple
        One item per ``', '``-separated field: an ``int`` where the field
        parses as an integer, otherwise the field with its first and last
        character (the quotes around a string item) removed.
    """
    # Drop the enclosing parentheses before splitting into fields.
    fields = s[1:-1].split(', ')
    parsed = []
    for field in fields:
        try:
            parsed.append(int(field))
        except ValueError:
            # Not an integer: treat the field as a quoted string and peel off the quotes.
            # Only ValueError is caught so interrupts and unrelated errors still surface.
            parsed.append(field[1:-1])
    return tuple(parsed)

In [9]:
# Ids exactly as stored in the gdf, parsed from text into tuples.
gdf_edge_ids_fwd = gdf_graph_edges_conn_undir['gnx_edge_id'].apply(gnx_edge_id_str_to_tuple)

# Two-way rows additionally contribute their reversed (to, from, key) id.
gdf_twoway_mask = ~gdf_graph_edges_conn_undir['oneway']
gdf_edge_ids_rev = gdf_edge_ids_fwd[gdf_twoway_mask].apply(lambda t: (t[1], t[0], t[2]))

gdf_edges = gdf_edge_ids_fwd.to_list() + gdf_edge_ids_rev.to_list()
gdf_edges_set = set(gdf_edges)

In [10]:
# Every id derived from the gdf (stored ids plus reverses of two-way rows) must be a graph edge id.
# NOTE(review): this verifies "gdf ids are a subset of graph ids"; the section heading reads like
# the opposite direction (graph ids covered by gdf ids) - confirm which one is intended.
assert gdf_edges_set.issubset(graph_edges_set)

### Duplicates - circular road segments (origin = destination)

In [11]:
# Ids exactly as stored in the gdf.
gdf_all_set = set(
    gdf_graph_edges_conn_undir['gnx_edge_id']
    .apply(gnx_edge_id_str_to_tuple)
    .to_list()
)

# Reversed (to, from, key) ids, built from the two-way rows only.
twoway_id_strs = gdf_graph_edges_conn_undir.loc[~gdf_graph_edges_conn_undir['oneway'], 'gnx_edge_id']
gdf_undir_rev_set = {
    (edge[1], edge[0], edge[2])
    for edge in twoway_id_strs.apply(gnx_edge_id_str_to_tuple)
}

# Number of ids that appear both as stored and as a reversed two-way id.
len(gdf_all_set & gdf_undir_rev_set)

7

In [12]:
# Render the overlapping ids back to text so they can be matched against the string-typed
# 'gnx_edge_id' column, then show the corresponding rows.
overlap_id_strs = [str(edge_id) for edge_id in gdf_all_set.intersection(gdf_undir_rev_set)]
gdf_graph_edges_conn_undir[gdf_graph_edges_conn_undir['gnx_edge_id'].isin(overlap_id_strs)]

Unnamed: 0,highway,maxspeed,oneway,length,lit,surface,link,repr_lon,repr_lat,gnx_edge_id,geometry
1947,residential,30,False,343.244,yes,asphalt,False,661339.911226,233198.662033,"(528612162, 528612162, 0)","LINESTRING (19.19671 47.44224, 19.19806 47.441..."
24882,residential,30,False,55.595,yes,asphalt,False,654154.126851,240360.779997,"(9269957275, 9269957275, 0)","LINESTRING (19.10256 47.50703, 19.10263 47.507..."
25136,residential,30,False,418.086,yes,asphalt,False,646327.455731,239343.483855,"(277674093, 277674093, 0)","LINESTRING (18.99983 47.49845, 18.99973 47.498..."
32389,unclassified,40,False,119.623,yes,asphalt,False,650193.888712,242966.764843,"(420302042, 420302042, 0)","LINESTRING (19.04978 47.53077, 19.04983 47.530..."
35114,residential,50,False,234.762,yes,unpaved,False,649633.208479,227445.352141,"(6111913476, 6111913476, 0)","LINESTRING (19.04314 47.39110, 19.04276 47.391..."
35206,residential,30,False,1397.429,yes,unpaved,False,658385.999422,226703.506854,"(4109417097, 4109417097, 0)","LINESTRING (19.15575 47.38579, 19.15508 47.385..."
36306,residential,50,False,241.779,yes,asphalt,False,658342.753295,236247.64633,"(4258284198, 4258284198, 0)","LINESTRING (19.15881 47.46959, 19.15812 47.470..."


### All dir graph id - undir gdf id pairs match (geometries)

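- oneway: the gdf id exists in the graph under the same id, with identical geometry
- twoway: the gdf id exists in the graph with identical geometry, and its inverse `(to, from, key)` exists in the graph with the reversed geometry (circular segments, whose inverse is the same id, are accepted as-is)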

In [13]:
def verify_matching_geometries(edge_id, oneway, gdf_geom=None):
    """Check that a gdf edge and its graph counterpart(s) carry the same geometry.

    Parameters
    ----------
    edge_id : str
        Edge id as stored in the ``gnx_edge_id`` column,
        e.g. ``"(9515749, 502897904, 0)"``.
    oneway : bool
        Value of the row's ``oneway`` column.
    gdf_geom : geometry, optional
        Geometry of the gdf row being verified. When omitted, it is looked up
        in ``gdf_graph_edges_conn_undir`` by ``edge_id``; that lookup scans the
        whole frame and takes the first row carrying the id, so callers that
        already hold the row should pass its geometry directly.

    Returns
    -------
    bool
        One-way: whether the gdf geometry equals the graph geometry stored
        under the same id. Two-way: whether that holds and the reversed gdf
        geometry equals the graph geometry stored under ``(to, from, key)``.
        Two-way circular segments (id equal to its own inverse) always pass.

    Lookup errors raised by the graph or by the frame propagate unchanged.
    """
    t = gnx_edge_id_str_to_tuple(edge_id)
    graph_geom = graph_bp_edges_dir.edges[t]['geometry']
    if gdf_geom is None:
        # Fallback for callers that only have the id at hand.
        gdf_geom = gdf_graph_edges_conn_undir[gdf_graph_edges_conn_undir['gnx_edge_id'] == edge_id].iloc[0].geometry

    forward_match = gdf_geom == graph_geom
    if oneway:
        return forward_match

    t_rev = (t[1], t[0], t[2])
    if t == t_rev:
        # Circular segment: origin equals destination, so there is no distinct reverse edge to compare.
        return True

    graph_geom_rev = graph_bp_edges_dir.edges[t_rev]['geometry']
    # Both directions must agree: same geometry forward, reversed geometry backward.
    return forward_match and gdf_geom.reverse() == graph_geom_rev

In [14]:
def _row_geometries_match(row):
    """Run the geometry check for one gdf row, using its id and one-way flag."""
    return verify_matching_geometries(row['gnx_edge_id'], row['oneway'])


# One result per gdf row, computed row by row with a progress bar.
filt = gdf_graph_edges_conn_undir.progress_apply(_row_geometries_match, axis=1)

100%|██████████| 36484/36484 [01:14<00:00, 487.04it/s]


In [15]:
# Rows whose geometry check failed; the assertion in the next cell requires this selection to be empty.
gdf_graph_edges_conn_undir[~filt]

Unnamed: 0,highway,maxspeed,oneway,length,lit,surface,link,repr_lon,repr_lat,gnx_edge_id,geometry


In [16]:
# No row may fail the geometry check.
assert len(gdf_graph_edges_conn_undir[~filt]) == 0