In [1]:
import pandas as pd
import osmnx as ox
import folium
import geopandas as gpd
from shapely.geometry import Point



### Load risk

In [2]:
# Load the risk table from CSV; the first CSV column is used as the row index.
df_risk = pd.read_csv('../data/NYC_risk.csv', index_col=0)

In [3]:
df_risk.head()

Unnamed: 0,latitude,longitude,accidents,total_injured,total_killed
0,40.501465,-74.24523,1,0.0,0.0
1,40.50331,-74.237465,1,0.0,0.0
2,40.503387,-74.24883,1,0.0,0.0
3,40.503414,-74.24496,1,0.0,0.0
4,40.50447,-74.243454,1,0.0,0.0


In [4]:
# Build one point per risk record (x = longitude, y = latitude) in a single
# vectorized call instead of a Python-level loop over shapely Points, then
# attach the points as the geometry column of an EPSG:4326 GeoDataFrame.
geometry = gpd.points_from_xy(df_risk.longitude, df_risk.latitude)
gdf_risk = gpd.GeoDataFrame(df_risk, crs="EPSG:4326", geometry=geometry)

In [5]:
gdf_risk.head()

Unnamed: 0,latitude,longitude,accidents,total_injured,total_killed,geometry
0,40.501465,-74.24523,1,0.0,0.0,POINT (-74.24523 40.50147)
1,40.50331,-74.237465,1,0.0,0.0,POINT (-74.23747 40.50331)
2,40.503387,-74.24883,1,0.0,0.0,POINT (-74.24883 40.50339)
3,40.503414,-74.24496,1,0.0,0.0,POINT (-74.24496 40.50341)
4,40.50447,-74.243454,1,0.0,0.0,POINT (-74.24345 40.50447)


### Load graph

In [22]:
# Load the street graph with osmnx's GraphML reader.
# Note: the file is read as GraphML even though its name ends in ".osm".
G = ox.load_graphml(filepath='../data/NYC_drive.osm')

In [7]:
G.number_of_nodes()

55361

In [8]:
G.number_of_edges()

140811

In [9]:
# Convert the graph into two GeoDataFrames: one for nodes, one for edges.
# NOTE(review): despite the `_proj` suffix, no projection step is visible in
# this notebook and the node x/y values displayed further down look like
# lon/lat degrees — confirm whether a projected CRS was intended.
nodes_proj, edges_proj = ox.graph_to_gdfs(G, nodes=True, edges=True)

### Create basic maps

In [10]:
# Base folium map; the coordinates below are the initial centre of the view.
latitude, longitude = 40.677834, -74.012443
map_center = [latitude, longitude]
map_nyc = folium.Map(zoom_start=10, location=map_center)
# Bare last expression so the notebook renders the map.
map_nyc

### Insert a few markers

In [11]:
# Add a green marker for each of the first five risk records.
# Coordinates are read by column name rather than by column position, so the
# cell does not silently break if the CSV column order changes; head(5) also
# copes with a table that has fewer than five rows.
first_risk_rows = df_risk.head(5)
for i, (lat, lon) in enumerate(zip(first_risk_rows["latitude"], first_risk_rows["longitude"])):
    folium.Marker([lat, lon], popup=i, icon=folium.Icon(color="green")).add_to(map_nyc)


In [12]:
map_nyc

### Look up the graph nodes matching the markers' lon/lat

In [13]:
nodes_proj.head()

Unnamed: 0_level_0,y,x,ref,highway,street_count,geometry
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
39076461,40.786409,-73.794627,33,motorway_junction,3,POINT (-73.79463 40.78641)
39076490,40.762429,-73.757091,31W,motorway_junction,3,POINT (-73.75709 40.76243)
39076504,40.753467,-73.744164,30W,motorway_junction,3,POINT (-73.74416 40.75347)
42421728,40.798048,-73.960044,,traffic_signals,3,POINT (-73.96004 40.79805)
42421731,40.798645,-73.961474,,traffic_signals,4,POINT (-73.96147 40.79865)


In [14]:
edges_proj.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,oneway,highway,length,geometry,lanes,ref,name,maxspeed,bridge,access,tunnel,width,junction,service
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
39076461,42854803,0,25161578,True,motorway_link,254.709,"LINESTRING (-73.79463 40.78641, -73.79361 40.7...",,,,,,,,,,
39076461,274283981,0,"[122633397, 25161349]",True,motorway,767.8,"LINESTRING (-73.79463 40.78641, -73.79309 40.7...",2.0,CIP,Cross Island Parkway,50 mph,,,,,,
39076490,277672046,0,5699971,True,motorway_link,259.674,"LINESTRING (-73.75709 40.76243, -73.75721 40.7...",,,,,,,,,,
39076490,277672005,0,39084898,True,motorway,291.839,"LINESTRING (-73.75709 40.76243, -73.75741 40.7...",3.0,CIP,Cross Island Parkway,50 mph,,,,,,
39076504,462124701,0,"[618709517, 618709515, 5700693]",True,motorway_link,433.148,"LINESTRING (-73.74416 40.75347, -73.74453 40.7...",1.0,,,,yes,,,,,


In [15]:
# For the first 100 risk records, look for a graph node with exactly the same
# coordinates, then print the nearest graph node together with its distance.
for i in range(100):
    # Read coordinates by column name instead of by column position.
    lat = df_risk["latitude"].iloc[i]
    lon = df_risk["longitude"].iloc[i]
    print(i)
    # Exact float equality on coordinates; in the recorded output this never
    # matches (every result is an empty GeoDataFrame).
    print(nodes_proj[(nodes_proj.y == lat) & (nodes_proj.x == lon)])
    print(ox.get_nearest_node(G, (lat, lon), return_dist=True))

0
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42971016, 0.5073093519301438)
1
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(3658673953, 1.5886952720046006)
2
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42989170, 2.1034914598947796)
3
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42989156, 80.67459394037911)
4
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42989145, 0.3560021870177833)
5
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42989162, 83.2774066359121)
6
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42948879, 0.7609179464710885)
7
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42989122, 0.4047526847174118)
8
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry

(42988996, 67.55028318853115)
72
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42991518, 29.1558611885998)
73
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(43003189, 0.43702409943187553)
74
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42988992, 0.852564296982264)
75
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42991460, 13.378140216978728)
76
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42951686, 41.10576105741409)
77
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(43000925, 0.2768893757463647)
78
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(43000925, 123.55244612603246)
79
Empty GeoDataFrame
Columns: [y, x, ref, highway, street_count, geometry]
Index: []
(42992450, 33.2157272071133)
80
Empty GeoDataFrame
Columns: [y, x, r

In [16]:
# For the first five risk records, find the nearest graph node and add it to
# the map as a red marker whose popup shows the distance to the record.
for i in range(5):
    # Read coordinates by column name instead of by column position.
    lat = df_risk["latitude"].iloc[i]
    lon = df_risk["longitude"].iloc[i]
    node_id, distance = ox.get_nearest_node(G, (lat, lon), return_dist=True)
    print(i, " - ", node_id, " - ", distance)
    # Fetch the node's attribute dict once; 'y'/'x' hold its coordinates.
    node = G.nodes[node_id]
    folium.Marker([node['y'], node['x']], popup=f"{i}, distance = {distance}", icon=folium.Icon(color="red")).add_to(map_nyc)

0  -  42971016  -  0.5073093519301438
1  -  3658673953  -  1.5886952720046006
2  -  42989170  -  2.1034914598947796
3  -  42989156  -  80.67459394037911
4  -  42989145  -  0.3560021870177833


In [17]:
map_nyc

### Test one or more specific points

In [18]:
# Start from a fresh map so only the selected records are shown.
latitude = 40.677834
longitude = -74.012443
map_nyc = folium.Map(location=[latitude, longitude], zoom_start=10)

# Positional row numbers of the risk records to inspect.
index = [70, 93, 96, 66, 55, 75, 22]

for i in index:
    # Read coordinates by column name instead of by column position.
    lat = df_risk["latitude"].iloc[i]
    lon = df_risk["longitude"].iloc[i]
    node_id, distance = ox.get_nearest_node(G, (lat, lon), return_dist=True)
    print(i, " - ", node_id, " - ", distance)
    # Green marker: the risk record itself.
    folium.Marker([lat, lon], popup=i, icon=folium.Icon(color="green")).add_to(map_nyc)
    # Red marker: the nearest graph node, with the distance in the popup.
    node = G.nodes[node_id]
    folium.Marker([node['y'], node['x']], popup=f"{i}, distance = {distance}", icon=folium.Icon(color="red")).add_to(map_nyc)
map_nyc

70  -  42983670  -  4.363974749220224
93  -  42980117  -  6.197664805433195
96  -  447832833  -  8.26367050976746
66  -  42959842  -  9.461859369846719
55  -  42968257  -  9.917268089017906
75  -  42991460  -  13.378140216978728
22  -  42978560  -  15.113204247804262


### Try to find the nearest edge

In [37]:
# Records whose nearest node is at least this far away are looked up against
# the nearest edge instead.
distance_limit = 10

for i in range(10):
    # Read coordinates by column name instead of by column position.
    lat = df_risk["latitude"].iloc[i]
    lon = df_risk["longitude"].iloc[i]
    node_id, distance = ox.get_nearest_node(G, (lat, lon), return_dist=True)
    if distance >= distance_limit:
        print(i, " - ", node_id, " - ", distance)
        # NOTE(review): the original (French) comment warned that this call
        # takes X / Y and not Y / X, yet the point is passed as
        # (latitude, longitude), i.e. Y / X, exactly as for get_nearest_node.
        # Confirm the expected order against the installed osmnx version.
        nearest_edge = ox.get_nearest_edge(G, (lat, lon), return_geom=True, return_dist=True)
        print(nearest_edge)
        # The result is (u, v, key, geometry, distance); u/v/key identify the edge.
        u, v, key, _edge_geometry, _edge_distance = nearest_edge
        edge = G.edges[u, v, key]
        print(edge)
        


3  -  42989156  -  80.67459394037911
(42989156, 42991087, 0, <shapely.geometry.linestring.LineString object at 0x16838fc70>, 7.712209202418146e-07)
{'osmid': 68665158, 'name': 'Main Street', 'highway': 'residential', 'oneway': False, 'length': 285.641}
5  -  42989162  -  83.2774066359121
(42989162, 42951353, 0, <shapely.geometry.linestring.LineString object at 0x16bc06250>, 6.096998575547062e-07)
{'osmid': 744313477, 'name': 'Carteret Street', 'highway': 'residential', 'oneway': False, 'length': 292.61}


In [45]:
from datetime import datetime

In [46]:
print(f"[!] Start Loading: {datetime.now()}")
# Records whose nearest node is at least this far away are matched to an edge.
distance_limit = 10

# Start from a fresh map so only the unmatched records and their edges are shown.
latitude = 40.677834
longitude = -74.012443
map_nyc = folium.Map(location=[latitude, longitude], zoom_start=10)

for i in range(100):
    # Read coordinates by column name instead of by column position.
    lat = df_risk["latitude"].iloc[i]
    lon = df_risk["longitude"].iloc[i]
    node_id, distance = ox.get_nearest_node(G, (lat, lon), return_dist=True)
    if distance >= distance_limit:
        print(i, " - ", node_id, " - ", distance)
        # NOTE(review): the original (French) comment warned that this call
        # takes X / Y and not Y / X, yet the point is passed as
        # (latitude, longitude), i.e. Y / X, exactly as for get_nearest_node.
        # Confirm the expected order against the installed osmnx version.
        nearest_edge = ox.get_nearest_edge(G, (lat, lon), return_geom=True, return_dist=True)
        print(nearest_edge)
        # Element 3 of the result is the edge geometry (requested via return_geom=True).
        edge_geometry = nearest_edge[3]
        # Green marker: the risk record; red line: its nearest edge.
        folium.Marker([lat, lon], popup=i, icon=folium.Icon(color="green")).add_to(map_nyc)
        folium.Choropleth(edge_geometry, popup=f"{i}, distance = {distance}", line_weight=5, line_color='red', line_opacity=0.5).add_to(map_nyc)
        # Per-edge timestamp: in the recorded run each lookup took tens of seconds.
        print(f"[!] End loading edge: {datetime.now()}")
print(f"[!] End Loading: {datetime.now()}")
map_nyc

[!] Start Loading: 2021-01-10 18:05:40.841507
3  -  42989156  -  80.67459394037911
(42989156, 42991087, 0, <shapely.geometry.linestring.LineString object at 0x1527dca60>, 7.712209202418146e-07)
[!] End loading edge: 2021-01-10 18:06:21.834876
5  -  42989162  -  83.2774066359121
(42989162, 42951353, 0, <shapely.geometry.linestring.LineString object at 0x1530d4850>, 6.096998575547062e-07)
[!] End loading edge: 2021-01-10 18:06:57.890046
10  -  43007069  -  44.016530568475204
(43007069, 43007072, 0, <shapely.geometry.linestring.LineString object at 0x152e31220>, 7.220663124792896e-06)
[!] End loading edge: 2021-01-10 18:07:37.019785
12  -  42989117  -  70.94009899869464
(42976153, 42989117, 0, <shapely.geometry.linestring.LineString object at 0x1530289a0>, 3.249035753732037e-05)
[!] End loading edge: 2021-01-10 18:08:15.046316
17  -  42991645  -  18.96018372506337
(42960083, 42991645, 0, <shapely.geometry.linestring.LineString object at 0x153028070>, 8.785090065968767e-06)
[!] End loading

[!] End Loading: 2021-01-10 18:37:15.092185


### Set the risk on the nodes

In [64]:
def compute_risk(accident, total_injured, total_killed, injured_weight=1, killed_weight=2):
    """Combine accident statistics into a single risk score.

    Parameters
    ----------
    accident : number
        Number of accidents.
    total_injured : number
        Number of injured people.
    total_killed : number
        Number of people killed.
    injured_weight : number, optional
        Multiplier applied to ``total_injured`` (default 1).
    killed_weight : number, optional
        Multiplier applied to ``total_killed`` (default 2).

    Returns
    -------
    number
        ``accident + injured_weight * total_injured + killed_weight * total_killed``.
        With the default weights this is ``accident + total_injured + 2 * total_killed``.
    """
    return accident + (total_injured * injured_weight) + (total_killed * killed_weight)

In [68]:
print(f"[!] Start Loading: {datetime.now()}")
# A risk record is attached to its nearest node only when that node is closer
# than this limit (same unit as the distance returned by get_nearest_node).
distance_limit = 10

# Accumulate the per-node totals in a fresh dict instead of adding directly
# onto the attributes already stored in G: re-running this cell then yields
# the same totals instead of counting every record a second time.
node_totals = {}

for idx, row in df_risk.iterrows():
    node_id, distance = ox.get_nearest_node(G, (row["latitude"], row["longitude"]), return_dist=True)
    if distance < distance_limit:
        totals = node_totals.setdefault(
            node_id, {"accidents": 0, "total_injured": 0, "total_killed": 0}
        )
        for column in totals:
            totals[column] += row[column]
    # Progress trace every 100 index labels.
    if idx % 100 == 0:
        print(f"[{idx}]: {datetime.now()}")

# Write the totals and the derived risk score onto the matching graph nodes.
for node_id, totals in node_totals.items():
    G.nodes[node_id].update(totals)
    G.nodes[node_id]["risk"] = compute_risk(totals["accidents"],
                                            totals["total_injured"],
                                            totals["total_killed"])

print(f"[!] End Loading: {datetime.now()}")
print(f"[!] Start save: {datetime.now()}")
# Persist the enriched graph as GraphML (the file name keeps the ".osm" suffix).
ox.save_graphml(G, filepath="NYC_drive_risk.osm")
print(f"[!] End save: {datetime.now()}")
    

[!] Start Loading: 2021-01-10 20:01:18.845965
[0]: 2021-01-10 20:01:30.425571
[100]: 2021-01-10 20:01:40.471692
[200]: 2021-01-10 20:01:49.195064
[300]: 2021-01-10 20:01:59.602559
[400]: 2021-01-10 20:02:11.533356
[500]: 2021-01-10 20:02:24.473952
[600]: 2021-01-10 20:02:34.588596
[700]: 2021-01-10 20:02:44.415088
[800]: 2021-01-10 20:02:54.565187
[900]: 2021-01-10 20:03:04.193811
[1000]: 2021-01-10 20:03:14.499803
[1100]: 2021-01-10 20:03:24.136592
[1200]: 2021-01-10 20:03:33.128420
[1300]: 2021-01-10 20:03:42.654562
[1400]: 2021-01-10 20:03:54.358739
[1500]: 2021-01-10 20:04:07.457265
[1600]: 2021-01-10 20:04:18.418252
[1700]: 2021-01-10 20:04:28.706753
[1800]: 2021-01-10 20:04:38.056905
[1900]: 2021-01-10 20:04:48.610938
[2000]: 2021-01-10 20:04:59.713119
[2100]: 2021-01-10 20:05:12.203103
[2200]: 2021-01-10 20:05:25.769520
[2300]: 2021-01-10 20:05:36.837422
[2400]: 2021-01-10 20:05:50.557416
[2500]: 2021-01-10 20:06:03.439317
[2600]: 2021-01-10 20:06:14.054541
[2700]: 2021-01-10 20

[23000]: 2021-01-10 20:38:28.655542
[23100]: 2021-01-10 20:38:37.567066
[23200]: 2021-01-10 20:38:46.471725
[23300]: 2021-01-10 20:38:55.427202
[23400]: 2021-01-10 20:39:04.356392
[23500]: 2021-01-10 20:39:13.288468
[23600]: 2021-01-10 20:39:22.319020
[23700]: 2021-01-10 20:39:31.311444
[23800]: 2021-01-10 20:39:40.236102
[23900]: 2021-01-10 20:39:49.208932
[24000]: 2021-01-10 20:39:58.258402
[24100]: 2021-01-10 20:40:07.290987
[24200]: 2021-01-10 20:40:17.102240
[24300]: 2021-01-10 20:40:26.138735
[24400]: 2021-01-10 20:40:35.064290
[24500]: 2021-01-10 20:40:43.951019
[24600]: 2021-01-10 20:40:52.900987
[24700]: 2021-01-10 20:41:01.814710
[24800]: 2021-01-10 20:41:10.839926
[24900]: 2021-01-10 20:41:19.756729
[25000]: 2021-01-10 20:41:28.724108
[25100]: 2021-01-10 20:41:37.672507
[25200]: 2021-01-10 20:41:46.644103
[25300]: 2021-01-10 20:41:55.760496
[25400]: 2021-01-10 20:42:04.700980
[25500]: 2021-01-10 20:42:13.654052
[25600]: 2021-01-10 20:42:22.652493
[25700]: 2021-01-10 20:42:31

In [81]:
print(f"[!] Start Loading: {datetime.now()}")
# Risk records whose nearest node is at least this far away count as "not found".
distance_limit = 10

# Remember only the row positions inside the loop and build the frame once at
# the end. Appending to a (Geo)DataFrame on every iteration copies the whole
# frame each time, which made the recorded run slow down progressively, and
# DataFrame.append no longer exists in pandas 2.x.
not_found_positions = []

try:
    for position, (idx, row) in enumerate(df_risk.iterrows()):
        node_id, distance = ox.get_nearest_node(G, (row["latitude"], row["longitude"]), return_dist=True)
        if distance >= distance_limit:
            not_found_positions.append(position)
        # Progress trace every 100 index labels.
        if idx % 100 == 0:
            print(f"[{idx}]: {datetime.now()}")
finally:
    # Built in `finally` so the rows gathered so far stay available in
    # gdf_point_not_found even if the loop is interrupted (as before).
    gdf_point_not_found = gpd.GeoDataFrame(df_risk.iloc[not_found_positions])

print(f"[!] End Loading: {datetime.now()}")
print(f"[!] Start save: {datetime.now()}")
gdf_point_not_found.to_csv('point_not_found.csv')
print(f"[!] End save: {datetime.now()}")


[!] Start Loading: 2021-01-10 22:30:33.431307
[0]: 2021-01-10 22:30:38.422877
[100]: 2021-01-10 22:30:52.737558
[200]: 2021-01-10 22:31:04.337042
[300]: 2021-01-10 22:31:17.961289
[400]: 2021-01-10 22:31:29.623632
[500]: 2021-01-10 22:31:40.580037
[600]: 2021-01-10 22:31:53.429387
[700]: 2021-01-10 22:32:05.921280
[800]: 2021-01-10 22:32:23.825814
[900]: 2021-01-10 22:32:39.939712
[1000]: 2021-01-10 22:32:57.467877
[1100]: 2021-01-10 22:33:13.596804
[1200]: 2021-01-10 22:33:29.256879
[1300]: 2021-01-10 22:33:56.385956
[1400]: 2021-01-10 22:34:30.317225
[1500]: 2021-01-10 22:34:52.505580
[1600]: 2021-01-10 22:35:14.972280
[1700]: 2021-01-10 22:35:38.835433
[1800]: 2021-01-10 22:36:06.903337
[1900]: 2021-01-10 22:36:35.209594
[2000]: 2021-01-10 22:37:05.696567
[2100]: 2021-01-10 22:37:31.719835
[2200]: 2021-01-10 22:38:00.781049
[2300]: 2021-01-10 22:38:27.051653
[2400]: 2021-01-10 22:39:21.456885
[2500]: 2021-01-10 22:39:43.272975
[2600]: 2021-01-10 22:40:07.480667
[2700]: 2021-01-10 22

[23000]: 2021-01-11 08:37:04.226952
[23100]: 2021-01-11 08:39:27.087447
[23200]: 2021-01-11 08:43:28.649347
[23300]: 2021-01-11 08:45:56.041341
[23400]: 2021-01-11 08:48:04.529982
[23500]: 2021-01-11 08:50:09.943663
[23600]: 2021-01-11 08:52:04.103454
[23700]: 2021-01-11 08:54:33.920305
[23800]: 2021-01-11 08:56:20.081634
[23900]: 2021-01-11 08:57:54.842999
[24000]: 2021-01-11 09:00:15.495732
[24100]: 2021-01-11 09:04:59.311333
[24200]: 2021-01-11 09:08:23.754818
[24300]: 2021-01-11 09:11:59.094989
[24400]: 2021-01-11 09:15:05.380909
[24500]: 2021-01-11 09:19:24.160992


KeyboardInterrupt: 

In [82]:

gdf_point_not_found.shape


(13096, 6)

In [83]:
# Save the rows collected so far. In the recorded run the loop cell above was
# stopped with a KeyboardInterrupt, so its own to_csv step was never reached.
gdf_point_not_found.to_csv('point_not_found.csv')