In [1]:
from datetime import datetime
from pathlib import Path

import geopandas as gpd
import networkx as nx
import osmnx as ox
import pandas as pd
from shapely.geometry import Point
# Print the version of each library, in the same order as before: osmnx, networkx, geopandas, pandas.
for library in (ox, nx, gpd, pd):
    print(library.__version__)



KeyboardInterrupt: 

In [None]:
# Load the input records and attach a point geometry (x = longitude, y = latitude).
df_risk = pd.read_csv('../data/data_100000_out_final.csv')
# points_from_xy builds the whole geometry column in one vectorised call instead
# of constructing one shapely Point per row in a Python-level loop.
geometry = gpd.points_from_xy(df_risk.longitude, df_risk.latitude)
gdf_risk = gpd.GeoDataFrame(df_risk, crs="EPSG:4326", geometry=geometry)

In [None]:
# Order the records by ZIP code, then by coordinates, and renumber the index from 0.
sort_keys = ["zip_code", "latitude", "longitude"]
df_risk.sort_values(by=sort_keys, ignore_index=True, inplace=True)
df_risk.head()

In [None]:
# Number of records per ZIP code.
df_risk.groupby("zip_code").size()

In [None]:
# Replace every ZIP code whose numeric value is below 1000 with the sentinel -1.
invalid_zip = df_risk["zip_code"] < 1000
df_risk.loc[invalid_zip, "zip_code"] = -1

In [None]:
# Inspect the first 40 rows of df_risk.
df_risk.head(40)

In [None]:
# Read the ZIP-code polygons from the GeoJSON file into a GeoDataFrame.
zip_polygons_path = "../data/nyc_zip_code_tabulation_areas_polygons.geojson"
df_zip_bounds = gpd.read_file(zip_polygons_path)

#df_zip_bounds = df_risk.groupby("zip_code").agg({'latitude':['min', 'max'],'longitude':['min', 'max']})
df_zip_bounds

In [None]:
# Take the envelope (bounding rectangle) of the first polygon and print the
# first four coordinates of its exterior ring.
tb = df_zip_bounds.loc[0, "geometry"].envelope
corners = tb.exterior.coords
for corner_idx in range(4):
    print(corners[corner_idx])

In [None]:
#from datetime import datetime

#dict_Gox = {}
#current_zip_loaded = 0
#G = None
#for idx, row in df_zip_bounds.iterrows():
    
    # Load the graph
#    if current_zip_loaded != row.postalcode:
#        print("[!] Start ", row.postalcode, " - ", datetime.now())
#        current_zip_loaded = row.postalcode
#        tb = row.geometry.envelope
#        try:
#            print(tb.exterior.coords[2][1])
#            G = ox.graph_from_bbox(tb.exterior.coords[2][1], tb.exterior.coords[0][1],
#                                   tb.exterior.coords[2][0], tb.exterior.coords[0][0],
#                                   network_type='drive')
#            dict_Gox[current_zip_loaded] = G
#            print("nodes = ", G.number_of_nodes())
#            print("edges = ", G.number_of_edges())
#            print("[!] End ", row.postalcode, " - ", datetime.now())
#        except Exception:
#            print("[!] Oups ", row.postalcode, " - ", datetime.now())


In [None]:
# Load the cached per-ZIP drive graphs from disk, keyed by postal code.
dict_Gox = {}
missing_zip_graphs = []  # postal codes for which no graph could be loaded
for idx, row in df_zip_bounds.iterrows():
    key = row.postalcode
    print(key)

    # This loader originally asked for "NYC_drive{key}.osm", while the save cell
    # of this notebook writes "NYC_drive_{key}.osm". Accept either name, trying
    # the original one first so existing setups behave as before.
    candidates = (Path(f'data/OSM/NYC_drive{key}.osm'),
                  Path(f'data/OSM/NYC_drive_{key}.osm'))
    graph_path = next((p for p in candidates if p.exists()), None)
    if graph_path is None:
        missing_zip_graphs.append(key)
        continue

    try:
        dict_Gox[key] = ox.load_graphml(filepath=str(graph_path))
    except Exception as err:
        # Still best-effort (the loop continues), but the failure is reported
        # instead of being silently swallowed.
        print(f"[!] Could not load {graph_path}: {err}")
        missing_zip_graphs.append(key)

print(f"[!] {len(dict_Gox)} graphs loaded, {len(missing_zip_graphs)} ZIP codes without a graph")

In [None]:
# Number of postal codes for which a graph is held in dict_Gox.
len(dict_Gox)

In [None]:
#for key in dict_Gox:
#    print("[!] Start ", key, " - ", datetime.now())
#    ox.save_graphml(dict_Gox[key], filepath=f'data/OSM/NYC_drive_{key}.osm')

In [None]:
import numpy as np

# Create the four result columns of the snapping step, filled with NaN.
for column in ("u", "v", "k", "dist"):
    df_risk[column] = np.nan

In [None]:
# Preview the first rows of df_risk.
df_risk.head()

In [None]:
# Snap each record to the graph of its ZIP code: to the nearest node when that
# node is at distance <= 10, otherwise to the nearest edge (u, v, k).
# NOTE(review): in the edge branch "dist" receives the nearest-NODE distance; the
# edge distance returned by get_nearest_edge is unpacked but never stored.
# Possibly deliberate (the two osmnx calls may not report the same units) — confirm.
current_zip = None
G = None
for idx, row in df_risk.iterrows():

    zip_changed = row.zip_code != current_zip
    if zip_changed:
        # Switch to the graph of the new ZIP code; None when dict_Gox has no entry for it.
        current_zip = row.zip_code
        G = dict_Gox.get(str(current_zip))

    if G is not None:
        location = (row.latitude, row.longitude)
        node_id, distance = ox.get_nearest_node(G, location, return_dist=True)
        if distance <= 10:
            #print('NODE ', node_id, ' - ', distance)
            df_risk.at[idx, "u"] = node_id
        else:
            #print('EDGE ', node_id, ' - ', distance)
            u, v, k, edge_dist = ox.get_nearest_edge(G, location, return_geom=False, return_dist=True)
            df_risk.at[idx, "u"] = u
            df_risk.at[idx, "v"] = v
            df_risk.at[idx, "k"] = k
        # Both branches record the node distance.
        df_risk.at[idx, "dist"] = distance

    # Progress trace every 1000 index values.
    if idx % 1000 == 0:
        print("[!] Loaded", idx , " - ", datetime.now())
        
    

In [None]:
# Write df_risk (including the u / v / k / dist columns) to a CSV file in the working directory.
df_risk.to_csv("NYC_crashes_100000_osmid.csv")

In [None]:
# Re-order by "dist" ascending with missing values first; index labels are preserved.
df_risk.sort_values(by="dist", na_position="first", ignore_index=False, inplace=True)

In [None]:
# Show the record whose index label is 1708.
df_risk.loc[1708]

In [None]:
# Shape of the subset with dist > 200 and off_street_name equal to 'Unknown'.
far_from_graph = df_risk.dist > 200
unknown_off_street = df_risk.off_street_name == 'Unknown'
df_risk[far_from_graph & unknown_off_street].shape
#df_risk[(df_risk.dist.isna())].shape

In [None]:
import folium

# Map centre; `latitude` and `longitude` are read again by the cluster-map cell.
latitude = 40.677834
longitude = -74.012443
map_nyc = folium.Map(location=[latitude, longitude], zoom_start=10)

#index = df_risk.index #[3041,3043,3042,2983,2985]
index = [1708]
for i in index:
#for i in range(100):
    crash_lat = df_risk.at[i, 'latitude']
    crash_lon = df_risk.at[i, 'longitude']

    # Green marker at the record's position, with its stored distance in the popup.
    marker = folium.Marker([crash_lat, crash_lon],
                           popup=f'{i}, distance = {df_risk.at[i, "dist"]}',
                           icon=folium.Icon(color="green"))
    marker.add_to(map_nyc)

    # Draw element 3 of the nearest-edge result (requested with return_geom=True) in red.
    G = dict_Gox[str(df_risk.at[i, "zip_code"])]
    nearest_edge = ox.get_nearest_edge(G, (crash_lat, crash_lon), return_geom=True, return_dist=True)
    edge_layer = folium.Choropleth(nearest_edge[3], line_weight=5, line_color='red', line_opacity=0.5)
    edge_layer.add_to(map_nyc)

map_nyc

In [None]:
# Spot-check one record: look up its nearest node and nearest edge, print the node and its distance.
i = 1183
location = (df_risk.at[i, 'latitude'], df_risk.at[i, 'longitude'])
G = dict_Gox[str(df_risk.at[i, "zip_code"])]
node_id, distance = ox.get_nearest_node(G, location, return_dist=True)
nearest_edge = ox.get_nearest_edge(G, location, return_geom=True, return_dist=True)
print(node_id, '-', distance)

### Data finalization

In [None]:
# Keep only records that were matched to a graph element (u is set) with dist <= 200.
# .copy() makes df_cleaned an independent frame: the notebook later assigns a new
# column to it, which on a plain boolean-mask slice of df_risk triggers pandas'
# SettingWithCopyWarning.
is_matched = df_risk['u'].notna() & (df_risk['dist'] <= 200)
df_cleaned = df_risk[is_matched].copy()
df_cleaned.shape

In [None]:
def get_risk_value(row):
    """Return the risk score of a record: 1 + injured + 2 * killed.

    ``row`` is indexed by the keys 'persons_injured' and 'persons_killed'.
    """
    injured = row['persons_injured']
    killed = row['persons_killed']
    return 1 + injured + killed*2

In [None]:
# Add the "risk" column.
# - .assign() returns a new frame, so nothing is written into a slice of df_risk
#   (the previous item assignment could raise SettingWithCopyWarning).
# - get_risk_value only indexes by column name and does arithmetic, so it can be
#   applied to the whole frame at once instead of row by row through a lambda.
df_cleaned = df_cleaned.assign(risk=get_risk_value(df_cleaned))

In [None]:
# Rows without v / k get 0 in both columns; the graph-update cell treats v == 0
# as "this record was matched to a node".
node_defaults = {'v': 0, 'k': 0}
df_cleaned = df_cleaned.fillna(node_defaults)

In [None]:
# Preview the first rows of df_cleaned.
df_cleaned.head()

In [None]:
# Total risk per (u, v, k) key.
series_risk = df_cleaned.groupby(['u', 'v', 'k'])['risk'].sum()

In [None]:
# Display the aggregated risk series.
series_risk

### Append risk to OSM Graph

In [None]:
# Load the graph stored in data/NYC_drive.osm; the following cells write "risk" attributes onto it.
G = ox.load_graphml(filepath='data/NYC_drive.osm') #, node_dtypes=specific_dtypes, edge_dtypes=specific_dtypes)

In [None]:
# Write the aggregated risk onto the graph. A key with v == 0 is a node match and
# sets the node attribute; any other key sets the attribute on edge (u, v, k) and
# on the reverse edge (v, u, k).
unmatched_keys = []  # (u, v, k) keys for which a node/edge lookup raised KeyError
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
for (u, v, k), risk in series_risk.items():
    try:
        if v == 0:
            G.nodes[u]["risk"] = risk
        else:
            G.edges[u, v, k]["risk"] = risk
            # Raises KeyError when the reverse edge does not exist; the forward
            # edge has already been updated at that point.
            G.edges[v, u, k]["risk"] = risk
    except KeyError:
        # Still best-effort, but the misses are recorded instead of being dropped silently.
        unmatched_keys.append((u, v, k))

print(f"[!] {len(unmatched_keys)} of {len(series_risk)} risk entries hit a missing node or edge")

In [None]:
def get_edge_global_risk(node_u, node_v, edge_k):
    """Return the mean of the two node risks plus the edge risk.

    Each argument is read with ``.get("risk", 0)``, so a missing "risk" entry
    counts as 0; every value is converted with ``float()`` before use.
    """
    u_risk, v_risk, own_risk = (
        float(attrs.get("risk", 0)) for attrs in (node_u, node_v, edge_k)
    )
    return (u_risk + v_risk) / 2 + own_risk

In [None]:
# Store on every edge a "global_risk" computed from the edge's own attributes and
# those of its two end nodes. Iterating with data=True hands back each edge's
# attribute dict directly, so the edge is not looked up twice more per iteration;
# the unused counter `i` is gone.
for u, v, k, edge_attrs in G.edges(keys=True, data=True):
    edge_attrs["global_risk"] = get_edge_global_risk(G.nodes[u], G.nodes[v], edge_attrs)

In [None]:
# Save the graph, now carrying the risk attributes, as GraphML in the working directory.
ox.save_graphml(G, filepath="NYC_drive_risk_full_100000.osm")

In [None]:
# List the columns of df_cleaned.
df_cleaned.columns

## Cluster map

In [None]:
# Build the point geometry (x = longitude, y = latitude) for the cleaned records.
# points_from_xy creates the whole column in one vectorised call instead of one
# shapely Point per row in a Python-level loop.
geometry = gpd.points_from_xy(df_cleaned.longitude, df_cleaned.latitude)
gdf_cleaned = gpd.GeoDataFrame(df_cleaned, crs="EPSG:4326", geometry=geometry)

In [None]:
#gdf_risk
from folium import plugins

# Clustered map with one circle per cleaned record, coloured by severity.
cluster_map = folium.Map(location=[latitude, longitude],
                         zoom_start=10)

marker_cluster = plugins.MarkerCluster().add_to(cluster_map)
for i, v in gdf_cleaned.iterrows():
    popup = """
    Killed : <b>%s</b><br>
    Injured : <b>%s</b><br>
    """ % (v['persons_killed'], v['persons_injured'])

    # Only the colour differs between the three severity levels, so choose it
    # first and create the marker once instead of repeating the call three times.
    if v['persons_killed'] > 0:
        severity_color = '#581845'
    elif v['persons_injured'] > 0:
        severity_color = '#C70039'
    else:
        severity_color = '#FFC300'

    folium.CircleMarker(location=[v['latitude'], v['longitude']],
                        radius=10,
                        tooltip=popup,
                        color=severity_color,
                        fill_color=severity_color,
                        fill_opacity=0.7,
                        fill=True).add_to(marker_cluster)

In [None]:
# Render the cluster map.
cluster_map