In [None]:
import osmnx as ox
from osmnx import geometries as geom
import networkx as nx
import numpy as np
import pandas as pd
import geopandas 
from geopandas import GeoDataFrame 
import time
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
from scipy.spatial.distance import squareform, pdist
from sklearn.cluster import DBSCAN

# Configure OSMnx through its settings module: ox.config() is deprecated in
# OSMnx 1.x and is not available in later releases.
ox.settings.use_cache = True    # reuse cached HTTP responses on re-runs
ox.settings.log_console = True  # echo OSMnx log messages to the console

In [None]:
# DBSCAN parameters
eps = 300  # neighbourhood radius, in meters
minpts = 10  # smallest cluster size allowed
# We cluster network nodes rather than individual firms, so nothing should be
# thrown away as noise: a node may sit alone and still be the nearest node of
# a hundred points.
pseudo_minpts = 1

# place name handed to the OSMnx queries
place = 'Amsterdam, Netherlands'

In [None]:
# download the drivable street network of the place, then draw it
G = ox.graph_from_place(place, network_type="drive")
ox.plot_graph(G)


In [None]:
# size of the street network: node count, then edge count
print(G.number_of_nodes())
print(G.number_of_edges())

In [None]:
# annotate every edge with a speed, then derive its travel time from that speed
G = ox.speed.add_edge_travel_times(ox.speed.add_edge_speeds(G))


In [None]:
# query OpenStreetMap for every feature tagged amenity=bar inside the place
tags = dict(amenity="bar")
bars = ox.geometries_from_place(place, tags=tags)

In [None]:
# keep only the bars that have a name
bars = bars[bars['name'].notna()]

In [None]:
# build plot_bars, a (name, geometry) frame with one row per bar, from which
# the coordinates are extracted in the following cells: the two columns are
# stacked as rows and the result is then transposed
plot_bars = geopandas.GeoDataFrame([bars['name'], bars['geometry']]).T

In [None]:
# Declare the CRS the bar coordinates are actually expressed in.
# Assigning `.crs` only relabels coordinates, it never reprojects them, and
# to_crs() towards the CRS a frame is already labelled with changes nothing.
# Tagging this data with a projected CRS such as EPSG:7415 would therefore
# leave lon/lat degrees carrying a wrong label. The later steps (np.deg2rad +
# haversine, plotting over the unprojected graph G, nearest-node lookup on G)
# all need coordinates in degrees, so the data is declared as WGS84 lon/lat.
# NOTE(review): assumes `bars` was returned by OSMnx in its default EPSG:4326 -- confirm.
plot_bars.crs = "EPSG:4326"
# Same CRS on both sides: coordinates are untouched, bars_test is a separate frame.
bars_test = plot_bars.to_crs("EPSG:4326")

In [None]:
# x-coordinate of every bar centroid, as a one-column frame
# NOTE: despite the name `lat`, x is the longitude when the geometries are lon/lat
lat = bars_test.centroid.x.to_frame()

In [None]:
# y-coordinate of every bar centroid, as a one-column frame
# NOTE: despite the name `long`, y is the latitude when the geometries are lon/lat
long = bars_test.centroid.y.to_frame()

In [None]:
# remember the original index of the bars (the OSM ids) in an otherwise empty
# frame, in case it is needed once the coordinate frames are re-indexed
index_df = pd.DataFrame(index=long.index)

In [None]:
# discard the OSM-id index of both coordinate frames; rows are renumbered from 0

lat = lat.reset_index(drop=True)
long = long.reset_index(drop=True)

In [None]:
# place the two coordinate columns side by side: one row per bar
latlong = pd.concat([lat, long], axis="columns")

In [None]:
# name the coordinate columns
# NOTE: 'Latitude' holds centroid.x and 'Longitude' holds centroid.y; the
# cells below index the frame with these names as they are
latlong.columns = ['Latitude', 'Longitude']

In [None]:
# draw the street network without node markers, then overlay every bar as a black dot
# ('Latitude' goes on the x axis because that column holds centroid.x)
fig, ax = ox.plot_graph(
    G,
    node_size=0,
    node_color='#aaaa',
    bgcolor='white',
    show=False,
    close=True,
)
ax.scatter(latlong['Latitude'], latlong['Longitude'], s=50, c='k', marker='.', zorder=3)
fig.canvas.draw()
fig

In [None]:
%%time
# compute DBSCAN using straight-line haversine distances
EARTH_RADIUS_M = 6371000.  # mean Earth radius, in meters
# The haversine metric measures angles on the unit sphere, so the search
# radius is converted from meters to radians.
eps_rad = eps / EARTH_RADIUS_M
db = DBSCAN(eps=eps_rad, min_samples=minpts, metric='haversine', algorithm='ball_tree')
# The coordinate columns must be selected from `latlong` before the
# degree -> radian conversion; np.deg2rad cannot work on the bare column names.
# scikit-learn's haversine metric expects (latitude, longitude) pairs. In this
# frame 'Longitude' holds centroid.y and 'Latitude' holds centroid.x, so the
# order below is (y, x).
latlong['spatial_cluster'] = db.fit_predict(np.deg2rad(latlong[['Longitude', 'Latitude']]))

In [None]:
# number of distinct labels DBSCAN assigned (every label value is counted)
latlong['spatial_cluster'].nunique(dropna=False)

In [None]:
# plot bars by cluster
# The label -> colour mapping is built from the labels actually present, so
# any number of clusters can be drawn; colours are taken from the 'tab20'
# palette and reused cyclically if there are more clusters than colours.
cluster_labels = sorted(c for c in latlong['spatial_cluster'].unique() if c != -1)
palette = plt.get_cmap('tab20')
color_map = {label: palette(i % palette.N) for i, label in enumerate(cluster_labels)}
# label -1 (DBSCAN noise) is drawn in black so it cannot be confused with a cluster
color_map[-1] = (0.0, 0.0, 0.0, 1.0)
point_colors = [color_map[c] for c in latlong['spatial_cluster']]
fig, ax = ox.plot_graph(G, bgcolor='white', node_size=0, show=False, close=True)
# 'Latitude' goes on the x axis because that column holds centroid.x
ax.scatter(x=latlong['Latitude'], y=latlong['Longitude'], c=point_colors, marker='.', s=50, zorder=3)
fig.canvas.draw()
fig

In [None]:
# snap every bar to its nearest street-network node
# (X receives the 'Latitude' column because that column holds centroid.x)
latlong['nn'] = ox.nearest_nodes(G, X=latlong['Latitude'], Y=latlong['Longitude'])
print(latlong['nn'].size)

# distinct nodes that have at least one bar attached, as a Series indexed by
# its own values; distances are computed for each pair of these nodes
node_ids = latlong['nn'].unique()
nodes_unique = pd.Series(node_ids, index=node_ids)
print(nodes_unique.size)

# collapse the MultiDiGraph into a DiGraph for a simpler, faster distance matrix computation
G_dm = nx.DiGraph(G)

In [None]:
# calculate network-based distance between each node
def network_distance_matrix(u, G, vs=nodes_unique):
    """Return the shortest-path distance from node `u` to every node in `vs`.

    Parameters
    ----------
    u : node of `G` used as the source.
    G : networkx graph; paths are weighted by the 'length' edge attribute.
    vs : iterable of target nodes (defaults to `nodes_unique`).

    Returns
    -------
    pandas.Series indexed by `vs` with the 'length'-weighted shortest-path
    distance from `u` to each target, and ``np.inf`` for any target that
    cannot be reached from `u`.
    """
    # A single single-source Dijkstra run gives the distance to every reachable
    # node, instead of one separate search per target. Nodes that cannot be
    # reached are simply missing from the result and are reported as inf.
    reachable = nx.single_source_dijkstra_path_length(G, u, weight='length')
    dists = [reachable.get(v, np.inf) for v in vs]
    return pd.Series(dists, index=vs)