In [1]:
import os
from collections import Counter

import gmaps
import numpy as np
import pandas as pd
from geopy.distance import geodesic
from matplotlib import pyplot as plt, colors
from sklearn.cluster import DBSCAN

In [2]:
# Loading the data
data = pd.read_csv("data/accidents_2012_to_2014.csv")

# Combine both filters into one boolean mask. Chaining two [] lookups
# (data[mask_a][mask_b]) applies a full-length mask to an already filtered
# frame, which makes pandas reindex the mask and emit a
# "Boolean Series key will be reindexed" UserWarning.
in_scope = data['Local_Authority_(District)'].isin([1, 5]) & data['Year'].isin([2013, 2014])
geo_data_ = data[in_scope]  # only years 2013-14 and districts 1, 5

# Columns shown in the marker info boxes.
geo_data = geo_data_[['Accident_Index', 'Latitude', 'Longitude', 'Date', 'Accident_Severity']]
# The heatmap uses every row of the file, not only the filtered subset.
heatmap_data = data[['Latitude', 'Longitude']]
# (latitude, longitude) pairs of the filtered rows, used as clustering input.
X = geo_data_[['Latitude', 'Longitude']].values

  interactivity=interactivity, compiler=compiler, result=result)
  This is separate from the ipykernel package so we can avoid doing imports until


In [3]:
# The Google Maps API key is read from the GOOGLE_API_KEY environment variable
# so the credential never ends up in the notebook or its version history.
# A missing variable raises KeyError here instead of failing later in the widget.
# NOTE(review): a literal key used to be committed on this line; it should be
# revoked and reissued.
gmaps.configure(api_key=os.environ['GOOGLE_API_KEY'])

In [14]:
# Heatmap

# Put latitude and longitude side by side: one (lat, lon) row per row of heatmap_data.
locations = np.column_stack((heatmap_data.Latitude.values, heatmap_data.Longitude.values))
heatmap = gmaps.heatmap_layer(locations)

# Base map that carries only the heatmap layer.
fig = gmaps.figure()
fig.add_layer(heatmap)

fig

Figure(layout=FigureLayout(height='420px'))

In [33]:
# Fixed radius and intensity ceiling so the heatmap points do not
# dissipate when zooming in.
heatmap.point_radius = 10
heatmap.max_intensity = 10000

In [4]:
# creating colour lists
# Short palette: hex codes of eight hand-picked named colours.
base_colour_names = ('blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'purple')
colours = [colors.cnames[colour_name] for colour_name in base_colour_names]

# Longer palette: a 40-entry slice of matplotlib's named-colour table.
all_hex_codes = list(colors.cnames.values())
colours_ = all_hex_codes[10:50]

In [5]:
# creating a list of mappings for info_box
N = len(geo_data)

# One {column: value} dict per row, produced in a single call rather than
# building a temporary Series per row with .iloc inside a Python loop.
# dtype=object keeps this a 1-D array of dicts (also when geo_data is empty),
# so it can be fancy-indexed with the member indices of each cluster.
accident_list = np.array(geo_data.to_dict('records'), dtype=object)

In [127]:
# Density-based clustering directly on the (latitude, longitude) rows of X.
# No metric is passed, so DBSCAN's default metric applies and eps is
# expressed in the same units as the coordinates in X.
dbscan = DBSCAN(eps=0.003, min_samples=90)
clusters = dbscan.fit(X)

In [232]:
marker_fig = gmaps.figure()  # a separate figure for clustering + heatmap

# HTML shown when a marker is clicked; placeholders are the keys of each
# accident dict in accident_list.
info_box_template = """
<dl>
<dt>Index</dt><dd>{Accident_Index}</dd>
<dt>Location</dt><dd>({Latitude}, {Longitude})</dd>
<dt>Date</dt><dd>{Date}</dd>
<dt>Severity</dt><dd>{Accident_Severity}</dd>
</dl>
"""
marker_list = []  # contains markers of each cluster
clust_lbls = clusters.labels_  # cluster labels

# creating list of cluster layers
# DBSCAN labels noise points -1. Filter that label out explicitly: slicing
# list(set(...))[:-1] relied on set iteration order and would drop a real
# cluster whenever no point was classified as noise.
for label in np.unique(clust_lbls[clust_lbls != -1]):
    idc = np.where(clust_lbls == label)
    X_ = X[idc]
    accident_info = [info_box_template.format(**accident) for accident in accident_list[idc]]
    markers = gmaps.symbol_layer(
        X_,
        # Wrap around the palette so more clusters than colours cannot raise IndexError.
        fill_color=colours[label % len(colours)],
        stroke_opacity=0,
        scale=3,
        info_box_content=accident_info,
    )
    marker_list.append(markers)
    marker_fig.add_layer(markers)


In [233]:
# Add the heatmap layer to the cluster-marker figure so both are shown on one map.
marker_fig.add_layer(heatmap)

In [234]:
# Bare expression: renders the cluster-marker figure as the cell output.
marker_fig

Figure(layout=FigureLayout(height='420px'))

In [242]:
# function for calculating geodesic distance matrix
def dist_mx_geo(X):
    """Return the symmetric matrix of pairwise geodesic distances in kilometres.

    Parameters
    ----------
    X : indexable sequence of points
        Each element is passed unchanged to ``geodesic``; ``len(X)`` fixes
        the size of the result.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(X), len(X))`` with zeros on the diagonal and
        ``geodesic(X[i], X[j]).km`` at positions ``[i, j]`` and ``[j, i]``.

    A progress counter is written to stdout after each row, overwriting the
    previous one via a carriage return.
    """
    n_points = len(X)
    distances = np.zeros((n_points, n_points))
    last_row = n_points - 1

    for row in range(n_points):
        origin = X[row]
        # Only the upper triangle is computed; each value is mirrored below the diagonal.
        for col in range(row + 1, n_points):
            km = geodesic(origin, X[col]).km
            distances[row, col] = km
            distances[col, row] = km
        print(f'\r{row} out of {last_row}', end='')

    return distances

In [6]:
# Calculating distance matrix
# dist_mx_geo evaluates every pair of points in nested Python loops, so the
# result is cached on disk and only recomputed when the cache file is missing.
# This lets the notebook run from a fresh checkout without editing the cell.
DIST_CACHE = 'distances.npy'
if os.path.exists(DIST_CACHE):
    M = np.load(DIST_CACHE)
else:
    M = dist_mx_geo(X)
    np.save(DIST_CACHE, M)

# Cluster labels derived from M are later used to index X, so a cache built
# from a different selection of rows must not be accepted silently.
assert M.shape == (len(X), len(X)), (
    f'{DIST_CACHE} has shape {M.shape}, expected {(len(X), len(X))}; '
    'delete the file to recompute it'
)

In [7]:
# clustering using geodesic metric
# metric='precomputed' makes DBSCAN read M as the pairwise distance matrix,
# so eps is in the units of M's entries.
dbscan_g = DBSCAN(eps=0.1, min_samples=30, metric='precomputed')
clusters_g = dbscan_g.fit(M)

In [8]:
# map for clusters created using geodesic metric
marker_fig_g = gmaps.figure()  # a separate figure for clustering + heatmap

# HTML shown when a marker is clicked; placeholders are the keys of each
# accident dict in accident_list.
info_box_template = """
<dl>
<dt>Index</dt><dd>{Accident_Index}</dd>
<dt>Location</dt><dd>({Latitude}, {Longitude})</dd>
<dt>Date</dt><dd>{Date}</dd>
<dt>Severity</dt><dd>{Accident_Severity}</dd>
</dl>
"""
marker_list_g = []  # contains markers of each cluster
clust_lbls_g = clusters_g.labels_  # cluster labels

# creating list of cluster layers
# DBSCAN labels noise points -1. Filter that label out explicitly: slicing
# list(set(...))[:-1] relied on set iteration order and would drop a real
# cluster whenever no point was classified as noise.
for label in np.unique(clust_lbls_g[clust_lbls_g != -1]):
    idc = np.where(clust_lbls_g == label)
    X_ = X[idc]
    accident_info = [info_box_template.format(**accident) for accident in accident_list[idc]]
    markers_g = gmaps.symbol_layer(
        X_,
        # Wrap around the palette so more clusters than colours cannot raise IndexError.
        fill_color=colours_[label % len(colours_)],
        stroke_opacity=0,
        scale=3,
        info_box_content=accident_info,
    )
    marker_list_g.append(markers_g)
    marker_fig_g.add_layer(markers_g)


In [9]:
# marker_fig_g.add_layer(heatmap)
# Bare expression: renders the geodesic-cluster figure as the cell output.
# The heatmap overlay on the line above is left disabled.
marker_fig_g

Figure(layout=FigureLayout(height='420px'))

False