# Let's download all the dependencies that we will need.

In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analysis
import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

# Download and Explore Dataset

In [2]:
# Read the 2019 road-collision records into a dataframe.
collisions_csv = r"path\collisions_routieres_2019 - Copie.csv"
df = pd.read_csv(collisions_csv)
print('Data downloaded!')

Data downloaded!


# Cleaning the dataframe - drop the columns that are not useful

In [3]:
# Columns that are not needed for the analysis.
unused_columns = [
    'NB_BLESSES_LEGERS', 'JR_SEMN_ACCDN', 'DT_ACCDN', 'CD_MUNCP',
    'NO_CIVIQ_ACCDN', 'SFX_NO_CIVIQ_ACCDN', 'BORNE_KM_ACCDN', 'RUE_ACCDN',
    'TP_REPRR_ACCDN', 'ACCDN_PRES_DE', 'NB_METRE_DIST_ACCD', 'CD_GENRE_ACCDN',
    'CD_SIT_PRTCE_ACCDN', 'CD_ETAT_SURFC', 'CD_ECLRM', 'CD_ENVRN_ACCDN',
    'NO_ROUTE', 'CD_CATEG_ROUTE', 'CD_ETAT_CHASS', 'CD_ASPCT_ROUTE',
    'CD_LOCLN_ACCDN', 'CD_POSI_ACCDN', 'CD_CONFG_ROUTE', 'CD_ZON_TRAVX_ROUTR',
    'CD_PNT_CDRNL_ROUTE', 'CD_PNT_CDRNL_REPRR', 'CD_COND_METEO',
    'NB_VEH_IMPLIQUES_ACCDN', 'HEURE_ACCDN', 'AN', 'NB_VICTIMES_TOTAL',
    'GRAVITE', 'REG_ADM', 'MRC',
    'nb_automobile_camion_leger', 'nb_camionLourd_tractRoutier',
    'nb_outil_equipement', 'nb_tous_autobus_minibus', 'nb_bicyclette',
    'nb_cyclomoteur', 'nb_motocyclette', 'nb_taxi', 'nb_urgence',
    'nb_motoneige', 'nb_VHR', 'nb_autres_types', 'nb_veh_non_precise',
    'NB_DECES_PIETON', 'NB_BLESSES_PIETON', 'NB_VICTIMES_PIETON',
    'NB_DECES_MOTO', 'NB_BLESSES_MOTO', 'NB_VICTIMES_MOTO',
    'NB_DECES_VELO', 'NB_BLESSES_VELO', 'NB_VICTIMES_VELO',
    'VITESSE_AUTOR', 'LOC_X', 'LOC_Y', 'LOC_COTE_QD', 'LOC_COTE_PD',
    'LOC_DETACHEE', 'LOC_IMPRECISION',
]
df = df.drop(columns=unused_columns)

# Keep only the collisions with at least one death or one serious injury.
has_casualty = (df.NB_MORTS != 0) | (df.NB_BLESSES_GRAVES != 0)
df = df[has_casualty]

# reset_index() without drop=True keeps the previous row labels in an 'index' column.
df = df.reset_index()
df

Unnamed: 0,index,NO_SEQ_COLL,NB_MORTS,NB_BLESSES_GRAVES,latitude,longitude
0,129,SPVM _ 2019 _ 130,0,1,45.48505,-73.86485
1,165,SPVM _ 2019 _ 166,1,0,45.50952,-73.80060
2,210,SPVM _ 2019 _ 211,0,3,45.48921,-73.93608
3,246,SPVM _ 2019 _ 247,0,1,45.50787,-73.76006
4,287,SPVM _ 2019 _ 288,0,1,45.49430,-73.87576
...,...,...,...,...,...,...
155,18718,SPVM _ 2019 _ 18719,0,1,45.61275,-73.51807
156,18735,SPVM _ 2019 _ 18736,1,0,45.67214,-73.50408
157,18936,SPVM _ 2019 _ 18937,0,1,45.65120,-73.48977
158,19122,SPVM _ 2019 _ 19123,0,1,45.43823,-73.84873


# Use geopy library to get the latitude and longitude values of Montreal City.

In [4]:
# Geocode the city name to get the coordinates used to centre the maps.
address = 'Montreal City, QC'

geolocator = Nominatim(user_agent="ef_explorer")
location = geolocator.geocode(address)
# geocode() returns None when no match is found; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
Latitude = location.latitude
Longitude = location.longitude
print('The geograpical coordinate of Montreal City are {}, {}.'.format(Latitude, Longitude))

The geograpical coordinate of Montreal City are 45.4972159, -73.6103642.


# Create a map of Montreal with car accidents superimposed on top.

In [5]:
# Base map centred on the geocoded Montreal coordinates
map_montreal = folium.Map(location=[Latitude, Longitude], zoom_start=10)

# Options shared by every collision marker
marker_options = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per collision; the popup shows the collision sequence number
for lat, lng, nbr in zip(df['latitude'], df['longitude'], df['NO_SEQ_COLL']):
    popup = folium.Popup('{}'.format(nbr), parse_html=True)
    marker = folium.CircleMarker([lat, lng], popup=popup, **marker_options)
    marker.add_to(map_montreal)

map_montreal

# Explore the venues near each accident with Foursquare data

In [6]:
# Foursquare credentials needed to run the remainder of the notebook.
# They are read from environment variables so that secrets never have to be
# typed into (and saved with) the notebook; each falls back to an empty string
# when the variable is not set.
import os

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = os.environ.get('FOURSQUARE_VERSION', '') # Foursquare API version

In [7]:
# Look at the first accident in the dataframe (row label 0)
first_row = 0

collision_name = df.loc[first_row, 'NO_SEQ_COLL'] # collision name
collision_latitude = df.loc[first_row, 'latitude'] # collision latitude value
collision_longitude = df.loc[first_row, 'longitude'] # collision longitude value

summary = 'Latitude and longitude values of {} are {}, {}.'
print(summary.format(collision_name, collision_latitude, collision_longitude))

Latitude and longitude values of SPVM _ 2019 _ 130 are 45.48505, -73.86485.


# Get the top 100 venues in a radius of 500 meters

In [8]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius


# Build the "explore" request URL for the first collision.
# The URL embeds the client secret, so it is deliberately not displayed.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    collision_latitude, 
    collision_longitude, 
    radius, 
    LIMIT)

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here rather than as a KeyError later.
response = requests.get(url, timeout=10)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '606627d73f409152673632f8'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Pierrefonds-Roxboro',
  'headerFullLocation': 'Pierrefonds-Roxboro, Montreal',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 12,
  'suggestedBounds': {'ne': {'lat': 45.4895500045, 'lng': -73.85844344987976},
   'sw': {'lat': 45.4805499955, 'lng': -73.87125655012025}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bddfcf80ee3a5938f7c2fb0',
       'name': 'La Roulotte',
       'location': {'address': '10 rue Paiement',
        'crossStreet': 'Boul Gouin Ouest',
        'lat': 45.48287700123952,
        'lng': -73.86688392331969,
        'labeledLatLng

In [9]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or a dict)
        Must provide the category list under the key 'categories' or,
        failing that, under 'venue.categories'.

    Returns
    -------
    The 'name' of the first entry of the category list, or None when the
    list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being silently swallowed by a bare except.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [10]:
# Venue items of the first group in the response
venues = results['response']['groups'][0]['items']

# Columns to keep from the flattened JSON
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = pd.json_normalize(venues).loc[:, filtered_columns]

# Replace each category list with the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of every column name
nearby_venues.columns = [col.rsplit('.', 1)[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,La Roulotte,Diner,45.482877,-73.866884
1,Harvey's,Restaurant,45.483984,-73.860282
2,Club Piscine,Furniture / Home Store,45.482992,-73.860296
3,Second Cup,Coffee Shop,45.484049,-73.860264
4,Bulk Barn,Food & Drink Shop,45.484398,-73.858569


In [11]:
# Number of venues found around the first collision (one row per venue)
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

12 venues were returned by Foursquare.


# Let's create a function to repeat the same process for all the accidents in Montreal

In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location.

    Reads CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT from the notebook's
    global namespace and prints each name as it is processed.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Identifier and coordinates of each location; iterated in parallel.
    radius : int, default 500
        Search radius passed to the API.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Collision',
        'Collision Latitude', 'Collision Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        'Venue Category' is None for a venue without any category, and the
        frame is empty (but keeps its columns) when no venue was returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    column_names = ['Collision',
                    'Collision Latitude',
                    'Collision Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)

        # make the GET request; the timeout prevents one stalled call from
        # blocking the whole loop, and HTTP errors are raised explicitly
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            # A venue may come back with an empty category list; indexing
            # [0] unconditionally would raise IndexError and abort the run.
            categories = venue['categories']
            category = categories[0]['name'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing columns= here (rather than assigning .columns afterwards) also
    # works when no venue was returned at all.
    return pd.DataFrame(rows, columns=column_names)

In [13]:
# Fetch the nearby venues of every accident and collect them in a new dataframe
mtlCollision_venues = getNearbyVenues(
    df['NO_SEQ_COLL'],
    df['latitude'],
    df['longitude'],
)

SPVM _ 2019 _ 130
SPVM _ 2019 _ 166
SPVM _ 2019 _ 211
SPVM _ 2019 _ 247
SPVM _ 2019 _ 288
SPVM _ 2019 _ 300
SPVM _ 2019 _ 447
SPVM _ 2019 _ 545
SPVM _ 2019 _ 636
SPVM _ 2019 _ 753
SPVM _ 2019 _ 949
SPVM _ 2019 _ 977
SPVM _ 2019 _ 989
SPVM _ 2019 _ 1093
SPVM _ 2019 _ 1452
SPVM _ 2019 _ 1577
SPVM _ 2019 _ 1796
SPVM _ 2019 _ 1865
SPVM _ 2019 _ 1882
SPVM _ 2019 _ 1987
SPVM _ 2019 _ 2126
SPVM _ 2019 _ 2293
SPVM _ 2019 _ 2337
SPVM _ 2019 _ 2538
SPVM _ 2019 _ 2670
SPVM _ 2019 _ 2721
SPVM _ 2019 _ 2790
SPVM _ 2019 _ 3063
SPVM _ 2019 _ 3187
SPVM _ 2019 _ 3341
SPVM _ 2019 _ 3402
SPVM _ 2019 _ 3423
SPVM _ 2019 _ 3445
SPVM _ 2019 _ 3844
SPVM _ 2019 _ 3856
SPVM _ 2019 _ 3958
SPVM _ 2019 _ 4029
SPVM _ 2019 _ 4061
SPVM _ 2019 _ 4068
SPVM _ 2019 _ 4089
SPVM _ 2019 _ 4133
SPVM _ 2019 _ 4361
SPVM _ 2019 _ 4781
SPVM _ 2019 _ 5208
SPVM _ 2019 _ 5430
SPVM _ 2019 _ 5530
SPVM _ 2019 _ 5986
SPVM _ 2019 _ 6022
SPVM _ 2019 _ 6211
SPVM _ 2019 _ 6288
SPVM _ 2019 _ 6414
SPVM _ 2019 _ 6505
SPVM _ 2019 _ 6520
SPVM _

In [14]:
# Number of venue rows returned for each collision
venues_by_collision = mtlCollision_venues.groupby('Collision')
venues_by_collision.count()

Unnamed: 0_level_0,Collision Latitude,Collision Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Collision,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SPVM _ 2019 _ 10051,20,20,20,20,20,20
SPVM _ 2019 _ 10193,36,36,36,36,36,36
SPVM _ 2019 _ 10227,29,29,29,29,29,29
SPVM _ 2019 _ 10366,25,25,25,25,25,25
SPVM _ 2019 _ 10472,18,18,18,18,18,18
...,...,...,...,...,...,...
SPVM _ 2019 _ 9759,5,5,5,5,5,5
SPVM _ 2019 _ 977,14,14,14,14,14,14
SPVM _ 2019 _ 9827,15,15,15,15,15,15
SPVM _ 2019 _ 989,24,24,24,24,24,24


# Analysing each accident

In [15]:
# One-hot encode the venue categories (one indicator column per category)
mtl_onehot = pd.get_dummies(mtlCollision_venues[['Venue Category']], prefix="", prefix_sep="")

# Attach the collision identifier of each venue row
mtl_onehot['Collision'] = mtlCollision_venues['Collision'] 

# Rotate the column order so that the last column (just added) comes first
all_columns = list(mtl_onehot.columns)
fixed_columns = all_columns[-1:] + all_columns[:-1]
mtl_onehot = mtl_onehot[fixed_columns]

# Size of the encoded dataframe
mtl_onehot.shape

(3477, 298)

In [16]:
# Per collision, the mean of each indicator column, i.e. the share of the
# collision's venues that fall in each category
onehot_by_collision = mtl_onehot.groupby('Collision')
mtl_grouped = onehot_by_collision.mean().reset_index()
mtl_grouped

Unnamed: 0,Collision,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Arepa Restaurant,Art Gallery,Art Museum,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,SPVM _ 2019 _ 10051,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.050000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,SPVM _ 2019 _ 10193,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,...,0.0,0.000000,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,SPVM _ 2019 _ 10227,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.103448,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,SPVM _ 2019 _ 10366,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,SPVM _ 2019 _ 10472,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,...,0.0,0.000000,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,SPVM _ 2019 _ 9759,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
156,SPVM _ 2019 _ 977,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
157,SPVM _ 2019 _ 9827,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,...,0.0,0.000000,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
158,SPVM _ 2019 _ 989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# Report the 5 most common venue categories for each accident
num_top_venues = 5

for hood in mtl_grouped['Collision']:
    print("----"+hood+"----")
    # Transpose the collision's row so that every category becomes a row
    venue_freq = mtl_grouped[mtl_grouped['Collision'] == hood].T.reset_index()
    venue_freq.columns = ['venue','freq']
    # Skip the first row (it holds the 'Collision' label, not a frequency).
    # The frame is rebuilt with assign() instead of writing a column into the
    # .iloc slice, which can raise SettingWithCopyWarning.
    venue_freq = (
        venue_freq.iloc[1:]
        .assign(freq=lambda frame: frame['freq'].astype(float))
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
    )
    print(venue_freq.head(num_top_venues))
    print('\n')

----SPVM _ 2019 _ 10051----
                venue  freq
0          Restaurant  0.10
1       Grocery Store  0.10
2              Bakery  0.10
3  Spanish Restaurant  0.05
4    Greek Restaurant  0.05


----SPVM _ 2019 _ 10193----
                  venue  freq
0           Coffee Shop  0.08
1              Pharmacy  0.08
2         Deli / Bodega  0.06
3  Fast Food Restaurant  0.06
4             Bookstore  0.03


----SPVM _ 2019 _ 10227----
                   venue  freq
0            Coffee Shop  0.10
1  Vietnamese Restaurant  0.10
2                   Bank  0.07
3     Chinese Restaurant  0.07
4   Gym / Fitness Center  0.07


----SPVM _ 2019 _ 10366----
                 venue  freq
0       Clothing Store  0.12
1           Shoe Store  0.12
2          Coffee Shop  0.08
3  Sporting Goods Shop  0.08
4       Ice Cream Shop  0.04


----SPVM _ 2019 _ 10472----
                   venue  freq
0    Rental Car Location  0.11
1     Italian Restaurant  0.11
2  Vietnamese Restaurant  0.11
3      Indian Restau

                   venue  freq
0          Grocery Store  0.25
1                   Park  0.25
2   Fast Food Restaurant  0.25
3  Vietnamese Restaurant  0.25
4           Noodle House  0.00


----SPVM _ 2019 _ 1452----
                        venue  freq
0  Construction & Landscaping  0.25
1           Food & Drink Shop  0.25
2         Arts & Crafts Store  0.25
3           Electronics Store  0.25
4                      Museum  0.00


----SPVM _ 2019 _ 14698----
              venue  freq
0        Restaurant  0.13
1          Pharmacy  0.13
2  Department Store  0.07
3       Supermarket  0.07
4    Hardware Store  0.07


----SPVM _ 2019 _ 14944----
                        venue  freq
0  Construction & Landscaping  0.33
1                 Auto Garage  0.33
2           Electronics Store  0.33
3                      Museum  0.00
4                 Music Store  0.00


----SPVM _ 2019 _ 15016----
                           venue  freq
0                           Park   1.0
1  Paper / Office Supplies St

               venue  freq
0      Grocery Store   0.2
1        Gas Station   0.2
2     Sandwich Place   0.2
3           Pharmacy   0.2
4  Convenience Store   0.2


----SPVM _ 2019 _ 300----
                  venue  freq
0     Convenience Store   0.5
1                Island   0.5
2     Accessories Store   0.0
3          Noodle House   0.0
4  Outdoor Supply Store   0.0


----SPVM _ 2019 _ 3063----
                           venue  freq
0                           Park   1.0
1  Paper / Office Supplies Store   0.0
2           Outdoor Supply Store   0.0
3                Organic Grocery   0.0
4                   Optical Shop   0.0


----SPVM _ 2019 _ 3187----
                       venue  freq
0              Grocery Store  0.22
1      Vietnamese Restaurant  0.11
2               Soccer Field  0.11
3  Cajun / Creole Restaurant  0.11
4       Gym / Fitness Center  0.11


----SPVM _ 2019 _ 3341----
                  venue  freq
0        Breakfast Spot  0.08
1           Coffee Shop  0.08
2  Fast F

4             Steakhouse  0.03


----SPVM _ 2019 _ 8600----
                       venue  freq
0              Grocery Store  0.25
1     Furniture / Home Store  0.25
2                       Park  0.25
3  Cajun / Creole Restaurant  0.25
4                Music Store  0.00


----SPVM _ 2019 _ 8671----
               venue  freq
0               Park  0.25
1     Ice Cream Shop  0.12
2    Organic Grocery  0.12
3  French Restaurant  0.12
4        Supermarket  0.12


----SPVM _ 2019 _ 8682----
                     venue  freq
0           Hunting Supply  0.11
1                   Bakery  0.11
2            Bowling Alley  0.11
3  Fruit & Vegetable Store  0.11
4                 Gym Pool  0.11


----SPVM _ 2019 _ 8847----
               venue  freq
0       Gourmet Shop  0.25
1    Organic Grocery  0.25
2  French Restaurant  0.25
3     Hardware Store  0.25
4       Noodle House  0.00


----SPVM _ 2019 _ 8929----
        venue  freq
0  Food Truck  0.15
1      Garden  0.15
2  Restaurant  0.08
3      Museu

In [18]:
# Function to sort venues in descending order of frequency
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest values of a row, skipping its first entry.

    The first element of `row` is ignored; the remaining values are sorted in
    descending order and the index labels of the first `num_top_venues` of
    them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [19]:
# Create a dataframe with the top 10 venue categories for each collision
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Build the column names: '1st', '2nd', '3rd', then 'th' for every later rank.
# An explicit bounds test replaces the bare except that was used as control flow.
columns = ['Collision']
for rank in range(1, num_top_venues + 1):
    suffix = indicators[rank - 1] if rank <= len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# Rank the categories of every row of mtl_grouped, then build the dataframe in
# one go instead of assigning into it row by row.
top_venues = [
    return_most_common_venues(mtl_grouped.iloc[position, :], num_top_venues)
    for position in range(mtl_grouped.shape[0])
]
mtlCollision_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:], index=mtl_grouped.index)
mtlCollision_venues_sorted.insert(0, 'Collision', mtl_grouped['Collision'])

mtlCollision_venues_sorted.head()

Unnamed: 0,Collision,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,SPVM _ 2019 _ 10051,Bakery,Grocery Store,Restaurant,Mediterranean Restaurant,Spanish Restaurant,Sandwich Place,Dog Run,Gas Station,Bank,Metro Station
1,SPVM _ 2019 _ 10193,Pharmacy,Coffee Shop,Deli / Bodega,Fast Food Restaurant,Sushi Restaurant,Portuguese Restaurant,Italian Restaurant,Sandwich Place,Bakery,Bank
2,SPVM _ 2019 _ 10227,Vietnamese Restaurant,Coffee Shop,Bank,Gym / Fitness Center,Chinese Restaurant,Electronics Store,Seafood Restaurant,Food & Drink Shop,Japanese Restaurant,Multiplex
3,SPVM _ 2019 _ 10366,Shoe Store,Clothing Store,Sporting Goods Shop,Coffee Shop,Fast Food Restaurant,Gas Station,Shopping Mall,Restaurant,Jewelry Store,Department Store
4,SPVM _ 2019 _ 10472,Vietnamese Restaurant,Rental Car Location,Italian Restaurant,History Museum,Gym,Salon / Barbershop,Gas Station,Restaurant,Department Store,Park


# Clustering the accidents

In [20]:
# set number of clusters
kclusters = 10

# Feature matrix: every column except the collision identifier.
# `columns=` is used because the positional axis argument of DataFrame.drop
# (the former `drop('Collision', 1)`) is no longer accepted by pandas 2.x.
mtl_grouped_clustering = mtl_grouped.drop(columns='Collision')

# run k-means clustering; n_init is pinned to 10 (the long-standing default)
# so that results do not change with the scikit-learn version in use
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(mtl_grouped_clustering)

# check cluster labels generated for the first 10 rows of the dataframe
kmeans.labels_[0:10]

array([0, 2, 2, 2, 0, 2, 2, 2, 0, 0])

In [21]:
# Create dataframe that includes the cluster and top 10 venues

# Add clustering labels. A 'Cluster Labels' column left over from a previous
# run of this cell is dropped first, because insert() raises ValueError when
# the column already exists.
if 'Cluster Labels' in mtlCollision_venues_sorted.columns:
    mtlCollision_venues_sorted = mtlCollision_venues_sorted.drop(columns='Cluster Labels')
mtlCollision_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Join the cluster label and top venues onto the collision records, which
# carry the latitude/longitude of each accident. join() is a left join, so a
# collision missing from mtlCollision_venues_sorted keeps NaN in the new columns.
mtl_merged = df.join(mtlCollision_venues_sorted.set_index('Collision'), on='NO_SEQ_COLL')

mtl_merged.head() # check the last columns!

Unnamed: 0,index,NO_SEQ_COLL,NB_MORTS,NB_BLESSES_GRAVES,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,129,SPVM _ 2019 _ 130,0,1,45.48505,-73.86485,2,Restaurant,Pizza Place,Grocery Store,Diner,Furniture / Home Store,Coffee Shop,Food & Drink Shop,Pharmacy,Performing Arts Venue,Yoga Studio
1,165,SPVM _ 2019 _ 166,1,0,45.50952,-73.8006,7,Hookah Bar,Furniture / Home Store,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Fabric Shop,Event Space,Donut Shop
2,210,SPVM _ 2019 _ 211,0,3,45.48921,-73.93608,9,Construction & Landscaping,Yoga Studio,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
3,246,SPVM _ 2019 _ 247,0,1,45.50787,-73.76006,9,Construction & Landscaping,Park,Yoga Studio,Event Space,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
4,287,SPVM _ 2019 _ 288,0,1,45.4943,-73.87576,2,Gas Station,Sandwich Place,Convenience Store,Grocery Store,Pharmacy,Yoga Studio,Empanada Restaurant,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant


In [24]:
# Visualize the clustered data
# Create map
map_clusters = folium.Map(location=[Latitude, Longitude], zoom_start=11)

# Set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
for lat, lon, poi, cluster in zip(mtl_merged['latitude'], mtl_merged['longitude'], mtl_merged['NO_SEQ_COLL'], mtl_merged['Cluster Labels']):
    # A collision without a cluster label has NaN here, which also turns the
    # whole column into floats; skip those rows and index the colour list
    # with a real integer.
    if pd.isna(cluster):
        continue
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 is kept from the original colour mapping: label 0 wraps
    # around to the last colour, so every cluster still gets its own colour.
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examining each cluster

In [280]:
# Cluster 1 (k-means label 0)
# Columns shown: position 1 (NO_SEQ_COLL) plus everything from position 5 on.
# NOTE(review): position 5 is 'longitude', so latitude is left out - confirm this is intended.
in_cluster = mtl_merged['Cluster Labels'] == 0
report_columns = mtl_merged.columns[[1] + list(range(5, mtl_merged.shape[1]))]
mtl_merged.loc[in_cluster, report_columns]

Unnamed: 0,NO_SEQ_COLL,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,SPVM _ 2019 _ 447,-73.81386,0,Park,Mediterranean Restaurant,Grocery Store,Garden Center,Jewelry Store,Yoga Studio,Ethiopian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
23,SPVM _ 2019 _ 2538,-73.69298,0,Pool,Park,Intersection,Yoga Studio,Event Space,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
25,SPVM _ 2019 _ 2721,-73.67692,0,Pizza Place,Storage Facility,Park,Yoga Studio,Event Space,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
73,SPVM _ 2019 _ 8600,-73.53898,0,Park,Grocery Store,Discount Store,Yoga Studio,Dumpling Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
79,SPVM _ 2019 _ 9759,-73.63183,0,Park,Playground,Grocery Store,Tennis Court,Yoga Studio,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
125,SPVM _ 2019 _ 14470,-73.62964,0,Grocery Store,Fast Food Restaurant,Park,Vietnamese Restaurant,Yoga Studio,Event Service,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room
138,SPVM _ 2019 _ 16558,-73.56624,0,Park,Thai Restaurant,Convenience Store,Grocery Store,Event Service,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant


In [281]:
# Cluster 2 (k-means label 1)
# Columns shown: position 1 (NO_SEQ_COLL) plus everything from position 5 on.
in_cluster = mtl_merged['Cluster Labels'] == 1
report_columns = mtl_merged.columns[[1] + list(range(5, mtl_merged.shape[1]))]
mtl_merged.loc[in_cluster, report_columns]

Unnamed: 0,NO_SEQ_COLL,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,SPVM _ 2019 _ 1577,-73.70608,1,Coffee Shop,Hotel,Intersection,Fast Food Restaurant,Café,Sandwich Place,Bowling Alley,Airport Terminal,Dance Studio,Falafel Restaurant
30,SPVM _ 2019 _ 3402,-73.72574,1,Coffee Shop,Pharmacy,Video Store,Café,Yoga Studio,Event Space,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room
88,SPVM _ 2019 _ 10666,-73.64236,1,Coffee Shop,Burger Joint,Park,Restaurant,Pub,Sushi Restaurant,Grocery Store,Café,Electronics Store,Empanada Restaurant
95,SPVM _ 2019 _ 11131,-73.63569,1,Coffee Shop,Café,Park,Fast Food Restaurant,Ice Cream Shop,Pub,Restaurant,Sushi Restaurant,Liquor Store,Yoga Studio
148,SPVM _ 2019 _ 17824,-73.52965,1,Coffee Shop,Restaurant,Yoga Studio,Event Space,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service
153,SPVM _ 2019 _ 18588,-73.49609,1,Ice Cream Shop,Convenience Store,Coffee Shop,Gas Station,Yoga Studio,Event Space,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant


In [282]:
# Cluster 3 (k-means label 2)
# Columns shown: position 1 (NO_SEQ_COLL) plus everything from position 5 on.
in_cluster = mtl_merged['Cluster Labels'] == 2
report_columns = mtl_merged.columns[[1] + list(range(5, mtl_merged.shape[1]))]
mtl_merged.loc[in_cluster, report_columns]

Unnamed: 0,NO_SEQ_COLL,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,SPVM _ 2019 _ 1093,-73.78518,2,Yoga Studio,Supermarket,Baseball Stadium,Boxing Gym,Breakfast Spot,Gas Station,Gym,Kitchen Supply Store,Movie Theater,Paper / Office Supplies Store
16,SPVM _ 2019 _ 1796,-73.69527,2,Café,Grocery Store,Restaurant,Gas Station,Bank,Sushi Restaurant,Liquor Store,Supermarket,Greek Restaurant,Business Service
17,SPVM _ 2019 _ 1865,-73.70085,2,Grocery Store,Pharmacy,Restaurant,Café,Gym,Breakfast Spot,Sushi Restaurant,Supermarket,Gas Station,Middle Eastern Restaurant
18,SPVM _ 2019 _ 1882,-73.69078,2,Café,Yoga Studio,Italian Restaurant,Restaurant,Business Service,Bus Line,Gym,Supermarket,Coffee Shop,Fast Food Restaurant
20,SPVM _ 2019 _ 2126,-73.67826,2,Athletics & Sports,Hockey Arena,Gym / Fitness Center,Furniture / Home Store,Yoga Studio,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...
124,SPVM _ 2019 _ 14422,-73.58382,2,Café,Bookstore,Coffee Shop,Bakery,Bar,Furniture / Home Store,French Restaurant,Mediterranean Restaurant,Gaming Cafe,Supermarket
140,SPVM _ 2019 _ 16811,-73.58949,2,Vietnamese Restaurant,Mexican Restaurant,Hardware Store,Baseball Field,Tennis Court,Supermarket,Park,Italian Restaurant,Fried Chicken Joint,Ice Cream Shop
145,SPVM _ 2019 _ 17555,-73.57515,2,Hotel Bar,Sports Bar,Coffee Shop,Gym,Gym / Fitness Center,Burger Joint,Yoga Studio,Event Space,Empanada Restaurant,English Restaurant
156,SPVM _ 2019 _ 18736,-73.50408,2,Bus Stop,Construction & Landscaping,Supermarket,Train Station,Yoga Studio,Eye Doctor,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service


In [283]:
# Cluster 4 (k-means label 3)
# Columns shown: position 1 (NO_SEQ_COLL) plus everything from position 5 on.
in_cluster = mtl_merged['Cluster Labels'] == 3
report_columns = mtl_merged.columns[[1] + list(range(5, mtl_merged.shape[1]))]
mtl_merged.loc[in_cluster, report_columns]

Unnamed: 0,NO_SEQ_COLL,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
135,SPVM _ 2019 _ 15905,-73.61818,3,Bakery,Eye Doctor,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Event Space,Fabric Shop


In [284]:
# Cluster 5 (k-means label 4)
# Columns shown: position 1 (NO_SEQ_COLL) plus everything from position 5 on.
in_cluster = mtl_merged['Cluster Labels'] == 4
report_columns = mtl_merged.columns[[1] + list(range(5, mtl_merged.shape[1]))]
mtl_merged.loc[in_cluster, report_columns]

Unnamed: 0,NO_SEQ_COLL,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,SPVM _ 2019 _ 130,-73.86485,4,Restaurant,Pizza Place,Food & Drink Shop,Coffee Shop,Diner,Pharmacy,Furniture / Home Store,Grocery Store,Yoga Studio,Escape Room
1,SPVM _ 2019 _ 166,-73.8006,4,Martial Arts School,Hookah Bar,Electronics Store,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Eastern European Restaurant
7,SPVM _ 2019 _ 545,-73.83306,4,Deli / Bodega,Middle Eastern Restaurant,Italian Restaurant,Restaurant,Sushi Restaurant,Coffee Shop,Liquor Store,Bookstore,Bagel Shop,Frozen Yogurt Shop
8,SPVM _ 2019 _ 636,-73.80319,4,Fast Food Restaurant,Grocery Store,Pharmacy,Coffee Shop,BBQ Joint,Italian Restaurant,Skating Rink,Burger Joint,Seafood Restaurant,Flower Shop
9,SPVM _ 2019 _ 753,-73.8383,4,Sushi Restaurant,Restaurant,Italian Restaurant,Pizza Place,Deli / Bodega,Breakfast Spot,Middle Eastern Restaurant,Food & Drink Shop,Bagel Shop,Chinese Restaurant
12,SPVM _ 2019 _ 989,-73.82414,4,Bookstore,Hotel,Grocery Store,Coffee Shop,Clothing Store,Discount Store,Restaurant,Sandwich Place,Shoe Store,Fast Food Restaurant
14,SPVM _ 2019 _ 1452,-73.73682,4,Arts & Crafts Store,Other Repair Shop,Electronics Store,Food & Drink Shop,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Fabric Shop,Eye Doctor
24,SPVM _ 2019 _ 2670,-73.69124,4,Restaurant,Park,Bank,Gas Station,Donut Shop,Shopping Mall,Department Store,Pharmacy,Brewery,Convenience Store
29,SPVM _ 2019 _ 3341,-73.64779,4,Coffee Shop,Restaurant,Shopping Mall,Fast Food Restaurant,Breakfast Spot,Sporting Goods Shop,Hockey Arena,Jewelry Store,Clothing Store,Department Store
35,SPVM _ 2019 _ 3958,-73.62487,4,Pizza Place,Coffee Shop,Restaurant,Bakery,Yoga Studio,Massage Studio,Sushi Restaurant,Frozen Yogurt Shop,Korean Restaurant,Liquor Store


In [285]:
# Cluster 6 (k-means label 5)
# Columns shown: position 1 (NO_SEQ_COLL) plus everything from position 5 on.
in_cluster = mtl_merged['Cluster Labels'] == 5
report_columns = mtl_merged.columns[[1] + list(range(5, mtl_merged.shape[1]))]
mtl_merged.loc[in_cluster, report_columns]

Unnamed: 0,NO_SEQ_COLL,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,SPVM _ 2019 _ 247,-73.76006,5,Construction & Landscaping,Park,Yoga Studio,Eye Doctor,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Event Space
27,SPVM _ 2019 _ 3063,-73.64639,5,Construction & Landscaping,Park,Yoga Studio,Eye Doctor,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Event Space
78,SPVM _ 2019 _ 9223,-73.61937,5,Park,Dog Run,Gas Station,Yoga Studio,Event Service,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Space


In [286]:
# Cluster 7 (k-means label 6)
# Columns shown: position 1 (NO_SEQ_COLL) plus everything from position 5 on.
in_cluster = mtl_merged['Cluster Labels'] == 6
report_columns = mtl_merged.columns[[1] + list(range(5, mtl_merged.shape[1]))]
mtl_merged.loc[in_cluster, report_columns]

Unnamed: 0,NO_SEQ_COLL,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
128,SPVM _ 2019 _ 15016,-73.64676,6,Pharmacy,Park,Yoga Studio,Event Space,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service


In [287]:
# Cluster 8: rows of mtl_merged whose 'Cluster Labels' value is 7
# (the headings count clusters from 1, the labels compared here are one lower).
# Columns shown: the column at position 1 plus every column from position 5 onward.
mtl_merged.loc[
    mtl_merged['Cluster Labels'].eq(7),
    mtl_merged.columns[[1, *range(5, len(mtl_merged.columns))]],
]

Unnamed: 0,NO_SEQ_COLL,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,SPVM _ 2019 _ 288,-73.87576,7,Sandwich Place,Convenience Store,Grocery Store,Gas Station,Pharmacy,Event Service,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room
10,SPVM _ 2019 _ 949,-73.74231,7,Pharmacy,Breakfast Spot,Grocery Store,Department Store,BBQ Joint,Discount Store,Sandwich Place,Bank,Big Box Store,Intersection
11,SPVM _ 2019 _ 977,-73.81208,7,Soccer Field,Hobby Shop,Park,Stadium,Shopping Mall,Liquor Store,Supermarket,Bank,Bakery,Organic Grocery
19,SPVM _ 2019 _ 1987,-73.68813,7,Bank,Pizza Place,Pharmacy,Grocery Store,Park,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
21,SPVM _ 2019 _ 2293,-73.70405,7,Gym,Discount Store,Pharmacy,Middle Eastern Restaurant,Shopping Mall,Sandwich Place,Department Store,Greek Restaurant,Grocery Store,Restaurant
28,SPVM _ 2019 _ 3187,-73.71292,7,Grocery Store,Gym / Fitness Center,Soccer Field,Ice Cream Shop,Indian Restaurant,Vietnamese Restaurant,Coffee Shop,Cajun / Creole Restaurant,Yoga Studio,Electronics Store
31,SPVM _ 2019 _ 3423,-73.69672,7,Asian Restaurant,Pharmacy,Shopping Mall,Liquor Store,Bank,Discount Store,Paper / Office Supplies Store,Grocery Store,Gym,Gas Station
33,SPVM _ 2019 _ 3844,-73.63524,7,Korean Restaurant,Convenience Store,Gym / Fitness Center,Liquor Store,Bakery,Supermarket,Discount Store,Park,Athletics & Sports,Burger Joint
34,SPVM _ 2019 _ 3856,-73.63913,7,Pizza Place,Bakery,Gym / Fitness Center,Korean Restaurant,Pharmacy,Indian Restaurant,Discount Store,Intersection,Gas Station,Supermarket
36,SPVM _ 2019 _ 4029,-73.64183,7,Indian Restaurant,Pharmacy,Intersection,Baseball Field,Supermarket,Yoga Studio,Eye Doctor,Empanada Restaurant,English Restaurant,Escape Room


In [288]:
# Cluster 9: rows of mtl_merged whose 'Cluster Labels' value is 8
# (the headings count clusters from 1, the labels compared here are one lower).
# Columns shown: the column at position 1 plus every column from position 5 onward.
mtl_merged.loc[
    mtl_merged['Cluster Labels'].eq(8),
    mtl_merged.columns[[1, *range(5, len(mtl_merged.columns))]],
]

Unnamed: 0,NO_SEQ_COLL,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
159,SPVM _ 2019 _ 19262,-73.95116,8,Train Station,Yoga Studio,Event Space,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Eye Doctor


In [289]:
# Cluster 10: rows of mtl_merged whose 'Cluster Labels' value is 9
# (the headings count clusters from 1, the labels compared here are one lower).
# Columns shown: the column at position 1 plus every column from position 5 onward.
mtl_merged.loc[
    mtl_merged['Cluster Labels'].eq(9),
    mtl_merged.columns[[1, *range(5, len(mtl_merged.columns))]],
]

Unnamed: 0,NO_SEQ_COLL,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,SPVM _ 2019 _ 211,-73.93608,9,Construction & Landscaping,Yoga Studio,Fabric Shop,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Event Space,Eye Doctor
5,SPVM _ 2019 _ 300,-73.86138,9,Construction & Landscaping,Island,Yoga Studio,Eye Doctor,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Event Space
154,SPVM _ 2019 _ 18682,-73.51645,9,Construction & Landscaping,Home Service,Yoga Studio,Eye Doctor,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Event Space


# FINAL ANALYSIS AND CONCLUSIONS

Cluster 1: The park cluster. This is a fairly small group of accidents located in low-population-density zones with parks and small restaurants nearby, widely dispersed within the city. Not important enough to focus emergency resources around.

Cluster 2: The cafe cluster. This is another fairly small group of accidents. Here the most common venues are coffee shops and restaurants. Not important enough to focus emergency resources around.

Cluster 3: The Downtown cluster. The largest cluster. It is dominated by commercial venues such as hotels, gas stations, stores and restaurants located downtown. Accidents here seem to be caused more by the high population density than by the surrounding venues. This group of accidents already has a significant number of accident-care resources nearby, such as fire stations, police stations and medical centers.

Cluster 4: One cluster of one. Not important enough to focus emergency resources around.

Cluster 5: The commercial cluster. It is a large group of accidents. Unlike cluster 3, it contains venues outside downtown. The most common venues in this cluster are restaurants, stores and pharmacies.

Cluster 6: The construction zone cluster. It’s a group of only 3 accidents in a construction zone.  Not important enough to focus emergency resources around.

Cluster 7: Another cluster of one. Not important enough to focus emergency resources around.

Cluster 8: The restaurant cluster. It is a large group of accidents in residential areas on the outskirts of the city. The most common venues in this cluster are restaurants, shopping malls, bakeries and stores. It’s an important group, generally in neighborhoods with accident-care services like police and fire stations. This is the group that those responsible for accident care in the city should follow.

Cluster 9: Another cluster of one. The accident happened in an industrial zone. Not important enough to focus emergency resources around.

Cluster 10: The developing area cluster. It is a small group of accidents in a developing area of the city with relatively low population density. Not important enough to focus emergency resources around.

