# Part 4: Visualizing Clusters on Map
In this stage, I will visualize the clustering results by color-coding and labeling each neighborhood on a map.

In [1]:
import pandas as pd
import numpy as np
import folium
from geopy.geocoders import Nominatim

In [2]:
# Read the police dataset that already carries the cluster labels, then
# promote the 'Neighborhood' column to the row index. Passing the column
# label to set_index moves it out of the columns in the same step.
pol_df = pd.read_csv('./data/pol_clus_df.csv', encoding="ISO-8859-1")
pol_df = pol_df.set_index('Neighborhood')

# sanity-check the dimensions and preview the first rows
print(pol_df.shape)
pol_df.head()

(73, 8)


Unnamed: 0_level_0,Sex Offences,Assaults,Robbery,Break & Enter,Auto Theft,Other Theft,Offensive Weapons,Cluster Labels
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
VPD Arbutus Ridge,0.009524,0.003778,0.001608,0.016613,0.009393,0.006241,0.0,5
VPD Central_Business_District,0.31619,0.404222,0.414791,0.169968,0.133671,0.37235,0.537123,3
VPD Dunbar-Southlands,0.011429,0.003556,0.003215,0.018104,0.009393,0.008954,0.00232,5
VPD Fairview,0.049524,0.026444,0.0209,0.067519,0.034682,0.060001,0.019722,1
VPD Grandview-Woodland,0.059048,0.052444,0.05627,0.062194,0.08815,0.040634,0.024362,1


🤑 __Get addresses of neighborhoods__ <br>
In order to plot neighborhoods on a map, we first need to obtain the addresses of each neighborhood.

I have already collected the addresses manually and stored them in a CSV file. Here is how I obtained them:
- For Vancouver, I used the neighborhood name as the address, since Vancouver's neighborhoods are named rather than numbered.
- Toronto and Montreal both have numbered neighborhood names due to the naming convention of their police departments. I looked up the address of the police station in each neighborhood and used that as the address.

In [3]:
# Read the manually compiled address list. The file is read without a header
# row, so pandas labels the columns with the integers 0 and 1.
add_df = pd.read_csv(
    "./data/all addresses.csv",
    header=None,
)

# sanity-check the dimensions and preview the first rows
print(add_df.shape)
add_df.head()

(73, 2)


Unnamed: 0,0,1
0,Arbutus Ridge,"Arbutus Ridge Vancouver, BC"
1,Central_Business_District,"Vancouver City Center Vancouver, BC"
2,Dunbar-Southlands,"Dunbar-Southlands Vancouver, BC"
3,Fairview,"Fairview Vancouver, BC"
4,Grandview-Woodland,"Grandview-Woodland Vancouver, BC"


In [4]:
# Re-label the address rows with the neighborhood index of pol_df.
# NOTE(review): rows are paired purely by position, so this presumably relies
# on both CSV files listing the neighborhoods in the same order — confirm.
add_df = add_df.set_index(pol_df.index)
add_df

Unnamed: 0_level_0,0,1
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
VPD Arbutus Ridge,Arbutus Ridge,"Arbutus Ridge Vancouver, BC"
VPD Central_Business_District,Central_Business_District,"Vancouver City Center Vancouver, BC"
VPD Dunbar-Southlands,Dunbar-Southlands,"Dunbar-Southlands Vancouver, BC"
VPD Fairview,Fairview,"Fairview Vancouver, BC"
VPD Grandview-Woodland,Grandview-Woodland,"Grandview-Woodland Vancouver, BC"
...,...,...
SPVM 42.0,42,"8181 Boulevard Lacordaire, Saint-Léonard, QC"
SPVM 45.0,45,"8200 Boulevard Maurice-Duplessis, Montréal, QC"
SPVM 46.0,46,"6850, boulevard Joseph-Renaud, Anjou, Montréal..."
SPVM 48.0,48,"6680 Rue Sherbrooke E, Montréal, QC"


In [5]:
# Keep only column 1 (the address text) and give it a readable name.
# An explicit copy is taken because new columns are written into this frame
# further down; writing into a frame that pandas still tracks as a slice of
# another frame can raise SettingWithCopyWarning.
add_df = add_df[[1]].rename(columns={1: 'Address'}).copy()
add_df.head()

Unnamed: 0_level_0,Address
Neighborhood,Unnamed: 1_level_1
VPD Arbutus Ridge,"Arbutus Ridge Vancouver, BC"
VPD Central_Business_District,"Vancouver City Center Vancouver, BC"
VPD Dunbar-Southlands,"Dunbar-Southlands Vancouver, BC"
VPD Fairview,"Fairview Vancouver, BC"
VPD Grandview-Woodland,"Grandview-Woodland Vancouver, BC"


🤑 __Get latitude and longitude from address__<br>
We need to convert the addresses into latitude and longitude so that they can be passed to the plotting function. We will do this using the geopy package.

In [6]:
# Geocode every address into latitude / longitude.
# `lat` and `long` are built in the same order as the rows of add_df so that
# they can be attached to it as columns afterwards.
lat = []
long = []
geolocator = Nominatim(user_agent='neigh_explorer') # create object instance for obtaining lat and long from address
# NOTE(review): the public Nominatim service is rate-limited; if lookups start
# failing, consider wrapping geocode in geopy's RateLimiter.
for address in add_df['Address']: # loop through every address
    loc = geolocator.geocode(address, country_codes='ca')
    if loc is None:
        # Unrecognized address: report it so it can be fixed, and store a NaN
        # placeholder so both lists stay the same length as add_df and every
        # coordinate remains aligned with its own row.
        print(address)
        lat.append(np.nan)
        long.append(np.nan)
    else:
        lat.append(loc.latitude)
        long.append(loc.longitude)

In [7]:
# Attach the geocoded coordinates as 'Latitude' and 'Longitude' columns.
# `assign` builds a new frame instead of writing into add_df in place, which
# avoids SettingWithCopyWarning (add_df was derived from a column slice).
# The lists must have one entry per row of add_df.
add_df = add_df.assign(Latitude=lat, Longitude=long)
add_df.head()

Unnamed: 0_level_0,Address,Latitude,Longitude
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
VPD Arbutus Ridge,"Arbutus Ridge Vancouver, BC",49.246305,-123.159636
VPD Central_Business_District,"Vancouver City Center Vancouver, BC",49.282471,-123.118628
VPD Dunbar-Southlands,"Dunbar-Southlands Vancouver, BC",49.237864,-123.184354
VPD Fairview,"Fairview Vancouver, BC",49.261956,-123.130408
VPD Grandview-Woodland,"Grandview-Woodland Vancouver, BC",49.275849,-123.066934


In [8]:
# Combine the crime/cluster data with the address and coordinate data by
# matching rows on the neighborhood index that both frames share.
premium_pol_df = pol_df.merge(add_df, left_index=True, right_index=True)
premium_pol_df.head()

Unnamed: 0_level_0,Sex Offences,Assaults,Robbery,Break & Enter,Auto Theft,Other Theft,Offensive Weapons,Cluster Labels,Address,Latitude,Longitude
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
VPD Arbutus Ridge,0.009524,0.003778,0.001608,0.016613,0.009393,0.006241,0.0,5,"Arbutus Ridge Vancouver, BC",49.246305,-123.159636
VPD Central_Business_District,0.31619,0.404222,0.414791,0.169968,0.133671,0.37235,0.537123,3,"Vancouver City Center Vancouver, BC",49.282471,-123.118628
VPD Dunbar-Southlands,0.011429,0.003556,0.003215,0.018104,0.009393,0.008954,0.00232,5,"Dunbar-Southlands Vancouver, BC",49.237864,-123.184354
VPD Fairview,0.049524,0.026444,0.0209,0.067519,0.034682,0.060001,0.019722,1,"Fairview Vancouver, BC",49.261956,-123.130408
VPD Grandview-Woodland,0.059048,0.052444,0.05627,0.062194,0.08815,0.040634,0.024362,1,"Grandview-Woodland Vancouver, BC",49.275849,-123.066934


🤑 __Separate the cities in the dataframe__ <br>
I want to plot three different maps, one for each city. Thus we need one dataframe for each city.

In [9]:
# Split the merged frame into one frame per city.

# One boolean mask per police force: an entry is True when the force's
# abbreviation appears in that neighborhood's label.
vpd = ['VPD' in name for name in premium_pol_df.index]
tps = ['TPS' in name for name in premium_pol_df.index]
spvm = ['SPVM' in name for name in premium_pol_df.index]

# Boolean-mask indexing keeps only the rows flagged True for each city.
vpd_df = premium_pol_df[vpd]
tps_df = premium_pol_df[tps]
spvm_df = premium_pol_df[spvm]

In [10]:
# Print (rows, columns) for each city frame, in the order
# Vancouver, Toronto, Montreal.
for city_df in (vpd_df, tps_df, spvm_df):
    print(city_df.shape)

(24, 11)
(16, 11)
(33, 11)


looks right :)

🤑 __Plot the maps!__ <br>
I will be using the folium package, which creates interactive maps with markers.

In [11]:
# Vancouver first
# Look up the city's own coordinates; the map below is centered on them.
geolocator = Nominatim(user_agent='neigh_explorer')
van = geolocator.geocode(query='Vancouver', country_codes='ca')

In [12]:
# Marker fill color per cluster label.
# The two outlier clusters (3 and 4) share the same fill color.
color_dictionary = {
    5: '#FFC300',
    6: '#FF5733',
    1: '#C70039',
    2: '#900C3F',
    0: '#581845',
    4: '#FDFEFE',
    3: '#FDFEFE',
}

# Human-readable description per cluster label, shown in the marker popups.
label_dictionary = {
    5: 'very low crime',
    6: 'low crime',
    1: 'medium crime',
    2: 'high crime',
    0: 'very high crime',
    4: 'OUTLIER: very low in all except offensive weapons',
    3: 'OUTLIER: exceptionally high crime',
}

# Outline color used for every marker.
line_color = '#1C2833'

In [13]:
# plot map for vancouver
# initialize the map, centered on the geocoded city coordinates
van_map_clusters = folium.Map(location=[van.latitude, van.longitude], zoom_start=12)

# Add one circle marker per neighborhood, colored and labeled by its cluster.
# The loop variables are deliberately not called `lat` / `long`: those names
# hold the geocoded coordinate lists built earlier in this notebook, and
# overwriting them with single floats would corrupt the coordinate columns if
# that earlier cell were re-run after this one.
for marker_lat, marker_lon, neigh, clus in zip(vpd_df['Latitude'], vpd_df['Longitude'],
                                               vpd_df.index, vpd_df['Cluster Labels']):
    folium.CircleMarker(location=[marker_lat, marker_lon],
                        radius=10,
                        popup=label_dictionary[clus]+'. ' +neigh,
                        fill=True,
                        fill_opacity=1,
                        fill_color=color_dictionary[clus],
                        color=line_color).add_to(van_map_clusters)

van_map_clusters
# click on the markers to see the labels!

In [14]:
# Toronto next
# Look up the city's own coordinates; the map below is centered on them.
geolocator = Nominatim(user_agent='neigh_explorer')
tor = geolocator.geocode(query='Toronto', country_codes='ca')

In [15]:
# plot map for toronto
# initialize the map, centered on the geocoded city coordinates
tor_map_clusters = folium.Map(location=[tor.latitude, tor.longitude], zoom_start=11)

# Add one circle marker per neighborhood, colored and labeled by its cluster.
# The loop variables are deliberately not called `lat` / `long`: those names
# hold the geocoded coordinate lists built earlier in this notebook, and
# overwriting them with single floats would corrupt the coordinate columns if
# that earlier cell were re-run after this one.
for marker_lat, marker_lon, neigh, clus in zip(tps_df['Latitude'], tps_df['Longitude'],
                                               tps_df.index, tps_df['Cluster Labels']):
    folium.CircleMarker(location=[marker_lat, marker_lon],
                        radius=10,
                        popup=label_dictionary[clus]+'. ' +neigh,
                        fill=True,
                        fill_opacity=1,
                        fill_color=color_dictionary[clus],
                        color=line_color).add_to(tor_map_clusters)

tor_map_clusters

In [16]:
# last but not least, montreal
# Look up the city's own coordinates; the map below is centered on them.
geolocator = Nominatim(user_agent='neigh_explorer')
mon = geolocator.geocode(query='Montreal', country_codes='ca')

In [17]:
# plot map for montreal
# initialize the map, centered on the geocoded city coordinates
mon_map_clusters = folium.Map(location=[mon.latitude, mon.longitude], zoom_start=11)

# Add one circle marker per neighborhood, colored and labeled by its cluster.
# The loop variables are deliberately not called `lat` / `long`: those names
# hold the geocoded coordinate lists built earlier in this notebook, and
# overwriting them with single floats would corrupt the coordinate columns if
# that earlier cell were re-run after this one.
for marker_lat, marker_lon, neigh, clus in zip(spvm_df['Latitude'], spvm_df['Longitude'],
                                               spvm_df.index, spvm_df['Cluster Labels']):
    folium.CircleMarker(location=[marker_lat, marker_lon],
                        radius=10,
                        popup=label_dictionary[clus]+'. ' +neigh,
                        fill=True,
                        fill_opacity=1,
                        fill_color=color_dictionary[clus],
                        color=line_color).add_to(mon_map_clusters)

mon_map_clusters