## 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

import geocoder
from geopy.geocoders import Nominatim

from sklearn.cluster import KMeans

import folium

# Confirmation message: reaching this line means every import above succeeded.
print('Libraries imported')

Libraries imported


## 2. Download and Explore Dataset

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html parses every HTML table on the page into a list of DataFrames;
# the postal-code table is taken from the first position of that list.
tables = pd.read_html(url)
data = tables[0]

data

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


Rows that contain `Not assigned` are not used in this analysis, so we remove them from the table.

In [3]:
# Turn the 'Not assigned' placeholder into a real missing value, drop every
# row that contains one, and renumber the remaining rows from 0.
data = (
    data
    .replace('Not assigned', np.nan)
    .dropna()
    .reset_index(drop=True)
)

data

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [4]:
# (rows, columns) of the cleaned table.
data.shape

(103, 3)

## 3. Get the Latitude and Longitude of Each Postal Code

In [5]:
def _geocode_postal_code(postal_code):
    """Look up one Toronto postal code with the ArcGIS geocoder.

    Parameters
    ----------
    postal_code : str
        Postal code prefix, e.g. 'M5A'.

    Returns
    -------
    tuple of (float, float)
        (latitude, longitude). Both values are NaN when the geocoder returns
        no coordinates, so a single failed lookup does not abort the whole run.
    """
    result = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
    latlng = result.latlng
    if not latlng:
        return (np.nan, np.nan)
    return (latlng[0], latlng[1])


# Geocode every postal code exactly once (one network request per row) and
# split the resulting (lat, lon) pairs into the two coordinate columns.
coords = [_geocode_postal_code(code) for code in data['Postal Code']]
data['Latitude'] = [lat for lat, _ in coords]
data['Longitude'] = [lon for _, lon in coords]

In [6]:
# Display the table, which now includes the Latitude and Longitude columns.
data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.64869,-79.38544
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945


## 4. Select the Boroughs Whose Name Contains "Toronto"

In [7]:
# Keep only the rows whose Borough name contains 'Toronto'. The .copy() makes
# data_toronto an independent frame, so columns added to it later never touch
# (or warn about) the original `data`. The original row index is preserved.
data_toronto = data[data['Borough'].str.contains('Toronto')].copy()

data_toronto

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.65739,-79.37804
15,M5C,Downtown Toronto,St. James Town,43.65215,-79.37587
19,M4E,East Toronto,The Beaches,43.67709,-79.29547
20,M5E,Downtown Toronto,Berczy Park,43.64536,-79.37306
24,M5G,Downtown Toronto,Central Bay Street,43.65609,-79.38493
25,M6G,Downtown Toronto,Christie,43.66869,-79.42071
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.6497,-79.38258
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.66505,-79.43891


## 5. Cluster Neighborhoods

In [8]:
# Build the feature matrix: one row per postal code, columns (latitude, longitude).
X = data_toronto['Latitude']
Y = data_toronto['Longitude']
Z = np.column_stack((X, Y))

# Number of clusters to partition the postal codes into.
kclusters = 4

# n_init is pinned explicitly: the library default changed between
# scikit-learn releases (10 -> 'auto'), which can alter the resulting labels.
# random_state fixes the centroid initialisation so reruns are reproducible.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10)

kmeans.fit(Z)

KMeans(n_clusters=4, random_state=0)

#### Create a `Cluster Labels` column containing the clustering labels

In [9]:
# Attach the k-means label of each row as the 'Cluster Labels' column.
# assign() appends the column when it is new and overwrites it when it already
# exists, so this cell can be re-run safely (DataFrame.insert would raise
# ValueError on the second run because the column is already present).
data_toronto = data_toronto.assign(**{'Cluster Labels': kmeans.labels_})

data_toronto

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,2
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188,2
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.65739,-79.37804,2
15,M5C,Downtown Toronto,St. James Town,43.65215,-79.37587,2
19,M4E,East Toronto,The Beaches,43.67709,-79.29547,0
20,M5E,Downtown Toronto,Berczy Park,43.64536,-79.37306,2
24,M5G,Downtown Toronto,Central Bay Street,43.65609,-79.38493,2
25,M6G,Downtown Toronto,Christie,43.66869,-79.42071,3
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.6497,-79.38258,2
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.66505,-79.43891,3


## 6. Visualizing the Neighbourhoods

In [10]:
# Free-text address used to centre the map.
address = 'Toronto, On'

# Nominatim requires a user_agent string identifying the application.
geolocator = Nominatim(user_agent='toronto_explorer')
location = geolocator.geocode(address)

# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [11]:
# Create the base map centred on Toronto.
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One hex colour per cluster, evenly spaced along the rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# Add one circle marker per postal code, coloured by its cluster label.
# Labels are 0-based, so they index the palette directly (no "- 1" offset,
# which previously only worked through negative-index wrap-around).
for lat, lon, poi, cluster in zip(
    data_toronto['Latitude'],
    data_toronto['Longitude'],
    data_toronto['Neighbourhood'],
    data_toronto['Cluster Labels'],
):
    cluster_color = rainbow[cluster]
    label = folium.Popup('{} Cluster {}'.format(poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7,
    ).add_to(map_clusters)

map_clusters