PART 3 ANALYSIS



Link to this notebook: https://eu-gb.dataplatform.cloud.ibm.com/analytics/notebooks/v2/031d61ba-f431-40c6-b593-793e5be2e00b/view?access_token=b386f1439524e1b76b8db1007e7f66859fc6bf4097cc6426be0b15738db80d2f

In [2]:
import pandas as pd

# Read every HTML table on the Wikipedia page; the postal-code table is the first one.
data = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

# Keep only rows with an assigned neighbourhood and renumber them from 0.
# Chaining reset_index builds a fresh frame instead of mutating a filtered one in place.
df = data[0]
df = df[df.Neighbourhood != "Not assigned"].reset_index(drop=True)

# Latitude/longitude lookup keyed by postal code.
gf = pd.read_csv("http://cocl.us/Geospatial_data")

# Join the coordinates onto the postal-code table. The key is named explicitly so the
# join cannot silently change if either source gains another shared column.
ff = df.merge(gf, on="Postal Code")
ff

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


Import the json, folium, matplotlib, NumPy and scikit-learn libraries.

In [3]:
!pip install folium
import json
import folium 
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
from sklearn.cluster import KMeans

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 3.3 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


Create the map, then iterate over each row of the data frame and add a marker for it, labelled with its neighbourhood and borough.

In [4]:

# Base map at the fixed centre coordinates and zoom level used throughout the notebook.
map_toronto = folium.Map(location=[43.6487, -79.38544], zoom_start=10)

# Styling shared by every marker on this map.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per row of ff, with a "neighbourhood, borough" popup.
marker_rows = ff[['Latitude', 'Longitude', 'Borough', 'Neighbourhood']].itertuples(index=False, name=None)
for latitude, longitude, borough_name, hood_name in marker_rows:
    popup = folium.Popup(f'{hood_name}, {borough_name}', parse_html=True)
    marker = folium.CircleMarker([latitude, longitude], popup=popup, **marker_style)
    marker.add_to(map_toronto)

map_toronto

Cluster the neighbourhoods into five groups with k-means, using only their latitude and longitude.

In [5]:
# set number of clusters
kclusters = 5

# only analyse coordinates
coord = ff[["Latitude", "Longitude"]]

# Run k-means clustering. random_state fixes the initialisation; n_init is pinned so the
# result does not depend on the scikit-learn version's default.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(coord)

# Cluster assignments as a one-column frame (same object the original cell created).
kdf = pd.DataFrame(kmeans.labels_)

# Attach the labels to ff positionally. Assigning the raw label array avoids the index
# alignment that a DataFrame assignment performs, which would produce NaN labels if ff
# did not carry a default 0..n-1 index.
ff["Cluster"] = kmeans.labels_

Display the clustered neighbourhoods on the map, with each marker coloured by its cluster.

In [7]:
# Fresh base map so the cluster markers are not drawn on top of the earlier ones.
map_toronto = folium.Map(location=[43.6487, -79.38544], zoom_start=10)

# One hex colour per cluster, evenly spaced across matplotlib's rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# One circle marker per row of ff, with a "neighbourhood, borough" popup.
for lat, lng, borough, neighbourhood, cluster in zip(ff['Latitude'], ff['Longitude'], ff['Borough'], ff['Neighbourhood'], ff["Cluster"]):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    # Cluster labels start at 0, so they index the colour list directly
    # (no reliance on negative-index wrap-around).
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        # Fill with the cluster colour too; a constant fill left only the thin
        # outline to tell clusters apart.
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_toronto)

map_toronto