IMPORTING TORONTO POSTAL CODES

In [44]:
from io import StringIO
import random

import folium  # plotting library
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from IPython.core.display import HTML
from IPython.display import Image
from IPython.display import display_html
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

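# To install geopy, run `%pip install geopy` in its own cell.
# (The older `import pip; pip.main(['install', 'geopy'])` call no longer works with pip >= 10.)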

## **Scraping Postal Codes**

In [47]:
# Download the Wikipedia page listing Canadian postal codes starting with "M".
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops the notebook on an HTTP error instead of parsing
# an error page as if it were the postal-code table.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
url = response.text  # NOTE: holds the page HTML, not the address; name kept for compatibility
soup = BeautifulSoup(url, 'lxml')

# Read the first <table> element of the page into a list of DataFrames.
# The HTML is wrapped in StringIO because passing a raw HTML string to
# read_html is deprecated in recent pandas releases.
tab = str(soup.table)
data = pd.read_html(StringIO(tab))

In [102]:
# `data` is a list of parsed tables; the first entry is the one used from here on.
data_fm = data[0]
data_fm.head(n=15)

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


## **Data Wrangling** 

In [106]:
# Drop rows whose Borough is not assigned.
# Unassigned boroughs may show up as the literal text 'Not assigned' or as
# missing values (NaN), so the text is filtered explicitly and dropna() then
# removes any row that still has a missing value. NaN != 'Not assigned' is
# True, so NaN rows survive the mask and are handled by dropna().
has_borough = data_fm['Borough'] != 'Not assigned'
# Chained reset_index builds a fresh, 0-based frame instead of mutating in place,
# which keeps the cell safe to re-run.
data_f1 = data_fm[has_borough].dropna().reset_index(drop=True)
data_f1

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business reply mail Processing Centre
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [89]:
# (rows, columns) of the cleaned postal-code table.
data_f1.shape

(103, 3)

## Get Location Data 

In [64]:
# Load the latitude/longitude lookup for Toronto postal codes from the hosted CSV.
geospatial_csv_url = 'https://cocl.us/Geospatial_data'
lat_lon = pd.read_csv(geospatial_csv_url)
lat_lon.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [65]:
# (rows, columns) of the coordinate lookup table.
lat_lon.shape

(103, 3)

In [69]:
# Join the cleaned postal-code table with its coordinates on the shared
# 'Postal Code' column (default inner join).
neighbourhoods = data_f1.merge(lat_lon, on='Postal Code')
neighbourhoods.head(n=20)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


## **Cluster Neighbourhoods**

In [79]:
# Cluster only neighbourhoods whose borough name contains 'Toronto'.
# na=False treats a missing borough as "no match" instead of producing a NaN mask.
is_toronto = neighbourhoods['Borough'].str.contains('Toronto', na=False)
# .copy() makes toronto_data an independent frame, so adding columns to it
# later does not raise SettingWithCopyWarning.
toronto_data = neighbourhoods[is_toronto].copy()
toronto_data.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [80]:
# Base map centred on the coordinates used for Toronto throughout this notebook.
map_toronto = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

# Add one circle marker per row of toronto_data, with a
# "<neighborhood>, <borough>" popup.
for lat, lng, borough, neighborhood in zip(
        toronto_data['Latitude'],
        toronto_data['Longitude'],
        toronto_data['Borough'],
        toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html is a Popup option, so it is set here and not on the marker.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)
map_toronto


In [81]:
# Clustering neighborhoods based on lat & long

# Keep only the coordinate columns as clustering features.
# Selecting them by name (instead of dropping the others with a positional
# axis argument) works on current pandas and keeps the cell idempotent: a
# 'Cluster Labels' column added to toronto_data later is never fed back in.
toronto_clustering = toronto_data[['Latitude', 'Longitude']]
toronto_clustering

Unnamed: 0,Latitude,Longitude
2,43.65426,-79.360636
4,43.662301,-79.389494
9,43.657162,-79.378937
15,43.651494,-79.375418
19,43.676357,-79.293031
20,43.644771,-79.373306
24,43.657952,-79.387383
25,43.669542,-79.422564
30,43.650571,-79.384568
31,43.669005,-79.442259


In [84]:
# Apply K-means clustering to the latitude & longitude values.
k = 6
# n_init is set explicitly so the number of restarts does not depend on the
# installed scikit-learn version's default; random_state fixes the seed.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(toronto_clustering)

# Add the K-means labels to toronto_data.
# assign() returns a new frame, so this works without SettingWithCopyWarning
# even when toronto_data was produced by filtering another DataFrame.
toronto_data = toronto_data.assign(**{'Cluster Labels': kmeans.labels_})
toronto_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,4
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,2
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564,5
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,2
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,3


In [85]:
# Base map centred on the coordinates used for Toronto throughout this notebook.
map_clusters = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

# One distinct colour per cluster, sampled evenly from the rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# Add one marker per neighbourhood, coloured by its cluster label.
for lat, lon, neighborhood, cluster in zip(
        toronto_data['Latitude'],
        toronto_data['Longitude'],
        toronto_data['Neighborhood'],
        toronto_data['Cluster Labels']):
    # Popup names the neighbourhood as well as its cluster.
    label = folium.Popup(
        '{} Cluster {}'.format(neighborhood, cluster), parse_html=True)
    # K-means labels run from 0 to k-1, so they index the palette directly.
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters