## Segmenting and Clustering Neighborhoods in Toronto

In [78]:
#import necessary items 
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup 

### Get the data from web

In [15]:
data_link=requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

In [18]:
con=BeautifulSoup(data_link.content, "lxml")

In [22]:
#get the table 
table= con.find_all('table')[0] 
df = pd.read_html(str(table))[0]
df.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [24]:
#clear the data, delete NA items 
dfa= df[(df.Borough != "Not assigned")]
dfa.reset_index(drop=True, inplace=True)
dfa.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Not assigned
7,M9A,Etobicoke,Islington Avenue
8,M1B,Scarborough,Rouge
9,M1B,Scarborough,Malvern


In [34]:
#Group the table with same postcodes

table=pd.DataFrame(columns=['Postcode','Borough','Neighbourhood'])

gro_borough = dfa.groupby('Postcode')['Borough'].apply(lambda x: set(x).pop())
gro_neighborhoods = dfa.groupby('Postcode')['Neighbourhood'].apply(lambda x: "%s" % ', '.join(x))
gro_df = pd.DataFrame(list(zip(gro_borough.index, gro_borough, gro_neighborhoods)))
gro_df.columns = ['Postcode', 'Borough', 'Neighbourhood']
gro_df.head(30)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [36]:
group_df.shape

(103, 3)

 ### get the geographical coordinates of the neighborhoods in the Toronto.

In [40]:
#import data

import io
url="http://cocl.us/Geospatial_data"
a=requests.get(url).content
geospatial_data=pd.read_csv(io.StringIO(a.decode('utf-8')))

geospatial_data.head(10)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [43]:
#add la and lo to the table 

table_ge=group_df.join(geospatial_data.set_index('Postal Code'), on='Postcode')
table_ge.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


 ### Explore and cluster the neighborhoods in Toronto

In [47]:
# get the geograpical coordinate of Toronto: 

from geopy.geocoders import Nominatim

address = "Toronto, Canada"
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [55]:
!pip install folium


Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/72/ff/004bfe344150a064e558cb2aedeaa02ecbf75e60e148a55a9198f0c41765/folium-0.10.0-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 14.7MB/s eta 0:00:01
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/63/36/1c93318e9653f4e414a2e0c3b98fc898b4970e939afeedeee6075dd3b703/branca-0.3.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.3.1 folium-0.10.0


In [56]:
import folium

In [67]:
# create the map 

map_toronto  = folium.Map(location=[latitude, longitude], zoom_start=10)

# create a map
for lat, lng, borough, neighborhood in zip(table_ge['Latitude'], table_ge['Longitude'], table_ge['Borough'], table_ge['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill_color='#3186cc',
        fill_opacity=0.4,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [73]:
# cluster the neibourhood use k-means
from sklearn.cluster import KMeans
kclusters=6
table_clu= table_ge.drop(['Postcode','Borough','Neighbourhood'], 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(table_clu)
kmeans.labels_

array([2, 2, 2, 2, 2, 2, 2, 5, 2, 5, 2, 5, 2, 2, 2, 2, 2, 3, 5, 3, 3, 3,
       3, 3, 3, 5, 5, 5, 3, 1, 3, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 0, 5, 0,
       3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 0,
       0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, 1, 4, 4, 4, 4, 4, 0, 4, 5,
       4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [77]:
table_ge['Cluster Labels']=kmeans.labels_
table_ge.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,2
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,2
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,2
3,M1G,Scarborough,Woburn,43.770992,-79.216917,2
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,2
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,2
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,2
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,5
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476,2
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,5
