# Segmenting and Clustering Neighborhoods in Toronto

In [1]:
from io import StringIO

import numpy as np
import pandas as pd
import requests

## Problem 1 - Scraping Toronto Neighborhoods from Wikipedia 

In [2]:
# Use requests to scrape the Wikipedia page listing Toronto ("M") postal codes.
# A timeout keeps the cell from hanging forever, and raise_for_status() reports
# an HTTP error here instead of as a confusing HTML-parsing failure later.
# NOTE: despite its name, `website_url` holds the HTTP response, not a URL.
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
website_url.raise_for_status()

In [3]:
# Parse every HTML table on the page into a list of DataFrames and keep the
# first one, which holds the postal codes. The markup is wrapped in StringIO
# because passing a literal HTML string to read_html is deprecated in pandas.
website_list = pd.read_html(StringIO(website_url.text))
postal_code_df = website_list[0]

In [4]:
# Preview the first rows of the scraped postal-code table
postal_code_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
# Replace 'Not assigned' Boroughs by NaN and drop the rows with NaN values.
# The result is assigned back instead of calling
# `postal_code_df.Borough.replace(..., inplace=True)`: that chained in-place
# call works on a temporary Series under pandas Copy-on-Write and would leave
# the DataFrame unchanged.
postal_code_df = postal_code_df.assign(
    Borough=postal_code_df['Borough'].replace('Not assigned', np.nan)
).dropna()

# Check for unassigned Neighbourhoods (counts occurrences of the literal text)
postal_code_df.Neighbourhood.str.count("Not assigned").sum()

0

In [6]:
# Group by Postal Code: one row per (Postal Code, Borough), with the
# neighbourhood names of any duplicated postal code joined by ', '.
# `groupby(...).head()` did not aggregate anything: it only keeps up to five
# rows per group, so duplicated postal codes stayed split (or were truncated).
# sort=False keeps first-appearance order; sorting happens in the next step.
postal_code_df = (
    postal_code_df
    .groupby(['Postal Code', 'Borough'], as_index=False, sort=False)
    .agg({'Neighbourhood': ', '.join})
)

In [7]:
# Order the rows by postal code and renumber the index from 0
postal_code_df = (
    postal_code_df
    .sort_values(by=['Postal Code'])
    .reset_index(drop=True)
)
postal_code_df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [8]:
# (rows, columns) of the cleaned postal-code table
postal_code_df.shape

(103, 3)

## Problem 2 - Geocoding Toronto Neighborhoods

In [59]:
import geocoder 

In [63]:
# Simple test of the Google provider exposed by `geocoder`.
# NOTE(review): the recorded run returned REQUEST_DENIED, presumably because no
# Google API key was supplied; coordinates are loaded from a CSV file instead.
geocoder.google('M4B, Toronto, Ontario')


<[REQUEST_DENIED] Google - Geocode [empty]>

### Geocoding From File

In [9]:
# Load the coordinates of each postal code from a local CSV file
# (columns: Postal Code, Latitude, Longitude) and preview the first rows
df_geo = pd.read_csv('Geospatial_Coordinates.csv')
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [26]:
# Attach coordinates to every postal code (inner join on the shared 'Postal Code' column)
Toronto_df = postal_code_df.merge(df_geo, on='Postal Code')
Toronto_df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [27]:
# Count missing values per column; all zeros means the merged data set is complete
Toronto_df.isna().sum()

Postal Code      0
Borough          0
Neighbourhood    0
Latitude         0
Longitude        0
dtype: int64

## Problem 3 - Foursquare

Drawing inspiration from the previous lab, where we clustered the neighbourhoods of New York City, we cluster Toronto's neighbourhoods by the similarity of their venue categories, using k-means clustering and the Foursquare API.

In [61]:
# Foursquare API settings. The credentials are read from environment variables
# so they never appear in the notebook; each is None when its variable is unset.
import os
import json
import requests

FS_CLIENT_ID = os.getenv('FS_CLIENT_ID')
FS_CLIENT_SECRET = os.getenv('FS_CLIENT_SECRET')
# Sent as the `v` query parameter of every request
VERSION = '20180605'
# Endpoint queried for the venues around a coordinate
URL_EXPLORE = 'https://api.foursquare.com/v2/venues/explore'

In [24]:
from geopy.geocoders import Nominatim 
import folium

In [23]:
# Geocode Toronto City Hall; its coordinates are used as the centre of the maps
address = '100 Queen St W, Toronto, ON M5H 2N2'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('Toronto City Hall Location {}, {}.'.format(latitude, longitude))

Toronto City Hall Location 43.6536032, -79.38400547469666.


The neighbourhood shapefile can be accessed at https://open.toronto.ca/dataset/neighbourhoods/

In [40]:
# Creating the map of Toronto, centred on the City Hall coordinates
Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=11)
Toronto_zip = zip(Toronto_df['Latitude'], Toronto_df['Longitude'], Toronto_df['Borough'], Toronto_df['Neighbourhood'])

# adding markers to map
# The loop variables are deliberately NOT named `latitude`/`longitude`: those
# globals hold the City Hall coordinates that a later cell reuses as the map
# centre, and reusing the names here would overwrite them with the last row.
for lat, lon, borough, neighbourhood in Toronto_zip:
    label = '{} -- {}'.format(borough, neighbourhood)
    label = folium.Popup(label, parse_html=True)

    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='red',
        weight=1,
        fill=True,
        fill_color='crimson',
        fill_opacity=0.5
        ).add_to(Toronto_map)

# Optional overlay of the neighbourhood boundaries (needs the GeoJSON file locally):
#folium.GeoJson("Neighbourhoods.geojson").add_to(Toronto_map)
Toronto_map


In [51]:
def create_query(latitudes, longitudes, radius):
    """Build the query parameters for one Foursquare explore request.

    Args:
        latitudes: Latitude of the search centre (a single value, despite the
            plural name; it is formatted straight into the ``ll`` parameter).
        longitudes: Longitude of the search centre (a single value).
        radius: Search radius, forwarded unchanged as the ``radius`` parameter.

    Returns:
        dict with the keys ``client_id``, ``client_secret``, ``v``, ``ll`` and
        ``radius``; credentials and version come from the module-level
        ``FS_CLIENT_ID``, ``FS_CLIENT_SECRET`` and ``VERSION``.
    """
    lat_lon = '{},{}'.format(latitudes, longitudes)
    return {
        'client_id': FS_CLIENT_ID,
        'client_secret': FS_CLIENT_SECRET,
        'v': VERSION,
        'll': lat_lon,
        'radius': radius,
    }

def nearby_venues(df_T, radius):
    """Query Foursquare for the venues around every row of ``df_T``.

    Args:
        df_T: DataFrame with 'Neighbourhood', 'Latitude' and 'Longitude'
            columns; one explore request is sent per row.
        radius: Search radius handed to ``create_query`` for every request.

    Returns:
        DataFrame with the columns 'Neighbourhood', 'Latitude', 'Longitude',
        'Venue' and 'Category', one row per returned venue. The coordinates
        are those of the queried row, not of the venue. Venues that carry no
        category are skipped, and rows for which the API returns no groups or
        items contribute nothing.

    Raises:
        requests.HTTPError: if a request returns an HTTP error status.
        requests.Timeout: if a request takes longer than 30 seconds.
    """
    venues_lst = []
    for _, row in df_T.iterrows():
        params = create_query(row['Latitude'], row['Longitude'], radius)
        resp = requests.get(url=URL_EXPLORE, params=params, timeout=30)
        # Report quota/credential problems as an HTTP error rather than as a
        # KeyError on the missing 'groups' entry further down.
        resp.raise_for_status()

        groups = resp.json()["response"].get('groups') or []
        items = groups[0].get('items', []) if groups else []

        for v in items:
            venue = v['venue']
            categories = venue.get('categories') or []
            if not categories:
                # No category to analyse; indexing [0] here would raise IndexError
                continue
            venues_lst.append(
                [row['Neighbourhood'], row['Latitude'], row['Longitude'], venue['name'], categories[0]['name']]
            )

    venues_df = pd.DataFrame(venues_lst, columns=['Neighbourhood', 'Latitude', 'Longitude', 'Venue' , 'Category'])
    return venues_df

In [52]:
# Fetch the venues around each row's Latitude/Longitude in Toronto_df.
# 500 is the search radius sent to Foursquare (presumably metres — TODO confirm).
Venues_df = nearby_venues(Toronto_df, 500)

In [57]:
# Preview the first venues returned
Venues_df.head()

Unnamed: 0,Neighbourhood,Latitude,Longitude,Venue,Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,Fast Food Restaurant
1,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,Bar
2,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,SEBS Engineering Inc. (Sustainable Energy and ...,Construction & Landscaping
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,Bank
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,Electronics Store


In [54]:
# Total number of venue rows collected
print('{} venues were returned by Foursquare.'.format(len(Venues_df)))

1337 venues were returned by Foursquare.


In [64]:
# Number of venues returned per neighbourhood (first 20 groups, sorted by name)
Venues_df.groupby('Neighbourhood').count().head(20)

Unnamed: 0_level_0,Latitude,Longitude,Venue,Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Agincourt,5,5,5,5
"Alderwood, Long Branch",7,7,7,7
"Bathurst Manor, Wilson Heights, Downsview North",21,21,21,21
Bayview Village,4,4,4,4
"Bedford Park, Lawrence Manor East",22,22,22,22
Berczy Park,30,30,30,30
"Birch Cliff, Cliffside West",4,4,4,4
"Brockton, Parkdale Village, Exhibition Place",25,25,25,25
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",16,16,16,16
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",17,17,17,17


In [65]:
# Per-category maximum of every other column (first 20 categories).
# NOTE(review): max() is taken column by column, so one output row can combine
# the Neighbourhood, coordinates and Venue of different venues; it is not the
# record of any single venue. Confirm this is the intended summary.
Venues_df.groupby('Category').max().head(20)

Unnamed: 0_level_0,Neighbourhood,Latitude,Longitude,Venue
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Accessories Store,"Wexford, Maryvale",43.750072,-79.295849,Puffin Gear
Airport,Downsview,43.737473,-79.39442,Toronto Downsview Airport (YZD)
Airport Food Court,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,Billy Bishop Café
Airport Gate,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,Gate 8
Airport Lounge,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,Porter Lounge
Airport Service,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,ORNGE - Toronto Air Base
Airport Terminal,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,Porter FBO Limited
American Restaurant,"Toronto Dominion Centre, Design Exchange",43.778517,-79.239476,braised
Antique Shop,"High Park, The Junction South",43.661608,-79.464763,SMASH
Aquarium,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,Ripley's Aquarium of Canada


## Neighbourhood Analysis

### One hot encoding

In [70]:
# One indicator column per venue category, named exactly after the category
category_dummies = pd.get_dummies(Venues_df[['Category']], prefix="", prefix_sep="")

# Put the neighbourhood name in front of the indicator columns
Venues_1H = pd.concat(
    [Venues_df['Neighbourhood'], category_dummies],
    axis=1,
    sort=False,
)
Venues_1H.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Stadium,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Business Service,Butcher,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Caribbean Restaurant,...,Restaurant,River,Road,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Social Club,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [71]:
# (venue rows, 1 neighbourhood column + one column per category)
Venues_1H.shape

(1337, 241)

### Grouping
Group the rows by neighbourhood and take the mean of the frequency of occurrence of each category.

In [75]:
# Share of each venue category within every neighbourhood (mean of the 0/1 indicators)
Venues_gr = Venues_1H.groupby('Neighbourhood', as_index=False).mean()
Venues_gr.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Stadium,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Business Service,Butcher,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Caribbean Restaurant,...,Restaurant,River,Road,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Social Club,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.047619,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.045455,0.0,0.0,0.0,0.0,...,0.045455,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [133]:
## Most frequent venue categories per neighbourhood (top `num_top_venues`)

num_top_venues = 10

# One Series of top categories per neighbourhood. The frame is built once after
# the loop instead of growing a DataFrame column by column inside it.
top_by_neighbourhood = {}

for neighbourhood in Venues_gr['Neighbourhood']:
    # Transpose the neighbourhood's single row into (category, frequency) pairs
    temp = Venues_gr[Venues_gr['Neighbourhood'] == neighbourhood].T.reset_index()
    temp.columns = ['venue','freq']
    # Row 0 is the 'Neighbourhood' cell itself; copy so the assignment below
    # does not write into a slice of the transposed frame.
    temp = temp.iloc[1:].copy()
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    ttop = temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues)
    top_by_neighbourhood[neighbourhood] = ttop['venue']

# Rows = neighbourhoods, columns = rank positions
dfTop = pd.DataFrame(top_by_neighbourhood).T
dfTop.reset_index(inplace=True)
# Derive the column names from num_top_venues so changing it above is enough
dfTop.columns = ["Neighbourhood"] + ['Top_{}'.format(n+1) for n in range(num_top_venues)]
dfTop.head()

Unnamed: 0,Neighbourhood,Top_1,Top_2,Top_3,Top_4,Top_5,Top_6,Top_7,Top_8,Top_9,Top_10
0,Agincourt,Breakfast Spot,Lounge,Latin American Restaurant,Clothing Store,Skating Rink,Accessories Store,Motel,Mediterranean Restaurant,Men's Store,Metro Station
1,"Alderwood, Long Branch",Pizza Place,Sandwich Place,Gym,Pharmacy,Pub,Coffee Shop,Middle Eastern Restaurant,Monument / Landmark,Modern European Restaurant,Mobile Phone Shop
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Pharmacy,Restaurant,Pizza Place,Middle Eastern Restaurant,Shopping Mall,Mobile Phone Shop,Sandwich Place,Supermarket
3,Bayview Village,Japanese Restaurant,Café,Chinese Restaurant,Bank,Accessories Store,Modern European Restaurant,Motel,Monument / Landmark,Mobile Phone Shop,Museum
4,"Bedford Park, Lawrence Manor East",Sandwich Place,Coffee Shop,Italian Restaurant,Pizza Place,Indian Restaurant,Butcher,Café,Locksmith,Liquor Store,Sushi Restaurant


## Cluster Neighborhoods

In [134]:
# k-means implementation from scikit-learn
from sklearn.cluster import KMeans

# number of clusters to form
kclusters = 5

# feature matrix: the category frequencies without the neighbourhood name
Toronto_K = Venues_gr.drop('Neighbourhood', axis=1)

# fit k-means with a fixed seed so the labels are reproducible
kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(Toronto_K)

# cluster label assigned to each row of Venues_gr (first 100 shown)
kmeans.labels_[:100]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 4, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1,
       0, 1, 1, 1, 1, 1, 0, 2], dtype=int32)

In [135]:
# add clustering labels as the first column of dfTop
# Guarded so the cell can be re-run: DataFrame.insert raises ValueError when
# the column already exists.
if 'Cluster Labels' in dfTop.columns:
    dfTop['Cluster Labels'] = kmeans.labels_
else:
    dfTop.insert(0, 'Cluster Labels', kmeans.labels_)

# join the labels and top venue categories onto Toronto_df by neighbourhood name;
# neighbourhoods absent from dfTop get NaN, which is why the labels become floats
Toronto_merged = Toronto_df.join(dfTop.set_index('Neighbourhood'), on='Neighbourhood')

Toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,Top_1,Top_2,Top_3,Top_4,Top_5,Top_6,Top_7,Top_8,Top_9,Top_10
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,4.0,Fast Food Restaurant,Accessories Store,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1.0,Construction & Landscaping,Bar,Accessories Store,Movie Theater,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Medical Center,Breakfast Spot,Bank,Electronics Store,Intersection,Mexican Restaurant,Rental Car Location,Restaurant,Miscellaneous Shop,Modern European Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Mexican Restaurant,Korean BBQ Restaurant,Accessories Store,Movie Theater,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Middle Eastern Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Thai Restaurant,Fried Chicken Joint,Bank,Caribbean Restaurant,Gas Station,Athletics & Sports,Bakery,Hakka Restaurant,Middle Eastern Restaurant,Miscellaneous Shop


In [136]:
# Keep only the neighbourhoods that received a cluster label
# (rows with no match in the join above have NaN in 'Cluster Labels')
YYZ_df = Toronto_merged.loc[Toronto_merged['Cluster Labels'].notna()]

In [128]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [139]:
# Centre the map on City Hall using the geocoder result directly: the globals
# `latitude`/`longitude` may have been overwritten by an earlier marker loop
# that reused those names.
map_clusters = folium.Map(location=[location.latitude, location.longitude], zoom_start=11)

# set color scheme for the clusters: one colour per cluster, evenly spaced
# along the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(YYZ_df['Latitude'], YYZ_df['Longitude'], YYZ_df['Neighbourhood'], YYZ_df['Cluster Labels']):
    # Labels are stored as floats after the join; use them as integer indices
    cluster = int(cluster)
    label = folium.Popup('Cluster ' + str(cluster + 1) + '\n' + str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # index by the label itself instead of relying on rainbow[-1] for cluster 0
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster]
        ).add_to(map_clusters)

map_clusters

In [161]:
# Cluster 0: Borough (column 1) plus the label and top-venue columns (5 onward)
YYZ_df[YYZ_df['Cluster Labels'] == 0].iloc[:, [1, *range(5, YYZ_df.shape[1])]].head()

Unnamed: 0,Borough,Cluster Labels,Top_1,Top_2,Top_3,Top_4,Top_5,Top_6,Top_7,Top_8,Top_9,Top_10
14,Scarborough,0.0,Playground,Park,Bakery,Motel,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant
23,North York,0.0,Convenience Store,Park,Accessories Store,Movie Theater,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant
25,North York,0.0,Park,Food & Drink Shop,Accessories Store,Motel,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant
40,East York,0.0,Convenience Store,Park,Intersection,Accessories Store,Movie Theater,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant
44,Central Toronto,0.0,Park,Swim School,Bus Line,Accessories Store,Motel,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station


In [162]:
# Cluster 1: Borough (column 1) plus the label and top-venue columns (5 onward)
YYZ_df[YYZ_df['Cluster Labels'] == 1].iloc[:, [1, *range(5, YYZ_df.shape[1])]].head()

Unnamed: 0,Borough,Cluster Labels,Top_1,Top_2,Top_3,Top_4,Top_5,Top_6,Top_7,Top_8,Top_9,Top_10
1,Scarborough,1.0,Construction & Landscaping,Bar,Accessories Store,Movie Theater,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant
2,Scarborough,1.0,Medical Center,Breakfast Spot,Bank,Electronics Store,Intersection,Mexican Restaurant,Rental Car Location,Restaurant,Miscellaneous Shop,Modern European Restaurant
3,Scarborough,1.0,Coffee Shop,Mexican Restaurant,Korean BBQ Restaurant,Accessories Store,Movie Theater,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Middle Eastern Restaurant
4,Scarborough,1.0,Thai Restaurant,Fried Chicken Joint,Bank,Caribbean Restaurant,Gas Station,Athletics & Sports,Bakery,Hakka Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
5,Scarborough,1.0,Playground,Smoke Shop,Jewelry Store,Monument / Landmark,Martial Arts School,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station
