# Clustering Toronto Neighbourhoods

In [46]:
# Import required libraries

import pandas as pd
import requests
import sys
from bs4 import BeautifulSoup

# scrape the html content
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = requests.get(URL)

# Create beautiful soup object
soup = BeautifulSoup(page.content, 'html.parser')

In [47]:
table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

#print(table_contents)
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
df.head(7)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"


In [48]:
df.shape

(103, 3)

In [49]:
# Insert geospatial coordinates and create pandas df

import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.

if os.environ.get('RUNTIME_ENV_LOCATION_TYPE') == 'external':
    endpoint_7090f49ae7784ed7b279229f4ce3b604 = 'https://s3-api.us-geo.objectstorage.softlayer.net'
else:
    endpoint_7090f49ae7784ed7b279229f4ce3b604 = 'https://s3-api.us-geo.objectstorage.service.networklayer.com'

client_7090f49ae7784ed7b279229f4ce3b604 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='rlBmohhFSr91xlRUIJsZ_V99ZAQgbNmq1DOlcT8z79yB',
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url=endpoint_7090f49ae7784ed7b279229f4ce3b604)

body = client_7090f49ae7784ed7b279229f4ce3b604.get_object(Bucket='macheinelearning-donotdelete-pr-f7wuwhhokb8ggh',Key='Geospatial_Coordinates.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_coords = pd.read_csv(body)
body = client_7090f49ae7784ed7b279229f4ce3b604.get_object(Bucket='macheinelearning-donotdelete-pr-f7wuwhhokb8ggh',Key='Geospatial_Coordinates.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_coords = pd.read_csv(body)
df_coords.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [50]:
neighborhoods = pd.merge(df, df_coords, on="PostalCode")
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


In [51]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
#from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


In [52]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="tn_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [53]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [54]:
# Lets focus into the borough of Etobicoke

Etobicoke_data = neighborhoods[neighborhoods['Borough'] == 'Etobicoke'].reset_index(drop=True)
Etobicoke_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201
3,M9P,Etobicoke,Westmount,43.696319,-79.532242
4,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724


In [55]:
# Getting the coordinates

address = 'Etobicoke, ON'
geolocator = Nominatim(user_agent="tn_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Etobicoke are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Etobicoke are 43.6435559, -79.5656326.


In [56]:
# create map of Manhattan using latitude and longitude values
map_Etobicoke = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(Etobicoke_data['Latitude'], Etobicoke_data['Longitude'], Etobicoke_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Etobicoke)  
    
map_Etobicoke

In [57]:
# Lets explore the first neighbourhood
Etobicoke_data.loc[0, 'Neighborhood']

'Islington Avenue'

In [58]:
CLIENT_ID = 'J2VNJFAYIYLLMUX0BP25WE1WMWUQOKRASOM0TDWHJIV0NMUG' # your Foursquare ID
CLIENT_SECRET = 'GXRQDD3VXGVRVSN4JGGPEJN1TTLSE3EMML5ZK4KTWVDCBFW2' # your Foursquare Secret
ACCESS_TOKEN = 'EW4X4KIJXWWJ5HCRXYBYL2YAYXJRBT4GAPIOTBF5LOC1ZCQF' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: J2VNJFAYIYLLMUX0BP25WE1WMWUQOKRASOM0TDWHJIV0NMUG
CLIENT_SECRET:GXRQDD3VXGVRVSN4JGGPEJN1TTLSE3EMML5ZK4KTWVDCBFW2


In [59]:
neighborhood_latitude = Etobicoke_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Etobicoke_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Etobicoke_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Islington Avenue are 43.6678556, -79.5322424.


In [60]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL


'https://api.foursquare.com/v2/venues/explore?&client_id=J2VNJFAYIYLLMUX0BP25WE1WMWUQOKRASOM0TDWHJIV0NMUG&client_secret=GXRQDD3VXGVRVSN4JGGPEJN1TTLSE3EMML5ZK4KTWVDCBFW2&v=20180604&ll=43.6678556,-79.5322424&radius=500&limit=100'

In [61]:
# Send the GET request and examine the resutls
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60b2a8fd5499bd0efbadddd0'},
  'headerLocation': 'Edenbridge - Humber Valley',
  'headerFullLocation': 'Edenbridge - Humber Valley, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 0,
  'suggestedBounds': {'ne': {'lat': 43.672355604500005,
    'lng': -79.52603299418392},
   'sw': {'lat': 43.6633555955, 'lng': -79.53845180581608}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': []}]}}

In [62]:
# define a function to repeat the same process to all the neighborhoods
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [63]:
Etobicoke_venues = getNearbyVenues(names=Etobicoke_data['Neighborhood'],
                                   latitudes=Etobicoke_data['Latitude'],
                                   longitudes=Etobicoke_data['Longitude']
                                  )

Islington Avenue
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Westmount
Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens
New Toronto, Mimico South, Humber Bay Shores
South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens
Alderwood, Long Branch
The Kingsway, Montgomery Road, Old Mill North
Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East
Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West


In [64]:
# check the size of the df
print(Etobicoke_venues.shape)
Etobicoke_venues.head()

(68, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724,Marius Bakery,43.648965,-79.549381,Bakery
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201,LCBO,43.642099,-79.576592,Liquor Store
2,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201,Starbucks,43.641312,-79.576924,Coffee Shop
3,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201,The Beer Store,43.641313,-79.576925,Beer Store
4,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201,Shoppers Drug Mart,43.641312,-79.576924,Pharmacy


In [65]:
# find how many venues per neighborhood
Etobicoke_venues.groupby('Neighborhood').count()
print('There are {} uniques categories.'.format(len(Etobicoke_venues['Venue Category'].unique())))

There are 39 uniques categories.


In [66]:
# analyze each neighborhood then grouping

# one hot encoding
Etobicoke_onehot = pd.get_dummies(Etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Etobicoke_onehot['Neighborhood'] = Etobicoke_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Etobicoke_onehot.columns[-1]] + list(Etobicoke_onehot.columns[:-1])
Etobicoke_onehot = Etobicoke_onehot[fixed_columns]

Etobicoke_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Bakery,Baseball Field,Beer Store,Breakfast Spot,Burger Joint,Burrito Place,Café,Chinese Restaurant,Coffee Shop,Convenience Store,Discount Store,Fast Food Restaurant,Flower Shop,Fried Chicken Joint,Grocery Store,Gym,Hardware Store,Hobby Shop,Intersection,Kids Store,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Park,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Pub,Restaurant,River,Sandwich Place,Shopping Plaza,Social Club,Supplement Shop,Tanning Salon,Wings Joint
0,"West Deane Park, Princess Gardens, Martin Grov...",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0


In [67]:
Etobicoke_grouped = Etobicoke_onehot.groupby('Neighborhood').mean().reset_index()

In [68]:
# Let's print each neighborhood along with the top 5 most common venues

num_top_venues = 5

for hood in Etobicoke_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Etobicoke_grouped[Etobicoke_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alderwood, Long Branch----
            venue  freq
0     Pizza Place  0.22
1             Pub  0.11
2            Pool  0.11
3             Gym  0.11
4  Sandwich Place  0.11


----Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood----
            venue  freq
0    Liquor Store  0.11
1      Beer Store  0.11
2       Pet Store  0.11
3  Shopping Plaza  0.11
4            Café  0.11


----Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens----
                 venue  freq
0                 Park   0.5
1       Sandwich Place   0.5
2  American Restaurant   0.0
3                 Pool   0.0
4   Mexican Restaurant   0.0


----Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West----
               venue  freq
0        Wings Joint  0.07
1  Convenience Store  0.07
2         Kids Store  0.07
3             Bakery  0.07
4     Hardware Store  0.07


----New Toronto, Mimico South, Humber Bay Shores----
                 venue  freq
0 

In [69]:
# putting that into a df

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

# get top 10 venues for each neighborhood

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
import numpy as np
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

# create columns according to number of top venues

num_top_venues = 10
indicators = ['st', 'nd', 'rd']
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Etobicoke_grouped['Neighborhood']

for ind in np.arange(Etobicoke_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Etobicoke_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alderwood, Long Branch",Pizza Place,Pub,Pool,Gym,Sandwich Place,Pharmacy,Coffee Shop,Playground,Middle Eastern Restaurant,Park
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",Liquor Store,Beer Store,Pet Store,Shopping Plaza,Café,Pharmacy,Coffee Shop,Convenience Store,Pizza Place,Pool
2,"Kingsview Village, St. Phillips, Martin Grove ...",Park,Sandwich Place,American Restaurant,Pool,Mexican Restaurant,Middle Eastern Restaurant,Pet Store,Pharmacy,Pizza Place,Playground
3,"Mimico NW, The Queensway West, South of Bloor,...",Wings Joint,Convenience Store,Kids Store,Bakery,Hardware Store,Gym,Grocery Store,Discount Store,Fast Food Restaurant,Sandwich Place
4,"New Toronto, Mimico South, Humber Bay Shores",American Restaurant,Restaurant,Mexican Restaurant,Liquor Store,Pharmacy,Bakery,Hobby Shop,Pizza Place,Gym,Flower Shop


In [70]:
!pip install -U numpy

!pip install -U pandas

!pip install -U scipy==1.4.1

!pip install -U scikit-learn

!pip install -U imbalanced-learn



In [71]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

In [72]:
# run kmeans clustering

# set number of clusters
kclusters = 5

Etobicoke_grouped_clustering = Etobicoke_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Etobicoke_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 3, 2, 2, 0, 2, 4, 1, 2], dtype=int32)

In [84]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Etobicoke_merged = Etobicoke_data

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
Etobicoke_merged = Etobicoke_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
Etobicoke_merged = Etobicoke_merged.iloc[1: , :]
Etobicoke_merged.head() # check the last columns!

ValueError: cannot insert Cluster Labels, already exists

In [85]:
# create map
Etobicoke_merged = Etobicoke_merged.astype({'Cluster Labels': int})
                                            
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Etobicoke_merged['Latitude'], Etobicoke_merged['Longitude'], Etobicoke_merged['Neighborhood'], Etobicoke_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [None]:
# Examining each cluster

In [86]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 0, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Etobicoke,0,Baseball Field,Breakfast Spot,American Restaurant,Pub,Middle Eastern Restaurant,Park,Pet Store,Pharmacy,Pizza Place,Playground


In [87]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 1, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Etobicoke,1,Bakery,Kids Store,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Park,Pet Store,Pharmacy,Pizza Place,Playground


In [88]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 2, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Etobicoke,2,Liquor Store,Beer Store,Pet Store,Shopping Plaza,Café,Pharmacy,Coffee Shop,Convenience Store,Pizza Place,Pool
3,Etobicoke,2,Intersection,Middle Eastern Restaurant,Sandwich Place,Pizza Place,Chinese Restaurant,Coffee Shop,Discount Store,Pool,Park,Pet Store
5,Etobicoke,2,American Restaurant,Restaurant,Mexican Restaurant,Liquor Store,Pharmacy,Bakery,Hobby Shop,Pizza Place,Gym,Flower Shop
6,Etobicoke,2,Grocery Store,Fast Food Restaurant,Beer Store,Pharmacy,Pizza Place,Sandwich Place,Fried Chicken Joint,American Restaurant,Park,Pet Store
7,Etobicoke,2,Pizza Place,Pub,Pool,Gym,Sandwich Place,Pharmacy,Coffee Shop,Playground,Middle Eastern Restaurant,Park
10,Etobicoke,2,Wings Joint,Convenience Store,Kids Store,Bakery,Hardware Store,Gym,Grocery Store,Discount Store,Fast Food Restaurant,Sandwich Place


In [89]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 3, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Etobicoke,3,Park,Sandwich Place,American Restaurant,Pool,Mexican Restaurant,Middle Eastern Restaurant,Pet Store,Pharmacy,Pizza Place,Playground


In [90]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 4, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Etobicoke,4,Park,River,American Restaurant,Pool,Mexican Restaurant,Middle Eastern Restaurant,Pet Store,Pharmacy,Pizza Place,Playground
