# Segmenting and Clustering Neighborhoods in Toronto - Part III

In [1]:
import pandas as pd
import numpy as np
import requests
import json
import matplotlib.cm as cm
import matplotlib.colors as colors

from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



### Data Collection

We scrape the list of Toronto postal codes, boroughs and neighbourhoods from Wikipedia and join it with the latitude/longitude values for each postal code.

In [2]:
# Latitude/longitude per postal code; the key column is renamed so it matches
# the scraped table at merge time.
path = 'https://cocl.us/Geospatial_data'
df_latlong = pd.read_csv(path).rename(columns={'Postal Code': 'PostalCode'})

src_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Fail fast on a hung connection or an HTTP error instead of parsing an error page.
response = requests.get(src_url, timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

# Only the first <table> on the page is used.
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found at {}'.format(src_url))

# Header and cell texts keep only what precedes the first newline.
columns = [header.text.split('\n')[0] for header in table.find_all('th')]

# One list per <tr>; rows without <td> cells (e.g. the header row) come out empty
# and are removed by dropna() below.
table_rows = [
    [cell.text.split('\n')[0] for cell in row.find_all('td')]
    for row in table.find_all('tr')
]

df = pd.DataFrame(table_rows)
df.columns = columns
df = df.dropna()

# .copy() so the assignment below writes to an independent frame, not a slice.
df = df[df['Borough'] != 'Not assigned'].copy()

# A neighbourhood without a name takes the name of its borough.
unnamed = df['Neighbourhood'] == 'Not assigned'
df.loc[unnamed, 'Neighbourhood'] = df.loc[unnamed, 'Borough']
df = df.reset_index(drop=True)

# One row per (postcode, borough) pair ...
df_grouped = df[['Postcode', 'Borough']].drop_duplicates().reset_index(drop=True)

# ... carrying every neighbourhood of that postcode as a comma-separated string.
neighbourhoods_by_code = df.groupby('Postcode', sort=False)['Neighbourhood'].agg(', '.join)
df_grouped['Neighbourhood'] = df_grouped['Postcode'].map(neighbourhoods_by_code)
df_grouped = df_grouped.rename(columns={'Postcode': 'PostalCode'})

# Left merge: every scraped postal code is kept even if it has no coordinates.
df_final = pd.merge(df_grouped, df_latlong, on='PostalCode', how='left')
df_final.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


We will work only with the rows whose Borough contains the word "Toronto".

In [3]:
# keep only the boroughs whose name mentions Toronto, then renumber the rows
is_toronto_borough = df_final['Borough'].str.contains('Toronto')
df_final = df_final.loc[is_toronto_borough].reset_index(drop=True)
df_final

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564
7,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
8,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752


### Plotting Neighbourhoods

In [4]:
# create map of Toronto using latitude and longitude values
# (these two names are reused later for the clustered map)
latitude = 43.66
longitude = -79.38
map_tor = folium.Map(location=[latitude, longitude], zoom_start=12)

# add one circle marker per postal-code row, labelled "<neighbourhood>, <borough>"
for lat, lng, borough, neighbourhood in zip(df_final['Latitude'], df_final['Longitude'], df_final['Borough'], df_final['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    # parse_html belongs to the Popup, so it is set here and not on the marker
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_tor)

map_tor

### Using Foursquare API to fetch Venue details

In [5]:
import os
import warnings

# Foursquare credentials are read from the environment so they never end up in
# the saved notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20180605'  # value sent as the "v" query parameter
LIMIT = 100  # value sent as the "limit" query parameter

if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn('FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
                  'Foursquare requests will be rejected.')

In [6]:
def getNearbyVenues(pcode, borough, names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint once per neighbourhood.

    Parameters
    ----------
    pcode, borough, names, latitudes, longitudes : iterable
        Parallel sequences consumed together with ``zip``; one request is
        issued per element.
    radius : int, default 500
        Sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns ``PostalCode``,
        ``Borough``, ``Neighbourhood``, ``Neighbourhood Latitude``,
        ``Neighbourhood Longitude``, ``Venue``, ``Venue Latitude``,
        ``Venue Longitude`` and ``Venue Category``. When nothing is returned
        (or the inputs are empty) the frame is empty but still has these
        columns.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with a non-success status code.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT`` values.
    """
    venue_columns = [
        'PostalCode',
        'Borough',
        'Neighbourhood',
        'Neighbourhood Latitude',
        'Neighbourhood Longitude',
        'Venue',
        'Venue Latitude',
        'Venue Longitude',
        'Venue Category']

    venue_rows = []
    for code, brgh, name, lat, lng in zip(pcode, borough, names, latitudes, longitudes):
        # progress log: one line per neighbourhood being fetched
        print(code, brgh, name)

        # let requests build and encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # a timeout keeps one stalled request from hanging the whole loop
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params=params,
            timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue may come back with no category; record None instead of failing
            categories = venue.get('categories') or []
            venue_rows.append((
                code,
                brgh,
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing columns to the constructor keeps the schema even with zero rows
    nearby_venues = pd.DataFrame(venue_rows, columns=venue_columns)

    return(nearby_venues)

In [7]:
# one Foursquare lookup per row of df_final; arguments follow the
# (pcode, borough, names, latitudes, longitudes) order of getNearbyVenues
toronto_venues = getNearbyVenues(
    df_final['PostalCode'],
    df_final['Borough'],
    df_final['Neighbourhood'],
    df_final['Latitude'],
    df_final['Longitude'],
)

M5A Downtown Toronto Harbourfront, Regent Park
M5B Downtown Toronto Ryerson, Garden District
M5C Downtown Toronto St. James Town
M4E East Toronto The Beaches
M5E Downtown Toronto Berczy Park
M5G Downtown Toronto Central Bay Street
M6G Downtown Toronto Christie
M5H Downtown Toronto Adelaide, King, Richmond
M6H West Toronto Dovercourt Village, Dufferin
M5J Downtown Toronto Harbourfront East, Toronto Islands, Union Station
M6J West Toronto Little Portugal, Trinity
M4K East Toronto The Danforth West, Riverdale
M5K Downtown Toronto Design Exchange, Toronto Dominion Centre
M6K West Toronto Brockton, Exhibition Place, Parkdale Village
M4L East Toronto The Beaches West, India Bazaar
M5L Downtown Toronto Commerce Court, Victoria Hotel
M4M East Toronto Studio District
M4N Central Toronto Lawrence Park
M5N Central Toronto Roselawn
M4P Central Toronto Davisville North
M5P Central Toronto Forest Hill North, Forest Hill West
M6P West Toronto High Park, The Junction South
M4R Central Toronto North To

In [8]:
toronto_venues.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,Toronto Cooper Koo Family Cherry St YMCA Centre,43.653191,-79.357947,Gym / Fitness Center
3,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
5,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
6,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
7,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,The Extension Room,43.653313,-79.359725,Gym / Fitness Center
8,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,Corktown Common,43.655618,-79.356211,Park
9,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,Dominion Pub and Kitchen,43.656919,-79.358967,Pub


We have extracted a list of venues around each neighbourhood.

In [9]:
toronto_venues.shape

(1707, 9)

In [10]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,PostalCode,Borough,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100,100,100
Berczy Park,55,55,55,55,55,55,55,55
"Brockton, Exhibition Place, Parkdale Village",22,22,22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern,16,16,16,16,16,16,16,16
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",16,16,16,16,16,16,16,16
"Cabbagetown, St. James Town",45,45,45,45,45,45,45,45
Central Bay Street,85,85,85,85,85,85,85,85
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100,100,100
Christie,16,16,16,16,16,16,16,16
Church and Wellesley,91,91,91,91,91,91,91,91


In [11]:
# number of distinct venue categories (missing values, if any, count as one)
n_categories = toronto_venues['Venue Category'].nunique(dropna=False)
print('There are {} uniques categories.'.format(n_categories))

There are 236 uniques categories.


In [12]:
# one indicator column per venue category, named after the category itself
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# attach the neighbourhood of each venue (lands as the last column)
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood']

# rotate that last column to the front
ordered_columns = list(toronto_onehot.columns)
ordered_columns.insert(0, ordered_columns.pop())
toronto_onehot = toronto_onehot[ordered_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [13]:
toronto_onehot.shape

(1707, 237)

In [14]:
# averaging the one-hot columns per neighbourhood gives, for each category,
# the fraction of that neighbourhood's venues belonging to it
toronto_grouped = toronto_onehot.groupby('Neighbourhood', as_index=False).mean()
toronto_grouped

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0625,0.0625,0.0625,0.125,0.1875,0.125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,...,0.0,0.0,0.0,0.011765,0.0,0.0,0.011765,0.0,0.0,0.011765
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.06,0.0,0.04,0.01,0.0,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.010989,0.0,0.0,0.0,0.0,0.0,0.0,0.010989,0.0,...,0.0,0.0,0.0,0.0,0.010989,0.010989,0.0,0.0,0.010989,0.010989


In [15]:
toronto_grouped.shape

(38, 237)

### Extracting the top 5 venues for each neighbourhood

In [16]:
num_top_venues = 5

# print, for every neighbourhood, its most frequent venue categories
for _, hood_row in toronto_grouped.iterrows():
    hood = hood_row['Neighbourhood']
    print("----"+hood+"----")
    # first entry of the row is the neighbourhood name; the rest are frequencies
    temp = pd.DataFrame({
        'venue': hood_row.index[1:],
        'freq': hood_row.iloc[1:].astype(float).round(2).values,
    })
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
            venue  freq
0     Coffee Shop  0.08
1            Café  0.05
2      Steakhouse  0.04
3             Bar  0.04
4  Cosmetics Shop  0.03


----Berczy Park----
                venue  freq
0         Coffee Shop  0.07
1        Cocktail Bar  0.05
2  Italian Restaurant  0.04
3                Café  0.04
4         Cheese Shop  0.04


----Brockton, Exhibition Place, Parkdale Village----
            venue  freq
0  Breakfast Spot  0.09
1            Café  0.09
2     Coffee Shop  0.09
3     Yoga Studio  0.05
4   Grocery Store  0.05


----Business Reply Mail Processing Centre 969 Eastern----
                  venue  freq
0    Light Rail Station  0.12
1           Yoga Studio  0.06
2         Auto Workshop  0.06
3         Garden Center  0.06
4  Fast Food Restaurant  0.06


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
                venue  freq
0     Airport Service  0.19
1      Airport Lounge

In [17]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``, skipping its first entry.

    The first element of ``row`` is ignored; the remaining values are sorted
    in descending order and the index labels of the first ``num_top_venues``
    of them are returned as an array.
    """
    frequencies = row.iloc[1:]
    ranked_labels = frequencies.sort_values(ascending=False).index.values
    return ranked_labels[0:num_top_venues]

In [18]:
num_top_venues = 10

# column headers: '1st Most Common Venue', '2nd Most Common Venue', ...
columns = ['Neighbourhood']
for rank in range(1, num_top_venues + 1):
    # explicit suffix rule instead of catching an IndexError:
    # 11th-13th take 'th'; otherwise the last digit decides
    if 11 <= rank % 100 <= 13:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(rank % 10, 'th')
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe with one row per neighbourhood in toronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

# fill every row with its top categories, most frequent first
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Steakhouse,Bar,Cosmetics Shop,American Restaurant,Thai Restaurant,Hotel,Restaurant,Burger Joint
1,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Bakery,Steakhouse,Cheese Shop,Seafood Restaurant,Café,Italian Restaurant,Farmers Market
2,"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,Breakfast Spot,Café,Bakery,Stadium,Burrito Place,Restaurant,Caribbean Restaurant,Climbing Gym,Yoga Studio
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Auto Workshop,Garden Center,Fast Food Restaurant,Farmers Market,Comic Shop,Recording Studio,Restaurant,Burrito Place
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Plane,Sculpture Garden,Coffee Shop,Harbor / Marina,Bar,Boat or Ferry,Airport Gate


In [19]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:50]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 3, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [20]:
# attach each neighbourhood's k-means label to its venue ranking
neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_

# bring the rankings and labels onto the postal-code table, which carries
# the latitude/longitude of each neighbourhood
venue_rankings = neighborhoods_venues_sorted.set_index('Neighbourhood')
toronto_merged = df_final.join(venue_rankings, on='Neighbourhood')

toronto_merged.head(10) # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,Coffee Shop,Bakery,Park,Café,Pub,Mexican Restaurant,Breakfast Spot,Theater,Gym / Fitness Center,Chocolate Shop,0
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Restaurant,Fast Food Restaurant,Bubble Tea Shop,Pizza Place,Italian Restaurant,0
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,Coffee Shop,Café,Hotel,Restaurant,Italian Restaurant,Gastropub,Clothing Store,Beer Bar,Breakfast Spot,Cocktail Bar,0
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,Pub,Neighborhood,Health Food Store,Other Great Outdoors,Trail,Yoga Studio,Dim Sum Restaurant,Diner,Discount Store,Dog Run,0
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,Coffee Shop,Cocktail Bar,Beer Bar,Bakery,Steakhouse,Cheese Shop,Seafood Restaurant,Café,Italian Restaurant,Farmers Market,0
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Middle Eastern Restaurant,Burger Joint,Ice Cream Shop,Salad Place,Bubble Tea Shop,Spa,0
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564,Grocery Store,Café,Park,Italian Restaurant,Convenience Store,Baby Store,Coffee Shop,Restaurant,Diner,Nightclub,0
7,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568,Coffee Shop,Café,Steakhouse,Bar,Cosmetics Shop,American Restaurant,Thai Restaurant,Hotel,Restaurant,Burger Joint,0
8,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259,Supermarket,Bakery,Pharmacy,Gym / Fitness Center,Café,Bank,Bar,Music Venue,Middle Eastern Restaurant,Brewery,0
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,Coffee Shop,Hotel,Aquarium,Café,Scenic Lookout,Brewery,Fried Chicken Joint,Music Venue,Restaurant,Sports Bar,0


### Plotting the clustered neighbourhoods

In [21]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters: kclusters evenly spaced rainbow colours as hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 shifts the palette by one: label 0 wraps to the last colour,
    # so every label still gets its own colour
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Cluster 1 - Neighbourhoods with most coffee shops and cafes

In [22]:
# rows labelled 0, showing Borough (column 1) plus every column from position 5 onwards
in_cluster = toronto_merged['Cluster Labels'] == 0
shown_positions = [1] + list(range(5, toronto_merged.shape[1]))
toronto_merged.loc[in_cluster, toronto_merged.columns[shown_positions]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,Downtown Toronto,Coffee Shop,Bakery,Park,Café,Pub,Mexican Restaurant,Breakfast Spot,Theater,Gym / Fitness Center,Chocolate Shop,0
1,Downtown Toronto,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Restaurant,Fast Food Restaurant,Bubble Tea Shop,Pizza Place,Italian Restaurant,0
2,Downtown Toronto,Coffee Shop,Café,Hotel,Restaurant,Italian Restaurant,Gastropub,Clothing Store,Beer Bar,Breakfast Spot,Cocktail Bar,0
3,East Toronto,Pub,Neighborhood,Health Food Store,Other Great Outdoors,Trail,Yoga Studio,Dim Sum Restaurant,Diner,Discount Store,Dog Run,0
4,Downtown Toronto,Coffee Shop,Cocktail Bar,Beer Bar,Bakery,Steakhouse,Cheese Shop,Seafood Restaurant,Café,Italian Restaurant,Farmers Market,0
5,Downtown Toronto,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Middle Eastern Restaurant,Burger Joint,Ice Cream Shop,Salad Place,Bubble Tea Shop,Spa,0
6,Downtown Toronto,Grocery Store,Café,Park,Italian Restaurant,Convenience Store,Baby Store,Coffee Shop,Restaurant,Diner,Nightclub,0
7,Downtown Toronto,Coffee Shop,Café,Steakhouse,Bar,Cosmetics Shop,American Restaurant,Thai Restaurant,Hotel,Restaurant,Burger Joint,0
8,West Toronto,Supermarket,Bakery,Pharmacy,Gym / Fitness Center,Café,Bank,Bar,Music Venue,Middle Eastern Restaurant,Brewery,0
9,Downtown Toronto,Coffee Shop,Hotel,Aquarium,Café,Scenic Lookout,Brewery,Fried Chicken Joint,Music Venue,Restaurant,Sports Bar,0


### Cluster 2

In [23]:
# rows labelled 1, showing Borough (column 1) plus every column from position 5 onwards
in_cluster = toronto_merged['Cluster Labels'] == 1
shown_positions = [1] + list(range(5, toronto_merged.shape[1]))
toronto_merged.loc[in_cluster, toronto_merged.columns[shown_positions]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
20,Central Toronto,Trail,Jewelry Store,Sushi Restaurant,Bus Line,Yoga Studio,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,1


### Cluster 3

In [24]:
# rows labelled 2, showing Borough (column 1) plus every column from position 5 onwards
in_cluster = toronto_merged['Cluster Labels'] == 2
shown_positions = [1] + list(range(5, toronto_merged.shape[1]))
toronto_merged.loc[in_cluster, toronto_merged.columns[shown_positions]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
18,Central Toronto,Garden,Yoga Studio,Dim Sum Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,2


### Cluster 4

In [25]:
# rows labelled 3, showing Borough (column 1) plus every column from position 5 onwards
in_cluster = toronto_merged['Cluster Labels'] == 3
shown_positions = [1] + list(range(5, toronto_merged.shape[1]))
toronto_merged.loc[in_cluster, toronto_merged.columns[shown_positions]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
28,Central Toronto,Gym,Playground,Restaurant,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,3


### Cluster 5

In [26]:
# rows labelled 4, showing Borough (column 1) plus every column from position 5 onwards
in_cluster = toronto_merged['Cluster Labels'] == 4
shown_positions = [1] + list(range(5, toronto_merged.shape[1]))
toronto_merged.loc[in_cluster, toronto_merged.columns[shown_positions]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
32,Downtown Toronto,Park,Trail,Playground,Building,Donut Shop,Diner,Discount Store,Dog Run,Doner Restaurant,Yoga Studio,4
