# Segmenting and Clustering Neighborhoods in Toronto

Project Assignment from IBM Data Science Professional Certificate - Applied Data Science Capstone

### Importing the data from Wikipedia into a pandas dataframe

In [74]:
import pandas as pd
import numpy as np
import requests
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

In [75]:
link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

data = pd.read_html(link)[0]
data

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [76]:
# Cleaning the dataset

# The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood
df_ngbh_can = data.rename(columns={'Postal Code': 'PostalCode', 'Neighbourhood': 'Neighborhood'})

# Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
df_ngbh_can = df_ngbh_can[~(df_ngbh_can.Borough == 'Not assigned')].reset_index(drop=True)

# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
df_ngbh_can['Neighborhood'] = [df_ngbh_can.Borough if x == 'Not assigned' else x for x in df_ngbh_can.Neighborhood]

df_ngbh_can.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [77]:
# Dataframe: number of rows
print("Number of rows contained in the df:", df_ngbh_can.shape[0])

Number of rows contained in the df: 103


### Adding latitudes and longitudes to the dataframe

In [78]:
# Using the csv file since geocoder is not working properly
csv_root = 'http://cocl.us/Geospatial_data'

df_ltd_long = pd.read_csv(csv_root)
df_ltd_long

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [79]:
# Merging both dataframes into one: df_canada = df_ngbh_can + df_ltd_long
df_canada = df_ngbh_can.merge(df_ltd_long, left_on='PostalCode', right_on='Postal Code', how='inner')

# Dropping innecessary columns: Postal Code (duplicated)
df_canada.drop('Postal Code', axis=1, inplace=True)

df_canada.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [80]:
# df_canada: number of rows
print("Number of rows contained in the df:", df_canada.shape[0])

Number of rows contained in the df: 103


## Process to cluster neighborhoods in Toronto based on top venues

After cleaning the dataset and defining a new df (df_toronto), we will start exploring and analysing the df venues
- Exploring each neighborhood in Toronto
    - Cleaning the dataset
    - Defining the 'getNearbyVenues' function to obtain all the data from venues
    - Define the credentials to Foursquare
- Getting the venues per neighborhood
    - Analyse each neighborhood: Top 10 venues
- Clustering Toronto's neighborhoods
    - Examine Toronto's clusters

### Exploring Each Neighborhood in Toronto

First, we will clean the database in order to filter by Toronto's neighborhoods only

In [81]:
# Filtering by Boroughs that contain the word 'Toronto': Conforming the new df_toronto
df_toronto = df_canada[df_canada.Borough.str.contains('Toronto')].reset_index(drop=True)
df_toronto

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [82]:
# Defining the function to obtain nearby venues: getNearbyVenues
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [83]:
# Defining the credentials needed to read the venues from Foursquare:
CLIENT_ID = 'ANNV5QVQNPQPHJWUIR1J2YAQ44WIIBFLJYP512NF1PG0WSRD' # your Foursquare ID
CLIENT_SECRET = '3JQHTEPR1ZQS02ZUYFOLLMH5ZXY1UIVYMJJKT14LL1OSYARG' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ANNV5QVQNPQPHJWUIR1J2YAQ44WIIBFLJYP512NF1PG0WSRD
CLIENT_SECRET:3JQHTEPR1ZQS02ZUYFOLLMH5ZXY1UIVYMJJKT14LL1OSYARG


### Getting the venues per neighborhood

In [111]:
# Getting the venues with the pre-defined function using 'PostalCode'
toronto_venues = getNearbyVenues(df_toronto.Neighborhood, df_toronto.Latitude, df_toronto.Longitude, radius=1000)

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West, Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
R

In [112]:
print(toronto_venues.shape)
toronto_venues.head()

(3174, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
4,"Regent Park, Harbourfront",43.65426,-79.360636,The Distillery Historic District,43.650244,-79.359323,Historic Site


In [113]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,100,100,100,100,100,100
"Brockton, Parkdale Village, Exhibition Place",100,100,100,100,100,100
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",50,50,50,50,50,50
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",14,14,14,14,14,14
Central Bay Street,100,100,100,100,100,100
Christie,100,100,100,100,100,100
Church and Wellesley,100,100,100,100,100,100
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,100,100,100,100,100,100
Davisville North,100,100,100,100,100,100


In [114]:
# How many unique categories are there?
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 278 uniques categories.


#### Analyzing each of the Neighborhoods

In [115]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Zoo,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Art Gallery,...,Turkish Restaurant,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [116]:
# Grouping each neighborhood by taking the mean of the frequency of occurrence of each category 
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Zoo,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Turkish Restaurant,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.02
5,Christie,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0
6,Church and Wellesley,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.02
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.01
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.02


Introducing this data into a pandas dataframe

In [117]:
# Defining a function to order the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [118]:
# Defining the n top venues and sorting them 
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Café,Restaurant,Hotel,Japanese Restaurant,Beer Bar,Park,Seafood Restaurant,Gastropub,Bakery
1,"Brockton, Parkdale Village, Exhibition Place",Coffee Shop,Café,Bar,Restaurant,Bakery,Furniture / Home Store,Gift Shop,Tibetan Restaurant,Park,Performing Arts Venue
2,"Business reply mail Processing Centre, South C...",Park,Coffee Shop,Pizza Place,Brewery,Sushi Restaurant,Bakery,Italian Restaurant,Fast Food Restaurant,Snack Place,French Restaurant
3,"CN Tower, King and Spadina, Railway Lands, Har...",Coffee Shop,Harbor / Marina,Café,Scenic Lookout,Garden,Airport,Dog Run,Park,Dance Studio,Sculpture Garden
4,Central Bay Street,Coffee Shop,Sushi Restaurant,Hotel,Café,Ramen Restaurant,Park,Gastropub,Furniture / Home Store,Italian Restaurant,Japanese Restaurant


### Clustering Toronto's neighborhoods

In [119]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 0, 3, 4, 0, 0, 0, 2, 3, 0])

In [120]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_toronto

# merge toronto_grouped with df_toronto to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Pub,Café,Park,Theater,Bakery,Diner,Restaurant,Breakfast Spot,Performing Arts Venue
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Park,Gay Bar,Pizza Place,Italian Restaurant,Ramen Restaurant,Sushi Restaurant,Café,Middle Eastern Restaurant,Men's Store
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Coffee Shop,Gastropub,Italian Restaurant,Hotel,Japanese Restaurant,Diner,Creperie,Sushi Restaurant,New American Restaurant,Department Store
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Café,Coffee Shop,Japanese Restaurant,Restaurant,Gastropub,Italian Restaurant,Beer Bar,Seafood Restaurant,Hotel,Bakery
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Pub,Coffee Shop,Pizza Place,Beach,Japanese Restaurant,Breakfast Spot,Burger Joint,Health Food Store,Bar,Bakery


#### Creating the map visualization

Locating Toronto using geopy

In [121]:
from geopy.geocoders import Nominatim

address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [124]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine the clusters 

#### Cluster 1

In [125]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0,Coffee Shop,Pub,Café,Park,Theater,Bakery,Diner,Restaurant,Breakfast Spot,Performing Arts Venue
1,Downtown Toronto,0,Coffee Shop,Park,Gay Bar,Pizza Place,Italian Restaurant,Ramen Restaurant,Sushi Restaurant,Café,Middle Eastern Restaurant,Men's Store
4,East Toronto,0,Pub,Coffee Shop,Pizza Place,Beach,Japanese Restaurant,Breakfast Spot,Burger Joint,Health Food Store,Bar,Bakery
6,Downtown Toronto,0,Coffee Shop,Sushi Restaurant,Hotel,Café,Ramen Restaurant,Park,Gastropub,Furniture / Home Store,Italian Restaurant,Japanese Restaurant
7,Downtown Toronto,0,Korean Restaurant,Coffee Shop,Café,Grocery Store,Cocktail Bar,Mexican Restaurant,Sandwich Place,Ice Cream Shop,Pizza Place,Comedy Club
11,West Toronto,0,Café,Restaurant,Bar,Coffee Shop,Bakery,Italian Restaurant,Vegetarian / Vegan Restaurant,Cocktail Bar,Pizza Place,Asian Restaurant
12,East Toronto,0,Greek Restaurant,Coffee Shop,Café,Pub,Italian Restaurant,Fast Food Restaurant,Bank,Bakery,Furniture / Home Store,Ice Cream Shop
14,West Toronto,0,Coffee Shop,Café,Bar,Restaurant,Bakery,Furniture / Home Store,Gift Shop,Tibetan Restaurant,Park,Performing Arts Venue
15,East Toronto,0,Indian Restaurant,Coffee Shop,Grocery Store,Park,Restaurant,Beach,Sandwich Place,Café,Gym,Brewery
17,East Toronto,0,Coffee Shop,Bar,American Restaurant,Brewery,Vietnamese Restaurant,Diner,Bakery,French Restaurant,Sushi Restaurant,Italian Restaurant


#### Cluster 2

In [126]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Central Toronto,1,Park,Gym / Fitness Center,Café,Trail,Coffee Shop,College Quad,College Gym,Bookstore,Ethiopian Restaurant,Escape Room


#### Cluster 3

In [127]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,2,Coffee Shop,Gastropub,Italian Restaurant,Hotel,Japanese Restaurant,Diner,Creperie,Sushi Restaurant,New American Restaurant,Department Store
3,Downtown Toronto,2,Café,Coffee Shop,Japanese Restaurant,Restaurant,Gastropub,Italian Restaurant,Beer Bar,Seafood Restaurant,Hotel,Bakery
5,Downtown Toronto,2,Coffee Shop,Café,Restaurant,Hotel,Japanese Restaurant,Beer Bar,Park,Seafood Restaurant,Gastropub,Bakery
8,Downtown Toronto,2,Coffee Shop,Café,Hotel,Theater,Gym,Restaurant,Gastropub,Arts & Crafts Store,Seafood Restaurant,Plaza
10,Downtown Toronto,2,Coffee Shop,Hotel,Café,Restaurant,Japanese Restaurant,Park,Theater,Brewery,Gym,Baseball Stadium
13,Downtown Toronto,2,Coffee Shop,Hotel,Café,Restaurant,Japanese Restaurant,Concert Hall,Theater,Seafood Restaurant,Italian Restaurant,Gastropub
16,Downtown Toronto,2,Coffee Shop,Hotel,Restaurant,Café,Gastropub,Japanese Restaurant,Beer Bar,Concert Hall,American Restaurant,Thai Restaurant
34,Downtown Toronto,2,Coffee Shop,Café,Japanese Restaurant,Hotel,Restaurant,Beer Bar,Gastropub,American Restaurant,Seafood Restaurant,Park
35,Downtown Toronto,2,Café,Park,Japanese Restaurant,Gastropub,Diner,Bakery,Coffee Shop,Restaurant,Taiwanese Restaurant,Jewelry Store
36,Downtown Toronto,2,Coffee Shop,Café,Hotel,Restaurant,Theater,Gastropub,Japanese Restaurant,American Restaurant,Monument / Landmark,Park


#### Cluster 4

In [128]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,West Toronto,3,Café,Coffee Shop,Park,Sushi Restaurant,Italian Restaurant,Bar,Brewery,Bakery,Portuguese Restaurant,Pharmacy
19,Central Toronto,3,Italian Restaurant,Sushi Restaurant,Café,Pharmacy,Bank,Coffee Shop,Asian Restaurant,Dry Cleaner,Lingerie Store,Clothing Store
21,Central Toronto,3,Park,Café,Coffee Shop,Bank,Trail,Skating Rink,Burger Joint,Pharmacy,Liquor Store,Sushi Restaurant
23,Central Toronto,3,Italian Restaurant,Diner,Café,Park,Sporting Goods Shop,Coffee Shop,Skating Rink,Mexican Restaurant,Restaurant,Food & Drink Shop
26,Central Toronto,3,Coffee Shop,Italian Restaurant,Sushi Restaurant,Pizza Place,Restaurant,Indian Restaurant,Café,Fast Food Restaurant,Dessert Shop,Gym
29,Central Toronto,3,Grocery Store,Coffee Shop,Italian Restaurant,Gym,Restaurant,Thai Restaurant,Park,Sandwich Place,Bank,Playground
31,Central Toronto,3,Coffee Shop,Sushi Restaurant,Italian Restaurant,Thai Restaurant,Grocery Store,Restaurant,Park,Pizza Place,French Restaurant,Café
33,Downtown Toronto,3,Park,Coffee Shop,Grocery Store,Convenience Store,BBQ Joint,Pie Shop,Sandwich Place,Candy Store,Filipino Restaurant,Metro Station
38,East Toronto,3,Park,Coffee Shop,Pizza Place,Brewery,Sushi Restaurant,Bakery,Italian Restaurant,Fast Food Restaurant,Snack Place,French Restaurant


#### Cluster 5

In [129]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Downtown Toronto,4,Coffee Shop,Harbor / Marina,Café,Scenic Lookout,Garden,Airport,Dog Run,Park,Dance Studio,Sculpture Garden


Therefore, these previously printed dataframes are the ones that compose each of the clusters represented on the map