<font size = 8> <b>Segmenting and Clustering Neighborhoods in Toronto</b> </font>

<font size=5>Part 1: Load the neighborhood details from Wikipedia and process the dataframe</font>

In [1]:
import pandas as pd
pd.set_option('display.max_rows', 10)
import numpy as np

In [2]:
# Wikipedia page whose tables are parsed in the next cell to obtain the postal-code data
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
# Parse every HTML table found on the page into a list of dataframes
tables = pd.read_html(url)
# The postal-code table is the first one on the page
df_raw = tables[0]
# Use a single-word column name for the postal code
df_raw.rename(columns={'Postal code': 'PostalCode'}, inplace=True)

# Report how many rows have no neighborhood name at all
print("Number of blank Neighborhood entries: ", df_raw['Neighborhood'].isna().sum())

df_raw

Number of blank Neighborhood entries:  77


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
...,...,...,...
175,M5Z,Not assigned,
176,M6Z,Not assigned,
177,M7Z,Not assigned,
178,M8Z,Etobicoke,Mimico NW / The Queensway West / South of Bloo...


In [4]:
#Define function which returns the neighborhoods separated by commas

def process_neighborhood(row):
    """Return the neighborhood text of one table row as a comma-separated string.

    Parameters
    ----------
    row : pandas.Series or sequence
        One row of the postal-code table. Fields are read by position:
        index 1 is the Borough and index 2 is the Neighborhood.

    Returns
    -------
    str
        The Borough value when the Neighborhood field is missing; otherwise the
        Neighborhood field with its "/" separators replaced by ", " and the
        whitespace around each name removed.
    """
    # Positional access on a Series must go through .iloc: plain row[2] on a
    # label-indexed Series is deprecated in recent pandas. Lists and tuples have
    # no .iloc and are indexed directly.
    fields = row.iloc if hasattr(row, "iloc") else row
    borough, neighborhood = fields[1], fields[2]

    # A blank neighborhood falls back to the borough name
    if pd.isna(neighborhood):
        return borough

    # Split on the slashes used by the Wikipedia source, trim each name, re-join with commas
    return ", ".join(part.strip() for part in neighborhood.split("/"))
   

In [5]:
#Process raw dataframe and save the results in a new one


# Keep only the postal codes that have been assigned to a borough
df = df_raw.loc[df_raw['Borough'].ne('Not assigned')].reset_index(drop=True)

# The Wikipedia source separates neighborhoods with slashes;
# rewrite each row's neighborhoods as a comma-separated list
df['Neighborhood'] = df.apply(process_neighborhood, axis=1)
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business reply mail Processing CentrE
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [6]:
# Report the number of distinct values in the Borough column
print("The neighborhood has {} unique boroughs".format(df['Borough'].unique().size))

The neighborhood has 10 unique boroughs


In [7]:
# Dimensions of df as (rows, columns)
df.shape

(103, 3)

<font size=5>Part 2: Add latitude and longitude information to the dataframe </font>

In [8]:
#Install needs to be done only one time, hence the next line is commented
#!pip install geocoder
import geocoder 

#Define function to get the lat long values for a location
def get_coords(row):
    """Look up the latitude and longitude for the postal code of one row.

    The postal code is read from the first field of ``row`` and geocoded with
    ``geocoder.arcgis`` as "<postal code>, Toronto, Ontario". A lookup can come
    back without coordinates, so it is attempted up to five times.

    Parameters
    ----------
    row : pandas.Series or sequence
        One row of the dataframe; the first field must hold the postal code.

    Returns
    -------
    list
        The geocoder's ``latlng`` value for the postal code, or ``[nan, nan]``
        when all five attempts returned no coordinates. A two-element result is
        always returned so that callers unpacking the result into latitude and
        longitude columns do not fail on a single unresolved postal code.
    """
    max_tries = 5

    # Read the postal code by position; .iloc is needed on a Series because
    # plain row[0] on a label-indexed Series is deprecated in recent pandas.
    postal_code = row.iloc[0] if hasattr(row, "iloc") else row[0]
    query = '{}, Toronto, Ontario'.format(postal_code)

    # Retry until coordinates come back, with at most max_tries attempts
    for _ in range(max_tries):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords

    # Every attempt failed: return NaN placeholders instead of None
    return [float("nan"), float("nan")]

# Geocode every row with get_coords, then unzip the per-row coordinate pairs
# into the new Latitude and Longitude columns
df["Latitude"], df["Longitude"] = zip(*df.apply(get_coords, axis=1))

# Alternative way to assign lat-long: let apply expand each returned pair into two columns
#df[['Latitude', 'Longitude']] = df.apply(get_coords,axis=1,result_type='expand')

df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.752935,-79.335641
1,M4A,North York,Victoria Village,43.728102,-79.311890
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.723265,-79.451211
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.661790,-79.389390
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653340,-79.509766
99,M4Y,Downtown Toronto,Church and Wellesley,43.666659,-79.381472
100,M7Y,East Toronto,Business reply mail Processing CentrE,43.648700,-79.385450
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.632798,-79.493017


<font size=5>Part 3: Explore and Cluster the Neighborhoods in Toronto </font>

In [9]:
#Import additional libraries

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values0
import json
from pandas.io.json import json_normalize

import requests # library to handle requests

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means clustering 
from sklearn.cluster import KMeans

#!pip install folium
import folium # map rendering library


__3.1 Create and visualise map of entire Toronto__

In [10]:
address = 'Toronto, Ontario'

# Geocode the city itself to get the point the map is centred on
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude

# Base map of Toronto, with a larger black/grey circle marking the geocoded city location
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)
folium.CircleMarker(
    location=[latitude, longitude],
    radius=10,
    popup=address,
    color='black',
    fill=True,
    fill_color='grey',
    fill_opacity=0.7,
    parse_html=False,
).add_to(map_toronto)
print('The geograpical coordinate of Toronto are {}, {}. It is shown on the map by the bigger circle with black outline and grey fill.'.format(latitude, longitude))

# One small blue marker per postal-code row, labelled "<neighborhood>, <borough>"
marker_fields = df[['Latitude', 'Longitude', 'Borough', 'Neighborhood']]
for lat, lng, borough, neighborhood in marker_fields.itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    ).add_to(map_toronto)

map_toronto

The geograpical coordinate of Toronto are 43.6534817, -79.3839347. It is shown on the map by the bigger circle with black outline and grey fill.


__3.2 Limit analysis only to boroughs containing the word 'Toronto'__

In [11]:
# Keep only the rows whose Borough name contains "Toronto", renumbering the index from 0
df_toronto_boroughs = df.loc[df['Borough'].str.contains('Toronto')].reset_index(drop=True)
df_toronto_boroughs

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.661790,-79.389390
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657491,-79.377529
3,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554
4,M4E,East Toronto,The Beaches,43.678148,-79.295349
...,...,...,...,...,...
34,M5W,Downtown Toronto,Stn A PO Boxes,43.648700,-79.385450
35,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667656,-79.367326
36,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.648269,-79.381884
37,M4Y,Downtown Toronto,Church and Wellesley,43.666659,-79.381472


In [12]:
# Map restricted to the boroughs whose name contains the word Toronto
map_toronto_boroughs = folium.Map(location=[latitude, longitude], zoom_start=11)

# Larger black/grey circle marking the geocoded location of the city
folium.CircleMarker(
    location=[latitude, longitude],
    radius=10,
    popup=address,
    color='black',
    fill=True,
    fill_color='grey',
    fill_opacity=0.7,
    parse_html=False,
).add_to(map_toronto_boroughs)
print('The geograpical coordinates of Toronto are {}, {}. It is shown on the map by the bigger circle with black outline and grey fill.'.format(latitude, longitude))

# One small blue marker per remaining row, labelled "<neighborhood>, <borough>"
marker_fields = df_toronto_boroughs[['Latitude', 'Longitude', 'Borough', 'Neighborhood']]
for lat, lng, borough, neighborhood in marker_fields.itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    ).add_to(map_toronto_boroughs)

map_toronto_boroughs


The geograpical coordinates of Toronto are 43.6534817, -79.3839347. It is shown on the map by the bigger circle with black outline and grey fill.


_We can see that the above map has fewer dots than the previous map, as it is limited to boroughs containing the word Toronto_

__3.3 Explore the venues in the neighborhoods of boroughs containing the word 'Toronto'__

In [13]:
# The code was removed by Watson Studio for sharing.

In [14]:
#Define the function to explore the venues
VERSION = '20180605' # Foursquare API version
radius = 500
LIMIT = 100
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Fetch the venues around each location from the Foursquare explore endpoint.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Name and coordinates of each location to query; they are walked in step.
    radius : int, optional
        Value sent as the ``radius`` query parameter of each request (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty, but still has these columns, when no venue was fetched at all.

    Notes
    -----
    Uses the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT values.
    The name of every location is printed as it is processed.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request
        try:
            results = requests.get(url).json()["response"]['groups'][0]['items']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
            # Results are not fetched for a few entries; such a location simply
            # contributes no venue rows (and ends up without a cluster later).
            # Only request/parsing failures are handled here, so interrupting
            # the kernel still stops the loop.
            print('  no venues fetched for {}: {!r}'.format(name, err))
            results = []

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category gets a missing value instead of raising
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor keeps the frame well-formed
    # even when there are no rows
    return pd.DataFrame(venue_rows, columns=columns)

In [15]:
# Fetch the nearby venues for every row of the Toronto-borough dataframe
toronto_boroughs_venues = getNearbyVenues(
    df_toronto_boroughs['Neighborhood'],
    df_toronto_boroughs['Latitude'],
    df_toronto_boroughs['Longitude'],
)
toronto_boroughs_venues

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West
High Park, The Junction South
North Toronto West
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Stn A PO Boxes
St. James Town,

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.650964,-79.353041,Souk Tabule,43.653756,-79.354390,Mediterranean Restaurant
1,"Regent Park, Harbourfront",43.650964,-79.353041,Young Centre for the Performing Arts,43.650825,-79.357593,Performing Arts Venue
2,"Regent Park, Harbourfront",43.650964,-79.353041,SOMA chocolatemaker,43.650622,-79.358127,Chocolate Shop
3,"Regent Park, Harbourfront",43.650964,-79.353041,BATLgrounds,43.647088,-79.351306,Athletics & Sports
4,"Regent Park, Harbourfront",43.650964,-79.353041,Cluny Bistro & Boulangerie,43.650565,-79.357843,French Restaurant
...,...,...,...,...,...,...,...
1588,Business reply mail Processing CentrE,43.648700,-79.385450,Rexall Pharma Plus,43.650693,-79.386847,Pharmacy
1589,Business reply mail Processing CentrE,43.648700,-79.385450,Ematei 絵馬亭,43.651156,-79.388990,Japanese Restaurant
1590,Business reply mail Processing CentrE,43.648700,-79.385450,Meli Baklava & Chocolate Bar,43.650140,-79.390377,Chocolate Shop
1591,Business reply mail Processing CentrE,43.648700,-79.385450,Red Eye Espresso,43.651150,-79.390146,Café


In [16]:
# Count the fetched venue rows per neighborhood
toronto_boroughs_venues.groupby(by='Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,67,67,67,67,67,67
"Brockton, Parkdale Village, Exhibition Place",44,44,44,44,44,44
Business reply mail Processing CentrE,100,100,100,100,100,100
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",65,65,65,65,65,65
Central Bay Street,54,54,54,54,54,54
...,...,...,...,...,...,...
"The Annex, North Midtown, Yorkville",27,27,27,27,27,27
The Beaches,5,5,5,5,5,5
"The Danforth West, Riverdale",5,5,5,5,5,5
"Toronto Dominion Centre, Design Exchange",100,100,100,100,100,100


In [17]:
# Report how many distinct venue categories were returned
print('There are {} uniques categories.'.format(toronto_boroughs_venues['Venue Category'].unique().size))

There are 219 uniques categories.


__Show top 10 venues by neighborhood__

In [18]:
# one hot encoding: one indicator column per venue category
toronto_boroughs_onehot = pd.get_dummies(toronto_boroughs_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category that is itself called "Neighborhood" would clash with the
# column inserted below, so it is removed first. errors='ignore' keeps the cell
# working when the fetched venues contain no such category.
toronto_boroughs_onehot = toronto_boroughs_onehot.drop(columns='Neighborhood', errors='ignore')

# Put the neighborhood name of each venue in the first column
toronto_boroughs_onehot.insert(0, column='Neighborhood', value=toronto_boroughs_venues['Neighborhood'], allow_duplicates=False)

def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped; the remaining entries are ordered
    from largest to smallest value and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 10

# English ordinal suffixes for ranks ending in 1, 2 and 3; every other rank uses 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for rank in range(1, num_top_venues + 1):
    # 11th, 12th and 13th (and 111th, ...) take 'th' even though they end in 1, 2, 3
    if 1 <= rank % 10 <= 3 and not 11 <= rank % 100 <= 13:
        suffix = indicators[rank % 10 - 1]
    else:
        suffix = 'th'
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# Average the one-hot rows per neighborhood to get the frequency of each venue category
toronto_grouped = toronto_boroughs_onehot.groupby('Neighborhood').mean().reset_index()

# create a new dataframe with one row per neighborhood and one column per rank
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

# Fill each row with that neighborhood's most frequent venue categories
for ind in range(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

print("Top 10 venues by neighborhood:")
neighborhoods_venues_sorted

Top 10 venues by neighborhood:


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Seafood Restaurant,Cocktail Bar,Italian Restaurant,Farmers Market,Bakery,Breakfast Spot,Café,Hotel,Restaurant
1,"Brockton, Parkdale Village, Exhibition Place",Coffee Shop,Café,Thrift / Vintage Store,Pizza Place,Gift Shop,Boutique,Brewery,Italian Restaurant,Japanese Restaurant,French Restaurant
2,Business reply mail Processing CentrE,Coffee Shop,Hotel,Café,Japanese Restaurant,Asian Restaurant,Restaurant,American Restaurant,Mediterranean Restaurant,Gym,Salon / Barbershop
3,"CN Tower, King and Spadina, Railway Lands, Har...",Coffee Shop,Restaurant,Café,Park,French Restaurant,Lounge,Speakeasy,Gym,Gym / Fitness Center,Italian Restaurant
4,Central Bay Street,Coffee Shop,Middle Eastern Restaurant,Clothing Store,Restaurant,Sandwich Place,Japanese Restaurant,Plaza,Bubble Tea Shop,Breakfast Spot,Shopping Mall
...,...,...,...,...,...,...,...,...,...,...,...
33,"The Annex, North Midtown, Yorkville",Sandwich Place,Café,History Museum,French Restaurant,Burger Joint,Flower Shop,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Italian Restaurant
34,The Beaches,Health Food Store,Church,Pub,Trail,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
35,"The Danforth West, Riverdale",Bus Line,Business Service,Park,Grocery Store,Discount Store,Yoga Studio,Electronics Store,Flower Shop,Fish Market,Fish & Chips Shop
36,"Toronto Dominion Centre, Design Exchange",Coffee Shop,Hotel,Café,Japanese Restaurant,Restaurant,Seafood Restaurant,American Restaurant,Salad Place,Deli / Bodega,Sporting Goods Shop


__3.4 Cluster neighborhoods__

In [19]:
#Cluster neighborhoods with K Means
kclusters = 5

# Feature matrix: the per-neighborhood category frequencies without the name column.
# The column is dropped by keyword because DataFrame.drop no longer accepts the
# axis as a positional argument in pandas 2.0+.
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is set explicitly so the number of restarts
# does not depend on the default of the installed scikit-learn version
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_grouped_clustering)

# add clustering labels; a label column left over from an earlier run of this
# cell is removed first, because DataFrame.insert refuses to add a column name
# that already exists
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_.astype(int))

# Attach the cluster label and top venues to each borough row by matching on
# Neighborhood; join returns a new frame, so df_toronto_boroughs is not modified.
# Rows whose neighborhood has no venue data get missing values.
toronto_merged = df_toronto_boroughs.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged['Cluster Labels']

0     1.0
1     1.0
2     1.0
3     1.0
4     1.0
     ... 
34    1.0
35    1.0
36    1.0
37    1.0
38    1.0
Name: Cluster Labels, Length: 39, dtype: float64

In [20]:
# Report how many rows did not receive a cluster label
print("Null entries: ", toronto_merged['Cluster Labels'].isna().sum())
# Remove, in place, every row that contains a missing value. Missing values can
# arise when fetching results from the Foursquare API fails for a neighborhood.
toronto_merged.dropna(inplace=True)

__Visualize the clusters__

In [21]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Cluster labels start at 0, so they index the colour list directly
    # (indexing with cluster-1 would wrap label 0 around to the last colour)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

__Explore Clusters__

_Cluster Label 0 : Commerce Zone_

_(Baseball Fields, Business Services and Government Buildings are most frequent in this zone)_

In [22]:
# Rows labelled cluster 0: the column at position 1 plus every column from position 5 onward
toronto_merged[toronto_merged['Cluster Labels'].eq(0)].iloc[:, [1, *range(5, toronto_merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,East Toronto,0.0,Baseball Field,Business Service,Government Building,Night Market,Yoga Studio,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market


_Cluster Label 1: Core Toronto_

_(Most boroughs have been grouped in this cluster and while it is dominated by food, there are quite a few markets, gyms and shops as well)_

In [23]:
# Rows labelled cluster 1: the column at position 1 plus every column from position 5 onward
toronto_merged[toronto_merged['Cluster Labels'].eq(1)].iloc[:, [1, *range(5, toronto_merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,1.0,Pub,Café,Coffee Shop,Athletics & Sports,Bakery,Mediterranean Restaurant,Mexican Restaurant,Food Truck,Chocolate Shop,French Restaurant
1,Downtown Toronto,1.0,Coffee Shop,Sushi Restaurant,Café,Middle Eastern Restaurant,Juice Bar,Fried Chicken Joint,Spa,Burrito Place,Sandwich Place,Yoga Studio
2,Downtown Toronto,1.0,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Sandwich Place,Bar,Hotel,Restaurant,Cosmetics Shop,Café,Ramen Restaurant
3,Downtown Toronto,1.0,Café,Coffee Shop,Italian Restaurant,Gastropub,American Restaurant,Seafood Restaurant,Cocktail Bar,Cosmetics Shop,Moroccan Restaurant,Hotel
4,East Toronto,1.0,Health Food Store,Church,Pub,Trail,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
...,...,...,...,...,...,...,...,...,...,...,...,...
34,Downtown Toronto,1.0,Coffee Shop,Hotel,Café,Japanese Restaurant,Asian Restaurant,Restaurant,American Restaurant,Mediterranean Restaurant,Gym,Salon / Barbershop
35,Downtown Toronto,1.0,Coffee Shop,Restaurant,Bakery,Pizza Place,Café,Pub,Italian Restaurant,Chinese Restaurant,Breakfast Spot,Jewelry Store
36,Downtown Toronto,1.0,Coffee Shop,Café,Hotel,American Restaurant,Restaurant,Gym,Japanese Restaurant,Deli / Bodega,Steakhouse,Seafood Restaurant
37,Downtown Toronto,1.0,Coffee Shop,Japanese Restaurant,Restaurant,Sushi Restaurant,Pub,Café,Dance Studio,Gay Bar,Gastropub,Men's Store


_Cluster Label 2: Green Zone_

_(Parks, Yoga Studios are most common here)_

In [24]:
# Rows labelled cluster 2: the column at position 1 plus every column from position 5 onward
toronto_merged[toronto_merged['Cluster Labels'].eq(2)].iloc[:, [1, *range(5, toronto_merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Central Toronto,2.0,Park,Yoga Studio,Dumpling Restaurant,Food,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


_Cluster Label 3: Residential Zone_

_(Bus Line, Swimming, Yoga and Food are most frequent in this zone)_

In [25]:
# Rows labelled cluster 3: the column at position 1 plus every column from position 5 onward
toronto_merged[toronto_merged['Cluster Labels'].eq(3)].iloc[:, [1, *range(5, toronto_merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Central Toronto,3.0,Bus Line,Swim School,Yoga Studio,Food Court,Food,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market


_Cluster Label 4: Fitness Zone_

_(Playgrounds, Parks and Gyms dominate this zone)_

In [27]:
# Rows labelled cluster 4: the column at position 1 plus every column from position 5 onward
toronto_merged[toronto_merged['Cluster Labels'].eq(4)].iloc[:, [1, *range(5, toronto_merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,Central Toronto,4.0,Playground,Gym Pool,Park,Garden,Donut Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
33,Downtown Toronto,4.0,Playground,Park,Grocery Store,Candy Store,Dumpling Restaurant,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
