# Segmenting and Clusterinng Neighborhoods in Toronto

In [73]:
#!pip install geocoder
#!pip install geopy
!pip install folium

Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1


In [106]:
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json
import numpy as np
import folium
from bs4 import BeautifulSoup # this module helps in web scrapping.
import requests
from pandas.io.json import json_normalize

from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import geocoder # import geocoder
from geopy.geocoders import Nominatim

## 1) Webscraping

In [38]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
data  = requests.get(url).text
soup = BeautifulSoup(data, 'html5lib')

The soup object is scraped ignoring any postal codes that do not have borough assigned. Any boroughs which have more than one neighborhoods are fomatted so that each neighborhood is separated by a comma. Unusual cases which do not follow the same patern as the rest of the html are manually found and replaced.

In [60]:
table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['Postal Code'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

#print(table_contents)
df=pd.DataFrame(table_contents)
df = df.set_index('Postal Code')
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
print(df.shape)
df.head()


(103, 2)


Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Queen's Park,Ontario Provincial Government


## 2) Geocoding

__Note__: I tried obtaining the latitudes and longitudes using geocoder as shown in the commented sections. However, it appears not to be working and keeps returning 'None'. Therefore, I will obtain them from the '.csv' file instead.


In [41]:
# initialize your variable to None
#latitude = []
#longitude = []

#for postal_code in df['PostalCode']:
    #print(postal_code)
    #lat_lng_coords = None

    # loop until you get the coordinates
    #while(lat_lng_coords is None):
        #g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        #lat_lng_coords = g.latlng
    
    #latitude.append(lat_lng_coords[0])
    #longitude.append(lat_lng_coords[1])

### Reading latitude and longitude from csv file

In [62]:
latlong_df = pd.read_csv (r'C:\Users\isaac\Geospatial_Coordinates.csv')
latlong_df = latlong_df.set_index('Postal Code')
latlong_df.head()

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


### Merging dataframes

In [63]:
total_df = pd.merge(df, latlong_df, on = 'Postal Code')
print(total_df.shape)
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(total_df['Borough'].unique()),
        total_df.shape[0]
    )
)
total_df.head()

(103, 4)
The dataframe has 15 boroughs and 103 neighborhoods.


Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


## 3) Clustering

### Mapping Toronto Neighborhoods
First all the neighborhoods in Toronto are displayed on the map. Only including neighborhoods with borough names including the word "Toronto".

__Filtering dataframe for Toronto boroughs__

In [70]:
toronto_df = total_df[total_df['Borough'].str.contains('Toronto')]
toronto_df.head()

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
M4E,East Toronto,The Beaches,43.676357,-79.293031
M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


__Finding Toronto postal codes__

In [68]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="TO_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


__Creating map of Toronto Postal Codes__

In [75]:
# create map of totonto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Define Foursquare credential and version

In [76]:
CLIENT_ID = 'WQI5ZY3BIH5MUEMW5D2BSBT2YMNCUUU4444CTA1CGCO0ROHW' # your Foursquare ID
CLIENT_SECRET = '4XCGNCT0MGJ4AVTTXJDLV011WIG1WFFTZTHQDZ1QRQSOCKAS' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: WQI5ZY3BIH5MUEMW5D2BSBT2YMNCUUU4444CTA1CGCO0ROHW
CLIENT_SECRET:4XCGNCT0MGJ4AVTTXJDLV011WIG1WFFTZTHQDZ1QRQSOCKAS


### Find top 100 venues within 500 meters for all neighborhoods of Toronto

In [78]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
            
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [80]:
toronto_venues = getNearbyVenues(names=toronto_df['Neighborhood'],
                                   latitudes=toronto_df['Latitude'],
                                   longitudes=toronto_df['Longitude']
                                  )

Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
The Danforth  East
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West
High Park, The Junction South
North Toronto West
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Enclave of M5E
St. James Town, Cabbagetown
First Canadi

__Inspect resulting venues database and find out how many unique categories there are.__

In [84]:
print(toronto_venues.shape)
print('There are {} unique categories.'.format(len(toronto_venues['Venue Category'].unique())))
toronto_venues.head()

(1493, 7)
There are 215 uniques categories.


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
1,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


### Analyse each Neighborhood

In [86]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(1493, 215)


Unnamed: 0,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


####  Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [91]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
print(toronto_grouped.shape)
toronto_grouped.head()

(39, 215)


Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0
2,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.071429,0.071429,0.142857,0.142857,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Central Bay Street,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.015152,0.0
4,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Print each neighborhood alongside top 5 venues 

In [92]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
            venue  freq
0    Cocktail Bar  0.09
1  Sandwich Place  0.07
2     Coffee Shop  0.07
3          Bakery  0.07
4  Farmers Market  0.04


----Brockton, Parkdale Village, Exhibition Place----
               venue  freq
0     Breakfast Spot  0.09
1               Café  0.09
2     Sandwich Place  0.09
3        Coffee Shop  0.09
4  Convenience Store  0.05


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
              venue  freq
0   Airport Service  0.14
1  Airport Terminal  0.14
2   Harbor / Marina  0.07
3               Bar  0.07
4       Coffee Shop  0.07


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.15
1      Sandwich Place  0.09
2                Café  0.06
3  Italian Restaurant  0.06
4    Sushi Restaurant  0.06


----Christie----
                venue  freq
0       Grocery Store  0.29
1                Café  0.21
2                Park  0.14
3  Italian Restaur

#### Converting into a pandas dataframe
Write a function to sort venues in descending order

In [107]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Create new dataframe with top 10 venues for each neighborhood

In [108]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Cocktail Bar,Sandwich Place,Coffee Shop,Bakery,Farmers Market,Vegetarian / Vegan Restaurant,Seafood Restaurant,Beer Bar,Liquor Store,Comfort Food Restaurant
1,"Brockton, Parkdale Village, Exhibition Place",Breakfast Spot,Café,Sandwich Place,Coffee Shop,Convenience Store,Italian Restaurant,Climbing Gym,Restaurant,Bar,Bakery
2,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Terminal,Harbor / Marina,Bar,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry,Boutique,Plane
3,Central Bay Street,Coffee Shop,Sandwich Place,Café,Italian Restaurant,Sushi Restaurant,Japanese Restaurant,Burger Joint,Salad Place,Restaurant,Pizza Place
4,Christie,Grocery Store,Café,Park,Italian Restaurant,Baby Store,Nightclub,Restaurant,Coffee Shop,Middle Eastern Restaurant,Movie Theater


### Cluster Neighborhoods
Run k-means to cluster neighborhoods into 5 clusters.

In [114]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [110]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_df

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Park,Bakery,Pub,Café,Beer Store,Spa,Sandwich Place,Breakfast Spot,Performing Arts Venue
M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Coffee Shop,Sandwich Place,Clothing Store,Café,Japanese Restaurant,Bank,Thai Restaurant,Pizza Place,Cosmetics Shop,Hotel
M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Italian Restaurant,Cocktail Bar,Beer Bar,Restaurant,Clothing Store,Gym,Gastropub,Farmers Market
M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Health Food Store,Construction & Landscaping,Pub,Movie Theater,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Cocktail Bar,Sandwich Place,Coffee Shop,Bakery,Farmers Market,Vegetarian / Vegan Restaurant,Seafood Restaurant,Beer Bar,Liquor Store,Comfort Food Restaurant


#### Visualise clusters

In [112]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [116]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
M5A,"Regent Park, Harbourfront",Coffee Shop,Park,Bakery,Pub,Café,Beer Store,Spa,Sandwich Place,Breakfast Spot,Performing Arts Venue
M5B,"Garden District, Ryerson",Coffee Shop,Sandwich Place,Clothing Store,Café,Japanese Restaurant,Bank,Thai Restaurant,Pizza Place,Cosmetics Shop,Hotel
M5C,St. James Town,Coffee Shop,Café,Italian Restaurant,Cocktail Bar,Beer Bar,Restaurant,Clothing Store,Gym,Gastropub,Farmers Market
M4E,The Beaches,Health Food Store,Construction & Landscaping,Pub,Movie Theater,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
M5E,Berczy Park,Cocktail Bar,Sandwich Place,Coffee Shop,Bakery,Farmers Market,Vegetarian / Vegan Restaurant,Seafood Restaurant,Beer Bar,Liquor Store,Comfort Food Restaurant
M5G,Central Bay Street,Coffee Shop,Sandwich Place,Café,Italian Restaurant,Sushi Restaurant,Japanese Restaurant,Burger Joint,Salad Place,Restaurant,Pizza Place
M6G,Christie,Grocery Store,Café,Park,Italian Restaurant,Baby Store,Nightclub,Restaurant,Coffee Shop,Middle Eastern Restaurant,Movie Theater
M5H,"Richmond, Adelaide, King",Coffee Shop,Café,Sandwich Place,Gym,Clothing Store,Restaurant,Sushi Restaurant,Steakhouse,Pizza Place,Bank
M6H,"Dufferin, Dovercourt Village",Pharmacy,Music Venue,Bank,Bakery,Brewery,Supermarket,Gas Station,Café,Grocery Store,Middle Eastern Restaurant
M5J,"Harbourfront East, Union Station, Toronto Islands",Coffee Shop,Café,Hotel,Aquarium,Restaurant,Scenic Lookout,Pizza Place,Deli / Bodega,Fried Chicken Joint,Sports Bar


In [117]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
M4T,"Moore Park, Summerhill East",Trail,Tennis Court,Restaurant,Yoga Studio,Moroccan Restaurant,Market,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant


In [118]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
M4J,The Danforth East,Park,Convenience Store,Yoga Studio,Movie Theater,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop


In [119]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
M5P,Forest Hill North & West,Park,Trail,Jewelry Store,Sushi Restaurant,Yoga Studio,Monument / Landmark,Market,Martial Arts School,Mediterranean Restaurant,Men's Store
M4W,Rosedale,Park,Playground,Trail,Yoga Studio,Moroccan Restaurant,Market,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant


In [120]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
M5N,Roselawn,Health & Beauty Service,Home Service,Garden,Museum,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop


### Rename cluster labels

In [126]:
toronto_merged["Cluster Labels"].replace({0:"Food and Drink",
                                         1:"Sport and Activities",
                                         2:"Entertainment",
                                         3:"Outdoor Recreation",
                                         4:"Retail"}, inplace=True)
toronto_merged.head()

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Food and Drink,Coffee Shop,Park,Bakery,Pub,Café,Beer Store,Spa,Sandwich Place,Breakfast Spot,Performing Arts Venue
M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,Food and Drink,Coffee Shop,Sandwich Place,Clothing Store,Café,Japanese Restaurant,Bank,Thai Restaurant,Pizza Place,Cosmetics Shop,Hotel
M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,Food and Drink,Coffee Shop,Café,Italian Restaurant,Cocktail Bar,Beer Bar,Restaurant,Clothing Store,Gym,Gastropub,Farmers Market
M4E,East Toronto,The Beaches,43.676357,-79.293031,Food and Drink,Health Food Store,Construction & Landscaping,Pub,Movie Theater,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,Food and Drink,Cocktail Bar,Sandwich Place,Coffee Shop,Bakery,Farmers Market,Vegetarian / Vegan Restaurant,Seafood Restaurant,Beer Bar,Liquor Store,Comfort Food Restaurant
