# Part 1 - Scrape Wikipedia page

Import all dependencies needed for this notebook

In [1]:
from bs4 import BeautifulSoup
import lxml
import requests
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import json
import matplotlib.cm as cm
import matplotlib.colors as colors
import ssl
import certifi
import geopy.geocoders
from geopy.geocoders import Nominatim
import folium
ctx = ssl.create_default_context(cafile=certifi.where())
geopy.geocoders.options.default_ssl_context = ctx
from sklearn.cluster import KMeans

Use BeautifulSoup to scrape the following Wikipedia page: https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page = requests.get(url)

soup = BeautifulSoup(page.text,'lxml')
table = soup.find_all('table', class_='wikitable sortable')[0]
table_rows = table.find_all('tr')

data = []
for row in table_rows:
    data.append([t.text.strip() for t in row.find_all('td')])

Transform the data into a pandas dataframe excluding Boroughs that are Not Assigned

In [3]:
toronto_postalcode = pd.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighborhood'])
toronto_postalcode = toronto_postalcode.dropna()
toronto_postalcode=toronto_postalcode[toronto_postalcode.Borough != 'Not assigned'].reset_index(drop=True)
toronto_postalcode.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Print the number of rows in dataframe using .shape method

In [4]:
toronto_postalcode.shape

(103, 3)

---
# Part 2 - Add latitude and longitude

Use csv file to create dataframe with latitude and longitude coordinates

In [5]:
toronto_geo = pd.read_csv("Geospatial_Coordinates.csv")
toronto_geo.rename(columns={"Postal Code": "PostalCode"}, inplace=True)
toronto_geo.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merge Toronto postal code and geo coordinates dataframes

In [6]:
toronto_data = pd.merge(toronto_postalcode, toronto_geo, on='PostalCode')
toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


# Part3 - Explore and cluster the neighborhoods in Toronto

Get latitude and longitude coordinates of Toronto

In [7]:
address = 'Toronto, ON, Canada'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


Generate map of Toronto with markers for all neighborhoods

In [8]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Foursquare credentials

In [9]:
CLIENT_ID = 'GSLC5NU3BAI1XVIVW2AB0LW24ZZQWIFINXE4POVAHUJXNMZR'
CLIENT_SECRET = 'GZPTBK4DEJIDMUCPE2E5JXHTPYGKSBT2EAGFE4PIQ52CZ252'
VERSION = '20180605'

Function to find venues in each neighborhood

In [10]:
def getNearbyVenues(names, latitudes, longitudes, radius=1200, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Create toronto_venues dataframe using the above function

In [11]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

Size of the toronto_venues dataframe

In [12]:
print(toronto_venues.shape)
toronto_venues.head()

(5832, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
2,Parkwoods,43.753259,-79.329656,Donalda Golf & Country Club,43.752816,-79.342741,Golf Course
3,Parkwoods,43.753259,-79.329656,Tim Hortons,43.760668,-79.326368,Café
4,Parkwoods,43.753259,-79.329656,A&W,43.760643,-79.326865,Fast Food Restaurant


Group by neighborhood to check how many venues were returned for each

In [13]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,53,53,53,53,53,53
"Alderwood, Long Branch",44,44,44,44,44,44
"Bathurst Manor, Wilson Heights, Downsview North",37,37,37,37,37,37
Bayview Village,14,14,14,14,14,14
"Bedford Park, Lawrence Manor East",56,56,56,56,56,56
...,...,...,...,...,...,...
"Willowdale, Willowdale West",34,34,34,34,34,34
Woburn,18,18,18,18,18,18
Woodbine Heights,49,49,49,49,49,49
York Mills West,41,41,41,41,41,41


In [14]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 344 uniques categories.


One hot encoding

In [15]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Zoo Exhibit,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
toronto_onehot.shape

(5832, 344)

Group rows by neighborhood and get the mean of the frequency of each category

In [17]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Zoo Exhibit,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.018868,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.017857,0.017857,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.029412,0.000000,0.0,0.0,0.0
95,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0
96,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.020408,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0
97,York Mills West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0


In [18]:
toronto_grouped.shape

(99, 344)

Top 5 most common venues for each neighborhood

In [19]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                  venue  freq
0    Chinese Restaurant  0.17
1         Shopping Mall  0.06
2           Pizza Place  0.06
3  Caribbean Restaurant  0.04
4        Sandwich Place  0.04


----Alderwood, Long Branch----
         venue  freq
0         Park  0.09
1  Pizza Place  0.07
2  Gas Station  0.07
3     Pharmacy  0.07
4  Coffee Shop  0.07


----Bathurst Manor, Wilson Heights, Downsview North----
         venue  freq
0  Pizza Place  0.08
1  Coffee Shop  0.05
2         Park  0.05
3         Bank  0.05
4     Building  0.03


----Bayview Village----
                 venue  freq
0          Gas Station  0.14
1                 Bank  0.14
2  Japanese Restaurant  0.14
3                 Café  0.07
4        Grocery Store  0.07


----Bedford Park, Lawrence Manor East----
                venue  freq
0         Pizza Place  0.07
1         Coffee Shop  0.07
2          Restaurant  0.05
3  Italian Restaurant  0.05
4          Bagel Shop  0.05


----Berczy Park----
         venue  freq
0  C

Function to sort the venues in descending order

In [20]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Dataframe to display the top 10 venues for each neighborhood

In [21]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.shape

(99, 11)

Use k-means to cluster the neighborhoods into 5 clusters

In [22]:
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

kmeans.labels_[0:10]

array([4, 4, 4, 1, 4, 0, 1, 0, 0, 0], dtype=int32)

Dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [23]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1,Park,Convenience Store,Pharmacy,Bus Stop,Caribbean Restaurant,Train Station,Golf Course,Shop & Service,Shopping Mall,Discount Store
1,M4A,North York,Victoria Village,43.725882,-79.315572,0,Coffee Shop,Middle Eastern Restaurant,Gym,Hotel,Playground,Pizza Place,Sandwich Place,Optical Shop,Café,Sporting Goods Shop
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Café,Restaurant,Theater,Bakery,Pub,Park,Italian Restaurant,Gastropub,Diner
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,Clothing Store,Restaurant,Furniture / Home Store,Coffee Shop,Fast Food Restaurant,Dessert Shop,Sushi Restaurant,Vietnamese Restaurant,Italian Restaurant,Fried Chicken Joint
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Sushi Restaurant,Park,Clothing Store,Ramen Restaurant,Burger Joint,Italian Restaurant,Comic Shop,Gastropub,Pizza Place


Visualize the clusters on a map

In [24]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Cluster 1

In [25]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,0,Coffee Shop,Middle Eastern Restaurant,Gym,Hotel,Playground,Pizza Place,Sandwich Place,Optical Shop,Café,Sporting Goods Shop
2,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Theater,Bakery,Pub,Park,Italian Restaurant,Gastropub,Diner
3,North York,0,Clothing Store,Restaurant,Furniture / Home Store,Coffee Shop,Fast Food Restaurant,Dessert Shop,Sushi Restaurant,Vietnamese Restaurant,Italian Restaurant,Fried Chicken Joint
4,Downtown Toronto,0,Coffee Shop,Sushi Restaurant,Park,Clothing Store,Ramen Restaurant,Burger Joint,Italian Restaurant,Comic Shop,Gastropub,Pizza Place
7,North York,0,Coffee Shop,Japanese Restaurant,Restaurant,Bank,Pizza Place,Italian Restaurant,Supermarket,Gym,Burger Joint,Sporting Goods Shop
9,Downtown Toronto,0,Coffee Shop,Café,Gastropub,Japanese Restaurant,Theater,Hotel,Middle Eastern Restaurant,Mexican Restaurant,Ramen Restaurant,Seafood Restaurant
13,North York,0,Coffee Shop,Japanese Restaurant,Restaurant,Bank,Pizza Place,Italian Restaurant,Supermarket,Gym,Burger Joint,Sporting Goods Shop
15,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Italian Restaurant,Theater,Gastropub,Gym,Tea Room,Seafood Restaurant,Cocktail Bar
19,East Toronto,0,Pub,Coffee Shop,Beach,Pizza Place,Breakfast Spot,Japanese Restaurant,Skating Rink,Bakery,Burger Joint,Tea Room
20,Downtown Toronto,0,Coffee Shop,Restaurant,Hotel,Café,Park,Italian Restaurant,Plaza,Concert Hall,Bakery,Farmers Market


### Cluster 2

In [26]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1,Park,Convenience Store,Pharmacy,Bus Stop,Caribbean Restaurant,Train Station,Golf Course,Shop & Service,Shopping Mall,Discount Store
5,Etobicoke,1,Pharmacy,Convenience Store,Bakery,Grocery Store,Park,Shopping Mall,Skating Rink,Golf Course,Japanese Restaurant,Café
11,Etobicoke,1,Park,Hotel,Pizza Place,Convenience Store,Transportation Service,Restaurant,Pharmacy,Clothing Store,Fish & Chips Shop,Mexican Restaurant
12,Scarborough,1,Breakfast Spot,Gym / Fitness Center,Park,Burger Joint,Playground,Italian Restaurant,Filipino Restaurant,Event Space,Falafel Restaurant,Farm
27,North York,1,Park,Pharmacy,Bank,Bakery,Coffee Shop,Chinese Restaurant,Ice Cream Shop,Grocery Store,Sandwich Place,Supermarket
39,North York,1,Bank,Gas Station,Japanese Restaurant,Restaurant,Chinese Restaurant,Grocery Store,Park,Trail,Café,Skating Rink
45,North York,1,Park,Intersection,Gym,Gym / Fitness Center,Coffee Shop,Japanese Restaurant,Restaurant,Pub,Fireworks Store,Fish & Chips Shop
50,North York,1,Park,Bank,Skating Rink,Electronics Store,Italian Restaurant,Furniture / Home Store,Pharmacy,Shopping Mall,Pizza Place,Sports Bar
58,Scarborough,1,Park,Diner,Ice Cream Shop,Thai Restaurant,General Entertainment,Gym,College Stadium,Restaurant,Gym Pool,Café
66,North York,1,Coffee Shop,Park,Restaurant,Gas Station,Thai Restaurant,Optical Shop,Burrito Place,Japanese Restaurant,French Restaurant,Tennis Court


### Cluster 3

In [27]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
95,Scarborough,2,Donut Shop,Zoo,Filipino Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field


### Cluster 4

In [28]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
94,Etobicoke,3,Coffee Shop,Rental Car Location,Mediterranean Restaurant,Swiss Restaurant,Zoo,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market


### Cluster 5

In [29]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,4,Fast Food Restaurant,Trail,Restaurant,Zoo Exhibit,Cosmetics Shop,Coffee Shop,Paper / Office Supplies Store,Chinese Restaurant,Caribbean Restaurant,Martial Arts Dojo
8,East York,4,Pharmacy,Pizza Place,Fast Food Restaurant,Gym / Fitness Center,Skating Rink,Intersection,Park,Restaurant,Convenience Store,Gastropub
10,North York,4,Coffee Shop,Grocery Store,Fast Food Restaurant,Pizza Place,Italian Restaurant,Gas Station,Bank,Sandwich Place,Bakery,Restaurant
14,East York,4,Pizza Place,Coffee Shop,Park,Café,Thai Restaurant,Ice Cream Shop,Athletics & Sports,Sandwich Place,Bar,Farmers Market
16,York,4,Coffee Shop,Pizza Place,Bank,Caribbean Restaurant,Park,Convenience Store,Beer Store,Sandwich Place,Seafood Restaurant,Optical Shop
17,Etobicoke,4,Coffee Shop,Baseball Field,Pizza Place,Pet Store,Garden,Grocery Store,Liquor Store,Gas Station,Beer Store,Sandwich Place
18,Scarborough,4,Pizza Place,Bank,Fast Food Restaurant,Coffee Shop,Breakfast Spot,Electronics Store,Beer Store,Sandwich Place,Supermarket,Shopping Mall
21,York,4,Park,Coffee Shop,Grocery Store,Bus Stop,Beer Store,Pharmacy,Seafood Restaurant,BBQ Joint,Burger Joint,Bank
22,Scarborough,4,Park,Coffee Shop,Indian Restaurant,Pizza Place,Department Store,Juice Bar,Supermarket,Thrift / Vintage Store,Sandwich Place,Fast Food Restaurant
26,Scarborough,4,Coffee Shop,Indian Restaurant,Bakery,Gas Station,Bank,Sandwich Place,Chinese Restaurant,Thai Restaurant,Bar,Sushi Restaurant
