### Part 1 - Create Dataframe

In [24]:
#Import libraries
import pandas as pd
import numpy as np
import requests

In [25]:
#Save Wikipedia page as a variable "dfs"
dfs = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
#First table on page is saved as dataframe "df"
df = dfs[0]
#Get number of rows
df.shape

(289, 3)

In [26]:
#Find the number of "Not assigned" Neighbourhoods
(df['Neighbourhood']=='Not assigned').value_counts()

False    211
True      78
Name: Neighbourhood, dtype: int64

In [27]:
#Find the number of "Not assigned" Boroughs
(df['Borough']=='Not assigned').value_counts()

False    212
True      77
Name: Borough, dtype: int64

In [28]:
#Remove all "Boroughs" that are "Not assigned"
df.drop(df[df.Borough=='Not assigned'].index,inplace=True)
(df['Borough']=='Not assigned').value_counts()

False    212
Name: Borough, dtype: int64

In [29]:
#However, we stil have one Neighbourhoods which is "Not assigned"
(df['Neighbourhood']=='Not assigned').value_counts()

False    211
True       1
Name: Neighbourhood, dtype: int64

In [30]:
#Let's copy the values from Boroughs for all Neighborhoods that are "Not assigned"
df['Neighbourhood'] = np.where(df['Neighbourhood'] == 'Not assigned', df['Borough'], df['Neighbourhood'])
(df['Neighbourhood']=='Not assigned').value_counts()

False    212
Name: Neighbourhood, dtype: int64

In [31]:
#Group by "Postcode" and "Borough" and aggregate
df = df.groupby(['Postcode','Borough'], sort = False).agg(lambda x: ', '.join(x))
df.shape

(103, 1)

### Part 2 - Import location

In [32]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Postcode,Borough,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Harbourfront, Regent Park"
M6A,North York,"Lawrence Heights, Lawrence Manor"
M7A,Queen's Park,Queen's Park


In [33]:
#Let's reset the index
df = df.reset_index()
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [34]:
#Let's load the csv file with the geographical coordinates of each postal code:
dfg = pd.read_csv("https://cocl.us/Geospatial_data")
dfg.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [35]:
#Let's merge both dataframes using the "Postal Code" column
dfm = pd.merge(df, dfg, how='outer', left_on="Postcode", right_on="Postal Code", validate='m:1')
dfm.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",M5A,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",M6A,43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,M7A,43.662301,-79.389494


In [36]:
#We have to columns for the Postal Code, let's drop the "Postcode" column
dfm.drop(['Postcode'], axis=1, inplace=True)
dfm.head(15)

Unnamed: 0,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,North York,Parkwoods,M3A,43.753259,-79.329656
1,North York,Victoria Village,M4A,43.725882,-79.315572
2,Downtown Toronto,"Harbourfront, Regent Park",M5A,43.65426,-79.360636
3,North York,"Lawrence Heights, Lawrence Manor",M6A,43.718518,-79.464763
4,Queen's Park,Queen's Park,M7A,43.662301,-79.389494
5,Etobicoke,Islington Avenue,M9A,43.667856,-79.532242
6,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
7,North York,Don Mills North,M3B,43.745906,-79.352188
8,East York,"Woodbine Gardens, Parkview Hill",M4B,43.706397,-79.309937
9,Downtown Toronto,"Ryerson, Garden District",M5B,43.657162,-79.378937


### Part 3 - Clustering

In [37]:
#Let's import all the required libraries
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium # map rendering library

In [38]:
#Let's define the latitude/longitude point for the center of our Toronto Map:
latitude = 43.731136
longitude = -79.371932
#Let's define the start location and zoom:
tdot_map = folium.Map(location=[latitude, longitude], zoom_start=11)

In [39]:
# add all boroughs as markers to map
for lat, lng, borough, neighbourhood in zip(dfm['Latitude'], dfm['Longitude'], dfm['Borough'], dfm['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(tdot_map)  
    
tdot_map

In [40]:
#Let's enter our Foursquare Credentials
CLIENT_ID = '3RJOEEVONTI1P4WQMR5BIGPOHR4Y3V0TKEWX5BFKESXXUAXH' # your Foursquare ID
CLIENT_SECRET = '04PWBPN0KFHHHPWY3OLPANP00KFH2XKFLQA5OBSB5JAC2V0N' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3RJOEEVONTI1P4WQMR5BIGPOHR4Y3V0TKEWX5BFKESXXUAXH
CLIENT_SECRET:04PWBPN0KFHHHPWY3OLPANP00KFH2XKFLQA5OBSB5JAC2V0N


In [41]:
#Let's get the neighborhood's latitude and longitude values.
neighborhood_latitude = dfm.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = dfm.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = dfm.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


In [42]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
# create URL
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [43]:
#Let's get the nearby venues:
tdot_venues = getNearbyVenues(names=dfm['Neighbourhood'],
                                   latitudes=dfm['Latitude'],
                                   longitudes=dfm['Longitude']
                                  )

Parkwoods
Victoria Village
Harbourfront, Regent Park
Lawrence Heights, Lawrence Manor
Queen's Park
Islington Avenue
Rouge, Malvern
Don Mills North
Woodbine Gardens, Parkview Hill
Ryerson, Garden District
Glencairn
Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park
Highland Creek, Rouge Hill, Port Union
Flemingdon Park, Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Downsview North, Wilson Heights
Thorncliffe Park
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
East Birchmount Park, Ionview, Kennedy Park
Bayview Village
CFB Toronto, Downsview East
The D

In [44]:
#Let's look at the dataframe:
print(tdot_venues.shape)
tdot_venues.head()

(2221, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [45]:
#Let's group venues by neighborhood:
tdot_venues.groupby('Neighborhood').count()
tdot_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [46]:
#Let's check how many categories we have:
print('There are {} uniques categories.'.format(len(tdot_venues['Venue Category'].unique())))

There are 269 uniques categories.


In [52]:
#Let's build our one hot encoding dataframe:
tdot_onehot = pd.get_dummies(tdot_venues[['Venue Category']], prefix="", prefix_sep="")
tdot_onehot['Neighborhood'] = tdot_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [tdot_onehot.columns[-1]] + list(tdot_onehot.columns[:-1])
tdot_onehot = tdot_onehot[fixed_columns]
tdot_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [53]:
tdot_onehot.shape

(2221, 269)

In [54]:
#Let's create a dataframe grouping the one hot encoding results:
tdot_grouped = tdot_onehot.groupby('Neighborhood').mean().reset_index()
tdot_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
tdot_grouped.shape

(99, 269)

In [51]:
#Let's get the top 5 venues for each neighborhood:
num_top_venues = 5

for hood in tdot_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = tdot_grouped[tdot_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
             venue  freq
0      Coffee Shop  0.06
1             Café  0.04
2              Bar  0.04
3  Thai Restaurant  0.04
4       Steakhouse  0.04


----Agincourt----
                venue  freq
0      Breakfast Spot   0.2
1  Chinese Restaurant   0.2
2              Lounge   0.2
3      Sandwich Place   0.2
4        Skating Rink   0.2


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
               venue  freq
0         Playground  0.33
1        Coffee Shop  0.33
2               Park  0.33
3  Mobile Phone Shop  0.00
4      Movie Theater  0.00


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                  venue  freq
0         Grocery Store  0.17
1           Pizza Place  0.08
2  Fast Food Restaurant  0.08
3            Beer Store  0.08
4          Liquor Store  0.08


----Alderwood, Long Branch----
                venue  freq
0         Pizza Place   0.2
1  Athletic

In [73]:
#Let's get the most common venue per neighborhood:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

In [74]:
#Let's get the top 10 venues per neighborhood:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = tdot_grouped['Neighborhood']

for ind in np.arange(tdot_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(tdot_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Bar,Thai Restaurant,Café,Steakhouse,Gym,Sushi Restaurant,Bakery,American Restaurant,Asian Restaurant
1,Agincourt,Chinese Restaurant,Sandwich Place,Lounge,Breakfast Spot,Skating Rink,Women's Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Playground,Park,Coffee Shop,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Fried Chicken Joint,Coffee Shop,Sandwich Place,Discount Store,Liquor Store,Beer Store,Japanese Restaurant,Fast Food Restaurant
4,"Alderwood, Long Branch",Pizza Place,Athletics & Sports,Pharmacy,Coffee Shop,Pool,Pub,Sandwich Place,Skating Rink,Gym,Airport Terminal


In [75]:
#Let's setup our KMeans clustering. Let's use 5 clusters:
kclusters = 6
tdot_grouped_clustering = tdot_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tdot_grouped_clustering)
kmeans.labels_[0:10] 

array([0, 0, 3, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [76]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

tdot_merged = dfm

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
tdot_merged = tdot_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood',how = 'right')
tdot_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighbourhood,Postal Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,Parkwoods,M3A,43.753259,-79.329656,3,Food & Drink Shop,Park,Fast Food Restaurant,Event Space,Ethiopian Restaurant,Falafel Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dim Sum Restaurant
1,North York,Victoria Village,M4A,43.725882,-79.315572,0,Pizza Place,Portuguese Restaurant,Coffee Shop,Hockey Arena,Intersection,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
2,Downtown Toronto,"Harbourfront, Regent Park",M5A,43.65426,-79.360636,0,Coffee Shop,Café,Bakery,Park,Pub,Theater,Breakfast Spot,Mexican Restaurant,Italian Restaurant,French Restaurant
3,North York,"Lawrence Heights, Lawrence Manor",M6A,43.718518,-79.464763,0,Clothing Store,Vietnamese Restaurant,Miscellaneous Shop,Shoe Store,Coffee Shop,Boutique,Event Space,Accessories Store,Furniture / Home Store,Empanada Restaurant
4,Queen's Park,Queen's Park,M7A,43.662301,-79.389494,0,Coffee Shop,Gym,Japanese Restaurant,Diner,Yoga Studio,Park,Smoothie Shop,Seafood Restaurant,Burger Joint,Sandwich Place


In [87]:
#Let's create our cluster map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tdot_merged['Latitude'], tdot_merged['Longitude'], tdot_merged['Neighbourhood'], tdot_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Cluster 1

In [78]:
tdot_merged.loc[tdot_merged['Cluster Labels'] == 0, tdot_merged.columns[[1] + list(range(5, tdot_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Victoria Village,0,Pizza Place,Portuguese Restaurant,Coffee Shop,Hockey Arena,Intersection,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
2,"Harbourfront, Regent Park",0,Coffee Shop,Café,Bakery,Park,Pub,Theater,Breakfast Spot,Mexican Restaurant,Italian Restaurant,French Restaurant
3,"Lawrence Heights, Lawrence Manor",0,Clothing Store,Vietnamese Restaurant,Miscellaneous Shop,Shoe Store,Coffee Shop,Boutique,Event Space,Accessories Store,Furniture / Home Store,Empanada Restaurant
4,Queen's Park,0,Coffee Shop,Gym,Japanese Restaurant,Diner,Yoga Studio,Park,Smoothie Shop,Seafood Restaurant,Burger Joint,Sandwich Place
7,Don Mills North,0,Japanese Restaurant,Gym / Fitness Center,Caribbean Restaurant,Baseball Field,Café,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop
8,"Woodbine Gardens, Parkview Hill",0,Pizza Place,Fast Food Restaurant,Bank,Intersection,Breakfast Spot,Athletics & Sports,Café,Gastropub,Rock Climbing Spot,Gym / Fitness Center
9,"Ryerson, Garden District",0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Bar,Tea Room,Diner,Thai Restaurant,Theater
10,Glencairn,0,Pub,Japanese Restaurant,Metro Station,Sushi Restaurant,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store
12,"Highland Creek, Rouge Hill, Port Union",0,Bar,Women's Store,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store,Filipino Restaurant
13,"Flemingdon Park, Don Mills South",0,Gym,Beer Store,Coffee Shop,Asian Restaurant,Chinese Restaurant,Fast Food Restaurant,Shopping Mall,Bike Shop,Sporting Goods Shop,Sandwich Place


#### Cluster 2

In [79]:
tdot_merged.loc[tdot_merged['Cluster Labels'] == 1, tdot_merged.columns[[1] + list(range(5, tdot_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,"Rouge, Malvern",1,Fast Food Restaurant,Dim Sum Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


#### Cluster 3

In [83]:
tdot_merged.loc[tdot_merged['Cluster Labels'] == 2, tdot_merged.columns[[1] + list(range(5, tdot_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,"Cloverdale, Islington, Martin Grove, Princess ...",2,Golf Course,Bank,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Women's Store,Diner
66,York Mills West,2,Park,Bank,Women's Store,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store


#### Cluster 4

In [81]:
tdot_merged.loc[tdot_merged['Cluster Labels'] == 3, tdot_merged.columns[[1] + list(range(5, tdot_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,3,Food & Drink Shop,Park,Fast Food Restaurant,Event Space,Ethiopian Restaurant,Falafel Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dim Sum Restaurant
77,"Kingsview Village, Martin Grove Gardens, Richv...",3,Pizza Place,Park,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
83,"Moore Park, Summerhill East",3,Playground,Park,Tennis Court,Gym,Colombian Restaurant,Dessert Shop,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store
85,"Agincourt North, L'Amoreaux East, Milliken, St...",3,Playground,Park,Coffee Shop,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
91,Rosedale,3,Park,Playground,Trail,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore
98,"The Kingsway, Montgomery Road, Old Mill North",3,Park,River,Women's Store,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant


#### Cluster 5

In [84]:
tdot_merged.loc[tdot_merged['Cluster Labels'] == 4, tdot_merged.columns[[1] + list(range(5, tdot_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Scarborough Village,4,Playground,Convenience Store,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
35,East Toronto,4,Park,Pizza Place,Convenience Store,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
64,Weston,4,Park,Convenience Store,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Electronics Store


#### Cluster 6

In [85]:
tdot_merged.loc[tdot_merged['Cluster Labels'] == 5, tdot_merged.columns[[1] + list(range(5, tdot_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
53,Downsview Central,5,Food Truck,Baseball Field,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Women's Store,Field
57,"Emery, Humberlea",5,Baseball Field,Women's Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Filipino Restaurant
101,"Humber Bay, King's Mill Park, Kingsway Park So...",5,Pool,Baseball Field,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
