# Capstone Week3 : Segmenting and Clustering Neighborhoods in Toronto

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import pandas as pd
import numpy as np

## Web scrapping the wikipedia page:https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

#### Read the Wikipedia page directly wti the pandas read_html module .. and keep first table of that page as this is the one we want 

In [63]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
tables = pd.read_html(url) # Returns list of all tables on page
# First table of that page is what we need .. 
toronto_neigh = tables[0]

# rename the column Neighbourhood to Neighborhood for consistency with other resources later in this lab
toronto_neigh.rename(columns={"Neighbourhood":"Neighborhood"},inplace=True)

print (toronto_neigh.dtypes, toronto_neigh.shape)


Postal Code     object
Borough         object
Neighborhood    object
dtype: object (180, 3)


#### Clean the data frame by removing the Borought with 'not assigned' value then grouping based on Postal Code and Borough to aggregate the Neighborhood in a single line

In [64]:
# clean and format data frame :
# First remove spaces in the column name, as a good pratice 
toronto_neigh.rename(index=str, columns={"Postal Code": "PostalCode"}, inplace = True)
# Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
toronto_neigh = toronto_neigh.loc[toronto_neigh['Borough']!='Not assigned']
#More than one neighborhood can exist in one postal code area ...  These two rows will be combined into one row with the neighborhoods separated with a comma 
toronto_grouped = toronto_neigh.groupby(['PostalCode','Borough'], as_index=False).agg(lambda x: ','.join(x))
# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
mask = toronto_grouped['Neighborhood'] == "Not assigned"
toronto_grouped.loc[mask, 'Neighborhood'] = toronto_grouped.loc[mask, 'Borough']

print ('Dataframe shape:',toronto_grouped.shape)
display (toronto_grouped.head())

Dataframe shape: (103, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## Work with the coordinate Latitude Longitude, based on csv file : https://cocl.us/Geospatial_data and merge dataframes

In [65]:
# read the csv file cintaing Coordinates and convert to dataframe + merge to our previous df from wikipedia
import io
import requests
url = "https://cocl.us/Geospatial_data"
src = requests.get(url).content  # get raw formatted content data from that url
coord = pd.read_csv(io.StringIO(src.decode('utf-8'))) # pass to pandas read_csv a 'file-like' object the raw we got from prev step

# again rename header to remove space 
coord.rename(index=str, columns={"Postal Code": "PostalCode"}, inplace = True)

# mergde our datafile wth the coordinate dataframe
neigh = pd.merge(toronto_grouped, coord, on='PostalCode', how='inner') # Join info on PostalCode which is our unique value, common to both dataframe

display (neigh.head())

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Explore and cluster the neighborhoods in Toronto

#### We keep only the Borough having Toronto in their name . This sublist will be our focus for the coming investigations

In [66]:
# filter our df only on the Borough having Toronto in their name 
toronto_data = neigh[neigh['Borough'].str.contains('Toronto')].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


#### Visual our selected borough in a map using Folium

In [69]:
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# visualize the Tornonto neighbour map whenever there is Toronto in Borough 
# create map of Manhattan using latitude and longitude values. centered  on first Borough , to start
latitude = toronto_data.Latitude[0]
longitude = toronto_data.Longitude[0]
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add Borough long/lat markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [8]:
##My foursquar credentials 

CLIENT_ID = '3S1PFTNL1HAFWEYMYHRLCVZVWN1DSK0IPHKSX5JXAGR3E1ML' # your Foursquare ID
CLIENT_SECRET = 'V4IEPFK4NJZZER4LJA33R2LX3S5KRBHVECAFZJDVTLFZUTBP' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
#print('Your credentails:')
#print('CLIENT_ID: ' + CLIENT_ID)
#print('CLIENT_SECRET:' + CLIENT_SECRET)

### Explore the Toronto Neighborhood

#### Use the foursquare api to explore  what is surronding the place [first borough having Toronto in the name] we look for

In [70]:
# create foursquare URL and get result
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 1000 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)

results = requests.get(url).json()
#results

#### borrowed from Coursera Lab That  function will help to explore - based on coordinates being pushed - what are the interesting venues around. To do that we're querying the foursquare API passing along the coordinate of our first borough from our list, with a 600 radius to gather anough data from what is surrounding these places.

In [71]:
def getNearbyVenues(names, latitudes, longitudes, radius=600):
    LIMIT = 100 # limit of number of venues returned by Foursquare API
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [73]:
# Get the list of venues along with their coordinates, for our selected borough from our dataframe toronto_data
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

In [74]:
print(toronto_venues.shape)
toronto_venues.head(15)

(2111, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,Tori's Bakeshop,43.672114,-79.290331,Vegetarian / Vegan Restaurant
2,The Beaches,43.676357,-79.293031,Beaches Bake Shop,43.680363,-79.289692,Bakery
3,The Beaches,43.676357,-79.293031,Ed's Real Scoop,43.67263,-79.287993,Ice Cream Shop
4,The Beaches,43.676357,-79.293031,The Beech Tree,43.680493,-79.288846,Gastropub
5,The Beaches,43.676357,-79.293031,Mastermind Toys,43.671453,-79.293971,Toy / Game Store
6,The Beaches,43.676357,-79.293031,Beacher Cafe,43.671938,-79.291238,Breakfast Spot
7,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
8,The Beaches,43.676357,-79.293031,Veloute Bistro,43.672267,-79.289584,French Restaurant
9,The Beaches,43.676357,-79.293031,Xola,43.672603,-79.28808,Mexican Restaurant


#### Look at Neighborhood having the most Venues , by Category .. 

In [75]:
display (toronto_venues.groupby(['Neighborhood','Venue Category']).count().sort_values(['Venue'], ascending=False).head(15))

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude
Neighborhood,Venue Category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Central Bay Street,Coffee Shop,17,17,17,17,17
"Harbourfront East, Union Station, Toronto Islands",Coffee Shop,15,15,15,15,15
"Queen's Park, Ontario Provincial Government",Coffee Shop,13,13,13,13,13
Stn A PO Boxes,Coffee Shop,13,13,13,13,13
"Toronto Dominion Centre, Design Exchange",Coffee Shop,12,12,12,12,12
"Commerce Court, Victoria Hotel",Coffee Shop,12,12,12,12,12
"Toronto Dominion Centre, Design Exchange",Hotel,10,10,10,10,10
"First Canadian Place, Underground city",Coffee Shop,10,10,10,10,10
Church and Wellesley,Coffee Shop,10,10,10,10,10
"First Canadian Place, Underground city",Hotel,10,10,10,10,10


#### How many venue's categories do we have and which category is the most seen  

List the top 15 Venues we observed from our investigation .. There a lot of Coffee shop around :-) 

In [76]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

print (toronto_venues.groupby(['Venue Category']).count().sort_values(['Venue'], ascending=False)['Venue'].head(15))


There are 263 uniques categories.
Venue Category
Coffee Shop            192
Café                   108
Restaurant              68
Hotel                   57
Park                    53
Italian Restaurant      50
Japanese Restaurant     40
Sushi Restaurant        39
Bakery                  39
Pizza Place             38
Sandwich Place          37
Pub                     36
Gym                     36
Bar                     35
Gastropub               31
Name: Venue, dtype: int64


In [77]:
pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,Animal Shelter,...,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Analyse each neighborhood

In [78]:
# one hot encoding
toronto_dum = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_dum['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column to make easier to read for us as we have many Venue category
colname = "Neighborhood"
neighcol = toronto_dum.pop(colname)
toronto_dum.insert(0, colname, neighcol)

toronto_dum.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,...,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [79]:
toronto_grouped = toronto_dum.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,...,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,...,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.011111
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.052632,0.052632,0.052632,0.105263,0.105263,0.052632,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01


#### Let's see what are the top5 most common venue for each Neighborhood 

In [80]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                 venue  freq
0          Coffee Shop  0.09
1  Japanese Restaurant  0.03
2                Hotel  0.03
3   Seafood Restaurant  0.03
4           Restaurant  0.03


----Brockton, Parkdale Village, Exhibition Place----
            venue  freq
0            Café  0.07
1     Coffee Shop  0.07
2      Restaurant  0.05
3  Breakfast Spot  0.05
4       Gift Shop  0.05


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                    venue  freq
0                    Park  0.12
1           Burrito Place  0.08
2        Recording Studio  0.04
3  Thrift / Vintage Store  0.04
4                 Brewery  0.04


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
              venue  freq
0       Coffee Shop  0.11
1    Airport Lounge  0.11
2   Airport Service  0.11
3     Boat or Ferry  0.11
4  Sculpture Garden  0.05


----Central Bay Street----
                ve

In [102]:
np.arange(toronto_grouped.shape[0])
toronto_grouped.shape

(39, 263)

#### Build a dataframe out of this  and get the top5 Most common Venue

In [81]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Berczy Park,Coffee Shop,Japanese Restaurant,Seafood Restaurant,Restaurant,Pub
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Sandwich Place,Restaurant
2,"Business reply mail Processing Centre, South C...",Park,Burrito Place,Grocery Store,Pub,Skate Park
3,"CN Tower, King and Spadina, Railway Lands, Har...",Boat or Ferry,Airport Lounge,Airport Service,Coffee Shop,Harbor / Marina
4,Central Bay Street,Coffee Shop,Café,Bubble Tea Shop,Italian Restaurant,Thai Restaurant


## Clustering  Neighborhoods

In [96]:
# import k-means from clustering stage
from sklearn.cluster import KMeans
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head(10) # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Pub,Breakfast Spot,Cheese Shop,Indian Restaurant,Ice Cream Shop
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Pub,Café,Italian Restaurant
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,0,Park,Sandwich Place,Restaurant,Gym,Italian Restaurant
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Coffee Shop,Bar,Bakery,American Restaurant,Café
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,2,Swim School,Park,Bus Line,Dog Run,Doner Restaurant
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Pizza Place,Hotel,Brewery,Café,Sushi Restaurant
6,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678,0,Sporting Goods Shop,Coffee Shop,Clothing Store,Café,Diner
7,M4S,Central Toronto,Davisville,43.704324,-79.38879,0,Pizza Place,Dessert Shop,Italian Restaurant,Sandwich Place,Café
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,1,Park,Gym,Playground,Trail,Yoga Studio
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,0,Coffee Shop,Italian Restaurant,Restaurant,Sushi Restaurant,Grocery Store


In [97]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [None]:
#### Examine the clusters 

In [108]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,East Toronto,0,Pub,Breakfast Spot,Cheese Shop,Indian Restaurant,Ice Cream Shop
1,East Toronto,0,Greek Restaurant,Coffee Shop,Pub,Café,Italian Restaurant
2,East Toronto,0,Park,Sandwich Place,Restaurant,Gym,Italian Restaurant
3,East Toronto,0,Coffee Shop,Bar,Bakery,American Restaurant,Café
5,Central Toronto,0,Pizza Place,Hotel,Brewery,Café,Sushi Restaurant
6,Central Toronto,0,Sporting Goods Shop,Coffee Shop,Clothing Store,Café,Diner
7,Central Toronto,0,Pizza Place,Dessert Shop,Italian Restaurant,Sandwich Place,Café
9,Central Toronto,0,Coffee Shop,Italian Restaurant,Restaurant,Sushi Restaurant,Grocery Store
11,Downtown Toronto,0,Coffee Shop,Pizza Place,Restaurant,Park,Café
12,Downtown Toronto,0,Coffee Shop,Japanese Restaurant,Gay Bar,Restaurant,Café


In [109]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
8,Central Toronto,1,Park,Gym,Playground,Trail,Yoga Studio
10,Downtown Toronto,1,Park,Playground,Tennis Court,Trail,Donut Shop


In [110]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
4,Central Toronto,2,Swim School,Park,Bus Line,Dog Run,Doner Restaurant


In [112]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
23,Central Toronto,3,Sushi Restaurant,Jewelry Store,Mexican Restaurant,Trail,Park


In [113]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
22,Central Toronto,4,Garden,Playground,Dive Bar,Music Venue,Dumpling Restaurant
