### Segmenting and Clustering Neighborhoods in Toronto

I put all of answer in one notebook. Every question is separated.

### Question 1 Creat dataframe refer to wiki by BeautifulSoup

In [6]:
#import libraries

import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

In [7]:
#Download data by BeautifulSoup

url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
res = requests.get(url).text
soup = BeautifulSoup(res, 'lxml')
table = soup.find('table', {'class':'wikitable sortable'})

In [8]:
#Creat a datafram for toronto refer to link
table_rows = table.find_all('tr')
data = []

for row in table_rows:
    td = []
    for t in row.find_all('td'):
        td.append(t.text.strip())
    data.append(td)
df = pd.DataFrame(data, columns = ['PostalCode', 'Borough', 'Neighborhood'])
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,,,
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [9]:
#Clean the data

df = df.dropna()
empty = 'Not assigned'
df = df[(df.PostalCode != empty) & (df.Borough != empty) & (df.Neighborhood != empty)]
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights


In [10]:
df_final = df.groupby(['PostalCode', 'Borough'], sort = False).agg(','.join).reset_index()
df_final.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M9A,Etobicoke,Islington Avenue


In [11]:
df_final.shape

(102, 3)

### Question 2 Use the csv file to create dataframe with longitude and latitude values

In [12]:
#To transform the coordinate file into the dataframe.

csv_file = 'http://cocl.us/Geospatial_data'
coordinates = pd.read_csv(csv_file)
coordinates.columns = ['PostalCode', 'Latitude', 'Longitude']
coordinates.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [13]:
#Merge coordinate in dataframe

df_merge = pd.merge(df_final, coordinates[['PostalCode','Latitude', 'Longitude']], on='PostalCode')
df_merge.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
5,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
6,M3B,North York,Don Mills North,43.745906,-79.352188
7,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937
8,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937
9,M6B,North York,Glencairn,43.709577,-79.445073


In [14]:
df_final.shape

(102, 3)

### Question 3: Cluster the neighborhoods in toronto

We can use folium to generate a map with coordinates for each postal code

In [2]:
!conda install -c conda-forge folium=0.5.0 --yes 

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.3.1               |             py_0          25 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.3 MB

The following NEW packages will be 

In [17]:
#Import libraries

import folium
from geopy.geocoders import Nominatim

In [18]:
adress = 'Toronto, ON'
geolocator = Nominatim(user_agent = 'Toronto')
location = geolocator.geocode(adress)
latitude = location.latitude
longitude = location.longitude
print (latitude, longitude)

43.653963 -79.387207


In [19]:
# Generate a folium map

map_toronto = folium.Map(location = [latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(df_merge['Latitude'], 
                                           df_merge['Longitude'], 
                                           df_merge['Borough'], 
                                           df_merge['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker([lat, lng],
                       radius = 5,
                       popup = label,
                       color = 'bleu',
                       fill = True,
                       fill_color = 'red',
                       fill_opacity = 0.7,
                       parse_html = False).add_to(map_toronto)
map_toronto

Define Foursquare Credentials and Version

In [20]:
# The code was removed by Watson Studio for sharing.

In [21]:
# Defining radius and limit of venues to get
radius = 500
LIMIT = 100

In [22]:
def getNearbyVenues(names, latitudes, longitudes, radius = 500):
    venues_list = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        #Creat API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit{}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)
        
        #make GET request
        res = requests.get(url).json()['response']['groups'][0]['items']
        
        #Return relevant information for each nearby venue
        venues_list.append([(name, lat, lng,
                             v['venue']['name'], 
                             v['venue']['location']['lat'], 
                             v['venue']['location']['lng'], 
                             v['venue']['categories'][0]['name']) for v in res])
        
        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    return (nearby_venues)     

In [23]:
toronto_df = getNearbyVenues(names = df_merge['Neighborhood'],
                                latitudes = df_merge['Latitude'],
                                longitudes = df_merge['Longitude'],
                                radius = 500)

In [24]:
toronto_df.columns = ['Neighborhood', 'Neighborhood Latitude',
                     'Neighborhood Longitude', 'Venue', 'Venue Latitude',
                     'Venue Longitude', 'Venue Category']
toronto_df.head(10)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
5,Victoria Village,43.725882,-79.315572,Eglinton Ave E & Sloane Ave/Bermondsey Rd,43.726086,-79.31362,Intersection
6,Victoria Village,43.725882,-79.315572,Pizza Nova,43.725824,-79.31286,Pizza Place
7,"Harbourfront,Regent Park",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
8,"Harbourfront,Regent Park",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
9,"Harbourfront,Regent Park",43.65426,-79.360636,Toronto Cooper Koo Family Cherry St YMCA Centre,43.653191,-79.357947,Gym / Fitness Center


In [25]:
toronto_df.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",30,30,30,30,30,30
Agincourt,4,4,4,4,4,4
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",3,3,3,3,3,3
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",10,10,10,10,10,10
"Alderwood,Long Branch",9,9,9,9,9,9
"Bathurst Manor,Downsview North,Wilson Heights",18,18,18,18,18,18
Bayview Village,4,4,4,4,4,4
"Bedford Park,Lawrence Manor East",25,25,25,25,25,25
Berczy Park,30,30,30,30,30,30
"Birch Cliff,Cliffside West",4,4,4,4,4,4


In [26]:
toronto_onehot = pd.get_dummies(toronto_df[['Venue Category']], prefix = "", prefix_sep = "")
toronto_onehot ['Neighborhood'] = toronto_df['Neighborhood']
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()

Print each neighborhood along with the top 10 most common venues.

In [27]:
num_top_venues = 10

for neigh in toronto_grouped['Neighborhood']:
  print("----" + neigh + "----")
  temp = toronto_grouped[toronto_grouped['Neighborhood'] == neigh].T.reset_index()
  temp.columns = ['venue', 'freq']
  temp = temp.iloc[1:]
  temp['freq'] = temp['freq'].astype(float)
  temp = temp.round({'freq': 2})
  print(temp.sort_values('freq', ascending = False).reset_index(drop = True).head(num_top_venues))
  print('\n')

----Adelaide,King,Richmond----
                  venue  freq
0            Steakhouse  0.10
1                  Café  0.07
2                 Hotel  0.07
3           Coffee Shop  0.07
4      Asian Restaurant  0.07
5  Gym / Fitness Center  0.03
6            Food Court  0.03
7                Lounge  0.03
8            Smoke Shop  0.03
9   Monument / Landmark  0.03


----Agincourt----
                       venue  freq
0             Breakfast Spot  0.25
1                     Lounge  0.25
2             Clothing Store  0.25
3               Skating Rink  0.25
4              Movie Theater  0.00
5             Medical Center  0.00
6   Mediterranean Restaurant  0.00
7              Metro Station  0.00
8         Mexican Restaurant  0.00
9  Middle Eastern Restaurant  0.00


----Agincourt North,L'Amoreaux East,Milliken,Steeles East----
                      venue  freq
0                Playground  0.33
1                      Park  0.33
2          Sculpture Garden  0.33
3                     Motel  0.00


Put the top 10 in to a dataframe

In [28]:
# Define a function to sort the venues in descending order.

def return_most_common_venues(row, num_top_venues):
  row_categories = row.iloc[1:]
  row_categories_sorted = row_categories.sort_values(ascending = False)

  return row_categories_sorted.index.values[0:num_top_venues]

In [29]:
# Creat a new dataframe for venues sorted
import numpy as np
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# Creat columns according to the number of top venues
columns = ['Neighborhood']
for i in np.arange(num_top_venues):
  try:
    columns.append('{}{} Most Common Venue'.format(i+1, indicators[i]))
  except:
    columns.append('{}th most Common Venue'.format(i+1))

# Creat a new dataframe
neighborhood_venues_sorted = pd.DataFrame(columns = columns)
neighborhood_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for i in np.arange(toronto_grouped.shape[0]):
  neighborhood_venues_sorted.iloc[i,1:] = return_most_common_venues(toronto_grouped.iloc[i, :], num_top_venues)

neighborhood_venues_sorted.head(10)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th most Common Venue,5th most Common Venue,6th most Common Venue,7th most Common Venue,8th most Common Venue,9th most Common Venue,10th most Common Venue
0,"Adelaide,King,Richmond",Steakhouse,Café,Coffee Shop,Hotel,Asian Restaurant,Lounge,Speakeasy,Smoke Shop,Seafood Restaurant,Gastropub
1,Agincourt,Lounge,Breakfast Spot,Clothing Store,Skating Rink,Deli / Bodega,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Playground,Park,Sculpture Garden,Yoga Studio,Curling Ice,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Beer Store,Fried Chicken Joint,Fast Food Restaurant,Liquor Store,Pharmacy,Coffee Shop,Sandwich Place,Pizza Place,Aquarium
4,"Alderwood,Long Branch",Pizza Place,Skating Rink,Dance Studio,Coffee Shop,Pharmacy,Pub,Sandwich Place,Gym,American Restaurant,Ethiopian Restaurant
5,"Bathurst Manor,Downsview North,Wilson Heights",Coffee Shop,Pharmacy,Diner,Shopping Mall,Sandwich Place,Deli / Bodega,Fast Food Restaurant,Supermarket,Restaurant,Sushi Restaurant
6,Bayview Village,Chinese Restaurant,Japanese Restaurant,Bank,Café,Yoga Studio,Deli / Bodega,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
7,"Bedford Park,Lawrence Manor East",Italian Restaurant,Coffee Shop,Juice Bar,Sandwich Place,Indian Restaurant,Fast Food Restaurant,Japanese Restaurant,Breakfast Spot,Liquor Store,Restaurant
8,Berczy Park,Seafood Restaurant,Beer Bar,Coffee Shop,Cocktail Bar,Café,Farmers Market,Comfort Food Restaurant,Steakhouse,Basketball Stadium,Jazz Club
9,"Birch Cliff,Cliffside West",College Stadium,General Entertainment,Skating Rink,Café,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store


#### Cluster Neighborhood

In [30]:
# Import libraries
from sklearn.cluster import KMeans
from sklearn import metrics
from scipy.spatial.distance import cdist

toronto_clustering = toronto_grouped.drop('Neighborhood',1)


To determine the nomber of clusters. The elbow method is performed.

In [31]:
K = range(1, 10)
distortions = []
for k in K:
  kmeans = KMeans(init = 'k-means++', n_clusters=k, n_init = 12, random_state=0)
  kmeans.fit(toronto_clustering.values.reshape(-1, 1))
  distortions.append(sum(np.min(cdist(toronto_clustering.values.reshape(-1, 1),
                                        kmeans.cluster_centers_, 'euclidean'), axis=1)) / toronto_clustering.shape [0])

import matplotlib.pyplot as plt
plt.plot(K, distortions, 'bx-')
plt.xlabel('k')
plt.ylabel('Distortion')
plt.title('The Elbow Method showing the optimal k')
plt.show()

<Figure size 640x480 with 1 Axes>

Refer to the elbow method, the optimal value of the number of cluster should be defined as 5.

In [32]:
num_clusters = 5

kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(toronto_clustering)
kmeans.labels_

array([0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0,
       0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0,
       0, 0, 0, 0, 0, 0, 4, 4, 3, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,
       4, 0, 0, 0, 4, 0, 0, 0, 0, 0, 4], dtype=int32)

Right now, we need to creat a new dataframe that includeds the cluster for each neighborhood.

In [33]:
# Add clusterin labels
neighborhood_venues_sorted.insert(0, 'Cluster', kmeans.labels_)
toronto_merge = df_merge

# Merge toronto_grouped with df_merge
toronto_merge = toronto_merge.join(neighborhood_venues_sorted.set_index('Neighborhood'), on = 'Neighborhood')

toronto_merge.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th most Common Venue,5th most Common Venue,6th most Common Venue,7th most Common Venue,8th most Common Venue,9th most Common Venue,10th most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,4.0,Park,Food & Drink Shop,Yoga Studio,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Intersection,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636,0.0,Coffee Shop,Park,Bakery,Breakfast Spot,Gym / Fitness Center,Mexican Restaurant,Yoga Studio,Theater,Café,Farmers Market
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763,0.0,Furniture / Home Store,Clothing Store,Accessories Store,Arts & Crafts Store,Women's Store,Miscellaneous Shop,Boutique,Sporting Goods Shop,Coffee Shop,Vietnamese Restaurant
4,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,,,,,,,,,,,


We observe that there is NaN for some neighborhood and one more cluster label columns. Drop them.

In [34]:
toronto_merge = toronto_merge.dropna()

In [36]:
toronto_merge['Cluster'] = toronto_merge.Cluster.astype(int)

In [37]:
# Import libraries
import matplotlib.cm as cm
import matplotlib.colors as colors

# Creat the folium map

map_toronto = folium.Map(location = [latitude, longitude], zoom_start=10)

# Set color for clusters
x = np.arange(num_clusters)
ys = [i + x +(i*x)**2 for i in range(num_clusters)]
colors_array = cm.rainbow(np.linspace(0,1,len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Set the marker for the map
markers_colors = []
for lat, lng, nei, cluster in zip(toronto_merge['Latitude'], toronto_merge['Longitude'], toronto_merge['Neighborhood'], toronto_merge['Cluster']):
  label = folium.Popup(str(nei) + ' Cluster ' + str(cluster), parse_html=True)
  folium.CircleMarker([lat, lng],
                      radius=5,
                      popup=label,
                      color=rainbow[cluster-1],
                      fill=True,
                      fill_color=rainbow[cluster-1],
                      fill_opacity=0.7).add_to(map_toronto)

map_toronto


#### For Cluster 1

In [38]:
toronto_merge.loc[toronto_merge['Cluster'] == 0, toronto_merge.columns[[1] + list(range(5, toronto_merge.shape[1]))]]

Unnamed: 0,Borough,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th most Common Venue,5th most Common Venue,6th most Common Venue,7th most Common Venue,8th most Common Venue,9th most Common Venue,10th most Common Venue
1,North York,0,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Intersection,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
2,Downtown Toronto,0,Coffee Shop,Park,Bakery,Breakfast Spot,Gym / Fitness Center,Mexican Restaurant,Yoga Studio,Theater,Café,Farmers Market
3,North York,0,Furniture / Home Store,Clothing Store,Accessories Store,Arts & Crafts Store,Women's Store,Miscellaneous Shop,Boutique,Sporting Goods Shop,Coffee Shop,Vietnamese Restaurant
6,North York,0,Café,Gym / Fitness Center,Caribbean Restaurant,Japanese Restaurant,Baseball Field,Basketball Court,Diner,Dessert Shop,Dim Sum Restaurant,Yoga Studio
7,East York,0,Pizza Place,Fast Food Restaurant,Athletics & Sports,Pharmacy,Café,Intersection,Bank,Pet Store,Gym / Fitness Center,Gastropub
8,Downtown Toronto,0,Café,Coffee Shop,Clothing Store,Gastropub,Beer Bar,Plaza,Steakhouse,Sporting Goods Shop,Hotel,Spa
9,North York,0,Pizza Place,Japanese Restaurant,Sushi Restaurant,Asian Restaurant,Pub,Park,Dog Run,Discount Store,Diner,Curling Ice
11,Scarborough,0,Bar,Yoga Studio,Deli / Bodega,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
12,North York,0,Gym,Asian Restaurant,Coffee Shop,Beer Store,Japanese Restaurant,Fast Food Restaurant,Discount Store,Dim Sum Restaurant,Concert Hall,Supermarket
13,East York,0,Park,Video Store,Cosmetics Shop,Bus Stop,Beer Store,Skating Rink,Pharmacy,Curling Ice,Dog Run,Drugstore


####  For Cluster 2

In [39]:
toronto_merge.loc[toronto_merge['Cluster'] == 1, toronto_merge.columns[[1] + list(range(5, toronto_merge.shape[1]))]]

Unnamed: 0,Borough,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th most Common Venue,5th most Common Venue,6th most Common Venue,7th most Common Venue,8th most Common Venue,9th most Common Venue,10th most Common Venue
5,Scarborough,1,Fast Food Restaurant,Yoga Studio,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner


#### For Cluster 3

In [40]:
toronto_merge.loc[toronto_merge['Cluster'] == 2, toronto_merge.columns[[1] + list(range(5, toronto_merge.shape[1]))]]

Unnamed: 0,Borough,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th most Common Venue,5th most Common Venue,6th most Common Venue,7th most Common Venue,8th most Common Venue,9th most Common Venue,10th most Common Venue
10,Etobicoke,2,Bank,Yoga Studio,Deli / Bodega,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store


#### For Cluster 4

In [41]:
toronto_merge.loc[toronto_merge['Cluster'] == 3, toronto_merge.columns[[1] + list(range(5, toronto_merge.shape[1]))]]

Unnamed: 0,Borough,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th most Common Venue,5th most Common Venue,6th most Common Venue,7th most Common Venue,8th most Common Venue,9th most Common Venue,10th most Common Venue
61,Central Toronto,3,Garden,Yoga Studio,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner


#### For cluster 5

In [42]:
toronto_merge.loc[toronto_merge['Cluster'] == 4, toronto_merge.columns[[1] + list(range(5, toronto_merge.shape[1]))]]

Unnamed: 0,Borough,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th most Common Venue,5th most Common Venue,6th most Common Venue,7th most Common Venue,8th most Common Venue,9th most Common Venue,10th most Common Venue
0,North York,4,Park,Food & Drink Shop,Yoga Studio,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
20,York,4,Park,Women's Store,Market,Fast Food Restaurant,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
34,East York,4,Pizza Place,Coffee Shop,Park,Convenience Store,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
39,North York,4,Airport,Park,Snack Place,Other Repair Shop,Yoga Studio,Curling Ice,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
44,North York,4,Park,Yoga Studio,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
48,North York,4,Park,Construction & Landscaping,Basketball Court,Bakery,Yoga Studio,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
60,Central Toronto,4,Park,Swim School,Bus Line,Yoga Studio,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
63,York,4,Park,Convenience Store,Yoga Studio,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
65,North York,4,Park,Convenience Store,Bar,Bank,Yoga Studio,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
67,Central Toronto,4,Park,Jewelry Store,Trail,Sushi Restaurant,Yoga Studio,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
