Applied Data Science Capstone
=============================

Week 3: Segmenting and Clustering Neighborhoods in Toronto
------

In [42]:
# Import the packages we're going to need.
import random
import numpy as np
import pandas as pd
import urllib.request
from bs4 import BeautifulSoup
import lxml
import geocoder
import folium
import requests
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

### Part 1: Scraping the online table

We will scrape the Wikipedia page using the BeautifulSoup package and convert the table on the website into a pandas DataFrame:

In [2]:
# Scrape the Wikipedia page using the BeautifulSoup library.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = urllib.request.urlopen(url)
soup = BeautifulSoup(page, 'lxml')

# Find the table of interest within the webpage.
table = soup.find('table', class_='wikitable sortable')

# Go through the table and extract the values into three lists, one for each column.
A = []
B = []
C = []
column_names = []
for row in table.findAll('tr'):
    # Extract and save the column headers when you find them.
    headers = row.findAll('th')
    if len(headers) == 3:
        for header in headers:
            column_names.append(header.find(text=True).rstrip().replace(' ', '').replace('Neighbour', 'Neighbor'))
    cells = row.findAll('td')
    # Extract and save the values.
    if len(cells) == 3:
        A.append(cells[0].find(text=True).rstrip())
        B.append(cells[1].find(text=True).rstrip())
        C.append(cells[2].find(text=True).rstrip())
        
# Create a dataframe with the values scraped from the webpage table.
df = pd.DataFrame(A, columns=[column_names[0]])
df[column_names[1]] = B
df[column_names[2]] = C

# Remove rows with non-assigned boroughs. Remember to reset the indices on the table.
df.drop(df[df.Borough == 'Not assigned'].index, inplace=True)
df = df.reset_index(drop=True)

We first run some checks that the data has been cleaned correctly:

In [3]:
# Check that there are no more non-assigned boroughs or neighborhoods.
print('Number of non-assigned boroughs:', len(df.loc[df.Borough == 'Not assigned']))
print('Number of non-assigned neighborhoods:', len(df.loc[df.Neighborhood == 'Not assigned']))

# Check that there are no duplicate values in the postal codes column.
print('Postal code duplicates:', df.PostalCode.duplicated().any())

Number of non-assigned boroughs: 0
Number of non-assigned neighborhoods: 0
Postal code duplicates: False


Now we can display the first few rows of the table:

In [4]:
df.head(11)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


And finally we display the shape of the table:

In [5]:
df.shape

(103, 3)

### Part 2: Adding geographical coordinates

We will use the Geocoder package to find the longitude and latitude values for each postal code. Within Geocoder we will make use of the ArcGIS system, as it is free and stable.

In [8]:
# Loop through the postal codes in the table and look up the geographical coordinates for each.
latitude = []
longitude = []
for postal_code in df.PostalCode:
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
    latitude.append(g.latlng[0])
    longitude.append(g.latlng[1])

# Append the latitude and longitude values to the table.
df['Latitude'] = latitude
df['Longitude'] = longitude

Now we can display the new table with the added geographical coordinates:

In [9]:
df.head(11)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.66263,-79.52831
6,M1B,Scarborough,"Malvern, Rouge",43.81139,-79.19662
7,M3B,North York,Don Mills,43.74923,-79.36186
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.70718,-79.31192
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.65739,-79.37804


### Part 3: Segmenting and clustering neighborhoods

Let's start by creating a map of Toronto with the neighborhoods mapped:

In [10]:
# Get the coordinates for the city.
g = geocoder.arcgis('Toronto, Ontario')

In [11]:
# ArcGIS gives the wrong coordinates for this postal code, so we fix it by hand.
index = df[df.PostalCode == 'M7Y'].index[0]
df.at[index, 'Latitude'] = 43.6627
df.at[index, 'Longitude'] = -79.3216

In [14]:
def createMap(df):
    # Create the map.
    map_toronto = folium.Map(location=g.latlng, zoom_start=10)

    # Add the neighborhood markers. Use a different random color for each borough.
    def generateRandomColor():
        color = '#'+''.join([random.choice('0123456789abcdef') for j in range(6)])
        return color

    borough_colors = {}
    for index, row in df.iterrows():
        borough = row.Borough
        if borough not in borough_colors:
            borough_colors[borough] = generateRandomColor()
        label = '{} ({})'.format(row.Neighborhood, borough)
        folium.CircleMarker(
            [row.Latitude, row.Longitude],
            radius=5,
            popup=label,
            color=borough_colors[borough],
            fill_color=borough_colors[borough],
            fill_opacity=0.7,
            parse_html=False).add_to(map_toronto)

    return map_toronto

# Create and display the map.
map_toronto = createMap(df)
map_toronto

Now we restrict ourselves to boroughs that contain the word Toronto:

In [15]:
# Create the DataFrame with the subset of boroughs.
df_subset = df[df.Borough.str.contains('Toronto')].reset_index(drop=True)

# Create and display the new map.
map_subset = createMap(df_subset)
map_subset

We can now use the Foursquare API to retrieve the venues in all of the neighborhoods in this subset of boroughs:

In [16]:
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

In [17]:
radius = 500

venues_list = []
for index, row in df_subset.iterrows():
    # create the API request URL.
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        row.Latitude,
        row.Longitude,
        radius,
        LIMIT)
    
    # make the GET request.
    results = requests.get(url).json()['response']['groups'][0]['items']

    # Append the relevant information to the output list.
    for result in results:
        venues_list.append(
            (
                row.Neighborhood, 
                row.Latitude, 
                row.Longitude, 
                result['venue']['name'], 
                result['venue']['location']['lat'], 
                result['venue']['location']['lng'],  
                result['venue']['categories'][0]['name'],
            )
        )

In [18]:
# Transfer the data into a DataFrame.
toronto_venues = pd.DataFrame(
    venues_list,
    columns= [
        'Neighborhood',
        'Neighborhood Latitude',
        'Neighborhood Longitude',
        'Venue',
        'Venue Latitude',
        'Venue Longitude', 
        'Venue Category',
    ],
)

# Display the first few rows.
toronto_venues.head(11)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65512,-79.36264,Tandem Coffee,43.653559,-79.361809,Coffee Shop
1,"Regent Park, Harbourfront",43.65512,-79.36264,Roselle Desserts,43.653447,-79.362017,Bakery
2,"Regent Park, Harbourfront",43.65512,-79.36264,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
3,"Regent Park, Harbourfront",43.65512,-79.36264,The Yoga Lounge,43.655515,-79.364955,Yoga Studio
4,"Regent Park, Harbourfront",43.65512,-79.36264,Body Blitz Spa East,43.654735,-79.359874,Spa
5,"Regent Park, Harbourfront",43.65512,-79.36264,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
6,"Regent Park, Harbourfront",43.65512,-79.36264,Rooster Coffee,43.6519,-79.365609,Coffee Shop
7,"Regent Park, Harbourfront",43.65512,-79.36264,Berkeley Church,43.655123,-79.365873,Event Space
8,"Regent Park, Harbourfront",43.65512,-79.36264,Dominion Pub and Kitchen,43.656919,-79.358967,Pub
9,"Regent Park, Harbourfront",43.65512,-79.36264,Sumach Espresso,43.658135,-79.359515,Coffee Shop


Let's also display some information for the DataFrame:

In [21]:
toronto_venues.shape

(1629, 7)

In [22]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 227 uniques categories.


Now we can start manipulating the data for our analysis and clustering:

In [26]:
# Create the one-hot encoding.
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix='', prefix_sep='')

# Add the neighborhood data back to DataFrame and move it to the first column.
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood']
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

# Display the DataFrame.
toronto_onehot.head(11)

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head(11)

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint
0,Berczy Park,0.016129,0.0,0.0,0.0,0.016129,0.0,0.016129,0.0,0.0,...,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.011905,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,...,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.012821,0.0,0.012821,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,0.016667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.016667,0.016667,0.016667,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Commerce Court, Victoria Hotel",0.01,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Display the top 5 venue types for each neighborhood:

In [28]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0         Coffee Shop  0.08
1        Cocktail Bar  0.05
2  Seafood Restaurant  0.05
3            Beer Bar  0.03
4              Bakery  0.03


----Brockton, Parkdale Village, Exhibition Place----
            venue  freq
0            Café  0.06
1             Bar  0.06
2     Coffee Shop  0.06
3      Restaurant  0.05
4  Sandwich Place  0.04


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                venue  freq
0  Light Rail Station  0.13
1             Butcher  0.07
2          Comic Shop  0.07
3      Farmers Market  0.07
4          Skate Park  0.07


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                venue  freq
0  Italian Restaurant  0.06
1         Coffee Shop  0.06
2                Café  0.05
3                Park  0.04
4   French Restaurant  0.04


----Central Bay Street----
                       venue  freq
0

Process the data to create a new DataFrame with the top 10 venue types for each neighborhood:

In [30]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Create columns according to number of top venues.
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# Create the new DataFrame.
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(11)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Beer Bar,Bakery,Breakfast Spot,Restaurant,Farmers Market,Cheese Shop,Lounge
1,"Brockton, Parkdale Village, Exhibition Place",Café,Bar,Coffee Shop,Restaurant,Sandwich Place,Gift Shop,Japanese Restaurant,Burrito Place,Arts & Crafts Store,Breakfast Spot
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Butcher,Comic Shop,Farmers Market,Skate Park,Garden Center,Garden,Restaurant,Gym / Fitness Center,Park
3,"CN Tower, King and Spadina, Railway Lands, Har...",Italian Restaurant,Coffee Shop,Café,Park,French Restaurant,Bar,Speakeasy,Lounge,Grocery Store,Bakery
4,Central Bay Street,Coffee Shop,Clothing Store,Pizza Place,Middle Eastern Restaurant,Plaza,Café,Cosmetics Shop,Sandwich Place,Bubble Tea Shop,Ramen Restaurant
5,Christie,Café,Grocery Store,Playground,Baby Store,Candy Store,Italian Restaurant,Coffee Shop,Moving Target,Museum,Optical Shop
6,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Restaurant,Fast Food Restaurant,Café,Gay Bar,Dance Studio,Hotel,Mediterranean Restaurant
7,"Commerce Court, Victoria Hotel",Coffee Shop,Hotel,Restaurant,Italian Restaurant,Café,American Restaurant,Japanese Restaurant,Gym,Seafood Restaurant,Deli / Bodega
8,Davisville,Dessert Shop,Coffee Shop,Pizza Place,Italian Restaurant,Café,Sandwich Place,Park,Thai Restaurant,Fast Food Restaurant,Sushi Restaurant
9,Davisville North,Hotel,Gym / Fitness Center,Park,Japanese Restaurant,Gym,Department Store,Breakfast Spot,Food & Drink Shop,Monument / Landmark,Moroccan Restaurant


Finally, we can cluster the data using a k-means algorithm. We'll cluster into three groups:

In [32]:
# Set the number of clusters.
kclusters = 3

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# Run k-means clustering.
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# Add the clustering labels to the DataFrame.
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_subset

toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Let's have a look at the DataFrame again, with the added cluster data:

In [34]:
toronto_merged.head(11)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,1,Coffee Shop,Breakfast Spot,Yoga Studio,Gym / Fitness Center,Spa,Food Truck,Restaurant,Thai Restaurant,Bakery,Theater
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188,1,Coffee Shop,Sandwich Place,Fried Chicken Joint,Theater,Burrito Place,Café,Moving Target,Mediterranean Restaurant,Gastropub,Bank
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.65739,-79.37804,1,Coffee Shop,Clothing Store,Japanese Restaurant,Hotel,Café,Middle Eastern Restaurant,Cosmetics Shop,Diner,Movie Theater,Fast Food Restaurant
3,M5C,Downtown Toronto,St. James Town,43.65215,-79.37587,1,Coffee Shop,Hotel,Cosmetics Shop,Café,Clothing Store,Italian Restaurant,Cocktail Bar,Gastropub,Moroccan Restaurant,Restaurant
4,M4E,East Toronto,The Beaches,43.67709,-79.29547,1,Health Food Store,Pub,Trail,Yoga Studio,Opera House,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater
5,M5E,Downtown Toronto,Berczy Park,43.64536,-79.37306,1,Coffee Shop,Cocktail Bar,Seafood Restaurant,Beer Bar,Bakery,Breakfast Spot,Restaurant,Farmers Market,Cheese Shop,Lounge
6,M5G,Downtown Toronto,Central Bay Street,43.65609,-79.38493,1,Coffee Shop,Clothing Store,Pizza Place,Middle Eastern Restaurant,Plaza,Café,Cosmetics Shop,Sandwich Place,Bubble Tea Shop,Ramen Restaurant
7,M6G,Downtown Toronto,Christie,43.66869,-79.42071,1,Café,Grocery Store,Playground,Baby Store,Candy Store,Italian Restaurant,Coffee Shop,Moving Target,Museum,Optical Shop
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.6497,-79.38258,1,Coffee Shop,Café,Hotel,Restaurant,Gym,American Restaurant,Japanese Restaurant,Steakhouse,Salad Place,Asian Restaurant
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.66505,-79.43891,1,Park,Grocery Store,Furniture / Home Store,Middle Eastern Restaurant,Smoke Shop,Brazilian Restaurant,Liquor Store,Gym,Bar,Bank


Finally, let's visualize the clusters on the map:

In [44]:
# Create the map.
map_clusters = folium.Map(location=g.latlng, zoom_start=11)

# Set the color scheme for the clusters.
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map.
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

# Display the map.
map_clusters

Surprisingly, the k-means algorithm has created one large cluster with almost all the neighborhoods in it, and two much smaller clusters around the outskirts. This suggests a large degree of homogeneity between neighborhoods. Let's end the analysis by separating the DataFrame by cluster:

In [45]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Central Toronto,0,Business Service,Park,Yoga Studio,Opera House,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Museum
22,West Toronto,0,Park,Yoga Studio,Noodle House,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target
23,Central Toronto,0,Playground,Gym Pool,Park,Noodle House,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater
33,Downtown Toronto,0,Park,Playground,Bike Trail,Opera House,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target


In [46]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,1,Coffee Shop,Breakfast Spot,Yoga Studio,Gym / Fitness Center,Spa,Food Truck,Restaurant,Thai Restaurant,Bakery,Theater
1,Downtown Toronto,1,Coffee Shop,Sandwich Place,Fried Chicken Joint,Theater,Burrito Place,Café,Moving Target,Mediterranean Restaurant,Gastropub,Bank
2,Downtown Toronto,1,Coffee Shop,Clothing Store,Japanese Restaurant,Hotel,Café,Middle Eastern Restaurant,Cosmetics Shop,Diner,Movie Theater,Fast Food Restaurant
3,Downtown Toronto,1,Coffee Shop,Hotel,Cosmetics Shop,Café,Clothing Store,Italian Restaurant,Cocktail Bar,Gastropub,Moroccan Restaurant,Restaurant
4,East Toronto,1,Health Food Store,Pub,Trail,Yoga Studio,Opera House,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater
5,Downtown Toronto,1,Coffee Shop,Cocktail Bar,Seafood Restaurant,Beer Bar,Bakery,Breakfast Spot,Restaurant,Farmers Market,Cheese Shop,Lounge
6,Downtown Toronto,1,Coffee Shop,Clothing Store,Pizza Place,Middle Eastern Restaurant,Plaza,Café,Cosmetics Shop,Sandwich Place,Bubble Tea Shop,Ramen Restaurant
7,Downtown Toronto,1,Café,Grocery Store,Playground,Baby Store,Candy Store,Italian Restaurant,Coffee Shop,Moving Target,Museum,Optical Shop
8,Downtown Toronto,1,Coffee Shop,Café,Hotel,Restaurant,Gym,American Restaurant,Japanese Restaurant,Steakhouse,Salad Place,Asian Restaurant
9,West Toronto,1,Park,Grocery Store,Furniture / Home Store,Middle Eastern Restaurant,Smoke Shop,Brazilian Restaurant,Liquor Store,Gym,Bar,Bank


In [47]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Central Toronto,2,Health & Beauty Service,Fast Food Restaurant,Yoga Studio,Opera House,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target
