# Task 3: Segmenting, Clustering and Analyzing the Neighborhood of Toronto

Coding of part 1 and part 2

In [1]:
! pip install BeautifulSoup4

import pandas as pd
from bs4 import BeautifulSoup
import requests
import re


url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
src = requests.get(url).text
soup = BeautifulSoup(src, 'xml')
table=soup.find('table')

column_names = ['Postalcode','Borough','Neighborhood']
df = pd.DataFrame(columns = column_names)

postcode=0
borough=0
nh=0

# Codong to extract data into dataframe
for tr in table.find_all('tr'):
    row_data=[]
    for td in tr.find_all('td'):
        postcode=td.p.b.text
        borough=td.find_next('a').text
        nh1 = td.p.span.text.replace(')',' ').split('(',)
        nh = " ,".join(nh1)
        df = df.append({'Postalcode':postcode,'Borough':borough,'Neighborhood':nh}, ignore_index=True)

del_rows=df[df['Neighborhood']=='Not assigned'].index
df.drop(del_rows, inplace=True)

!conda install -c conda-forge geocoder --yes
url1 = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv'
geo_data = pd.read_csv(url1)


df_toronto = pd.merge(geo_data, df, left_on='Postal Code', right_on='Postalcode')

df_toronto = df_toronto[['Postalcode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']]
df_toronto.head(10)

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Scarborough ,Malvern / Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Scarborough ,Rouge Hill / Port Union / Highlan...",43.784535,-79.160497
2,M1E,Scarborough,"Scarborough ,Guildwood / Morningside / West Hill",43.763573,-79.188711
3,M1G,Scarborough,"Scarborough ,Woburn",43.770992,-79.216917
4,M1H,Scarborough,"Scarborough ,Cedarbrae",43.773136,-79.239476
5,M1J,Scarborough,"Scarborough ,Scarborough Village",43.744734,-79.239476
6,M1K,Scarborough,"Scarborough ,Kennedy Park / Ionview / East Bir...",43.727929,-79.262029
7,M1L,Scarborough,"Scarborough ,Golden Mile / Clairlea / Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Scarborough ,Cliffside / Cliffcrest / Scarboro...",43.716316,-79.239476
9,M1N,Scarborough,"Scarborough ,Birch Cliff / Cliffside West",43.692657,-79.264848


Install Required Libraries for Clustering and Rendering Map

In [2]:
import json # library to handle JSON files
import numpy as np

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python-3.7-main

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.1.0                |     pyhd3deb0d_0          64 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          98 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-2.1.0-pyhd3deb0d_0



Downloading and Extracting Packages
geographiclib-1.50   | 34 KB     | ##################################### | 100% 
geopy-2.1.0          | 64 KB     | ###################################

Lets' get the coordinates of Toronto city.

In [3]:
address = "Toronto, ON"

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto city are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto city are 43.6534817, -79.3839347.


Lets create a map of the whole Toronto City.

In [4]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
map_toronto

 Lets add markers to the Toronto Mapwith neighborhoods superimposed on top

In [5]:
for lat, lng, borough, neighborhood in zip(
        df_toronto['Latitude'], 
        df_toronto['Longitude'], 
        df_toronto['Borough'], 
        df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  

map_toronto

Dataframe to map of a part of Toronto City

In [6]:
df_toronto_town = df_toronto[df_toronto['Borough'].str.contains("Toronto")].reset_index(drop=True)
df_toronto_town.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M4R,North Toronto,"Central Toronto ,North Toronto West",43.715383,-79.405678
1,M4W,Downtown Toronto,"Downtown Toronto ,Rosedale",43.679563,-79.377529
2,M4X,Downtown Toronto,"Downtown Toronto ,St. James Town / Cabbagetown",43.667967,-79.367675
3,M4Y,Downtown Toronto,"Downtown Toronto ,Church and Wellesley",43.66586,-79.38316
4,M5A,Downtown Toronto,"Downtown Toronto ,Regent Park / Harbourfront",43.65426,-79.360636


Plot again the map and the markers for this region.

In [7]:
map_toronto_town = folium.Map(location=[latitude, longitude], zoom_start=12)
for lat, lng, borough, neighborhood in zip(
        df_toronto_town['Latitude'], 
        df_toronto_town['Longitude'], 
        df_toronto_town['Borough'], 
        df_toronto_town['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_town)  

map_toronto_town

Define Foursquare Credentials and Version

In [8]:
# Personal Information have been kept hidden from the post shared on github
CLIENT_ID = '' # Foursquare ID 
CLIENT_SECRET = '' # Foursquare Secret
VERSION = '' # Foursquare API version
LIMIT = 100

Explore the first neighborhood in our previously created data frame

In [10]:
neighborhood_name = df_toronto_town.loc[0, 'Neighborhood']
print(f"The first neighborhood's name is '{neighborhood_name}'.")

neighborhood_latitude = df_toronto_town.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_toronto_town.loc[0, 'Longitude'] # neighborhood longitude value

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

The first neighborhood's name is 'Central Toronto ,North Toronto West '.
Latitude and longitude values of Central Toronto ,North Toronto West  are 43.7153834, -79.4056784.


Now, let's get the top 100 venues that are in The Beaches within a radius of 1000 meters

In [12]:
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

# get the result to a json file
results = requests.get(url).json()

Function that extracts the category of the venue

In [13]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues



Unnamed: 0,name,categories,lat,lng
0,Barreworks,Yoga Studio,43.71407,-79.400109
1,Uncle Betty's Diner,Diner,43.714452,-79.400091
2,Civello Salon,Salon / Barbershop,43.715111,-79.400304
3,Sushi Shop,Restaurant,43.713861,-79.400093
4,Nailsense,Spa,43.717467,-79.400653
5,Tio's Urban Mexican,Mexican Restaurant,43.71463,-79.4
6,lululemon,Clothing Store,43.713478,-79.400082
7,Starbucks,Coffee Shop,43.71559,-79.40045
8,C'est Bon,Chinese Restaurant,43.716785,-79.400406
9,Sporting Life,Sporting Goods Shop,43.716277,-79.400248


Lets xxplore neighborhoods of Toronto City

In [15]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    venues_list=[]
    
    for name, lat, lng in zip(names, latitudes, longitudes):
        # print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

toronto_town_venues = getNearbyVenues(names=df_toronto_town['Neighborhood'],
                                   latitudes=df_toronto_town['Latitude'],
                                   longitudes=df_toronto_town['Longitude']
                                  )

toronto_town_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Central Toronto ,North Toronto West",43.715383,-79.405678,Barreworks,43.71407,-79.400109,Yoga Studio
1,"Central Toronto ,North Toronto West",43.715383,-79.405678,Uncle Betty's Diner,43.714452,-79.400091,Diner
2,"Central Toronto ,North Toronto West",43.715383,-79.405678,Civello Salon,43.715111,-79.400304,Salon / Barbershop
3,"Central Toronto ,North Toronto West",43.715383,-79.405678,Sushi Shop,43.713861,-79.400093,Restaurant
4,"Central Toronto ,North Toronto West",43.715383,-79.405678,Nailsense,43.717467,-79.400653,Spa


Let's check how many venues were returned for each neighborhood.

In [16]:
toronto_town_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Central Toronto ,North Toronto West",20,20,20,20,20,20
"Downtown Toronto ,Berczy Park",59,59,59,59,59,59
"Downtown Toronto ,CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport",15,15,15,15,15,15
"Downtown Toronto ,Central Bay Street",61,61,61,61,61,61
"Downtown Toronto ,Christie",16,16,16,16,16,16
"Downtown Toronto ,Church and Wellesley",79,79,79,79,79,79
"Downtown Toronto ,Commerce Court / Victoria Hotel",100,100,100,100,100,100
"Downtown Toronto ,First Canadian Place / Underground city",100,100,100,100,100,100
"Downtown Toronto ,Garden District, Ryerson",100,100,100,100,100,100
"Downtown Toronto ,Harbourfront East / Union Station / Toronto Islands",100,100,100,100,100,100


Let's find out how many unique categories can be curated from all the returned venues

In [17]:
print('There are {} uniques categories.'.format(len(toronto_town_venues['Venue Category'].unique())))

There are 204 uniques categories.


Lets analyze Each Neighborhood

In [19]:
# one hot encoding
toronto_town_onehot = pd.get_dummies(toronto_town_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_town_onehot['Neighborhood'] = toronto_town_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_town_onehot.columns[-1]] + list(toronto_town_onehot.columns[:-1])
toronto_town_onehot = toronto_town_onehot[fixed_columns]

toronto_town_onehot.head()

Unnamed: 0,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [20]:
toronto_town_grouped = toronto_town_onehot.groupby('Neighborhood').mean().reset_index()
toronto_town_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar
0,"Central Toronto ,North Toronto West",0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Downtown Toronto ,Berczy Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.016949,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0
2,"Downtown Toronto ,CN Tower / King and Spadina ...",0.0,0.0,0.0,0.066667,0.066667,0.066667,0.133333,0.2,0.133333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Downtown Toronto ,Central Bay Street",0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.032787,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.016393
4,"Downtown Toronto ,Christie",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Let's check the 10 most common venues in each neighborhood.

In [21]:

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_town_grouped['Neighborhood']

for ind in np.arange(toronto_town_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_town_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Central Toronto ,North Toronto West",Clothing Store,Coffee Shop,Yoga Studio,Sporting Goods Shop,Diner,Chinese Restaurant,Café,Rental Car Location,Restaurant,Salon / Barbershop
1,"Downtown Toronto ,Berczy Park",Coffee Shop,Cocktail Bar,Bakery,Restaurant,Cheese Shop,Beer Bar,Farmers Market,Pharmacy,Seafood Restaurant,Concert Hall
2,"Downtown Toronto ,CN Tower / King and Spadina ...",Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Sculpture Garden,Plane,Airport,Airport Food Court,Airport Gate,Harbor / Marina
3,"Downtown Toronto ,Central Bay Street",Coffee Shop,Café,Italian Restaurant,Sandwich Place,Japanese Restaurant,Salad Place,Bubble Tea Shop,Burger Joint,Thai Restaurant,Portuguese Restaurant
4,"Downtown Toronto ,Christie",Grocery Store,Café,Park,Nightclub,Baby Store,Athletics & Sports,Restaurant,Italian Restaurant,Candy Store,Coffee Shop


Now Lets'Cluster neighborhoods

In [22]:
# set number of clusters
kclusters = 5

toronto_town_grouped_clustering = toronto_town_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_town_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 1, 2, 1, 4, 1, 1, 1, 1, 1], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [23]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_town_merged = df_toronto_town

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_town_merged = toronto_town_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_town_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4R,North Toronto,"Central Toronto ,North Toronto West",43.715383,-79.405678,0,Clothing Store,Coffee Shop,Yoga Studio,Sporting Goods Shop,Diner,Chinese Restaurant,Café,Rental Car Location,Restaurant,Salon / Barbershop
1,M4W,Downtown Toronto,"Downtown Toronto ,Rosedale",43.679563,-79.377529,3,Park,Playground,Trail,Yoga Studio,Movie Theater,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
2,M4X,Downtown Toronto,"Downtown Toronto ,St. James Town / Cabbagetown",43.667967,-79.367675,1,Pizza Place,Coffee Shop,Café,Chinese Restaurant,Restaurant,Italian Restaurant,Bakery,Pet Store,Pub,Market
3,M4Y,Downtown Toronto,"Downtown Toronto ,Church and Wellesley",43.66586,-79.38316,1,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Gay Bar,Restaurant,Yoga Studio,Men's Store,Fast Food Restaurant,Smoke Shop,Mediterranean Restaurant
4,M5A,Downtown Toronto,"Downtown Toronto ,Regent Park / Harbourfront",43.65426,-79.360636,1,Coffee Shop,Bakery,Park,Breakfast Spot,Theater,Café,Pub,Event Space,Chocolate Shop,Cosmetics Shop


Finally, let's visualize the resulting clusters

In [24]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(
        toronto_town_merged['Latitude'], 
        toronto_town_merged['Longitude'], 
        toronto_town_merged['Neighborhood'], 
        toronto_town_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Now, let's examine each cluster and determine the discriminating venue categories that distinguish each cluster.

# Cluster 1

In [25]:
toronto_town_merged.loc[toronto_town_merged['Cluster Labels'] == 0, toronto_town_merged.columns[[1] + list(range(5, toronto_town_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North Toronto,0,Clothing Store,Coffee Shop,Yoga Studio,Sporting Goods Shop,Diner,Chinese Restaurant,Café,Rental Car Location,Restaurant,Salon / Barbershop


# Cluster 2

In [26]:
toronto_town_merged.loc[toronto_town_merged['Cluster Labels'] == 1, toronto_town_merged.columns[[1] + list(range(5, toronto_town_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,1,Pizza Place,Coffee Shop,Café,Chinese Restaurant,Restaurant,Italian Restaurant,Bakery,Pet Store,Pub,Market
3,Downtown Toronto,1,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Gay Bar,Restaurant,Yoga Studio,Men's Store,Fast Food Restaurant,Smoke Shop,Mediterranean Restaurant
4,Downtown Toronto,1,Coffee Shop,Bakery,Park,Breakfast Spot,Theater,Café,Pub,Event Space,Chocolate Shop,Cosmetics Shop
5,Downtown Toronto,1,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Japanese Restaurant,Café,Hotel,Bubble Tea Shop,Italian Restaurant,Cosmetics Shop,Fast Food Restaurant
6,Downtown Toronto,1,Café,Coffee Shop,Cocktail Bar,Restaurant,Gastropub,Italian Restaurant,Moroccan Restaurant,Creperie,Farmers Market,Park
7,Downtown Toronto,1,Coffee Shop,Cocktail Bar,Bakery,Restaurant,Cheese Shop,Beer Bar,Farmers Market,Pharmacy,Seafood Restaurant,Concert Hall
8,Downtown Toronto,1,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Japanese Restaurant,Salad Place,Bubble Tea Shop,Burger Joint,Thai Restaurant,Portuguese Restaurant
9,Downtown Toronto,1,Coffee Shop,Café,Restaurant,Hotel,Clothing Store,Gym,Thai Restaurant,Deli / Bodega,Cosmetics Shop,Sushi Restaurant
10,Downtown Toronto,1,Coffee Shop,Aquarium,Café,Hotel,Scenic Lookout,Brewery,Italian Restaurant,Restaurant,Sporting Goods Shop,Fried Chicken Joint
11,Downtown Toronto,1,Coffee Shop,Hotel,Café,Seafood Restaurant,Restaurant,Salad Place,Japanese Restaurant,Italian Restaurant,Sushi Restaurant,Asian Restaurant


# Cluster 3

In [27]:
toronto_town_merged.loc[toronto_town_merged['Cluster Labels'] == 2, toronto_town_merged.columns[[1] + list(range(5, toronto_town_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Downtown Toronto,2,Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Sculpture Garden,Plane,Airport,Airport Food Court,Airport Gate,Harbor / Marina


# Cluster 4

In [28]:
toronto_town_merged.loc[toronto_town_merged['Cluster Labels'] == 3, toronto_town_merged.columns[[1] + list(range(5, toronto_town_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,3,Park,Playground,Trail,Yoga Studio,Movie Theater,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop


# Cluster 5

In [30]:
toronto_town_merged.loc[toronto_town_merged['Cluster Labels'] == 4, toronto_town_merged.columns[[1] + list(range(5, toronto_town_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Downtown Toronto,4,Grocery Store,Café,Park,Nightclub,Baby Store,Athletics & Sports,Restaurant,Italian Restaurant,Candy Store,Coffee Shop


-- End of task. --