# Explore Toronto

#### Author : Sumit Chhabra

In [1]:
# Install folium (pinned to 0.5.0) from conda-forge; written for IBM Watson Studio.
# Skip this cell if folium is already installed.
!conda install -c conda-forge folium=0.5.0 --yes

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  51.47 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  35.54 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  40.77 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  44.81 MB/s


# Convert HTML to pandas dataframe

Combine all steps from previous assignment 

In [17]:
#Read from wikipedia
import requests
from bs4 import BeautifulSoup
import pandas as pd

NA = 'Not assigned'

# Download the Wikipedia page; fail loudly on an HTTP error instead of parsing an error page.
res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.content,'lxml')

#extract table from html (the first table on the page)
table = soup.find_all('table')[0]

#convert to dataframe
df = pd.read_html(str(table))[0]
print("Old:", df.shape)
#convert first row to columns and drop first row
headers = df.iloc[0]
new_df = pd.DataFrame(df.values[1:], columns=headers)
new_df = new_df.rename(columns={'Postcode': 'PostalCode'})

#drop Borough with "Not assigned"
# .copy() gives an independent frame, so the assignment below does not write to a slice of the original.
new_df = new_df[~new_df['Borough'].isin([NA])].copy()
print("New:", new_df.shape)

#Replace Not assigned Neighbourhood with Borough name
unassigned = new_df['Neighbourhood'] == NA
for borough in new_df.loc[unassigned, 'Borough']:
    print ('Found ', borough, ' - replace it')
new_df.loc[unassigned, 'Neighbourhood'] = new_df.loc[unassigned, 'Borough']

#backup method to get latitude and longitude
url="http://cocl.us/Geospatial_data"
geodata = pd.read_csv(url)
geodata = geodata.rename(columns={'Postal Code': 'PostalCode'})

#merge two dataframes on the postal code; the inner join keeps only codes present in both
merged_df = new_df.merge(geodata, how = 'inner', on = ['PostalCode'])
print(merged_df.loc[merged_df['PostalCode'] == 'M5G'])
print(merged_df.loc[merged_df['PostalCode'] == 'M5V'])

Old: (290, 3)
New: (212, 3)
Found  Queen's Park  - replace it
   PostalCode           Borough       Neighbourhood   Latitude  Longitude
41        M5G  Downtown Toronto  Central Bay Street  43.657952 -79.387383
    PostalCode           Borough      Neighbourhood   Latitude  Longitude
164        M5V  Downtown Toronto           CN Tower  43.628947  -79.39442
165        M5V  Downtown Toronto      Bathurst Quay  43.628947  -79.39442
166        M5V  Downtown Toronto     Island airport  43.628947  -79.39442
167        M5V  Downtown Toronto  Harbourfront West  43.628947  -79.39442
168        M5V  Downtown Toronto   King and Spadina  43.628947  -79.39442
169        M5V  Downtown Toronto      Railway Lands  43.628947  -79.39442
170        M5V  Downtown Toronto      South Niagara  43.628947  -79.39442


#### Keep only the Boroughs whose name contains the word Toronto

In [18]:
# Keep only the rows whose Borough name contains "Toronto" and renumber the index from 0.
print("Old dataset:", merged_df.shape)
is_toronto_borough = merged_df['Borough'].str.contains("Toronto")
filtered_df = merged_df.loc[is_toronto_borough].reset_index(drop=True)
print("New dataset:", filtered_df.shape)
filtered_df.head()

Old dataset: (212, 5)
New dataset: (74, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
1,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
2,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
3,M5B,Downtown Toronto,Garden District,43.657162,-79.378937
4,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418


#### Use geopy library to get the latitude and longitude values of Toronto City.
In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent toronto_explorer, as shown below.

In [7]:
# Install geopy from conda-forge; written for IBM Watson Studio.
# Skip this cell if geopy is already installed.
!conda install -c conda-forge geopy --yes

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.18.1-py_0 conda-forge

geographiclib- 100% |################################| Time: 0:00:00  22.00 MB/s
geopy-1.18.1-p 100% |################################| Time: 0:00:00   3.37 MB/s


In [8]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

address = 'Toronto, Ontario'

# The geocoder instance is created with the user_agent name chosen for this notebook.
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; stop with a clear
# message instead of an AttributeError on the attribute lookups below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))


The geograpical coordinate of Toronto City are 43.653963, -79.387207.


#### Create a map of Toronto with neighborhoods superimposed on top.

In [19]:
import folium

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per row of filtered_df
for lat, lng, borough, neighbourhood in zip(filtered_df['Latitude'], filtered_df['Longitude'], filtered_df['Borough'], filtered_df['Neighbourhood']):
    # parse_html is an option of the Popup; it was also being passed to CircleMarker,
    # where it is not a marker option, so that stray argument has been removed.
    label = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

#### Define Foursquare Credentials and Version

In [20]:
# Provide your own Foursquare credentials.
# They are read from the FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET environment
# variables, falling back to an interactive prompt, so that they are never stored in
# the notebook or its outputs.
# NOTE: any keys that were previously saved in this cell should be regenerated.
import os
from getpass import getpass

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare client ID: ') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare client secret: ') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Confirm that the values are set without echoing them into the cell output.
print('Your credentails:')
print('CLIENT_ID: ' + '*' * 8)
print('CLIENT_SECRET:' + '*' * 8)

Your credentails:
CLIENT_ID: ********
CLIENT_SECRET:********


## 2. Explore Neighborhoods in Toronto

#### Let's create a function to repeat the same process for all the neighborhoods in Toronto

In [27]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100):
    """Fetch venues around each location from the Foursquare venues/explore endpoint.

    One GET request is made per (name, latitude, longitude) triple, authenticated
    with the module-level CLIENT_ID, CLIENT_SECRET and VERSION.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Zipped together; one request is issued per resulting triple.
    radius : int, optional
        Forwarded as the API's ``radius`` parameter (default 500).
    limit : int, optional
        Forwarded as the API's ``limit`` parameter (default 100).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        If no venues are returned (or the inputs are empty) the frame is
        empty but still has these columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # let requests build and escape the query string instead of formatting it by hand
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit
            },
            timeout=30)
        response.raise_for_status()

        # a response without any group simply contributes no venues
        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # a venue may come back without categories; record None rather than failing
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing the column names here keeps the result well-formed even when rows is empty
    return pd.DataFrame(rows, columns=columns)

#### Now write the code to run the above function on each neighborhood and create a new dataframe called *toronto_venues*.

In [46]:
# Fetch the venues around every row of filtered_df, using the default radius and limit.
toronto_venues = getNearbyVenues(
    filtered_df['Neighbourhood'],
    filtered_df['Latitude'],
    filtered_df['Longitude'])
print(toronto_venues.shape)
toronto_venues.head()

(3274, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Harbourfront,43.65426,-79.360636,Toronto Cooper Koo Family Cherry St YMCA Centre,43.653191,-79.357947,Gym / Fitness Center
3,Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Harbourfront,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


Let's check how many venues were returned for each neighborhood

In [31]:
# count() tallies the non-null entries of every column within each group,
# so each row shows how many venues came back for that neighbourhood name.
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Bathurst Quay,14,14,14,14,14,14
Berczy Park,58,58,58,58,58,58
Brockton,21,21,21,21,21,21
Business Reply Mail Processing Centre 969 Eastern,18,18,18,18,18,18
CN Tower,14,14,14,14,14,14
Cabbagetown,47,47,47,47,47,47
Central Bay Street,78,78,78,78,78,78
Chinatown,100,100,100,100,100,100
Christie,15,15,15,15,15,15


Let's find out how many unique categories can be curated from all the returned venues

In [32]:
# Distinct values of the 'Venue Category' column across all returned venues.
distinct_categories = toronto_venues['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(distinct_categories)))

There are 239 uniques categories.


## 3. Analyze Each Neighborhood

In [60]:
# one hot encoding: one indicator column per venue category
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add the neighbourhood name back to the dataframe
# The column is deliberately spelled 'Neighbourhood': the venue data contains a category
# that is itself named 'Neighborhood', so using that spelling here would overwrite its
# indicator column.
toronto_onehot['Neighbourhood'] = toronto_venues['Neighborhood']

# move the neighbourhood column (just appended as the last one) to the front
column_order = list(toronto_onehot.columns)
column_order = column_order[-1:] + column_order[:-1]
toronto_onehot = toronto_onehot[column_order]

print(toronto_onehot.shape)
toronto_onehot.head()

(3274, 240)


Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood, taking the mean of the frequency of occurrence of each category

In [61]:
# Mean of every indicator column per neighbourhood, i.e. the share of that
# neighbourhood's venues falling in each category.
category_means = toronto_onehot.groupby('Neighbourhood').mean()
# turn the neighbourhood name back into an ordinary column
toronto_grouped = category_means.reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Adelaide,0.010000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.030000,...,0.000000,0.00,0.010000,0.000000,0.000000,0.010000,0.000000,0.000000,0.010000,0.000000
1,Bathurst Quay,0.000000,0.000000,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,Berczy Park,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,Brockton,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,Business Reply Mail Processing Centre 969 Eastern,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.055556
5,CN Tower,0.000000,0.000000,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6,Cabbagetown,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.021277,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,Central Bay Street,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.012821,...,0.000000,0.00,0.012821,0.000000,0.000000,0.012821,0.000000,0.000000,0.000000,0.012821
8,Chinatown,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.050000,0.000000,0.040000,0.010000,0.000000,0.000000,0.000000,0.000000
9,Christie,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


#### Let's print each neighborhood along with the top 5 most common venues

In [64]:
num_top_venues = 5

# Index by neighbourhood so that each row holds only the category frequencies.
category_freqs = toronto_grouped.set_index('Neighbourhood')

for hood, freqs in category_freqs.iterrows():
    print("----"+hood+"----")
    # two-column table: category name and its frequency rounded to 2 decimals
    temp = pd.DataFrame(
        {'venue': freqs.index.values, 'freq': freqs.astype(float).round(2).values},
        columns=['venue', 'freq'])
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Adelaide----
             venue  freq
0      Coffee Shop  0.06
1       Steakhouse  0.04
2              Bar  0.04
3  Thai Restaurant  0.04
4             Café  0.04


----Bathurst Quay----
              venue  freq
0    Airport Lounge  0.14
1   Airport Service  0.14
2  Airport Terminal  0.14
3             Plane  0.07
4     Boat or Ferry  0.07


----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1        Cocktail Bar  0.05
2          Restaurant  0.05
3              Bakery  0.03
4  Seafood Restaurant  0.03


----Brockton----
                    venue  freq
0                    Café  0.10
1             Coffee Shop  0.10
2          Breakfast Spot  0.10
3                  Bakery  0.05
4  Furniture / Home Store  0.05


----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0  Light Rail Station  0.11
1       Auto Workshop  0.06
2       Burrito Place  0.06
3             Butcher  0.06
4          Smoke Shop  0.06


----CN Tower----
     

                venue  freq
0    Greek Restaurant  0.21
1         Coffee Shop  0.09
2      Ice Cream Shop  0.07
3  Italian Restaurant  0.07
4           Bookstore  0.05


----Roncesvalles----
            venue  freq
0  Breakfast Spot  0.12
1       Gift Shop  0.12
2    Dessert Shop  0.06
3      Restaurant  0.06
4         Dog Run  0.06


----Rosedale----
               venue  freq
0               Park  0.50
1         Playground  0.25
2              Trail  0.25
3  Accessories Store  0.00
4          Nightclub  0.00


----Roselawn----
                       venue  freq
0               Home Service   0.5
1                     Garden   0.5
2                  Nightclub   0.0
3         Mexican Restaurant   0.0
4  Middle Eastern Restaurant   0.0


----Runnymede----
                venue  freq
0                Café  0.09
1         Coffee Shop  0.09
2         Pizza Place  0.06
3    Sushi Restaurant  0.06
4  Italian Restaurant  0.06


----Ryerson----
                       venue  freq
0             

#### Let's put that into a *pandas* dataframe

First, let's write a function to sort the venues in descending order.

In [65]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in ``row``, skipping its first entry.

    The first entry of ``row`` is ignored (it is the neighbourhood name when the
    row comes from ``toronto_grouped``). The remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues`` of them
    are returned as an array; fewer labels are returned if the row is shorter.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [76]:
import numpy as np

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    # the first three ranks take 'st' / 'nd' / 'rd', every later rank takes 'th';
    # an explicit bounds check replaces the bare except that used to hide any error
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighbourhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

# fill the venue columns of each row with that neighbourhood's most common categories
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Bar,Steakhouse,Thai Restaurant,Asian Restaurant,Burger Joint,Gym,Hotel,American Restaurant
1,Bathurst Quay,Airport Lounge,Airport Service,Airport Terminal,Boutique,Plane,Airport,Airport Food Court,Airport Gate,Harbor / Marina,Sculpture Garden
2,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Farmers Market,Café,Cheese Shop,Steakhouse,Italian Restaurant,Bakery,Seafood Restaurant
3,Brockton,Coffee Shop,Breakfast Spot,Café,Pet Store,Burrito Place,Performing Arts Venue,Stadium,Caribbean Restaurant,Bar,Climbing Gym
4,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Garden,Pizza Place,Comic Shop,Restaurant,Butcher,Burrito Place,Skate Park,Smoke Shop


## 4. Cluster Neighborhoods

Run *k*-means to cluster the neighborhoods into 5 clusters.

In [77]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

# cluster on the category frequencies only, so drop the neighbourhood name column
# (axis is passed by keyword: the positional form is deprecated and rejected by pandas 2.x)
toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', axis=1)

# run k-means clustering with a fixed random_state
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check the cluster labels generated for the first 10 rows
kmeans.labels_[0:10] 

array([1, 3, 1, 1, 1, 3, 1, 1, 1, 1], dtype=int32)

In [78]:
# add clustering labels
# DataFrame.insert raises if the column already exists, so drop a copy left over
# from an earlier run of this cell first; this keeps the cell re-runnable.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop('Cluster Labels', axis=1)
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the cluster label and most common venues to every row of filtered_df,
# matching on the neighbourhood name (join returns a new frame; filtered_df is unchanged)
toronto_merged = filtered_df.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,1,Coffee Shop,Café,Bakery,Park,Pub,Breakfast Spot,Mexican Restaurant,Theater,Beer Store,Bank
1,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636,1,Coffee Shop,Café,Bakery,Park,Pub,Breakfast Spot,Mexican Restaurant,Theater,Beer Store,Bank
2,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937,1,Clothing Store,Coffee Shop,Cosmetics Shop,Café,Middle Eastern Restaurant,Theater,Bubble Tea Shop,Ramen Restaurant,Pizza Place,Bar
3,M5B,Downtown Toronto,Garden District,43.657162,-79.378937,1,Clothing Store,Coffee Shop,Cosmetics Shop,Café,Middle Eastern Restaurant,Theater,Bubble Tea Shop,Ramen Restaurant,Pizza Place,Bar
4,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1,Coffee Shop,Restaurant,Café,Hotel,Breakfast Spot,Bakery,Gastropub,Italian Restaurant,Clothing Store,Park




Finally, let's visualize the resulting clusters


In [82]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
# NOTE(review): assumes every row of toronto_merged has an integer cluster label;
# a missing label would make the colour lookup below fail - confirm.
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster - 1 is kept as before: label 0 wraps around to the last colour,
    # so each cluster still gets a colour of its own
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## 5. Examine Clusters

#### Cluster 1

In [86]:
# Rows with cluster label 0. Show Borough, Neighbourhood and everything from
# 'Cluster Labels' onward (positions 1, 2 and 5+), leaving out PostalCode, Latitude and Longitude.
in_cluster = toronto_merged['Cluster Labels'] == 0
shown_columns = toronto_merged.columns[[1, 2] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]


Unnamed: 0,Borough,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
31,Central Toronto,Lawrence Park,0,Park,Swim School,Bus Line,Yoga Studio,Dog Run,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space
34,Central Toronto,Forest Hill North,0,Jewelry Store,Trail,Bus Line,Park,Sushi Restaurant,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Yoga Studio
35,Central Toronto,Forest Hill West,0,Jewelry Store,Trail,Bus Line,Park,Sushi Restaurant,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Yoga Studio
66,Downtown Toronto,Rosedale,0,Park,Playground,Trail,Discount Store,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant


#### Cluster 2

In [87]:
# Rows with cluster label 1. Show Borough, Neighbourhood and everything from
# 'Cluster Labels' onward (positions 1, 2 and 5+), leaving out PostalCode, Latitude and Longitude.
in_cluster = toronto_merged['Cluster Labels'] == 1
shown_columns = toronto_merged.columns[[1, 2] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]


Unnamed: 0,Borough,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,Harbourfront,1,Coffee Shop,Café,Bakery,Park,Pub,Breakfast Spot,Mexican Restaurant,Theater,Beer Store,Bank
1,Downtown Toronto,Regent Park,1,Coffee Shop,Café,Bakery,Park,Pub,Breakfast Spot,Mexican Restaurant,Theater,Beer Store,Bank
2,Downtown Toronto,Ryerson,1,Clothing Store,Coffee Shop,Cosmetics Shop,Café,Middle Eastern Restaurant,Theater,Bubble Tea Shop,Ramen Restaurant,Pizza Place,Bar
3,Downtown Toronto,Garden District,1,Clothing Store,Coffee Shop,Cosmetics Shop,Café,Middle Eastern Restaurant,Theater,Bubble Tea Shop,Ramen Restaurant,Pizza Place,Bar
4,Downtown Toronto,St. James Town,1,Coffee Shop,Restaurant,Café,Hotel,Breakfast Spot,Bakery,Gastropub,Italian Restaurant,Clothing Store,Park
5,East Toronto,The Beaches,1,Health Food Store,Pub,Trail,Coffee Shop,Neighborhood,Falafel Restaurant,Event Space,Farmers Market,Ethiopian Restaurant,Discount Store
6,Downtown Toronto,Berczy Park,1,Coffee Shop,Restaurant,Cocktail Bar,Farmers Market,Café,Cheese Shop,Steakhouse,Italian Restaurant,Bakery,Seafood Restaurant
7,Downtown Toronto,Central Bay Street,1,Coffee Shop,Café,Italian Restaurant,Burger Joint,Bar,Bubble Tea Shop,Sandwich Place,Salad Place,Ice Cream Shop,Spa
8,Downtown Toronto,Christie,1,Grocery Store,Café,Park,Convenience Store,Restaurant,Baby Store,Nightclub,Italian Restaurant,Coffee Shop,Diner
9,Downtown Toronto,Adelaide,1,Coffee Shop,Café,Bar,Steakhouse,Thai Restaurant,Asian Restaurant,Burger Joint,Gym,Hotel,American Restaurant


#### Cluster 3

In [88]:
# Rows with cluster label 2. Show Borough, Neighbourhood and everything from
# 'Cluster Labels' onward (positions 1, 2 and 5+), leaving out PostalCode, Latitude and Longitude.
in_cluster = toronto_merged['Cluster Labels'] == 2
shown_columns = toronto_merged.columns[[1, 2] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]


Unnamed: 0,Borough,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
49,Central Toronto,Moore Park,2,Playground,Tennis Court,Convenience Store,Cosmetics Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant
50,Central Toronto,Summerhill East,2,Playground,Tennis Court,Convenience Store,Cosmetics Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant


#### Cluster 4

In [89]:
# Rows with cluster label 3. Show Borough, Neighbourhood and everything from
# 'Cluster Labels' onward (positions 1, 2 and 5+), leaving out PostalCode, Latitude and Longitude.
in_cluster = toronto_merged['Cluster Labels'] == 3
shown_columns = toronto_merged.columns[[1, 2] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]


Unnamed: 0,Borough,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
59,Downtown Toronto,CN Tower,3,Airport Lounge,Airport Service,Airport Terminal,Boutique,Plane,Airport,Airport Food Court,Airport Gate,Harbor / Marina,Sculpture Garden
60,Downtown Toronto,Bathurst Quay,3,Airport Lounge,Airport Service,Airport Terminal,Boutique,Plane,Airport,Airport Food Court,Airport Gate,Harbor / Marina,Sculpture Garden
61,Downtown Toronto,Island airport,3,Airport Lounge,Airport Service,Airport Terminal,Boutique,Plane,Airport,Airport Food Court,Airport Gate,Harbor / Marina,Sculpture Garden
62,Downtown Toronto,Harbourfront West,3,Airport Lounge,Airport Service,Airport Terminal,Boutique,Plane,Airport,Airport Food Court,Airport Gate,Harbor / Marina,Sculpture Garden
63,Downtown Toronto,King and Spadina,3,Airport Lounge,Airport Service,Airport Terminal,Boutique,Plane,Airport,Airport Food Court,Airport Gate,Harbor / Marina,Sculpture Garden
64,Downtown Toronto,Railway Lands,3,Airport Lounge,Airport Service,Airport Terminal,Boutique,Plane,Airport,Airport Food Court,Airport Gate,Harbor / Marina,Sculpture Garden
65,Downtown Toronto,South Niagara,3,Airport Lounge,Airport Service,Airport Terminal,Boutique,Plane,Airport,Airport Food Court,Airport Gate,Harbor / Marina,Sculpture Garden


#### Cluster 5

In [90]:
# Rows with cluster label 4. Show Borough, Neighbourhood and everything from
# 'Cluster Labels' onward (positions 1, 2 and 5+), leaving out PostalCode, Latitude and Longitude.
in_cluster = toronto_merged['Cluster Labels'] == 4
shown_columns = toronto_merged.columns[[1, 2] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]


Unnamed: 0,Borough,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Central Toronto,Roselawn,4,Home Service,Garden,Yoga Studio,Dog Run,Fish & Chips Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space
