In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2020.12.5          |   py36h5fab9bb_1         143 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.1.0                |     pyhd3deb0d_0          64 KB  conda-forge
    openssl-1.1.1j             |       h7f98852_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.4 MB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-2.1.0-pyhd3deb0d_0

The following packages will be

In [5]:
pip install beautifulsoup4

Collecting beautifulsoup4
[?25l  Downloading https://files.pythonhosted.org/packages/d1/41/e6495bd7d3781cee623ce23ea6ac73282a373088fcd0ddc809a047b18eae/beautifulsoup4-4.9.3-py3-none-any.whl (115kB)
[K     |████████████████████████████████| 122kB 23.3MB/s eta 0:00:01
[?25hCollecting soupsieve>1.2; python_version >= "3.0" (from beautifulsoup4)
  Downloading https://files.pythonhosted.org/packages/41/e7/3617a4b988ed7744743fb0dbba5aa0a6e3f95a9557b43f8c4740d296b48a/soupsieve-2.2-py3-none-any.whl
Installing collected packages: soupsieve, beautifulsoup4
Successfully installed beautifulsoup4-4.9.3 soupsieve-2.2
Note: you may need to restart the kernel to use updated packages.


In [6]:
from bs4 import BeautifulSoup # this module helps in web scrapping.

In [7]:
url = "https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=1011037969"
data  = requests.get(url).text 
soup = BeautifulSoup(data,"html5lib")  # create a soup object using the variable 'data'

The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood.
We add each cell data to the dataframe <b> pc_canada 

In [9]:
pc_canada = pd.DataFrame(columns=["PostalCode", "Borough", "Neighborhood"])

for row in soup.find('tbody').find_all("tr"):
    col = row.find_all("td")
    if (col != []):
        PostalCode =col[0].text.replace("\n","")
        Borough =col[1].text.replace("\n","")
        Neighborhood =col[2].text.replace("\n","")
    
        pc_canada = pc_canada.append({"PostalCode":PostalCode, "Borough":Borough, "Neighborhood":Neighborhood}, ignore_index=True)
        #print("{}, {}, {}".format(PostalCode,Borough,Neighborhood))
        
pc_canada.head() #Uncomment this cell to check the dataframe

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [10]:
for index, row in pc_canada.iterrows():
    if row['Borough'] == "Not assigned":
        pc_canada = pc_canada.drop([index])
        
pc_canada = pc_canada.reset_index(drop=True)
#pc_canada - #Uncomment this cell to check the dataframe

If a cell has a borough but a Not assigned  neighborhood, then the neighborhood will be the same as the borough.
(Since there are no such cells we don't see any changes)

In [11]:
for index, row in pc_canada.iterrows():
    if row['Neighborhood'] == "Not assigned":
        print(row['Neightborhood'])
        row['Neighborhood'] = row['Borough']
        
#pc_canada - #Uncomment this cell to check the dataframe

More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. These two rows will be combined into one row with the neighborhoods separated with a comma as shown in row 11  in the above table. (Since they are already combined we don't see any changes)

In [12]:
pc_canada['Neighborhood'] = pc_canada.groupby(['PostalCode'])['Neighborhood'].transform(lambda x : ', '.join(x)) 
pc_canada.head()  

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


We check the dimensions of the dataframe pc_canada

In [13]:
pc_canada.shape

(103, 3)

Given that this package can be very unreliable, in case you are not able to get the geographical coordinates of the neighborhoods using the Geocoder package, here is a link to a csv file that has the geographical coordinates of each postal code: http://cocl.us/Geospatial_data

In [14]:
coordinates = pd.read_csv('https://cocl.us/Geospatial_data')

In [15]:
new_df = pd.DataFrame(columns=["PostalCode", "Borough", "Neighborhood", "Latitude", "Longitude"])

for i in np.arange(103):
    for j in np.arange(103):
        if coordinates['Postal Code'][i] == pc_canada['PostalCode'][j]:
            new_df = new_df.append({"PostalCode":coordinates['Postal Code'][i], "Borough": pc_canada['Borough'][j], "Neighborhood":pc_canada['Neighborhood'][j], "Latitude":coordinates['Latitude'][i], "Longitude":coordinates['Longitude'][i]}, ignore_index=True)

new_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


Filter the dataframe by Borough with the word 'Toronto'.

In [16]:
toronto_data = new_df[new_df['Borough'].str.contains('Toronto')].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [17]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


We visualize the map of Toronto and its neighborhoods

In [18]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## 2. Explore neighborhoods in Toronto


#### Define credentials for Foursquare and let's create a function to repeat the same process to all the neighborhoods in Manhattan


In [27]:
CLIENT_ID = 'C1JSXBICKQBK2ZFP4N1TLFJKVTUUUVXZXK04VCA3KDUXBKHD' # your Foursquare ID
CLIENT_SECRET = 'I3ICN1QGURK35EBEXEBNUJ3NN52QKPAUYVDQQGTSLU2I32FE' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: C1JSXBICKQBK2ZFP4N1TLFJKVTUUUVXZXK04VCA3KDUXBKHD
CLIENT_SECRET:I3ICN1QGURK35EBEXEBNUJ3NN52QKPAUYVDQQGTSLU2I32FE


In [28]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id=C1JSXBICKQBK2ZFP4N1TLFJKVTUUUVXZXK04VCA3KDUXBKHD&client_secret=I3ICN1QGURK35EBEXEBNUJ3NN52QKPAUYVDQQGTSLU2I32FE&v=20180605&ll=40.7896239,-73.9598939&radius=500&limit=100'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [29]:
# type your answer here
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

The Beaches
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West,  Lawrence Park
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North & West, Forest Hill Road Park
The Annex, North Midtown, Yorkville
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Dufferin, Dovercourt Village
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
Runn

#### Let's check the size of the resulting dataframe


In [32]:
print(toronto_venues.shape)
toronto_venues.head()

(1120, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Central Park Tennis Center,40.789313,-73.961862,Tennis Court
1,The Beaches,43.676357,-79.293031,North Meadow Recreation Center,40.791216,-73.959661,Recreation Center
2,The Beaches,43.676357,-79.293031,East Meadow,40.79016,-73.955498,Field
3,The Beaches,43.676357,-79.293031,Oldest Tree in Central Park,40.789188,-73.957867,Park
4,The Beaches,43.676357,-79.293031,Central Park - 96th Street Playground,40.787813,-73.956257,Playground


Let's check how many venues were returned for each neighborhood


In [33]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,28,28,28,28,28,28
"Brockton, Parkdale Village, Exhibition Place",28,28,28,28,28,28
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",28,28,28,28,28,28
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",28,28,28,28,28,28
Central Bay Street,28,28,28,28,28,28
Christie,28,28,28,28,28,28
Church and Wellesley,28,28,28,28,28,28
"Commerce Court, Victoria Hotel",28,28,28,28,28,28
Davisville,28,28,28,28,28,28
Davisville North,28,28,28,28,28,28


#### Let's find out how many unique categories can be curated from all the returned venues


In [36]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 14 uniques categories.


## 3. Analyze Each Neighborhood


In [37]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Baseball Field,Bike Rental / Bike Share,Breakfast Spot,Bus Station,Bus Stop,Dog Run,Field,Food Truck,Metro Station,Outdoors & Recreation,Park,Playground,Recreation Center,Tennis Court
0,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2,The Beaches,0,0,0,0,0,0,1,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,0,1,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,0,0,1,0,0


And let's examine the new dataframe size.


In [38]:
toronto_onehot.shape

(1120, 15)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category


In [39]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Baseball Field,Bike Rental / Bike Share,Breakfast Spot,Bus Station,Bus Stop,Dog Run,Field,Food Truck,Metro Station,Outdoors & Recreation,Park,Playground,Recreation Center,Tennis Court
0,Berczy Park,0.321429,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.142857,0.142857,0.035714,0.035714
1,"Brockton, Parkdale Village, Exhibition Place",0.321429,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.142857,0.142857,0.035714,0.035714
2,"Business reply mail Processing Centre, South C...",0.321429,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.142857,0.142857,0.035714,0.035714
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.321429,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.142857,0.142857,0.035714,0.035714
4,Central Bay Street,0.321429,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.142857,0.142857,0.035714,0.035714
5,Christie,0.321429,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.142857,0.142857,0.035714,0.035714
6,Church and Wellesley,0.321429,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.142857,0.142857,0.035714,0.035714
7,"Commerce Court, Victoria Hotel",0.321429,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.142857,0.142857,0.035714,0.035714
8,Davisville,0.321429,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.142857,0.142857,0.035714,0.035714
9,Davisville North,0.321429,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.035714,0.142857,0.142857,0.035714,0.035714


In [40]:
toronto_grouped.shape

(40, 15)

#### Let's print each neighborhood along with the top 5 most common venues


In [41]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                      venue  freq
0            Baseball Field  0.32
1                      Park  0.14
2                Playground  0.14
3  Bike Rental / Bike Share  0.04
4            Breakfast Spot  0.04


----Brockton, Parkdale Village, Exhibition Place----
                      venue  freq
0            Baseball Field  0.32
1                      Park  0.14
2                Playground  0.14
3  Bike Rental / Bike Share  0.04
4            Breakfast Spot  0.04


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                      venue  freq
0            Baseball Field  0.32
1                      Park  0.14
2                Playground  0.14
3  Bike Rental / Bike Share  0.04
4            Breakfast Spot  0.04


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                      venue  freq
0            Baseball Field  0.32
1                      Park  0.14

#### Let's put that into a _pandas_ dataframe. First, let's write a function to sort the venues in descending order.


In [42]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.


In [43]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
1,"Brockton, Parkdale Village, Exhibition Place",Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
2,"Business reply mail Processing Centre, South C...",Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
3,"CN Tower, King and Spadina, Railway Lands, Har...",Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
4,Central Bay Street,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run


## 4. Cluster Neighborhoods


Run _k_-means to cluster the neighborhood into 5 clusters.


In [50]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

  return_n_iter=True)


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.


Finally, let's visualize the resulting clusters


In [46]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 5. Examine Clusters


Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, you can then assign a name to each cluster. I will leave this exercise to you.


#### Cluster 1


In [47]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,0,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
1,East Toronto,0,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
2,East Toronto,0,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
3,East Toronto,0,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
4,Central Toronto,0,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
5,Central Toronto,0,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
6,Central Toronto,0,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
7,Central Toronto,0,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
8,Central Toronto,0,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run
9,Central Toronto,0,Baseball Field,Playground,Park,Tennis Court,Recreation Center,Outdoors & Recreation,Metro Station,Food Truck,Field,Dog Run


#### Cluster 2


In [52]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


#### Cluster 3


In [53]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


#### Cluster 4


In [54]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


#### Cluster 5


In [55]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
