In [1]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import folium
import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans


In [2]:
base_df = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",match='Postal Code')

In [3]:
base_df=base_df[0]


<h3>Drop "Not assigned" rows</h3>

In [4]:
ind=base_df[base_df.Borough=="Not assigned"].index
base_df.drop(ind,inplace=True)
base_df.rename(columns={"Postal Code": "PostalCode"},inplace=True)
base_df.reset_index(drop=True,inplace=True)
base_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
post_df = pd.read_csv("Geospatial_Coordinates.csv")
post_df.rename(columns={"Postal Code": "PostalCode"},inplace=True)

In [6]:
print(post_df.shape)
print(base_df.shape)

(103, 3)
(103, 3)


<h3>Merge tables with postal codes and coordinates</h3>

In [7]:
neigh_df = pd.merge(base_df, post_df, on=['PostalCode'])
print(neigh_df.shape)
neigh_df.head()

(103, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


<h3>Define Toronto's coordinates</h3>

In [8]:
geolocator = Nominatim(user_agent="dkaigu@yandex.ru")
toronto_location = geolocator.geocode("Toronto, Ontario")
toronto_latitude = toronto_location.latitude
toronto_longitude = toronto_location.longitude
print("Toronto latitude, longitude: ({},{})".format(toronto_latitude,toronto_longitude))

Toronto latitude, longitude: (43.6534817,-79.3839347)


<h3>Build map with postal codes</h3>

In [9]:
map_toronto = folium.Map(location=[toronto_latitude, toronto_longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(neigh_df['Latitude'], neigh_df['Longitude'], neigh_df['PostalCode']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

<h3>FourSquare Params:</h3>

In [10]:
CLIENT_ID = 'IBBRPE2XMZ4QHIXN4K01EUFD4WCWJFYHFGOM3J5PJTG2QHWZ' # your Foursquare ID
CLIENT_SECRET = 'I3HUQRLIYM4DJLMLPJBYTL2RSPBDEHLEJMX0CSYV235PYLJQ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: IBBRPE2XMZ4QHIXN4K01EUFD4WCWJFYHFGOM3J5PJTG2QHWZ
CLIENT_SECRET:I3HUQRLIYM4DJLMLPJBYTL2RSPBDEHLEJMX0CSYV235PYLJQ


<h3>Define functions for clustering</h3>

In [12]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [13]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['PostalCode', 
                  'PostalCode Latitude', 
                  'PostalCode Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

<h3>Create dataframe with Toronto's venues</h3>

In [14]:
toronto_venues = getNearbyVenues(names=neigh_df['PostalCode'],
                                   latitudes=neigh_df['Latitude'],
                                   longitudes=neigh_df['Longitude']
                                  )

M3A
M4A
M5A
M6A
M7A
M9A
M1B
M3B
M4B
M5B
M6B
M9B
M1C
M3C
M4C
M5C
M6C
M9C
M1E
M4E
M5E
M6E
M1G
M4G
M5G
M6G
M1H
M2H
M3H
M4H
M5H
M6H
M1J
M2J
M3J
M4J
M5J
M6J
M1K
M2K
M3K
M4K
M5K
M6K
M1L
M2L
M3L
M4L
M5L
M6L
M9L
M1M
M2M
M3M
M4M
M5M
M6M
M9M
M1N
M2N
M3N
M4N
M5N
M6N
M9N
M1P
M2P
M4P
M5P
M6P
M9P
M1R
M2R
M4R
M5R
M6R
M7R
M9R
M1S
M4S
M5S
M6S
M1T
M4T
M5T
M1V
M4V
M5V
M8V
M9V
M1W
M4W
M5W
M8W
M9W
M1X
M4X
M5X
M8X
M4Y
M7Y
M8Y
M8Z


In [15]:
print(toronto_venues.shape)
toronto_venues.head()

(1676, 7)


Unnamed: 0,PostalCode,PostalCode Latitude,PostalCode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M3A,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,M3A,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,M3A,43.753259,-79.329656,TTC stop - 44 Valley Woods,43.755402,-79.333741,Bus Stop
3,M3A,43.753259,-79.329656,Bella Vita Catering & Private Chef Service,43.756651,-79.331524,BBQ Joint
4,M4A,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena


<h3>Check for unique categories</h3>

In [16]:
toronto_venues.groupby('PostalCode').count()
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 248 uniques categories.


<h3>Analyse each postal code location</h3>

In [17]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['PostalCode'] = toronto_venues['PostalCode'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(1676, 249)


Unnamed: 0,PostalCode,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


<h3>Group rows by PostalCode and by mean of the occurance of each category</h3>

In [18]:
toronto_grouped = toronto_onehot.groupby('PostalCode').mean().reset_index()
toronto_grouped

Unnamed: 0,PostalCode,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,M9N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,M9P,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
97,M9R,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
98,M9V,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


<h3>Print top5 most common venue in each postal code location</h3>

In [19]:
num_top_venues = 5

for hood in toronto_grouped['PostalCode']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['PostalCode'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----M1B----
                  venue  freq
0  Fast Food Restaurant   1.0
1     Accessories Store   0.0
2                 Motel   0.0
3        Massage Studio   0.0
4        Medical Center   0.0


----M1C----
               venue  freq
0                Bar   0.5
1      Moving Target   0.5
2  Accessories Store   0.0
3              Motel   0.0
4     Massage Studio   0.0


----M1E----
               venue  freq
0     Medical Center  0.11
1  Electronics Store  0.11
2     Breakfast Spot  0.11
3      Moving Target  0.11
4               Bank  0.11


----M1G----
                             venue  freq
0                      Coffee Shop  0.50
1                Convenience Store  0.25
2                Korean Restaurant  0.25
3              Monument / Landmark  0.00
4  Molecular Gastronomy Restaurant  0.00


----M1H----
                  venue  freq
0           Gas Station  0.11
1   Fried Chicken Joint  0.11
2                Bakery  0.11
3  Caribbean Restaurant  0.11
4       Thai Restaurant  0.11




<h3>function to sort the venues in descending order</h3>

In [20]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

<h3>Create the new dataframe and display the top 10 venues for each postalcode</h3>

In [21]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['PostalCode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Yoga Studio,Dance Studio,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store
1,M1C,Bar,Moving Target,Yoga Studio,Dance Studio,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
2,M1E,Moving Target,Medical Center,Restaurant,Rental Car Location,Bank,Intersection,Mexican Restaurant,Breakfast Spot,Electronics Store,Discount Store
3,M1G,Coffee Shop,Korean Restaurant,Convenience Store,Yoga Studio,Deli / Bodega,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run
4,M1H,Lounge,Hakka Restaurant,Bank,Caribbean Restaurant,Athletics & Sports,Thai Restaurant,Fried Chicken Joint,Bakery,Gas Station,Dog Run


<h3>Run k-means to cluster the neighborhood into 5 clusters.</h3>

In [22]:
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('PostalCode', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
# print(type(kmeans.labels_[0]))
kmeans.labels_[0:100] 

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 4, 2, 2, 2, 2, 2, 4, 2,
       4, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 4, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 0, 2, 4, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2,
       0, 2, 3, 2, 2, 0, 3, 4, 0, 4, 0, 2])

<h3>Create a new dataframe that includes the cluster as well as the top 10 venues for each Postal Code.</h3

In [24]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = neigh_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = pd.merge(toronto_merged,neighborhoods_venues_sorted, on='PostalCode')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,2,Bus Stop,Park,Food & Drink Shop,BBQ Joint,Yoga Studio,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
1,M4A,North York,Victoria Village,43.725882,-79.315572,2,Coffee Shop,Intersection,Hockey Arena,French Restaurant,Portuguese Restaurant,Financial or Legal Service,Yoga Studio,Dim Sum Restaurant,Department Store,Dessert Shop
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Café,Theater,Yoga Studio,Cosmetics Shop,Shoe Store
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,2,Clothing Store,Furniture / Home Store,Accessories Store,Event Space,Coffee Shop,Boutique,Vietnamese Restaurant,Discount Store,Department Store,Dessert Shop
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2,Coffee Shop,Diner,Yoga Studio,Arts & Crafts Store,Distribution Center,Japanese Restaurant,Smoothie Shop,Beer Bar,Café,Portuguese Restaurant


<h3>Visualize results</h3>

In [25]:
# create map
map_clusters = folium.Map(location=[toronto_latitude, toronto_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['PostalCode'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h3>Examine each Clusters</h3>

In [26]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,East York,0,Pizza Place,Bank,Pharmacy,Gastropub,Gym / Fitness Center,Athletics & Sports,Café,Intersection,Yoga Studio,Dim Sum Restaurant
9,North York,0,Japanese Restaurant,Pub,Pizza Place,Asian Restaurant,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Yoga Studio
48,North York,0,Pizza Place,Curling Ice,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner
61,York,0,Pizza Place,Brewery,Convenience Store,Yoga Studio,Dance Studio,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
68,Etobicoke,0,Pizza Place,Sandwich Place,Coffee Shop,Middle Eastern Restaurant,Intersection,Discount Store,Chinese Restaurant,Yoga Studio,Dim Sum Restaurant,Department Store
70,North York,0,Pharmacy,Pizza Place,Grocery Store,Coffee Shop,Bank,Butcher,Diner,Deli / Bodega,Department Store,Dessert Shop
80,Scarborough,0,Pharmacy,Pizza Place,Bank,Convenience Store,Gas Station,Noodle House,Italian Restaurant,Fast Food Restaurant,Thai Restaurant,Fried Chicken Joint
87,Etobicoke,0,Pizza Place,Grocery Store,Liquor Store,Fried Chicken Joint,Pharmacy,Sandwich Place,Beer Store,Fast Food Restaurant,Department Store,Dessert Shop
91,Etobicoke,0,Pizza Place,Coffee Shop,Skating Rink,Pharmacy,Gym,Sandwich Place,Pub,Yoga Studio,Dessert Shop,Dance Studio


In [28]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
85,Downtown Toronto,1,Airport Lounge,Airport Service,Airport Terminal,Boat or Ferry,Sculpture Garden,Rental Car Location,Plane,Harbor / Marina,Coffee Shop,Bar


In [30]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,2,Bus Stop,Park,Food & Drink Shop,BBQ Joint,Yoga Studio,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
1,North York,2,Coffee Shop,Intersection,Hockey Arena,French Restaurant,Portuguese Restaurant,Financial or Legal Service,Yoga Studio,Dim Sum Restaurant,Department Store,Dessert Shop
2,Downtown Toronto,2,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Café,Theater,Yoga Studio,Cosmetics Shop,Shoe Store
3,North York,2,Clothing Store,Furniture / Home Store,Accessories Store,Event Space,Coffee Shop,Boutique,Vietnamese Restaurant,Discount Store,Department Store,Dessert Shop
4,Downtown Toronto,2,Coffee Shop,Diner,Yoga Studio,Arts & Crafts Store,Distribution Center,Japanese Restaurant,Smoothie Shop,Beer Bar,Café,Portuguese Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...
94,Downtown Toronto,2,Café,Coffee Shop,Restaurant,Hotel,Concert Hall,Gym,Deli / Bodega,American Restaurant,Bar,Steakhouse
95,Etobicoke,2,Pool,River,Yoga Studio,Curling Ice,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store
96,Downtown Toronto,2,Coffee Shop,Gay Bar,Sushi Restaurant,Yoga Studio,Japanese Restaurant,Men's Store,Restaurant,Salon / Barbershop,Sake Bar,Diner
97,East Toronto,2,Light Rail Station,Park,Pizza Place,Skate Park,Auto Workshop,Fast Food Restaurant,Farmers Market,Burrito Place,Comic Shop,Restaurant


In [31]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
55,North York,3,Baseball Field,Yoga Studio,Deli / Bodega,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
98,Etobicoke,3,Baseball Field,Yoga Studio,Deli / Bodega,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center


In [32]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,York,4,Park,Women's Store,Pool,Curling Ice,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store
33,East York,4,Park,Convenience Store,Yoga Studio,Dance Studio,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
50,North York,4,Park,Yoga Studio,Curling Ice,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store
62,York,4,Park,Yoga Studio,Curling Ice,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store
64,North York,4,Park,Electronics Store,Convenience Store,Dance Studio,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
75,Etobicoke,4,Park,Sandwich Place,Mobile Phone Shop,Yoga Studio,Curling Ice,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store
83,Scarborough,4,Park,Playground,Yoga Studio,Curling Ice,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store
89,Downtown Toronto,4,Park,Trail,Playground,Yoga Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Distribution Center
