# Part 1
- scrape the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
- create a dataframe accordingly

In [92]:
# import libraries
import os

import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import folium
from sklearn.cluster import KMeans

In [2]:
# get html from wiki page and create soup object, using lxml parser
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
source = requests.get(url, timeout=30)
# fail loudly on an HTTP error instead of trying to parse an error page
source.raise_for_status()
soup = BeautifulSoup(source.text, 'lxml')

# the first element carrying the 'wikitable' class is taken to be the postal-code table
table = soup.find(class_='wikitable')
if table is None:
    raise ValueError("No element with class 'wikitable' found at {}".format(url))

# using soup object to get the data:
# the first <tr> supplies the column names, every later <tr> is one data row
data = []
columns = []
for index, tr in enumerate(table.find_all('tr')):
    # rstrip() removes the trailing newline/whitespace of each cell's text
    section = [cell.text.rstrip() for cell in tr.find_all(['th', 'td'])]

    if index == 0:
        columns = section
    else:
        data.append(section)

# convert list into Pandas DataFrame
df_canada = pd.DataFrame(data=data, columns=columns)
df_canada.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [3]:
len(df_canada['Postal code'].unique())

180

In [4]:
# print the original shape of the dataset
df_canada.shape

(180, 3)

In [5]:
# drop Borough=='Not assigned'
# .copy() makes the filtered result an independent frame, so the column
# assignments done in later cells do not raise pandas' SettingWithCopyWarning
df_canada = df_canada[df_canada.Borough != 'Not assigned'].copy()
# print the shape of the dataset ignore Borough == 'Not assigned'
df_canada.shape

(103, 3)

In [6]:
# check the unique Postal codes
len(df_canada['Postal code'].unique())

103

In [7]:
# check the Neighborhood == 'Not assigned'
df_canada[df_canada.Neighborhood=='Not assigned'].count()

Postal code     0
Borough         0
Neighborhood    0
dtype: int64

In [8]:
# take a look at the head
df_canada.head()

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


It turns out that no Neighborhood value is 'Not assigned' and that there are no duplicated postal codes in the dataset.
Instead, some Neighborhood values contain several names separated by '/'; since a ',' separator is required, each '/' has to be replaced with ','.

In [9]:
def replace_function(x):
    """Convert a '/'-separated string into a ', '-separated one.

    The input is split on '/', each piece is stripped of surrounding
    whitespace, and the pieces are joined back together with ', '.
    """
    return ', '.join(map(str.strip, x.split('/')))

In [10]:
# rewrite every Neighborhood value with replace_function ('/' separators become ', ')
df_canada['Neighborhood'] = df_canada['Neighborhood'].map(replace_function)

In [11]:
# reset index: renumber rows 0..n-1 and discard the old index in a single step
# (drop=True replaces the inplace reset followed by dropping the 'index' column)
df_canada = df_canada.reset_index(drop=True)

In [12]:
# rename the columns positionally; this requires the frame to hold exactly
# three columns in the order postal code, borough, neighborhood
df_canada.columns=['PostalCode','Borough','Neighborhood']

In [13]:
# take a look at the head
df_canada.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [14]:
# print out shape
df_canada.shape

(103, 3)

# Part II
- create a dataframe with geo information

In [15]:
# load the postal-code coordinates from a CSV file expected in the working directory
geo_data = pd.read_csv('Geospatial_Coordinates.csv')

In [16]:
geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [17]:
# rename positionally so the key column is called 'PostalCode', matching df_canada for the merge
geo_data.columns=['PostalCode', 'Latitude', 'Longitude']

In [18]:
# attach coordinates to each postal code; pd.merge defaults to an inner join,
# so postal codes missing from either frame are dropped
merged_df = pd.merge(df_canada, geo_data, on='PostalCode')

In [19]:
merged_df.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [20]:
merged_df.shape

(103, 5)

# Part III

In [24]:
import folium

In [25]:
# keep only the rows whose Borough name contains 'Toronto' (e.g. 'Downtown Toronto', 'East Toronto')
df_toronto = merged_df[merged_df.Borough.str.contains('Toronto')]

In [26]:
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [27]:
df_toronto.shape

(39, 5)

In [86]:
# use the latitude, longitude of 43.653963, -79.387207 for Toronto
latitude = 43.653963
longitude = -79.387207
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map: one circle per row of df_toronto, with the neighborhood name(s) as popup
for lat, lng, label in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, not a marker styling option, so it is not passed here
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='white',
        fill_opacity=0.5).add_to(map_toronto)

map_toronto

Use the Foursquare API to explore the venues around each neighborhood

In [35]:
# Foursquare client id and secret are read from the environment so that they are
# never stored in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded here has been published with
# the notebook and should be treated as compromised and rotated.
CLIENT_ID = os.environ.get("FOURSQUARE_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("FOURSQUARE_CLIENT_SECRET", "")
# value sent as the `v` query parameter of every Foursquare request
VERSION = "20180605"

if not CLIENT_ID or not CLIENT_SECRET:
    print("WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; "
          "Foursquare requests will be rejected.")

In [45]:
def getVenues(neighborhoods, latitudes, longitudes, radius=500, limit=50):
    """Query the Foursquare ``venues/explore`` endpoint once per neighborhood.

    Parameters
    ----------
    neighborhoods, latitudes, longitudes : iterables
        Name and coordinates of each search centre; consumed in lockstep via zip().
    radius : int, default 500
        Sent as the ``radius`` query parameter.
    limit : int, default 50
        Sent as the ``limit`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns Neighborhood, N_Latitude,
        N_Longitude, Venue, V_Latitude, V_Longitude and V_Category. The frame is
        empty, but still has these columns, when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    column_names = ['Neighborhood',
                    'N_Latitude',
                    'N_Longitude',
                    'Venue',
                    'V_Latitude',
                    'V_Longitude',
                    'V_Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for neighbor, lat, lng in zip(neighborhoods, latitudes, longitudes):
        # let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }

        # make the GET request
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            # a venue without any category gets None rather than raising IndexError
            categories = venue.get('categories') or []
            rows.append((
                neighbor,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing the column names here keeps the schema even when `rows` is empty
    return pd.DataFrame(rows, columns=column_names)

In [46]:
# one Foursquare request per row of df_toronto; radius is raised from the default 500 to 1000
n_venues = getVenues(df_toronto.Neighborhood, df_toronto.Latitude, df_toronto.Longitude, radius=1000, limit=50)

In [47]:
n_venues.head()

Unnamed: 0,Neighborhood,N_Latitude,N_Longitude,Venue,V_Latitude,V_Longitude,V_Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
4,"Regent Park, Harbourfront",43.65426,-79.360636,The Distillery Historic District,43.650244,-79.359323,Historic Site


In [48]:
n_venues.shape

(1797, 7)

In [55]:
# show how many distinct venue categories there are, then list them
unique_categories = n_venues['V_Category'].unique()
print(len(unique_categories))
unique_categories

231


array(['Bakery', 'Coffee Shop', 'Distribution Center', 'Restaurant',
       'Historic Site', 'Spa', 'Farmers Market', 'Park', 'Dessert Shop',
       'Chocolate Shop', 'Mediterranean Restaurant', 'Breakfast Spot',
       'Performing Arts Venue', 'Italian Restaurant', 'French Restaurant',
       'Theater', 'Pub', 'Tech Startup', 'Café', 'Liquor Store',
       'Mexican Restaurant', 'Thai Restaurant', 'Event Space', 'Pool',
       'Animal Shelter', 'Gym / Fitness Center', 'Shoe Store',
       'Ice Cream Shop', 'Sandwich Place', 'Yoga Studio',
       'Bubble Tea Shop', 'Beer Bar', 'Creperie', 'Gastropub',
       'Arts & Crafts Store', 'Dance Studio', 'Burrito Place',
       'Ramen Restaurant', 'Diner', 'Hobby Shop', 'Museum', 'Bookstore',
       'Middle Eastern Restaurant', 'Juice Bar', 'Supermarket',
       'Vegetarian / Vegan Restaurant', 'Concert Hall',
       'Theme Restaurant', 'Gift Shop', 'Smoke Shop', 'Bar',
       'Modern European Restaurant', 'Discount Store', 'Jewelry Store',
   

In [68]:
# One venue category is itself called "Neighborhood". The one-hot frame built later
# gets a 'Neighborhood' column for the neighborhood name, so that category is renamed
# here to keep the two from colliding.
n_venues['V_Category'] = n_venues['V_Category'].replace('Neighborhood', 'Neighbourhood')

In [69]:
# count the venues in each category and list the most frequent categories first
category_counts = n_venues.groupby('V_Category').count()
category_counts.sort_values(by='Neighborhood', ascending=False)

Unnamed: 0_level_0,Neighborhood,N_Latitude,N_Longitude,Venue,V_Latitude,V_Longitude
V_Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Coffee Shop,128,128,128,128,128,128
Café,119,119,119,119,119,119
Park,60,60,60,60,60,60
Italian Restaurant,59,59,59,59,59,59
Restaurant,53,53,53,53,53,53
...,...,...,...,...,...,...
Pie Shop,1,1,1,1,1,1
Fruit & Vegetable Store,1,1,1,1,1,1
Persian Restaurant,1,1,1,1,1,1
Gas Station,1,1,1,1,1,1


## Analyze neighborhood

In [70]:
# one hot encoding: one indicator column per venue category
category_dummies = pd.get_dummies(n_venues[['V_Category']], prefix="", prefix_sep="")

# put the neighborhood name back, as the first column ahead of the category indicators
columns = list(category_dummies.columns)
fixed_columns = ['Neighborhood'] + columns
toronto_onehot = category_dummies.assign(Neighborhood=n_venues['Neighborhood'])[fixed_columns]
toronto_onehot.head()

Unnamed: 0,Neighborhood,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,...,Toy / Game Store,Track,Trail,Train Station,Turkish Restaurant,University,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [71]:
toronto_onehot.shape

(1797, 232)

Next, let's group the rows by neighborhood and take the mean of each category column, which gives the frequency of occurrence of every category per neighborhood

In [72]:
# the mean of each indicator column within a neighborhood is that category's share of its venues
category_share = toronto_onehot.groupby('Neighborhood').mean()
toronto_grouped = category_share.reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,...,Toy / Game Store,Track,Trail,Train Station,Turkish Restaurant,University,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0
2,Business reply mail Processing CentrE,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.021277,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,...,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.02
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.02
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.02,0.02,0.04


In [73]:
toronto_grouped.shape

(39, 232)

Print each neighborhood along with its five most common venue categories

In [74]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    # turn the neighborhood's single row into a two-column (venue, freq) table
    hood_row = toronto_grouped[toronto_grouped['Neighborhood'] == hood]
    temp = hood_row.T.reset_index()
    temp.columns = ['venue','freq']
    # skip the first row (the 'Neighborhood' entry) and convert the frequencies with
    # .assign() on the slice, which avoids assigning into a slice of another frame
    temp = (
        temp.iloc[1:]
        .assign(freq=lambda frame: frame['freq'].astype(float))
        .round({'freq': 2})
    )
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
          venue  freq
0   Coffee Shop  0.08
1         Hotel  0.06
2      Beer Bar  0.06
3          Café  0.06
4  Cocktail Bar  0.04


----Brockton, Parkdale Village, Exhibition Place----
         venue  freq
0   Restaurant  0.08
1         Café  0.06
2  Coffee Shop  0.06
3    Gift Shop  0.06
4       Bakery  0.06


----Business reply mail Processing CentrE----
                venue  freq
0                Park  0.09
1         Coffee Shop  0.06
2         Pizza Place  0.06
3             Brewery  0.06
4  Italian Restaurant  0.04


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
              venue  freq
0       Coffee Shop  0.13
1              Café  0.13
2   Harbor / Marina  0.13
3             Track  0.07
4  Sculpture Garden  0.07


----Central Bay Street----
                 venue  freq
0          Coffee Shop  0.12
1   Italian Restaurant  0.04
2  Japanese Restaurant  0.04
3                 Café  0.04
4  A

In [75]:
def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` is skipped (callers in this notebook pass rows whose
    first entry is the neighborhood name). The remaining values are sorted in
    descending order and the labels of the leading entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [78]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # ranks 1-3 take 'st'/'nd'/'rd', every later rank takes 'th'
    # (an explicit check replaces the former bare `except:` around the list lookup)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

# fill every row with that neighborhood's most frequent categories, most common first
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Hotel,Beer Bar,Café,Farmers Market,Park,Cocktail Bar,Seafood Restaurant,Japanese Restaurant,Cheese Shop
1,"Brockton, Parkdale Village, Exhibition Place",Restaurant,Café,Gift Shop,Bakery,Coffee Shop,Vegetarian / Vegan Restaurant,Breakfast Spot,Furniture / Home Store,Arts & Crafts Store,Hawaiian Restaurant
2,Business reply mail Processing CentrE,Park,Brewery,Coffee Shop,Pizza Place,Fast Food Restaurant,Italian Restaurant,Pet Store,Sushi Restaurant,Farmers Market,Burrito Place
3,"CN Tower, King and Spadina, Railway Lands, Har...",Harbor / Marina,Coffee Shop,Café,Airport,Dance Studio,Dog Run,Scenic Lookout,Garden,Sculpture Garden,Track
4,Central Bay Street,Coffee Shop,Italian Restaurant,Japanese Restaurant,Café,Plaza,Arts & Crafts Store,Neighbourhood,Breakfast Spot,Bubble Tea Shop,Sandwich Place


## Clustering neighborhoods

In [79]:
from sklearn.cluster import KMeans

In [81]:
# set number of clusters
nclusters = 5

# k-means is fitted on the category frequencies only, so the name column is left out.
# `columns=` is used because the positional axis argument of DataFrame.drop
# is no longer accepted by pandas 2.x.
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is pinned to 10 so the labels do not depend on which
# default the installed scikit-learn version applies
kmeans = KMeans(n_clusters=nclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 0, 4, 3, 0, 2, 0, 3, 4, 4])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [82]:
# add clustering labels as the first column; when the cell is re-run the existing
# column is overwritten, because DataFrame.insert refuses a column that already exists
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# add the cluster label and the top venues to each Toronto row (postal code, borough,
# latitude/longitude), matching rows on the neighborhood name
toronto_merged = df_toronto.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Café,Bakery,Park,Theater,Mexican Restaurant,Pub,Breakfast Spot,Italian Restaurant,Event Space
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Park,Italian Restaurant,Gastropub,Music School,Creperie,Concert Hall,College Theater,Pizza Place,Ramen Restaurant
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Coffee Shop,Plaza,Park,Electronics Store,Theater,Restaurant,Ramen Restaurant,Fast Food Restaurant,Breakfast Spot,Shopping Mall
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,3,Café,Coffee Shop,Hotel,Cosmetics Shop,Farmers Market,Cocktail Bar,Japanese Restaurant,Beer Bar,Park,Restaurant
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Pub,Breakfast Spot,Park,Coffee Shop,Bakery,Japanese Restaurant,Tea Room,Beach,Caribbean Restaurant,Juice Bar


Let's visualize the clusters

In [93]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster label
colors_array = cm.rainbow(np.linspace(0, 1, nclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'],
                                  toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # labels run from 0 to nclusters-1, so they index the palette directly
    # (the former `cluster-1` sent label 0 to index -1, i.e. the last color)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## Examine clusters

### cluster 1

In [94]:
# rows with k-means label 0 (headed "cluster 1"): Borough (column 1) plus every column from position 5 onward
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0,Coffee Shop,Café,Bakery,Park,Theater,Mexican Restaurant,Pub,Breakfast Spot,Italian Restaurant,Event Space
4,Downtown Toronto,0,Coffee Shop,Park,Italian Restaurant,Gastropub,Music School,Creperie,Concert Hall,College Theater,Pizza Place,Ramen Restaurant
9,Downtown Toronto,0,Coffee Shop,Plaza,Park,Electronics Store,Theater,Restaurant,Ramen Restaurant,Fast Food Restaurant,Breakfast Spot,Shopping Mall
24,Downtown Toronto,0,Coffee Shop,Italian Restaurant,Japanese Restaurant,Café,Plaza,Arts & Crafts Store,Neighbourhood,Breakfast Spot,Bubble Tea Shop,Sandwich Place
36,Downtown Toronto,0,Hotel,Coffee Shop,Brewery,Park,Baseball Stadium,Plaza,Café,Skating Rink,IT Services,Ice Cream Shop
37,West Toronto,0,Bar,Cocktail Bar,Restaurant,Asian Restaurant,Japanese Restaurant,Vegetarian / Vegan Restaurant,Men's Store,Coffee Shop,Italian Restaurant,Café
43,West Toronto,0,Restaurant,Café,Gift Shop,Bakery,Coffee Shop,Vegetarian / Vegan Restaurant,Breakfast Spot,Furniture / Home Store,Arts & Crafts Store,Hawaiian Restaurant
91,Downtown Toronto,0,Grocery Store,Park,Coffee Shop,Candy Store,BBQ Joint,Metro Station,Japanese Restaurant,Breakfast Spot,Playground,Bank
99,Downtown Toronto,0,Coffee Shop,Men's Store,Gastropub,Gay Bar,Bookstore,Burger Joint,Thai Restaurant,Dance Studio,Restaurant,Pub


### cluster 2

In [95]:
# rows with k-means label 1 (headed "cluster 2"): Borough (column 1) plus every column from position 5 onward
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
61,Central Toronto,1,Gym / Fitness Center,Trail,Coffee Shop,College Gym,Café,College Quad,Bookstore,Park,Ethiopian Restaurant,Event Space


### cluster 3

In [96]:
# rows with k-means label 2 (headed "cluster 3"): Borough (column 1) plus every column from position 5 onward
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
25,Downtown Toronto,2,Café,Korean Restaurant,Grocery Store,Cocktail Bar,Coffee Shop,Indian Restaurant,Pizza Place,Italian Restaurant,Ethiopian Restaurant,Pub
41,East Toronto,2,Greek Restaurant,Café,Pub,Italian Restaurant,Ice Cream Shop,Bakery,Yoga Studio,Brewery,Bubble Tea Shop,Cuban Restaurant
69,West Toronto,2,Café,Bar,Italian Restaurant,Gastropub,Sushi Restaurant,Coffee Shop,Grocery Store,Flea Market,Thai Restaurant,Seafood Restaurant
74,Central Toronto,2,Vegetarian / Vegan Restaurant,Italian Restaurant,Café,Grocery Store,Pub,Pizza Place,Tea Room,School,Restaurant,Japanese Restaurant
80,Downtown Toronto,2,Café,Bookstore,Bakery,Vegetarian / Vegan Restaurant,Restaurant,Park,Japanese Restaurant,Doner Restaurant,Ramen Restaurant,Pizza Place
84,Downtown Toronto,2,Café,Vegetarian / Vegan Restaurant,Mexican Restaurant,Coffee Shop,Burger Joint,Vietnamese Restaurant,Bar,Cheese Shop,Fish Market,Jazz Club
96,Downtown Toronto,2,Park,Japanese Restaurant,Gastropub,Diner,Café,Restaurant,Caribbean Restaurant,Indian Restaurant,Farm,Italian Restaurant


### cluster 4

In [98]:
# rows with k-means label 3 (headed "cluster 4"): Borough (column 1) plus every column from position 5 onward
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Downtown Toronto,3,Café,Coffee Shop,Hotel,Cosmetics Shop,Farmers Market,Cocktail Bar,Japanese Restaurant,Beer Bar,Park,Restaurant
20,Downtown Toronto,3,Coffee Shop,Hotel,Beer Bar,Café,Farmers Market,Park,Cocktail Bar,Seafood Restaurant,Japanese Restaurant,Cheese Shop
30,Downtown Toronto,3,Coffee Shop,Café,Theater,Gym,Concert Hall,American Restaurant,Sushi Restaurant,Restaurant,Bookstore,Smoke Shop
42,Downtown Toronto,3,Café,Coffee Shop,Hotel,Restaurant,Concert Hall,American Restaurant,Gastropub,Japanese Restaurant,Beer Bar,Mediterranean Restaurant
48,Downtown Toronto,3,Café,Coffee Shop,Restaurant,Japanese Restaurant,Hotel,Gastropub,Vegetarian / Vegan Restaurant,American Restaurant,Seafood Restaurant,Gym
87,Downtown Toronto,3,Harbor / Marina,Coffee Shop,Café,Airport,Dance Studio,Dog Run,Scenic Lookout,Garden,Sculpture Garden,Track
92,Downtown Toronto,3,Café,Beer Bar,Japanese Restaurant,Coffee Shop,Restaurant,Farmers Market,Seafood Restaurant,Cocktail Bar,Liquor Store,Sporting Goods Shop
97,Downtown Toronto,3,Café,Coffee Shop,Hotel,Concert Hall,Beer Bar,Gastropub,Gym,Restaurant,American Restaurant,Steakhouse


### cluster 5

In [97]:
# rows with k-means label 4 (headed "cluster 5"): Borough (column 1) plus every column from position 5 onward
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,East Toronto,4,Pub,Breakfast Spot,Park,Coffee Shop,Bakery,Japanese Restaurant,Tea Room,Beach,Caribbean Restaurant,Juice Bar
31,West Toronto,4,Café,Coffee Shop,Park,Pharmacy,Bar,Brewery,Bakery,Italian Restaurant,Sushi Restaurant,Gym
47,East Toronto,4,Beach,Coffee Shop,Indian Restaurant,Burrito Place,Park,Sandwich Place,Café,Fast Food Restaurant,Grocery Store,Brewery
54,East Toronto,4,Coffee Shop,Café,Bakery,Brewery,Italian Restaurant,Bar,Gay Bar,Restaurant,Bookstore,Boutique
62,Central Toronto,4,Sushi Restaurant,Italian Restaurant,Pharmacy,Coffee Shop,Café,Bank,Gym Pool,Gym,Gastropub,Japanese Restaurant
67,Central Toronto,4,Italian Restaurant,Coffee Shop,Dessert Shop,Pizza Place,Café,Sushi Restaurant,Mexican Restaurant,Bookstore,Yoga Studio,Gastropub
68,Central Toronto,4,Bank,Gym / Fitness Center,Coffee Shop,Café,Park,Burger Joint,Liquor Store,Sushi Restaurant,Skating Rink,Bakery
73,Central Toronto,4,Skating Rink,Coffee Shop,Italian Restaurant,Sporting Goods Shop,Café,Restaurant,Diner,Park,Mexican Restaurant,Wine Bar
75,West Toronto,4,Bar,Sushi Restaurant,Coffee Shop,Gift Shop,Restaurant,Breakfast Spot,Eastern European Restaurant,Bakery,Thai Restaurant,Dog Run
79,Central Toronto,4,Italian Restaurant,Coffee Shop,Gym,Dessert Shop,Café,Sushi Restaurant,Restaurant,Bookstore,Gastropub,Pizza Place


# End, Thank you!