# Start by scraping the table in the Wikipedia article  https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

In [228]:
import urllib.request
from bs4 import BeautifulSoup
import pandas as pd
import requests
import folium
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors
from pandas.io.json import json_normalize

In [17]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [19]:
page = urllib.request.urlopen(url)
soup = BeautifulSoup(page, "lxml")

In [21]:
#print(soup.prettify()) # run this to see the HTML stracture of the page

In [40]:
right_table=soup.find('table', class_='wikitable sortable') #create object for the table alone

In [148]:
#loop to scrape each column contents into a list
A=[]
B=[]
C=[]

for row in right_table.findAll('tr'):
    cells=row.findAll('td')
    if len(cells)>0:
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))  


In [149]:
#create pandas df
df=pd.DataFrame(A,columns=['PostalCode'])
df['Borough']=B
df['Neighborhood']=C
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [150]:
# drop where 'Borough' = 'Not assigned'
df = df[df.Borough != 'Not assigned\n'].reset_index().drop('index', axis = 1)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [151]:
#Combine  same PostalCode

i = 1
while(i < len(df)):
    if df['PostalCode'].iloc[i] == df['PostalCode'].iloc[i - 1]:
        df.at[i - 1, 'Neighborhood'] = df.Neighborhood.iloc[i - 1] +', ' + df.Neighborhood.iloc[i]
        df.drop(index = i, inplace = True)
        df = df.reset_index().drop('index', axis = 1)
    else:
        i += 1

In [152]:
# Check if any Neighborhood still has a 'Not assigned' string value
boolean_findings = df['Neighborhood'].str.contains('Not assigned')
boolean_findings.sum()

0

In [153]:
#Check the shape of the DF
df.shape

(103, 3)

## get the latitude and the longitude coordinates of each neighborhood

In [154]:
#import land and lat csv
df_geo=pd.read_csv('http://cocl.us/Geospatial_data')
df_geo.rename({'Postal Code': 'PostalCode'}, axis='columns', inplace=True)
df_geo.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [196]:
df['PostalCode'] = df['PostalCode'].str.replace(r'\n', '') # remove extra string from column

df_new = pd.merge(df, df_geo, on='PostalCode') #merge

In [197]:
df_new.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [164]:
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[32, 34], zoom_start=8)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_new['Latitude'], df_new['Longitude'], df_new['Borough'], df_new['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#e182f2',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
# Show map Toronto with borough and neighborhood 
map_Toronto

In [171]:
#Define Foursquare Credentials and Version

CLIENT_ID = 'B2KOROEDSKLUPZUFSN41BGSD5Y5EGXDWRCU15DXC53HY1JJX' # your Foursquare ID
CLIENT_SECRET = 'X2ZCN0QVXCSEL3GNTFVLOACX20UAAWZJGXTWTULSAMBQB5ZI' # your Foursquare Secret
VERSION = '20200711' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID has been set ' )
print('CLIENT_SECRET has been set')




Your credentails:
CLIENT_ID has been set 
CLIENT_SECRET has been set


In [172]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 2000# define radius

In [173]:
 #create a function to get venus for all the neighborhoods in Toronto

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    print("Finished!")
    
    return(nearby_venues)

In [174]:
# write the code to run the above function on each neighborhood and create a new dataframe called Toronto_venues


Toronto_venues = getNearbyVenues(names=df_new['Neighborhood'],
                                 latitudes=df_new['Latitude'],
                                 longitudes=df_new['Longitude'],
                                 radius = 2000
                                )

Finished!


In [175]:
# check the size of the resulting dataframe
print(Toronto_venues.shape)
Toronto_venues.head()

(8610, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.753259,-79.329656,Donalda Golf & Country Club,43.752816,-79.342741,Golf Course
2,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
3,Parkwoods,43.753259,-79.329656,Galleria Supermarket,43.75352,-79.349518,Supermarket
4,Parkwoods,43.753259,-79.329656,Graydon Hall Manor,43.763923,-79.342961,Event Space


In [176]:
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,100,100,100,100,100,100
"Alderwood, Long Branch",100,100,100,100,100,100
"Bathurst Manor, Wilson Heights, Downsview North",53,53,53,53,53,53
Bayview Village,53,53,53,53,53,53
"Bedford Park, Lawrence Manor East",100,100,100,100,100,100
Berczy Park,100,100,100,100,100,100
"Birch Cliff, Cliffside West",42,42,42,42,42,42
"Brockton, Parkdale Village, Exhibition Place",100,100,100,100,100,100
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",100,100,100,100,100,100
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",100,100,100,100,100,100


In [177]:
#Let's find out how many unique categories can be curated from all the returned venues

print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

There are 320 uniques categories.


### Analyze Each Neighborhood

In [178]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Zoo Exhibit,Accessories Store,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Amphitheater,Antique Shop,Aquarium,Arcade,...,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [179]:

Toronto_onehot.shape

(8610, 320)

In [180]:
#group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped.head()

Unnamed: 0,Neighborhood,Zoo Exhibit,Accessories Store,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Amphitheater,Antique Shop,Aquarium,...,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,...,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0


In [186]:
#print each neighborhood along with the top 5 most common venues



for hood in Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')


----Agincourt
----
                venue  freq
0  Chinese Restaurant  0.12
1         Coffee Shop  0.08
2          Restaurant  0.05
3            Pharmacy  0.05
4       Shopping Mall  0.03
5      Sandwich Place  0.03
6                Bank  0.03
7   Indian Restaurant  0.03
8         Pizza Place  0.02
9         Gas Station  0.02


----Alderwood, Long Branch
----
                  venue  freq
0           Coffee Shop  0.11
1  Fast Food Restaurant  0.05
2           Pizza Place  0.04
3      Department Store  0.04
4        Sandwich Place  0.03
5        Breakfast Spot  0.03
6    Seafood Restaurant  0.03
7                  Café  0.03
8          Burger Joint  0.03
9     Electronics Store  0.03


----Bathurst Manor, Wilson Heights, Downsview North
----
               venue  freq
0        Coffee Shop  0.08
1               Bank  0.06
2        Pizza Place  0.06
3               Park  0.06
4   Asian Restaurant  0.04
5         Restaurant  0.04
6        Gas Station  0.04
7  Convenience Store  0.04
8      

In [187]:
#write a function to sort the venues in descending order

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [209]:
#create the new dataframe and display the top 10 venues for each neighborhood

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Coffee Shop,Pharmacy,Restaurant,Shopping Mall,Bank,Indian Restaurant,Sandwich Place,Sushi Restaurant,Breakfast Spot
1,"Alderwood, Long Branch",Coffee Shop,Fast Food Restaurant,Pizza Place,Department Store,Breakfast Spot,Seafood Restaurant,Electronics Store,Sandwich Place,Café,Burger Joint
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Pizza Place,Park,Bank,Restaurant,Gas Station,Deli / Bodega,Asian Restaurant,Sandwich Place,Pharmacy
3,Bayview Village,Park,Chinese Restaurant,Coffee Shop,Bank,Clothing Store,Pharmacy,Gas Station,Shopping Mall,Japanese Restaurant,Grocery Store
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Sushi Restaurant,Italian Restaurant,Bakery,Bagel Shop,Restaurant,Sandwich Place,Pizza Place,Pub,Café


# Cluster Neighborhoods

In [210]:
# Run k-means to cluster the neighborhood into 5 clusters.

kclusters = 5

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:]

array([2, 2, 2, 2, 0, 3, 2, 0, 0, 3, 0, 2, 2, 3, 0, 3, 2, 2, 3, 0, 0, 2,
       2, 2, 2, 0, 0, 2, 2, 3, 0, 3, 2, 2, 2, 3, 0, 2, 2, 2, 0, 0, 2, 2,
       0, 2, 2, 0, 2, 0, 4, 2, 2, 0, 0, 2, 0, 2, 2, 0, 0, 2, 2, 3, 3, 3,
       0, 0, 2, 0, 0, 2, 2, 3, 0, 2, 3, 0, 0, 0, 0, 0, 0, 2, 3, 3, 1, 2,
       2, 2, 2, 2, 2, 3, 2, 2, 0, 2, 2])

create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [None]:
df_new['Neighborhood'] = df_new['Neighborhood'].str.replace(r'\n', '') # remove extra string from column

Toronto_merged = df_new

neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)


In [222]:
neighborhoods_venues_sorted['Neighborhood'] = neighborhoods_venues_sorted['Neighborhood'].str.replace(r'\n', '') # remove extra string from column
neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2,Agincourt,Chinese Restaurant,Coffee Shop,Pharmacy,Restaurant,Shopping Mall,Bank,Indian Restaurant,Sandwich Place,Sushi Restaurant,Breakfast Spot
1,2,"Alderwood, Long Branch",Coffee Shop,Fast Food Restaurant,Pizza Place,Department Store,Breakfast Spot,Seafood Restaurant,Electronics Store,Sandwich Place,Café,Burger Joint
2,2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Pizza Place,Park,Bank,Restaurant,Gas Station,Deli / Bodega,Asian Restaurant,Sandwich Place,Pharmacy
3,2,Bayview Village,Park,Chinese Restaurant,Coffee Shop,Bank,Clothing Store,Pharmacy,Gas Station,Shopping Mall,Japanese Restaurant,Grocery Store
4,0,"Bedford Park, Lawrence Manor East",Coffee Shop,Sushi Restaurant,Italian Restaurant,Bakery,Bagel Shop,Restaurant,Sandwich Place,Pizza Place,Pub,Café


In [232]:
Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

In [233]:
Toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,2,Coffee Shop,Japanese Restaurant,Gas Station,Sandwich Place,Supermarket,Chinese Restaurant,Pizza Place,Pharmacy,Fried Chicken Joint,Discount Store
1,M4A,North York,Victoria Village,43.725882,-79.315572,2,Coffee Shop,Fast Food Restaurant,Gym,Sandwich Place,Clothing Store,Japanese Restaurant,Grocery Store,Middle Eastern Restaurant,Intersection,Gas Station
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3,Coffee Shop,Park,Restaurant,Café,Japanese Restaurant,Gastropub,Bakery,French Restaurant,Clothing Store,Liquor Store
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,2,Clothing Store,Coffee Shop,Fast Food Restaurant,Bank,Vietnamese Restaurant,Grocery Store,Furniture / Home Store,Pizza Place,Dessert Shop,Liquor Store
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3,Coffee Shop,Park,Clothing Store,Café,Restaurant,Bookstore,Japanese Restaurant,Pizza Place,Italian Restaurant,Hotel


In [234]:
#visualize the resulting clusters
# create map
map_clusters = folium.Map(location=[43.706204, -79.398752], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighborhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

In [235]:
#determine the discriminating venue categories that distinguish each cluster
#Cluster 1
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,East York,0,Coffee Shop,Park,Café,Pizza Place,Gastropub,Thai Restaurant,Skating Rink,Pharmacy,Ice Cream Shop,Ethiopian Restaurant
16,York,0,Coffee Shop,Italian Restaurant,Bank,Café,Mexican Restaurant,Indian Restaurant,Trail,Bakery,Ice Cream Shop,Caribbean Restaurant
19,East Toronto,0,Coffee Shop,Pub,Breakfast Spot,Beach,Bakery,Japanese Restaurant,Ice Cream Shop,Park,BBQ Joint,Café
21,York,0,Italian Restaurant,Bakery,Coffee Shop,Furniture / Home Store,Sandwich Place,Bank,Grocery Store,Mexican Restaurant,Pizza Place,Fast Food Restaurant
25,Downtown Toronto,0,Café,Coffee Shop,Bar,Grocery Store,Vegetarian / Vegan Restaurant,Italian Restaurant,Indian Restaurant,Ice Cream Shop,Restaurant,Korean Restaurant
31,West Toronto,0,Café,Coffee Shop,Italian Restaurant,Park,Bar,Brewery,Cocktail Bar,Grocery Store,Indian Restaurant,Pizza Place
35,East York,0,Café,Greek Restaurant,Coffee Shop,Bakery,Ice Cream Shop,Ethiopian Restaurant,American Restaurant,Park,Cocktail Bar,Gastropub
37,West Toronto,0,Café,Coffee Shop,Bakery,Pizza Place,Cocktail Bar,Restaurant,Italian Restaurant,Park,Art Gallery,Gym
41,East Toronto,0,Café,Greek Restaurant,Park,Vietnamese Restaurant,Bakery,Italian Restaurant,Pub,Ice Cream Shop,Coffee Shop,Yoga Studio
43,West Toronto,0,Coffee Shop,Café,Restaurant,Bakery,Furniture / Home Store,Park,Gift Shop,Athletics & Sports,Tea Room,Seafood Restaurant


In [236]:
#Cluster 2
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
95,Scarborough,1,Playground,Trail,Farm,Golf Course,Zoo,Ethiopian Restaurant,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant


In [237]:
#Cluster 3
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,2,Coffee Shop,Japanese Restaurant,Gas Station,Sandwich Place,Supermarket,Chinese Restaurant,Pizza Place,Pharmacy,Fried Chicken Joint,Discount Store
1,North York,2,Coffee Shop,Fast Food Restaurant,Gym,Sandwich Place,Clothing Store,Japanese Restaurant,Grocery Store,Middle Eastern Restaurant,Intersection,Gas Station
3,North York,2,Clothing Store,Coffee Shop,Fast Food Restaurant,Bank,Vietnamese Restaurant,Grocery Store,Furniture / Home Store,Pizza Place,Dessert Shop,Liquor Store
5,Etobicoke,2,Pharmacy,Coffee Shop,Shopping Mall,Grocery Store,Park,Bank,Liquor Store,Golf Course,Camera Store,Pet Store
7,North York,2,Coffee Shop,Restaurant,Japanese Restaurant,Park,Bank,Pizza Place,Middle Eastern Restaurant,Sandwich Place,Supermarket,Italian Restaurant
8,East York,2,Pizza Place,Park,Coffee Shop,Sandwich Place,Ice Cream Shop,Pharmacy,Playground,Skating Rink,Fast Food Restaurant,Beer Store
10,North York,2,Clothing Store,Coffee Shop,Furniture / Home Store,Bank,Restaurant,Grocery Store,Fast Food Restaurant,Sushi Restaurant,Fried Chicken Joint,Caribbean Restaurant
11,Etobicoke,2,Coffee Shop,Convenience Store,Bank,Park,Grocery Store,Pharmacy,Farmers Market,Fish & Chips Shop,Sushi Restaurant,Sandwich Place
12,Scarborough,2,Coffee Shop,Breakfast Spot,Pizza Place,Sandwich Place,Bank,Park,Pet Store,Pharmacy,Diner,Discount Store
13,North York,2,Coffee Shop,Restaurant,Japanese Restaurant,Park,Bank,Pizza Place,Middle Eastern Restaurant,Sandwich Place,Supermarket,Italian Restaurant


In [238]:
#Cluster 4
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,3,Coffee Shop,Park,Restaurant,Café,Japanese Restaurant,Gastropub,Bakery,French Restaurant,Clothing Store,Liquor Store
4,Downtown Toronto,3,Coffee Shop,Park,Clothing Store,Café,Restaurant,Bookstore,Japanese Restaurant,Pizza Place,Italian Restaurant,Hotel
9,Downtown Toronto,3,Coffee Shop,Japanese Restaurant,Café,Restaurant,Gastropub,Thai Restaurant,Pizza Place,Park,Diner,Plaza
15,Downtown Toronto,3,Coffee Shop,Hotel,Japanese Restaurant,Park,Restaurant,Plaza,Café,Thai Restaurant,Theater,Farmers Market
20,Downtown Toronto,3,Coffee Shop,Park,Hotel,Restaurant,Café,Farmers Market,Plaza,Japanese Restaurant,Theater,Gym
24,Downtown Toronto,3,Café,Restaurant,Arts & Crafts Store,Bookstore,Japanese Restaurant,Coffee Shop,Theater,Park,Yoga Studio,Gym
30,Downtown Toronto,3,Coffee Shop,Hotel,Café,Thai Restaurant,Japanese Restaurant,Sandwich Place,Restaurant,Theater,Plaza,Clothing Store
36,Downtown Toronto,3,Hotel,Gym,Café,Coffee Shop,Park,Seafood Restaurant,Plaza,Brewery,Japanese Restaurant,Theater
42,Downtown Toronto,3,Coffee Shop,Restaurant,Park,Hotel,Plaza,Theater,Café,Japanese Restaurant,Clothing Store,Farmers Market
48,Downtown Toronto,3,Coffee Shop,Hotel,Café,Restaurant,Park,Theater,Plaza,Japanese Restaurant,Clothing Store,Baseball Stadium


In [239]:
#Cluster 5
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,4,Zoo Exhibit,Restaurant,Fast Food Restaurant,Gas Station,Pizza Place,Zoo,Spa,Grocery Store,Fried Chicken Joint,Bank
