# Segmenting and Clustering Neighborhoods in Toronto

### Creating a _pandas dataframe_ for Neighborhoods in Toronto

In [1]:
import os

from bs4 import BeautifulSoup
#!pip install folium
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from sklearn.cluster import KMeans

In [2]:
#getting source file
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(WIKI_URL, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
html_data = response.text
soup = BeautifulSoup(html_data, 'html5lib')
# Preview only the beginning of the document; the full page is far too long to display.
soup.prettify()[:1000]

'<!DOCTYPE html>\n<html class="client-nojs" dir="ltr" lang="en">\n <head>\n  <meta charset="utf-8"/>\n  <title>\n   List of postal codes of Canada: M - Wikipedia\n  </title>\n  <script>\n   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"YDeAWfPHpRnZyWEuOCuc6QAAAMQ","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":1008658788,"wgRevisionId":1008658788,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description is different from Wikidata","Communica

In [3]:
# Empty frame with the three columns that the scraped table will be loaded into.
toronto_neighborhoods = pd.DataFrame(columns=['PostalCode', 'Borough', 'Neighborhood'])

# Print every sortable wikitable together with its position in the result list,
# so that the position of the postal-code table can be read off the output.
tables = soup.find_all(name='table', class_='wikitable sortable')
for index in range(len(tables)):
    table = tables[index]
    print(index, table)
# The postal-code table is the one printed with index 0.

0 <table class="wikitable sortable">
<tbody><tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>
<tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>
<tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>
<tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>
<tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>
<tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue, Humber Valley Village
</td></tr>
<tr>
<td>M1B
</td>
<td>Scarborough
</td>
<td>Malvern, Rouge
</td></tr>
<tr>
<td>M2B
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3B
</td>
<

In [4]:
#building dataframe with data from wikipedia table
# Rows are collected in a list and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call.
# Building from scratch also makes this cell safe to re-run (no duplicated rows).
records = []
for row in tables[0].tbody.find_all('tr'):
    col = row.find_all('td')
    if len(col) < 3:
        # Header rows hold <th> cells only; incomplete rows cannot be used either.
        continue
    postal_code, borough, neighborhood = (cell.text.strip() for cell in col[:3])
    records.append({'PostalCode': postal_code, 'Borough': borough, 'Neighborhood': neighborhood})

toronto_neighborhoods = pd.DataFrame(records, columns=['PostalCode', 'Borough', 'Neighborhood'])
toronto_neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Cleaning data in _pandas dataframe_

1. Removing rows where the borough is 'Not assigned'
2. Assign the borough name to Neighborhoods that are 'Not assigned', if present 

For point 2, the number of 'Not assigned' values is the same for boroughs and neighborhoods in this table; nevertheless, we make sure that if a row has a borough but no neighborhood, the neighborhood gets the borough's name.

In [5]:
# 1. Treat 'Not assigned' as missing and drop the rows that have no borough.
# 2. Where a borough is present but the neighborhood is missing, reuse the
#    borough name as the neighborhood name.
# Step 2 has to work on missing values: once 'Not assigned' has been converted
# to NaN, a comparison against the string 'Not assigned' can never match.
toronto_neighborhoods = (
    toronto_neighborhoods
    .replace('Not assigned', np.nan)
    .dropna(subset=['Borough'])
    .assign(Neighborhood=lambda frame: frame['Neighborhood'].fillna(frame['Borough']))
)

toronto_neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [6]:
# Report the size of the cleaned dataframe.
n_rows, n_columns = toronto_neighborhoods.shape[0], toronto_neighborhoods.shape[1]
print('Rows:{}, Columns:{}'.format(n_rows, n_columns))

Rows:103, Columns:3


### Merging toronto_neighborhoods _dataframe_ with latitude and longitude data for each postal_code 

In [7]:
# Coordinates per postal code, read from a local CSV file.
latlng_df = pd.read_csv('Geospatial_coordinates.csv')

# Left-join the coordinates onto the neighborhoods, matching our 'PostalCode'
# column against the CSV's 'Postal Code' column.
coordinates_by_code = latlng_df.set_index('Postal Code')
toronto_merged = toronto_neighborhoods.join(coordinates_by_code, on='PostalCode')
toronto_merged.head()


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M4A,North York,Victoria Village,43.725882,-79.315572
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


## Exploring and clustering neighborhoods

### 1. Exploring Toronto Map

In [8]:
# Map centre used for every folium map in this notebook.
toronto_lat = 43.651070
toronto_lng = -79.347015
map_toronto = folium.Map(location=[toronto_lat, toronto_lng], zoom_start=10)

# One blue circle per dataframe row, with a "neighborhood, borough" popup.
marker_fields = toronto_merged[['Latitude', 'Longitude', 'Borough', 'Neighborhood']]
for lat, lng, borough, neighborhood in marker_fields.itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html = True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

### 2. Getting nearby shops for each borough
For this analysis, each neighborhood of a borough is considered individually, even when several neighborhoods share a single row of the _dataframe_.

**The objective is to group neighborhoods on the basis of their surrounding shops.**

In [9]:
# @hidden_cell
# Foursquare credentials are read from environment variables when they are set,
# so real keys never have to be saved inside the notebook. The '***' placeholders
# remain the fallback when a variable is not defined.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '***')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '***')
AUTH_TOKEN = os.environ.get('FOURSQUARE_AUTH_TOKEN', '***')
VERSION = '20180605'  # sent as the `v` parameter of the venue search request
LIMIT = 100  # sent as the `limit` parameter of the venue search request

In [12]:
def getNearbyShops(names, latitudes, longitudes, radius=500):
    """Collect the shops Foursquare returns around each location.

    Parameters
    ----------
    names : iterable of str
        Neighborhood names, one entry per location. An entry holding several
        comma-separated names is split, and every name receives its own copy
        of the venues found at that location.
    latitudes, longitudes : iterable of float
        Coordinates matching `names` element by element.
    radius : int, default 500
        Value sent as the `radius` parameter of the venue search.

    Returns
    -------
    pandas.DataFrame
        One row per (neighborhood, venue) pair with the columns
        'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
        'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category'.
        Venues without a category are skipped. The frame is empty, but still
        has these columns, when nothing is found.
    """
    search_query = 'Shop'
    columns = ['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
               'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    rows = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # The request depends only on the coordinates, so it is issued once per
        # location and its result is shared by all names at that location.
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&oauth_token={}&query={}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, lat, lng, AUTH_TOKEN, search_query, VERSION, radius, LIMIT)
        venues = requests.get(url, timeout=30).json()['response']['venues']

        # Split on the bare comma and strip each part, so that irregular spacing
        # in the source table cannot leave whitespace inside a neighborhood name.
        neighborhoods = [part.strip() for part in name.split(',') if part.strip()]

        for neighborhood in neighborhoods:
            for v in venues:
                if v['categories']:
                    rows.append((neighborhood, lat, lng, v['name'],
                                 v['location']['lat'], v['location']['lng'],
                                 v['categories'][0]['name']))

    # Passing the column names to the constructor also works for an empty result.
    return pd.DataFrame(rows, columns=columns)

# Query the shops around every row of the merged dataframe.
toronto_venues = getNearbyShops(
    names=toronto_merged['Neighborhood'],
    latitudes=toronto_merged['Latitude'],
    longitudes=toronto_merged['Longitude'],
)


Checking the resulting _dataframe_, and the number and names of the neighborhoods considered in the analysis:

In [13]:
# Distinct neighborhood names that received at least one venue.
unique_neighborhoods = toronto_venues['Neighborhood'].unique()
print(unique_neighborhoods)
print('Number of unique Neighborhoods={}, Shape of the dataframe={}'.format(len(unique_neighborhoods), toronto_venues.shape))
toronto_venues.head()

['Regent Park' 'Harbourfront' 'Lawrence Manor' 'Lawrence Heights'
 "Queen's Park" 'Ontario Provincial Government' 'Islington Avenue'
 'Humber Valley Village' 'Parkview Hill' 'Woodbine Gardens'
 'Garden District' 'Ryerson' 'Don Mills' 'Woodbine Heights'
 'St. James Town' 'Eringate' 'Bloordale Gardens' 'Old Burnhamthorpe'
 'Markland Wood' 'The Beaches' 'Berczy Park' 'Leaside'
 'Central Bay Street' 'Cedarbrae' 'Bathurst Manor' 'Wilson Heights'
 'Downsview North' 'Thorncliffe Park' 'Richmond' 'Adelaide' 'King'
 'Dufferin' 'Dovercourt Village' 'Fairview' 'Henry Farm' 'Oriole'
 'Northwood Park' 'York University' 'East Toronto'
 'Broadview North (Old East York)' 'Harbourfront East' 'Union Station'
 'Toronto Islands' 'Little Portugal' 'Trinity' 'Kennedy Park' 'Ionview'
 'East Birchmount Park' 'The Danforth West' 'Riverdale'
 'Toronto Dominion Centre' 'Design Exchange' 'Brockton' 'Parkdale Village'
 'Exhibition Place' 'Golden Mile' 'Clairlea' 'Oakridge' 'India Bazaar'
 'The Beaches West' 'Comme

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Regent Park,43.65426,-79.360636,Chanel Jewelry Shop - Ccjew,43.653124,-79.362702,Jewelry Store
1,Regent Park,43.65426,-79.360636,Steeped & Infused Tea Shop,43.65562,-79.364639,Tea Room
2,Regent Park,43.65426,-79.360636,Tony's Barber Shop Saloon,43.655482,-79.365229,Salon / Barbershop
3,Regent Park,43.65426,-79.360636,cheap ray bans sunglasses canada shop - prosun...,43.650361,-79.362273,Miscellaneous Shop
4,Regent Park,43.65426,-79.360636,aaa quality cheap soccer jersey shop - thailan...,43.649973,-79.36193,Clothing Store


### 3. One-Hot Encoding

We use one-hot encoding to get the relative frequency of each shop type. We then group data on the basis of the neighborhood names.

In [14]:
#new dataframe to generate one-hot encoding data
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix='', prefix_sep='')
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood']
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot)
toronto_onehot = toronto_onehot[fixed_columns[:-1]]
toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Antique Shop,Art Gallery,Arts & Crafts Store,Auto Workshop,Bakery,Bank,Bar,...,Tattoo Parlor,Tea Room,Thrift / Vintage Store,Toy / Game Store,Vape Store,Video Store,Vietnamese Restaurant,Watch Shop,Wine Shop,Women's Store
0,Regent Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Regent Park,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,Regent Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Regent Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Regent Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
#grouping neighborhoods
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Antique Shop,Art Gallery,Arts & Crafts Store,Auto Workshop,Bakery,Bank,Bar,...,Tattoo Parlor,Tea Room,Thrift / Vintage Store,Toy / Game Store,Vape Store,Video Store,Vietnamese Restaurant,Watch Shop,Wine Shop,Women's Store
0,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Adelaide,0.022222,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Albion Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Alderwood,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bathurst Manor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 4. Clustering Modeling
#### We use KMeans algorithm to cluster neighborhoods on the basis of their surrounding shops.

In [16]:
kclusters = 5
# Drop the name column by keyword: pandas 2.0 no longer accepts `axis` positionally.
toronto_clustering = toronto_grouped.drop(columns='Neighborhood')
# n_init is pinned to 10, the long-standing default, so the clustering does not
# change on scikit-learn versions whose default is 'auto'.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_clustering)
print('Labels:', kmeans.labels_)

Labels: [3 3 2 1 2 2 1 3 3 1 0 3 3 3 3 1 3 3 3 1 3 3 3 3 3 3 3 3 1 2 1 2 0 1 3 3 3
 3 3 3 3 1 3 3 3 3 3 3 3 2 2 0 2 2 2 3 2 3 3 1 4 2 3 3 1 1 3 1 1 1 3 4 3 3
 3 2 3 1 3 3 1 1 3 3 3 3 3 1 3 3 3 3 1 3 3 4 1 3 3 2 3 3 2 4 3 1 2 3 3 3 3
 1 3 0 3 1 3 3 4 2 3 3 3 3 3 3 3 3 3 3 3 3 3 2 1 1 3 1]


## 5. Creating a new _dataframe_ to return the top 10 frequent shops for each neighborhood

In [17]:
def return_most_common_shops(row, num_top_venues):
    """Return the labels of the largest entries of `row`, in descending order.

    The first element of `row` is skipped; the remaining entries are sorted by
    value from largest to smallest and the index labels of the first
    `num_top_venues` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    ranked_labels = ranked.index.values
    return ranked_labels[:num_top_venues]

num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# Column headers: '1st', '2nd', '3rd', then '4th', '5th', ...
# The suffix is chosen with an explicit bounds check rather than by catching
# every possible exception around the list lookup.
columns = ['Neighborhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# One row per neighborhood, filled with its most frequent venue categories.
neighborhoods_sorted = pd.DataFrame(columns=columns)
neighborhoods_sorted['Neighborhood'] = toronto_grouped['Neighborhood']
for ind in range(toronto_grouped.shape[0]):
    neighborhoods_sorted.iloc[ind, 1:] = return_most_common_shops(toronto_grouped.iloc[ind, :], num_top_venues)

print('Rows={}, Columns={}'.format(neighborhoods_sorted.shape[0], neighborhoods_sorted.shape[1]))
neighborhoods_sorted.head()


Rows=138, Columns=11


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Lawrence Park,Rental Service,Restaurant,Miscellaneous Shop,Women's Store,Department Store,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store
1,Adelaide,Pharmacy,Restaurant,Cosmetics Shop,Pop-Up Shop,Coffee Shop,College Classroom,Sushi Restaurant,Smoke Shop,Hobby Shop,Pawn Shop
2,Albion Gardens,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
3,Alderwood,Salon / Barbershop,Convenience Store,Women's Store,Clothing Store,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Cosmetics Shop,Costume Shop
4,Bathurst Manor,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega


In [18]:
#Adding cluster labels for each row and the neighborhoods' latitude and longitude
# 'Cluster Labels' must be the first column of neighborhoods_sorted. Inserting it
# a second time raises, so on a re-run of this cell it is refreshed in place.
if 'Cluster Labels' in neighborhoods_sorted.columns:
    neighborhoods_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Attach label and top venues to every venue row, then reduce the result to one
# row per neighborhood by removing the venue columns and the duplicated rows.
# Columns are dropped by keyword: pandas 2.0 no longer accepts `axis` positionally.
toronto_final = (
    toronto_venues
    .join(neighborhoods_sorted.set_index('Neighborhood'), on='Neighborhood')
    .drop(columns=['Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category'])
    .drop_duplicates()
    .reset_index(drop=True)
)

toronto_final.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Regent Park,43.65426,-79.360636,3,Pharmacy,Tea Room,Miscellaneous Shop,Burger Joint,Salon / Barbershop,Jewelry Store,Shopping Mall,Bike Shop,Design Studio,Clothing Store
1,Harbourfront,43.65426,-79.360636,3,Pharmacy,Tea Room,Miscellaneous Shop,Burger Joint,Salon / Barbershop,Jewelry Store,Shopping Mall,Bike Shop,Design Studio,Clothing Store
2,Lawrence Manor,43.718518,-79.464763,3,Hardware Store,Miscellaneous Shop,Clothing Store,Antique Shop,Art Gallery,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
3,Lawrence Heights,43.718518,-79.464763,3,Hardware Store,Miscellaneous Shop,Clothing Store,Antique Shop,Art Gallery,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
4,Queen's Park,43.662301,-79.389494,3,Pharmacy,Miscellaneous Shop,Gift Shop,Wine Shop,Salon / Barbershop,Food & Drink Shop,Thrift / Vintage Store,Flower Shop,Bar,Sushi Restaurant


## 6. Returning the Map for Neighborhoods coloured by Cluster type

In [19]:
print(list(toronto_final['Cluster Labels']))
map_clusters = folium.Map(location=[toronto_lat, toronto_lng], zoom_start=11)

# One distinct colour per cluster.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Several neighborhoods can have identical coordinates. Count the markers already
# drawn at each location and shift every further one by one more step, so that
# three or more neighborhoods at the same point do not land on top of each other.
OVERLAP_OFFSET = 0.001
markers_at_location = {}

for lat, lon, poi, cluster in zip(toronto_final['Neighborhood Latitude'], toronto_final['Neighborhood Longitude'],
                                 toronto_final['Neighborhood'], toronto_final['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    already_drawn = markers_at_location.get((lat, lon), 0)
    markers_at_location[(lat, lon)] = already_drawn + 1
    shift = OVERLAP_OFFSET * already_drawn  # the first marker stays at the exact location
    # Index cluster-1 is kept from the original colouring: cluster 0 wraps around
    # to the last colour, and every cluster still gets its own colour.
    cluster_color = rainbow[cluster-1]
    folium.CircleMarker([lat + shift, lon + shift],
                       radius=5, popup=label,
                       color=cluster_color,
                       fill = True, fill_color = cluster_color,
                       fill_opacity=0.7).add_to(map_clusters)

map_clusters



[3, 3, 3, 3, 3, 3, 2, 2, 1, 1, 3, 3, 3, 1, 3, 1, 1, 1, 1, 3, 3, 1, 3, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 0, 0, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 0, 0, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4]


## 7. Checking Clusters' characteristics and assigning names

### _Cluster_ 0

In [20]:
# Neighborhood name plus the most-common-venue columns for the rows labelled 0.
cluster_columns = toronto_final.columns[[0, *range(4, toronto_final.shape[1])]]
toronto_final.loc[toronto_final['Cluster Labels'] == 0, cluster_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
38,East Toronto,Convenience Store,Women's Store,Dumpling Restaurant,College Classroom,Comic Shop,Construction & Landscaping,Cosmetics Shop,Costume Shop,Deli / Bodega,Department Store
39,Broadview North (Old East York),Convenience Store,Women's Store,Dumpling Restaurant,College Classroom,Comic Shop,Construction & Landscaping,Cosmetics Shop,Costume Shop,Deli / Bodega,Department Store
58,India Bazaar,Convenience Store,Board Shop,Women's Store,Dumpling Restaurant,College Classroom,Comic Shop,Construction & Landscaping,Cosmetics Shop,Costume Shop,Deli / Bodega
59,The Beaches West,Convenience Store,Board Shop,Women's Store,Dumpling Restaurant,College Classroom,Comic Shop,Construction & Landscaping,Cosmetics Shop,Costume Shop,Deli / Bodega


In [21]:
#Cluster 0 is stored as toronto_convenience: convenience stores and women's stores are its most common shops.
in_cluster_0 = toronto_final['Cluster Labels'] == 0
cluster_columns = toronto_final.columns[[0, *range(4, toronto_final.shape[1])]]
toronto_convenience = toronto_final.loc[in_cluster_0, cluster_columns].reset_index()

### _Cluster_ 1

In [22]:
# Neighborhood name plus the most-common-venue columns for the rows labelled 1.
cluster_columns = toronto_final.columns[[0, *range(4, toronto_final.shape[1])]]
toronto_final.loc[toronto_final['Cluster Labels'] == 1, cluster_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Parkview Hill,Pharmacy,Salon / Barbershop,Women's Store,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
9,Woodbine Gardens,Pharmacy,Salon / Barbershop,Women's Store,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
13,Woodbine Heights,Sporting Goods Shop,Salon / Barbershop,Pharmacy,Design Studio,Cocktail Bar,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store
15,Eringate,Pharmacy,Salon / Barbershop,Women's Store,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
16,Bloordale Gardens,Pharmacy,Salon / Barbershop,Women's Store,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
17,Old Burnhamthorpe,Pharmacy,Salon / Barbershop,Women's Store,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
18,Markland Wood,Pharmacy,Salon / Barbershop,Women's Store,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
21,Leaside,Salon / Barbershop,Doctor's Office,Butcher,Bike Shop,Women's Store,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop
23,Cedarbrae,Salon / Barbershop,Bakery,Women's Store,Doctor's Office,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
31,Dufferin,Salon / Barbershop,Shopping Mall,Pharmacy,Women's Store,Department Store,Cocktail Bar,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping


In [23]:
#Cluster 1 is stored as toronto_salons: salons and barbershops are its most common venues.
in_cluster_1 = toronto_final['Cluster Labels'] == 1
cluster_columns = toronto_final.columns[[0, *range(4, toronto_final.shape[1])]]
toronto_salons = toronto_final.loc[in_cluster_1, cluster_columns].reset_index()

### _Cluster_ 2

In [24]:
# Neighborhood name plus the most-common-venue columns for the rows labelled 2.
cluster_columns = toronto_final.columns[[0, *range(4, toronto_final.shape[1])]]
toronto_final.loc[toronto_final['Cluster Labels'] == 2, cluster_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Islington Avenue,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
7,Humber Valley Village,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
24,Bathurst Manor,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
25,Wilson Heights,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
26,Downsview North,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
45,Kennedy Park,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
46,Ionview,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
47,East Birchmount Park,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
113,South Steeles,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
114,Silverstone,Pharmacy,Doctor's Office,Coffee Shop,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega


In [25]:
#Cluster 2 is stored as toronto_med: pharmacies are its most common shops.
in_cluster_2 = toronto_final['Cluster Labels'] == 2
cluster_columns = toronto_final.columns[[0, *range(4, toronto_final.shape[1])]]
toronto_med = toronto_final.loc[in_cluster_2, cluster_columns].reset_index()

### _Cluster_ 3

In [26]:
# Rows labelled 3, reduced to the neighborhood name and the most-common-venue columns.
in_cluster_3 = toronto_final['Cluster Labels'] == 3
cluster_columns = toronto_final.columns[[0, *range(4, toronto_final.shape[1])]]
toronto_centre = toronto_final.loc[in_cluster_3, cluster_columns]
toronto_centre.head(30)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Regent Park,Pharmacy,Tea Room,Miscellaneous Shop,Burger Joint,Salon / Barbershop,Jewelry Store,Shopping Mall,Bike Shop,Design Studio,Clothing Store
1,Harbourfront,Pharmacy,Tea Room,Miscellaneous Shop,Burger Joint,Salon / Barbershop,Jewelry Store,Shopping Mall,Bike Shop,Design Studio,Clothing Store
2,Lawrence Manor,Hardware Store,Miscellaneous Shop,Clothing Store,Antique Shop,Art Gallery,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
3,Lawrence Heights,Hardware Store,Miscellaneous Shop,Clothing Store,Antique Shop,Art Gallery,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
4,Queen's Park,Pharmacy,Miscellaneous Shop,Gift Shop,Wine Shop,Salon / Barbershop,Food & Drink Shop,Thrift / Vintage Store,Flower Shop,Bar,Sushi Restaurant
5,Ontario Provincial Government,Pharmacy,Miscellaneous Shop,Gift Shop,Wine Shop,Salon / Barbershop,Food & Drink Shop,Thrift / Vintage Store,Flower Shop,Bar,Sushi Restaurant
10,Garden District,Salon / Barbershop,Gift Shop,Cosmetics Shop,Pharmacy,Sporting Goods Shop,Shopping Mall,Department Store,Clothing Store,Miscellaneous Shop,Men's Store
11,Ryerson,Salon / Barbershop,Gift Shop,Cosmetics Shop,Pharmacy,Sporting Goods Shop,Shopping Mall,Department Store,Clothing Store,Miscellaneous Shop,Men's Store
12,Don Mills,Café,Mobile Phone Shop,Women's Store,Cocktail Bar,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop
14,St. James Town,Pharmacy,Gift Shop,Salon / Barbershop,Shoe Store,Jewelry Store,Cosmetics Shop,Smoke Shop,Mobile Phone Shop,Clothing Store,Office


In [27]:
#The dataframe is named toronto_centre because gift shops, restaurants/cafes and pharmacies are its most frequent shops, as is typical of city centres.

### _Cluster_ 4

In [28]:
# Neighborhood name plus the most-common-venue columns for the rows labelled 4.
cluster_columns = toronto_final.columns[[0, *range(4, toronto_final.shape[1])]]
toronto_final.loc[toronto_final['Cluster Labels'] == 4, cluster_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
136,Mimico NW,Medical Center,Women's Store,Doctor's Office,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
137,The Queensway West,Medical Center,Women's Store,Doctor's Office,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
138,South of Bloor,Medical Center,Women's Store,Doctor's Office,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
139,Kingsway Park South West,Medical Center,Women's Store,Doctor's Office,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega
140,Royal York South West,Medical Center,Women's Store,Doctor's Office,College Classroom,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Deli / Bodega


In [29]:
#Cluster 4 is stored as toronto_women: its most common shop is still medical, but the second most common is the women's store.
in_cluster_4 = toronto_final['Cluster Labels'] == 4
cluster_columns = toronto_final.columns[[0, *range(4, toronto_final.shape[1])]]
toronto_women = toronto_final.loc[in_cluster_4, cluster_columns]