# Neighbourhood clustering

## Part 1: Setting up DataFrame

### Import necessary libraries

In [1]:
import pandas as pd
import requests # library to handle requests
import numpy as np
from sklearn.cluster import KMeans
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

### First we scrape the data from the table


In [2]:
df_web = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

In [3]:
dfToronto=df_web[0]

In [4]:
dfToronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### We see some data entries which are invalid, let's clean up the DataFrame

In [5]:
dfToronto=dfToronto.drop(dfToronto[dfToronto['Borough']=='Not assigned'].index)

In [6]:
dfToronto[dfToronto['Postal Code'].duplicated(keep=False)]  ##No repeating postal codes

Unnamed: 0,Postal Code,Borough,Neighborhood


### Check size of cleaned DataFrame

In [7]:
dfToronto.shape

(103, 3)

## Part 2: Obtaining geolocation info


### Install Geocoder & Geopy

In [86]:
pip install geopy

Collecting geopy
  Downloading https://files.pythonhosted.org/packages/07/e1/9c72de674d5c2b8fcb0738a5ceeb5424941fefa080bfe4e240d0bacb5a38/geopy-2.0.0-py3-none-any.whl (111kB)
Collecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-2.0.0
Note: you may need to restart the kernel to use updated packages.


In [87]:
pip install geocoder

Note: you may need to restart the kernel to use updated packages.


In [9]:
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

### Define Foursquare ID & Credentials

In [13]:
CLIENT_ID = '3B12YESKZWGLATGW0PZ10F5GLGQSCNLCV1ADBAH05ZSMMXEI' # your Foursquare ID
CLIENT_SECRET = 'DBV040MHE1HLQJSMD0H4JKROPLB1FFBXQXJ00FKMDOBVM40M' # your Foursquare Secret
VERSION = '20180606'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
geolocator = Nominatim(user_agent="meep_agent")

Your credentails:
CLIENT_ID: 3B12YESKZWGLATGW0PZ10F5GLGQSCNLCV1ADBAH05ZSMMXEI
CLIENT_SECRET:DBV040MHE1HLQJSMD0H4JKROPLB1FFBXQXJ00FKMDOBVM40M


### Obtain Latitude & Longitude Info with Geolocator

In [14]:
dfToronto=dfToronto.replace(to_replace ="Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",  
                            value ="969 Eastern Ave, South Central Letter Processing Plant Toronto")

dfToronto=dfToronto.replace(to_replace ="Stn A PO Boxes",value ="25 The Esplanade")

dfToronto=dfToronto.replace(to_replace ="Canada Post Gateway Processing Centre",  
                            value ="Canada Post Gateway")

dfToronto=dfToronto.replace(to_replace ="Studio District",  
                            value ="Distillery District")
dfToronto=dfToronto.replace(to_replace ="Del Ray, Mount Dennis, Keelsdale and Silverthorn",  
                            value ="Mount Dennis, Del Ray, Keelsdale and Silverthorn")

In [15]:
sep = ','
lat=[]
long=[]
for i in dfToronto[['Borough','Neighborhood']].values:
    location = geolocator.geocode(('Ontario, '+i[0]+', '+i[1].split(sep,-1)[0]))
    if(location):
        print(('Ontario, '+i[0]+', '+i[1].split(sep,-1)[0]))
        print(location.latitude,location.longitude,'\n')
        lat.append(location.latitude)
        long.append(location.longitude)
    else:
        print(i)
        try:
            print(('Ontario, '+i[0].split(' ',-1)[1]+', '+i[1].split(sep,-1)[0]))
            location = geolocator.geocode(('Ontario, '+i[0].split(' ',-1)[1]+', '+i[1].split(sep,-1)[0]))
        except:
            print(('Ontario, '+i[0]+', '+i[1].split('-',-1)[0]))
            location = geolocator.geocode(('Ontario, '+i[0]+', '+i[1].split('-',-1)[0]))
        print(location.latitude,location.longitude,'\n')
        lat.append(location.latitude)
        long.append(location.longitude)

Ontario, North York, Parkwoods
43.7587999 -79.3201966 

Ontario, North York, Victoria Village
43.732658 -79.3111892 

['Downtown Toronto' 'Regent Park, Harbourfront']
Ontario, Toronto, Regent Park
43.6607056 -79.3604569 

Ontario, North York, Lawrence Manor
43.7220788 -79.4375067 

['Downtown Toronto' "Queen's Park, Ontario Provincial Government"]
Ontario, Toronto, Queen's Park
43.659659 -79.3903399 

Ontario, Etobicoke, Islington Avenue
43.6794838 -79.5389092 

Ontario, Scarborough, Malvern
43.8091955 -79.2217008 

Ontario, North York, Don Mills
43.775347 -79.3459439 

Ontario, East York, Parkview Hill
43.699971000000005 -79.33251996261595 

['Downtown Toronto' 'Garden District, Ryerson']
Ontario, Toronto, Garden District
43.6564995 -79.3771141 

Ontario, North York, Glencairn
43.7087117 -79.4406853 

Ontario, Etobicoke, West Deane Park
43.6631995 -79.5685684 

Ontario, Scarborough, Rouge Hill
43.7802711 -79.1304992 

Ontario, North York, Don Mills
43.775347 -79.3459439 

Ontario, Eas

In [16]:
dfLocation=pd.DataFrame({'Latitude':lat,'Longitude':long})

In [17]:
dfToronto=dfToronto.reset_index(drop=True)

In [18]:
dfCpy = pd.concat([dfToronto,dfLocation], axis=1)

### Display DataFrame with Location Info

In [19]:
dfCpy

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.758800,-79.320197
1,M4A,North York,Victoria Village,43.732658,-79.311189
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.660706,-79.360457
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.722079,-79.437507
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.659659,-79.390340
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.679484,-79.538909
6,M1B,Scarborough,"Malvern, Rouge",43.809196,-79.221701
7,M3B,North York,Don Mills,43.775347,-79.345944
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.699971,-79.332520
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.656500,-79.377114


# Part 3: Display Nighborhoods with Folium

### Import the required libraries

In [21]:
pip install folium

Collecting folium
  Downloading https://files.pythonhosted.org/packages/a4/f0/44e69d50519880287cc41e7c8a6acc58daa9a9acf5f6afc52bcc70f69a6d/folium-0.11.0-py2.py3-none-any.whl (93kB)
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/13/fb/9eacc24ba3216510c6b59a4ea1cd53d87f25ba76237d7f4393abeaf4c94e/branca-0.4.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Note: you may need to restart the kernel to use updated packages.


In [20]:
import folium

### Display Neighborhoods in a Toronto map

In [21]:
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[43.7181557,-79.5181422], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(dfCpy['Latitude'], dfCpy['Longitude'], dfCpy['Borough'], dfCpy['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

### We will define Scarborough neighbourhoods as the topic of interest, to perform the clustering analysis

In [22]:
dfScarboro=dfCpy[dfCpy['Borough']=='Scarborough']
dfScarboro=dfScarboro.reset_index(drop=True)
dfScarboro.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.809196,-79.221701
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.780271,-79.130499
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.755225,-79.198229
3,M1G,Scarborough,Woburn,43.759824,-79.225291
4,M1H,Scarborough,Cedarbrae,43.756467,-79.226692


### Create the URL to query to FourSquare

In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius, LIMIT):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Obtain 50 Neraby Venues to each Scarborough Neighbourhood

In [25]:
Scarborough_venues = getNearbyVenues(names=dfScarboro['Neighborhood'],
                                   latitudes=dfScarboro['Latitude'],
                                   longitudes=dfScarboro['Longitude'],
                                   radius=500,
                                   LIMIT=50
                                  )

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge


In [29]:
print(Scarborough_venues.shape)
Scarborough_venues.head()

(238, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.809196,-79.221701,Shoppers Drug Mart,43.80961,-79.222729,Pharmacy
1,"Malvern, Rouge",43.809196,-79.221701,Subway,43.806961,-79.221476,Sandwich Place
2,"Malvern, Rouge",43.809196,-79.221701,Pizza Hut,43.808326,-79.220616,Pizza Place
3,"Malvern, Rouge",43.809196,-79.221701,Pizza Pizza,43.806613,-79.221243,Pizza Place
4,"Malvern, Rouge",43.809196,-79.221701,Shoppers Drug Mart,43.806489,-79.223024,Pharmacy


### Visualize the numbers of venues obtained for each neighbourhood

In [30]:
Scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,12,12,12,12,12,12
"Birch Cliff, Cliffside West",4,4,4,4,4,4
Cedarbrae,26,26,26,26,26,26
"Clarks Corners, Tam O'Shanter, Sullivan",4,4,4,4,4,4
"Cliffside, Cliffcrest, Scarborough Village West",8,8,8,8,8,8
"Dorset Park, Wexford Heights, Scarborough Town Centre",10,10,10,10,10,10
"Golden Mile, Clairlea, Oakridge",50,50,50,50,50,50
"Guildwood, Morningside, West Hill",4,4,4,4,4,4
"Kennedy Park, Ionview, East Birchmount Park",5,5,5,5,5,5
"Malvern, Rouge",12,12,12,12,12,12


In [31]:
print('There are {} uniques categories.'.format(len(Scarborough_venues['Venue Category'].unique())))

There are 90 uniques categories.


#### Note: Some neighbourhoods return a very small number of venues, clustering might be non-sensical for those

### Do one-hot encoding on the data to define all possible categories

In [36]:
# one hot encoding
Scarboro_onehot = pd.get_dummies(Scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Scarboro_onehot['Neighborhood'] = Scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Scarboro_onehot.columns[-1]] + list(Scarboro_onehot.columns[:-1])
Scarboro_onehot = Scarboro_onehot[fixed_columns]

Scarboro_onehot.shape

(238, 91)

### Obtain Frequency based DataFrame for each Venue Category

In [37]:
Scarboro_grouped = Scarboro_onehot.groupby('Neighborhood').mean().reset_index()
Scarboro_grouped.shape

(17, 91)

### Display Top 5 Venue Categories for each Neighbourhood

In [38]:
num_top_venues = 5

for hood in Scarboro_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Scarboro_grouped[Scarboro_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                  venue  freq
0    Chinese Restaurant  0.17
1  Hong Kong Restaurant  0.08
2     Korean Restaurant  0.08
3         Shopping Mall  0.08
4           Coffee Shop  0.08


----Birch Cliff, Cliffside West----
                   venue  freq
0        College Stadium  0.25
1  General Entertainment  0.25
2           Skating Rink  0.25
3                   Café  0.25
4      Accessories Store  0.00


----Cedarbrae----
                  venue  freq
0  Fast Food Restaurant  0.12
1           Coffee Shop  0.08
2              Pharmacy  0.04
3         Shopping Mall  0.04
4        Discount Store  0.04


----Clarks Corners, Tam O'Shanter, Sullivan----
                  venue  freq
0     Convenience Store  0.25
1  Caribbean Restaurant  0.25
2           Gas Station  0.25
3                  Park  0.25
4          Optical Shop  0.00


----Cliffside, Cliffcrest, Scarborough Village West----
            venue  freq
0     Pizza Place  0.25
1  Sandwich Place  0.12
2             Pub 

In [40]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [42]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Scarboro_grouped['Neighborhood']

for ind in np.arange(Scarboro_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Scarboro_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Korean Restaurant,Coffee Shop,Train Station,Vietnamese Restaurant,Asian Restaurant,Hong Kong Restaurant,Rental Car Location,Food Court,Cantonese Restaurant
1,"Birch Cliff, Cliffside West",General Entertainment,Café,Skating Rink,College Stadium,Warehouse Store,Fast Food Restaurant,Cosmetics Shop,Department Store,Discount Store,Distribution Center
2,Cedarbrae,Fast Food Restaurant,Coffee Shop,Pizza Place,Liquor Store,Clothing Store,Pharmacy,Park,Paper / Office Supplies Store,Optical Shop,Department Store
3,"Clarks Corners, Tam O'Shanter, Sullivan",Convenience Store,Gas Station,Caribbean Restaurant,Park,Filipino Restaurant,Cosmetics Shop,Department Store,Discount Store,Distribution Center,Dumpling Restaurant
4,"Cliffside, Cliffcrest, Scarborough Village West",Pizza Place,Pub,Grocery Store,Sandwich Place,Park,Breakfast Spot,Coffee Shop,Distribution Center,Gas Station,Convenience Store


### Now that we have venues sorted, run a clustering analysis on the data

In [50]:
# set number of clusters
kclusters = 3

Scarboro_grouped_clustering = Scarboro_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Scarboro_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0])

In [51]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Scarboro_merged = dfScarboro

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Scarboro_merged = Scarboro_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Scarboro_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.809196,-79.221701,0,Fast Food Restaurant,Pharmacy,Pizza Place,Gym / Fitness Center,Bubble Tea Shop,Park,Sandwich Place,Skating Rink,Grocery Store,Beer Store
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.780271,-79.130499,1,Train Station,Warehouse Store,Fish Market,Convenience Store,Cosmetics Shop,Department Store,Discount Store,Distribution Center,Dumpling Restaurant,Electronics Store
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.755225,-79.198229,1,Train Station,Storage Facility,Baseball Field,Warehouse Store,Fish Market,Cosmetics Shop,Department Store,Discount Store,Distribution Center,Dumpling Restaurant
3,M1G,Scarborough,Woburn,43.759824,-79.225291,0,Fast Food Restaurant,Coffee Shop,Discount Store,Bank,Big Box Store,Pizza Place,Paper / Office Supplies Store,Department Store,Sandwich Place,Filipino Restaurant
4,M1H,Scarborough,Cedarbrae,43.756467,-79.226692,0,Fast Food Restaurant,Coffee Shop,Pizza Place,Liquor Store,Clothing Store,Pharmacy,Park,Paper / Office Supplies Store,Optical Shop,Department Store


In [52]:
# create map
map_clusters = folium.Map(location=[43.7181557,-79.5181422], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Scarboro_merged['Latitude'], Scarboro_merged['Longitude'], Scarboro_merged['Neighborhood'], Scarboro_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [53]:
Scarboro_merged

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.809196,-79.221701,0,Fast Food Restaurant,Pharmacy,Pizza Place,Gym / Fitness Center,Bubble Tea Shop,Park,Sandwich Place,Skating Rink,Grocery Store,Beer Store
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.780271,-79.130499,1,Train Station,Warehouse Store,Fish Market,Convenience Store,Cosmetics Shop,Department Store,Discount Store,Distribution Center,Dumpling Restaurant,Electronics Store
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.755225,-79.198229,1,Train Station,Storage Facility,Baseball Field,Warehouse Store,Fish Market,Cosmetics Shop,Department Store,Discount Store,Distribution Center,Dumpling Restaurant
3,M1G,Scarborough,Woburn,43.759824,-79.225291,0,Fast Food Restaurant,Coffee Shop,Discount Store,Bank,Big Box Store,Pizza Place,Paper / Office Supplies Store,Department Store,Sandwich Place,Filipino Restaurant
4,M1H,Scarborough,Cedarbrae,43.756467,-79.226692,0,Fast Food Restaurant,Coffee Shop,Pizza Place,Liquor Store,Clothing Store,Pharmacy,Park,Paper / Office Supplies Store,Optical Shop,Department Store
5,M1J,Scarborough,Scarborough Village,43.743742,-79.211632,0,Coffee Shop,Pub,Fast Food Restaurant,Chinese Restaurant,Pharmacy,Discount Store,Gym,Fried Chicken Joint,Distribution Center,General Entertainment
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.724878,-79.253969,0,Fast Food Restaurant,Grocery Store,Asian Restaurant,Chinese Restaurant,Warehouse Store,Fish Market,Department Store,Discount Store,Distribution Center,Dumpling Restaurant
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.727841,-79.287622,0,Sandwich Place,Clothing Store,Burrito Place,Hardware Store,Fast Food Restaurant,Japanese Restaurant,Burger Joint,Department Store,Grocery Store,Pet Store
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.71117,-79.248177,0,Pizza Place,Pub,Grocery Store,Sandwich Place,Park,Breakfast Spot,Coffee Shop,Distribution Center,Gas Station,Convenience Store
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.691805,-79.264494,0,General Entertainment,Café,Skating Rink,College Stadium,Warehouse Store,Fast Food Restaurant,Cosmetics Shop,Department Store,Discount Store,Distribution Center


### We briefly describe the differences between the three neighbourhoods

### Cluster 1:

This cluster includes neighbourhoods with common suburb venues, such as restaurants, coffee or pizza places

In [54]:
Scarboro_merged.loc[Scarboro_merged['Cluster Labels'] == 0]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.809196,-79.221701,0,Fast Food Restaurant,Pharmacy,Pizza Place,Gym / Fitness Center,Bubble Tea Shop,Park,Sandwich Place,Skating Rink,Grocery Store,Beer Store
3,M1G,Scarborough,Woburn,43.759824,-79.225291,0,Fast Food Restaurant,Coffee Shop,Discount Store,Bank,Big Box Store,Pizza Place,Paper / Office Supplies Store,Department Store,Sandwich Place,Filipino Restaurant
4,M1H,Scarborough,Cedarbrae,43.756467,-79.226692,0,Fast Food Restaurant,Coffee Shop,Pizza Place,Liquor Store,Clothing Store,Pharmacy,Park,Paper / Office Supplies Store,Optical Shop,Department Store
5,M1J,Scarborough,Scarborough Village,43.743742,-79.211632,0,Coffee Shop,Pub,Fast Food Restaurant,Chinese Restaurant,Pharmacy,Discount Store,Gym,Fried Chicken Joint,Distribution Center,General Entertainment
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.724878,-79.253969,0,Fast Food Restaurant,Grocery Store,Asian Restaurant,Chinese Restaurant,Warehouse Store,Fish Market,Department Store,Discount Store,Distribution Center,Dumpling Restaurant
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.727841,-79.287622,0,Sandwich Place,Clothing Store,Burrito Place,Hardware Store,Fast Food Restaurant,Japanese Restaurant,Burger Joint,Department Store,Grocery Store,Pet Store
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.71117,-79.248177,0,Pizza Place,Pub,Grocery Store,Sandwich Place,Park,Breakfast Spot,Coffee Shop,Distribution Center,Gas Station,Convenience Store
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.691805,-79.264494,0,General Entertainment,Café,Skating Rink,College Stadium,Warehouse Store,Fast Food Restaurant,Cosmetics Shop,Department Store,Discount Store,Distribution Center
10,M1P,Scarborough,"Dorset Park, Wexford Heights, Scarborough Town...",43.752847,-79.282067,0,Accessories Store,Plaza,Gaming Cafe,Coffee Shop,Indian Restaurant,Clothing Store,Chinese Restaurant,Beer Store,Bowling Alley,Fast Food Restaurant
11,M1R,Scarborough,"Wexford, Maryvale",43.745377,-79.294715,0,Pizza Place,Middle Eastern Restaurant,Burger Joint,Grocery Store,Rental Car Location,Korean Restaurant,Seafood Restaurant,Intersection,Smoke Shop,Fish Market


### Cluster 2:

Can be considered to be mainly neighbourhoods where train stations, storage and warehouses are prevalent

In [55]:
Scarboro_merged.loc[Scarboro_merged['Cluster Labels'] == 1]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.780271,-79.130499,1,Train Station,Warehouse Store,Fish Market,Convenience Store,Cosmetics Shop,Department Store,Discount Store,Distribution Center,Dumpling Restaurant,Electronics Store
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.755225,-79.198229,1,Train Station,Storage Facility,Baseball Field,Warehouse Store,Fish Market,Cosmetics Shop,Department Store,Discount Store,Distribution Center,Dumpling Restaurant


### Cluster 3:

This cluster is tricky to classify, as it is only one neighbourhood. We see that differing from the others, this cluster has a mixture of playgrounds, warehouses and stores.

In [56]:
Scarboro_merged.loc[Scarboro_merged['Cluster Labels'] == 2]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,M1W,Scarborough,"Steeles West, L'Amoreaux West",43.816178,-79.314538,2,Playground,Warehouse Store,College Stadium,Cosmetics Shop,Department Store,Discount Store,Distribution Center,Dumpling Restaurant,Electronics Store,Fast Food Restaurant
