In [2]:
from bs4 import BeautifulSoup

import pandas as pd #library for data analsysis 
import numpy as np # library to handle data in a vectorized manner

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

In [3]:
with open('Neighbourhoods.json') as f:
    data = json.load(f)

#df = pd.DataFrame.from_dict(json_normalize(data), orient='columns')    

#df.head()

area_num = []
area_name = []
lat = []
lng = []
for feature in data['features']:
    area_num.append(feature['properties']['AREA_SHORT_CODE'])
    area_name.append(feature['properties']['AREA_NAME'].rsplit("(",1)[0])
    lng.append(feature['properties']['LONGITUDE'])
    lat.append(feature['properties']['LATITUDE'])

df = pd.DataFrame ({"Neighbourhood Number":area_num, 
                    "Neighbourhood":area_name,
                    "Longitude":lng,
                    "Latitude":lat})

df.sort_values('Neighbourhood Number',ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,Neighbourhood Number,Neighbourhood,Longitude,Latitude
0,1,West Humber-Clairville,-79.596356,43.71618
1,2,Mount Olive-Silverstone-Jamestown,-79.587259,43.746868
2,3,Thistletown-Beaumond Heights,-79.563491,43.737988
3,4,Rexdale-Kipling,-79.566228,43.723725
4,5,Elms-Old Rexdale,-79.548983,43.721519


Use geopy library to get the latitude and longitude values of Toronto.

In [4]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, ON. are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, ON. are 43.653963, -79.387207.


In [5]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, num, hood in zip(df['Latitude'], df['Longitude'],df['Neighbourhood Number'], df['Neighbourhood']):
    label = '{}, {}'.format(num, hood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [20]:
toronto_geo = r'Neighbourhoods.json'

# create map and display it
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)
map_toronto.choropleth(
    geo_data=toronto_geo,
    data=df,
    columns=['Neighbourhood Number','Longitude'],
    key_on='feature.properties.AREA_LONG_CODE',
    fill_color='YlOrRd', 
    fill_opacity=0.6, 
    line_opacity=0.2,
    legend_name='Toronto Neighbourhood')
# display the map of Toronto
map_toronto

In [22]:
#read in the edited csv file
to_neighbourhood_edited = pd.read_csv("TOneighbourhoods_edited.csv", thousands=',') 

#drop the un-needed columns
to_neighbourhood_cleaned = to_neighbourhood_edited.drop(columns=['Category', 'Topic'])

#set the characteristic as index
to_neighbourhood_cleaned.set_index('Characteristic', inplace=True)

#drop the unnecessary rows before transpose
to_neighbourhood_cleaned.drop('TSNS2020 Designation',inplace=True)
to_neighbourhood_cleaned.drop('City of Toronto',axis=1,inplace=True)
#to_neighbourhood_cleaned.drop('Population Change 2011-2016',inplace=True)
                              
#transpose the dataframe so that rows will be the neighbourhoods                             
to_neighbourhood_cleaned = to_neighbourhood_cleaned.transpose()

#set the Neighbourhood Number as index
to_neighbourhood_cleaned.set_index('Neighbourhood Number')

#to_neighbourhood_cleaned.reset_index(drop=True,inplace=True)
#to_neighbourhood_cleaned.set_index('Neighbourhood Number', drop=True, inplace=True)

to_neighbourhood_cleaned['Population Change 2011-2016']=to_neighbourhood_cleaned['Population Change 2011-2016'].str.replace('%','')

to_neighbourhood_cleaned

Characteristic,Neighbourhood Number,"Population, 2016",Population Change 2011-2016,Total private dwellings,Population density per square kilometre,Children (0-14 years),Youth (15-24 years),Working Age (25-54 years),Pre-retirement (55-64 years),Seniors (65+ years),...,After-tax income: Aggregate amount ($'000),Income taxes: Population with an amount,Income taxes: Average amount ($),Income taxes: Aggregate amount ($'000),Net federal tax: Population with an amount,Net federal tax: Average amount ($),Net federal tax: Aggregate amount ($'000),Provincial and territorial income taxes: Population with an amount,Provincial and territorial income taxes: Average amount ($),Provincial and territorial income taxes: Aggregate amount ($'000)
Agincourt North,129,29113,-3.90,9371,3929,3840,3705,11305,4230,6045,...,633574,12025,6726,80881,10910,4795,52315,11585,2472,28636
Agincourt South-Malvern West,128,23757,8.00,8535,3034,3075,3360,9965,3265,4105,...,540969,10480,7185,75294,9555,5102,48753,10060,2642,26579
Alderwood,20,12054,1.30,4732,2435,1760,1235,5220,1825,2015,...,388460,7255,11626,84349,6860,7895,54161,7060,4274,30171
Annex,95,30526,4.60,18109,10863,2360,3750,15040,3480,5910,...,2053142,18195,45973,836470,16675,30496,508525,17930,18254,327289
Banbury-Don Mills,42,27695,2.90,12473,2775,3605,2730,10810,3555,6975,...,1158599,16245,21829,354616,15250,14370,219140,15915,8509,135426
Bathurst Manor,34,15873,2.80,6418,3377,2325,1940,6655,2030,2940,...,482056,8075,12656,102199,7460,8645,64491,7830,4814,37695
Bay Street Corridor,76,25797,33.30,18436,14097,1695,6860,13065,1760,2420,...,992101,12770,23471,299729,10970,16729,183513,12575,9224,115997
Bayview Village,52,21396,21.10,10111,4195,2415,2505,10310,2540,3615,...,733692,11385,16439,187163,10295,11062,113887,11155,6569,73277
Bayview Woods-Steeles,49,13154,-2.80,4895,3240,1515,1635,4490,1825,3685,...,403347,6490,14685,95305,5875,10191,59871,6325,5608,35470
Bedford Park-Nortown,39,23236,0.20,9052,4209,4555,3210,8410,3075,3980,...,1513078,13455,48920,658221,12360,32139,397240,13245,19736,261409


In [24]:
to_neighbourhood_cleaned
to_neighbourhood_cleaned_num = pd.DataFrame()
to_neighbourhood_cleaned_num = to_neighbourhood_cleaned.apply(lambda x: pd.to_numeric(x.astype(str).str.replace(',',''), errors='coerce'))
to_neighbourhood_cleaned_num.dtypes



Characteristic
Neighbourhood Number                                                                                            int64
Population, 2016                                                                                                int64
Population Change 2011-2016                                                                                   float64
Total private dwellings                                                                                         int64
Population density per square kilometre                                                                         int64
Children (0-14 years)                                                                                           int64
Youth (15-24 years)                                                                                             int64
Working Age (25-54 years)                                                                                       int64
Pre-retirement (55-64 years)             

In [27]:
to_neighbourhood_cleaned_num.sort_values('Neighbourhood Number',ascending=True, inplace=True)

df.sort_values('Neighbourhood Number',ascending=True, inplace=True)

df_combined = pd.DataFrame()
df_combined = pd.concat([df,to_neighbourhood_cleaned_num[['Population, 2016']]], axis=1)

df_combined

Unnamed: 0,Neighbourhood Number,Neighbourhood,Longitude,Latitude,"Population, 2016"
0,1.0,West Humber-Clairville,-79.596356,43.716180,
1,2.0,Mount Olive-Silverstone-Jamestown,-79.587259,43.746868,
2,3.0,Thistletown-Beaumond Heights,-79.563491,43.737988,
3,4.0,Rexdale-Kipling,-79.566228,43.723725,
4,5.0,Elms-Old Rexdale,-79.548983,43.721519,
5,6.0,Kingsview Village-The Westway,-79.547863,43.698993,
6,7.0,Willowridge-Martingrove-Richview,-79.554221,43.683645,
7,8.0,Humber Heights-Westmount,-79.522416,43.692233,
8,9.0,Edenbridge-Humber Valley,-79.522458,43.670886,
9,10.0,Princess-Rosethorn,-79.544559,43.666051,


In [7]:
to_neighbourhood_cleaned_num.dropna(axis=1, how='any', thresh=None, subset=None, inplace=True)

In [8]:
to_neighbourhood_cleaned_num.sort_values('Neighbourhood Number', ascending=True, inplace=True)
to_neighbourhood_cleaned_num.reset_index(drop=True, inplace=True)

In [9]:
to_neighbourhood_cleaned_num.drop('Neighbourhood Number',axis=1,inplace=True)

In [10]:
to_neighbourhood_cleaned_num

Characteristic,"Population, 2016",Population Change 2011-2016,Total private dwellings,Population density per square kilometre,Children (0-14 years),Youth (15-24 years),Working Age (25-54 years),Pre-retirement (55-64 years),Seniors (65+ years),Older Seniors (85+ years),...,After-tax income: Aggregate amount ($'000),Income taxes: Population with an amount,Income taxes: Average amount ($),Income taxes: Aggregate amount ($'000),Net federal tax: Population with an amount,Net federal tax: Average amount ($),Net federal tax: Aggregate amount ($'000),Provincial and territorial income taxes: Population with an amount,Provincial and territorial income taxes: Average amount ($),Provincial and territorial income taxes: Aggregate amount ($'000)
0,33312,-2.3,11045,1117,5060,5445,13845,3990,4980,615,...,737298,15090,6429,97014,13865,4561,63245,14580,2317,33784
1,32954,0.5,10220,7291,7090,5240,13615,3475,3560,300,...,575316,11310,5064,57276,10310,3635,37473,10940,1810,19802
2,10360,2.2,3472,3130,1730,1410,4160,1195,1880,350,...,229873,4515,7040,31784,4100,5019,20578,4355,2575,11214
3,10529,0.4,3989,4229,1640,1355,4300,1520,1730,300,...,249159,4960,6949,34467,4560,4943,22539,4765,2502,11920
4,9456,-1.0,3344,3306,1805,1440,3700,1255,1275,145,...,203304,3955,6666,26365,3630,4729,17168,3830,2404,9208
5,22000,1.3,8159,4356,4240,3020,8635,2550,3585,575,...,527362,9390,9341,87710,8675,6484,56246,9135,3440,31426
6,22156,3.8,8721,4007,3555,2625,8140,2905,4905,885,...,653131,11740,11278,132405,10935,7742,84656,11530,4150,47848
7,10948,3.4,4261,3981,1450,1140,3790,1510,3045,950,...,317217,5605,11126,62360,5160,7737,39923,5490,4077,22380
8,15535,4.0,6606,2840,2120,1805,5940,2385,3290,665,...,903751,9300,39507,367417,8730,25291,220793,9050,16238,146956
9,11051,-1.3,3958,2138,1770,1580,3825,1855,2025,325,...,629991,6710,36827,247107,6330,24010,151984,6585,14436,95064


## Normalize Data 

In [11]:
from sklearn import preprocessing


to_neighbourhood_cleaned_normalized = preprocessing.StandardScaler().fit(to_neighbourhood_cleaned_num.reset_index(drop=True)).transform(to_neighbourhood_cleaned_num.reset_index(drop=True))





  return self.partial_fit(X, y)
  after removing the cwd from sys.path.


In [12]:
to_neighbourhood_cleaned_normalized

array([[ 1.38039662, -0.7147478 ,  0.49110967, ...,  0.91949128,
        -0.56564436, -0.39390282],
       [ 1.34458836, -0.39912187,  0.3366529 , ...,  0.25184704,
        -0.63166062, -0.57695407],
       [-0.91533352, -0.20749184, -0.92670985, ..., -0.95596541,
        -0.53205029, -0.6893875 ],
       ...,
       [ 0.32655327, -0.47802836, -0.04171937, ..., -0.11682464,
        -0.59116544, -0.5882524 ],
       [-0.27878652, -0.37657716, -0.42851656, ..., -0.6799202 ,
        -0.45418496, -0.59280839],
       [-0.95964375, -0.3427601 , -0.81961982, ..., -0.68358857,
        -0.26290709, -0.48130449]])

## Finding Clusters

Using K-Means

In [19]:
# set number of clusters
kclusters = 8

to_neighbourhood_clustering = to_neighbourhood_cleaned_normalized

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(to_neighbourhood_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 7, 7, 7, 0, 4, 7, 1, 1], dtype=int32)

In [20]:
df['Cluster']=kmeans.labels_


In [21]:
df

Unnamed: 0,Neighbourhood Number,Neighbourhood,Longitude,Latitude,Cluster
0,1,West Humber-Clairville,-79.596356,43.716180,0
1,2,Mount Olive-Silverstone-Jamestown,-79.587259,43.746868,0
2,3,Thistletown-Beaumond Heights,-79.563491,43.737988,7
3,4,Rexdale-Kipling,-79.566228,43.723725,7
4,5,Elms-Old Rexdale,-79.548983,43.721519,7
5,6,Kingsview Village-The Westway,-79.547863,43.698993,0
6,7,Willowridge-Martingrove-Richview,-79.554221,43.683645,4
7,8,Humber Heights-Westmount,-79.522416,43.692233,7
8,9,Edenbridge-Humber Valley,-79.522458,43.670886,1
9,10,Princess-Rosethorn,-79.544559,43.666051,1


In [132]:
# create map
map_clusters = folium.Map(location=[latitude+0.05, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df['Latitude'], df['Longitude'], df['Neighbourhood'], df['Cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster+1), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [47]:
df_combined = pd.DataFrame()
df_combined = pd.concat([df[['Neighbourhood Number','Neighbourhood','Cluster']],
                         to_neighbourhood_cleaned_num[['Population, 2016',
                                                         'Average after-tax income of households in 2015 ($)',
                                                         '  Non-immigrants',
                                                         '  Immigrants',
                                                         '  Total visible minority population',
                                                         '  Not a visible minority',
                                                         '  Owner',
                                                         '  Renter',
                                                         '  Condominium',
                                                         '  Not condominium',
                                                         'Employment rate',
                                                         'Unemployment rate',
                                                         '  Car, truck, van - as a driver',
                                                         '  Car, truck, van - as a passenger',
                                                         '  Public transit',
                                                         '  Walked',
                                                         '  Bicycle']]], axis=1)
df_combined



Unnamed: 0,Neighbourhood Number,Neighbourhood,Cluster,"Population, 2016",Average after-tax income of households in 2015 ($),Non-immigrants,Immigrants,Total visible minority population,Not a visible minority,Owner,Renter,Condominium,Not condominium,Employment rate,Unemployment rate,"Car, truck, van - as a driver","Car, truck, van - as a passenger",Public transit,Walked,Bicycle
0,1,West Humber-Clairville,0,33312,426156,12285,19230,26945,5940,7010,3275,2130,8150,58.0,9.6,9445,1140,4380,425,60
1,2,Mount Olive-Silverstone-Jamestown,0,32954,360648,10425,21500,28455,4375,4425,5455,2860,7030,50.4,12.1,6965,930,4110,385,35
2,3,Thistletown-Beaumond Heights,7,10360,140050,4470,5465,6510,3615,2035,1245,90,3190,54.1,10.4,2860,320,1030,110,15
3,4,Rexdale-Kipling,7,10529,134305,5255,4975,5365,4990,2155,1685,105,3735,55.9,10.9,2930,235,1345,150,25
4,5,Elms-Old Rexdale,7,9456,123119,4600,4725,6365,3085,1750,1470,830,2385,54.6,10.0,2350,250,1330,70,20
5,6,Kingsview Village-The Westway,0,22000,272950,10110,11425,13130,8835,4055,3735,2590,5190,52.1,9.9,5335,450,2665,270,45
6,7,Willowridge-Martingrove-Richview,4,22156,412302,11775,10030,8870,13270,4595,3925,430,8090,53.5,8.5,6220,515,2380,140,20
7,8,Humber Heights-Westmount,7,10948,238655,5360,4645,2745,7365,2515,1620,925,3215,54.8,7.4,2800,205,1200,65,10
8,9,Edenbridge-Humber Valley,1,15535,441052,9065,5950,3230,11980,3510,2745,1035,5215,60.5,6.1,4490,345,2010,175,40
9,10,Princess-Rosethorn,1,11051,538736,7565,3360,2105,8945,3270,595,0,3855,59.5,6.0,3515,230,950,75,0


Cluster 1

In [48]:
df_combined.loc[df_combined['Cluster'] == 0, df_combined.columns[list(range(0, df_combined.shape[1]))]]


Unnamed: 0,Neighbourhood Number,Neighbourhood,Cluster,"Population, 2016",Average after-tax income of households in 2015 ($),Non-immigrants,Immigrants,Total visible minority population,Not a visible minority,Owner,Renter,Condominium,Not condominium,Employment rate,Unemployment rate,"Car, truck, van - as a driver","Car, truck, van - as a passenger",Public transit,Walked,Bicycle
0,1,West Humber-Clairville,0,33312,426156,12285,19230,26945,5940,7010,3275,2130,8150,58.0,9.6,9445,1140,4380,425,60
1,2,Mount Olive-Silverstone-Jamestown,0,32954,360648,10425,21500,28455,4375,4425,5455,2860,7030,50.4,12.1,6965,930,4110,385,35
5,6,Kingsview Village-The Westway,0,22000,272950,10110,11425,13130,8835,4055,3735,2590,5190,52.1,9.9,5335,450,2665,270,45
23,24,Black Creek,0,21737,261560,8175,12705,17435,4125,2440,4880,1260,6065,47.8,12.7,4040,735,2720,245,85
24,25,Glenfield-Jane Heights,0,30491,410012,11700,17790,23155,7065,5105,4810,1165,8730,48.7,11.5,6205,975,3965,280,55
25,26,Downsview-Roding-CFB,0,35052,562794,14560,19515,20070,14945,6275,6855,1370,11755,56.8,7.9,8405,1060,6085,460,10
26,27,York University Heights,0,27593,302358,10295,15595,19135,8430,4600,5565,2870,7300,56.3,10.7,5945,665,5405,585,115
34,35,Westminster-Branson,0,26274,332776,7070,18155,11715,14410,4060,6165,2870,7340,56.2,9.2,5585,550,5275,345,60
35,36,Newtonbrook West,0,23831,260415,6450,15665,13825,9805,4710,4250,2165,6790,54.7,8.4,4760,625,4535,470,55
43,44,Flemingdon Park,0,21933,261233,6990,14030,17220,4695,3500,4335,4100,3725,50.6,10.6,3705,260,3860,405,50


Cluster 2

In [49]:
df_combined.loc[df_combined['Cluster'] == 1, df_combined.columns[list(range(0, df_combined.shape[1]))]]


Unnamed: 0,Neighbourhood Number,Neighbourhood,Cluster,"Population, 2016",Average after-tax income of households in 2015 ($),Non-immigrants,Immigrants,Total visible minority population,Not a visible minority,Owner,Renter,Condominium,Not condominium,Employment rate,Unemployment rate,"Car, truck, van - as a driver","Car, truck, van - as a passenger",Public transit,Walked,Bicycle
8,9,Edenbridge-Humber Valley,1,15535,441052,9065,5950,3230,11980,3510,2745,1035,5215,60.5,6.1,4490,345,2010,175,40
9,10,Princess-Rosethorn,1,11051,538736,7565,3360,2105,8945,3270,595,0,3855,59.5,6.0,3515,230,950,75,0
11,12,Markland Wood,1,10554,195412,6505,3890,1395,9060,3200,1105,1520,2795,56.1,6.2,3025,275,1085,180,40
14,15,Kingsway South,1,9271,900624,7115,1960,1100,8060,2910,660,700,2885,58.6,7.5,2210,120,1185,115,30
18,19,Long Branch,1,10084,137480,6415,3445,2305,7760,2240,2575,760,4055,65.6,7.1,3145,230,1585,175,110
19,20,Alderwood,1,12054,168602,7975,3970,2490,9535,3675,950,190,4430,62.4,6.1,4090,355,1285,195,65
37,38,Lansing-Westgate,1,16164,356122,7960,7075,6930,9140,3840,2740,2570,4020,62.5,7.2,3705,195,3015,530,30
39,40,St.Andrew-Windfields,1,17812,649291,8355,8835,8940,8735,4275,2140,1185,5255,56.3,8.3,4555,245,1975,270,20
40,41,Bridle Path-Sunnybrook-York Mills,1,9266,1392010,5880,3150,2765,6415,2940,295,950,2290,53.6,8.0,2240,140,580,165,35
55,56,Leaside-Bennington,1,16828,841619,13130,3375,2935,13710,4635,1800,455,5980,63.7,6.9,4475,350,1795,505,235


Cluster 3

In [50]:
df_combined.loc[df_combined['Cluster'] == 2, df_combined.columns[list(range(0, df_combined.shape[1]))]]


Unnamed: 0,Neighbourhood Number,Neighbourhood,Cluster,"Population, 2016",Average after-tax income of households in 2015 ($),Non-immigrants,Immigrants,Total visible minority population,Not a visible minority,Owner,Renter,Condominium,Not condominium,Employment rate,Unemployment rate,"Car, truck, van - as a driver","Car, truck, van - as a passenger",Public transit,Walked,Bicycle
16,17,Mimico (includes Humber Bay Shores),2,33964,583422,18925,13425,10395,22915,9150,8635,9360,8430,66.6,6.2,11095,760,5040,505,310
74,75,Church-Yonge Corridor,2,31340,443734,16650,10935,12440,17880,5800,13865,9935,9745,68.7,8.5,2705,245,7000,7275,690
75,76,Bay Street Corridor,2,25797,352218,9995,10455,15925,9675,4835,10245,10745,4330,56.2,10.2,1780,165,3540,5840,325
81,82,Niagara,2,31180,369683,20255,8995,10665,19765,9380,9395,15710,3070,82.7,4.5,6820,640,6965,5070,1330
92,93,Dovercourt-Wallace Emerson-Junction,2,36625,594477,20635,14395,12275,23935,7630,7665,1850,13455,65.2,7.2,5825,820,8950,1215,1980
94,95,Annex,2,30526,792507,19030,8270,7270,21380,6060,9870,4510,11430,65.8,6.7,3290,290,6200,3200,1675
103,104,Mount Pleasant West,2,29658,443047,15955,11295,9730,19200,4415,13070,5005,12490,69.5,6.7,4745,370,9435,1840,250


Cluster 4

In [51]:
df_combined.loc[df_combined['Cluster'] == 3, df_combined.columns[list(range(0, df_combined.shape[1]))]]


Unnamed: 0,Neighbourhood Number,Neighbourhood,Cluster,"Population, 2016",Average after-tax income of households in 2015 ($),Non-immigrants,Immigrants,Total visible minority population,Not a visible minority,Owner,Renter,Condominium,Not condominium,Employment rate,Unemployment rate,"Car, truck, van - as a driver","Car, truck, van - as a passenger",Public transit,Walked,Bicycle
13,14,Islington-City Centre West,3,43965,854623,22450,19590,16035,27195,11215,8105,8340,10980,63.6,7.3,11775,975,8205,795,90
44,45,Parkwoods-Donalda,3,34805,623453,16630,17210,19075,15655,5300,8020,1505,11800,57.3,9.2,8580,820,5275,420,45
116,117,L'Amoreaux,3,43993,529383,13710,28325,33540,9945,9825,5205,4835,10205,50.4,10.9,10005,1220,5895,370,85
130,131,Rouge,3,46496,729154,20025,25575,37315,8750,11195,2205,1265,12125,59.4,8.2,13665,1510,5935,220,20
131,132,Malvern,3,43794,533202,15960,26900,39195,4580,9395,4020,4395,9020,55.5,10.5,10785,1400,6425,425,60
136,137,Woburn,3,53485,629030,20645,30185,40510,12495,9765,8670,3460,14980,51.9,10.6,11505,1405,7635,780,45


Cluster 5

In [52]:
df_combined.loc[df_combined['Cluster'] == 4, df_combined.columns[list(range(0, df_combined.shape[1]))]]


Unnamed: 0,Neighbourhood Number,Neighbourhood,Cluster,"Population, 2016",Average after-tax income of households in 2015 ($),Non-immigrants,Immigrants,Total visible minority population,Not a visible minority,Owner,Renter,Condominium,Not condominium,Employment rate,Unemployment rate,"Car, truck, van - as a driver","Car, truck, van - as a passenger",Public transit,Walked,Bicycle
6,7,Willowridge-Martingrove-Richview,4,22156,412302,11775,10030,8870,13270,4595,3925,430,8090,53.5,8.5,6220,515,2380,140,20
10,11,Eringate-Centennial-West Deane,4,18588,352269,10180,8190,5130,13395,5810,1055,1640,5225,58.9,7.4,5980,465,1945,180,35
15,16,Stonegate-Queensway,4,25051,516575,15905,8635,4595,20310,6710,3665,605,9780,63.0,6.7,7090,440,3505,380,235
31,32,Englemount-Lawrence,4,22372,293189,11490,9785,8855,13030,3445,4795,1775,6470,57.4,8.9,3890,265,3945,590,85
38,39,Bedford Park-Nortown,4,23236,720203,15885,6805,4845,18270,6005,2595,1175,7425,60.3,5.5,5940,355,2415,550,90
41,42,Banbury-Don Mills,4,27695,493486,13310,13195,12700,14290,7390,4735,4455,7665,55.6,7.2,7150,500,2945,615,65
51,52,Bayview Village,4,21396,354894,7220,12625,14505,6635,5605,3930,5770,3760,58.5,7.7,5270,385,3695,345,30
61,62,East End-Danforth,4,21381,304010,14820,5920,6475,14535,4990,4190,410,8775,64.3,8.2,4145,365,4410,610,365
62,63,The Beaches,4,21567,659192,16900,4225,2980,18465,5885,3540,1150,8285,68.0,5.7,5165,590,2995,750,570
69,70,South Riverdale,4,27876,662651,18260,8210,11225,15815,7230,4840,2370,9710,66.8,6.5,5000,655,5315,1425,1145


Cluster 6

In [53]:
df_combined.loc[df_combined['Cluster'] == 5, df_combined.columns[list(range(0, df_combined.shape[1]))]]


Unnamed: 0,Neighbourhood Number,Neighbourhood,Cluster,"Population, 2016",Average after-tax income of households in 2015 ($),Non-immigrants,Immigrants,Total visible minority population,Not a visible minority,Owner,Renter,Condominium,Not condominium,Employment rate,Unemployment rate,"Car, truck, van - as a driver","Car, truck, van - as a passenger",Public transit,Walked,Bicycle
50,51,Willowdale East,5,50434,572155,14150,31040,37715,12545,13800,8505,16330,5970,55.9,8.5,9890,695,9390,1550,50


Cluster 7

In [54]:
df_combined.loc[df_combined['Cluster'] == 6, df_combined.columns[list(range(0, df_combined.shape[1]))]]


Unnamed: 0,Neighbourhood Number,Neighbourhood,Cluster,"Population, 2016",Average after-tax income of households in 2015 ($),Non-immigrants,Immigrants,Total visible minority population,Not a visible minority,Owner,Renter,Condominium,Not condominium,Employment rate,Unemployment rate,"Car, truck, van - as a driver","Car, truck, van - as a passenger",Public transit,Walked,Bicycle
76,77,Waterfront Communities-The Island,6,65913,662333,37035,23820,28895,36715,16825,23930,35730,5030,78.2,5.2,9100,760,10915,20855,1570


Cluster 8

In [55]:
df_combined.loc[df_combined['Cluster'] == 7, df_combined.columns[list(range(0, df_combined.shape[1]))]]


Unnamed: 0,Neighbourhood Number,Neighbourhood,Cluster,"Population, 2016",Average after-tax income of households in 2015 ($),Non-immigrants,Immigrants,Total visible minority population,Not a visible minority,Owner,Renter,Condominium,Not condominium,Employment rate,Unemployment rate,"Car, truck, van - as a driver","Car, truck, van - as a passenger",Public transit,Walked,Bicycle
2,3,Thistletown-Beaumond Heights,7,10360,140050,4470,5465,6510,3615,2035,1245,90,3190,54.1,10.4,2860,320,1030,110,15
3,4,Rexdale-Kipling,7,10529,134305,5255,4975,5365,4990,2155,1685,105,3735,55.9,10.9,2930,235,1345,150,25
4,5,Elms-Old Rexdale,7,9456,123119,4600,4725,6365,3085,1750,1470,830,2385,54.6,10.0,2350,250,1330,70,20
7,8,Humber Heights-Westmount,7,10948,238655,5360,4645,2745,7365,2515,1620,925,3215,54.8,7.4,2800,205,1200,65,10
12,13,Etobicoke West Mall,7,11848,132460,5415,5805,4520,6950,2410,2175,1785,2805,60.5,7.9,3185,315,1750,200,65
17,18,New Toronto,7,11463,188819,6975,3970,3615,7675,2335,3040,560,4810,60.9,8.7,2995,240,1815,265,155
20,21,Humber Summit,7,12416,202677,4860,7280,8140,4265,2725,1165,355,3530,50.7,9.6,3225,365,1305,95,10
21,22,Humbermede,7,15545,184459,5730,9440,11970,3565,2660,2400,645,4410,54.6,10.2,3925,505,2070,115,20
22,23,Pelmo Park-Humberlea,7,10722,154568,5685,4930,4675,6035,3070,645,535,3170,61.5,6.0,3460,330,1375,85,10
27,28,Rustic,7,9941,113995,4495,5105,5810,3945,1390,2265,80,3575,47.8,9.1,1905,200,1370,90,0


## Get Foursquare data

Define Foursquare Credentials and Version

In [58]:
CLIENT_ID = 'EKYLOJIRYVRFYUBRKVPLAY0BZEG52QPLCMH220LR21BJNT2H' # your Foursquare ID
CLIENT_SECRET = 'R0EB1FNJYG252UOG20URXWOJMGA5XFNK1XNOOZBGUEEFCSO2' # your Foursquare Secret
VERSION = '20190605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EKYLOJIRYVRFYUBRKVPLAY0BZEG52QPLCMH220LR21BJNT2H
CLIENT_SECRET:R0EB1FNJYG252UOG20URXWOJMGA5XFNK1XNOOZBGUEEFCSO2


get_category_type function from the Foursquare lab.

In [59]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

A function to get the top 100 venues that are within a radius of 500 meters within the postcode's coordinates

In [79]:
LIMIT = 100

def getNearbyVenues(neighbourhood, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for neighbourhood, lat, lng in zip(neighbourhood, latitudes, longitudes):
        print(neighbourhood)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            neighbourhood, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

run the above function for each postcode and create a new dataframe called toronto_venues.

In [81]:
toronto_venues = getNearbyVenues(neighbourhood=df['Neighbourhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

West Humber-Clairville 
Mount Olive-Silverstone-Jamestown 
Thistletown-Beaumond Heights 
Rexdale-Kipling 
Elms-Old Rexdale 
Kingsview Village-The Westway 
Willowridge-Martingrove-Richview 
Humber Heights-Westmount 
Edenbridge-Humber Valley 
Princess-Rosethorn 
Eringate-Centennial-West Deane 
Markland Wood 
Etobicoke West Mall 
Islington-City Centre West 
Kingsway South 
Stonegate-Queensway 
Mimico (includes Humber Bay Shores) 
New Toronto 
Long Branch 
Alderwood 
Humber Summit 
Humbermede 
Pelmo Park-Humberlea 
Black Creek 
Glenfield-Jane Heights 
Downsview-Roding-CFB 
York University Heights 
Rustic 
Maple Leaf 
Brookhaven-Amesbury 
Yorkdale-Glen Park 
Englemount-Lawrence 
Clanton Park 
Bathurst Manor 
Westminster-Branson 
Newtonbrook West 
Willowdale West 
Lansing-Westgate 
Bedford Park-Nortown 
St.Andrew-Windfields 
Bridle Path-Sunnybrook-York Mills 
Banbury-Don Mills 
Victoria Village 
Flemingdon Park 
Parkwoods-Donalda 
Pleasant View 
Don Valley Village 
Hillcrest Village 
Bayview

In [82]:
toronto_venues.shape

(5704, 7)

In [83]:
toronto_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,West Humber-Clairville,43.71618,-79.596356,Fortinos,43.721582,-79.596233,Grocery Store
1,West Humber-Clairville,43.71618,-79.596356,Mandarin Buffet,43.719798,-79.59582,Chinese Restaurant
2,West Humber-Clairville,43.71618,-79.596356,Tim Hortons,43.714657,-79.593716,Coffee Shop
3,West Humber-Clairville,43.71618,-79.596356,Woodbine Racetrack,43.715225,-79.604273,Racecourse
4,West Humber-Clairville,43.71618,-79.596356,Xawaash,43.715786,-79.593053,Mediterranean Restaurant


how many unique categories can be curated from all the returned venues

In [84]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 342 uniques categories.


how many venues were returned for each postcode

In [85]:
toronto_venues.groupby('Neighbourhood').count().head()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt North,45,45,45,45,45,45
Agincourt South-Malvern West,37,37,37,37,37,37
Alderwood,23,23,23,23,23,23
Annex,100,100,100,100,100,100
Banbury-Don Mills,46,46,46,46,46,46


In [86]:
# one hot encoding
to_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
to_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [to_onehot.columns[-1]] + list(to_onehot.columns[:-1])
to_onehot = to_onehot[fixed_columns]

to_onehot.head()

Unnamed: 0,Neighbourhood,ATM,Accessories Store,Afghan Restaurant,African Restaurant,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Arcade,...,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,West Humber-Clairville,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,West Humber-Clairville,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,West Humber-Clairville,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,West Humber-Clairville,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,West Humber-Clairville,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


examine the new dataframe size

In [87]:
to_onehot.shape

(5704, 343)

group by neighbourhood

In [88]:
to_grouped = to_onehot.groupby('Neighbourhood').mean().reset_index()
to_grouped

Unnamed: 0,Neighbourhood,ATM,Accessories Store,Afghan Restaurant,African Restaurant,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Arcade,...,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Agincourt North,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000,0.00,...,0.044444,0.000000,0.00,0.000000,0.000000,0.022222,0.000000,0.000000,0.0,0.0
1,Agincourt South-Malvern West,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000,0.00,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
2,Alderwood,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000,0.00,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
3,Annex,0.0,0.000000,0.000000,0.0,0.010000,0.0,0.0,0.000,0.00,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.010000,0.000000,0.0,0.0
4,Banbury-Don Mills,0.0,0.000000,0.000000,0.0,0.043478,0.0,0.0,0.000,0.00,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.021739,0.000000,0.0,0.0
5,Bathurst Manor,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000,0.00,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
6,Bay Street Corridor,0.0,0.000000,0.000000,0.0,0.020000,0.0,0.0,0.000,0.00,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.010000,0.0,0.0
7,Bayview Village,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000,0.00,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
8,Bayview Woods-Steeles,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000,0.00,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
9,Bedford Park-Nortown,0.0,0.000000,0.000000,0.0,0.020000,0.0,0.0,0.000,0.00,...,0.000000,0.000000,0.00,0.000000,0.000000,0.020000,0.000000,0.000000,0.0,0.0


In [90]:
to_grouped.shape

(140, 343)

a function to sort the venues in descending order.

In [91]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Create the new dataframe and display the top 10 venues for each postcode.

In [92]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
hoods_venues_sorted = pd.DataFrame(columns=columns)
hoods_venues_sorted['Neighbourhood'] = to_grouped['Neighbourhood']

for ind in np.arange(to_grouped.shape[0]):
    hoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(to_grouped.iloc[ind, :], num_top_venues)

hoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt North,Chinese Restaurant,Bakery,Pharmacy,Shopping Mall,Pizza Place,Supermarket,Coffee Shop,Vietnamese Restaurant,Sandwich Place,Indian Restaurant
1,Agincourt South-Malvern West,Chinese Restaurant,Shopping Mall,Cantonese Restaurant,Restaurant,Pool Hall,Korean Restaurant,Sushi Restaurant,Supermarket,Lounge,Breakfast Spot
2,Alderwood,Park,Gas Station,Discount Store,Pharmacy,Pizza Place,Dance Studio,Pub,Convenience Store,Construction & Landscaping,Intersection
3,Annex,Restaurant,Coffee Shop,Café,Bakery,Italian Restaurant,Pub,French Restaurant,Pizza Place,Vegetarian / Vegan Restaurant,Museum
4,Banbury-Don Mills,Coffee Shop,Japanese Restaurant,Shoe Store,Restaurant,Clothing Store,American Restaurant,Park,Café,Burger Joint,Bakery


In [112]:
df.sort_values('Neighbourhood',ascending=True, inplace=True)
df.reset_index(drop=False,inplace=True)

to_data_merged = pd.DataFrame
to_data_merged = pd.concat([df[['Neighbourhood Number','Cluster']],hoods_venues_sorted], axis=1)




In [113]:
to_data_merged

Unnamed: 0,Neighbourhood Number,Cluster,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,129,0,Agincourt North,Chinese Restaurant,Bakery,Pharmacy,Shopping Mall,Pizza Place,Supermarket,Coffee Shop,Vietnamese Restaurant,Sandwich Place,Indian Restaurant
1,128,0,Agincourt South-Malvern West,Chinese Restaurant,Shopping Mall,Cantonese Restaurant,Restaurant,Pool Hall,Korean Restaurant,Sushi Restaurant,Supermarket,Lounge,Breakfast Spot
2,20,1,Alderwood,Park,Gas Station,Discount Store,Pharmacy,Pizza Place,Dance Studio,Pub,Convenience Store,Construction & Landscaping,Intersection
3,95,2,Annex,Restaurant,Coffee Shop,Café,Bakery,Italian Restaurant,Pub,French Restaurant,Pizza Place,Vegetarian / Vegan Restaurant,Museum
4,42,4,Banbury-Don Mills,Coffee Shop,Japanese Restaurant,Shoe Store,Restaurant,Clothing Store,American Restaurant,Park,Café,Burger Joint,Bakery
5,34,7,Bathurst Manor,Park,Sports Bar,Breakfast Spot,Grocery Store,Sandwich Place,Hardware Store,Men's Store,Baseball Field,Playground,Convenience Store
6,76,2,Bay Street Corridor,Coffee Shop,Café,Japanese Restaurant,Clothing Store,Italian Restaurant,Ramen Restaurant,Pizza Place,Arts & Crafts Store,Breakfast Spot,Theater
7,52,4,Bayview Village,Park,Coffee Shop,Outdoor Supply Store,Hardware Store,Shoe Store,Tennis Court,Grocery Store,Clothing Store,Fast Food Restaurant,Breakfast Spot
8,49,7,Bayview Woods-Steeles,Park,Pool,Recreation Center,Café,Music Store,Liquor Store,Chinese Restaurant,Bank,Coffee Shop,Japanese Restaurant
9,39,4,Bedford Park-Nortown,Pizza Place,Coffee Shop,Fast Food Restaurant,Italian Restaurant,Bagel Shop,Sushi Restaurant,Ice Cream Shop,Grocery Store,Convenience Store,Butcher


In [119]:
to_data_merged.loc[to_data_merged['Cluster'] == 0, to_data_merged.columns[[2] + [1] + list(range(3, to_data_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt North,0,Chinese Restaurant,Bakery,Pharmacy,Shopping Mall,Pizza Place,Supermarket,Coffee Shop,Vietnamese Restaurant,Sandwich Place,Indian Restaurant
1,Agincourt South-Malvern West,0,Chinese Restaurant,Shopping Mall,Cantonese Restaurant,Restaurant,Pool Hall,Korean Restaurant,Sushi Restaurant,Supermarket,Lounge,Breakfast Spot
11,Bendale,0,Park,Optical Shop,Chinese Restaurant,Grocery Store,Wings Joint,Wine Shop,Indian Restaurant,Department Store,Pizza Place,Tennis Court
13,Black Creek,0,Grocery Store,Chinese Restaurant,Athletics & Sports,Gym / Fitness Center,Liquor Store,Sandwich Place,Kitchen Supply Store,Vietnamese Restaurant,Pharmacy,Discount Store
24,Clairlea-Birchmount,0,Diner,Coffee Shop,Bakery,Bank,Pub,Beer Store,Intersection,Convenience Store,Gas Station,Fast Food Restaurant
30,Don Valley Village,0,Clothing Store,Park,Fast Food Restaurant,Coffee Shop,Toy / Game Store,Tea Room,Bakery,Kids Store,Pizza Place,Restaurant
31,Dorset Park,0,Electronics Store,Fast Food Restaurant,Indian Restaurant,Gym / Fitness Center,Pet Store,Bakery,Coffee Shop,Sri Lankan Restaurant,Beer Store,Clothing Store
33,Downsview-Roding-CFB,0,Supermarket,Baseball Field,Pizza Place,Vietnamese Restaurant,Video Game Store,Pharmacy,Grocery Store,Park,Field,Ethiopian Restaurant
37,Eglinton East,0,Wings Joint,Sandwich Place,Pharmacy,Fast Food Restaurant,Grocery Store,Coffee Shop,Bakery,Liquor Store,Pizza Place,Beer Store
42,Flemingdon Park,0,Coffee Shop,Japanese Restaurant,Grocery Store,Café,Sandwich Place,Science Museum,Gym,Middle Eastern Restaurant,Skating Rink,Golf Course


In [120]:
to_data_merged.loc[to_data_merged['Cluster'] == 1, to_data_merged.columns[[2] + [1] + list(range(3, to_data_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Alderwood,1,Park,Gas Station,Discount Store,Pharmacy,Pizza Place,Dance Studio,Pub,Convenience Store,Construction & Landscaping,Intersection
14,Blake-Jones,1,Café,Coffee Shop,Greek Restaurant,Pizza Place,Fast Food Restaurant,Ethiopian Restaurant,Furniture / Home Store,Burger Joint,Sports Bar,Ice Cream Shop
16,Bridle Path-Sunnybrook-York Mills,1,Café,College Gym,Bus Line,Bookstore,Restaurant,Gym / Fitness Center,Coffee Shop,Zoo Exhibit,Falafel Restaurant,Farm
19,Cabbagetown-South St.James Town,1,Park,Café,Pet Store,Pool,Japanese Restaurant,Diner,Gastropub,Taiwanese Restaurant,Thai Restaurant,Butcher
21,Casa Loma,1,Coffee Shop,Sandwich Place,Café,Pizza Place,Park,Liquor Store,Mexican Restaurant,Burger Joint,Italian Restaurant,Grocery Store
22,Centennial Scarborough,1,Bar,Pool,Playground,Park,Gym,Filipino Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
28,Danforth,1,Café,Coffee Shop,Park,Gastropub,Pizza Place,Ethiopian Restaurant,Sandwich Place,Bar,Burger Joint,American Restaurant
29,Danforth East York,1,Coffee Shop,Café,Sandwich Place,Ethiopian Restaurant,Pizza Place,Breakfast Spot,Thai Restaurant,Bus Line,Liquor Store,Bar
34,Dufferin Grove,1,Café,Coffee Shop,Bar,Bakery,Cocktail Bar,Restaurant,Gastropub,Sandwich Place,Italian Restaurant,Park
36,Edenbridge-Humber Valley,1,Park,Bakery,Pharmacy,Bus Stop,Garden,Skating Rink,Baseball Field,Electronics Store,Bus Line,Convenience Store


In [121]:
to_data_merged.loc[to_data_merged['Cluster'] == 2, to_data_merged.columns[[2] + [1] + list(range(3, to_data_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Annex,2,Restaurant,Coffee Shop,Café,Bakery,Italian Restaurant,Pub,French Restaurant,Pizza Place,Vegetarian / Vegan Restaurant,Museum
6,Bay Street Corridor,2,Coffee Shop,Café,Japanese Restaurant,Clothing Store,Italian Restaurant,Ramen Restaurant,Pizza Place,Arts & Crafts Store,Breakfast Spot,Theater
23,Church-Yonge Corridor,2,Coffee Shop,Clothing Store,Italian Restaurant,Japanese Restaurant,Theater,Ramen Restaurant,Café,Pizza Place,Restaurant,Sandwich Place
32,Dovercourt-Wallace Emerson-Junction,2,Bar,Café,Coffee Shop,Bakery,Pharmacy,Park,Mexican Restaurant,Caribbean Restaurant,Pizza Place,Brewery
77,Mimico (includes Humber Bay Shores),2,Pharmacy,Convenience Store,Grocery Store,Indian Restaurant,Furniture / Home Store,Spanish Restaurant,Brewery,Breakfast Spot,Sandwich Place,Coffee Shop
83,Mount Pleasant West,2,Coffee Shop,Italian Restaurant,Café,Gym,Park,Bookstore,Restaurant,Sushi Restaurant,Gastropub,Dessert Shop
87,Niagara,2,Coffee Shop,Café,Park,Furniture / Home Store,Gym,Restaurant,Yoga Studio,Men's Store,Italian Restaurant,Music Venue


In [122]:
to_data_merged.loc[to_data_merged['Cluster'] == 3, to_data_merged.columns[[2] + [1] + list(range(3, to_data_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
58,Islington-City Centre West,3,Coffee Shop,Fast Food Restaurant,Grocery Store,Pet Store,Pharmacy,Sandwich Place,Pizza Place,Vietnamese Restaurant,Restaurant,Shopping Mall
65,L'Amoreaux,3,Chinese Restaurant,Fast Food Restaurant,Coffee Shop,Gym Pool,Auto Garage,Shopping Mall,Breakfast Spot,Sandwich Place,Grocery Store,Tennis Court
73,Malvern,3,Pizza Place,Pharmacy,Park,Bank,Fast Food Restaurant,Sandwich Place,Grocery Store,Bakery,Salon / Barbershop,Gym / Fitness Center
95,Parkwoods-Donalda,3,Park,Chinese Restaurant,Fast Food Restaurant,Road,Coffee Shop,Train Station,Supermarket,Café,Discount Store,Caribbean Restaurant
105,Rouge,3,Zoo Exhibit,Zoo,Fast Food Restaurant,Dessert Shop,Gift Shop,Hungarian Restaurant,Fish & Chips Shop,Event Space,Falafel Restaurant,Farm
132,Woburn,3,Pharmacy,Coffee Shop,Indian Restaurant,Fast Food Restaurant,Vietnamese Restaurant,Paper / Office Supplies Store,Big Box Store,Discount Store,Gym,Park


In [123]:
to_data_merged.loc[to_data_merged['Cluster'] == 4, to_data_merged.columns[[2] + [1] + list(range(3, to_data_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Banbury-Don Mills,4,Coffee Shop,Japanese Restaurant,Shoe Store,Restaurant,Clothing Store,American Restaurant,Park,Café,Burger Joint,Bakery
7,Bayview Village,4,Park,Coffee Shop,Outdoor Supply Store,Hardware Store,Shoe Store,Tennis Court,Grocery Store,Clothing Store,Fast Food Restaurant,Breakfast Spot
9,Bedford Park-Nortown,4,Pizza Place,Coffee Shop,Fast Food Restaurant,Italian Restaurant,Bagel Shop,Sushi Restaurant,Ice Cream Shop,Grocery Store,Convenience Store,Butcher
12,Birchcliffe-Cliffside,4,Park,Bank,College Stadium,Gym,Thai Restaurant,Gym Pool,Fast Food Restaurant,General Entertainment,Skating Rink,Restaurant
35,East End-Danforth,4,Coffee Shop,Pizza Place,Bus Line,Grocery Store,Sushi Restaurant,Ice Cream Shop,Skating Rink,Sandwich Place,Park,Flower Shop
39,Englemount-Lawrence,4,Coffee Shop,Park,Fast Food Restaurant,Pizza Place,Restaurant,Bagel Shop,Bakery,Bank,Sandwich Place,Liquor Store
40,Eringate-Centennial-West Deane,4,Pizza Place,Sandwich Place,Baseball Field,Convenience Store,Park,Record Shop,Eastern European Restaurant,Bank,Grocery Store,Shopping Mall
49,High Park North,4,Bar,Coffee Shop,Café,Pizza Place,Park,Thai Restaurant,Italian Restaurant,Sushi Restaurant,Convenience Store,Mexican Restaurant
50,High Park-Swansea,4,Park,Coffee Shop,Deli / Bodega,Bakery,Light Rail Station,Flower Shop,Pizza Place,Dog Run,Pub,Building
79,Moss Park,4,Coffee Shop,Café,Park,Italian Restaurant,Restaurant,Theater,Gastropub,Breakfast Spot,Japanese Restaurant,Bakery


In [124]:
to_data_merged.loc[to_data_merged['Cluster'] == 5, to_data_merged.columns[[2] + [1] + list(range(3, to_data_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
129,Willowdale East,5,Coffee Shop,Pizza Place,Ramen Restaurant,Japanese Restaurant,Café,Fast Food Restaurant,Pharmacy,Pet Store,Sushi Restaurant,Restaurant


In [125]:
to_data_merged.loc[to_data_merged['Cluster'] == 6, to_data_merged.columns[[2] + [1] + list(range(3, to_data_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
122,Waterfront Communities-The Island,6,Coffee Shop,Boat or Ferry,Pizza Place,Sports Bar,Café,Music Venue,Steakhouse,Hotel,Chinese Restaurant,Gym


In [126]:
to_data_merged.loc[to_data_merged['Cluster'] == 7, to_data_merged.columns[[2] + [1] + list(range(3, to_data_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Bathurst Manor,7,Park,Sports Bar,Breakfast Spot,Grocery Store,Sandwich Place,Hardware Store,Men's Store,Baseball Field,Playground,Convenience Store
8,Bayview Woods-Steeles,7,Park,Pool,Recreation Center,Café,Music Store,Liquor Store,Chinese Restaurant,Bank,Coffee Shop,Japanese Restaurant
10,Beechborough-Greenbrook,7,Sandwich Place,Dessert Shop,Furniture / Home Store,Fast Food Restaurant,Check Cashing Service,Grocery Store,Coffee Shop,Park,Restaurant,Discount Store
15,Briar Hill-Belgravia,7,Furniture / Home Store,Fast Food Restaurant,Park,Coffee Shop,Pharmacy,Sandwich Place,Bakery,Caribbean Restaurant,Mediterranean Restaurant,Thai Restaurant
17,Broadview North,7,Greek Restaurant,Bakery,Café,Pharmacy,Fast Food Restaurant,Chinese Restaurant,Bar,Grocery Store,Theater,Flower Shop
18,Brookhaven-Amesbury,7,Portuguese Restaurant,Coffee Shop,Athletics & Sports,Dim Sum Restaurant,Electronics Store,Grocery Store,Bank,Supermarket,Fast Food Restaurant,Restaurant
20,Caledonia-Fairbank,7,Coffee Shop,Pharmacy,Grocery Store,Bus Stop,Park,Mexican Restaurant,Food & Drink Shop,Café,Sporting Goods Shop,Market
25,Clanton Park,7,Gym / Fitness Center,Italian Restaurant,Other Great Outdoors,Sandwich Place,Gym,Bagel Shop,Park,Asian Restaurant,Pet Store,Pharmacy
26,Cliffcrest,7,Discount Store,Fast Food Restaurant,Coffee Shop,Burger Joint,Liquor Store,Wings Joint,Bank,Furniture / Home Store,Pizza Place,Bistro
27,Corso Italia-Davenport,7,Italian Restaurant,Café,Breakfast Spot,Convenience Store,Bakery,Coffee Shop,Mexican Restaurant,Bus Stop,Bus Line,Bank


In [None]:
toronto_geo = r'Neighbourhoods.json'

# create map and display it
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)
map_toronto.choropleth(
    geo_data=toronto_geo,
    data=df,
    columns=['Neighbourhood Number','Longitude'],
    key_on='feature.properties.AREA_LONG_CODE',
    fill_color='YlOrRd', 
    fill_opacity=0.6, 
    line_opacity=0.2,
    legend_name='Toronto Neighbourhood')
# display the map of Toronto
map_toronto