In [1]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df_list = pd.read_html(url)

In [3]:
len(df_list)

3

In [4]:
df_list

[    Postal Code           Borough  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                          Neighbourhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

In [5]:
column_names = ['Postal_Code', 'Borough', 'Neighborhood']
column_names

['Postal_Code', 'Borough', 'Neighborhood']

In [6]:
pip list

Package                     Version
--------------------------- -------------------
absl-py                     0.9.0
aiohttp                     3.6.2
arcgis                      1.6.0
astor                       0.8.0
astropy                     4.0.1.post1
async-timeout               3.0.1
attrs                       19.3.0
azure-core                  1.8.0
azure-cosmos                4.1.0
backcall                    0.2.0
beautifulsoup4              4.9.1
biopython                   1.77
bleach                      3.1.5
blinker                     1.4
bokeh                       2.1.1
boto3                       1.13.11
botocore                    1.16.11
branca                      0.4.1
brotlipy                    0.7.0
ca-data-connector           11.2.0
cachetools                  4.1.1
cassandra-driver            3.24.0
certifi                     2020.6.20
cffi                        1.14.0
chardet                     3.0.4
click                       7.1.2
cloudpickle      

In [7]:
pd.test

<function pandas.util._tester.test(extra_args=None)>

In [8]:
df_list

[    Postal Code           Borough  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                          Neighbourhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

In [9]:
df_list=df_list[0]

In [10]:
df_list

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [11]:
df_list=df_list[df_list.Borough  != 'Not assigned']
df_list.head()

df_list=df_list.rename(columns={'Postal Code':'PostalCode'})

In [12]:
df_list.shape

(103, 3)

In [13]:
df_list=df_list.reset_index(drop=True)

In [14]:
df_list

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [15]:
df_list.shape

(103, 3)

In [16]:
import geocoder

In [17]:
def get_geocoder(PostalCode):
    lat_lng_coords = None 
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(PostalCode))
        lat_lng_coords = g.latlng
        
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    return latitude,longitude
    latitude, longitude

In [18]:
toronto=df_list
toronto['Latitude'], toronto['Longitude'] = zip(*toronto['PostalCode'].apply(get_geocoder))

In [19]:
toronto

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.64869,-79.38544
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945


In [20]:
toronto.dtypes

PostalCode        object
Borough           object
Neighbourhood     object
Latitude         float64
Longitude        float64
dtype: object

In [21]:
import json
!pip install folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium




In [22]:
#create map of Toronto
map_toronto=folium.Map(location=[43.6532, -79.3832], zoom_start=5)


In [23]:
map_toronto

In [24]:

#add markers to map

for latitude, longitude, borough, neighbourhood in zip(toronto['Latitude'], toronto['Longitude'], toronto['Borough'], toronto['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        colors='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [25]:
CLIENT_ID = '1YFANG4CF3WP2NPCIES0COJ3F2QI4VBLGJF1HCAHBXPIJY0R' # your Foursquare ID
CLIENT_SECRET = 'TGRK15245ANJ3SPZRMAKFRMXHQPHG34HV43UD20YO0FHF1JO' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 1YFANG4CF3WP2NPCIES0COJ3F2QI4VBLGJF1HCAHBXPIJY0R
CLIENT_SECRET:TGRK15245ANJ3SPZRMAKFRMXHQPHG34HV43UD20YO0FHF1JO


In [26]:
toronto.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188


In [27]:
toronto.loc[0,'Neighbourhood']

'Parkwoods'

In [28]:
neighborhood_latitude = toronto.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = toronto.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = toronto.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.75245000000007, -79.32990999999998.


In [29]:
LIMIT = 100
radius = 1000
url ='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url


'https://api.foursquare.com/v2/venues/explore?&client_id=1YFANG4CF3WP2NPCIES0COJ3F2QI4VBLGJF1HCAHBXPIJY0R&client_secret=TGRK15245ANJ3SPZRMAKFRMXHQPHG34HV43UD20YO0FHF1JO&v=20180605&ll=43.75245000000007,-79.32990999999998&radius=1000&limit=100'

In [30]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5fa09b4f05724978509cfd6d'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 23,
  'suggestedBounds': {'ne': {'lat': 43.76145000900008,
    'lng': -79.31747364773157},
   'sw': {'lat': 43.743449991000055, 'lng': -79.3423463522684}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b8991cbf964a520814232e3',
       'name': "Allwyn's Bakery",
       'location': {'address': '81 Underhill drive',
        'lat': 43.75984035203157,
        'lng': -79.32471879917513,
        'labeledLatLngs': [{'label': 'display',

In [31]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [32]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,Allwyn's Bakery,Caribbean Restaurant,43.75984,-79.324719
1,Brookbanks Park,Park,43.751976,-79.33214
2,Tim Hortons,Café,43.760668,-79.326368
3,Bruno's valu-mart,Grocery Store,43.746143,-79.32463
4,A&W,Fast Food Restaurant,43.760643,-79.326865


In [33]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

23 venues were returned by Foursquare.


In [34]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [35]:
toronto_venues = getNearbyVenues(names=toronto['Neighbourhood'],
                                   latitudes=toronto['Latitude'],
                                   longitudes=toronto['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [36]:
print(toronto_venues.shape)

(2404, 7)


In [37]:
toronto_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75245,-79.32991,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75245,-79.32991,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.73057,-79.31306,Wigmore Park,43.731023,-79.310771,Park
3,Victoria Village,43.73057,-79.31306,Memories of Africa,43.726602,-79.312427,Grocery Store
4,Victoria Village,43.73057,-79.31306,Guardian Drug,43.730584,-79.307432,Pharmacy


In [38]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,15,15,15,15,15,15
"Alderwood, Long Branch",4,4,4,4,4,4
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",19,19,19,19,19,19
Berczy Park,60,60,60,60,60,60
...,...,...,...,...,...,...
"Willowdale, Willowdale West",5,5,5,5,5,5
Woburn,4,4,4,4,4,4
Woodbine Heights,18,18,18,18,18,18
York Mills West,3,3,3,3,3,3


In [39]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 269 uniques categories.


In [40]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head(40)

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
9,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [41]:
toronto_onehot.shape

(2404, 270)

In [42]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,Agincourt,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.000000,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
2,Bayview Village,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
3,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
4,Berczy Park,0.0,0.0,0.0,0.016667,0.0,0.016667,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.016667,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,"Willowdale, Willowdale West",0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
92,Woburn,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
93,Woodbine Heights,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.055556,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
94,York Mills West,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0


In [43]:
toronto_grouped.shape

(96, 270)

In [44]:
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                  venue  freq
0    Chinese Restaurant  0.13
1  Hong Kong Restaurant  0.07
2       Badminton Court  0.07
3        Discount Store  0.07
4   Shanghai Restaurant  0.07


----Alderwood, Long Branch----
                   venue  freq
0      Convenience Store  0.25
1                    Pub  0.25
2  Performing Arts Venue  0.25
3                    Gym  0.25
4           Neighborhood  0.00


----Bayview Village----
                        venue  freq
0          Golf Driving Range  0.25
1  Construction & Landscaping  0.25
2                       Trail  0.25
3                        Park  0.25
4                 Music Venue  0.00


----Bedford Park, Lawrence Manor East----
                     venue  freq
0           Sandwich Place  0.11
1              Coffee Shop  0.11
2       Italian Restaurant  0.11
3  Comfort Food Restaurant  0.05
4              Sports Club  0.05


----Berczy Park----
                venue  freq
0         Coffee Shop  0.08
1          Restaurant

In [45]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [46]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Hong Kong Restaurant,Supermarket,Sushi Restaurant,Bakery,Bubble Tea Shop,Skating Rink,Vietnamese Restaurant,Shopping Mall,Badminton Court
1,"Alderwood, Long Branch",Convenience Store,Pub,Performing Arts Venue,Gym,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Donut Shop
2,Bayview Village,Construction & Landscaping,Golf Driving Range,Trail,Park,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Event Space
3,"Bedford Park, Lawrence Manor East",Sandwich Place,Italian Restaurant,Coffee Shop,Comfort Food Restaurant,Café,Juice Bar,Restaurant,Thai Restaurant,Liquor Store,Pub
4,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Breakfast Spot,Beer Bar,Bakery,Cheese Shop,Seafood Restaurant,Farmers Market,Sandwich Place


In [47]:
# set number of clusters
kclusters = 3

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [48]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto

# merge toronot_grouped with toronto to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood', how = 'right')

toronto_merged.head() 

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.75245,-79.32991,1,Food & Drink Shop,Park,Falafel Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Farm,Dumpling Restaurant
1,M4A,North York,Victoria Village,43.73057,-79.31306,1,Grocery Store,Park,Pharmacy,Zoo Exhibit,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,0,Coffee Shop,Breakfast Spot,Pub,Electronics Store,Distribution Center,Bakery,Restaurant,Thai Restaurant,Theater,Italian Restaurant
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042,0,Clothing Store,Furniture / Home Store,Toy / Game Store,Women's Store,Cosmetics Shop,American Restaurant,Bookstore,Food Court,Coffee Shop,Men's Store
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188,0,Coffee Shop,Sandwich Place,Café,Falafel Restaurant,Bank,Theater,Fried Chicken Joint,Italian Restaurant,Gastropub,Park


In [49]:
toronto_merged.head(40)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.75245,-79.32991,1,Food & Drink Shop,Park,Falafel Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Farm,Dumpling Restaurant
1,M4A,North York,Victoria Village,43.73057,-79.31306,1,Grocery Store,Park,Pharmacy,Zoo Exhibit,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,0,Coffee Shop,Breakfast Spot,Pub,Electronics Store,Distribution Center,Bakery,Restaurant,Thai Restaurant,Theater,Italian Restaurant
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042,0,Clothing Store,Furniture / Home Store,Toy / Game Store,Women's Store,Cosmetics Shop,American Restaurant,Bookstore,Food Court,Coffee Shop,Men's Store
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188,0,Coffee Shop,Sandwich Place,Café,Falafel Restaurant,Bank,Theater,Fried Chicken Joint,Italian Restaurant,Gastropub,Park
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.66263,-79.52831,0,Pharmacy,Grocery Store,Café,Shopping Mall,Skating Rink,Bank,Park,Ethiopian Restaurant,Electronics Store,Elementary School
6,M1B,Scarborough,"Malvern, Rouge",43.81139,-79.19662,0,Zoo Exhibit,Fast Food Restaurant,Furniture / Home Store,Fish Market,Fish & Chips Shop,Field,Flea Market,Farmers Market,Food,Farm
7,M3B,North York,Don Mills,43.74923,-79.36186,0,Intersection,Coffee Shop,Grocery Store,Gas Station,Beer Store,Supermarket,Smoke Shop,Bubble Tea Shop,Gym,Park
13,M3C,North York,Don Mills,43.72168,-79.34352,0,Intersection,Coffee Shop,Grocery Store,Gas Station,Beer Store,Supermarket,Smoke Shop,Bubble Tea Shop,Gym,Park
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.70718,-79.31192,0,Pizza Place,Intersection,Pharmacy,Pet Store,Rock Climbing Spot,Café,Fast Food Restaurant,Bus Line,Breakfast Spot,Gastropub


In [50]:
toronto_merged['Cluster Labels'].nunique()

3

In [51]:
toronto_merged.dropna()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.75245,-79.32991,1,Food & Drink Shop,Park,Falafel Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Farm,Dumpling Restaurant
1,M4A,North York,Victoria Village,43.73057,-79.31306,1,Grocery Store,Park,Pharmacy,Zoo Exhibit,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,0,Coffee Shop,Breakfast Spot,Pub,Electronics Store,Distribution Center,Bakery,Restaurant,Thai Restaurant,Theater,Italian Restaurant
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042,0,Clothing Store,Furniture / Home Store,Toy / Game Store,Women's Store,Cosmetics Shop,American Restaurant,Bookstore,Food Court,Coffee Shop,Men's Store
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188,0,Coffee Shop,Sandwich Place,Café,Falafel Restaurant,Bank,Theater,Fried Chicken Joint,Italian Restaurant,Gastropub,Park
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113,2,Pool,Farm,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Food
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133,0,Coffee Shop,Gay Bar,Japanese Restaurant,Restaurant,Sushi Restaurant,Café,Hotel,Bubble Tea Shop,Men's Store,Mediterranean Restaurant
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.64869,-79.38544,0,Coffee Shop,Hotel,Restaurant,Café,Italian Restaurant,Asian Restaurant,Bar,Pub,Theater,Salon / Barbershop
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945,0,Chinese Restaurant,Italian Restaurant,Flower Shop,Fast Food Restaurant,Sushi Restaurant,Park,Bank,Coffee Shop,Electronics Store,Elementary School


In [52]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster -1],
        fill=True,
        fill_color=rainbow[cluster -1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [53]:
#cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(4, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,-79.36264,0,Coffee Shop,Breakfast Spot,Pub,Electronics Store,Distribution Center,Bakery,Restaurant,Thai Restaurant,Theater,Italian Restaurant
3,North York,-79.45042,0,Clothing Store,Furniture / Home Store,Toy / Game Store,Women's Store,Cosmetics Shop,American Restaurant,Bookstore,Food Court,Coffee Shop,Men's Store
4,Downtown Toronto,-79.39188,0,Coffee Shop,Sandwich Place,Café,Falafel Restaurant,Bank,Theater,Fried Chicken Joint,Italian Restaurant,Gastropub,Park
5,Etobicoke,-79.52831,0,Pharmacy,Grocery Store,Café,Shopping Mall,Skating Rink,Bank,Park,Ethiopian Restaurant,Electronics Store,Elementary School
6,Scarborough,-79.19662,0,Zoo Exhibit,Fast Food Restaurant,Furniture / Home Store,Fish Market,Fish & Chips Shop,Field,Flea Market,Farmers Market,Food,Farm
...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,Downtown Toronto,-79.38146,0,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Gym,Asian Restaurant,Salad Place,Japanese Restaurant,Seafood Restaurant
99,Downtown Toronto,-79.38133,0,Coffee Shop,Gay Bar,Japanese Restaurant,Restaurant,Sushi Restaurant,Café,Hotel,Bubble Tea Shop,Men's Store,Mediterranean Restaurant
100,East Toronto,-79.38544,0,Coffee Shop,Hotel,Restaurant,Café,Italian Restaurant,Asian Restaurant,Bar,Pub,Theater,Salon / Barbershop
101,Etobicoke,-79.48945,0,Chinese Restaurant,Italian Restaurant,Flower Shop,Fast Food Restaurant,Sushi Restaurant,Park,Bank,Coffee Shop,Electronics Store,Elementary School


In [54]:
#cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(4, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,-79.32991,1,Food & Drink Shop,Park,Falafel Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Farm,Dumpling Restaurant
1,North York,-79.31306,1,Grocery Store,Park,Pharmacy,Zoo Exhibit,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant
18,Scarborough,-79.1747,1,Construction & Landscaping,Park,Gym / Fitness Center,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Dumpling Restaurant
27,North York,-79.35558,1,Park,Residential Building (Apartment / Condo),Zoo Exhibit,Event Space,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Falafel Restaurant
35,East York,-79.33418,1,Convenience Store,Park,Intersection,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Dumpling Restaurant
45,North York,-79.3806,1,Park,Zoo Exhibit,Falafel Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Farm,Dumpling Restaurant
49,North York,-79.48874,1,Bakery,Park,Falafel Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Zoo Exhibit,Eastern European Restaurant
66,North York,-79.40033,1,Convenience Store,Speakeasy,Park,Falafel Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Farm
68,Central Toronto,-79.4144,1,French Restaurant,Park,Zoo Exhibit,Farm,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market
69,West Toronto,-79.46281,1,Convenience Store,Residential Building (Apartment / Condo),Park,Sandwich Place,Creperie,Cuban Restaurant,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market


In [55]:
#cluster 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(4, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
98,Etobicoke,-79.51113,2,Pool,Farm,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Food
