In [2]:
import requests
import csv
import lxml.html as lh
from bs4 import BeautifulSoup
import pandas as pd
from arcgis.gis import GIS
import urllib
from geopy.geocoders import Nominatim
import folium # map rendering library
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib as plt
import numpy as np
from pandas.io.json import json_normalize
from sklearn.preprocessing import MinMaxScaler



Using Beautiful Soup's library functions to extract Postal codes, Borough and Neighbourhood data from Wikipedia page

In [3]:
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = requests.get(URL)
html = BeautifulSoup(page.text, 'html.parser')
table = html.findAll('table',{"class":"sortable"})[0]
end_col = len(table.findAll('th'))
end_row = len(table.findAll('tr'))



Logic to extract data with html tags & appending them to empty list


In [4]:
row_data=[]
start_col = 0
start_row = 1   
tr = table.findAll(['tr'])[start_row:end_row]
th = table.find_all(['th'])[start_col:end_col]
th_data = [col.text.strip('\n') for col in th]
for cell in tr:
    td = cell.find_all('td')
    row = [i.text.replace('\n','') for i in td]
    if row[1] != "Not assigned":
        row_data.append(row)

Creating dataframe and setting the columns appropriately

In [5]:
df_toronto = pd.DataFrame(row_data) 
df_toronto.columns = th_data
df_toronto.rename(columns={"Postal Code": "PostalCode"}, inplace=True)
df_toronto

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


Using the URL provided in the assignment to get the co-ordinates based on the Postal Code.

In [6]:
df_coordinates = pd.read_csv('http://cocl.us/Geospatial_data')
df_coordinates.rename(columns={"Postal Code": "PostalCode"}, inplace=True)
df_coordinates.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merging the coordinates dataframe with the Neighbourhoods dataframe

In [7]:
df_toronto = pd.merge(df_toronto, df_coordinates, on='PostalCode', how='left')
df_toronto

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937




Filtering the data from all the neighbourhoods to with Boroughs containing Toronto in it's name


In [8]:
toronto_data = df_toronto[df_toronto['Borough'].str.contains('Toronto')].reset_index(drop=True)
toronto_data


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259



Create a map of Toronto with neighborhoods superimposed on top.

In [17]:


CLIENT_ID = 'IZQOW0KRVWJVQLKLEUGHHAMIU2TVJH0S0A3S1SVDAISBV3OS' # your Foursquare ID
CLIENT_SECRET = 'Z2NPM51THSEZKFKT2CNDQ3QBMLBBCECG2QUVOU1SL4JDVYMT' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)



Your credentails:
CLIENT_ID: IZQOW0KRVWJVQLKLEUGHHAMIU2TVJH0S0A3S1SVDAISBV3OS
CLIENT_SECRET:Z2NPM51THSEZKFKT2CNDQ3QBMLBBCECG2QUVOU1SL4JDVYMT


In [12]:
toronto_data.loc[0, 'Neighbourhood']

'Regent Park, Harbourfront'

In [13]:
neighborhood_latitude = toronto_data.loc[0, 'Latitude']
neighborhood_longitude = toronto_data.loc[0, 'Longitude']
neighborhood_name = toronto_data.loc[0, 'Neighbourhood']
print(neighborhood_latitude, neighborhood_longitude, neighborhood_name)

43.6542599 -79.3606359 Regent Park, Harbourfront


In [18]:


LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION,
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url



'https://api.foursquare.com/v2/venues/explore?&client_id=IZQOW0KRVWJVQLKLEUGHHAMIU2TVJH0S0A3S1SVDAISBV3OS&client_secret=Z2NPM51THSEZKFKT2CNDQ3QBMLBBCECG2QUVOU1SL4JDVYMT&v=20180604&ll=43.6542599,-79.3606359&radius=500&limit=100'

In [19]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f39493f2002af58d5e443ef'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 45,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
 

In [20]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [23]:

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Impact Kitchen,Restaurant,43.656369,-79.35698


In [24]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

45 venues were returned by Foursquare.


In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)



toronto_venues = getNearbyVenues(names=df_toronto['Neighbourhood'],
                                   latitudes=df_toronto['Latitude'],
                                   longitudes=df_toronto['Longitude']
                                  )
toronto_venues



Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.332140,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
5,Victoria Village,43.725882,-79.315572,The Frig,43.727051,-79.317418,French Restaurant
6,"Regent Park, Harbourfront",43.654260,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
7,"Regent Park, Harbourfront",43.654260,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
8,"Regent Park, Harbourfront",43.654260,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
9,"Regent Park, Harbourfront",43.654260,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [26]:


toronto_venues.head()



Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


In [27]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,5,5,5,5,5,5
"Alderwood, Long Branch",9,9,9,9,9,9
"Bathurst Manor, Wilson Heights, Downsview North",22,22,22,22,22,22
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",25,25,25,25,25,25
Berczy Park,59,59,59,59,59,59
"Birch Cliff, Cliffside West",4,4,4,4,4,4
"Brockton, Parkdale Village, Exhibition Place",23,23,23,23,23,23
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",17,17,17,17,17,17
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",16,16,16,16,16,16


In [28]:


print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))



There are 270 uniques categories.


In [29]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
toronto_onehot.shape

(2137, 270)

In [31]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Agincourt,0.000000,0.0,0.000000,0.000000,0.0000,0.0000,0.000,0.000,0.000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000
1,"Alderwood, Long Branch",0.000000,0.0,0.000000,0.000000,0.0000,0.0000,0.000,0.000,0.000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000
2,"Bathurst Manor, Wilson Heights, Downsview North",0.000000,0.0,0.000000,0.000000,0.0000,0.0000,0.000,0.000,0.000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000
3,Bayview Village,0.000000,0.0,0.000000,0.000000,0.0000,0.0000,0.000,0.000,0.000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000
4,"Bedford Park, Lawrence Manor East",0.000000,0.0,0.000000,0.000000,0.0000,0.0000,0.000,0.000,0.000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000
5,Berczy Park,0.000000,0.0,0.000000,0.000000,0.0000,0.0000,0.000,0.000,0.000,...,0.00000,0.00,0.016949,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000
6,"Birch Cliff, Cliffside West",0.000000,0.0,0.000000,0.000000,0.0000,0.0000,0.000,0.000,0.000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000
7,"Brockton, Parkdale Village, Exhibition Place",0.000000,0.0,0.000000,0.000000,0.0000,0.0000,0.000,0.000,0.000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000
8,"Business reply mail Processing Centre, South C...",0.058824,0.0,0.000000,0.000000,0.0000,0.0000,0.000,0.000,0.000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000
9,"CN Tower, King and Spadina, Railway Lands, Har...",0.000000,0.0,0.000000,0.062500,0.0625,0.0625,0.125,0.125,0.125,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000


In [32]:

num_top_venues = 10

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')



----Agincourt----
                             venue  freq
0                   Clothing Store   0.2
1                   Breakfast Spot   0.2
2                           Lounge   0.2
3                     Skating Rink   0.2
4        Latin American Restaurant   0.2
5        Middle Eastern Restaurant   0.0
6              Monument / Landmark   0.0
7  Molecular Gastronomy Restaurant   0.0
8       Modern European Restaurant   0.0
9                Mobile Phone Shop   0.0


----Alderwood, Long Branch----
                             venue  freq
0                      Pizza Place  0.22
1                      Coffee Shop  0.11
2                              Pub  0.11
3                     Skating Rink  0.11
4                              Gym  0.11
5                   Sandwich Place  0.11
6               Athletics & Sports  0.11
7                             Pool  0.11
8  Molecular Gastronomy Restaurant  0.00
9       Modern European Restaurant  0.00


----Bathurst Manor, Wilson Heights, Downsview

                             venue  freq
0                   Sandwich Place  0.25
1                     Skating Rink  0.25
2                              Bar  0.25
3                   Discount Store  0.25
4                      Yoga Studio  0.00
5               Miscellaneous Shop  0.00
6  Molecular Gastronomy Restaurant  0.00
7       Modern European Restaurant  0.00
8                Mobile Phone Shop  0.00
9               Mexican Restaurant  0.00


----Don Mills----
                 venue  freq
0                  Gym  0.11
1       Clothing Store  0.07
2  Japanese Restaurant  0.07
3          Coffee Shop  0.07
4           Restaurant  0.07
5           Beer Store  0.07
6       Baseball Field  0.04
7                 Café  0.04
8   Dim Sum Restaurant  0.04
9       Sandwich Place  0.04


----Dorset Park, Wexford Heights, Scarborough Town Centre----
                             venue  freq
0                Indian Restaurant  0.33
1                        Pet Store  0.17
2               Light R

                             venue  freq
0                       Hobby Shop   0.2
1                 Department Store   0.2
2                Convenience Store   0.2
3                      Bus Station   0.2
4                      Coffee Shop   0.2
5                      Yoga Studio   0.0
6                Mobile Phone Shop   0.0
7              Moroccan Restaurant   0.0
8              Monument / Landmark   0.0
9  Molecular Gastronomy Restaurant   0.0


----Kensington Market, Chinatown, Grange Park----
                           venue  freq
0                    Coffee Shop  0.06
1                           Café  0.06
2  Vegetarian / Vegan Restaurant  0.06
3             Mexican Restaurant  0.05
4                   Dessert Shop  0.05
5          Vietnamese Restaurant  0.05
6                   Burger Joint  0.03
7           Caribbean Restaurant  0.03
8                         Bakery  0.03
9                            Bar  0.03


----Kingsview Village, St. Phillips, Martin Grove Gardens, Richvie

                        venue  freq
0                        Park   0.5
1           Food & Drink Shop   0.5
2                 Yoga Studio   0.0
3               Metro Station   0.0
4  Modern European Restaurant   0.0
5           Mobile Phone Shop   0.0
6          Miscellaneous Shop   0.0
7   Middle Eastern Restaurant   0.0
8          Mexican Restaurant   0.0
9    Mediterranean Restaurant   0.0


----Queen's Park, Ontario Provincial Government----
                 venue  freq
0          Coffee Shop  0.26
1    College Cafeteria  0.06
2          Yoga Studio  0.03
3   Mexican Restaurant  0.03
4                  Bar  0.03
5     Sushi Restaurant  0.03
6             Beer Bar  0.03
7  Fried Chicken Joint  0.03
8   Persian Restaurant  0.03
9        Smoothie Shop  0.03


----Regent Park, Harbourfront----
                 venue  freq
0          Coffee Shop  0.18
1                 Park  0.07
2                  Pub  0.07
3                 Café  0.07
4               Bakery  0.07
5       Breakfast Spo

9             Beer Bar  0.03


----Victoria Village----
                             venue  freq
0            Portuguese Restaurant  0.25
1                     Hockey Arena  0.25
2                French Restaurant  0.25
3                      Coffee Shop  0.25
4        Middle Eastern Restaurant  0.00
5              Monument / Landmark  0.00
6  Molecular Gastronomy Restaurant  0.00
7       Modern European Restaurant  0.00
8                Mobile Phone Shop  0.00
9               Miscellaneous Shop  0.00


----West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale----
                             venue  freq
0                     Home Service   1.0
1                      Yoga Studio   0.0
2               Mexican Restaurant   0.0
3              Monument / Landmark   0.0
4  Molecular Gastronomy Restaurant   0.0
5       Modern European Restaurant   0.0
6                Mobile Phone Shop   0.0
7               Miscellaneous Shop   0.0
8        Middle Eastern Restaurant   0.0
9 

In [34]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]



num_top_venues = 100

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()



Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,...,91th Most Common Venue,92th Most Common Venue,93th Most Common Venue,94th Most Common Venue,95th Most Common Venue,96th Most Common Venue,97th Most Common Venue,98th Most Common Venue,99th Most Common Venue,100th Most Common Venue
0,Agincourt,Lounge,Skating Rink,Latin American Restaurant,Breakfast Spot,Clothing Store,Drugstore,Discount Store,Distribution Center,Dog Run,...,Art Gallery,Aquarium,Antique Shop,American Restaurant,Airport Terminal,Airport Service,Airport Lounge,Airport Gate,Airport Food Court,Airport
1,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Sandwich Place,Athletics & Sports,Pub,Pool,Skating Rink,Gym,Concert Hall,...,Arts & Crafts Store,Art Museum,Art Gallery,Aquarium,Antique Shop,American Restaurant,Airport Terminal,Airport Service,Airport Lounge,Airport Gate
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Park,Deli / Bodega,Supermarket,Middle Eastern Restaurant,Sushi Restaurant,Ice Cream Shop,Shopping Mall,...,Auto Workshop,Athletics & Sports,Asian Restaurant,Arts & Crafts Store,Art Museum,Art Gallery,Aquarium,Antique Shop,American Restaurant,Airport Terminal
3,Bayview Village,Café,Bank,Chinese Restaurant,Japanese Restaurant,Women's Store,Diner,Discount Store,Distribution Center,Dog Run,...,Aquarium,Antique Shop,American Restaurant,Airport Terminal,Airport Service,Airport Lounge,Airport Gate,Airport Food Court,Airport,Afghan Restaurant
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Thai Restaurant,Sandwich Place,Restaurant,Juice Bar,Butcher,Café,Indian Restaurant,...,BBQ Joint,Auto Workshop,Athletics & Sports,Asian Restaurant,Arts & Crafts Store,Art Museum,Art Gallery,Aquarium,Antique Shop,Airport Terminal


In [35]:
# set number of clusters
kclusters = 4

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:200]

array([2, 0, 0, 2, 0, 0, 2, 2, 2, 2, 1, 0, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2,
       0, 2, 2, 2, 1, 0, 0, 0, 1, 0, 2, 2, 2, 0, 2, 2, 0, 3, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 2, 1, 2, 1, 2, 1, 0, 2, 0, 3, 2, 0, 1, 0, 0, 0, 1,
       2, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 2, 0, 2, 0,
       2, 2, 1, 0, 0, 0, 1, 1], dtype=int32)

In [36]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

neighborhoods_venues_sorted

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

toronto_merged # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,91th Most Common Venue,92th Most Common Venue,93th Most Common Venue,94th Most Common Venue,95th Most Common Venue,96th Most Common Venue,97th Most Common Venue,98th Most Common Venue,99th Most Common Venue,100th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Park,Café,Pub,...,Burger Joint,Building,Bubble Tea Shop,Bridal Shop,Brewery,Auto Workshop,BBQ Joint,Brazilian Restaurant,Boutique,Bookstore
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,College Cafeteria,Sushi Restaurant,Bar,...,Bubble Tea Shop,Bridal Shop,Brewery,Breakfast Spot,Brazilian Restaurant,Boutique,Bookstore,Boat or Ferry,Bistro,Athletics & Sports
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Café,Cosmetics Shop,...,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Deli / Bodega,Dance Studio
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Café,Coffee Shop,Cocktail Bar,Cosmetics Shop,...,Afghan Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Cupcake Shop,College Stadium,Airport Lounge,Beer Store,Bus Line,Burger Joint
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,2,Trail,Health Food Store,Pub,Doner Restaurant,...,Art Gallery,Aquarium,Antique Shop,American Restaurant,Airport Terminal,Airport Service,Airport Lounge,Airport Gate,Airport Food Court,Airport
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Café,Cheese Shop,...,Bike Shop,Belgian Restaurant,Beer Store,Bed & Breakfast,Baseball Stadium,Baseball Field,Bar,Bank,Baby Store,Auto Workshop
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Italian Restaurant,Café,Sandwich Place,...,Event Space,Cosmetics Shop,Art Gallery,Construction & Landscaping,Bridal Shop,Butcher,Bakery,Business Service,Bus Stop,Bank
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,2,Grocery Store,Café,Park,Baby Store,...,Art Museum,Art Gallery,Aquarium,Antique Shop,American Restaurant,Airport Terminal,Airport Service,Airport Lounge,Airport Gate,Airport Food Court
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,0,Coffee Shop,Clothing Store,Café,Hotel,...,Discount Store,Diner,Food & Drink Shop,Dim Sum Restaurant,Climbing Gym,Comic Shop,Comfort Food Restaurant,Belgian Restaurant,Beer Store,Beer Bar
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,2,Bakery,Pharmacy,Gym / Fitness Center,Music Venue,...,Bistro,Art Museum,Art Gallery,Aquarium,Antique Shop,American Restaurant,Airport Terminal,Airport Service,Airport Lounge,Airport Gate


In [38]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,91th Most Common Venue,92th Most Common Venue,93th Most Common Venue,94th Most Common Venue,95th Most Common Venue,96th Most Common Venue,97th Most Common Venue,98th Most Common Venue,99th Most Common Venue,100th Most Common Venue
0,Downtown Toronto,0,Coffee Shop,Park,Café,Pub,Bakery,Breakfast Spot,Theater,Ice Cream Shop,...,Burger Joint,Building,Bubble Tea Shop,Bridal Shop,Brewery,Auto Workshop,BBQ Joint,Brazilian Restaurant,Boutique,Bookstore
1,Downtown Toronto,0,Coffee Shop,College Cafeteria,Sushi Restaurant,Bar,Beer Bar,Smoothie Shop,Sandwich Place,Café,...,Bubble Tea Shop,Bridal Shop,Brewery,Breakfast Spot,Brazilian Restaurant,Boutique,Bookstore,Boat or Ferry,Bistro,Athletics & Sports
2,Downtown Toronto,0,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Japanese Restaurant,Bubble Tea Shop,Ramen Restaurant,Middle Eastern Restaurant,...,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Deli / Bodega,Dance Studio
3,Downtown Toronto,0,Café,Coffee Shop,Cocktail Bar,Cosmetics Shop,Restaurant,Clothing Store,American Restaurant,Park,...,Afghan Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Cupcake Shop,College Stadium,Airport Lounge,Beer Store,Bus Line,Burger Joint
5,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Café,Cheese Shop,Seafood Restaurant,Restaurant,Farmers Market,Bakery,...,Bike Shop,Belgian Restaurant,Beer Store,Bed & Breakfast,Baseball Stadium,Baseball Field,Bar,Bank,Baby Store,Auto Workshop
6,Downtown Toronto,0,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Department Store,Bubble Tea Shop,Burger Joint,Japanese Restaurant,...,Event Space,Cosmetics Shop,Art Gallery,Construction & Landscaping,Bridal Shop,Butcher,Bakery,Business Service,Bus Stop,Bank
8,Downtown Toronto,0,Coffee Shop,Clothing Store,Café,Hotel,Restaurant,Gym,Bar,Steakhouse,...,Discount Store,Diner,Food & Drink Shop,Dim Sum Restaurant,Climbing Gym,Comic Shop,Comfort Food Restaurant,Belgian Restaurant,Beer Store,Beer Bar
10,Downtown Toronto,0,Coffee Shop,Aquarium,Hotel,Café,Scenic Lookout,Fried Chicken Joint,Restaurant,Italian Restaurant,...,Dim Sum Restaurant,Field,Donut Shop,Fish & Chips Shop,Comic Shop,Women's Store,Colombian Restaurant,Boutique,Boat or Ferry,Bike Shop
11,West Toronto,0,Bar,Asian Restaurant,Coffee Shop,Restaurant,Vietnamese Restaurant,Café,Men's Store,Yoga Studio,...,Auto Workshop,Business Service,BBQ Joint,Bus Stop,Bus Station,Bus Line,Burrito Place,Burger Joint,Building,Bubble Tea Shop
12,East Toronto,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Restaurant,Spa,Bakery,...,Baby Store,BBQ Joint,Auto Workshop,Asian Restaurant,Bike Shop,Arts & Crafts Store,Art Museum,Art Gallery,Aquarium,Antique Shop


In [39]:


toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,91th Most Common Venue,92th Most Common Venue,93th Most Common Venue,94th Most Common Venue,95th Most Common Venue,96th Most Common Venue,97th Most Common Venue,98th Most Common Venue,99th Most Common Venue,100th Most Common Venue
18,Central Toronto,1,Park,Swim School,Bus Line,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,...,Aquarium,Antique Shop,American Restaurant,Airport Terminal,Airport Service,Airport Lounge,Airport Gate,Airport Food Court,Airport,Afghan Restaurant
21,Central Toronto,1,Trail,Park,Sushi Restaurant,Bus Line,Jewelry Store,Doner Restaurant,Diner,Discount Store,...,Art Gallery,Aquarium,Antique Shop,American Restaurant,Airport Terminal,Airport Service,Airport Lounge,Airport Gate,Airport Food Court,Airport
29,Central Toronto,1,Park,Lawyer,Trail,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,...,Aquarium,Antique Shop,American Restaurant,Airport Terminal,Airport Service,Airport Lounge,Airport Gate,Airport Food Court,Airport,Afghan Restaurant
33,Downtown Toronto,1,Park,Playground,Trail,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Ethiopian Restaurant,Drugstore,...,Art Gallery,Aquarium,Antique Shop,American Restaurant,Airport Terminal,Airport Service,Airport Lounge,Airport Gate,Airport Food Court,Airport


In [40]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,91th Most Common Venue,92th Most Common Venue,93th Most Common Venue,94th Most Common Venue,95th Most Common Venue,96th Most Common Venue,97th Most Common Venue,98th Most Common Venue,99th Most Common Venue,100th Most Common Venue
