# Segmenting and Clustering Neighborhoods in Toronto


In [42]:
!pip install folium
!pip install requests_html
!pip install geopy
import requests
import folium
import requests_html
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import pandas as pd
import numpy as np
# import k-means from clustering stage
from sklearn.cluster import KMeans


# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors



### 1. Get all Neighbourhoods Dateset in Toronto.

first using requests_html library to get the neighborhood list of Toronro.

In [2]:
wikipage_url = "https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Toronto"

Session = requests_html.HTMLSession()
wiki_page = Session.get(wikipage_url)
Neighbour_tbdy = wiki_page.html.xpath('/html/body/div[3]/div[3]/div[5]/div[1]/table[1]/tbody')
all_tr_elmts = Neighbour_tbdy[0].find("tbody tr")[1:]

all_neighbour_list = []
for tr in all_tr_elmts:
  all_neighbour_list.extend(tr.find('td')[1].text.split(','))
all_neighbour_list[:5]



['Downtown',
 ' Harbourfront',
 ' Little Italy',
 ' Little Portugal',
 ' Dufferin Grove']

Use Nominatim to get latitude and longitute data of every neighborhood.

In [3]:
address = 'University'

geolocator = Nominatim(user_agent="nothing")
location = geolocator.geocode(address + ",Toronto")
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitude, longitude))

The geograpical coordinate are 43.6561192, -79.38467.


In [4]:
# define the dataframe columns
column_names = ['Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)
neighborhoods

Unnamed: 0,Neighborhood,Latitude,Longitude


In [8]:
for neighborhood_name in all_neighbour_list:
  # print(neighborhood_name)
  geolocator = Nominatim(user_agent="nothing")
  try:
    location = geolocator.geocode(neighborhood_name + ",Toronto")
    neighborhood_lat = location.latitude
    neighborhood_lon = location.longitude
    neighborhoods = neighborhoods.append({'Neighborhood': neighborhood_name,
                        'Latitude': neighborhood_lat,
                        'Longitude': neighborhood_lon}, ignore_index=True)
  except:
    pass
  
  


## 1. Explore Neighbourhood Data in Toronato

In [6]:
print(neighborhoods.shape, len(all_neighbour_list))
neighborhoods.head()

(195, 3) 225


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Downtown,43.654174,-79.380812
1,Harbourfront,43.64008,-79.38015
2,Little Italy,43.655208,-79.414877
3,Little Portugal,43.647413,-79.431116
4,Dufferin Grove,43.653632,-79.426439


#### Create a map of Tornoto with neighborhoods superimposed on top.


In [7]:
# create map of Toronto using latitude and longitude values
address = 'Toronto'
geolocator = Nominatim(user_agent="explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Tornoto are {}, {}.'.format(latitude, longitude))
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Neighborhood']):
      label = '{}'.format(neighborhood)
      label = folium.Popup(label, parse_html=True)
      folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='red',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map_toronto)
    
map_toronto

The geograpical coordinate of Tornoto are 43.6534817, -79.3839347.


In [9]:
CLIENT_ID = 'FAKEID' # your Foursquare ID
CLIENT_SECRET = 'FAKESECRET' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

def getNearbyVenues(names, latitudes, longitudes, radius=500):
  venues_list=[]
  for name, lat, lng in zip(names, latitudes, longitudes):
      print(name)
      # create the API request URL
      url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
          CLIENT_ID, 
          CLIENT_SECRET, 
          VERSION, 
          lat, 
          lng, 
          radius, 
          LIMIT)
          
      # make the GET request
      results = requests.get(url).json()["response"]['groups'][0]['items']
      
      # return only relevant information for each nearby venue
      venues_list.append([(
          name, 
          lat, 
          lng, 
          v['venue']['name'], 
          v['venue']['location']['lat'], 
          v['venue']['location']['lng'],  
          v['venue']['categories'][0]['name']) for v in results])

  nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
  nearby_venues.columns = ['Neighborhood', 
                'Neighborhood Latitude', 
                'Neighborhood Longitude', 
                'Venue', 
                'Venue Latitude', 
                'Venue Longitude', 
                'Venue Category']
  
  return(nearby_venues)

In [10]:
toronto_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                latitudes=neighborhoods['Latitude'],
                longitudes=neighborhoods['Longitude']
              )
print(toronto_venues.shape)
toronto_venues.head()

Downtown
 Harbourfront
 Little Italy
 Little Portugal
 Dufferin Grove
 Palmerston
 University
 Bay Street Corridor
 Kensington Market
 Chinatown
 Trinity Bellwoods
 South Niagara
 Island airport
 The Islands
 Queen's Park
 Victoria Hotel
 Central Bay Street
 First Canadian Place
 Design Exchange
 Adelaide
 University of Toronto
 Union Station
The Annex
 Yorkville
 South Hill
 Summerhill
 Wychwood Park
 Deer Park
 Casa Loma
Forest Hill South
 Oakwood–Vaughan
 Humewood–Cedarvale
 Corso Italia
 Humewood-Cedarvale
 Forest Hill Road Park
Bedford Park
 Lawrence Manor
 North Toronto
 Forest Hill North
 Lawrence Park
 Lawrence Heights
 Roselawn
North York
 Clanton Park
 Bathurst Manor
Willowdale West
 Newtonbrook West
 Lansing-Westgate
Cabbagetown
 St. Lawrence Market
 Toronto waterfront
 Moss Park
 Church and Wellesley
 Garden District
 Regent Park
 St. James Town
 Ryerson
 Berczy Park
Moore Park
 Rosedale
Davisville Village
 Midtown Toronto
 Mount Pleasant
 Davisville North
 Davisville
Leasi

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Downtown,43.654174,-79.380812,Elgin And Winter Garden Theatres,43.653394,-79.378507,Theater
1,Downtown,43.654174,-79.380812,UNIQLO ユニクロ,43.65591,-79.380641,Clothing Store
2,Downtown,43.654174,-79.380812,Indigo,43.653515,-79.380696,Bookstore
3,Downtown,43.654174,-79.380812,Ed Mirvish Theatre,43.655102,-79.379768,Theater
4,Downtown,43.654174,-79.380812,LUSH,43.653557,-79.3804,Cosmetics Shop


Let's check how many venues were returned for each neighborhood

In [12]:
toronto_venues.groupby("Neighborhood").count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,200,200,200,200,200,200
Alderwood,12,12,12,12,12,12
Baby Point,8,8,8,8,8,8
Banbury,12,12,12,12,12,12
Bathurst Manor,140,140,140,140,140,140
...,...,...,...,...,...,...
The Golden Mile,108,108,108,108,108,108
The Kingsway,68,68,68,68,68,68
Willowdale West,94,94,94,94,94,94
York,18,18,18,18,18,18


#### Let's find out how many unique categories can be curated from all the returned venues


In [13]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 321 uniques categories.


### Analyze Each Neighbourhood

In [22]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix='', prefix_sep='')
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood']
toronto_onehot.head()

Unnamed: 0,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Terminal,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Big Box Store,Bike Shop,Bike Trail,Bistro,Bookstore,Botanical Garden,...,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Steakhouse,Storage Facility,Supermarket,Supplement Shop,Sushi Restaurant,Syrian Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tennis Court,Tex-Mex Restaurant,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Turkish Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category


In [28]:
print(toronto_onehot.shape)
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
print(toronto_grouped.shape)
toronto_grouped


(10566, 321)
(191, 321)


Unnamed: 0,Neighborhood,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Terminal,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Big Box Store,Bike Shop,Bike Trail,Bistro,Bookstore,...,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Steakhouse,Storage Facility,Supermarket,Supplement Shop,Sushi Restaurant,Syrian Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tennis Court,Tex-Mex Restaurant,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Turkish Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Adelaide,0.0,0.000000,0.0,0.0,0.0,0.0,0.030000,0.0,0.0,0.0,0.01,0.0,0.000000,0.0,0.03,0.0,0.0,0.0,0.000000,0.0,0.0,0.010000,0.000000,0.010000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010000,0.0,0.0,0.0,0.000000,0.0,0.0,0.020000,...,0.000000,0.000000,0.0,0.010000,0.0,0.000000,0.000000,0.010000,0.0,0.000000,0.01,0.0,0.0,0.000000,0.01,0.0,0.0,0.000000,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.010000,0.000000,0.000000,0.000000,0.000000,0.0,0.01,0.000000,0.0,0.0,0.000000
1,Alderwood,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.00,0.0,0.000000,0.0,0.00,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.00,0.000000,0.0,0.0,0.000000
2,Baby Point,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.00,0.0,0.000000,0.0,0.00,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.00,0.000000,0.0,0.0,0.000000
3,Banbury,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.00,0.0,0.000000,0.0,0.00,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.00,0.000000,0.0,0.0,0.000000
4,Bathurst Manor,0.0,0.000000,0.0,0.0,0.0,0.0,0.014286,0.0,0.0,0.0,0.00,0.0,0.014286,0.0,0.00,0.0,0.0,0.0,0.000000,0.0,0.0,0.028571,0.000000,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014286,0.0,0.0,0.0,0.014286,0.0,0.0,0.014286,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.014286,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.014286,0.000000,0.028571,0.014286,0.000000,0.0,0.00,0.000000,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,The Golden Mile,0.0,0.018519,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.00,0.0,0.018519,0.0,0.00,0.0,0.0,0.0,0.018519,0.0,0.0,0.000000,0.018519,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.018519,0.018519,0.0,0.018519,0.0,0.018519,0.018519,0.000000,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.018519,0.000000,0.018519,0.018519,0.0,0.00,0.000000,0.0,0.0,0.000000
187,The Kingsway,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.00,0.0,0.000000,0.0,0.00,0.0,0.0,0.0,0.000000,0.0,0.0,0.029412,0.058824,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.058824,0.0,0.000000,0.00,0.0,0.0,0.029412,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.00,0.000000,0.0,0.0,0.000000
188,Willowdale West,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.00,0.0,0.000000,0.0,0.00,0.0,0.0,0.0,0.000000,0.0,0.0,0.021277,0.021277,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.021277,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.021277,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.00,0.000000,0.0,0.0,0.021277
189,York,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.00,0.0,0.000000,0.0,0.00,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.00,0.111111,0.0,0.0,0.000000


#### Let's print each neighborhood along with the top 5 most common venues

In [31]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Adelaide----
         venue  freq
0  Coffee Shop  0.07
1         Café  0.06
2   Restaurant  0.06
3        Hotel  0.05
4          Gym  0.05


---- Alderwood----
            venue  freq
0     Pizza Place  0.33
1             Pub  0.17
2     Coffee Shop  0.17
3  Sandwich Place  0.17
4             Gym  0.17


---- Baby Point----
                     venue  freq
0                     Park  0.75
1                    River  0.25
2      Moroccan Restaurant  0.00
3                Nightclub  0.00
4  New American Restaurant  0.00


---- Banbury----
              venue  freq
0       Coffee Shop  0.17
1  Botanical Garden  0.17
2      Intersection  0.17
3       Gas Station  0.17
4             Trail  0.17


---- Bathurst Manor----
                         venue  freq
0            Korean Restaurant  0.20
1                Grocery Store  0.06
2                  Coffee Shop  0.04
3           Mexican Restaurant  0.03
4  Eastern European Restaurant  0.03


---- Bay Street Corridor----
                v

In [36]:
def return_most_common_venues(row, num_top_venues):
  row_categories = row.iloc[1:]
  # print(row)
  row_categories_sorted = row_categories.sort_values(ascending=False)
  return row_categories_sorted.index.values[0:num_top_venues]

In [90]:
num_top_venues = 10



# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Restaurant,Café,Gym,Hotel,Italian Restaurant,Japanese Restaurant,Asian Restaurant,American Restaurant,Gastropub
1,Alderwood,Pizza Place,Pub,Gym,Coffee Shop,Sandwich Place,Field,Dog Run,Doner Restaurant,Donut Shop,Fish Market
2,Baby Point,Park,River,Yoga Studio,Falafel Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant
3,Banbury,Botanical Garden,Park,Gas Station,Trail,Coffee Shop,Intersection,Filipino Restaurant,Flower Shop,Dumpling Restaurant,Eastern European Restaurant
4,Bathurst Manor,Korean Restaurant,Grocery Store,Coffee Shop,Eastern European Restaurant,Mexican Restaurant,Ice Cream Shop,Fast Food Restaurant,Bakery,Bar,Video Store


## 3. cluster the neighborhoods


Run _k_-means to cluster the neighborhood into 5 clusters.


In [91]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
print(len(set(kmeans.labels_.astype('int'))))
kmeans.labels_[0:10] 


5


array([0, 0, 4, 2, 0, 0, 0, 0, 0, 0], dtype=int32)

create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [92]:
# add clustering labels
labels = kmeans.labels_.astype('int32')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', labels)

toronto_merged = neighborhoods

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown,43.654174,-79.380812,0.0,Coffee Shop,Clothing Store,Hotel,Restaurant,Café,Diner,Seafood Restaurant,Sandwich Place,Bar,Tanning Salon
1,Harbourfront,43.64008,-79.38015,0.0,Coffee Shop,Café,Hotel,Restaurant,Italian Restaurant,Pizza Place,Steakhouse,History Museum,Brewery,Gym
2,Little Italy,43.655208,-79.414877,0.0,Italian Restaurant,Bar,Café,Sandwich Place,Burger Joint,Park,Restaurant,Bakery,Asian Restaurant,Tapas Restaurant
3,Little Portugal,43.647413,-79.431116,0.0,Bar,Café,Coffee Shop,Korean Restaurant,Cocktail Bar,Restaurant,Bakery,Boutique,Italian Restaurant,Japanese Restaurant
4,Dufferin Grove,43.653632,-79.426439,0.0,Coffee Shop,Bar,Italian Restaurant,Restaurant,Bakery,Nightclub,Sports Bar,Mexican Restaurant,Yoga Studio,Fast Food Restaurant


Finally, let's visualize the resulting clusters

In [93]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
colors_array = cm.rainbow(np.linspace(0, 1, 5))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    try:
      folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
    except:
      print(lat, lon, poi, cluster)
       
map_clusters

43.7373876 -79.4109253 Bedford Park nan
43.7062977 -79.3219073  Parkview Hill nan
8.8748315 -73.9766442  Mount Olive nan
43.7373876 -79.4109253 Bedford Park nan
43.7062977 -79.3219073  Parkview Hill nan
8.8748315 -73.9766442  Mount Olive nan


## 5. Examine Clusters





### Cluster 1

In [94]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(4, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,43.654174,Coffee Shop,Clothing Store,Hotel,Restaurant,Café,Diner,Seafood Restaurant,Sandwich Place,Bar,Tanning Salon
1,43.640080,Coffee Shop,Café,Hotel,Restaurant,Italian Restaurant,Pizza Place,Steakhouse,History Museum,Brewery,Gym
2,43.655208,Italian Restaurant,Bar,Café,Sandwich Place,Burger Joint,Park,Restaurant,Bakery,Asian Restaurant,Tapas Restaurant
3,43.647413,Bar,Café,Coffee Shop,Korean Restaurant,Cocktail Bar,Restaurant,Bakery,Boutique,Italian Restaurant,Japanese Restaurant
4,43.653632,Coffee Shop,Bar,Italian Restaurant,Restaurant,Bakery,Nightclub,Sports Bar,Mexican Restaurant,Yoga Studio,Fast Food Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
382,43.687311,Pharmacy,American Restaurant,Liquor Store,Coffee Shop,Yoga Studio,Fast Food Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant
385,43.721362,Pizza Place,Breakfast Spot,Department Store,Sandwich Place,Flea Market,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant
386,43.678524,Airport,Airport Terminal,Yoga Studio,Fast Food Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
387,43.721770,Caribbean Restaurant,Bar,African Restaurant,Hakka Restaurant,Farmers Market,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space


### Cluster 2


In [95]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(4, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
130,43.80493,Park,Fast Food Restaurant,Farmers Market,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
135,43.80493,Park,Fast Food Restaurant,Farmers Market,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
137,43.80493,Park,Fast Food Restaurant,Farmers Market,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
138,43.80493,Park,Fast Food Restaurant,Farmers Market,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
162,43.696024,Fast Food Restaurant,Construction & Landscaping,Yoga Studio,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market
164,43.713366,Fast Food Restaurant,Gas Station,Caribbean Restaurant,Flower Shop,Falafel Restaurant,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant
325,43.80493,Park,Fast Food Restaurant,Farmers Market,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
330,43.80493,Park,Fast Food Restaurant,Farmers Market,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
332,43.80493,Park,Fast Food Restaurant,Farmers Market,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
333,43.80493,Park,Fast Food Restaurant,Farmers Market,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant


### Cluster 3

In [96]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(4, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,43.623054,Music Venue,Café,Harbor / Marina,Park,Yoga Studio,Falafel Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant
29,43.693559,Bank,Playground,Mediterranean Restaurant,Park,Farmers Market,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant
36,43.722079,Kids Store,Park,Doctor's Office,Electronics Store,Bank,Fish Market,Fish & Chips Shop,Filipino Restaurant,Field,Flea Market
38,43.693559,Bank,Playground,Mediterranean Restaurant,Park,Farmers Market,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant
58,43.690388,Tennis Court,Gym,Park,Trail,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store
59,43.678356,Park,Playground,Bike Trail,Yoga Studio,Fast Food Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space
65,43.704798,Convenience Store,Park,Japanese Restaurant,Sandwich Place,Yoga Studio,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant
74,43.732658,Thai Restaurant,Middle Eastern Restaurant,Park,Falafel Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant
75,43.733797,Botanical Garden,Park,Gas Station,Trail,Coffee Shop,Intersection,Filipino Restaurant,Flower Shop,Dumpling Restaurant,Eastern European Restaurant
84,43.769509,Tennis Court,Intersection,Women's Store,Park,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space


### Cluster 4

In [97]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(4, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
70,43.7354,Music Venue,Yoga Studio,Hostel,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
265,43.7354,Music Venue,Yoga Studio,Hostel,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant


### Cluster 5

In [98]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(4, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
134,43.775504,Park,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market
146,43.655874,Park,River,Yoga Studio,Falafel Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant
188,43.699539,Park,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market
329,43.775504,Park,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market
341,43.655874,Park,River,Yoga Studio,Falafel Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant
383,43.699539,Park,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market
