<a href="https://colab.research.google.com/github/abdurrahmanshidiq/IBM-Capstone-Project/blob/master/Segmenting_and_Clustering_Neighborhoods_in_Toronto.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1 align=center><font size = 5>Segmenting and Clustering Neighborhoods in New York City</font></h1>

# Table of Content


1. Chapter 1 : Scrapping Wikipedia to get Toronto Neighborhoods Data 
2. Chapter 2 : Load Geolocation Data, then Merged to Toronto Neighborhoods Data based on Postal Code
3. Chapter 3 : Explore & Cluster the Neighborhoods in Toronto using Foursquare API & K-Means Clustering

# Chapter 1

Scrapping Wikipedia to get *Toronto Neighborhoods Data*

In [1]:
import numpy as np
import pandas as pd

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

from bs4 import BeautifulSoup
import requests
import json
from geopy.geocoders import Nominatim
from pandas import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

import folium
from folium import plugins

from sklearn.cluster import KMeans

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw = requests.get(url).text

In [3]:
soup = BeautifulSoup(raw, 'html5lib')

In [4]:
len(soup.find('table'))

2

In [5]:
table = soup.find_all('table')[0].find_all('td')

post_code = []
borough = []
neigh = []

for i,row in enumerate(table):
  if table[i].span.text == 'Not assigned':
    pass
  else:
    post_code.append(table[i].p.text[:3])
    borough.append(table[i].span.text.split('(')[0])
    neigh.append(table[i].span.text.split('(')[1].replace('/',',').replace(')',''.strip(' ')))

**Alternative Code**
<!--
table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

# print(table_contents)
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
-->

In [6]:
#Checking Unique Values for Borough

df_toronto = pd.DataFrame(columns=['PostalCode', 'Borough', 'Neighborhood'])
df_toronto['PostalCode'] = post_code
df_toronto['Borough'] = borough
df_toronto['Neighborhood'] = neigh

df_toronto['Borough'].unique()

array(['North York', 'Downtown Toronto', "Queen's Park", 'Etobicoke',
       'Scarborough', 'East York', 'York', 'East Toronto', 'West Toronto',
       'East YorkEast Toronto', 'Central Toronto',
       'MississaugaCanada Post Gateway Processing Centre',
       'Downtown TorontoStn A PO Boxes25 The Esplanade',
       'EtobicokeNorthwest',
       'East TorontoBusiness reply mail Processing Centre969 Eastern'],
      dtype=object)

In [7]:
# Fixing Borough Name
# Getting the Data Index
print(df_toronto[df_toronto['Borough']=='East YorkEast Toronto'].index)
print(df_toronto[df_toronto['Borough']=='MississaugaCanada Post Gateway Processing Centre'].index)
print(df_toronto[df_toronto['Borough']=='Downtown TorontoStn A PO Boxes25 The Esplanade'].index)
print(df_toronto[df_toronto['Borough']=='EtobicokeNorthwest'].index)
print(df_toronto[df_toronto['Borough']=='East TorontoBusiness reply mail Processing Centre969 Eastern'].index)

Int64Index([35], dtype='int64')
Int64Index([76], dtype='int64')
Int64Index([92], dtype='int64')
Int64Index([94], dtype='int64')
Int64Index([100], dtype='int64')


In [8]:
# Fixing Borough Name
# Replace wrong `Borough` name with the correct name
df_toronto.loc[35,'Borough'] = df_toronto.loc[35,'Borough'].replace('East YorkEast Toronto', 'East York')
df_toronto.loc[76,'Borough'] = df_toronto.loc[76,'Borough'].replace('MississaugaCanada Post Gateway Processing Centre', 'Mississauga')
df_toronto.loc[92,'Borough'] = df_toronto.loc[92,'Borough'].replace('Downtown TorontoStn A PO Boxes25 The Esplanade', 'Downtown Toronto')
df_toronto.loc[94,'Borough'] = df_toronto.loc[94,'Borough'].replace('EtobicokeNorthwest', 'Etobicoke')
df_toronto.loc[100,'Borough'] = df_toronto.loc[100,'Borough'].replace('East TorontoBusiness reply mail Processing Centre969 Eastern', 'East Toronto')

In [9]:
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [10]:
df_toronto.shape

(103, 3)

**End of Chapter 1**

# Chapter 2

Adding Latitude & Longitude to the Data Frame

In [11]:
# Import csv file to google colab
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
df_geo = pd.read_csv('/content/drive/My Drive/IBM DS Certificate/10. Capstone Project/2. K-Means Clustering/Geospatial_Coordinates.csv')
df_geo.columns = ['PostalCode', 'Latitude', 'Longitude']
df_geo.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [13]:
df_geo.shape

(103, 3)

In [14]:
# Merge `df_toronto` with `df_geo`based on PostalCode
df_merged = df_toronto.merge(df_geo, how='inner', on='PostalCode')
df_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
7,M3B,North York,Don MillsNorth,43.745906,-79.352188
8,M4B,East York,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


**End of Chapter 2**

# Chapter 3

Explore and Cluster the neighborhoods in Toronto

In [15]:
neighborhoods = df_merged.copy()
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


In [16]:
print('The dataframe has {} borough & {} neighborhoods.'.format(len(neighborhoods['Borough'].unique()), neighborhoods.shape[0]))

The dataframe has 11 borough & 103 neighborhoods.


## Visualizing Toronto's Borough

In [17]:
# Use Geopy to get Lat, Lon of Toronto

address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent='on_explore')
location = geolocator.geocode(address)
lat = location.latitude
lon = location.longitude
print('Geographical Coordinate of Toronto are {}, {}'.format(lat, lon))

Geographical Coordinate of Toronto are 43.6534817, -79.3839347


In [18]:
# Create Map of Toronto
map_toronto = folium.Map(location=[lat,lon], zoom_start=10)

for lat, lon, borough, neigh in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
  label = 'Neighborhood: {} \n\n Borough: {}'.format(neigh, borough)
  label = folium.Popup(label, parse_html=True)
  folium.CircleMarker([lat,lon], radius=5,
                      popup=label, color='blue',
                      fill=True, fill_color='#3186cc',
                      fill_opacity=0.5, parse_html=False).add_to(map_toronto)

map_toronto

In [19]:
neighborhoods['Borough'].value_counts()

North York          24
Downtown Toronto    18
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East York            5
East Toronto         5
York                 5
Queen's Park         1
Mississauga          1
Name: Borough, dtype: int64

Simplify the above map and segment and cluster only the neighborhoods in `North York`. So let's slice the original dataframe and create a new dataframe of the `North York` data.

## Understanding `North York` Borough

For simplify the problem, Lets choose only `North York` Borough for further analysis

In [20]:
# Select Only North York Borough

df_NY = neighborhoods[neighborhoods['Borough']=='North York'].reset_index(drop=True)[['Borough', 'Neighborhood', 'Latitude', 'Longitude']]
df_NY.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,North York,Parkwoods,43.753259,-79.329656
1,North York,Victoria Village,43.725882,-79.315572
2,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
3,North York,Don MillsNorth,43.745906,-79.352188
4,North York,Glencairn,43.709577,-79.445073


In [21]:
df_NY.shape

(24, 4)

In [22]:
# Get Coordiates for North York, Toronto, Canada
address = 'North York, ON, Canada'

geolocator = Nominatim(user_agent='on_explore')
location = geolocator.geocode(address)
lat = location.latitude
lon = location.longitude
print('Geographical Coordinate of North York, Toronto are {}, {}'.format(lat, lon))

Geographical Coordinate of North York, Toronto are 43.7543263, -79.44911696639593


In [23]:
# Visualizing North York Map

map_NorthYork = folium.Map(location=[lat,lon], zoom_start=11)

for lat, lon, neigh in zip(df_NY['Latitude'], df_NY['Longitude'], df_NY['Neighborhood']):
  label = '{}'.format(neigh)
  label = folium.Popup(label, parse_html=True)
  folium.CircleMarker([lat,lon], radius=5,
                      popup=label, color='blue',
                      fill=True, fill_color='#3186cc',
                      fill_opacity=0.5, parse_html=False).add_to(map_NorthYork)

map_NorthYork

In [24]:
map_NorthYork = folium.Map(location=[lat,lon], zoom_start=11)

incidents = plugins.MarkerCluster().add_to(map_NorthYork)

for lat, lon, neigh in zip(df_NY['Latitude'], df_NY['Longitude'], df_NY['Neighborhood']):
  label = '{}'.format(neigh)
  label = folium.Popup(label, parse_html=True)
  folium.CircleMarker([lat,lon], radius=5,
                      popup=label, color='blue',
                      fill=True, fill_color='#3186cc',
                      fill_opacity=0.5, parse_html=False).add_to(incidents)

map_NorthYork

## Utilizing the Foursquare API to explore the neighborhoods in `North York` and segment them

In [25]:
CLIENT_ID = '3UT2CP1TASOWFFEBJL3DGSHWTXORPEA5H1UQLX02M3HGHDS1' # your Foursquare ID
CLIENT_SECRET = 'P4N2G2HR4TKTXLDEEVUT1EWROXQ3ZSRHO1TFGGZ2JAWKEDIS' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3UT2CP1TASOWFFEBJL3DGSHWTXORPEA5H1UQLX02M3HGHDS1
CLIENT_SECRET:P4N2G2HR4TKTXLDEEVUT1EWROXQ3ZSRHO1TFGGZ2JAWKEDIS


In [26]:
df_NY.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,North York,Parkwoods,43.753259,-79.329656
1,North York,Victoria Village,43.725882,-79.315572
2,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
3,North York,Don MillsNorth,43.745906,-79.352188
4,North York,Glencairn,43.709577,-79.445073


In [27]:
df_NY.shape

(24, 4)

In [28]:
df_NY[df_NY['Neighborhood']=='York Mills , Silver Hills']

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
12,North York,"York Mills , Silver Hills",43.75749,-79.374714


Lets explore `Parkwoods` neighborhood

Parkwoods-North York-Toronto-Canada

In [29]:
# Parkwoods

neigh_name = df_NY.loc[12,'Neighborhood']
neigh_lat = df_NY.loc[12,'Latitude']
neigh_lon = df_NY.loc[12,'Longitude']

print('Coordinate for "{}" neighborhood is = Lat : {}, Lng : {}'.format(neigh_name, neigh_lat, neigh_lon))

Coordinate for "York Mills , Silver Hills" neighborhood is = Lat : 43.7574902, Lng : -79.37471409999999


End Point Venue/Explore = Returns a list of recommended venues near the current location.

 Get the top 100 venues that are in `Parkwoods` within a radius of 500 meters.

In [30]:
# Get the top 100 venues that are in Parkwoods within a radius of 500 meters.
LIMIT = 100
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET,
                                                                                                                           VERSION, neigh_lat, neigh_lon,
                                                                                                                           radius, LIMIT)
print(url)

https://api.foursquare.com/v2/venues/explore?client_id=3UT2CP1TASOWFFEBJL3DGSHWTXORPEA5H1UQLX02M3HGHDS1&client_secret=P4N2G2HR4TKTXLDEEVUT1EWROXQ3ZSRHO1TFGGZ2JAWKEDIS&v=20180605&ll=43.7574902,-79.37471409999999&radius=500&limit=100


In [31]:
results = requests.get(url).json()
len(results['response']['groups'][0]['items'])

0

In [32]:
# # function that extracts the category of the venue
# def get_category_type(row):
#     try:
#         categories_list = row['categories']
#     except:
#         categories_list = row['venue.categories']
        
#     if len(categories_list) == 0:
#         return None
#     else:
#         return categories_list[0]['name']

In [33]:
# venues = results['response']['groups'][0]['items']
    
# nearby_venues = json_normalize(venues) # flatten JSON

# # filter columns
# filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
# nearby_venues =nearby_venues.loc[:, filtered_columns]

# # filter the category for each row
# nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# # clean columns
# nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

# nearby_venues.head()

### Inspecting Json File ##

In [34]:
res = requests.get(url).json()

In [35]:
res.keys()

dict_keys(['meta', 'response'])

In [36]:
res['response'].keys()



In [37]:
res['response']['groups'][0].keys()

dict_keys(['type', 'name', 'items'])

In [38]:
res['response']['groups'][0]['type']

'Recommended Places'

In [39]:
res['response']['groups'][0]['name']

'recommended'

In [40]:
res['response']['groups'][0]['items']

[]

In [41]:
len(res['response']['groups'][0]['items'])

0

In [42]:
json_normalize(res['response']['groups'][0]['items']).head()

### Extracting Nearby `Venues` Information from JSON file

- Create function to extract nearby venues for all neighborhoods in `Nort York`, Toronto

In [43]:

# def getNearbyVenues(names, latitudes, longitudes, radius=500):
#   neigh_name = []
#   neigh_lat = []
#   neigh_lon = []
#   venue_name = []
#   venue_lat = []
#   venue_lon = []
#   venue_cat_name = []
#   print('Neighborhoods in North York, Toronto : \n')
#   for name, lat, lon in zip(names, latitudes, longitudes):
#     print(name)

#     url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET,
#                                                                                                                            VERSION, lat, lon,
#                                                                                                                            radius, LIMIT)
#     results = requests.get(url).json()['response']['groups'][0]['items']

#     for val in results:
#       neigh_name.append(name)
#       neigh_lat.append(lat)
#       neigh_lon.append(lon)
#       venue_name.append(val['venue']['name'])
#       venue_lat.append(val['venue']['location']['lat'])
#       venue_lon.append(val['venue']['location']['lng'])
#       venue_cat_name.append(val['venue']['categories'][0]['name'])


#   nearby_venue = pd.DataFrame({'Neighborhood':neigh_name, 'Neighboorhood Latitude':neigh_lat, 'Neighborhood Longitude':neigh_lon, 
#                                'Venue':venue_name, 'Venue Latitude':venue_lat, 'Venue Longitude':venue_lon, 'Venue Category':venue_cat_name})
#   return nearby_venue



In [44]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
  neigh_name = []
  neigh_lat = []
  neigh_lon = []
  venue_name = []
  venue_lat = []
  venue_lon = []
  venue_cat_name = []
  print('Neighborhoods in North York, Toronto : \n')
  for name, lat, lon in zip(names, latitudes, longitudes):
    print(name)

    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET,
                                                                                                                           VERSION, lat, lon,
                                                                                                                           radius, LIMIT)
    results = requests.get(url).json()['response']['groups'][0]['items']

    for val in results:
      if len(val)<1:
        break
      else:
        neigh_name.append(name)
        neigh_lat.append(lat)
        neigh_lon.append(lon)
        venue_name.append(val['venue']['name'])
        venue_lat.append(val['venue']['location']['lat'])
        venue_lon.append(val['venue']['location']['lng'])
        venue_cat_name.append(val['venue']['categories'][0]['name'])


  nearby_venue = pd.DataFrame({'Neighborhood':neigh_name, 'Neighboorhood Latitude':neigh_lat, 'Neighborhood Longitude':neigh_lon, 
                               'Venue':venue_name, 'Venue Latitude':venue_lat, 'Venue Longitude':venue_lon, 'Venue Category':venue_cat_name})
  return nearby_venue

In [45]:
northyork_venues = getNearbyVenues(names=df_NY['Neighborhood'],
                                   latitudes=df_NY['Latitude'],
                                   longitudes=df_NY['Longitude']
                                  )

Neighborhoods in North York, Toronto : 

Parkwoods
Victoria Village
Lawrence Manor , Lawrence Heights
Don MillsNorth
Glencairn
Don MillsSouth
Hillcrest Village
Bathurst Manor , Wilson Heights , Downsview North
Fairview , Henry Farm , Oriole
Northwood Park , York University
Bayview Village
DownsviewEast  
York Mills , Silver Hills
DownsviewWest
North Park , Maple Leaf Park , Upwood Park
Humber Summit
Willowdale , Newtonbrook
DownsviewCentral
Bedford Park , Lawrence Manor East
Humberlea , Emery
WillowdaleSouth
DownsviewNorthwest
York Mills West
WillowdaleWest


In [46]:
# All nearby venues in North York Borough
northyork_venues.head()

Unnamed: 0,Neighborhood,Neighboorhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [47]:
northyork_venues[northyork_venues['Neighborhood']=='York Mills , Silver Hills']

Unnamed: 0,Neighborhood,Neighboorhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category


In [48]:
northyork_venues.shape

(250, 7)

In [49]:
northyork_venues.groupby('Neighborhood').count().reset_index()[['Neighborhood','Venue']].sort_values('Venue', ascending=False).head()

Unnamed: 0,Neighborhood,Venue
9,"Fairview , Henry Farm , Oriole",66
20,WillowdaleSouth,34
2,"Bedford Park , Lawrence Manor East",23
0,"Bathurst Manor , Wilson Heights , Downsview North",22
4,Don MillsSouth,20


In [50]:
print('There are {} uniques venue categories in North York Borough-Toronto-Canada.'.format(len(northyork_venues['Venue Category'].unique())))

There are 101 uniques venue categories in North York Borough-Toronto-Canada.


### Analyze Each Neighborhood

In [51]:
# one hot encoding
northyork_onehot = pd.get_dummies(northyork_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
northyork_onehot['Neighborhood'] = northyork_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [northyork_onehot.columns[-1]] + list(northyork_onehot.columns[:-1])
northyork_onehot = northyork_onehot[fixed_columns]

northyork_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Store,Bike Shop,Boutique,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Business Service,Butcher,Café,Caribbean Restaurant,Carpet Store,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Food Service,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hobby Shop,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Korean Restaurant,Liquor Store,Lounge,Luggage Store,Massage Studio,Mediterranean Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Movie Theater,Optical Shop,Park,Pet Store,Pharmacy,Pizza Place,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Salon / Barbershop,Sandwich Place,Shopping Mall,Smoke Shop,Sporting Goods Shop,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [52]:
northyork_onehot.shape

(250, 102)

In [53]:
# let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
northyork_grouped = northyork_onehot.groupby('Neighborhood').mean().reset_index()
northyork_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Store,Bike Shop,Boutique,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Business Service,Butcher,Café,Caribbean Restaurant,Carpet Store,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Food Service,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hobby Shop,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Korean Restaurant,Liquor Store,Lounge,Luggage Store,Massage Studio,Mediterranean Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Movie Theater,Optical Shop,Park,Pet Store,Pharmacy,Pizza Place,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Salon / Barbershop,Sandwich Place,Shopping Mall,Smoke Shop,Sporting Goods Shop,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,"Bathurst Manor , Wilson Heights , Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.045455,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.045455,0.0,0.0,0.045455,0.0,0.045455,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.045455,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park , Lawrence Manor East",0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.086957,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.043478,0.0,0.086957,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,0.0,0.0
3,Don MillsNorth,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Don MillsSouth,0.0,0.0,0.0,0.05,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.05,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.05,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,DownsviewCentral,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,DownsviewEast,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,DownsviewNorthwest,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,DownsviewWest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Fairview , Henry Farm , Oriole",0.0,0.0,0.015152,0.0,0.0,0.015152,0.0,0.030303,0.030303,0.015152,0.015152,0.0,0.0,0.0,0.015152,0.0,0.0,0.015152,0.015152,0.0,0.0,0.0,0.0,0.0,0.015152,0.015152,0.136364,0.075758,0.0,0.0,0.015152,0.030303,0.0,0.015152,0.015152,0.0,0.0,0.0,0.0,0.015152,0.060606,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.045455,0.015152,0.030303,0.0,0.015152,0.0,0.015152,0.0,0.0,0.0,0.030303,0.015152,0.015152,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.015152,0.015152,0.015152,0.0,0.015152,0.0,0.015152,0.0,0.015152,0.0,0.015152,0.030303,0.015152,0.0,0.015152


In [54]:
northyork_grouped.shape

(23, 102)

In [55]:
# Let's print each neighborhood along with the top 5 most common venues

num_top_venues = 5

for hood in northyork_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = northyork_grouped[northyork_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor , Wilson Heights , Downsview North----
               venue  freq
0        Coffee Shop  0.09
1               Bank  0.09
2      Shopping Mall  0.05
3      Deli / Bodega  0.05
4  Mobile Phone Shop  0.05


----Bayview Village----
                 venue  freq
0   Chinese Restaurant  0.25
1                 Café  0.25
2                 Bank  0.25
3  Japanese Restaurant  0.25
4    Accessories Store  0.00


----Bedford Park , Lawrence Manor East----
                     venue  freq
0              Coffee Shop  0.09
1           Sandwich Place  0.09
2       Italian Restaurant  0.09
3  Comfort Food Restaurant  0.04
4               Hobby Shop  0.04


----Don MillsNorth----
                  venue  freq
0                   Gym   0.2
1  Caribbean Restaurant   0.2
2    Athletics & Sports   0.2
3                  Café   0.2
4   Japanese Restaurant   0.2


----Don MillsSouth----
                venue  freq
0          Restaurant  0.10
1                 Gym  0.10
2         Coffee Shop  

In [56]:
# Let's put that into a pandas dataframe

# First, let's write a function to sort the venues in descending order.

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [57]:
# Now let's create the new dataframe and display the top 10 venues for each neighborhood.

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = northyork_grouped['Neighborhood']

for ind in np.arange(northyork_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(northyork_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor , Wilson Heights , Downsview North",Coffee Shop,Bank,Shopping Mall,Diner,Bridal Shop,Pharmacy,Pizza Place,Deli / Bodega,Mobile Phone Shop,Middle Eastern Restaurant
1,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
2,"Bedford Park , Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Butcher,Grocery Store,Pizza Place,Indian Restaurant,Hobby Shop,Pub,Liquor Store
3,Don MillsNorth,Japanese Restaurant,Gym,Caribbean Restaurant,Café,Athletics & Sports,Women's Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
4,Don MillsSouth,Coffee Shop,Restaurant,Gym,Dim Sum Restaurant,Beer Store,Chinese Restaurant,Asian Restaurant,Italian Restaurant,Grocery Store,Supermarket


## CLUSTER NEIGHBORHOODS in `North York` based on nearby venues

In [58]:
northyork_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Store,Bike Shop,Boutique,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Business Service,Butcher,Café,Caribbean Restaurant,Carpet Store,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Food Service,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hobby Shop,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Korean Restaurant,Liquor Store,Lounge,Luggage Store,Massage Studio,Mediterranean Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Movie Theater,Optical Shop,Park,Pet Store,Pharmacy,Pizza Place,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Salon / Barbershop,Sandwich Place,Shopping Mall,Smoke Shop,Sporting Goods Shop,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,"Bathurst Manor , Wilson Heights , Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.045455,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.045455,0.0,0.0,0.045455,0.0,0.045455,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.045455,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park , Lawrence Manor East",0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.086957,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.043478,0.0,0.086957,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,0.0,0.0
3,Don MillsNorth,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Don MillsSouth,0.0,0.0,0.0,0.05,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.05,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.05,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
northyork_grouped[northyork_grouped['Neighborhood']=='York Mills , Silver Hills']

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Store,Bike Shop,Boutique,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Business Service,Butcher,Café,Caribbean Restaurant,Carpet Store,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Food Service,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hobby Shop,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Korean Restaurant,Liquor Store,Lounge,Luggage Store,Massage Studio,Mediterranean Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Movie Theater,Optical Shop,Park,Pet Store,Pharmacy,Pizza Place,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Salon / Barbershop,Sandwich Place,Shopping Mall,Smoke Shop,Sporting Goods Shop,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store


In [60]:
# set number of clusters
kclusters = 5

northyork_grouped_clustering = northyork_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(northyork_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 2, 2, 2, 2, 4, 2, 2, 2], dtype=int32)

In [61]:
# Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

northyork_merged = df_NY

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
northyork_merged = northyork_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

northyork_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,Parkwoods,43.753259,-79.329656,2.0,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Dim Sum Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
1,North York,Victoria Village,43.725882,-79.315572,2.0,Intersection,French Restaurant,Coffee Shop,Pizza Place,Hockey Arena,Portuguese Restaurant,Dessert Shop,Clothing Store,Comfort Food Restaurant,Construction & Landscaping
2,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763,2.0,Clothing Store,Accessories Store,Boutique,Furniture / Home Store,Coffee Shop,Gift Shop,Carpet Store,Vietnamese Restaurant,Athletics & Sports,Bakery
3,North York,Don MillsNorth,43.745906,-79.352188,2.0,Japanese Restaurant,Gym,Caribbean Restaurant,Café,Athletics & Sports,Women's Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
4,North York,Glencairn,43.709577,-79.445073,2.0,Convenience Store,Park,Bakery,Smoke Shop,Japanese Restaurant,Women's Store,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping


In [62]:
northyork_merged

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,Parkwoods,43.753259,-79.329656,2.0,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Dim Sum Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
1,North York,Victoria Village,43.725882,-79.315572,2.0,Intersection,French Restaurant,Coffee Shop,Pizza Place,Hockey Arena,Portuguese Restaurant,Dessert Shop,Clothing Store,Comfort Food Restaurant,Construction & Landscaping
2,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763,2.0,Clothing Store,Accessories Store,Boutique,Furniture / Home Store,Coffee Shop,Gift Shop,Carpet Store,Vietnamese Restaurant,Athletics & Sports,Bakery
3,North York,Don MillsNorth,43.745906,-79.352188,2.0,Japanese Restaurant,Gym,Caribbean Restaurant,Café,Athletics & Sports,Women's Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
4,North York,Glencairn,43.709577,-79.445073,2.0,Convenience Store,Park,Bakery,Smoke Shop,Japanese Restaurant,Women's Store,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
5,North York,Don MillsSouth,43.7259,-79.340923,2.0,Coffee Shop,Restaurant,Gym,Dim Sum Restaurant,Beer Store,Chinese Restaurant,Asian Restaurant,Italian Restaurant,Grocery Store,Supermarket
6,North York,Hillcrest Village,43.803762,-79.363452,1.0,Golf Course,Mediterranean Restaurant,Pool,Fast Food Restaurant,Dog Run,Dessert Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
7,North York,"Bathurst Manor , Wilson Heights , Downsview North",43.754328,-79.442259,2.0,Coffee Shop,Bank,Shopping Mall,Diner,Bridal Shop,Pharmacy,Pizza Place,Deli / Bodega,Mobile Phone Shop,Middle Eastern Restaurant
8,North York,"Fairview , Henry Farm , Oriole",43.778517,-79.346556,2.0,Clothing Store,Coffee Shop,Fast Food Restaurant,Japanese Restaurant,Mobile Phone Shop,Toy / Game Store,Cosmetics Shop,Juice Bar,Bank,Bakery
9,North York,"Northwood Park , York University",43.76798,-79.487262,2.0,Furniture / Home Store,Vietnamese Restaurant,Bar,Caribbean Restaurant,Massage Studio,Coffee Shop,Food Service,Food Court,Comfort Food Restaurant,Fried Chicken Joint


In [63]:
#DROP NaN Values
df_final = northyork_merged.dropna().reset_index(drop=True)
df_final['Cluster Labels'] = df_final['Cluster Labels'].astype('int')
df_final

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,Parkwoods,43.753259,-79.329656,2,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Dim Sum Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
1,North York,Victoria Village,43.725882,-79.315572,2,Intersection,French Restaurant,Coffee Shop,Pizza Place,Hockey Arena,Portuguese Restaurant,Dessert Shop,Clothing Store,Comfort Food Restaurant,Construction & Landscaping
2,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763,2,Clothing Store,Accessories Store,Boutique,Furniture / Home Store,Coffee Shop,Gift Shop,Carpet Store,Vietnamese Restaurant,Athletics & Sports,Bakery
3,North York,Don MillsNorth,43.745906,-79.352188,2,Japanese Restaurant,Gym,Caribbean Restaurant,Café,Athletics & Sports,Women's Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
4,North York,Glencairn,43.709577,-79.445073,2,Convenience Store,Park,Bakery,Smoke Shop,Japanese Restaurant,Women's Store,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
5,North York,Don MillsSouth,43.7259,-79.340923,2,Coffee Shop,Restaurant,Gym,Dim Sum Restaurant,Beer Store,Chinese Restaurant,Asian Restaurant,Italian Restaurant,Grocery Store,Supermarket
6,North York,Hillcrest Village,43.803762,-79.363452,1,Golf Course,Mediterranean Restaurant,Pool,Fast Food Restaurant,Dog Run,Dessert Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
7,North York,"Bathurst Manor , Wilson Heights , Downsview North",43.754328,-79.442259,2,Coffee Shop,Bank,Shopping Mall,Diner,Bridal Shop,Pharmacy,Pizza Place,Deli / Bodega,Mobile Phone Shop,Middle Eastern Restaurant
8,North York,"Fairview , Henry Farm , Oriole",43.778517,-79.346556,2,Clothing Store,Coffee Shop,Fast Food Restaurant,Japanese Restaurant,Mobile Phone Shop,Toy / Game Store,Cosmetics Shop,Juice Bar,Bank,Bakery
9,North York,"Northwood Park , York University",43.76798,-79.487262,2,Furniture / Home Store,Vietnamese Restaurant,Bar,Caribbean Restaurant,Massage Studio,Coffee Shop,Food Service,Food Court,Comfort Food Restaurant,Fried Chicken Joint


In [64]:
# Fisualize clusters

# create map
map_clusters = folium.Map(location=[lat, lon], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df_final['Latitude'], df_final['Longitude'], df_final['Neighborhood'], df_final['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [65]:
df_final.columns[[1] + list(range(5, df_final.shape[1]))]

Index(['Neighborhood', '1st Most Common Venue', '2nd Most Common Venue',
       '3rd Most Common Venue', '4th Most Common Venue',
       '5th Most Common Venue', '6th Most Common Venue',
       '7th Most Common Venue', '8th Most Common Venue',
       '9th Most Common Venue', '10th Most Common Venue'],
      dtype='object')

In [66]:
# CLuster 0
df_final[df_final['Cluster Labels']==0].iloc[:,[1,5,6,7,8,9,10,11,12,13,14]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,"Humberlea , Emery",Food Service,Baseball Field,Women's Store,Diner,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega


In [67]:
# CLuster 1
df_final[df_final['Cluster Labels']==1].iloc[:,[1,5,6,7,8,9,10,11,12,13,14]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Hillcrest Village,Golf Course,Mediterranean Restaurant,Pool,Fast Food Restaurant,Dog Run,Dessert Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping


In [68]:
# CLuster 2
df_final[df_final['Cluster Labels']==2].iloc[:,[1,5,6,7,8,9,10,11,12,13,14]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Dim Sum Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
1,Victoria Village,Intersection,French Restaurant,Coffee Shop,Pizza Place,Hockey Arena,Portuguese Restaurant,Dessert Shop,Clothing Store,Comfort Food Restaurant,Construction & Landscaping
2,"Lawrence Manor , Lawrence Heights",Clothing Store,Accessories Store,Boutique,Furniture / Home Store,Coffee Shop,Gift Shop,Carpet Store,Vietnamese Restaurant,Athletics & Sports,Bakery
3,Don MillsNorth,Japanese Restaurant,Gym,Caribbean Restaurant,Café,Athletics & Sports,Women's Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
4,Glencairn,Convenience Store,Park,Bakery,Smoke Shop,Japanese Restaurant,Women's Store,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
5,Don MillsSouth,Coffee Shop,Restaurant,Gym,Dim Sum Restaurant,Beer Store,Chinese Restaurant,Asian Restaurant,Italian Restaurant,Grocery Store,Supermarket
7,"Bathurst Manor , Wilson Heights , Downsview North",Coffee Shop,Bank,Shopping Mall,Diner,Bridal Shop,Pharmacy,Pizza Place,Deli / Bodega,Mobile Phone Shop,Middle Eastern Restaurant
8,"Fairview , Henry Farm , Oriole",Clothing Store,Coffee Shop,Fast Food Restaurant,Japanese Restaurant,Mobile Phone Shop,Toy / Game Store,Cosmetics Shop,Juice Bar,Bank,Bakery
9,"Northwood Park , York University",Furniture / Home Store,Vietnamese Restaurant,Bar,Caribbean Restaurant,Massage Studio,Coffee Shop,Food Service,Food Court,Comfort Food Restaurant,Fried Chicken Joint
10,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega


In [69]:
# CLuster 3
df_final[df_final['Cluster Labels']==3].iloc[:,[1,5,6,7,8,9,10,11,12,13,14]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,"Willowdale , Newtonbrook",Park,Women's Store,Chinese Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
21,York Mills West,Park,Convenience Store,Women's Store,Chinese Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega


In [70]:
# CLuster 4
df_final[df_final['Cluster Labels']==4].iloc[:,[1,5,6,7,8,9,10,11,12,13,14]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,DownsviewEast,Airport,Park,Business Service,Women's Store,Diner,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
