# The Battle of Neighborhoods

_hefangzhang  
Apr 7, 2020_

In [2]:
# import
import numpy as np
import pandas as pd
import json

from geopy.geocoders import Nominatim

import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

from bs4 import BeautifulSoup

!pip install folium
import folium

print('Libraries imported.')

Libraries imported.


## Downtown Toronto
### Step1: Load and transform the data

In [7]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source=requests.get(url).text
soup=BeautifulSoup(source)
w_data=soup.find('div',class_='mw-parser-output')
t_data=w_data.table.tbody

In [8]:
columns=['PostalCode','Borough','Neighbourhood']
data=dict({key:[]*len(columns) for key in columns})

for row in t_data.find_all('tr'):
    for v,column in zip(row.find_all('td'),columns):
        v=v.text
        v=v.replace('\n','')
        data[column].append(v)

In [9]:
df=pd.DataFrame.from_dict(data=data)[columns]
print(df.shape)
print(df.dtypes)
df.head()

(180, 3)
PostalCode       object
Borough          object
Neighbourhood    object
dtype: object


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


**Ignore cells with a borough that is Not assigned.**

In [10]:
df=df[df['Borough']!='Not assigned'].reset_index(drop=True)
df.shape

(103, 3)

**Combined into one row with the neighborhoods separated with a comma  
There is actually no More than one neighborhood exist in one postal code area  
But show the method here**

In [11]:
# Use PostalCode, Borough as the key

postcodes=df['PostalCode'].values
boroughs=df['Borough'].values
neighbours=df['Neighbourhood'].values

dic=dict({(key1,key2):[] for key1,key2 in zip(postcodes,boroughs)})
for postcode,borough,neighbour in zip(postcodes,boroughs,neighbours):
    key=(postcode,borough)
    dic[key].append(neighbour)
    
df=pd.DataFrame(columns=['PostalCode','Borough','Neighbourhood'])
for key,value in dic.items():
    postcode,borough,neighbour=key[0],key[1],value
    neighbour=','.join(neighbour)
    df=df.append({'PostalCode':postcode,
                  'Borough':borough,
                  'Neighbourhood':neighbour},ignore_index=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [12]:
# Use the .shape method to print the number of rows of the dataframe
df.shape

(103, 3)

### Step2: Get the Latitude and Longitude

In [13]:
!wget http://cocl.us/Geospatial_data

--2020-04-07 15:13:34--  http://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 159.8.69.24, 159.8.69.21, 159.8.72.228
Connecting to cocl.us (cocl.us)|159.8.69.24|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cocl.us/Geospatial_data [following]
--2020-04-07 15:13:34--  https://cocl.us/Geospatial_data
Connecting to cocl.us (cocl.us)|159.8.69.24|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2020-04-07 15:13:35--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 107.152.26.197, 107.152.27.197
Connecting to ibm.box.com (ibm.box.com)|107.152.26.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2020-04-07 15:13:35--  https://ibm.box.com/

In [14]:
lat_lng=pd.read_csv('Geospatial_data')
lat_lng.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
lat_lng.rename(columns={"Postal Code":"PostalCode"},inplace=True)
lat_lng.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [16]:
df=pd.merge(df,lat_lng,how='inner',on='PostalCode')
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494


In [17]:
df.shape

(103, 5)

### Step3: Explore and Cluster the neighborhoods

In [20]:
# Get latitude, longitude
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

In [21]:
# create map
map_toronto=folium.Map(location=[latitude,longitude],zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [22]:
df['Borough'].unique()

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

In [23]:
# Expore Downtown Toronto
downtown_toronto = df[df['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
downtown_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [26]:
# Visualize the neighbourhood of Downtown Toronto in map

address = 'Downtown Toronto ,Toronto, Ontario'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of New York using latitude and longitude values
map_downtown = folium.Map(location=[latitude, longitude], zoom_start= 11)

# add markers to map
for lat, lng, borough, neighborhood in zip(downtown_toronto['Latitude'], downtown_toronto['Longitude'], 
                                           downtown_toronto['Borough'], downtown_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown)  
    
map_downtown

In [28]:
# export venues
lat = downtown_toronto.loc[0, 'Latitude'] 
lng = downtown_toronto.loc[0, 'Longitude'] 

# neighborhood name
neighborhood_name = downtown_toronto.loc[0, 'Neighbourhood'] 
print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, lat, lng))

CLIENT_ID = '5RBQKBPE3LSY5QY4YAS4LCFF1HF5TOM0V2DWGX1EGSLIA1VT' 
CLIENT_SECRET = 'YSNYKXMLF0VWSNMUA4GL3QTSEGPSJRI0VJPEHSGZ2ODJFLPY'
VERSION = '20200406' 

LIMIT = 100
radius =1000
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, CLIENT_SECRET, VERSION, lat,lng, radius, LIMIT)

# gettig the venues data form Forsquare API in json format
results = requests.get(url).json()
results

Latitude and longitude values of Regent Park / Harbourfront are 43.6542599, -79.3606359.


{'meta': {'code': 200, 'requestId': '5e8c9982a2e538001bd3eafe'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 116,
  'suggestedBounds': {'ne': {'lat': 43.66325990900001,
    'lng': -79.3482199002972},
   'sw': {'lat': 43.64525989099999, 'lng': -79.37305189970282}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label':

In [29]:
# get response
venues=results['response']['groups'][0]['items']
venues_df=json_normalize(venues)
venues_df.head()

Unnamed: 0,reasons.count,reasons.items,referralId,venue.categories,venue.id,venue.location.address,venue.location.cc,venue.location.city,venue.location.country,venue.location.crossStreet,...,venue.location.labeledLatLngs,venue.location.lat,venue.location.lng,venue.location.neighborhood,venue.location.postalCode,venue.location.state,venue.name,venue.photos.count,venue.photos.groups,venue.venuePage.id
0,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-54ea41ad498e9a11e9e13308-0,"[{'id': '4bf58dd8d48988d16a941735', 'name': 'B...",54ea41ad498e9a11e9e13308,362 King St E,CA,Toronto,Canada,Trinity St,...,"[{'label': 'display', 'lat': 43.65344672305267...",43.653447,-79.362017,,M5A 1K9,ON,Roselle Desserts,0,[],
1,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-53b8466a498e83df908c3f21-1,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",53b8466a498e83df908c3f21,368 King St E,CA,Toronto,Canada,at Trinity St,...,"[{'label': 'display', 'lat': 43.65355870959944...",43.653559,-79.361809,,,ON,Tandem Coffee,0,[],
2,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-574c229e498ebb5c6b257902-2,"[{'id': '52e81612bcbc57f1066b7a37', 'name': 'D...",574c229e498ebb5c6b257902,461 Cherry St,CA,Toronto,Canada,,...,"[{'label': 'display', 'lat': 43.65324910177244...",43.653249,-79.358008,,M5A 0H7,ON,Cooper Koo Family YMCA,0,[],
3,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-5612b1cc498e3dd742af0dc8-3,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",5612b1cc498e3dd742af0dc8,573 King St E,CA,Toronto,Canada,at St Lawrence St,...,"[{'label': 'display', 'lat': 43.65636850543279...",43.656369,-79.35698,,M5A 4L3,ON,Impact Kitchen,0,[],
4,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4ad4c05ef964a520bff620e3-4,"[{'id': '4deefb944765f83613cdba6e', 'name': 'H...",4ad4c05ef964a520bff620e3,"btwn Front, Cherry, Gardiner & Parliament",CA,Toronto,Canada,,...,"[{'label': 'display', 'lat': 43.65024435658077...",43.650244,-79.359323,,M5A 3C4,ON,The Distillery Historic District,0,[],


In [None]:
# get the useful info
columns=['venue.name','venue.categories','venue.location.lat','venue.location.lng']
venues_df=venues_df.loc[:,columns]
venues_df.head()

In [30]:
# get the categories
venues_df['venue.categories']=venues_df.apply(lambda x: x['venue.categories'][0]['name'], axis=1)
venues_df.head()

Unnamed: 0,reasons.count,reasons.items,referralId,venue.categories,venue.id,venue.location.address,venue.location.cc,venue.location.city,venue.location.country,venue.location.crossStreet,...,venue.location.labeledLatLngs,venue.location.lat,venue.location.lng,venue.location.neighborhood,venue.location.postalCode,venue.location.state,venue.name,venue.photos.count,venue.photos.groups,venue.venuePage.id
0,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-54ea41ad498e9a11e9e13308-0,Bakery,54ea41ad498e9a11e9e13308,362 King St E,CA,Toronto,Canada,Trinity St,...,"[{'label': 'display', 'lat': 43.65344672305267...",43.653447,-79.362017,,M5A 1K9,ON,Roselle Desserts,0,[],
1,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-53b8466a498e83df908c3f21-1,Coffee Shop,53b8466a498e83df908c3f21,368 King St E,CA,Toronto,Canada,at Trinity St,...,"[{'label': 'display', 'lat': 43.65355870959944...",43.653559,-79.361809,,,ON,Tandem Coffee,0,[],
2,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-574c229e498ebb5c6b257902-2,Distribution Center,574c229e498ebb5c6b257902,461 Cherry St,CA,Toronto,Canada,,...,"[{'label': 'display', 'lat': 43.65324910177244...",43.653249,-79.358008,,M5A 0H7,ON,Cooper Koo Family YMCA,0,[],
3,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-5612b1cc498e3dd742af0dc8-3,Restaurant,5612b1cc498e3dd742af0dc8,573 King St E,CA,Toronto,Canada,at St Lawrence St,...,"[{'label': 'display', 'lat': 43.65636850543279...",43.656369,-79.35698,,M5A 4L3,ON,Impact Kitchen,0,[],
4,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4ad4c05ef964a520bff620e3-4,Historic Site,4ad4c05ef964a520bff620e3,"btwn Front, Cherry, Gardiner & Parliament",CA,Toronto,Canada,,...,"[{'label': 'display', 'lat': 43.65024435658077...",43.650244,-79.359323,,M5A 3C4,ON,The Distillery Historic District,0,[],


In [31]:
# rename the columns
venues_df.columns = [col.split(".")[-1] for col in venues_df.columns]
venues_df.head()

Unnamed: 0,count,items,referralId,categories,id,address,cc,city,country,crossStreet,...,labeledLatLngs,lat,lng,neighborhood,postalCode,state,name,count.1,groups,id.1
0,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-54ea41ad498e9a11e9e13308-0,Bakery,54ea41ad498e9a11e9e13308,362 King St E,CA,Toronto,Canada,Trinity St,...,"[{'label': 'display', 'lat': 43.65344672305267...",43.653447,-79.362017,,M5A 1K9,ON,Roselle Desserts,0,[],
1,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-53b8466a498e83df908c3f21-1,Coffee Shop,53b8466a498e83df908c3f21,368 King St E,CA,Toronto,Canada,at Trinity St,...,"[{'label': 'display', 'lat': 43.65355870959944...",43.653559,-79.361809,,,ON,Tandem Coffee,0,[],
2,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-574c229e498ebb5c6b257902-2,Distribution Center,574c229e498ebb5c6b257902,461 Cherry St,CA,Toronto,Canada,,...,"[{'label': 'display', 'lat': 43.65324910177244...",43.653249,-79.358008,,M5A 0H7,ON,Cooper Koo Family YMCA,0,[],
3,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-5612b1cc498e3dd742af0dc8-3,Restaurant,5612b1cc498e3dd742af0dc8,573 King St E,CA,Toronto,Canada,at St Lawrence St,...,"[{'label': 'display', 'lat': 43.65636850543279...",43.656369,-79.35698,,M5A 4L3,ON,Impact Kitchen,0,[],
4,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4ad4c05ef964a520bff620e3-4,Historic Site,4ad4c05ef964a520bff620e3,"btwn Front, Cherry, Gardiner & Parliament",CA,Toronto,Canada,,...,"[{'label': 'display', 'lat': 43.65024435658077...",43.650244,-79.359323,,M5A 3C4,ON,The Distillery Historic District,0,[],


In [123]:
# define a function to get each neighbours

def get_near_by_venues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'\
        .format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(name, lat, lng, 
                             v['venue']['name'], v['venue']['location']['lat'], v['venue']['location']['lng'],
                             v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue in venues_list for item in venue])
    nearby_venues.columns = ['Neighbourhood','Neighbourhood Latitude', 'Neighbourhood Longitude', 
                             'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    
    return nearby_venues

In [33]:
downtown_venues = get_near_by_venues(names=downtown_toronto['Neighbourhood'],latitudes=downtown_toronto['Latitude'],
                                      longitudes=downtown_toronto['Longitude'])
downtown_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Regent Park / Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Regent Park / Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Regent Park / Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,Regent Park / Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Regent Park / Harbourfront,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [59]:
print('There are {} uniques categories.'.format(len(downtown_venues['Venue Category'].unique())))
print('Venues returned for each neighbourhood: ')
downtown_venues.groupby('Neighbourhood')['Venue'].count()

There are 204 uniques categories.
Venues returned for each neighbourhood: 


Neighbourhood
Berczy Park                                                                                                          57
CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport     16
Central Bay Street                                                                                                   76
Christie                                                                                                             17
Church and Wellesley                                                                                                 79
Commerce Court / Victoria Hotel                                                                                     100
First Canadian Place / Underground city                                                                             100
Garden District, Ryerson                                                                                            100
Harbourfront East / Union 

**Analyse each neighborhood**

In [35]:

# one hot encoding
downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
downtown_onehot['Neighbourhood'] = downtown_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot.head()

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio,Neighbourhood
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Regent Park / Harbourfront
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Regent Park / Harbourfront
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Regent Park / Harbourfront
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Regent Park / Harbourfront
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Regent Park / Harbourfront


In [36]:
downtown_onehot.shape

(1284, 205)

**Group rows by neighborhood, taking the mean of the frequency**

In [37]:
downtown_grouped = downtown_onehot.groupby('Neighbourhood').mean().reset_index()
downtown_grouped

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0
1,CN Tower / King and Spadina / Railway Lands / ...,0.0,0.0625,0.0625,0.125,0.1875,0.125,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.013158,0.0,0.0,...,0.0,0.0,0.0,0.013158,0.0,0.0,0.013158,0.0,0.0,0.013158
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.012658,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,...,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.025316
5,Commerce Court / Victoria Hotel,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0
6,First Canadian Place / Underground city,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,...,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0
7,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0
8,Harbourfront East / Union Station / Toronto Is...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,...,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0
9,Kensington Market / Chinatown / Grange Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.04,0.0,0.053333,0.013333,0.0,0.0,0.0


**Check Top 5 most common venues for each neighborhood**

In [38]:
num_top_venues=5


for hood in downtown_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = downtown_grouped[downtown_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))

----Berczy Park----
                venue  freq
0         Coffee Shop  0.11
1        Cocktail Bar  0.05
2                Café  0.04
3              Bakery  0.04
4  Seafood Restaurant  0.04
----CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport----
                venue  freq
0     Airport Service  0.19
1      Airport Lounge  0.12
2    Airport Terminal  0.12
3            Boutique  0.06
4  Airport Food Court  0.06
----Central Bay Street----
                 venue  freq
0          Coffee Shop  0.18
1                 Café  0.05
2   Italian Restaurant  0.05
3  Japanese Restaurant  0.04
4       Sandwich Place  0.04
----Christie----
           venue  freq
0  Grocery Store  0.24
1           Café  0.18
2           Park  0.12
3     Baby Store  0.06
4     Restaurant  0.06
----Church and Wellesley----
                 venue  freq
0          Coffee Shop  0.06
1              Gay Bar  0.05
2  Japanese Restaurant  0.05
3           Restaurant

In [39]:
# Put top venues into a new dataframe
def return_most_common_venues(row, num_top_venues):
    row = row.iloc[1:]
    row_sorted = row.sort_values(ascending=False)
    
    return row_sorted.index.values[0:num_top_venues]

In [40]:

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
venues_sorted = pd.DataFrame(columns=columns)
venues_sorted['Neighbourhood'] = downtown_grouped['Neighbourhood']

for ind in np.arange(downtown_grouped.shape[0]):
    venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Café,Bakery,Farmers Market,Seafood Restaurant,Beer Bar,Cheese Shop,Restaurant,Cosmetics Shop
1,CN Tower / King and Spadina / Railway Lands / ...,Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Harbor / Marina,Boutique,Sculpture Garden,Rental Car Location,Bar,Plane
2,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Japanese Restaurant,Dessert Shop,Bubble Tea Shop,Burger Joint,Spa,Middle Eastern Restaurant
3,Christie,Grocery Store,Café,Park,Diner,Nightclub,Coffee Shop,Restaurant,Gas Station,Italian Restaurant,Baby Store
4,Church and Wellesley,Coffee Shop,Gay Bar,Japanese Restaurant,Sushi Restaurant,Restaurant,Burger Joint,Hotel,Gastropub,Yoga Studio,Mediterranean Restaurant


**Clustering the Neighbours**

In [41]:
k = 5

X = downtown_grouped.drop('Neighbourhood', axis = 1)

kmeans = KMeans(n_clusters = k, random_state=0)
kmeans.fit(X)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
    random_state=0, tol=0.0001, verbose=0)

In [42]:
# add clustering labels
venues_sorted['Cluster_Labels']=  kmeans.labels_

downtown_toronto_merged = downtown_toronto
# merge top venues_sorted with toronto_data
downtown_toronto_merged = downtown_toronto_merged.join(venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

downtown_toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636,Coffee Shop,Park,Bakery,Pub,Theater,Restaurant,Mexican Restaurant,Breakfast Spot,Café,Beer Store,4
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494,Coffee Shop,Diner,Yoga Studio,Creperie,Mexican Restaurant,Juice Bar,Italian Restaurant,Hobby Shop,Fried Chicken Joint,Distribution Center,4
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,Clothing Store,Coffee Shop,Cosmetics Shop,Bubble Tea Shop,Café,Japanese Restaurant,Middle Eastern Restaurant,Bookstore,Theater,Restaurant,0
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,Coffee Shop,Café,Restaurant,Cocktail Bar,Italian Restaurant,Clothing Store,American Restaurant,Bakery,Beer Bar,Diner,0
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,Coffee Shop,Cocktail Bar,Café,Bakery,Farmers Market,Seafood Restaurant,Beer Bar,Cheese Shop,Restaurant,Cosmetics Shop,4


In [43]:
# create map
map_clusterd = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_toronto_merged['Latitude'], downtown_toronto_merged['Longitude'],
                                  downtown_toronto_merged['Neighbourhood'], downtown_toronto_merged['Cluster_Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusterd)
       
map_clusterd

__Examine Clusters__

In [45]:
# Cluster 1
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster_Labels'] == 0, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
2,Downtown Toronto,Clothing Store,Coffee Shop,Cosmetics Shop,Bubble Tea Shop,Café,Japanese Restaurant,Middle Eastern Restaurant,Bookstore,Theater,Restaurant,0
3,Downtown Toronto,Coffee Shop,Café,Restaurant,Cocktail Bar,Italian Restaurant,Clothing Store,American Restaurant,Bakery,Beer Bar,Diner,0
7,Downtown Toronto,Coffee Shop,Restaurant,Café,Gym,Thai Restaurant,Bakery,Vegetarian / Vegan Restaurant,Asian Restaurant,Bookstore,Office,0
11,Downtown Toronto,Café,Restaurant,Bookstore,Japanese Restaurant,Italian Restaurant,Bar,Bakery,Yoga Studio,Sushi Restaurant,Pub,0
12,Downtown Toronto,Café,Coffee Shop,Vietnamese Restaurant,Dumpling Restaurant,Vegetarian / Vegan Restaurant,Mexican Restaurant,Bar,Arts & Crafts Store,Dessert Shop,Park,0
16,Downtown Toronto,Pizza Place,Café,Coffee Shop,Pub,Convenience Store,Park,Bakery,Restaurant,Italian Restaurant,Pharmacy,0
18,Downtown Toronto,Coffee Shop,Gay Bar,Japanese Restaurant,Sushi Restaurant,Restaurant,Burger Joint,Hotel,Gastropub,Yoga Studio,Mediterranean Restaurant,0


In [46]:
# Cluster 2
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster_Labels'] == 1, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
14,Downtown Toronto,Park,Playground,Trail,Deli / Bodega,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,1


In [49]:
# Cluster 3
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster_Labels'] == 2, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
6,Downtown Toronto,Grocery Store,Café,Park,Diner,Nightclub,Coffee Shop,Restaurant,Gas Station,Italian Restaurant,Baby Store,2


In [50]:
# Cluster 4
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster_Labels'] == 3, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
13,Downtown Toronto,Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Harbor / Marina,Boutique,Sculpture Garden,Rental Car Location,Bar,Plane,3


In [51]:
# Cluster 5
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster_Labels'] == 5, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels


## Do The same analysis for Manhattan

In [64]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)
newyork_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

In [147]:
column_names = ['Borough', 'Neighbourhood', 'Latitude', 'Longitude']
neighborhoods = pd.DataFrame(columns=column_names)
# And make sure that the dataset has all 5 boroughs and 306 neighborhoods.
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)
neighborhoods_data = newyork_data['features']
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]

    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighbourhood': neighborhood_name,
                                          'Longitude': neighborhood_lon,
                                          'Latitude':neighborhood_lat}, ignore_index=True)
    
neighborhoods.head()
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)
manhattan_data.head()

The dataframe has 0 boroughs and 0 neighborhoods.


Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [148]:
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7896239, -73.9598939.


In [149]:
import folium
map_manhattan = folium.Map(location=[latitude, longitude], zoom_start=11)

In [151]:
# add markers to map\n", 
for lat, lng, borough, neighborhood in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], 
                                           manhattan_data['Borough'], manhattan_data['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhattan)  
    
map_manhattan

In [152]:
def get_near_by_venues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'\
        .format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(name, lat, lng, 
                             v['venue']['name'], v['venue']['location']['lat'], v['venue']['location']['lng'],
                             v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue in venues_list for item in venue])
    nearby_venues.columns = ['Neighbourhood','Neighbourhood Latitude', 'Neighbourhood Longitude', 
                             'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    
    return nearby_venues

In [154]:
manhattan_venues = get_near_by_venues(names=manhattan_data['Neighbourhood'],
                                   latitudes=manhattan_data['Latitude'],
                                   longitudes=manhattan_data['Longitude'],
                                 )


KeyError: 'groups'

In [155]:
manhattan_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Park City,82,82,82,82,82,82
Carnegie Hill,92,92,92,92,92,92
Central Harlem,42,42,42,42,42,42
Chelsea,100,100,100,100,100,100
Chinatown,100,100,100,100,100,100
Civic Center,100,100,100,100,100,100
Clinton,100,100,100,100,100,100
East Harlem,41,41,41,41,41,41
East Village,100,100,100,100,100,100
Financial District,100,100,100,100,100,100


In [156]:
# one hot encoding
manhattan_onehot = pd.get_dummies(manhattan_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
manhattan_onehot['Neighbourhood'] = manhattan_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [manhattan_onehot.columns[-1]] + list(manhattan_onehot.columns[:-1])
manhattan_onehot = manhattan_onehot[fixed_columns]

manhattan_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Acupuncturist,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,...,Video Store,Vietnamese Restaurant,Volleyball Court,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [157]:
manhattan_grouped = manhattan_onehot.groupby('Neighbourhood').mean().reset_index()

In [158]:
num_top_venues = 5

for hood in manhattan_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = manhattan_grouped[manhattan_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Battery Park City----
            venue  freq
0            Park  0.07
1     Coffee Shop  0.06
2           Hotel  0.06
3  Clothing Store  0.04
4   Memorial Site  0.04


----Carnegie Hill----
                 venue  freq
0          Coffee Shop  0.07
1                 Café  0.05
2    French Restaurant  0.03
3            Wine Shop  0.03
4  Japanese Restaurant  0.03


----Central Harlem----
                 venue  freq
0   African Restaurant  0.07
1   Chinese Restaurant  0.05
2  American Restaurant  0.05
3   Seafood Restaurant  0.05
4    French Restaurant  0.05


----Chelsea----
                 venue  freq
0          Coffee Shop  0.07
1   Italian Restaurant  0.04
2               Bakery  0.04
3  American Restaurant  0.04
4    French Restaurant  0.03


----Chinatown----
                   venue  freq
0     Chinese Restaurant  0.08
1           Cocktail Bar  0.05
2    American Restaurant  0.04
3  Vietnamese Restaurant  0.03
4           Optical Shop  0.03


----Civic Center----
            

In [159]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [160]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = manhattan_grouped['Neighbourhood']

for ind in np.arange(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Park,Hotel,Coffee Shop,Boat or Ferry,Clothing Store,Gym,Memorial Site,Pizza Place,BBQ Joint,Sandwich Place
1,Carnegie Hill,Coffee Shop,Café,French Restaurant,Wine Shop,Bookstore,Gym / Fitness Center,Bakery,Gym,Japanese Restaurant,Pizza Place
2,Central Harlem,African Restaurant,Seafood Restaurant,Bar,American Restaurant,French Restaurant,Chinese Restaurant,Spa,Bookstore,Boutique,Market
3,Chelsea,Coffee Shop,Bakery,American Restaurant,Italian Restaurant,Hotel,Ice Cream Shop,French Restaurant,Art Gallery,Cupcake Shop,Pet Store
4,Chinatown,Chinese Restaurant,Cocktail Bar,American Restaurant,Hotpot Restaurant,Optical Shop,Salon / Barbershop,Bakery,Dessert Shop,Vietnamese Restaurant,Spa
5,Civic Center,Coffee Shop,Hotel,Gym / Fitness Center,Cocktail Bar,Spa,French Restaurant,Yoga Studio,American Restaurant,Sandwich Place,Park
6,Clinton,Theater,Italian Restaurant,Gym / Fitness Center,Spa,American Restaurant,Coffee Shop,Hotel,Wine Shop,Sandwich Place,Gym
7,East Harlem,Mexican Restaurant,Thai Restaurant,Bakery,Pizza Place,Latin American Restaurant,Deli / Bodega,Spanish Restaurant,Fast Food Restaurant,Gas Station,Taco Place
8,East Village,Bar,Pizza Place,Ice Cream Shop,Cocktail Bar,Mexican Restaurant,Wine Bar,Vegetarian / Vegan Restaurant,Speakeasy,Coffee Shop,Ramen Restaurant
9,Financial District,Coffee Shop,Bar,American Restaurant,Cocktail Bar,Pizza Place,Gym,Gym / Fitness Center,Japanese Restaurant,Italian Restaurant,Food Truck


In [161]:
kclusters = 5

manhattan_grouped_clustering = manhattan_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(manhattan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 2, 2, 2, 0, 0, 4, 2, 0], dtype=int32)

In [163]:
manhattan_merged = manhattan_data

# add clustering labels
neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
manhattan_merged = manhattan_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

manhattan_merged.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,Manhattan,Marble Hill,40.876551,-73.91066,Sandwich Place,Gym,American Restaurant,Coffee Shop,Yoga Studio,Pharmacy,Supplement Shop,Steakhouse,Shopping Mall,Seafood Restaurant,3
1,Manhattan,Chinatown,40.715618,-73.994279,Chinese Restaurant,Cocktail Bar,American Restaurant,Hotpot Restaurant,Optical Shop,Salon / Barbershop,Bakery,Dessert Shop,Vietnamese Restaurant,Spa,2
2,Manhattan,Washington Heights,40.851903,-73.9369,Café,Bakery,Grocery Store,Mobile Phone Shop,Chinese Restaurant,Pizza Place,Bank,Mexican Restaurant,Spanish Restaurant,Supplement Shop,4
3,Manhattan,Inwood,40.867684,-73.92121,Café,Mexican Restaurant,Restaurant,Pizza Place,Lounge,Park,Chinese Restaurant,Deli / Bodega,Wine Bar,Frozen Yogurt Shop,4
4,Manhattan,Hamilton Heights,40.823604,-73.949688,Pizza Place,Café,Coffee Shop,Deli / Bodega,Mexican Restaurant,Yoga Studio,Sushi Restaurant,Caribbean Restaurant,School,Bakery,4


In [164]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighbourhood'], manhattan_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [165]:
manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 0, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
9,Yorkville,Gym,Coffee Shop,Bar,Pizza Place,Deli / Bodega,Diner,Sushi Restaurant,Mexican Restaurant,Japanese Restaurant,0
10,Lenox Hill,Coffee Shop,Sushi Restaurant,Pizza Place,Cocktail Bar,Gym / Fitness Center,Café,Burger Joint,Gym,Salon / Barbershop,0
13,Lincoln Square,Theater,Italian Restaurant,Plaza,Performing Arts Venue,American Restaurant,Concert Hall,Gym / Fitness Center,Indie Movie Theater,Wine Shop,0
14,Clinton,Italian Restaurant,Gym / Fitness Center,Spa,American Restaurant,Coffee Shop,Hotel,Wine Shop,Sandwich Place,Gym,0
15,Midtown,Coffee Shop,Sporting Goods Shop,Clothing Store,Steakhouse,Bookstore,Gym,American Restaurant,Café,Theater,0
16,Murray Hill,Coffee Shop,American Restaurant,Hotel,Japanese Restaurant,Gym / Fitness Center,Bar,Mediterranean Restaurant,Cocktail Bar,Burger Joint,0
25,Manhattan Valley,Bar,Mexican Restaurant,Coffee Shop,Chinese Restaurant,Thai Restaurant,Pizza Place,Yoga Studio,Malay Restaurant,Bakery,0
28,Battery Park City,Hotel,Coffee Shop,Boat or Ferry,Clothing Store,Gym,Memorial Site,Pizza Place,BBQ Joint,Sandwich Place,0
29,Financial District,Bar,American Restaurant,Cocktail Bar,Pizza Place,Gym,Gym / Fitness Center,Japanese Restaurant,Italian Restaurant,Food Truck,0
30,Carnegie Hill,Café,French Restaurant,Wine Shop,Bookstore,Gym / Fitness Center,Bakery,Gym,Japanese Restaurant,Pizza Place,0


In [166]:
manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 1, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
37,Stuyvesant Town,Bar,Park,Heliport,Gym / Fitness Center,Gas Station,Harbor / Marina,Cocktail Bar,Pet Service,Farmers Market,1


In [167]:
manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 2, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
1,Chinatown,Cocktail Bar,American Restaurant,Hotpot Restaurant,Optical Shop,Salon / Barbershop,Bakery,Dessert Shop,Vietnamese Restaurant,Spa,2
5,Manhattanville,Seafood Restaurant,Italian Restaurant,Deli / Bodega,Mexican Restaurant,Park,Fried Chicken Joint,Climbing Gym,Falafel Restaurant,Farmers Market,2
6,Central Harlem,Seafood Restaurant,Bar,American Restaurant,French Restaurant,Chinese Restaurant,Spa,Bookstore,Boutique,Market,2
8,Upper East Side,Italian Restaurant,Exhibit,Bakery,Gym / Fitness Center,Juice Bar,Yoga Studio,Art Gallery,French Restaurant,Wine Shop,2
12,Upper West Side,Wine Bar,Bar,Bakery,Café,Coffee Shop,Indian Restaurant,Thai Restaurant,Ice Cream Shop,Restaurant,2
17,Chelsea,Bakery,American Restaurant,Italian Restaurant,Hotel,Ice Cream Shop,French Restaurant,Art Gallery,Cupcake Shop,Pet Store,2
18,Greenwich Village,Sushi Restaurant,Café,Clothing Store,Indian Restaurant,American Restaurant,Gym,French Restaurant,Spa,Caribbean Restaurant,2
19,East Village,Pizza Place,Ice Cream Shop,Cocktail Bar,Mexican Restaurant,Wine Bar,Vegetarian / Vegan Restaurant,Speakeasy,Coffee Shop,Ramen Restaurant,2
21,Tribeca,Italian Restaurant,Park,Spa,Café,Coffee Shop,Wine Bar,Men's Store,Bakery,Playground,2
22,Little Italy,Café,Bubble Tea Shop,Italian Restaurant,Mediterranean Restaurant,Sandwich Place,Hotel,Cocktail Bar,Salon / Barbershop,Seafood Restaurant,2


In [168]:
manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 3, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,Marble Hill,Gym,American Restaurant,Coffee Shop,Yoga Studio,Pharmacy,Supplement Shop,Steakhouse,Shopping Mall,Seafood Restaurant,3


In [169]:
manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 4, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
2,Washington Heights,Bakery,Grocery Store,Mobile Phone Shop,Chinese Restaurant,Pizza Place,Bank,Mexican Restaurant,Spanish Restaurant,Supplement Shop,4
3,Inwood,Mexican Restaurant,Restaurant,Pizza Place,Lounge,Park,Chinese Restaurant,Deli / Bodega,Wine Bar,Frozen Yogurt Shop,4
4,Hamilton Heights,Café,Coffee Shop,Deli / Bodega,Mexican Restaurant,Yoga Studio,Sushi Restaurant,Caribbean Restaurant,School,Bakery,4
7,East Harlem,Thai Restaurant,Bakery,Pizza Place,Latin American Restaurant,Deli / Bodega,Spanish Restaurant,Fast Food Restaurant,Gas Station,Taco Place,4
11,Roosevelt Island,Bus Line,Residential Building (Apartment / Condo),Scenic Lookout,Sandwich Place,Liquor Store,Dry Cleaner,Supermarket,Dog Run,Monument / Landmark,4
20,Lower East Side,Coffee Shop,Café,Bakery,Pizza Place,Cocktail Bar,Art Gallery,Japanese Restaurant,Ramen Restaurant,Farmers Market,4
26,Morningside Heights,Coffee Shop,American Restaurant,Bookstore,Burger Joint,Sandwich Place,Deli / Bodega,Pharmacy,Supermarket,Greek Restaurant,4
36,Tudor City,Café,Mexican Restaurant,Deli / Bodega,Diner,Greek Restaurant,Coffee Shop,Asian Restaurant,Restaurant,Thai Restaurant,4
