# Segmenting and Clustering Neighborhoods in Toronto

In [1]:
#Import required libraries
from bs4 import BeautifulSoup
import requests
import csv
import json
from pandas.io.json import json_normalize
import xml
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

### Download and explore dataset

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source = requests.get(url).text

result = BeautifulSoup(source, 'lxml')#Beautiful Soup to Parse the url page

table=result.find('table') #Finds the required table area

In [3]:
#table #Uncomment to view the table

In [4]:
#List initialization to collect the Postalcodes, Boroughs and Neighborhoods
postalcode=[]
borough=[]
neighborhood=[]

x=table.tbody #Navigation to the body of the table

for tr in x.find_all('tr'): #Iterating through rows 
    td=tr.find_all('td')
    counter = 1
    postalcode_value = -1
    borough_value = -1
    neighborhood_value = -1
    for cell in td: #Itering through columns
        if counter == 1:
            postalcode_value = cell.text #Collecting the Postalcodes
        if counter == 2:
            borough_value = cell.text
            tag_a_borough = cell.find('a')
        if counter == 3:
            neighborhood_value = str(cell.text).strip()
            tag_a_neighborhood = cell.find('a')
        
        counter +=1
    #Exclude postal codes with 'Not Assigned' values for Borough or Neighborhood 
    if (postalcode_value == 'Not Assigned' or borough_value == 'Not Assigned' or neighborhood_value == 'Not Assigned'):
        continue
    try:
        if ((tag_a_borough is None) or (tag_a_neighborhood is None)):
            continue
    except:
        pass
    if (postalcode_value == -1 or borough_value == -1 or neighborhood_value == -1):
        continue
#Append derived values to the list        
    postalcode.append(postalcode_value)
    borough.append(borough_value)
    neighborhood.append(neighborhood_value)

In [5]:
unique_p = set(postalcode)
print('num of unique Postal codes:', len(unique_p))
postalcode_u      = []
borough_u       = []
neighborhood_u = []


for postalcode_unique_element in unique_p:
    p_var = ''; b_var = ''; n_var = ''; 
    for postalcode_idx, postalcode_element in enumerate(postalcode):
        if postalcode_unique_element == postalcode_element:
            p_var = postalcode_element;
            b_var = borough[postalcode_idx]
            if n_var == '': 
                n_var = neighborhood[postalcode_idx]
            else:
                n_var = n_var + ', ' + neighborhood[postalcode_idx]
    postalcode_u.append(p_var)
    borough_u.append(b_var)
    neighborhood_u.append(n_var)

num of unique Postal codes: 84


### Transform data into Pandas DataFrame

In [6]:
df_toronto = pd.DataFrame({'PostalCode':postalcode_u,'Borough':borough_u,'Neighborhood':neighborhood_u})
df_toronto.to_csv('toronto_part1.csv')
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M9A,Etobicoke,Islington Avenue
1,M4H,East York,Thorncliffe Park
2,M1B,Scarborough,"Rouge, Malvern"
3,M9W,Etobicoke,Northwest
4,M9L,North York,Humber Summit


### Use Google maps geocode tp get coordinates for all post codes

In [7]:
#assign your google maps api key to API_KEY
API_KEY = 'AIzaSyAGoNimgEQYEmFIWJyvq6_5ZFW1CaRCzbk'

In [8]:
#Get geographical coordinates for all postal codes
latitude=[]
longitude=[]
for i in df_toronto['PostalCode']:
    j = i + ' toronto'
    url = 'https://maps.googleapis.com/maps/api/geocode/json?key={}&address={}'.format(API_KEY,j)
    response = requests.get(url).json()
    geographical_data = response['results'][0]['geometry']['location']
    latitude.append(geographical_data['lat'])
    longitude.append(geographical_data['lng'])

In [9]:
#Add Latitude and Longitude columns to the toronto dataframe
df_toronto['Latitude']=latitude #Adding a column in the main dataframe for Latitude  

df_toronto['Longitude']=longitude #Adding a column in the main dataframe for Longitude 
df_toronto.to_csv('toronto_part2.csv')
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
1,M4H,East York,Thorncliffe Park,43.705369,-79.349372
2,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
3,M9W,Etobicoke,Northwest,43.706748,-79.594054
4,M9L,North York,Humber Summit,43.756303,-79.565963


In [10]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(len(df_toronto['Borough'].unique()),df_toronto.shape[0]))

The dataframe has 9 boroughs and 84 neighborhoods.


In [11]:
#Get coordinates for Toronto
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
tor_latitude = location.latitude
tor_longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(tor_latitude, tor_longitude))

The geograpical coordinates of Toronto are 43.653963, -79.387207.


### Create map of Toronto with neighborhoods superimposed on top

In [12]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[tor_latitude, tor_longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Let's dig deeper into Scarborough, one of the borough of Toronto

In [13]:
#Get dataframe for Scarborough
df_scarborough = df_toronto[df_toronto['Borough']=='Scarborough'].reset_index(drop=True)
df_scarborough

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1T,Scarborough,Tam O'Shanter,43.781638,-79.304302
2,M1W,Scarborough,Steeles West,43.799525,-79.318389
3,M1X,Scarborough,Upper Rouge,43.836125,-79.205636
4,M1M,Scarborough,"Cliffcrest, Cliffside",43.716316,-79.239476
5,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
6,M1R,Scarborough,"Maryvale, Wexford",43.750071,-79.295849
7,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
8,M1V,Scarborough,"Agincourt North, Milliken",43.815252,-79.284577
9,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577


In [14]:
#Get coordinates for Scarborough using Nominatim
address = 'Scarborough, Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
scar_latitude = location.latitude
scar_longitude = location.longitude
print('The geograpical coordinates of Scarborough are {}, {}.'.format(scar_latitude, scar_longitude))

The geograpical coordinates of Scarborough are 43.773077, -79.257774.


In [15]:
#Print a map for Scarborough
map_scarborough = folium.Map(location=[scar_latitude,scar_longitude],zoom_start=12)

#add markers to map
for lat,lng,label in zip(df_scarborough['Latitude'],df_scarborough['Longitude'],df_scarborough['Neighborhood']):
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat,lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_scarborough)
    
map_scarborough

### Define FourSquare credentials and version

In [16]:
CLIENT_ID = '1MPXNMPUOYDOFVVMY0RCM4GCF4QOU54JZTHX41WA0DOGQCL1' # your Foursquare ID
CLIENT_SECRET = '2BVD34TECCSGP4RXP4GHKNSCJTNTXTTTBSLPTINBZQV5MD45' # your Foursquare Secret
VERSION = '20190216' # Foursquare API version

print('Aashish Foursquare Credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Aashish Foursquare Credentials:
CLIENT_ID: 1MPXNMPUOYDOFVVMY0RCM4GCF4QOU54JZTHX41WA0DOGQCL1
CLIENT_SECRET:2BVD34TECCSGP4RXP4GHKNSCJTNTXTTTBSLPTINBZQV5MD45


### Let's explore Steeles West neighborhood in Scarborough

In [17]:
#get dataframe for one of the neighborhood. for e.g. Steeles West
df_scarborough.loc[2,'Neighborhood']

'Steeles West'

In [18]:
neighborhood_latitude = df_scarborough.loc[2, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_scarborough.loc[2, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_scarborough.loc[2, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Steeles West are 43.7995252, -79.3183887.


### Let's get top 100 venues in Steeles West in radius of 500 meters

In [19]:
#Use foursquare api to explore the venues in Steeles West neighborhood
LIMIT=100
radius=500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'\
        .format(CLIENT_ID,CLIENT_SECRET,VERSION,neighborhood_latitude,neighborhood_longitude,radius,LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=1MPXNMPUOYDOFVVMY0RCM4GCF4QOU54JZTHX41WA0DOGQCL1&client_secret=2BVD34TECCSGP4RXP4GHKNSCJTNTXTTTBSLPTINBZQV5MD45&v=20190216&ll=43.7995252,-79.3183887&radius=500&limit=100'

In [20]:
results = requests.get(url).json()

In [21]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [22]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Mr Congee Chinese Cuisine 龍粥記,Chinese Restaurant,43.798879,-79.318335
1,Phoenix Restaurant 金鳳餐廳,Chinese Restaurant,43.798198,-79.318432
2,Price Chopper,Grocery Store,43.799445,-79.318563
3,Subway,Sandwich Place,43.798983,-79.318838
4,Tim Hortons,Coffee Shop,43.798281,-79.318317


In [23]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

14 venues were returned by Foursquare.


### Exploring neighborhoods in Scarborough

In [24]:
#Function to create a dataframe consisting of Neighborhood details and venue details
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
scarborough_venues = getNearbyVenues(names=df_scarborough['Neighborhood'],
                                   latitudes=df_scarborough['Latitude'],
                                   longitudes=df_scarborough['Longitude']
                                  )

Rouge, Malvern
Tam O'Shanter
Steeles West
Upper Rouge
Cliffcrest, Cliffside
Highland Creek, Rouge Hill, Port Union
Maryvale, Wexford
Scarborough Village
Agincourt North, Milliken
Clairlea, Golden Mile, Oakridge
Woburn
Ionview, Kennedy Park
Birch Cliff
Morningside, West Hill
Cedarbrae
Agincourt
Dorset Park, Scarborough Town Centre, Wexford Heights


In [26]:
scarborough_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,Tam O'Shanter,43.781638,-79.304302,Remezzo Italian Bistro,43.778649,-79.308264,Italian Restaurant
2,Tam O'Shanter,43.781638,-79.304302,The Royal Chinese Restaurant 避風塘小炒,43.780505,-79.298844,Chinese Restaurant
3,Tam O'Shanter,43.781638,-79.304302,Eight Noodles,43.778234,-79.308299,Noodle House
4,Tam O'Shanter,43.781638,-79.304302,Kub Khao,43.780438,-79.299837,Thai Restaurant


In [27]:
#Grouping venues by Neighborhood
scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Agincourt North, Milliken",2,2,2,2,2,2
Birch Cliff,4,4,4,4,4,4
Cedarbrae,7,7,7,7,7,7
"Clairlea, Golden Mile, Oakridge",10,10,10,10,10,10
"Cliffcrest, Cliffside",3,3,3,3,3,3
"Dorset Park, Scarborough Town Centre, Wexford Heights",8,8,8,8,8,8
"Highland Creek, Rouge Hill, Port Union",1,1,1,1,1,1
"Ionview, Kennedy Park",9,9,9,9,9,9
"Maryvale, Wexford",8,8,8,8,8,8


In [28]:
print('There are {} uniques categories.'.format(len(scarborough_venues['Venue Category'].unique())))

There are 55 uniques categories.


### Analyze Each Neighborhood

In [29]:
# one hot encoding
scarborough_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarborough_onehot['Neighborhood'] = scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scarborough_onehot.columns[-1]] + list(scarborough_onehot.columns[:-1])
scarborough_onehot= scarborough_onehot[fixed_columns]

scarborough_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Brewery,...,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Thrift / Vintage Store,Train Station,Vietnamese Restaurant
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Tam O'Shanter,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Tam O'Shanter,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Tam O'Shanter,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Tam O'Shanter,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [30]:
scarborough_onehot.shape

(91, 56)

In [31]:
scarborough_grouped = scarborough_onehot.groupby('Neighborhood').mean().reset_index()
scarborough_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Brewery,...,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Thrift / Vintage Store,Train Station,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
1,"Agincourt North, Milliken",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Birch Cliff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.0,0.142857,0.0,0.142857,0.142857,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0
4,"Clairlea, Golden Mile, Oakridge",0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0


In [32]:
#Function to sort venues in descending order
def return_most_common_venues(row,num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [34]:
num_top_venues = 10
indicators = ['st','nd','rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarborough_grouped['Neighborhood']

for ind in np.arange(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Skating Rink,Breakfast Spot,Lounge,Clothing Store,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store
1,"Agincourt North, Milliken",Park,Playground,Caribbean Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store,Department Store,Convenience Store
2,Birch Cliff,General Entertainment,Skating Rink,College Stadium,Café,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store
3,Cedarbrae,Caribbean Restaurant,Thai Restaurant,Athletics & Sports,Fried Chicken Joint,Bakery,Bank,Hakka Restaurant,Coffee Shop,General Entertainment,Food Truck
4,"Clairlea, Golden Mile, Oakridge",Bakery,Bus Line,Park,Food Truck,Soccer Field,Intersection,Fast Food Restaurant,Bus Station,Clothing Store,Fried Chicken Joint


### Cluster Neighborhoods

In [35]:
# set number of clusters
kclusters = 5

scarborough_grouped_clustering = scarborough_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarborough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 4, 0, 0, 0, 0, 0, 3, 0, 0], dtype=int32)

In [36]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

scarborough_merged = df_scarborough

# merge scarborough_grouped with df_scarborough to add latitude/longitude for each neighborhood
scarborough_merged = scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

scarborough_merged.dropna(inplace=True)
scarborough_merged.reset_index(drop=True)

scarborough_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,2.0,Fast Food Restaurant,Vietnamese Restaurant,Chinese Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Electronics Store,Discount Store,Department Store,Convenience Store
1,M1T,Scarborough,Tam O'Shanter,43.781638,-79.304302,0.0,Pizza Place,Noodle House,Fast Food Restaurant,Italian Restaurant,Chinese Restaurant,Pharmacy,Fried Chicken Joint,Thai Restaurant,Bank,Clothing Store
2,M1W,Scarborough,Steeles West,43.799525,-79.318389,0.0,Chinese Restaurant,Fast Food Restaurant,Sandwich Place,Coffee Shop,Japanese Restaurant,Pharmacy,Pizza Place,Breakfast Spot,Noodle House,Electronics Store
4,M1M,Scarborough,"Cliffcrest, Cliffside",43.716316,-79.239476,0.0,American Restaurant,Skating Rink,Motel,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store
5,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,3.0,Bar,Vietnamese Restaurant,Chinese Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store,Department Store


### Visualizing the clusters in map of Scarborough

In [37]:
# create map
map_clusters = folium.Map(location=[scar_latitude, scar_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarborough_merged['Latitude'], scarborough_merged['Longitude'], scarborough_merged['Neighborhood'],\
                                  scarborough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine neighborhoods in each cluster

In [38]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 0, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,0.0,Pizza Place,Noodle House,Fast Food Restaurant,Italian Restaurant,Chinese Restaurant,Pharmacy,Fried Chicken Joint,Thai Restaurant,Bank,Clothing Store
2,Scarborough,0.0,Chinese Restaurant,Fast Food Restaurant,Sandwich Place,Coffee Shop,Japanese Restaurant,Pharmacy,Pizza Place,Breakfast Spot,Noodle House,Electronics Store
4,Scarborough,0.0,American Restaurant,Skating Rink,Motel,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store
6,Scarborough,0.0,Middle Eastern Restaurant,Accessories Store,Bakery,Breakfast Spot,Shopping Mall,Sandwich Place,Auto Garage,Fast Food Restaurant,Food Truck,Electronics Store
9,Scarborough,0.0,Bakery,Bus Line,Park,Food Truck,Soccer Field,Intersection,Fast Food Restaurant,Bus Station,Clothing Store,Fried Chicken Joint
11,Scarborough,0.0,Discount Store,Hobby Shop,Bus Station,Department Store,Convenience Store,Coffee Shop,Chinese Restaurant,Train Station,Auto Garage,Grocery Store
12,Scarborough,0.0,General Entertainment,Skating Rink,College Stadium,Café,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store
13,Scarborough,0.0,Electronics Store,Breakfast Spot,Rental Car Location,Pizza Place,Medical Center,Mexican Restaurant,Vietnamese Restaurant,Chinese Restaurant,Food Truck,Fast Food Restaurant
14,Scarborough,0.0,Caribbean Restaurant,Thai Restaurant,Athletics & Sports,Fried Chicken Joint,Bakery,Bank,Hakka Restaurant,Coffee Shop,General Entertainment,Food Truck
15,Scarborough,0.0,Skating Rink,Breakfast Spot,Lounge,Clothing Store,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store


In [39]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 1, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Scarborough,1.0,Coffee Shop,Korean Restaurant,Vietnamese Restaurant,Hakka Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store


In [40]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 2, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,2.0,Fast Food Restaurant,Vietnamese Restaurant,Chinese Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Electronics Store,Discount Store,Department Store,Convenience Store


In [41]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 3, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,3.0,Bar,Vietnamese Restaurant,Chinese Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store,Department Store


In [42]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 4, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Scarborough,4.0,Smoke Shop,Playground,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store,Department Store
8,Scarborough,4.0,Park,Playground,Caribbean Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store,Department Store,Convenience Store
