# Segmenting and Clustering Neighborhoods in Toronto

In [1]:
#Import required libraries
from bs4 import BeautifulSoup
import requests
import csv
import json
from pandas.io.json import json_normalize
import xml
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import pgeocode
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

### Download and explore dataset

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source = requests.get(url).text

result = BeautifulSoup(source, 'lxml')#Beautiful Soup to Parse the url page

table=result.find('table') #Finds the required table area

In [3]:
# table #Uncomment to view the table

In [4]:
#List initialization to collect the Postalcodes, Boroughs and Neighborhoods
postalcode=[]
borough=[]
neighborhood=[]

for tr in table.find_all('tr'): #Iterating through rows 
    td=tr.find_all('td')
    counter = 1
    for cell in td: #Itering through columns
        if counter == 1:
            postalcode.append(cell.find(text=True).rstrip('\n')) #Collecting the Postalcodes
        if counter == 2:
            borough.append(cell.find(text=True).rstrip('\n'))
        if counter == 3:
            neighborhood.append(cell.find(text=True).rstrip('\n'))
            
        counter += 1

### Transform data into Pandas DataFrame

In [5]:
df_toronto = pd.DataFrame({'PostalCode':postalcode,'Borough':borough,'Neighborhood':neighborhood})

In [6]:
#Exclude postal codes with 'Not Assigned' values for Post Code or Borough 
df_toronto = df_toronto[(df_toronto['Borough'] != 'Not assigned') & (df_toronto['PostalCode'] != 'Not assigned')]

#If neighboorhood is not assigned, assign borough name to neighborhood
for i in range(1,df_toronto.shape[0]):
    if (df_toronto.iloc[i,2] == "Not assigned")==True:
        df_toronto.iloc[i,2] = df_toronto.iloc[i,1]

In [7]:
#Group neighbordhoods with same postcode
for i in range(1,df_toronto.shape[0]):
    if (df_toronto.iloc[i,0] == df_toronto.iloc[i-1,0])==True:
        df_toronto.iloc[i-1,0] = 'Not assigned'
        df_toronto.iloc[i,2] = df_toronto.iloc[i-1,2] + ", " + df_toronto.iloc[i,2]
           
df_toronto = df_toronto[df_toronto["PostalCode"] != "Not assigned"]
df_toronto = df_toronto.sort_values(by='PostalCode')
df_toronto = df_toronto.reset_index(drop=True)
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [8]:
df_toronto.shape

(103, 3)

### Use Google maps geocode tp get coordinates for all post codes

In [9]:
#assign your google maps api key to API_KEY
API_KEY = 'AIzaSyAGoNimgEQYEmFIWJyvq6_5ZFW1CaRCzbk'

In [10]:
#Get geographical coordinates for all postal codes
latitude=[]
longitude=[]
for i in df_toronto['PostalCode']:
    geolocator = pgeocode.Nominatim('ca')
    location = geolocator.query_postal_code(i)
    latitude.append(location.latitude)
    longitude.append(location.longitude)

In [15]:
#Add Latitude and Longitude columns to the toronto dataframe
df_toronto['Latitude']=latitude #Adding a column in the main dataframe for Latitude  

df_toronto['Longitude']=longitude #Adding a column in the main dataframe for Longitude
df_toronto.dropna(inplace=True)
df_toronto.to_csv('toronto_part2.csv')
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",-34.889942,-56.079098
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",-34.889942,-56.079098
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7678,-79.1866
3,M1G,Scarborough,Woburn,43.765717,-79.221898
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389


In [16]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(len(df_toronto['Borough'].unique()),df_toronto.shape[0]))

The dataframe has 9 boroughs and 102 neighborhoods.


In [17]:
#Get coordinates for Toronto
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
tor_latitude = location.latitude
tor_longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(tor_latitude, tor_longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


### Create map of Toronto with neighborhoods superimposed on top

In [18]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[tor_latitude, tor_longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Let's dig deeper into Scarborough, one of the borough of Toronto

In [19]:
#Get dataframe for Scarborough
df_scarborough = df_toronto[df_toronto['Borough']=='Scarborough'].reset_index(drop=True)
df_scarborough

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",-34.889942,-56.079098
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",-34.889942,-56.079098
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7678,-79.1866
3,M1G,Scarborough,Woburn,43.765717,-79.221898
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389
5,M1J,Scarborough,Scarborough Village,43.7464,-79.2323
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.7298,-79.2639
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.7122,-79.2843
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.7247,-79.2312
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.6952,-79.2646


In [20]:
#Get coordinates for Scarborough using Nominatim
address = 'Scarborough, Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
scar_latitude = location.latitude
scar_longitude = location.longitude
print('The geograpical coordinates of Scarborough are {}, {}.'.format(scar_latitude, scar_longitude))

The geograpical coordinates of Scarborough are 43.773077, -79.257774.


In [21]:
#Print a map for Scarborough
map_scarborough = folium.Map(location=[scar_latitude,scar_longitude],zoom_start=12)

#add markers to map
for lat,lng,label in zip(df_scarborough['Latitude'],df_scarborough['Longitude'],df_scarborough['Neighborhood']):
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat,lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_scarborough)
    
map_scarborough

### Define FourSquare credentials and version

In [22]:
CLIENT_ID = '1MPXNMPUOYDOFVVMY0RCM4GCF4QOU54JZTHX41WA0DOGQCL1' # your Foursquare ID
CLIENT_SECRET = '2BVD34TECCSGP4RXP4GHKNSCJTNTXTTTBSLPTINBZQV5MD45' # your Foursquare Secret
VERSION = '20190216' # Foursquare API version

print('Aashish Foursquare Credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Aashish Foursquare Credentials:
CLIENT_ID: 1MPXNMPUOYDOFVVMY0RCM4GCF4QOU54JZTHX41WA0DOGQCL1
CLIENT_SECRET:2BVD34TECCSGP4RXP4GHKNSCJTNTXTTTBSLPTINBZQV5MD45


### Let's explore Steeles West neighborhood in Scarborough

In [34]:
#get dataframe for one of the neighborhood. for e.g. Steeles West
# df_scarborough.loc[15,'Neighborhood']
df_scarborough[df_scarborough.Neighborhood.str.contains('Steeles West')]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
15,M1W,Scarborough,"Steeles West, L'Amoreaux West",43.815455,-79.327721


In [35]:
neighborhood_latitude = df_scarborough.loc[15, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_scarborough.loc[15, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_scarborough.loc[15, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Steeles West, L'Amoreaux West are 43.81545466594209, -79.32772103687638.


### Let's get top 100 venues in Steeles West in radius of 500 meters

In [36]:
#Use foursquare api to explore the venues in Steeles West neighborhood
LIMIT=100
radius=500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'\
        .format(CLIENT_ID,CLIENT_SECRET,VERSION,neighborhood_latitude,neighborhood_longitude,radius,LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=1MPXNMPUOYDOFVVMY0RCM4GCF4QOU54JZTHX41WA0DOGQCL1&client_secret=2BVD34TECCSGP4RXP4GHKNSCJTNTXTTTBSLPTINBZQV5MD45&v=20190216&ll=43.81545466594209,-79.32772103687638&radius=500&limit=100'

In [37]:
results = requests.get(url).json()

In [38]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [40]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Pho 88,Vietnamese Restaurant,43.814757,-79.323881
1,D&R Wings 美華茶餐廳,Chinese Restaurant,43.814698,-79.323946
2,Sam Woo BBQ 三和燒臘,BBQ Joint,43.81557,-79.323007
3,Asian Legend 味香村,Chinese Restaurant,43.818846,-79.331058
4,Tai Ping Hsiang B.B.Q 太平香便當,Taiwanese Restaurant,43.818845,-79.330906


In [41]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

21 venues were returned by Foursquare.


### Exploring neighborhoods in Scarborough

In [42]:
#Function to create a dataframe consisting of Neighborhood details and venue details
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [43]:
scarborough_venues = getNearbyVenues(names=df_scarborough['Neighborhood'],
                                   latitudes=df_scarborough['Latitude'],
                                   longitudes=df_scarborough['Longitude']
                                  )

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge


In [44]:
scarborough_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",-34.889942,-56.079098,El Rey del Chivito,-34.893113,-56.077577,Spanish Restaurant
1,"Malvern, Rouge",-34.889942,-56.079098,Plaza Suiza,-34.8922,-56.078327,Plaza
2,"Malvern, Rouge",-34.889942,-56.079098,Magma,-34.892971,-56.07755,Women's Store
3,"Malvern, Rouge",-34.889942,-56.079098,Porto Vanila,-34.893044,-56.077533,Fast Food Restaurant
4,"Malvern, Rouge",-34.889942,-56.079098,Homero Bar,-34.892967,-56.079695,Restaurant


In [45]:
#Grouping venues by Neighborhood
scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,5,5,5,5,5,5
"Birch Cliff, Cliffside West",4,4,4,4,4,4
Cedarbrae,3,3,3,3,3,3
"Clarks Corners, Tam O'Shanter, Sullivan",12,12,12,12,12,12
"Cliffside, Cliffcrest, Scarborough Village West",8,8,8,8,8,8
"Dorset Park, Wexford Heights, Scarborough Town Centre",2,2,2,2,2,2
"Golden Mile, Clairlea, Oakridge",11,11,11,11,11,11
"Guildwood, Morningside, West Hill",30,30,30,30,30,30
"Kennedy Park, Ionview, East Birchmount Park",16,16,16,16,16,16
"Malvern, Rouge",6,6,6,6,6,6


In [46]:
print('There are {} uniques categories.'.format(len(scarborough_venues['Venue Category'].unique())))

There are 70 uniques categories.


### Analyze Each Neighborhood

In [47]:
# one hot encoding
scarborough_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarborough_onehot['Neighborhood'] = scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scarborough_onehot.columns[-1]] + list(scarborough_onehot.columns[:-1])
scarborough_onehot= scarborough_onehot[fixed_columns]

scarborough_onehot.head()

Unnamed: 0,Neighborhood,Asian Restaurant,Auto Garage,BBQ Joint,Badminton Court,Bakery,Bank,Beer Store,Bistro,Breakfast Spot,...,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station,Vietnamese Restaurant,Women's Store,Xinjiang Restaurant
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
scarborough_onehot.shape

(135, 71)

In [49]:
scarborough_grouped = scarborough_onehot.groupby('Neighborhood').mean().reset_index()
scarborough_grouped.head()

Unnamed: 0,Neighborhood,Asian Restaurant,Auto Garage,BBQ Joint,Badminton Court,Bakery,Bank,Beer Store,Bistro,Breakfast Spot,...,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station,Vietnamese Restaurant,Women's Store,Xinjiang Restaurant
0,Agincourt,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0
3,"Clarks Corners, Tam O'Shanter, Sullivan",0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,...,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0
4,"Cliffside, Cliffcrest, Scarborough Village West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [50]:
#Function to sort venues in descending order
def return_most_common_venues(row,num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [59]:
num_top_venues = 10
indicators = ['st','nd','rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarborough_grouped['Neighborhood']

for ind in np.arange(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Shanghai Restaurant,Badminton Court,Skating Rink,Latin American Restaurant,Breakfast Spot,Xinjiang Restaurant,Electronics Store,Convenience Store,Department Store,Discount Store
1,"Birch Cliff, Cliffside West",Café,Skating Rink,General Entertainment,College Stadium,Xinjiang Restaurant,Electronics Store,Convenience Store,Department Store,Discount Store,Dumpling Restaurant
2,Cedarbrae,Construction & Landscaping,Lounge,Trail,Convenience Store,Department Store,Discount Store,Dumpling Restaurant,Electronics Store,Fast Food Restaurant,Xinjiang Restaurant
3,"Clarks Corners, Tam O'Shanter, Sullivan",Pizza Place,Pharmacy,Bus Stop,Gas Station,Intersection,Italian Restaurant,Thai Restaurant,Bank,Fried Chicken Joint,Chinese Restaurant
4,"Cliffside, Cliffcrest, Scarborough Village West",Ice Cream Shop,Bistro,Liquor Store,Pharmacy,Pizza Place,Coffee Shop,Sandwich Place,Beer Store,Bank,Greek Restaurant


### Cluster Neighborhoods

In [60]:
# set number of clusters
kclusters = 5

scarborough_grouped_clustering = scarborough_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarborough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 4, 2, 4, 0, 4, 0])

In [63]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

scarborough_merged = df_scarborough

# merge scarborough_grouped with df_scarborough to add latitude/longitude for each neighborhood
scarborough_merged = scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

scarborough_merged.dropna(inplace=True)
scarborough_merged.reset_index(drop=True)

scarborough_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",-34.889942,-56.079098,0.0,Restaurant,Women's Store,Fast Food Restaurant,Supermarket,Plaza,Spanish Restaurant,Discount Store,Coffee Shop,College Stadium,Construction & Landscaping
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",-34.889942,-56.079098,0.0,Restaurant,Women's Store,Fast Food Restaurant,Supermarket,Plaza,Spanish Restaurant,Discount Store,Coffee Shop,College Stadium,Construction & Landscaping
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7678,-79.1866,0.0,Pizza Place,Coffee Shop,Greek Restaurant,Fast Food Restaurant,Bank,Restaurant,Liquor Store,Medical Center,Mexican Restaurant,Pharmacy
3,M1G,Scarborough,Woburn,43.765717,-79.221898,4.0,Fish & Chips Shop,Indian Restaurant,Coffee Shop,Business Service,Park,Fast Food Restaurant,Department Store,Discount Store,Dumpling Restaurant,Electronics Store
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389,0.0,Construction & Landscaping,Lounge,Trail,Convenience Store,Department Store,Discount Store,Dumpling Restaurant,Electronics Store,Fast Food Restaurant,Xinjiang Restaurant


### Visualizing the clusters in map of Scarborough

In [71]:
# create map
map_clusters = folium.Map(location=[scar_latitude, scar_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarborough_merged['Latitude'], scarborough_merged['Longitude'], scarborough_merged['Neighborhood'],\
                                  scarborough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine neighborhoods in each cluster

In [73]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 0, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,0.0,Restaurant,Women's Store,Fast Food Restaurant,Supermarket,Plaza,Spanish Restaurant,Discount Store,Coffee Shop,College Stadium,Construction & Landscaping
1,Scarborough,0.0,Restaurant,Women's Store,Fast Food Restaurant,Supermarket,Plaza,Spanish Restaurant,Discount Store,Coffee Shop,College Stadium,Construction & Landscaping
2,Scarborough,0.0,Pizza Place,Coffee Shop,Greek Restaurant,Fast Food Restaurant,Bank,Restaurant,Liquor Store,Medical Center,Mexican Restaurant,Pharmacy
4,Scarborough,0.0,Construction & Landscaping,Lounge,Trail,Convenience Store,Department Store,Discount Store,Dumpling Restaurant,Electronics Store,Fast Food Restaurant,Xinjiang Restaurant
9,Scarborough,0.0,Café,Skating Rink,General Entertainment,College Stadium,Xinjiang Restaurant,Electronics Store,Convenience Store,Department Store,Discount Store,Dumpling Restaurant
12,Scarborough,0.0,Shanghai Restaurant,Badminton Court,Skating Rink,Latin American Restaurant,Breakfast Spot,Xinjiang Restaurant,Electronics Store,Convenience Store,Department Store,Discount Store
13,Scarborough,0.0,Pizza Place,Pharmacy,Bus Stop,Gas Station,Intersection,Italian Restaurant,Thai Restaurant,Bank,Fried Chicken Joint,Chinese Restaurant
15,Scarborough,0.0,Chinese Restaurant,Fast Food Restaurant,Park,Pharmacy,Pizza Place,Shopping Mall,Bubble Tea Shop,Korean Restaurant,Electronics Store,Xinjiang Restaurant


In [74]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 1, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Scarborough,1.0,Auto Garage,Xinjiang Restaurant,Fish & Chips Shop,Convenience Store,Department Store,Discount Store,Dumpling Restaurant,Electronics Store,Fast Food Restaurant,Food & Drink Shop


In [75]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 2, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Scarborough,2.0,Asian Restaurant,Bakery,Fish & Chips Shop,Convenience Store,Department Store,Discount Store,Dumpling Restaurant,Electronics Store,Fast Food Restaurant,Food & Drink Shop


In [76]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 3, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Scarborough,3.0,Pharmacy,Sushi Restaurant,Xinjiang Restaurant,Fast Food Restaurant,Convenience Store,Department Store,Discount Store,Dumpling Restaurant,Electronics Store,Fish & Chips Shop


In [77]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 4, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Scarborough,4.0,Fish & Chips Shop,Indian Restaurant,Coffee Shop,Business Service,Park,Fast Food Restaurant,Department Store,Discount Store,Dumpling Restaurant,Electronics Store
5,Scarborough,4.0,Grocery Store,Park,Spa,Xinjiang Restaurant,Electronics Store,Convenience Store,Department Store,Discount Store,Dumpling Restaurant,Fast Food Restaurant
6,Scarborough,4.0,Coffee Shop,Bus Line,Hobby Shop,Hockey Arena,Intersection,Light Rail Station,Discount Store,Department Store,Convenience Store,Pharmacy
7,Scarborough,4.0,Bus Line,Intersection,Bakery,Coffee Shop,Park,Metro Station,Soccer Field,Bus Station,Discount Store,Dumpling Restaurant
8,Scarborough,4.0,Ice Cream Shop,Bistro,Liquor Store,Pharmacy,Pizza Place,Coffee Shop,Sandwich Place,Beer Store,Bank,Greek Restaurant
