# Segmenting and Clustering Neighborhoods in Toronto

In [1]:
#Import required libraries
from bs4 import BeautifulSoup
import requests
import csv
import json
from pandas.io.json import json_normalize
import xml
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

### Download and explore dataset

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source = requests.get(url).text

result = BeautifulSoup(source, 'lxml')#Beautiful Soup to Parse the url page

table=result.find('table') #Finds the required table area

In [3]:
#table #Uncomment to view the table

In [4]:
#List initialization to collect the Postalcodes, Boroughs and Neighborhoods
postalcode=[]
borough=[]
neighborhood=[]

for tr in table.find_all('tr'): #Iterating through rows 
    td=tr.find_all('td')
    counter = 1
    for cell in td: #Itering through columns
        if counter == 1:
            postalcode.append(cell.find(text=True).rstrip('\n')) #Collecting the Postalcodes
        if counter == 2:
            borough.append(cell.find(text=True).rstrip('\n'))
        if counter == 3:
            neighborhood.append(cell.find(text=True).rstrip('\n'))
            
        counter += 1

### Transform data into Pandas DataFrame

In [5]:
df_toronto = pd.DataFrame({'PostalCode':postalcode,'Borough':borough,'Neighborhood':neighborhood})

In [6]:
#Exclude postal codes with 'Not Assigned' values for Post Code or Borough 
df_toronto = df_toronto[df_toronto['Borough'] != 'Not assigned']

#If neighboorhood is not assigned, assign borough name to neighborhood
for i in range(1,df_toronto.shape[0]):
    if (df_toronto.iloc[i,2] == "Not assigned")==True:
        df_toronto.iloc[i,2] = df_toronto.iloc[i,1]

In [7]:
#Group neighbordhoods with same postcode
for i in range(1,df_toronto.shape[0]):
    if (df_toronto.iloc[i,0] == df_toronto.iloc[i-1,0])==True:
        df_toronto.iloc[i-1,0] = 'Not assigned'
        df_toronto.iloc[i,2] = df_toronto.iloc[i-1,2] + ", " + df_toronto.iloc[i,2]
           
df_toronto = df_toronto[df_toronto["PostalCode"] != "Not assigned"]
df_toronto = df_toronto.sort_values(by='PostalCode')
df_toronto = df_toronto.reset_index(drop=True)
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [8]:
df_toronto.shape

(103, 3)

### Use Google maps geocode tp get coordinates for all post codes

In [9]:
#assign your google maps api key to API_KEY
API_KEY = 'AIzaSyAGoNimgEQYEmFIWJyvq6_5ZFW1CaRCzbk'

In [10]:
#Get geographical coordinates for all postal codes
latitude=[]
longitude=[]
for i in df_toronto['PostalCode']:
    j = i + ' toronto'
    url = 'https://maps.googleapis.com/maps/api/geocode/json?key={}&address={}'.format(API_KEY,j)
    response = requests.get(url).json()
    geographical_data = response['results'][0]['geometry']['location']
    latitude.append(geographical_data['lat'])
    longitude.append(geographical_data['lng'])

In [11]:
#Add Latitude and Longitude columns to the toronto dataframe
df_toronto['Latitude']=latitude #Adding a column in the main dataframe for Latitude  

df_toronto['Longitude']=longitude #Adding a column in the main dataframe for Longitude 
df_toronto.to_csv('toronto_part2.csv')
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [12]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(len(df_toronto['Borough'].unique()),df_toronto.shape[0]))

The dataframe has 11 boroughs and 103 neighborhoods.


In [13]:
#Get coordinates for Toronto
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
tor_latitude = location.latitude
tor_longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(tor_latitude, tor_longitude))

The geograpical coordinates of Toronto are 43.653963, -79.387207.


### Create map of Toronto with neighborhoods superimposed on top

In [14]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[tor_latitude, tor_longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Let's dig deeper into Scarborough, one of the borough of Toronto

In [15]:
#Get dataframe for Scarborough
df_scarborough = df_toronto[df_toronto['Borough']=='Scarborough'].reset_index(drop=True)
df_scarborough

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [17]:
#Get coordinates for Scarborough using Nominatim
address = 'Scarborough, Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
scar_latitude = location.latitude
scar_longitude = location.longitude
print('The geograpical coordinates of Scarborough are {}, {}.'.format(scar_latitude, scar_longitude))

The geograpical coordinates of Scarborough are 43.773077, -79.257774.


In [18]:
#Print a map for Scarborough
map_scarborough = folium.Map(location=[scar_latitude,scar_longitude],zoom_start=12)

#add markers to map
for lat,lng,label in zip(df_scarborough['Latitude'],df_scarborough['Longitude'],df_scarborough['Neighborhood']):
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat,lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_scarborough)
    
map_scarborough

### Define FourSquare credentials and version

In [19]:
CLIENT_ID = '1MPXNMPUOYDOFVVMY0RCM4GCF4QOU54JZTHX41WA0DOGQCL1' # your Foursquare ID
CLIENT_SECRET = '2BVD34TECCSGP4RXP4GHKNSCJTNTXTTTBSLPTINBZQV5MD45' # your Foursquare Secret
VERSION = '20190216' # Foursquare API version

print('Aashish Foursquare Credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Aashish Foursquare Credentials:
CLIENT_ID: 1MPXNMPUOYDOFVVMY0RCM4GCF4QOU54JZTHX41WA0DOGQCL1
CLIENT_SECRET:2BVD34TECCSGP4RXP4GHKNSCJTNTXTTTBSLPTINBZQV5MD45


### Let's explore Steeles West neighborhood in Scarborough

In [20]:
#get dataframe for one of the neighborhood. for e.g. Steeles West
df_scarborough.loc[2,'Neighborhood']

'Guildwood, Morningside, West Hill'

In [21]:
neighborhood_latitude = df_scarborough.loc[2, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_scarborough.loc[2, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_scarborough.loc[2, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Guildwood, Morningside, West Hill are 43.7635726, -79.1887115.


### Let's get top 100 venues in Steeles West in radius of 500 meters

In [22]:
#Use foursquare api to explore the venues in Steeles West neighborhood
LIMIT=100
radius=500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'\
        .format(CLIENT_ID,CLIENT_SECRET,VERSION,neighborhood_latitude,neighborhood_longitude,radius,LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=1MPXNMPUOYDOFVVMY0RCM4GCF4QOU54JZTHX41WA0DOGQCL1&client_secret=2BVD34TECCSGP4RXP4GHKNSCJTNTXTTTBSLPTINBZQV5MD45&v=20190216&ll=43.7635726,-79.1887115&radius=500&limit=100'

In [23]:
results = requests.get(url).json()

In [24]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [25]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Swiss Chalet Rotisserie & Grill,Pizza Place,43.767697,-79.189914
1,G & G Electronics,Electronics Store,43.765309,-79.191537
2,Big Bite Burrito,Mexican Restaurant,43.766299,-79.19072
3,Enterprise Rent-A-Car,Rental Car Location,43.764076,-79.193406
4,Woburn Medical Centre,Medical Center,43.766631,-79.192286


In [26]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

6 venues were returned by Foursquare.


### Exploring neighborhoods in Scarborough

In [27]:
#Function to create a dataframe consisting of Neighborhood details and venue details
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [28]:
scarborough_venues = getNearbyVenues(names=df_scarborough['Neighborhood'],
                                   latitudes=df_scarborough['Latitude'],
                                   longitudes=df_scarborough['Longitude']
                                  )

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West, Steeles West
Upper Rouge


In [29]:
scarborough_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant


In [30]:
#Grouping venues by Neighborhood
scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,5,5,5,5,5,5
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",3,3,3,3,3,3
"Birch Cliff, Cliffside West",4,4,4,4,4,4
Cedarbrae,8,8,8,8,8,8
"Clairlea, Golden Mile, Oakridge",9,9,9,9,9,9
"Clarks Corners, Sullivan, Tam O'Shanter",8,8,8,8,8,8
"Cliffcrest, Cliffside, Scarborough Village West",2,2,2,2,2,2
"Dorset Park, Scarborough Town Centre, Wexford Heights",7,7,7,7,7,7
"East Birchmount Park, Ionview, Kennedy Park",6,6,6,6,6,6
"Guildwood, Morningside, West Hill",6,6,6,6,6,6


In [31]:
print('There are {} uniques categories.'.format(len(scarborough_venues['Venue Category'].unique())))

There are 51 uniques categories.


### Analyze Each Neighborhood

In [32]:
# one hot encoding
scarborough_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarborough_onehot['Neighborhood'] = scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scarborough_onehot.columns[-1]] + list(scarborough_onehot.columns[:-1])
scarborough_onehot= scarborough_onehot[fixed_columns]

scarborough_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Brewery,Bus Line,...,Pizza Place,Playground,Rental Car Location,Sandwich Place,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Train Station,Vietnamese Restaurant
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
scarborough_onehot.shape

(84, 52)

In [34]:
scarborough_grouped = scarborough_onehot.groupby('Neighborhood').mean().reset_index()
scarborough_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Brewery,Bus Line,...,Pizza Place,Playground,Rental Car Location,Sandwich Place,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Train Station,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,...,0.0,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0
1,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.125,0.0,0.125,0.125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0
4,"Clairlea, Golden Mile, Oakridge",0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.222222,...,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0


In [35]:
#Function to sort venues in descending order
def return_most_common_venues(row,num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [36]:
num_top_venues = 10
indicators = ['st','nd','rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarborough_grouped['Neighborhood']

for ind in np.arange(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Skating Rink,Sandwich Place,Breakfast Spot,Lounge,Vietnamese Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store
1,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store,Department Store
2,"Birch Cliff, Cliffside West",General Entertainment,Skating Rink,Café,College Stadium,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store
3,Cedarbrae,Caribbean Restaurant,Bakery,Fried Chicken Joint,Lounge,Bank,Hakka Restaurant,Thai Restaurant,Athletics & Sports,Auto Garage,Convenience Store
4,"Clairlea, Golden Mile, Oakridge",Bakery,Bus Line,Soccer Field,Intersection,Food Truck,Fast Food Restaurant,Park,Vietnamese Restaurant,Coffee Shop,Fried Chicken Joint


### Cluster Neighborhoods

In [37]:
# set number of clusters
kclusters = 5

scarborough_grouped_clustering = scarborough_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarborough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 2, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [38]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

scarborough_merged = df_scarborough

# merge scarborough_grouped with df_scarborough to add latitude/longitude for each neighborhood
scarborough_merged = scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

scarborough_merged.dropna(inplace=True)
scarborough_merged.reset_index(drop=True)

scarborough_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,3.0,Fast Food Restaurant,Vietnamese Restaurant,Chinese Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Electronics Store,Discount Store,Department Store,Convenience Store
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,0.0,Bar,Vietnamese Restaurant,Chinese Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store,Department Store
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Electronics Store,Rental Car Location,Breakfast Spot,Pizza Place,Medical Center,Mexican Restaurant,Vietnamese Restaurant,Chinese Restaurant,Food Truck,Fast Food Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Korean Restaurant,Vietnamese Restaurant,Hakka Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Caribbean Restaurant,Bakery,Fried Chicken Joint,Lounge,Bank,Hakka Restaurant,Thai Restaurant,Athletics & Sports,Auto Garage,Convenience Store


### Visualizing the clusters in map of Scarborough

In [39]:
# create map
map_clusters = folium.Map(location=[scar_latitude, scar_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarborough_merged['Latitude'], scarborough_merged['Longitude'], scarborough_merged['Neighborhood'],\
                                  scarborough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine neighborhoods in each cluster

In [40]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 0, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,0.0,Bar,Vietnamese Restaurant,Chinese Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store,Department Store


In [41]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 1, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,1.0,Electronics Store,Rental Car Location,Breakfast Spot,Pizza Place,Medical Center,Mexican Restaurant,Vietnamese Restaurant,Chinese Restaurant,Food Truck,Fast Food Restaurant
3,Scarborough,1.0,Coffee Shop,Korean Restaurant,Vietnamese Restaurant,Hakka Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store
4,Scarborough,1.0,Caribbean Restaurant,Bakery,Fried Chicken Joint,Lounge,Bank,Hakka Restaurant,Thai Restaurant,Athletics & Sports,Auto Garage,Convenience Store
6,Scarborough,1.0,Discount Store,Train Station,Department Store,Convenience Store,Coffee Shop,Hakka Restaurant,Grocery Store,General Entertainment,Fried Chicken Joint,Food Truck
7,Scarborough,1.0,Bakery,Bus Line,Soccer Field,Intersection,Food Truck,Fast Food Restaurant,Park,Vietnamese Restaurant,Coffee Shop,Fried Chicken Joint
8,Scarborough,1.0,American Restaurant,Motel,Hakka Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store,Department Store
9,Scarborough,1.0,General Entertainment,Skating Rink,Café,College Stadium,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store
10,Scarborough,1.0,Indian Restaurant,Pet Store,Brewery,Chinese Restaurant,Latin American Restaurant,Vietnamese Restaurant,Sandwich Place,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
11,Scarborough,1.0,Middle Eastern Restaurant,Auto Garage,Smoke Shop,Sandwich Place,Breakfast Spot,Vietnamese Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store
12,Scarborough,1.0,Chinese Restaurant,Skating Rink,Sandwich Place,Breakfast Spot,Lounge,Vietnamese Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store


In [42]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 2, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Scarborough,2.0,Park,Playground,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store,Department Store


In [43]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 3, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,3.0,Fast Food Restaurant,Vietnamese Restaurant,Chinese Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Electronics Store,Discount Store,Department Store,Convenience Store


In [44]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 4, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,4.0,Playground,Vietnamese Restaurant,Chinese Restaurant,General Entertainment,Fried Chicken Joint,Food Truck,Fast Food Restaurant,Electronics Store,Discount Store,Department Store
