## Week 3 assignment of Capstone week from IBM Professional Certificate in Data Science

### 1: Downloading and Preparing Dataset

In [2]:
import os
from io import StringIO

import pandas as pd
import requests

In [4]:
# Fetch the Wikipedia page and parse the first HTML table found on it
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source = requests.get(url, timeout=30)  # bounded wait so a stalled connection cannot hang the kernel
source.raise_for_status()  # stop on HTTP errors instead of trying to parse an error page
# read_html expects a path, URL or file-like object; wrapping the markup in StringIO
# avoids the deprecated "literal HTML string" code path of newer pandas releases
source_data = pd.read_html(StringIO(source.text))[0]
source_data

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [5]:
# keep only the rows whose Borough has been assigned, renumbering the index from 0
clean_data = (
    source_data
    .loc[lambda frame: frame['Borough'] != 'Not assigned']
    .reset_index(drop=True)
)
clean_data

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


Let's verify if there are any Not assigned Neighborhoods:

In [6]:
# rows (if any) whose Neighbourhood is still the 'Not assigned' placeholder
clean_data.loc[clean_data['Neighbourhood'].eq('Not assigned')]

Unnamed: 0,Postal Code,Borough,Neighbourhood


There are no not assigned neighborhoods.

In [7]:
# report the (rows, columns) size of the cleaned table
print("Shape of Clean Data, with no 'Not assigned' Boroughs is ", clean_data.shape, sep=' ')

Shape of Clean Data, with no 'Not assigned' Boroughs is  (103, 3)


### 2: Adding Latitude and Longitude values

In [8]:
!pip install geocoder
import geocoder # import geocoder



#### After trying a few times, the Geocoder function didn't work: it was mostly returning `None` results. I will therefore proceed with the CSV file.

In [9]:
# Load the geospatial CSV (one latitude/longitude pair per postal code) into a DataFrame
path='http://cocl.us/Geospatial_data'
geocodes_data = pd.read_csv(filepath_or_buffer=path)
geocodes_data.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
# Attach the coordinates to every postal code of clean_data.
# validate='many_to_one' makes pandas raise a MergeError if the geocode table lists a
# postal code more than once, instead of silently duplicating rows in the result.
complete_data = clean_data.merge(
    geocodes_data,
    on='Postal Code',
    how='left',
    validate='many_to_one',
)
complete_data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


### 3: Exploring and Clustering the neighborhoods in Toronto

In [11]:
!pip install geopy



In [12]:
!pip install folium



In [13]:
import numpy as np

import json
from pandas.io.json import json_normalize

# import for geo location exercise
from geopy.geocoders import Nominatim

# libraries for map visualization
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium

# import k-means from clustering stage
from sklearn.cluster import KMeans

print('Libraries imported.')

Libraries imported.


#### Let's start by visualizing the Toronto area map with neighborhoods in it.

In [14]:
# Get coordinates (latitude and longitude) of Toronto

address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="geo_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the service finds no match; fail with a clear message
# rather than an AttributeError on the attribute access below
if location is None:
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto as {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto as 43.6534817, -79.3839347.


In [16]:
# create map of Toronto, centred on the coordinates obtained above
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per row of complete_data, with the neighbourhood name as popup
for lat, lng, neighbourhood in zip(complete_data['Latitude'], complete_data['Longitude'], complete_data['Neighbourhood']):
    # parse_html belongs to the Popup; it is not passed to CircleMarker, where it is
    # not a documented option
    popup = folium.Popup(neighbourhood, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

#### Now, like the exercise done with New York city, we will get the top 100 venues in each neighborhood.

Note that, in the case of Toronto, sometimes two or more neighborhoods are listed under the same postal code and consequently share the same latitude and longitude coordinates. So what we will end up getting is actually the top 100 venues for each postal code, which together still represent all neighborhoods in Toronto.

In [17]:
# Foursquare credentials, API version and result limit (we use the standard 100).
# The secrets are read from environment variables so they never live in the notebook:
# set FOURSQUARE_CLIENT_ID, FOURSQUARE_CLIENT_SECRET and FOURSQUARE_ACCESS_TOKEN
# before starting the kernel.
# NOTE: any key that has ever been stored in plain text in a notebook should be regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
ACCESS_TOKEN = os.environ.get('FOURSQUARE_ACCESS_TOKEN', '')
VERSION = '20180604'
LIMIT = 100

# warn early: without credentials the API calls further down cannot succeed
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set.')

In [18]:
# Like the exercise done with New York, let's define the function that pulls information on top 100 venues for all coordinates we input

def getNearbyVenues(pcodes, names, latitudes, longitudes, radius=500):
    """Query the Foursquare `venues/explore` endpoint once per location.

    Parameters
    ----------
    pcodes, names, latitudes, longitudes : iterables
        Parallel sequences that are walked together with ``zip``; element *i* of each
        describes one location (postal code, neighbourhood name, latitude, longitude).
    radius : int, default 500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Postal Code', 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The frame is empty (but still has these
        columns) when no venue is returned for any location.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    columns = ['Postal Code','Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []  # one tuple per venue, across all locations
    for code, name, lat, lng in zip(pcodes, names, latitudes, longitudes):
        print(code,":",name)

        # let requests assemble and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; bounded wait, and HTTP errors surface immediately
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()

        # a location without recommendations contributes no rows
        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                code,
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # venues without any category get a missing value instead of raising
                categories[0]['name'] if categories else None))

    # naming the columns at construction also works when venue_rows is empty
    return pd.DataFrame(venue_rows, columns=columns)

In [19]:
# Now we will run the above code for all Neighborhoods.
# Every argument is taken from complete_data so each postal code is paired with the
# coordinates stored in its own row, rather than relying on two separate frames
# happening to share the same row order.
toronto_venues = getNearbyVenues(pcodes=complete_data['Postal Code'],
                                names=complete_data['Neighbourhood'],
                                latitudes=complete_data['Latitude'],
                                longitudes=complete_data['Longitude']
                                  )

M3A : Parkwoods
M4A : Victoria Village
M5A : Regent Park, Harbourfront
M6A : Lawrence Manor, Lawrence Heights
M7A : Queen's Park, Ontario Provincial Government
M9A : Islington Avenue, Humber Valley Village
M1B : Malvern, Rouge
M3B : Don Mills
M4B : Parkview Hill, Woodbine Gardens
M5B : Garden District, Ryerson
M6B : Glencairn
M9B : West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
M1C : Rouge Hill, Port Union, Highland Creek
M3C : Don Mills
M4C : Woodbine Heights
M5C : St. James Town
M6C : Humewood-Cedarvale
M9C : Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
M1E : Guildwood, Morningside, West Hill
M4E : The Beaches
M5E : Berczy Park
M6E : Caledonia-Fairbanks
M1G : Woburn
M4G : Leaside
M5G : Central Bay Street
M6G : Christie
M1H : Cedarbrae
M2H : Hillcrest Village
M3H : Bathurst Manor, Wilson Heights, Downsview North
M4H : Thorncliffe Park
M5H : Richmond, Adelaide, King
M6H : Dufferin, Dovercourt Village
M1J : Scarborough Village
M2J : Fairview, Hen

In [20]:
# print the (rows, columns) size of the venue table, then preview its first rows
print(str(toronto_venues.shape))
toronto_venues.head(n=5)

(2133, 8)


Unnamed: 0,Postal Code,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M3A,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,M3A,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,M4A,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,M4A,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
4,M4A,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [21]:
# count the non-missing entries of every column, per Postal Code
toronto_venues.groupby(by='Postal Code').count()

Unnamed: 0_level_0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
M1B,2,2,2,2,2,2,2
M1C,3,3,3,3,3,3,3
M1E,8,8,8,8,8,8,8
M1G,3,3,3,3,3,3,3
M1H,9,9,9,9,9,9,9
...,...,...,...,...,...,...,...
M9M,1,1,1,1,1,1,1
M9P,7,7,7,7,7,7,7
M9R,4,4,4,4,4,4,4
M9V,8,8,8,8,8,8,8


An important observation here is that we only got 98 rows, which means that our Foursquare request found no venues for 5 of the 103 postal codes originally listed in our complete_data dataframe.

In [22]:
# how many distinct venue categories appear in the venue table
print('There are {} uniques categories.'.format(toronto_venues['Venue Category'].unique().size))

There are 273 uniques categories.


#### We will now create a DataFrame showing the top 10 most frequent venue categories for each neighborhood

In [23]:
# one hot encoding: one indicator column per venue category, named after the category
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add the postal code of each venue back to the dataframe
toronto_onehot['Postal Code'] = toronto_venues['Postal Code']

# move the last column (the postal code just added) to the front
*leading_columns, last_column = toronto_onehot.columns
fixed_columns = [last_column, *leading_columns]
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Postal Code,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# Average the indicator columns per postal code: each value becomes the share of that
# postal code's venues that fall in the category
toronto_grouped = toronto_onehot.groupby('Postal Code', as_index=False).mean()
toronto_grouped

Unnamed: 0,Postal Code,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,M9M,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
94,M9P,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
95,M9R,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,M9V,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
# function to present venue categories in descending order
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first entry of `row` is skipped (in this notebook it holds the postal code);
    the remaining entries are ordered from largest to smallest value and the index
    labels of the first `num_top_venues` of them are returned as an array.
    """
    frequencies = row.iloc[1:]
    ranked_labels = frequencies.sort_values(ascending=False).index
    return ranked_labels.values[:num_top_venues]

In [28]:
# finally, creating a DataFrame with top 10 venue categories for each postal code

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postal Code']
for ind in np.arange(num_top_venues):
    # ranks 1-3 take 'st'/'nd'/'rd' and every later rank takes 'th'; an explicit bounds
    # check is used rather than catching an exception, so unrelated errors stay visible
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per postal code of toronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Postal Code'] = toronto_grouped['Postal Code']

# fill the rank columns of each row from the matching row of toronto_grouped
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Print Shop,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Drugstore,Department Store,Donut Shop
1,M1C,Moving Target,Bar,Home Service,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
2,M1E,Mexican Restaurant,Medical Center,Restaurant,Rental Car Location,Bank,Intersection,Electronics Store,Breakfast Spot,Eastern European Restaurant,Drugstore
3,M1G,Coffee Shop,Korean BBQ Restaurant,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,M1H,Lounge,Gas Station,Bank,Fried Chicken Joint,Thai Restaurant,Caribbean Restaurant,Athletics & Sports,Bakery,Hakka Restaurant,Eastern European Restaurant


In [29]:
# verify the size of the dataframe: displays its (rows, columns) tuple
neighborhoods_venues_sorted.shape

(98, 11)

#### We will now cluster the Toronto neighborhoods based on the venue-category frequencies of each postal code.

In [30]:
#Run _k_-means to cluster the postal codes into 5 clusters.

kclusters = 5
# the keyword form is required: recent pandas no longer accepts the axis positionally
toronto_grouped_clustering = toronto_grouped.drop(columns='Postal Code')
# n_init is pinned so the result does not depend on the installed scikit-learn's default
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)
print(kmeans.labels_) # verify labels
print("number of results: ", kmeans.labels_.shape)

[1 1 1 1 1 4 1 1 1 1 1 1 1 1 3 1 1 1 1 1 3 1 3 1 1 1 1 3 1 0 1 1 1 1 1 1 1
 3 1 1 1 3 1 1 1 3 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1
 1 1 3 1 1 1 1 1 1 1 1 1 1 1 3 1 2 1 1 0 1 1 1 1]
number of results:  (98,)


In [31]:
# Let's add the cluster labels to our dataframe.
# Any 'Cluster Labels' column left by a previous run is removed first: DataFrame.insert
# raises ValueError when the column already exists, which would make this cell fail
# the second time it is executed.
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_venues_sorted.insert(1, 'Cluster Labels', kmeans.labels_)
neighborhoods_venues_sorted

Unnamed: 0,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,1,Fast Food Restaurant,Print Shop,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Drugstore,Department Store,Donut Shop
1,M1C,1,Moving Target,Bar,Home Service,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
2,M1E,1,Mexican Restaurant,Medical Center,Restaurant,Rental Car Location,Bank,Intersection,Electronics Store,Breakfast Spot,Eastern European Restaurant,Drugstore
3,M1G,1,Coffee Shop,Korean BBQ Restaurant,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,M1H,1,Lounge,Gas Station,Bank,Fried Chicken Joint,Thai Restaurant,Caribbean Restaurant,Athletics & Sports,Bakery,Hakka Restaurant,Eastern European Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...
93,M9M,0,Baseball Field,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Fast Food Restaurant
94,M9P,1,Pizza Place,Sandwich Place,Coffee Shop,Discount Store,Chinese Restaurant,Intersection,Dog Run,Dim Sum Restaurant,Diner,Distribution Center
95,M9R,1,Pizza Place,Sandwich Place,Bus Line,Mobile Phone Shop,Dog Run,Diner,Discount Store,Distribution Center,Doner Restaurant,Dessert Shop
96,M9V,1,Grocery Store,Pizza Place,Pharmacy,Fast Food Restaurant,Fried Chicken Joint,Beer Store,Sandwich Place,Distribution Center,Dim Sum Restaurant,Diner


In [32]:
# bring borough, neighbourhood and coordinates back in for every clustered postal code
clustered_data = pd.merge(
    left=neighborhoods_venues_sorted,
    right=complete_data,
    how='left',
    on='Postal Code',
)
clustered_data

Unnamed: 0,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Borough,Neighbourhood,Latitude,Longitude
0,M1B,1,Fast Food Restaurant,Print Shop,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Drugstore,Department Store,Donut Shop,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,1,Moving Target,Bar,Home Service,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,1,Mexican Restaurant,Medical Center,Restaurant,Rental Car Location,Bank,Intersection,Electronics Store,Breakfast Spot,Eastern European Restaurant,Drugstore,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,1,Coffee Shop,Korean BBQ Restaurant,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Scarborough,Woburn,43.770992,-79.216917
4,M1H,1,Lounge,Gas Station,Bank,Fried Chicken Joint,Thai Restaurant,Caribbean Restaurant,Athletics & Sports,Bakery,Hakka Restaurant,Eastern European Restaurant,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,M9M,0,Baseball Field,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Fast Food Restaurant,North York,"Humberlea, Emery",43.724766,-79.532242
94,M9P,1,Pizza Place,Sandwich Place,Coffee Shop,Discount Store,Chinese Restaurant,Intersection,Dog Run,Dim Sum Restaurant,Diner,Distribution Center,Etobicoke,Westmount,43.696319,-79.532242
95,M9R,1,Pizza Place,Sandwich Place,Bus Line,Mobile Phone Shop,Dog Run,Diner,Discount Store,Distribution Center,Doner Restaurant,Dessert Shop,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
96,M9V,1,Grocery Store,Pizza Place,Pharmacy,Fast Food Restaurant,Fried Chicken Joint,Beer Store,Sandwich Place,Distribution Center,Dim Sum Restaurant,Diner,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [33]:
# put the location columns first, then the cluster label, then the ranked venue columns
columns = [
    'Postal Code',
    'Borough',
    'Neighbourhood',
    'Latitude',
    'Longitude',
    'Cluster Labels',
    '1st Most Common Venue',
    '2nd Most Common Venue',
    '3rd Most Common Venue',
    '4th Most Common Venue',
    '5th Most Common Venue',
    '6th Most Common Venue',
    '7th Most Common Venue',
    '8th Most Common Venue',
    '9th Most Common Venue',
    '10th Most Common Venue',
]
clustered_data = clustered_data.loc[:, columns]
clustered_data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,1,Fast Food Restaurant,Print Shop,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Drugstore,Department Store,Donut Shop
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1,Moving Target,Bar,Home Service,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1,Mexican Restaurant,Medical Center,Restaurant,Rental Car Location,Bank,Intersection,Electronics Store,Breakfast Spot,Eastern European Restaurant,Drugstore
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1,Coffee Shop,Korean BBQ Restaurant,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1,Lounge,Gas Station,Bank,Fried Chicken Joint,Thai Restaurant,Caribbean Restaurant,Athletics & Sports,Bakery,Hakka Restaurant,Eastern European Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,M9M,North York,"Humberlea, Emery",43.724766,-79.532242,0,Baseball Field,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Fast Food Restaurant
94,M9P,Etobicoke,Westmount,43.696319,-79.532242,1,Pizza Place,Sandwich Place,Coffee Shop,Discount Store,Chinese Restaurant,Intersection,Dog Run,Dim Sum Restaurant,Diner,Distribution Center
95,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,1,Pizza Place,Sandwich Place,Bus Line,Mobile Phone Shop,Dog Run,Diner,Discount Store,Distribution Center,Doner Restaurant,Dessert Shop
96,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,1,Grocery Store,Pizza Place,Pharmacy,Fast Food Restaurant,Fried Chicken Joint,Beer Store,Sandwich Place,Distribution Center,Dim Sum Restaurant,Diner


#### We now have all Toronto neighborhoods clustered. Let's visualize it:

In [34]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(clustered_data['Latitude'], clustered_data['Longitude'], clustered_data['Neighbourhood'], clustered_data['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # labels run from 0 to kclusters-1, so they index the palette directly
    # (cluster-1 would send label 0 to the last colour through negative indexing)
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

#### Let's now examine our 5 clusters:

In [35]:
# postal codes assigned to cluster label 0
clustered_data.loc[clustered_data['Cluster Labels'].eq(0)]

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,M3M,North York,Downsview,43.728496,-79.495697,0,Food Truck,Baseball Field,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dim Sum Restaurant
93,M9M,North York,"Humberlea, Emery",43.724766,-79.532242,0,Baseball Field,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Fast Food Restaurant


In [36]:
# postal codes assigned to cluster label 1
clustered_data.loc[clustered_data['Cluster Labels'].eq(1)]

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,1,Fast Food Restaurant,Print Shop,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Drugstore,Department Store,Donut Shop
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1,Moving Target,Bar,Home Service,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1,Mexican Restaurant,Medical Center,Restaurant,Rental Car Location,Bank,Intersection,Electronics Store,Breakfast Spot,Eastern European Restaurant,Drugstore
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1,Coffee Shop,Korean BBQ Restaurant,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1,Lounge,Gas Station,Bank,Fried Chicken Joint,Thai Restaurant,Caribbean Restaurant,Athletics & Sports,Bakery,Hakka Restaurant,Eastern European Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,M9L,North York,Humber Summit,43.756303,-79.565963,1,Gym,Shopping Mall,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
94,M9P,Etobicoke,Westmount,43.696319,-79.532242,1,Pizza Place,Sandwich Place,Coffee Shop,Discount Store,Chinese Restaurant,Intersection,Dog Run,Dim Sum Restaurant,Diner,Distribution Center
95,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,1,Pizza Place,Sandwich Place,Bus Line,Mobile Phone Shop,Dog Run,Diner,Discount Store,Distribution Center,Doner Restaurant,Dessert Shop
96,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,1,Grocery Store,Pizza Place,Pharmacy,Fast Food Restaurant,Fried Chicken Joint,Beer Store,Sandwich Place,Distribution Center,Dim Sum Restaurant,Diner


In [37]:
# postal codes assigned to cluster label 2
clustered_data.loc[clustered_data['Cluster Labels'].eq(2)]

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
90,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724,2,Filipino Restaurant,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore


In [38]:
# postal codes assigned to cluster label 3
clustered_data.loc[clustered_data['Cluster Labels'].eq(3)]

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,M1V,Scarborough,"Milliken, Agincourt North, Steeles East, L'Amo...",43.815252,-79.284577,3,Playground,Park,Arts & Crafts Store,Intersection,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center
20,M2P,North York,York Mills West,43.752758,-79.400049,3,Park,Convenience Store,Yoga Studio,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
22,M3A,North York,Parkwoods,43.753259,-79.329656,3,Food & Drink Shop,Park,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio
27,M3K,North York,Downsview,43.737473,-79.464763,3,Airport,Park,Yoga Studio,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
37,M4J,East York,"East Toronto, Broadview North (Old East York)",43.685347,-79.338106,3,Park,Convenience Store,Intersection,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
41,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,3,Business Service,Park,Bus Line,Swim School,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Yoga Studio
45,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,3,Park,Lawyer,Restaurant,Trail,Yoga Studio,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Distribution Center
47,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,3,Park,Playground,Trail,Yoga Studio,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center
71,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512,3,Park,Women's Store,Pool,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
76,M6L,North York,"North Park, Maple Leaf Park, Upwood Park",43.713756,-79.490074,3,Basketball Court,Park,Bakery,Construction & Landscaping,Yoga Studio,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore


In [39]:
# postal codes assigned to cluster label 4
clustered_data.loc[clustered_data['Cluster Labels'].eq(4)]

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,4,Playground,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore


From our exploration above, it seems that our 5 clusters (cluster labels 0 to 4, listed in that order) have the following profiles:
1. Baseball Field area
2. General residential
3. Filipino community
4. Parks and Entertainment
5. Big playground