## Part 1: Load and explore the data

Build a dataframe of the postal code of each neighborhood along with the borough name and neighborhood name

In [1]:
import pandas as pd
import numpy as np

Scrape the Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order to obtain the data that is in the table of postal codes and to transform the data into a pandas dataframe.

In [2]:
# Wikipedia page that lists the postal codes starting with "M"
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# pd.read_html parses the HTML tables on the page into a list of DataFrame objects
df = pd.read_html(url)
# the first parsed table is taken as the postal-code table
neighborhood = df[0]
neighborhood.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Data Cleaning

Ignore cells with a borough that is Not assigned.

In [3]:

# Keep only the rows whose borough has been assigned
is_assigned = neighborhood['Borough'] != 'Not assigned'
neighborhood = neighborhood[is_assigned]
neighborhood.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [4]:
# The filter above left gaps in the row labels; renumber them from 0
neighborhood = neighborhood.reset_index(drop=True)
neighborhood.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
# (rows, columns) of the cleaned postal-code table
neighborhood.shape

(103, 3)

# Part 2: Use csv file to create dataframe with longitude and latitude values

In [6]:
# Read the geospatial CSV (Postal Code, Latitude, Longitude columns) from its URL
coord = pd.read_csv('http://cocl.us/Geospatial_data')
coord.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [7]:
# Attach coordinates to each postal code (default inner join on the shared key)
df = neighborhood.merge(coord, on='Postal Code')

In [8]:
# Preview the merged postal-code / coordinate table
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


# Part 3: Explore and cluster the neighborhoods in Toronto.

In [9]:

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library
import requests

<b>Use geopy library to get the latitude and longitude values of Toronto.</b>
In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent to_explorer, as shown below.

In [10]:
address = 'Toronto'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the service finds no match; fail with a clear
# message here instead of an AttributeError on the attribute reads below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


<b>Create a map of Toronto with Postal code locations superimposed on top.</b>

In [11]:
# Base map centred on the geocoded coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row of df, with a "<neighbourhood>, <borough>" popup
marker_rows = zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood'])
for lat, lng, borough, neigh in marker_rows:
    popup_text = '{}, {}'.format(neigh, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

<b>Foursquare Credentials and Version</b>

In [12]:
# Credentials are read from the environment so they never get committed with
# the notebook. Set FOURSQUARE_CLIENT_ID, FOURSQUARE_CLIENT_SECRET and
# FOURSQUARE_ACCESS_TOKEN before starting the kernel; a missing variable
# raises KeyError naming it.
import os

CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
ACCESS_TOKEN = os.environ['FOURSQUARE_ACCESS_TOKEN']
VERSION = '20180605' # Foursquare API version

In [None]:
## Foursquare Credentials and Version

In [13]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue record.

    Parameters
    ----------
    row : mapping-like
        Looked up with the key 'categories' first; when that key is missing,
        'venue.categories' is used instead.

    Returns
    -------
    The 'name' entry of the first category, or None when the category list
    is empty.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

# Explore Neighborhoods in Toronto

Create a function that retrieves the venues near each neighbourhood in Toronto

In [14]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=None):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Iterated in parallel with ``zip``; one API request is made per triple.
    radius : int, default 500
        Sent as the ``radius`` query parameter.
    limit : int, optional
        Sent as the ``limit`` query parameter. When None, the module-level
        ``LIMIT`` is used, as before.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned.

    Raises
    ------
    requests.HTTPError
        If a request comes back with an error status code.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'oauth_token': ACCESS_TOKEN,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT if limit is None else limit,
        }

        # make the GET request; the timeout keeps a stalled connection from
        # hanging the notebook, and error statuses fail loudly here instead of
        # as a KeyError on the JSON below
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            categories = venue['categories']
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may carry no category at all
                categories[0]['name'] if categories else None))

    return pd.DataFrame(venue_rows, columns=columns)

In [15]:
# defining radius and limit of venues to get
# radius: search radius value (presumably metres - TODO confirm against the API docs)
radius=500
# LIMIT: read as a global by getNearbyVenues for the API `limit` parameter
LIMIT=100

In [16]:
# Pass the configured `radius` explicitly; without it the function silently
# falls back to its own default and edits to `radius` have no effect.
toronto_venues = getNearbyVenues(names=df['Neighbourhood'],
                                 latitudes=df['Latitude'],
                                 longitudes=df['Longitude'],
                                 radius=radius)

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [17]:
# Print the (rows, columns) shape, then preview the first rows of the venues table
print(toronto_venues.shape)
toronto_venues.head()

(3100, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Careful & Reliable Painting,43.752622,-79.331957,Construction & Landscaping
2,Parkwoods,43.753259,-79.329656,649 Variety,43.754513,-79.331942,Convenience Store
3,Parkwoods,43.753259,-79.329656,Towns On The Ravine,43.754754,-79.332552,Hotel
4,Parkwoods,43.753259,-79.329656,Sun Life,43.75476,-79.332783,Construction & Landscaping


Let's check how many venues were returned for each Neighbourhood

In [18]:
# Only the last expression of a cell is displayed, so the per-neighbourhood
# counts go last; the overall shape is printed first.
print(toronto_venues.shape)
toronto_venues.groupby('Neighbourhood').count()

(3100, 7)

Let's find out how many unique categories can be curated from all the returned venues

In [19]:
# Number of distinct venue categories (missing values counted, as with unique())
n_categories = toronto_venues['Venue Category'].nunique(dropna=False)
print('There are {} uniques categories.'.format(n_categories))

There are 321 uniques categories.


# Analyze Each Neighbourhood

In [20]:
# one hot encoding
# one indicator column per venue category
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Put the neighbourhood name in as the first column. insert() raises
# ValueError if a venue category already uses that name, instead of silently
# overwriting the category column and then moving an unrelated column first.
toronto_onehot.insert(0, 'Neighbourhood', toronto_venues['Neighbourhood'])

toronto_onehot.head()

Unnamed: 0,Neighbourhood,ATM,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Watch Shop,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Group rows by Neighbourhood, taking the mean of the frequency of occurrence of each category

In [21]:
# Mean of each indicator column per neighbourhood, with the neighbourhood kept
# as a regular column rather than as the index
toronto_grouped = toronto_onehot.groupby('Neighbourhood', as_index=False).mean()
toronto_grouped

Unnamed: 0,Neighbourhood,ATM,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Watch Shop,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
1,"Alderwood, Long Branch",0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
3,Bayview Village,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
4,"Bedford Park, Lawrence Manor East",0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,"Willowdale, Willowdale West",0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
94,Woburn,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
95,Woodbine Heights,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
96,York Mills West,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0


Print each neighborhood along with the top 5 most common venues

In [22]:
num_top_venues = 5

for code in toronto_grouped['Neighbourhood']:
    print("----"+code+"----")
    # transpose the matching row so that each category becomes one record
    freq_table = toronto_grouped[toronto_grouped['Neighbourhood'] == code].T.reset_index()
    freq_table.columns = ['venue','freq']
    temp = (
        freq_table.iloc[1:]  # the first record holds the neighbourhood name itself
        .assign(freq=lambda t: t['freq'].astype(float))
        .round({'freq': 2})
    )
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Agincourt----
                       venue  freq
0            Fireworks Store  0.14
1             Hardware Store  0.14
2             Clothing Store  0.14
3                     Lounge  0.14
4  Latin American Restaurant  0.14


----Alderwood, Long Branch----
                venue  freq
0         Pizza Place   0.2
1            Pharmacy   0.1
2                Pool   0.1
3  Athletics & Sports   0.1
4         Coffee Shop   0.1


----Bathurst Manor, Wilson Heights, Downsview North----
               venue  freq
0     Ice Cream Shop  0.06
1  Mobile Phone Shop  0.06
2           Pharmacy  0.06
3        Coffee Shop  0.06
4                Spa  0.06


----Bayview Village----
                 venue  freq
0                  Spa  0.17
1                  Gym  0.17
2                 Café  0.17
3                 Bank  0.17
4  Japanese Restaurant  0.17


----Bedford Park, Lawrence Manor East----
                venue  freq
0                 Spa  0.09
1  Italian Restaurant  0.06
2         Pizza Place  

## Put that into a pandas dataframe

In [23]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in ``row``, skipping its first entry.

    The first element of ``row`` is left out, the remaining values are sorted
    in descending order, and the index labels of the first ``num_top_venues``
    of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

Create the new dataframe and display the top 10 venues for each neighborhood.

In [24]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    rank = int(ind) + 1
    last_digit = rank % 10
    # 11th-13th are irregular; otherwise the last digit picks 'st'/'nd'/'rd',
    # and everything else takes 'th' (so 21 -> '21st', not '21th')
    if rank % 100 in (11, 12, 13) or not 1 <= last_digit <= len(indicators):
        suffix = 'th'
    else:
        suffix = indicators[last_digit - 1]
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

# fill the venue columns of each row with that neighbourhood's top categories
for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Breakfast Spot,Clothing Store,Hardware Store,Fireworks Store,Latin American Restaurant,Skating Rink,Lounge,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
1,"Alderwood, Long Branch",Pizza Place,Pharmacy,Gym,Athletics & Sports,Pub,Dance Studio,Coffee Shop,Pool,Skating Rink,Event Service
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Ice Cream Shop,Bank,Mobile Phone Shop,Spa,Pharmacy,Diner,Deli / Bodega,Intersection,Frozen Yogurt Shop
3,Bayview Village,Bank,Café,Spa,Japanese Restaurant,Chinese Restaurant,Gym,Yoga Studio,Empanada Restaurant,Dry Cleaner,Dumpling Restaurant
4,"Bedford Park, Lawrence Manor East",Spa,Italian Restaurant,Pizza Place,Hobby Shop,Massage Studio,Sandwich Place,Sushi Restaurant,Business Service,Coffee Shop,Thai Restaurant


# Cluster Neighbourhoods

Run k-means to cluster the neighbourhoods into 5 clusters.

In [25]:
# set number of clusters
# set number of clusters
kclusters = 5

# features only: drop the name column (keyword form; the positional axis
# argument of drop() is not accepted by current pandas)
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighbourhood')

# run k-means clustering; n_init is pinned so the result does not depend on
# the library's version-specific default
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [26]:
# add clustering labels
# add clustering labels; drop a previous copy first so that re-running this
# cell does not fail on an already existing column
if 'Cluster Labels' in neighbourhoods_venues_sorted.columns:
    neighbourhoods_venues_sorted = neighbourhoods_venues_sorted.drop(columns='Cluster Labels')
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# left-join the cluster label and top venues onto the postal-code table (df),
# matching on the neighbourhood name; df itself is left untouched
toronto_merged = df.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')


In [27]:
# Renumber the rows from 0 and look at the end of the table
toronto_merged = toronto_merged.reset_index(drop=True)
toronto_merged.tail(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
93,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484,3.0,Pizza Place,Pharmacy,Gym,Athletics & Sports,Pub,Dance Studio,Coffee Shop,Pool,Skating Rink,Event Service
94,M9W,Etobicoke,"Northwest, West Humber - Clairville",43.706748,-79.594054,3.0,Truck Stop,Garden Center,Rental Car Location,Drugstore,Bar,Doner Restaurant,Donut Shop,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant
95,M1X,Scarborough,Upper Rouge,43.836125,-79.205636,,,,,,,,,,,
96,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675,3.0,Coffee Shop,Park,Pizza Place,Restaurant,Café,Pet Store,Bakery,Italian Restaurant,Flower Shop,Pub
97,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.648429,-79.38228,3.0,Coffee Shop,Café,Hotel,Japanese Restaurant,Restaurant,Gym,Asian Restaurant,Salad Place,American Restaurant,Steakhouse
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944,1.0,Yoga Studio,Park,Smoke Shop,River,Cosmetics Shop,Doner Restaurant,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant
99,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,3.0,Coffee Shop,Gay Bar,Sushi Restaurant,Japanese Restaurant,Burrito Place,Burger Joint,Yoga Studio,Bubble Tea Shop,Men's Store,Restaurant
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,3.0,Light Rail Station,Gym / Fitness Center,Fast Food Restaurant,Yoga Studio,Smoke Shop,Auto Workshop,Spa,Farmers Market,Brewery,Skate Park
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509,3.0,Construction & Landscaping,Locksmith,Home Service,Park,Business Service,Ethiopian Restaurant,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
102,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,3.0,Thrift / Vintage Store,ATM,Gym,Grocery Store,Kids Store,Flower Shop,Fast Food Restaurant,Discount Store,Convenience Store,Sandwich Place


We find that there is no venue data available for the neighbourhood "Upper Rouge".
We need to drop rows that have NaN values.

In [28]:
# Drop only the rows that received no cluster label (no venue data was joined
# for them), rather than every row with a missing value in any column
toronto_merged = toronto_merged.dropna(subset=['Cluster Labels'])
toronto_merged.tail(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
92,M5W,Downtown Toronto,Stn A PO Boxes,43.646435,-79.374846,3.0,Coffee Shop,Seafood Restaurant,Cocktail Bar,Restaurant,Hotel,Café,Italian Restaurant,Japanese Restaurant,Beer Bar,Park
93,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484,3.0,Pizza Place,Pharmacy,Gym,Athletics & Sports,Pub,Dance Studio,Coffee Shop,Pool,Skating Rink,Event Service
94,M9W,Etobicoke,"Northwest, West Humber - Clairville",43.706748,-79.594054,3.0,Truck Stop,Garden Center,Rental Car Location,Drugstore,Bar,Doner Restaurant,Donut Shop,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant
96,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675,3.0,Coffee Shop,Park,Pizza Place,Restaurant,Café,Pet Store,Bakery,Italian Restaurant,Flower Shop,Pub
97,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.648429,-79.38228,3.0,Coffee Shop,Café,Hotel,Japanese Restaurant,Restaurant,Gym,Asian Restaurant,Salad Place,American Restaurant,Steakhouse
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944,1.0,Yoga Studio,Park,Smoke Shop,River,Cosmetics Shop,Doner Restaurant,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant
99,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,3.0,Coffee Shop,Gay Bar,Sushi Restaurant,Japanese Restaurant,Burrito Place,Burger Joint,Yoga Studio,Bubble Tea Shop,Men's Store,Restaurant
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,3.0,Light Rail Station,Gym / Fitness Center,Fast Food Restaurant,Yoga Studio,Smoke Shop,Auto Workshop,Spa,Farmers Market,Brewery,Skate Park
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509,3.0,Construction & Landscaping,Locksmith,Home Service,Park,Business Service,Ethiopian Restaurant,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
102,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,3.0,Thrift / Vintage Store,ATM,Gym,Grocery Store,Kids Store,Flower Shop,Fast Food Restaurant,Discount Store,Convenience Store,Sandwich Place


Convert Cluster Labels from float to integer

In [29]:
# Rebind instead of assigning into a column of the dropna() result, which
# avoids pandas' chained-assignment warning and keeps the cell re-runnable
toronto_merged = toronto_merged.astype({'Cluster Labels': int})

Visualize the resulting clusters

In [30]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

## Cluster 1

In [31]:
# Rows with k-means label 0: column 1 plus every column from position 5 onward
cluster_cols = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, cluster_cols]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Scarborough,0,Electronics Store,Rental Car Location,Intersection,Spa,Mexican Restaurant,Bank,Restaurant,Medical Center,Breakfast Spot,Curling Ice
64,York,0,Jewelry Store,Convenience Store,Park,Electronics Store,Escape Room,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant
66,North York,0,Construction & Landscaping,Convenience Store,Park,Flower Shop,Electronics Store,Ethiopian Restaurant,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant
71,Scarborough,0,Health & Beauty Service,Bakery,Fireworks Store,Smoke Shop,Middle Eastern Restaurant,Miscellaneous Shop,Electronics Store,Yoga Studio,Escape Room,Dry Cleaner


## Cluster 2

In [32]:
# Rows with k-means label 1: column 1 plus every column from position 5 onward
cluster_cols = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, cluster_cols]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,York,1,Park,Miscellaneous Shop,Women's Store,Spa,Ethiopian Restaurant,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
35,East York,1,Film Studio,Convenience Store,Metro Station,Park,Ethiopian Restaurant,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
45,North York,1,Martial Arts School,Cafeteria,Park,Yoga Studio,Escape Room,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
49,North York,1,Massage Studio,Bakery,Park,Construction & Landscaping,Harbor / Marina,Empanada Restaurant,Doner Restaurant,Donut Shop,Drugstore,Dry Cleaner
52,North York,1,Home Service,Park,Gym,Yoga Studio,Escape Room,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant
68,Central Toronto,1,Bus Line,Park,Sushi Restaurant,Trail,Jewelry Store,Empanada Restaurant,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant
83,Central Toronto,1,Trail,Tennis Court,Park,Playground,Beer Store,Yoga Studio,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant
85,Scarborough,1,Park,Intersection,Yoga Studio,Escape Room,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
91,Downtown Toronto,1,Park,Playground,Trail,Yoga Studio,Empanada Restaurant,Doner Restaurant,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant
98,Etobicoke,1,Yoga Studio,Park,Smoke Shop,River,Cosmetics Shop,Doner Restaurant,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant


## Cluster 3 

In [33]:
# Rows with k-means label 2: column 1 plus every column from position 5 onward
cluster_cols = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, cluster_cols]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Scarborough,2,Women's Store,Concert Hall,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Escape Room,Ethiopian Restaurant


## Cluster 4

In [34]:
# Rows with k-means label 3: column 1 plus every column from position 5 onward
cluster_cols = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, cluster_cols]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,3,Construction & Landscaping,Convenience Store,Park,Fireworks Store,Food & Drink Shop,Bus Stop,BBQ Joint,Hotel,Creperie,Cuban Restaurant
1,North York,3,Portuguese Restaurant,Hockey Arena,Bridal Shop,Pizza Place,Financial or Legal Service,Intersection,Coffee Shop,Empanada Restaurant,Donut Shop,Drugstore
2,Downtown Toronto,3,Coffee Shop,Pub,Furniture / Home Store,Bakery,Café,Park,Art Gallery,Beer Store,Italian Restaurant,Rental Car Location
3,North York,3,Clothing Store,Furniture / Home Store,Accessories Store,Home Service,Health & Beauty Service,Shoe Store,Tailor Shop,Sporting Goods Shop,Lighting Store,Medical Center
4,Downtown Toronto,3,Coffee Shop,Sandwich Place,Sushi Restaurant,Fast Food Restaurant,Chinese Restaurant,Café,Pizza Place,Salad Place,Bank,Italian Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...
97,Downtown Toronto,3,Coffee Shop,Café,Hotel,Japanese Restaurant,Restaurant,Gym,Asian Restaurant,Salad Place,American Restaurant,Steakhouse
99,Downtown Toronto,3,Coffee Shop,Gay Bar,Sushi Restaurant,Japanese Restaurant,Burrito Place,Burger Joint,Yoga Studio,Bubble Tea Shop,Men's Store,Restaurant
100,East Toronto,3,Light Rail Station,Gym / Fitness Center,Fast Food Restaurant,Yoga Studio,Smoke Shop,Auto Workshop,Spa,Farmers Market,Brewery,Skate Park
101,Etobicoke,3,Construction & Landscaping,Locksmith,Home Service,Park,Business Service,Ethiopian Restaurant,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store


## Cluster 5

In [35]:
# Rows with k-means label 4: column 1 plus every column from position 5 onward
cluster_cols = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, cluster_cols]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Etobicoke,4,Pizza Place,Yoga Studio,Escape Room,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant
