In [232]:
import os
from io import StringIO

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

# Part 1

In [104]:
# Download the Wikipedia page listing the Toronto ("M") postal codes.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# fails loudly on an HTTP error instead of silently parsing an error page.
html_doc = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
html_doc.raise_for_status()
soup = BeautifulSoup(html_doc.content, 'html.parser')

In [105]:
# Locate the sortable wikitable elements in the parsed page.
# find_all is the current BeautifulSoup 4 spelling; findAll is a legacy alias.
mydivs = soup.find_all("table", {"class": "wikitable sortable"})
if not mydivs:
    # Without this guard a page layout change surfaces as a bare IndexError.
    raise ValueError('No "wikitable sortable" table found; the page layout may have changed.')
# Keep the first matching table as an HTML string for pandas to parse.
str_mydivs = str(mydivs[0])

In [106]:
# Parse the table markup into a DataFrame. The markup is wrapped in StringIO
# because passing a literal HTML string to read_html is deprecated in newer pandas.
df = pd.read_html(StringIO(str_mydivs))[0]

In [107]:
# Preview the first rows of the scraped postal code table.
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [108]:
# Keep only the rows whose borough is assigned: any row whose Borough text
# contains "Not assigned" is dropped.
borough_unassigned = df['Borough'].str.contains("Not assigned")
df = df[~borough_unassigned]

In [109]:
# Flag repeat occurrences of a postal code (the first occurrence stays False).
postal_codes = df['Postal Code']
duplicates = postal_codes.duplicated(keep='first')
duplicates

2      False
3      False
4      False
5      False
6      False
       ...  
160    False
165    False
168    False
169    False
178    False
Name: Postal Code, Length: 103, dtype: bool

In [110]:
# Preview the table again after the unassigned boroughs were filtered out.
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [111]:
# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
neighborhood_unassigned = df['Neighborhood'] == "Not assigned"
# Apply the rule instead of only inspecting it, so the notebook stays correct
# if the source table ever contains such rows again.
df = df.assign(Neighborhood=df['Neighborhood'].where(~neighborhood_unassigned, df['Borough']))
# Sanity check: this should display an empty frame.
df.loc[df['Neighborhood']=="Not assigned"]

Unnamed: 0,Postal Code,Borough,Neighborhood


In [112]:
# More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, 
# you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. 
# These two rows will be combined into one row with the neighborhoods separated with a comma as shown in 
# row 11 in the above table.

# Resolved on the original Wikipedia page.

In [114]:
# Report (rows, columns) of the cleaned postal code table.
df.shape

(103, 3)

# Part 2

In [115]:
# Load the postal code coordinate lookup from a CSV file in the working directory.
geospatial = pd.read_csv('Geospatial_Coordinates.csv')

In [116]:
# Display the coordinate lookup table.
geospatial

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [117]:
# Attach coordinates to every postal code by joining the two tables on their
# shared 'Postal Code' key (left join: every row of df is kept).
boroughs_by_code = df.set_index('Postal Code')
coords_by_code = geospatial.set_index('Postal Code')
toronto_df = boroughs_by_code.join(coords_by_code, how='left')
toronto_df.head()

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


# Part 3 - Exploration and Clustering of Toronto Neighbourhoods

- Step 1: Filter out boroughs without Toronto in the name
- Step 2: Replicate methodology used on New York city data.

In [118]:
# Foursquare credentials are read from the environment so that secrets never
# live in the notebook source or in its saved output.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET environment '
        'variables before running this notebook.')

# Confirm the credentials are present without echoing them into the output.
print('Foursquare credentials loaded.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [124]:
# Query settings for the Foursquare venue search.
LIMIT = 100 # maximum number of venues requested per Foursquare API call

# NOTE(review): getNearbyVenues declares its own radius=500 default and the call
# further down does not pass this variable, so this value appears to be unused.
radius = 500 # search radius (presumably metres -- confirm against the API docs)

In [202]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the venues around each location from the Foursquare explore endpoint.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences giving a label and a coordinate pair per location;
        they are consumed together with ``zip``.
    radius : number, default 500
        Value forwarded to the API's ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per (location, venue) pair with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. When no venue
        is found (or no location is given) an empty frame with these columns is
        returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for n, lat, lng in zip(names, latitudes, longitudes):
        print(n, lat, lng)

        # Let requests build and escape the query string instead of formatting it by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # The timeout prevents a stalled connection from hanging the notebook, and
        # raise_for_status() reports API failures directly instead of as a KeyError.
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for v in items:
            venue = v['venue']
            # A venue may come back without any category; record None rather
            # than failing with an IndexError.
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            rows.append((
                n,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing the column names to the constructor keeps the schema intact even
    # when no rows were collected (assigning .columns afterwards raised then).
    return pd.DataFrame(rows, columns=columns)

In [203]:
# Restrict the analysis to boroughs whose name contains "Toronto" and renumber
# the surviving rows from 0.
in_toronto = toronto_df['Borough'].str.contains('Toronto')
toronto_data = toronto_df.loc[in_toronto].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,Downtown Toronto,St. James Town,43.651494,-79.375418
4,East Toronto,The Beaches,43.676357,-79.293031


### Splitting the dataframe so that it is one neighborhood per row.


In [204]:

columns = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Emit one record per individual neighbourhood: a postal code row listing several
# comma-separated neighbourhoods is repeated once for each of them, every copy
# carrying the borough and coordinates of the original row.
new_toronto_data = pd.DataFrame(
    [
        [row['Borough'], neighborhood, row['Latitude'], row['Longitude']]
        for _, row in toronto_data.iterrows()
        for neighborhood in row['Neighborhood'].split(', ')
    ],
    columns=columns,
)
new_toronto_data.head()
            

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Downtown Toronto,Regent Park,43.65426,-79.360636
1,Downtown Toronto,Harbourfront,43.65426,-79.360636
2,Downtown Toronto,Queen's Park,43.662301,-79.389494
3,Downtown Toronto,Ontario Provincial Government,43.662301,-79.389494
4,Downtown Toronto,Garden District,43.657162,-79.378937


In [205]:
# Look up a reference coordinate for the map centre.
address = 'Downtown Toronto, Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6541737, -79.38081164513409.


In [209]:
# Fetch the venues around every split neighbourhood row, using the function's
# default search radius.
toronto_venues = getNearbyVenues(
    new_toronto_data['Neighborhood'],
    new_toronto_data['Latitude'],
    new_toronto_data['Longitude'],
)

Regent Park 43.6542599 -79.3606359
Harbourfront 43.6542599 -79.3606359
Queen's Park 43.6623015 -79.3894938
Ontario Provincial Government 43.6623015 -79.3894938
Garden District 43.6571618 -79.37893709999999
Ryerson 43.6571618 -79.37893709999999
St. James Town 43.6514939 -79.3754179
The Beaches 43.67635739999999 -79.2930312
Berczy Park 43.644770799999996 -79.3733064
Central Bay Street 43.6579524 -79.3873826
Christie 43.669542 -79.4225637
Richmond 43.65057120000001 -79.3845675
Adelaide 43.65057120000001 -79.3845675
King 43.65057120000001 -79.3845675
Dufferin 43.66900510000001 -79.4422593
Dovercourt Village 43.66900510000001 -79.4422593
Harbourfront East 43.6408157 -79.38175229999999
Union Station 43.6408157 -79.38175229999999
Toronto Islands 43.6408157 -79.38175229999999
Little Portugal 43.647926700000006 -79.4197497
Trinity 43.647926700000006 -79.4197497
The Danforth West 43.6795571 -79.352188
Riverdale 43.6795571 -79.352188
Toronto Dominion Centre 43.6471768 -79.38157640000001
Design Ex

In [213]:
# Show the overall size of the venues table, then preview its first rows.
print(toronto_venues.shape)
toronto_venues.head()

(3180, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Regent Park,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Regent Park,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Regent Park,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,Regent Park,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,Regent Park,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [214]:
# Count the non-null entries of every column per neighbourhood.
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,94,94,94,94,94,94
Bathurst Quay,17,17,17,17,17,17
Berczy Park,55,55,55,55,55,55
Brockton,22,22,22,22,22,22
Business reply mail Processing Centre,17,17,17,17,17,17
...,...,...,...,...,...,...
Underground city,100,100,100,100,100,100
Union Station,100,100,100,100,100,100
University of Toronto,34,34,34,34,34,34
Victoria Hotel,100,100,100,100,100,100


In [217]:
# Report how many distinct venue categories were returned.
n_categories = toronto_venues['Venue Category'].unique().size
print('There are {} unique categories.'.format(n_categories))

There are 236 unique categories.


In [218]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [220]:
# Report (rows, columns) of the one-hot encoded venues table.
toronto_onehot.shape

(3180, 236)

In [221]:
# Average the one-hot rows per neighbourhood, so each value is the share of that
# neighbourhood's venues falling in the category. The grouping key is kept as a
# regular leading column rather than as the index.
toronto_grouped = toronto_onehot.groupby('Neighborhood', as_index=False).mean()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Adelaide,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.021277,0.0,...,0.0,0.0,0.00,0.010638,0.000000,0.0,0.00,0.0,0.0,0.010638
1,Bathurst Quay,0.000000,0.0,0.058824,0.058824,0.117647,0.176471,0.117647,0.000000,0.0,...,0.0,0.0,0.00,0.000000,0.000000,0.0,0.00,0.0,0.0,0.000000
2,Berczy Park,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.00,0.018182,0.000000,0.0,0.00,0.0,0.0,0.000000
3,Brockton,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.00,0.000000,0.000000,0.0,0.00,0.0,0.0,0.000000
4,Business reply mail Processing Centre,0.058824,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.00,0.000000,0.000000,0.0,0.00,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,Underground city,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.030000,0.0,...,0.0,0.0,0.01,0.010000,0.000000,0.0,0.01,0.0,0.0,0.000000
72,Union Station,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.01,0.010000,0.000000,0.0,0.01,0.0,0.0,0.000000
73,University of Toronto,0.029412,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.00,0.000000,0.029412,0.0,0.00,0.0,0.0,0.000000
74,Victoria Hotel,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.040000,0.0,...,0.0,0.0,0.00,0.020000,0.000000,0.0,0.01,0.0,0.0,0.000000


In [222]:
# Report (rows, columns) of the per-neighbourhood frequency table.
toronto_grouped.shape

(76, 236)

In [223]:
## Top 5 most common venues for each neighborhood

In [224]:
num_top_venues = 5

# Print, for every neighbourhood, its most frequent venue categories.
for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    hood_row = toronto_grouped[toronto_grouped['Neighborhood'] == hood]
    # Transpose so that each category becomes a row with its frequency next to it.
    freq_table = hood_row.T.reset_index()
    freq_table.columns = ['venue','freq']
    # The first transposed row holds the 'Neighborhood' label itself, not a venue.
    freq_table = freq_table.iloc[1:].copy()
    freq_table['freq'] = freq_table['freq'].astype(float)
    freq_table = freq_table.round({'freq': 2})
    ranked = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Adelaide----
           venue  freq
0    Coffee Shop  0.10
1           Café  0.05
2     Restaurant  0.04
3          Hotel  0.03
4  Deli / Bodega  0.03


----Bathurst Quay----
                 venue  freq
0      Airport Service  0.18
1       Airport Lounge  0.12
2     Airport Terminal  0.12
3                Plane  0.06
4  Rental Car Location  0.06


----Berczy Park----
                venue  freq
0         Coffee Shop  0.07
1        Cocktail Bar  0.05
2  Seafood Restaurant  0.04
3         Cheese Shop  0.04
4          Restaurant  0.04


----Brockton----
            venue  freq
0            Café  0.14
1  Breakfast Spot  0.09
2     Coffee Shop  0.09
3       Pet Store  0.05
4    Intersection  0.05


----Business reply mail Processing Centre----
              venue  freq
0       Yoga Studio  0.06
1  Recording Studio  0.06
2        Smoke Shop  0.06
3              Park  0.06
4     Burrito Place  0.06


----CN Tower----
                 venue  freq
0      Airport Service  0.18
1       Airpo

                 venue  freq
0                 Park   0.5
1           Restaurant   0.5
2          Yoga Studio   0.0
3  Moroccan Restaurant   0.0
4     Malay Restaurant   0.0


----Summerhill West----
                venue  freq
0                 Pub  0.12
1         Coffee Shop  0.12
2  Light Rail Station  0.06
3        Liquor Store  0.06
4      Sandwich Place  0.06


----Swansea----
                venue  freq
0    Sushi Restaurant  0.08
1                Café  0.08
2         Coffee Shop  0.08
3         Pizza Place  0.08
4  Italian Restaurant  0.05


----The Annex----
            venue  freq
0  Sandwich Place  0.14
1            Café  0.14
2     Coffee Shop  0.10
3  History Museum  0.05
4       BBQ Joint  0.05


----The Beaches----
                 venue  freq
0    Health Food Store  0.25
1                Trail  0.25
2                  Pub  0.25
3          Yoga Studio  0.00
4  Moroccan Restaurant  0.00


----The Beaches West----
                  venue  freq
0                  Park  0.14

In [225]:
## Putting into pandas dataframe

In [226]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest entries of ``row``.

    The first element of ``row`` is skipped (callers pass rows whose first entry
    is the neighbourhood name). The remaining values are ranked in descending
    order and the index labels of the leading entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [242]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


def _ordinal(position):
    """Return ``position`` with its English ordinal suffix (1st, 2nd, 11th, 22nd, ...)."""
    # 11, 12 and 13 (also 111, 112, ...) take 'th' although they end in 1, 2 or 3.
    if 10 <= position % 100 <= 20:
        return '{}th'.format(position)
    last_digit = position % 10
    suffix = indicators[last_digit - 1] if 1 <= last_digit <= 3 else 'th'
    return '{}{}'.format(position, suffix)


# create columns according to number of top venues
# (an explicit suffix rule replaces the former bare `except:`, which hid every
# kind of error and produced labels such as "21th" for larger rankings)
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank)) for rank in range(1, num_top_venues + 1)
]

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

# fill each row with that neighbourhood's categories, most frequent first
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Restaurant,Gym,Hotel,Clothing Store,Deli / Bodega,Thai Restaurant,Salad Place,Sushi Restaurant
1,Bathurst Quay,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Boutique,Rental Car Location,Plane,Coffee Shop,Boat or Ferry,Bar
2,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Beer Bar,Seafood Restaurant,Cheese Shop,Café,Restaurant,Clothing Store,Eastern European Restaurant
3,Brockton,Café,Breakfast Spot,Coffee Shop,Furniture / Home Store,Burrito Place,Restaurant,Italian Restaurant,Intersection,Stadium,Bar
4,Business reply mail Processing Centre,Yoga Studio,Auto Workshop,Park,Comic Shop,Pizza Place,Recording Studio,Restaurant,Burrito Place,Brewery,Light Rail Station


In [243]:
# set number of clusters
kclusters = 5

# Only the category frequencies are clustered, so the label column is removed.
# drop('Neighborhood', 1) relied on a positional axis argument that newer pandas
# rejects; naming the columns explicitly works across versions.
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering (n_init is pinned so the result does not shift with
# scikit-learn's changing default)
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 3, 4, 4, 4, 3, 4, 4, 4, 4], dtype=int32)

In [244]:
# add clustering labels
# Remove a label column left behind by a previous run first: DataFrame.insert
# raises when the column already exists, which made this cell fail on re-execution.
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# add the cluster label and top venues to each neighborhood's borough and coordinates
# (join returns a new frame, so new_toronto_data itself is left untouched)
toronto_merged = new_toronto_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,Regent Park,43.654260,-79.360636,4,Coffee Shop,Pub,Bakery,Park,Restaurant,Breakfast Spot,Café,Theater,Gym / Fitness Center,Event Space
1,Downtown Toronto,Harbourfront,43.654260,-79.360636,4,Coffee Shop,Pub,Bakery,Park,Restaurant,Breakfast Spot,Café,Theater,Gym / Fitness Center,Event Space
2,Downtown Toronto,Queen's Park,43.662301,-79.389494,4,Coffee Shop,Sushi Restaurant,Yoga Studio,College Cafeteria,Beer Bar,Smoothie Shop,Sandwich Place,Burger Joint,Burrito Place,Café
3,Downtown Toronto,Ontario Provincial Government,43.662301,-79.389494,4,Coffee Shop,Sushi Restaurant,Yoga Studio,College Cafeteria,Beer Bar,Smoothie Shop,Sandwich Place,Burger Joint,Burrito Place,Café
4,Downtown Toronto,Garden District,43.657162,-79.378937,4,Clothing Store,Coffee Shop,Middle Eastern Restaurant,Café,Cosmetics Shop,Japanese Restaurant,Italian Restaurant,Bubble Tea Shop,Tea Room,Bakery
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,Downtown Toronto,First Canadian Place,43.648429,-79.382280,4,Coffee Shop,Café,Restaurant,Gym,Hotel,Japanese Restaurant,Salad Place,Steakhouse,Asian Restaurant,Deli / Bodega
74,Downtown Toronto,Underground city,43.648429,-79.382280,4,Coffee Shop,Café,Restaurant,Gym,Hotel,Japanese Restaurant,Salad Place,Steakhouse,Asian Restaurant,Deli / Bodega
75,Downtown Toronto,Church and Wellesley,43.665860,-79.383160,4,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Gay Bar,Burger Joint,Pub,Café,Bubble Tea Shop,Yoga Studio
76,East Toronto,Business reply mail Processing Centre,43.662744,-79.321558,4,Yoga Studio,Auto Workshop,Park,Comic Shop,Pizza Place,Recording Studio,Restaurant,Burrito Place,Brewery,Light Rail Station


In [240]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    # A neighbourhood without a match in the join has no cluster label (NaN);
    # it cannot be coloured, so it is left off the map.
    if pd.isna(cluster):
        continue
    # The join can turn the labels into floats; list indexing needs an int.
    cluster = int(cluster)
    # cluster-1 sends label 0 to the last colour through negative indexing;
    # the label-to-colour mapping stays one-to-one.
    marker_color = rainbow[cluster-1]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [247]:
# Neighbourhoods assigned to cluster 4.
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4]

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,Regent Park,43.65426,-79.360636,4,Coffee Shop,Pub,Bakery,Park,Restaurant,Breakfast Spot,Café,Theater,Gym / Fitness Center,Event Space
1,Downtown Toronto,Harbourfront,43.65426,-79.360636,4,Coffee Shop,Pub,Bakery,Park,Restaurant,Breakfast Spot,Café,Theater,Gym / Fitness Center,Event Space
2,Downtown Toronto,Queen's Park,43.662301,-79.389494,4,Coffee Shop,Sushi Restaurant,Yoga Studio,College Cafeteria,Beer Bar,Smoothie Shop,Sandwich Place,Burger Joint,Burrito Place,Café
3,Downtown Toronto,Ontario Provincial Government,43.662301,-79.389494,4,Coffee Shop,Sushi Restaurant,Yoga Studio,College Cafeteria,Beer Bar,Smoothie Shop,Sandwich Place,Burger Joint,Burrito Place,Café
4,Downtown Toronto,Garden District,43.657162,-79.378937,4,Clothing Store,Coffee Shop,Middle Eastern Restaurant,Café,Cosmetics Shop,Japanese Restaurant,Italian Restaurant,Bubble Tea Shop,Tea Room,Bakery
5,Downtown Toronto,Ryerson,43.657162,-79.378937,4,Clothing Store,Coffee Shop,Middle Eastern Restaurant,Café,Cosmetics Shop,Japanese Restaurant,Italian Restaurant,Bubble Tea Shop,Tea Room,Bakery
6,Downtown Toronto,St. James Town,43.651494,-79.375418,4,Coffee Shop,Café,Restaurant,Italian Restaurant,Gastropub,Cocktail Bar,American Restaurant,Pharmacy,Bakery,Diner
8,Downtown Toronto,Berczy Park,43.644771,-79.373306,4,Coffee Shop,Cocktail Bar,Bakery,Beer Bar,Seafood Restaurant,Cheese Shop,Café,Restaurant,Clothing Store,Eastern European Restaurant
9,Downtown Toronto,Central Bay Street,43.657952,-79.387383,4,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Japanese Restaurant,Salad Place,Bubble Tea Shop,Burger Joint,Department Store,Modern European Restaurant
10,Downtown Toronto,Christie,43.669542,-79.422564,4,Grocery Store,Café,Park,Restaurant,Candy Store,Diner,Athletics & Sports,Italian Restaurant,Baby Store,Coffee Shop


In [248]:
# Neighbourhoods assigned to cluster 3.
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3]

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
62,Downtown Toronto,CN Tower,43.628947,-79.39442,3,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Boutique,Rental Car Location,Plane,Coffee Shop,Boat or Ferry,Bar
63,Downtown Toronto,King and Spadina,43.628947,-79.39442,3,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Boutique,Rental Car Location,Plane,Coffee Shop,Boat or Ferry,Bar
64,Downtown Toronto,Railway Lands,43.628947,-79.39442,3,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Boutique,Rental Car Location,Plane,Coffee Shop,Boat or Ferry,Bar
65,Downtown Toronto,Harbourfront West,43.628947,-79.39442,3,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Boutique,Rental Car Location,Plane,Coffee Shop,Boat or Ferry,Bar
66,Downtown Toronto,Bathurst Quay,43.628947,-79.39442,3,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Boutique,Rental Car Location,Plane,Coffee Shop,Boat or Ferry,Bar
67,Downtown Toronto,South Niagara,43.628947,-79.39442,3,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Boutique,Rental Car Location,Plane,Coffee Shop,Boat or Ferry,Bar
68,Downtown Toronto,Island airport,43.628947,-79.39442,3,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Boutique,Rental Car Location,Plane,Coffee Shop,Boat or Ferry,Bar


In [249]:
# Neighbourhoods assigned to cluster 2.
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2]

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
36,Central Toronto,Forest Hill North & West,43.696948,-79.411307,2,Jewelry Store,Trail,Mexican Restaurant,Sushi Restaurant,Women's Store,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
37,Central Toronto,Forest Hill Road Park,43.696948,-79.411307,2,Jewelry Store,Trail,Mexican Restaurant,Sushi Restaurant,Women's Store,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop


In [250]:
# Neighbourhoods assigned to cluster 1.
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1]

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
52,Central Toronto,Moore Park,43.689574,-79.38316,1,Park,Restaurant,College Rec Center,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run
53,Central Toronto,Summerhill East,43.689574,-79.38316,1,Park,Restaurant,College Rec Center,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run
69,Downtown Toronto,Rosedale,43.679563,-79.377529,1,Park,Playground,Trail,Deli / Bodega,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant


In [251]:
# Neighbourhoods assigned to cluster 0.
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0]

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,East Toronto,The Beaches,43.676357,-79.293031,0,Trail,Health Food Store,Pub,Doner Restaurant,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run,Women's Store
57,Central Toronto,Summerhill West,43.686412,-79.400049,0,Pub,Coffee Shop,Sandwich Place,Light Rail Station,Vietnamese Restaurant,Supermarket,Liquor Store,Sushi Restaurant,American Restaurant,Restaurant
58,Central Toronto,Rathnelly,43.686412,-79.400049,0,Pub,Coffee Shop,Sandwich Place,Light Rail Station,Vietnamese Restaurant,Supermarket,Liquor Store,Sushi Restaurant,American Restaurant,Restaurant
59,Central Toronto,South Hill,43.686412,-79.400049,0,Pub,Coffee Shop,Sandwich Place,Light Rail Station,Vietnamese Restaurant,Supermarket,Liquor Store,Sushi Restaurant,American Restaurant,Restaurant
60,Central Toronto,Forest Hill SE,43.686412,-79.400049,0,Pub,Coffee Shop,Sandwich Place,Light Rail Station,Vietnamese Restaurant,Supermarket,Liquor Store,Sushi Restaurant,American Restaurant,Restaurant
61,Central Toronto,Deer Park,43.686412,-79.400049,0,Pub,Coffee Shop,Sandwich Place,Light Rail Station,Vietnamese Restaurant,Supermarket,Liquor Store,Sushi Restaurant,American Restaurant,Restaurant


## Analysis

- Cluster 4 tends to have more coffee shops, cafes and pubs, and is the most numerous. Seems to be the dining areas, along with high street services.
- Cluster 3 is primarily characterised by airport services and amenities.
- Cluster 2 seems to have more jewelry stores and other retail.
- Cluster 1 has more parks and restaurants.
- Cluster 0 seems to be a residual category: The Beaches does not match the other 5 neighborhoods, which have identical top 10 most common venues because they all share the same latitude and longitude.
- More fine-grained latitude and longitude information would increase the accuracy of the dataset.
