In [1]:
import pandas as pd # import required library
import numpy as np
# read the data using pandas read_html
tables = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

In [2]:
# since it's the first table, let's add it to a dataframe
df0 = tables[0]
df0.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [3]:
# remove cells with a borough that is Not assigned.
df0 = df0[df0['Borough'] != 'Not assigned']
df0[:10]

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Not assigned
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


In [4]:
# now, let's create a new data frame with each postal code per row
# initialize the DF with unique postal codes
# note: this is an alternative to groupby

df = pd.DataFrame({'PostalCode' : list(df0['Postcode'].unique())})
df[:10]

Unnamed: 0,PostalCode
0,M3A
1,M4A
2,M5A
3,M6A
4,M7A
5,M9A
6,M1B
7,M3B
8,M4B
9,M5B


In [5]:
# helper functions for getting boroughs and neighborhoods

# the borough is located in the 3rd column of df0
def getBorough(code):
    return(df0[df0['Postcode'] == code].iloc[0,1])

# the neighborhood is located on the 4th column
# extract the values and join them using a comma
def getNeighborhood(code):
    tempList = df0[df0['Postcode'] == code].iloc[:,2].values
    return ','.join(tempList)

In [6]:
# apply the functions to df


# get the borough for each postal code
df['Borough'] = df['PostalCode'].apply(getBorough)

# get the neighborhood for each postal code
df['Neighborhood'] = df['PostalCode'].apply(getNeighborhood)

df[:10]

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Not assigned
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough

In [7]:
for i in range(len(df)):
    if df.iloc[i,2] == 'Not assigned': # if the value is Not assigned
        df.iloc[i,2] = df.iloc[i,1] # assign the value of the borough

df[:10]       

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Queen's Park
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


In [8]:
df.shape

(103, 3)

In [9]:
# codes_data = pd.read_csv('https://cocl.us/Geospatial_data')
# codes_data.to_csv('./codes_data.csv')
codes_data = pd.read_csv('./codes_data.csv')
codes_data.head()

Unnamed: 0.1,Unnamed: 0,Postal Code,Latitude,Longitude
0,0,M1B,43.806686,-79.194353
1,1,M1C,43.784535,-79.160497
2,2,M1E,43.763573,-79.188711
3,3,M1G,43.770992,-79.216917
4,4,M1H,43.773136,-79.239476


In [10]:
df2 = pd.merge(df,codes_data,left_on='PostalCode', right_on='Postal Code')
df2.drop(columns=['Postal Code'], inplace=True)
df2.head()

Unnamed: 0.1,PostalCode,Borough,Neighborhood,Unnamed: 0,Latitude,Longitude
0,M3A,North York,Parkwoods,25,43.753259,-79.329656
1,M4A,North York,Victoria Village,34,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,53,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",71,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,85,43.662301,-79.389494


# Clusters

In [11]:
import json
from geopy.geocoders import Nominatim
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

In [12]:
neighborhoods = df2
neighborhoods.head()

Unnamed: 0.1,PostalCode,Borough,Neighborhood,Unnamed: 0,Latitude,Longitude
0,M3A,North York,Parkwoods,25,43.753259,-79.329656
1,M4A,North York,Victoria Village,34,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,53,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",71,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,85,43.662301,-79.389494


You can decide to work with only boroughs that contain the word Toronto and then replicate the same analysis we did to the New York City data. It is up to you. 

In [13]:
neighborhoods['Borough'].unique()

array(['North York', 'Downtown Toronto', "Queen's Park", 'Scarborough',
       'East York', 'Etobicoke', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

we see that the following boroughs contain the word 'Toronto':
'Downtown Toronto',East Toronto,West Toronto and Central Toronto.
So let's use only these:

In [14]:
neighborhoods = neighborhoods[
    (neighborhoods['Borough'] == 'Downtown Toronto')|
    (neighborhoods['Borough'] == 'East Toronto')|
    (neighborhoods['Borough'] == 'West Toronto')|
    (neighborhoods['Borough'] == 'Central Toronto')
             ]
neighborhoods.head(40)

Unnamed: 0.1,PostalCode,Borough,Neighborhood,Unnamed: 0,Latitude,Longitude
2,M5A,Downtown Toronto,Harbourfront,53,43.65426,-79.360636
4,M7A,Downtown Toronto,Queen's Park,85,43.662301,-79.389494
9,M5B,Downtown Toronto,"Ryerson,Garden District",54,43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,55,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,37,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,56,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,57,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,75,43.669542,-79.422564
30,M5H,Downtown Toronto,"Adelaide,King,Richmond",58,43.650571,-79.384568
31,M6H,West Toronto,"Dovercourt Village,Dufferin",76,43.669005,-79.442259


In [15]:
# address = 'Toronto, Canada'

# geolocator = Nominatim(user_agent="ny_explorer")
# location = geolocator.geocode(address)
# latitude = location.latitude
# longitude = location.longitude
# print('The geograpical coordinate of Torono are {}, {}.'.format(latitude, longitude))

# Nominatim is not working for me, let's hardcode lat and long
# 43.6532° N, 79.3832° W

latitude = 43.6532
longitude = -79.3832

In [16]:
# create map using latitude and longitude values
map = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map)  
    
map

In [17]:
CLIENT_ID = 'ZDBQLK3MNWFC2GWJ4YLR2ROADVTCLXJIEUPZBYHCCYU2E03U' # your Foursquare ID
CLIENT_SECRET = 'SZKUARM4PXEWLT21MLK1GFWXXUTWMXR5NS3TXHV2ZZCXGY4E' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [18]:
LIMIT = 100
radius = 500

In [19]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [21]:
# toronto_venues = pd.read_csv('./toronto_venues.csv')
                             
#run only once to save time
toronto_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                 latitudes=neighborhoods['Latitude'],
                                 longitudes=neighborhoods['Longitude']
                                  )
#toronto_venues.to_csv('./toronto_venues.csv')

Harbourfront
Queen's Park
Ryerson,Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide,King,Richmond
Dovercourt Village,Dufferin
Harbourfront East,Toronto Islands,Union Station
Little Portugal,Trinity
The Danforth West,Riverdale
Design Exchange,Toronto Dominion Centre
Brockton,Exhibition Place,Parkdale Village
The Beaches West,India Bazaar
Commerce Court,Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North,Forest Hill West
High Park,The Junction South
North Toronto West
The Annex,North Midtown,Yorkville
Parkdale,Roncesvalles
Davisville
Harbord,University of Toronto
Runnymede,Swansea
Moore Park,Summerhill East
Chinatown,Grange Park,Kensington Market
Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown,St. James Town
First Canadian Place,Underground city

In [22]:
print(toronto_venues.shape)
toronto_venues.head()

(1698, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653191,-79.357947,Gym / Fitness Center
3,Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Harbourfront,43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


In [23]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100
Berczy Park,56,56,56,56,56,56
"Brockton,Exhibition Place,Parkdale Village",22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern,16,16,16,16,16,16
"CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara",14,14,14,14,14,14
"Cabbagetown,St. James Town",43,43,43,43,43,43
Central Bay Street,82,82,82,82,82,82
"Chinatown,Grange Park,Kensington Market",86,86,86,86,86,86
Christie,17,17,17,17,17,17
Church and Wellesley,83,83,83,83,83,83


## 3. Analyze Each Neighborhood

In [24]:
 # one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()



Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
toronto_onehot.shape

(1698, 225)

Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [26]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,"Adelaide,King,Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.01
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0
2,"Brockton,Exhibition Place,Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.0,0.0,0.071429,0.071429,0.142857,0.214286,0.142857,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown,St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,...,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.012195,0.0
7,"Chinatown,Grange Park,Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.046512,0.0,0.069767,0.011628,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.012048,0.012048,0.0,0.0,0.0,0.0,0.0,0.012048,0.0,...,0.012048,0.012048,0.0,0.0,0.0,0.0,0.0,0.012048,0.0,0.0


In [27]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
             venue  freq
0      Coffee Shop  0.07
1             Café  0.04
2       Steakhouse  0.04
3   Cosmetics Shop  0.03
4  Thai Restaurant  0.03


----Berczy Park----
            venue  freq
0     Coffee Shop  0.07
1    Cocktail Bar  0.05
2        Beer Bar  0.04
3  Farmers Market  0.04
4            Café  0.04


----Brockton,Exhibition Place,Parkdale Village----
            venue  freq
0     Coffee Shop  0.09
1            Café  0.09
2  Breakfast Spot  0.09
3          Bakery  0.05
4       Nightclub  0.05


----Business Reply Mail Processing Centre 969 Eastern----
                  venue  freq
0           Yoga Studio  0.06
1         Auto Workshop  0.06
2            Comic Shop  0.06
3                  Park  0.06
4  Gym / Fitness Center  0.06


----CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara----
              venue  freq
0   Airport Service  0.21
1    Airport Lounge  0.14
2  Airport Terminal  0.14
3



First, let's write a function to sort the venues in descending order.


In [28]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [29]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Steakhouse,Restaurant,Asian Restaurant,Bakery,Bar,Cosmetics Shop,Thai Restaurant,Sushi Restaurant
1,Berczy Park,Coffee Shop,Cocktail Bar,Farmers Market,Café,Beer Bar,Bakery,Seafood Restaurant,Cheese Shop,Steakhouse,Shopping Mall
2,"Brockton,Exhibition Place,Parkdale Village",Coffee Shop,Breakfast Spot,Café,Climbing Gym,Stadium,Bar,Bakery,Restaurant,Intersection,Pet Store
3,Business Reply Mail Processing Centre 969 Eastern,Yoga Studio,Park,Restaurant,Fast Food Restaurant,Farmers Market,Auto Workshop,Spa,Pizza Place,Burrito Place,Light Rail Station
4,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Boutique,Airport,Airport Food Court,Sculpture Garden,Bar,Harbor / Marina


# Cluster Neighborhoods

Run k-means to cluster the neighborhood into 5 clusters.


In [30]:
# set number of clusters
kclusters = 4

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)


# check cluster labels generated for each row in the dataframe
# kmeans.labels_[0:10] 
for i in set(list(kmeans.labels_)):
    print (f'{i}  {list(kmeans.labels_).count(i)} ')

0  35 
1  1 
2  1 
3  2 


In [31]:
kmeans.labels_

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [32]:
toronto_data = neighborhoods.reset_index(drop=True)
# add clustering labels
# neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
toronto_merged = neighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # 

Unnamed: 0.1,PostalCode,Borough,Neighborhood,Unnamed: 0,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
2,M5A,Downtown Toronto,Harbourfront,53,43.65426,-79.360636,Coffee Shop,Pub,Café,Park,Bakery,Mexican Restaurant,Restaurant,Chocolate Shop,Performing Arts Venue,Dessert Shop,0
4,M7A,Downtown Toronto,Queen's Park,85,43.662301,-79.389494,Coffee Shop,Park,Gym,Yoga Studio,Burrito Place,Beer Bar,Sandwich Place,Italian Restaurant,Salad Place,Diner,0
9,M5B,Downtown Toronto,"Ryerson,Garden District",54,43.657162,-79.378937,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Japanese Restaurant,Ramen Restaurant,Electronics Store,Middle Eastern Restaurant,Bubble Tea Shop,Pizza Place,0
15,M5C,Downtown Toronto,St. James Town,55,43.651494,-79.375418,Coffee Shop,Café,Restaurant,Bakery,American Restaurant,Beer Bar,Italian Restaurant,Cocktail Bar,Hotel,Cosmetics Shop,0
19,M4E,East Toronto,The Beaches,37,43.676357,-79.293031,Pub,Trail,Health Food Store,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store,0


In [33]:
toronto_merged['Cluster Labels'].unique()

array([0, 2, 3, 1])

looks like some boroughs don't have venues and are assigned to nan clusters, let's remove these

In [34]:


# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters



In [35]:
# first cluster
# this is a very large cluster, where cofeeshops, cafes and parks are common venues

toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
2,Downtown Toronto,-79.360636,Coffee Shop,Pub,Café,Park,Bakery,Mexican Restaurant,Restaurant,Chocolate Shop,Performing Arts Venue,Dessert Shop,0
4,Downtown Toronto,-79.389494,Coffee Shop,Park,Gym,Yoga Studio,Burrito Place,Beer Bar,Sandwich Place,Italian Restaurant,Salad Place,Diner,0
9,Downtown Toronto,-79.378937,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Japanese Restaurant,Ramen Restaurant,Electronics Store,Middle Eastern Restaurant,Bubble Tea Shop,Pizza Place,0
15,Downtown Toronto,-79.375418,Coffee Shop,Café,Restaurant,Bakery,American Restaurant,Beer Bar,Italian Restaurant,Cocktail Bar,Hotel,Cosmetics Shop,0
19,East Toronto,-79.293031,Pub,Trail,Health Food Store,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store,0
20,Downtown Toronto,-79.373306,Coffee Shop,Cocktail Bar,Farmers Market,Café,Beer Bar,Bakery,Seafood Restaurant,Cheese Shop,Steakhouse,Shopping Mall,0
24,Downtown Toronto,-79.387383,Coffee Shop,Italian Restaurant,Ice Cream Shop,Sandwich Place,Juice Bar,Burger Joint,Café,Japanese Restaurant,Salad Place,Spa,0
25,Downtown Toronto,-79.422564,Grocery Store,Café,Park,Baby Store,Restaurant,Diner,Italian Restaurant,Candy Store,Coffee Shop,Gas Station,0
30,Downtown Toronto,-79.384568,Coffee Shop,Café,Steakhouse,Restaurant,Asian Restaurant,Bakery,Bar,Cosmetics Shop,Thai Restaurant,Sushi Restaurant,0
31,West Toronto,-79.442259,Bakery,Pharmacy,Brazilian Restaurant,Park,Bank,Supermarket,Brewery,Café,Middle Eastern Restaurant,Fast Food Restaurant,0


In [36]:
# second cluster
# this small cluster is quite particular, because playgrounds and tennis courts are quite common
# cofeeshops are not that common

toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
83,Central Toronto,-79.38316,Playground,Tennis Court,Women's Store,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,1


In [37]:
# third cluster, 
# lot's of gardens and home service venues
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
62,Central Toronto,-79.416936,Garden,Home Service,Women's Store,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,2


In [38]:
# fourth cluster
# parks, department stoers and playgrounds

toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
67,Central Toronto,-79.390197,Department Store,Park,Breakfast Spot,Gym,Sandwich Place,Food & Drink Shop,Hotel,Women's Store,Dumpling Restaurant,Donut Shop,3
91,Downtown Toronto,-79.377529,Park,Trail,Playground,Women's Store,Cupcake Shop,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store,3
