In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

First, we pull html data from the given Wikipedia page using the requests module and print it to observe which tags we can pull table data from

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
data = requests.get(url) 
print(data.text) 

<!DOCTYPE html>
<html class="client-nojs" lang="en" dir="ltr">
<head>
<meta charset="UTF-8"/>
<title>List of postal codes of Canada: M - Wikipedia</title>
<script>document.documentElement.className = document.documentElement.className.replace( /(^|\s)client-nojs(\s|$)/, "$1client-js$2" );</script>
<script>(window.RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":876823784,"wgRevisionId":876823784,"wgArticleId":539066,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","w

Since there is only one table and all the data are in 'td' tags, we use BeautifulSoup to parse the text and extract all 'td' tags from the 'table' tag

In [3]:
soup = BeautifulSoup(data.text, 'html.parser')
tableContent = soup.table.find_all('td')

We then prepare a list that will eventually be parsed into a pandas dataframe. Since the data is in rows of 3 columns, we split them into lists of 3 elements (y), and append them into the main list (x). Once that is done, we parse x into a dataframe, and manually add in column titles: 'Postcode', 'Borough', 'Neighbourhood'

In [4]:
#Initiate main list and sub-list
x = []
y = []

#Loop through entire list
for z in range(int(len(tableContent))):
    #Append values into sub-list and remove line breaks
    y.append(tableContent[z].text.replace('\n',''))
    #For every third value, append sub-list to main list and reset sub-list
    if z % 3 == 2:
        x.append(y)
        y = []

#Parse main list into a dataframe
df = pd.DataFrame(x)

#Add column titles
df.columns = ['Postcode','Borough','Neighbourhood']

#Check dataframe
print(df.shape)
df.head(10)

(289, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


We then clean the data to meet the requirements of the assignment as explained in the code

In [5]:
#Remove rows where Borough = 'Not assigned'
df = df[df.Borough != 'Not assigned']

#If Borough is assigned by Neighbourhood is not assigned, Neighbourhood = Borough
df.loc[df['Neighbourhood'] == 'Not assigned', 'Neighbourhood'] = df['Borough']

#Sort values by postcode for easier observation
df = df.sort_values('Postcode')

Lastly, for each unique postcode/borough, we have to combine related neighbourhoods into a single value

In [6]:
#Extract the first two columns into a new dataframe and drop duplicates to find unique values of Postcode/Borough
df1 = df[['Postcode','Borough']].drop_duplicates()

#Initiate the Neighbourhood column in df1
df1['Neighbourhood'] = ''

#For each unique Postcode, concatenate unique values of Neighbourhood into a single string
for pc in df1['Postcode']:
    string1 = ''
    for pc1 in df.loc[df['Postcode'] == pc,'Neighbourhood'].unique():
        string1 = string1 + str(pc1) + ','
    #Once all unique Neighbourhoods have been included in the string, allocate it to the Neighbourhood column for that particular Postcode in df1
    df1.loc[df1['Postcode'] == pc,'Neighbourhood'] = string1[:-1]

#Reset index
df1.reset_index(inplace=True, drop=True)

df1.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Port Union,Rouge Hill,Highland Creek"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Golden Mile,Oakridge,Clairlea"
8,M1M,Scarborough,"Cliffcrest,Scarborough Village West,Cliffside"
9,M1N,Scarborough,"Cliffside West,Birch Cliff"


In [7]:
df1.shape

(103, 3)

In [30]:
#download geospatial data
geodata = pd.read_csv('http://cocl.us/Geospatial_data')

In [31]:
#initialize lat and long columns in df1
df1['Latitude'] = ''
df1['Longitude'] = ''

In [36]:
for pc in df1['Postcode']:
    df1.loc[df1['Postcode'] == pc,'Latitude'] = geodata.loc[geodata['Postal Code'] == pc,'Latitude']
    df1.loc[df1['Postcode'] == pc,'Longitude'] = geodata.loc[geodata['Postal Code'] == pc,'Longitude']

df1.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.8067,-79.1944
1,M1C,Scarborough,"Port Union,Rouge Hill,Highland Creek",43.7845,-79.1605
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7636,-79.1887
3,M1G,Scarborough,Woburn,43.771,-79.2169
4,M1H,Scarborough,Cedarbrae,43.7731,-79.2395
5,M1J,Scarborough,Scarborough Village,43.7447,-79.2395
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.7279,-79.262
7,M1L,Scarborough,"Golden Mile,Oakridge,Clairlea",43.7111,-79.2846
8,M1M,Scarborough,"Cliffcrest,Scarborough Village West,Cliffside",43.7163,-79.2395
9,M1N,Scarborough,"Cliffside West,Birch Cliff",43.6927,-79.2648


In [41]:
#extract only boroughs with "Toronto" in them
df2 = df1[df1['Borough'].str.contains("Toronto")]
df2.reset_index(inplace=True, drop=True)
df2.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.6764,-79.293
1,M4K,East Toronto,"The Danforth West,Riverdale",43.6796,-79.3522
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.669,-79.3156
3,M4M,East Toronto,Studio District,43.6595,-79.3409
4,M4N,Central Toronto,Lawrence Park,43.728,-79.3888
5,M4P,Central Toronto,Davisville North,43.7128,-79.3902
6,M4R,Central Toronto,North Toronto West,43.7154,-79.4057
7,M4S,Central Toronto,Davisville,43.7043,-79.3888
8,M4T,Central Toronto,"Moore Park,Summerhill East",43.6896,-79.3832
9,M4V,Central Toronto,"South Hill,Rathnelly,Forest Hill SE,Deer Park,...",43.6864,-79.4


In [45]:
#import libraries
import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install folium
import folium # map rendering library

Collecting folium
  Downloading https://files.pythonhosted.org/packages/47/28/b3199bf87100e389c1dff88a44a38936d27e5e99eece870b5308186217c8/folium-0.8.2-py2.py3-none-any.whl (87kB)
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/63/36/1c93318e9653f4e414a2e0c3b98fc898b4970e939afeedeee6075dd3b703/branca-0.3.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.3.1 folium-0.8.2


In [47]:
CLIENT_ID = '3T1DC2OMPAW5IB2JWHWOTJXS4AHD11GWHAHLFO1K4CRUDLCH' # your Foursquare ID
CLIENT_SECRET = 'W54KY4ZVRZSHEGQEIPJTFZOI5QH3FHDKA5BDRRFWZQTJ4DJH' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentials:
CLIENT_ID: 3T1DC2OMPAW5IB2JWHWOTJXS4AHD11GWHAHLFO1K4CRUDLCH
CLIENT_SECRET:W54KY4ZVRZSHEGQEIPJTFZOI5QH3FHDKA5BDRRFWZQTJ4DJH


In [53]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [55]:
LIMIT = 20

toronto_venues = getNearbyVenues(names=df2['Neighbourhood'],
                                   latitudes=df2['Latitude'],
                                   longitudes=df2['Longitude']
                                  )

print('Done!')

The Beaches
The Danforth West,Riverdale
The Beaches West,India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park,Summerhill East
South Hill,Rathnelly,Forest Hill SE,Deer Park,Summerhill West
Rosedale
Cabbagetown,St. James Town
Church and Wellesley
Regent Park,Harbourfront
Ryerson,Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide,Richmond,King
Harbourfront East,Union Station,Toronto Islands
Toronto Dominion Centre,Design Exchange
Commerce Court,Victoria Hotel
Roselawn
Forest Hill West,Forest Hill North
North Midtown,The Annex,Yorkville
Harbord,University of Toronto
Kensington Market,Grange Park,Chinatown
King and Spadina,CN Tower,South Niagara,Bathurst Quay,Island airport,Railway Lands,Harbourfront West
Stn A PO Boxes 25 The Esplanade
Underground city,First Canadian Place
Christie
Dovercourt Village,Dufferin
Trinity,Little Portugal
Exhibition Place,Brockton,Parkdale Village
High Park,The Junction South
Roncesvalles,Parkda

In [56]:
print(toronto_venues.shape)
toronto_venues.head(10)

(625, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
1,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
2,The Beaches,43.676357,-79.293031,Starbucks,43.678798,-79.298045,Coffee Shop
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West,Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant
5,"The Danforth West,Riverdale",43.679557,-79.352188,Dolce Gelato,43.677773,-79.351187,Ice Cream Shop
6,"The Danforth West,Riverdale",43.679557,-79.352188,MenEssentials,43.67782,-79.351265,Cosmetics Shop
7,"The Danforth West,Riverdale",43.679557,-79.352188,Messini Authentic Gyros,43.677827,-79.350569,Greek Restaurant
8,"The Danforth West,Riverdale",43.679557,-79.352188,Cafe Fiorentina,43.677743,-79.350115,Italian Restaurant
9,"The Danforth West,Riverdale",43.679557,-79.352188,Mezes,43.677962,-79.350196,Greek Restaurant


In [57]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,Richmond,King",20,20,20,20,20,20
Berczy Park,20,20,20,20,20,20
Business Reply Mail Processing Centre 969 Eastern,17,17,17,17,17,17
"Cabbagetown,St. James Town",20,20,20,20,20,20
Central Bay Street,20,20,20,20,20,20
Christie,15,15,15,15,15,15
Church and Wellesley,20,20,20,20,20,20
"Commerce Court,Victoria Hotel",20,20,20,20,20,20
Davisville,20,20,20,20,20,20
Davisville North,11,11,11,11,11,11


In [58]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 162 uniques categories.


In [60]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,...,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West,Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [62]:
toronto_onehot.shape

(625, 163)

In [63]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,...,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Adelaide,Richmond,King",0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
3,"Cabbagetown,St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Commerce Court,Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.05,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [65]:
toronto_grouped.shape

(38, 163)

In [68]:
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,Richmond,King----
              venue  freq
0  Asian Restaurant  0.10
1             Hotel  0.10
2        Steakhouse  0.10
3             Plaza  0.05
4      Neighborhood  0.05


----Berczy Park----
             venue  freq
0   Farmers Market  0.10
1             Café  0.10
2     Cocktail Bar  0.10
3             Park  0.05
4  Thai Restaurant  0.05


----Business Reply Mail Processing Centre 969 Eastern----
           venue  freq
0    Yoga Studio  0.06
1  Auto Workshop  0.06
2            Spa  0.06
3  Burrito Place  0.06
4     Smoke Shop  0.06


----Cabbagetown,St. James Town----
                   venue  freq
0             Restaurant  0.15
1                   Café  0.10
2                    Pub  0.05
3  General Entertainment  0.05
4                Butcher  0.05


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.30
1     Bubble Tea Shop  0.10
2            Tea Room  0.05
3                 Spa  0.05
4  Chinese Restaurant  0.05


----Christie----
    

In [69]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [72]:
import numpy as np

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,Richmond,King",Asian Restaurant,Steakhouse,Hotel,Gym / Fitness Center,Pizza Place,Noodle House,Concert Hall,Plaza,Neighborhood,Café
1,Berczy Park,Café,Cocktail Bar,Farmers Market,Museum,Seafood Restaurant,Belgian Restaurant,Beer Bar,Liquor Store,Jazz Club,Steakhouse
2,Business Reply Mail Processing Centre 969 Eastern,Yoga Studio,Recording Studio,Skate Park,Spa,Brewery,Burrito Place,Farmers Market,Fast Food Restaurant,Restaurant,Light Rail Station
3,"Cabbagetown,St. James Town",Restaurant,Café,Gift Shop,Indian Restaurant,Pub,Deli / Bodega,Caribbean Restaurant,Butcher,Diner,Jewelry Store
4,Central Bay Street,Coffee Shop,Bubble Tea Shop,Spa,Bar,Sushi Restaurant,Ramen Restaurant,Art Museum,Tea Room,Chinese Restaurant,Sandwich Place


In [73]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 0, 0, 0, 2, 2, 0, 2, 0, 0])

In [75]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df2

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.6764,-79.293,2,Pub,Health Food Store,Neighborhood,Coffee Shop,Comfort Food Restaurant,Dessert Shop,Ethiopian Restaurant,Eastern European Restaurant,Dog Run,Discount Store
1,M4K,East Toronto,"The Danforth West,Riverdale",43.6796,-79.3522,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Bookstore,Fruit & Vegetable Store,Indian Restaurant,Juice Bar,Dessert Shop,Cosmetics Shop,Pizza Place
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.669,-79.3156,0,Park,Sandwich Place,Pet Store,Brewery,Burger Joint,Burrito Place,Liquor Store,Fast Food Restaurant,Italian Restaurant,Steakhouse
3,M4M,East Toronto,Studio District,43.6595,-79.3409,2,Coffee Shop,Café,Bakery,Ice Cream Shop,Bookstore,Seafood Restaurant,Juice Bar,Middle Eastern Restaurant,Sandwich Place,Stationery Store
4,M4N,Central Toronto,Lawrence Park,43.728,-79.3888,1,Bus Line,Park,Swim School,Yoga Studio,Ethiopian Restaurant,Eastern European Restaurant,Dog Run,Discount Store,Diner,Dessert Shop


In [81]:
# create map
map_clusters = folium.Map(location=[43.6532, -79.3832], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters