# Segmenting and Clustering Neighbourhoods in Toronto

1 Scrap Contents from Wiki Page

In [9]:
#Importing the libraries
import numpy as np
import pandas as pd
import requests
import json
from geopy.geocoders import Nominatim
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
import os
import folium



In [13]:
#Scraping the Wiki Page
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(source, 'lxml')

table = soup.find("table")
table_rows = table.tbody.find_all("tr")

res = []
for tr in table_rows:
    td = tr.find_all("td")
    row = [tr.text for tr in td]
    
    # Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
    if row != [] and row[1] != "Not assigned\n":
        # If a cell has a borough but a "Not assigned" neighborhood, then the neighborhood will be the same as the borough.
        if "Not assigned\n" in row[2]: 
            row[2] = row[1]
        res.append(row)

# Dataframe with 3 columns
df = pd.DataFrame(res, columns = ["PostalCode", "Borough", "Neighborhood"])
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A\n,North York\n,Parkwoods\n
1,M4A\n,North York\n,Victoria Village\n
2,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"
3,M6A\n,North York\n,"Lawrence Manor, Lawrence Heights\n"
4,M7A\n,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government\n"


In [14]:
df['Neighborhood'] = df['Neighborhood'].str.replace('\n',"")
df['PostalCode'] = df['PostalCode'].str.replace('\n',"")
df['Borough'] = df['Borough'].str.replace('\n',"")

In [15]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
#Grouping the PostCode and Borough to get the Neighborhoods in one line
df = df.groupby(['PostalCode','Borough'])['Neighborhood'].apply(", ".join).reset_index()

In [6]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


Getting Latitudes and Longitudes of each neighborhood

In [16]:
df_geog = pd.read_csv(r'http://cocl.us/Geospatial_data')

In [7]:
df_geog.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [17]:
#Merging the DataFrames
df_toronto = pd.merge(df,df_geog,how = 'left' ,left_on ='PostalCode',right_on = 'Postal Code')

In [12]:
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


Exploring the Neighbourhood of Toronto

In [18]:
address = 'Toronto,ON'
geolocator = Nominatim(user_agent = 'toronto_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print("The latitude and longitude of Toronto are {},{}".format(latitude,longitude))

The latitude and longitude of Toronto are 43.6534817,-79.3839347


In [8]:
pip install folium

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 2.4 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Note: you may need to restart the kernel to use updated packages.


In [19]:
f = folium.Map(location =[latitude,longitude],zoom_start = 10)
f

# Adding markers to map 

In [20]:
for lat, lng, borough, neighborhood in zip(
      df_toronto['Latitude'], 
        df_toronto['Longitude'], 
        df_toronto['Borough'], 
        df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color = 'green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(f)  
f

# Map only those places that have 'Toronto' in it

In [21]:
df_toronto_new = df_toronto[df_toronto['Borough'].str.contains('Toronto')].reset_index(drop=True)
df_toronto_new.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",M5A,43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",M7A,43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",M5B,43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,M5C,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,M4E,43.676357,-79.293031


In [23]:
#Plotting the Map again
f1 = folium.Map(location = [latitude,longitude],zoom_start = 12)
f1

In [24]:
#Adding Markers to the Map again
for lat, lng, borough, neighborhood in zip(
        df_toronto_new['Latitude'], 
        df_toronto_new['Longitude'], 
        df_toronto_new['Borough'], 
        df_toronto_new['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color = 'red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(f1)  
f1

# FourSquare Credentials

In [25]:
CLIENT_ID = 'P1GBQUJTU10U2DLTFVZRK3FDMW50NHQZTYNUQTXHFBDSW4BQ'
CLIENT_SECRET = 'F4AICVH3VELRPIXT5GRPQVIEVZDOZMEGTSNZEJXXXKP34FPB'
VERSION = '20201129'

Exploring the First Neighborhood in Toronto New Data Frame

In [26]:
n_name = df_toronto_new.loc[0,'Neighborhood']
print(f"The first neighborhood's name is '{n_name}'.")

n_latitude = df_toronto_new.loc[0,'Latitude']
n_longitude = df_toronto_new.loc[0,'Longitude']
print('Latitude and longitude values of {} are {}, {}.'.format(n_name, 
                                                               n_latitude, 
                                                               n_longitude))

The first neighborhood's name is 'Regent Park, Harbourfront'.
Latitude and longitude values of Regent Park, Harbourfront are 43.6542599, -79.3606359.


Exploring the top 100 venues within 500 meters of The Beaches

In [27]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    n_latitude, 
    n_longitude, 
    radius, 
    LIMIT)

# get the result to a json file
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5fc4d84a01f51442ca611ab2'},
 'response': {'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 45,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.653446723052674,
          'lng': -79.3620167174383}],
        'distance': 143,
       

Extracting the Category

In [28]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [29]:
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues


  from ipykernel import kernelapp as app


Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Impact Kitchen,Restaurant,43.656369,-79.35698
4,Body Blitz Spa East,Spa,43.654735,-79.359874
5,Figs Breakfast & Lunch,Breakfast Spot,43.655675,-79.364503
6,Corktown Common,Park,43.655618,-79.356211
7,The Extension Room,Gym / Fitness Center,43.653313,-79.359725
8,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149
9,Dominion Pub and Kitchen,Pub,43.656919,-79.358967


Exploring neighborhoods in part of Toronto City

In [30]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    venues_list=[]
    
    for name, lat, lng in zip(names, latitudes, longitudes):
        # print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [31]:
# Creating a new Data Frame for the new neighborhoods

toronto_new_nhoods = getNearbyVenues(names=df_toronto_new['Neighborhood'],
                                   latitudes=df_toronto_new['Latitude'],
                                   longitudes=df_toronto_new['Longitude']
                                  )

In [33]:
toronto_new_nhoods.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
4,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [34]:
# Grouping the Neighborhoods and counting the number of Unique Venues
toronto_new_nhoods.groupby(['Neighborhood']).count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,55,55,55,55,55,55
"Brockton, Parkdale Village, Exhibition Place",25,25,25,25,25,25
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",16,16,16,16,16,16
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",17,17,17,17,17,17
Central Bay Street,68,68,68,68,68,68
Christie,16,16,16,16,16,16
Church and Wellesley,75,75,75,75,75,75
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,34,34,34,34,34,34
Davisville North,9,9,9,9,9,9


In [35]:
# Calculating number of Unique Categories
print("The Number of Unique Categories are {}".format(len(toronto_new_nhoods['Venue Category'].unique())))

The Number of Unique Categories are 240


# Analyse each Neighborhood

In [36]:
#Converting the Venue Category to Numericalvalues
toronto_new_nhoods_conv = pd.get_dummies(toronto_new_nhoods[['Venue Category']],prefix="",prefix_sep="")

In [37]:
toronto_new_nhoods_conv.head()

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [38]:
toronto_new_nhoods_conv['Neighborhood'] = toronto_new_nhoods['Neighborhood']

In [39]:
toronto_new_nhoods_conv.head()

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [40]:
#MMoving Neighborhood column to the start of the dataframe and formatting the other columns
new_columns = [toronto_new_nhoods_conv.columns[-1]]+list(toronto_new_nhoods_conv.columns[:-1])
toronto_new_nhoods_conv = toronto_new_nhoods_conv[new_columns]
toronto_new_nhoods_conv.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [41]:
# Grouping by Neighbourhood and taking the mean
toronto_new_nhoods_grp = toronto_new_nhoods_conv.groupby('Neighborhood').mean().reset_index()

In [42]:
toronto_new_nhoods_grp.head()

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.058824,0.058824,0.058824,0.117647,0.117647,0.058824,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0


Finding the 10 most common venues in each neighborhood

In [43]:
def most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for i in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(i+1, indicators[i]))
    except:
        columns.append('{}th Most Common Venue'.format(i+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_new_nhoods_grp['Neighborhood']

for ind in np.arange(toronto_new_nhoods_grp.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = most_common_venues(toronto_new_nhoods_grp.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Farmers Market,Bakery,Beer Bar,Cocktail Bar,Cheese Shop,Seafood Restaurant,Restaurant,Grocery Store,Pub
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Nightclub,Climbing Gym,Bar,Bookstore,Burrito Place,Restaurant,Playground
2,"Business reply mail Processing Centre, South C...",Park,Recording Studio,Restaurant,Light Rail Station,Auto Workshop,Fast Food Restaurant,Farmers Market,Burrito Place,Pizza Place,Butcher
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Plane,Harbor / Marina,Boutique,Boat or Ferry,Rental Car Location,Bar,Historic Site,Coffee Shop
4,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Department Store,Japanese Restaurant,Thai Restaurant,Burger Joint,Bubble Tea Shop,Salad Place


# Clustering Neighborhoods

Running KMeans Clustering

In [44]:
clusters = 5
toronto_new_nhoods_cl = toronto_new_nhoods_grp.drop('Neighborhood',1)
km= KMeans(n_clusters = clusters,random_state=0)
km.fit(toronto_new_nhoods_cl)
km.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [48]:
# add clustering labels
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', km.labels_)

toronto_cluster_df = df_toronto_new

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_cluster_df = toronto_cluster_df.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_cluster_df.head() # check the last columns!


Unnamed: 0,PostalCode,Borough,Neighborhood,Postal Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",M5A,43.65426,-79.360636,0,Coffee Shop,Park,Bakery,Pub,Café,Theater,Breakfast Spot,Shoe Store,Distribution Center,Electronics Store
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",M7A,43.662301,-79.389494,0,Coffee Shop,Yoga Studio,Café,Beer Bar,Smoothie Shop,Italian Restaurant,Sandwich Place,Distribution Center,Restaurant,Diner
2,M5B,Downtown Toronto,"Garden District, Ryerson",M5B,43.657162,-79.378937,0,Coffee Shop,Clothing Store,Café,Bubble Tea Shop,Cosmetics Shop,Japanese Restaurant,Furniture / Home Store,Hotel,Pizza Place,Bookstore
3,M5C,Downtown Toronto,St. James Town,M5C,43.651494,-79.375418,0,Coffee Shop,Café,Cocktail Bar,Restaurant,Gastropub,Beer Bar,American Restaurant,Japanese Restaurant,Seafood Restaurant,Clothing Store
4,M4E,East Toronto,The Beaches,M4E,43.676357,-79.293031,1,Health Food Store,Pub,Trail,Dive Bar,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Women's Store


Plotting the Clusters once again

In [49]:
toronto_map = folium.Map(location = [latitude,longitude],zoom_start = 11)
toronto_map

In [51]:
markers_colors = []
for lat, lon, poi, cluster in zip(
        toronto_cluster_df['Latitude'], 
        toronto_cluster_df['Longitude'], 
        toronto_cluster_df['Neighborhood'], 
        toronto_cluster_df['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7).add_to(toronto_map)
       
toronto_map