# Segmenting and Clustering Neighborhoods in Toronto - 3

## Install and load the libraries

In [1]:
import pandas as pd
import numpy as np

import json # library to handle JSON files. Will be used to retreive data through the Foursquare API
import requests # library to handle requests
from pandas import json_normalize # tranform JSON file into a pandas dataframe

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

!conda install -c conda-forge beautifulsoup4 --yes   #install beautifulsoup, a library to scrap data from web pages
import bs4  #import the Beautifulsoup library


# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from sklearn
from sklearn.cluster import KMeans

import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.8.3
  latest version: 4.8.4

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.8.3
  latest version: 4.8.4

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Libraries imported.


In [2]:
print(bs4.__version__)  # print the BeautifulSoup version to confirm that the library was imported correctly

4.9.1


## Scrape the data

I will use the pandas function *read_html* together with the *BeautifulSoup* library to retrieve the data from the Wikipedia web page.

In [3]:
# Parse every HTML table found on the Wikipedia page into its own DataFrame.
# read_html returns them as a list, in the order in which they appear on the page.
FSA = pd.read_html(
    io='http://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    flavor='bs4',
)

This creates a list of Data Frames, one for each table present in the web page. We inspect the result and see that what we need is just the first Data Frame.

In [4]:
# Display the list returned by read_html (one DataFrame per table on the page).
FSA

[    Postal Code          District  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                          Neighbourhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

In [5]:
# keep only the first DataFrame, which is the one we need
# NOTE(review): this rebinds FSA from the list of DataFrames to a single DataFrame, so the
# cell is not idempotent -- re-running it without re-running the read_html cell would index
# the DataFrame (not the list) with 0.
FSA = FSA[0]
FSA

Unnamed: 0,Postal Code,District,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


## Prepare the data

We have saved the data in the Data Frame 'FSA'. We now have to rename the columns as specified in the guidelines.

In [6]:
# Rename the three columns, in order, to the names required by the guidelines
guideline_column_names = ['Postal Code', 'Borough', 'Neighborhood']
FSA.columns = guideline_column_names
FSA

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


We now have to clean the Data Frame, dropping the rows with 'Not assigned' Borough.

In [7]:
# Keep only the rows whose Borough is assigned.
# The original row labels are kept (the index is not reset at this stage).
borough_is_assigned = FSA['Borough'].ne('Not assigned')
FSA = FSA[borough_is_assigned]
FSA

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


We now have to check if some postal codes are listed multiple times for different neighborhoods, and merge them. We can see that we have here 103 different Postal Codes, and 103 rows in the DataFrame, which means that no Postal Code is listed twice. We don't have to do anything else.

In [8]:
# Compare the number of rows with the number of distinct postal codes:
# equal values mean that no postal code is listed more than once.
DF_rows = len(FSA)
PostalCodes = FSA['Postal Code'].nunique()

print('There are {} different Postal Codes and {} rows in the DataFrame.'.format(PostalCodes, DF_rows))

There are 103 different Postal Codes and 103 rows in the DataFrame.


We then have to check whether there are still some 'Not assigned' Neighborhoods, and replace them with the name of the Borough. I will start by showing only the rows with a 'Not assigned' Neighborhood:

In [9]:
# List the rows, if any, whose Neighborhood is still 'Not assigned'
FSA.loc[FSA['Neighborhood'].eq('Not assigned')]

Unnamed: 0,Postal Code,Borough,Neighborhood


There are no 'Not assigned' Neighborhoods, so we just have to show the size of the DataFrame.

In [10]:
# (number of rows, number of columns) of the cleaned DataFrame
FSA.shape

(103, 3)

## Retrieving geographical coordinates

As the geocoder package gets stuck on the second postal code (M4A), after multiple attempts I chose to load the coordinates from the .csv file.

In [11]:
# Load the coordinates of the postal codes from a CSV file located in the working directory.
coordinates = pd.read_csv('Geospatial_Coordinates.csv')

In [12]:
# Display the coordinates table (Postal Code, Latitude, Longitude).
coordinates

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


I will now join the two DataFrames to append the coordinates of each Postal Code area to the name of the Borough and its Neighborhoods.

To be sure that all Postal Codes are taken into account, I use the 'outer' join, and check whether the number of rows of the resulting DataFrame has grown (which would mean that there were rows in one of the DataFrames that were not present in the other one).

In [13]:
# Attach the coordinates to every postal code area.
# - The join key is given once (it was previously listed twice).
# - validate='one_to_one' makes pandas raise if a postal code is duplicated on either side.
result = pd.merge(FSA, coordinates, how='outer', on='Postal Code', validate='one_to_one')

# An outer join keeps unmatched rows from both sides, so enforce the two things that the
# "row count is unchanged" check is meant to guarantee:
# 1. no postal code exists only in `coordinates` (that would add rows with an empty Borough);
assert len(result) == len(FSA), 'coordinates contains postal codes that are not in FSA'
# 2. no postal code of FSA is left without coordinates (the row count alone cannot detect this).
assert result[['Latitude', 'Longitude']].notna().all().all(), 'some postal codes have no coordinates'

result

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [14]:
# (number of rows, number of columns) after the merge
result.shape

(103, 5)

I can see that the number of rows is unchanged: the result must be correct.

## Explore the neighborhoods

I will restrict myself to work only with the Neighborhoods in Toronto. I will therefore select only the Boroughs containing the word 'Toronto'.

In [15]:
# Keep the postal code areas whose Borough name contains 'Toronto'.
# reset_index() renumbers the rows and stores the previous row labels in a new 'index' column.
borough_in_toronto = result['Borough'].str.contains('Toronto')
Toronto = result[borough_in_toronto].reset_index()
Toronto


Unnamed: 0,index,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,19,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


Using Foursquare's API I will retrieve the most popular venues in every Neighborhood. First, I prepare the credentials for the API calls, then I define a function that retrieves the venues for a given list of neighborhoods, and finally I apply it to all of the Postal Code areas in Toronto.

In [16]:
# prepare the credentials for API calls
# The Foursquare keys are read from the environment so that they are never stored in the
# notebook or in its outputs.
# NOTE(review): keys that were ever saved in this notebook should be treated as leaked and rotated.
import os

_missing_vars = [var for var in ('FOURSQUARE_CLIENT_ID', 'FOURSQUARE_CLIENT_SECRET') if not os.environ.get(var)]
if _missing_vars:
    raise EnvironmentError(
        'Set the environment variable(s) {} before running this notebook.'.format(', '.join(_missing_vars)))

CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID']
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET']
VERSION = '20200823'  # sent as the `v` parameter of every request
LIMIT = 100  # sent as the `limit` parameter of every request

In [17]:
# get venues near a given list of neighborhoods.
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare `venues/explore` endpoint once per location.

    Parameters
    ----------
    names : iterable
        Label of each location; it is stored in the 'Postal Code' column of the result.
    latitudes, longitudes : iterable
        Coordinates of each location, in the same order as `names`.
    radius : int, default 500
        Value sent as the `radius` parameter of each request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue, with the columns 'Postal Code', 'Neighborhood Latitude',
        'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude' and
        'Venue Category'. The frame is empty (but has these columns) when no venue is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    Uses the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and prints each
    name as it is processed.
    """
    venue_columns = ['Postal Code',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lon in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lon),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; fail loudly on HTTP errors instead of on a missing JSON key
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()

        # a location without recommendations may come back without groups/items
        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            # a venue may have no category at all: keep the venue, leave the category empty
            category_name = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lon,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # passing the column names to the constructor also works when no venue was collected
    nearby_venues = pd.DataFrame(venue_rows, columns=venue_columns)

    return(nearby_venues)

In [18]:
# Fetch the venues around every Postal Code area of Toronto (one API call per area),
# then show the size of the resulting table.
Toronto_venues = getNearbyVenues(
    Toronto['Postal Code'],
    Toronto['Latitude'],
    Toronto['Longitude'],
)
Toronto_venues.shape

M5A
M7A
M5B
M5C
M4E
M5E
M5G
M6G
M5H
M6H
M5J
M6J
M4K
M5K
M6K
M4L
M5L
M4M
M4N
M5N
M4P
M5P
M6P
M4R
M5R
M6R
M4S
M5S
M6S
M4T
M5T
M4V
M5V
M4W
M5W
M4X
M5X
M4Y
M7Y


(1635, 7)

In [19]:
# Preview the first rows of the venues table.
Toronto_venues.head()

Unnamed: 0,Postal Code,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M5A,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,M5A,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,M5A,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,M5A,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,M5A,43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


In [20]:
# Count the distinct venue categories (a missing value, if any, counts as one category)
unique_category_count = Toronto_venues['Venue Category'].nunique(dropna=False)
print('There are {} uniques categories.'.format(unique_category_count))

There are 240 uniques categories.


### Neighborhood analysis

We look at the venues in each neighborhood. We start by using a one-hot encoding for the categorical variable *Venue Category*. This will be particularly useful later on to apply the K-Means algorithm.

In [21]:
# One-hot encode the venue categories: one indicator column per category, one row per venue
category_indicators = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Put the Postal Code of each venue in front of the indicator columns for easier reading
Toronto_onehot = pd.concat([Toronto_venues[['Postal Code']], category_indicators], axis=1)

Toronto_onehot.head(10)

Unnamed: 0,Postal Code,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Now we group by Postal Code and take the mean of the occurrences of each kind of venue in the Postal Code area.

In [22]:
# Average the indicator columns per Postal Code: each value becomes the share of the
# area's venues that belong to that category. 'Postal Code' is kept as a regular column.
Toronto_grouped = Toronto_onehot.groupby('Postal Code', as_index=False).mean()
Toronto_grouped

Unnamed: 0,Postal Code,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,M4E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,...,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.02381
2,M4L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,0.0,0.0,0.0,0.0,0.0,0.0,0.04878,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439
4,M4N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M4P,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M4R,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05
7,M4S,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M4T,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M4V,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0


I will order the kind of venues in each neighborhood by frequency and display the 5 most common venues.

In [23]:
max_top_venues = 5

# Print, for every Postal Code area, its most frequent venue categories.
# The rows are visited directly instead of re-filtering the whole frame for each postal code,
# and the per-area table is built in one go rather than by assigning into a slice.
for _, grouped_row in Toronto_grouped.iterrows():
    PC = grouped_row['Postal Code']
    print("----"+PC+"----")

    # everything except the Postal Code is a category frequency
    frequencies = grouped_row.drop('Postal Code').astype(float)
    temp = pd.DataFrame({'venue': frequencies.index, 'freq': frequencies.values})
    temp = temp.round({'freq': 2})   # two decimals are enough for display

    # order by frequency and show only the top `max_top_venues` categories
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(max_top_venues))
    print('\n')  # blank line between two areas

----M4E----
                 venue  freq
0         Neighborhood  0.25
1    Health Food Store  0.25
2                  Pub  0.25
3                Trail  0.25
4  Martial Arts School  0.00


----M4K----
                    venue  freq
0        Greek Restaurant  0.17
1             Coffee Shop  0.10
2      Italian Restaurant  0.07
3  Furniture / Home Store  0.05
4          Ice Cream Shop  0.05


----M4L----
                  venue  freq
0                  Park  0.10
1      Sushi Restaurant  0.05
2                   Pub  0.05
3  Fast Food Restaurant  0.05
4     Fish & Chips Shop  0.05


----M4M----
                 venue  freq
0                 Café  0.10
1          Coffee Shop  0.07
2              Brewery  0.05
3  American Restaurant  0.05
4               Bakery  0.05


----M4N----
           venue  freq
0           Park  0.33
1       Bus Line  0.33
2    Swim School  0.33
3        Airport  0.00
4  Movie Theater  0.00


----M4P----
                  venue  freq
0  Gym / Fitness Center  0.12


Let's save this into a Data Frame. First we write a function to sort the venues, then use it in creating the Data Frame.

In [24]:
def return_most_common_venues(row, max_top_venues):
    """Return the labels of the largest values of `row`, ignoring its first entry.

    Parameters
    ----------
    row : pandas.Series
        The first entry is skipped (the callers pass the Postal Code there); the remaining
        entries are the values to rank, labelled by venue category.
    max_top_venues : int
        Maximum number of labels to return.

    Returns
    -------
    numpy.ndarray
        Labels ordered by decreasing value, at most `max_top_venues` of them.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:max_top_venues]

In [25]:
max_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create the column names according to the number of top venues:
# '1st', '2nd', '3rd', then '<n>th' for every following rank.
# The suffix is chosen with an explicit test instead of a bare `except`, which would also
# hide unrelated errors.
columns = ['Postal Code']
for ind in range(max_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# rank the categories of every Postal Code area, then build the DataFrame in one go
top_venues_per_area = [
    return_most_common_venues(grouped_row, max_top_venues)
    for _, grouped_row in Toronto_grouped.iterrows()
]
neighborhoods_venues_sorted = pd.DataFrame(top_venues_per_area, columns=columns[1:])
neighborhoods_venues_sorted.insert(0, 'Postal Code', Toronto_grouped['Postal Code'].values)

neighborhoods_venues_sorted.head()

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M4E,Pub,Health Food Store,Trail,Neighborhood,Yoga Studio
1,M4K,Greek Restaurant,Coffee Shop,Italian Restaurant,Furniture / Home Store,Restaurant
2,M4L,Park,Pet Store,Board Shop,Brewery,Sandwich Place
3,M4M,Café,Coffee Shop,Brewery,Gastropub,Bakery
4,M4N,Park,Bus Line,Swim School,Dessert Shop,Ethiopian Restaurant


### K-Means

Now we can start to cluster the neighborhoods using the K-Means algorithm.

In [26]:
# set number of clusters
k = 5

# the features are the category frequencies only; the axis is named explicitly because
# recent pandas versions no longer accept it as a positional argument
Toronto_grouped_clustering = Toronto_grouped.drop(columns='Postal Code')

# run k-means clustering
# random_state fixes the initialisation; n_init is pinned so that the result does not depend
# on the default of the installed scikit-learn version
kmeans = KMeans(n_clusters=k, random_state=0, n_init=10).fit(Toronto_grouped_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([0, 3, 3, 3, 2, 3, 3, 3, 1, 3], dtype=int32)

We save into a new Data Frame the cluster to which each neighborhood belongs.

In [27]:
# add clustering labels as the first column
# (any column left by a previous run of this cell is removed first, so the cell can be re-run)
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# add the cluster label and the most common venues to the Toronto table, matching on Postal Code;
# join() returns a new DataFrame, so Toronto itself is left unchanged
Toronto_merged = Toronto.join(neighborhoods_venues_sorted.set_index('Postal Code'), on='Postal Code')

Toronto_merged.head() # check the last columns!

Unnamed: 0,index,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3,Coffee Shop,Park,Bakery,Café,Pub
1,4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3,Coffee Shop,College Cafeteria,Yoga Studio,Distribution Center,Portuguese Restaurant
2,9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,3,Coffee Shop,Clothing Store,Café,Bubble Tea Shop,Japanese Restaurant
3,15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,3,Coffee Shop,Café,Cocktail Bar,American Restaurant,Restaurant
4,19,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Pub,Health Food Store,Trail,Neighborhood,Yoga Studio


### Folium

We can now visualize the clusters on a map, using Folium. I will first get the coordinates of Toronto (using geopy), then plot again with Folium the neighborhoods of Toronto.

In [28]:
address = 'Toronto, Ontario'

# Nominatim requires a user_agent that identifies the application
geolocator = Nominatim(user_agent="geocode_agent")
location = geolocator.geocode(address)
if location is None:
    # geocode() gives None when the address cannot be resolved; stop with a clear message
    raise ValueError('No geocoding result for {!r}.'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


In [29]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one hex colour per cluster, evenly spaced on the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# rows without a cluster label (no match in the join) cannot be coloured, so they are skipped
clustered = Toronto_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lon, nei, cluster in zip(clustered['Latitude'], clustered['Longitude'], clustered['Postal Code'], clustered['Cluster Labels']):
    cluster = int(cluster)  # labels are 0..k-1 and index the colour list directly
    label = folium.Popup(str(nei) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters