## Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto

In [25]:
CLIENT_ID = 'Q31AKC1KB4APQSMNOF2MC0HWRHBSXPJ5VWJY221UPEQ0VVBV' # your Foursquare ID
CLIENT_SECRET = 'SWO0BZGWBZQ21NJTRXI551XIDXOS3VFMRIPIFFSHVUJL2KYQ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

### Task 1
1. Scrap postal codes of Canada from Wiki
1. Preproccess data in table:
    * Ignore cells with a borough that is Not assigned.
    * More than one neighborhood can exist in one postal code area. Neighborhoods should be separated by coma.
    * If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
1. Use the .shape method to print the number of rows of your dataframe

In [26]:
# Import necessary libraries
import pandas as pd
from bs4 import BeautifulSoup
import requests

#### First the simplest method to get data. Copy table from Wiki to text file and use read_table()

In [27]:
df = pd.read_table('test.txt')

In [28]:
# Pass rows with 'Not assigned'
df = df[df['Borough '] != 'Not assigned']

In [29]:
# Check if 'Neighborhood' does not fill
if df[(df['Neighborhood'] == '')].empty:
    print('There isn\'t empty Neighborhood')

There isn't empty Neighborhood


#### Second method  to get data. Use BeautifulSoup

In [23]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = requests.get(url)
print(f'Page status code: {page.status_code}')

Page status code: 200


In [24]:
soup = BeautifulSoup(page.text, "html.parser")
table = soup.find("table", attrs={ "class" : "wikitable sortable" })
table_body = table.find('tbody')

In [8]:
rows = []

trs = table_body.findAll('tr')
columns_names = [td.get_text(strip=True) for td in trs[0].find_all('th')]  # Get columns names from table header
for tr in trs:
    # Pass rows with 'Not assigned'
    if str(tr).find('Not assigned') > 0:
        continue
        
    tds = tr.findAll('td')
    if len(tds) == 3:
        row = []
        for td in tds:
            row.append(td.find(text=True).strip())
        rows.append(row)

In [9]:
df = pd.DataFrame(data=rows, columns=columns_names)

In [10]:
# Check if 'Neighborhood' does not fill
if df[(df['Neighborhood'] == '')].empty:
    print('There isn\'t empty Neighborhood')

KeyError: 'Neighborhood'

In [30]:
# Print resulting DataFrame
df.head(11)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,"Malvern, Rouge"


In [31]:
print(f'Number of rows in dataset: {df.shape}')

Number of rows in dataset: (180, 3)


### Task 2
Add Latitude and Longitude to every Postal Code

In [38]:
# Get data from CSV file
df_geo = pd.read_csv('Geospatial_Coordinates.csv')
df_geo

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [39]:
# Merge 2 tables using full outer join
df = df.merge(df_geo, on='Postal Code', how='outer')
df

KeyError: 'Postal Code'

## Task 3. 
1. Explore and cluster the neighborhoods in Toronto
1. Visualize it on map

In [34]:
# Import libraries
import numpy as np
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

In [35]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [36]:
# create map using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

KeyError: 'Latitude'

#### Let's create a function to repeat the same process to all the neighborhoods

In [310]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
        print(len(results))

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Get venues for all Neighborhoods

In [311]:
radius = 1000
LIMIT = 80
toronto_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude'],
                                   radius=1000
                                  )

Parkwoods
29
Victoria Village
14
Regent Park, Harbourfront
80
Lawrence Manor, Lawrence Heights
51
Queen's Park, Ontario Provincial Government
80
Islington Avenue
12
Malvern, Rouge
19
Don Mills
33
Parkview Hill, Woodbine Gardens
20
Garden District, Ryerson
80
Glencairn
38
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
14
Rouge Hill, Port Union, Highland Creek
5
Don Mills
43
Woodbine Heights
26
St. James Town
80
Humewood-Cedarvale
25
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
19
Guildwood, Morningside, West Hill
24
The Beaches
80
Berczy Park
80
Caledonia-Fairbanks
22
Woburn
8
Leaside
62
Central Bay Street
80
Christie
80
Cedarbrae
29
Hillcrest Village
19
Bathurst Manor, Wilson Heights, Downsview North
31
Thorncliffe Park
51
Richmond, Adelaide, King
80
Dufferin, Dovercourt Village
66
Scarborough Village
12
Fairview, Henry Farm, Oriole
44
Northwood Park, York University
23
East Toronto
80
Harbourfront East, Union Station, Toronto Islands
80
Little 

#### Let's check the size of the resulting dataframe

In [312]:
print(toronto_venues.shape)
toronto_venues.head()

(4376, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
2,Parkwoods,43.753259,-79.329656,Tim Hortons,43.760668,-79.326368,Café
3,Parkwoods,43.753259,-79.329656,A&W,43.760643,-79.326865,Fast Food Restaurant
4,Parkwoods,43.753259,-79.329656,Bruno's valu-mart,43.746143,-79.32463,Grocery Store


In [313]:
toronto_venues.groupby('Neighborhood').count().sort_values(by='Venue')
# Willowdale has more than LIMIT venues because it has 2 Post Codes

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"York Mills, Silver Hills",4,4,4,4,4,4
Northwest,4,4,4,4,4,4
"Rouge Hill, Port Union, Highland Creek",5,5,5,5,5,5
"Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East",7,7,7,7,7,7
"Humberlea, Emery",8,8,8,8,8,8
...,...,...,...,...,...,...
"Richmond, Adelaide, King",80,80,80,80,80,80
"Regent Park, Harbourfront",80,80,80,80,80,80
"The Danforth West, Riverdale",80,80,80,80,80,80
Davisville,80,80,80,80,80,80


In [314]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 328 uniques categories.


#### Find and delete Neighborhood with no venues

In [316]:
for nbh in df['Neighborhood']:
    if toronto_venues[toronto_venues['Neighborhood'] == nbh].shape[0] == 0:
        print(f'Delete row for \'{nbh}\' Neighborhood')
        df = df[(df['Neighborhood'] == 'Upper Rouge').map(lambda x: not x)]
        #df = df[(df['Neighborhood'] == 'Upper Rouge').map(lambda x: not x)] # Upper Rouge == nbh?
        
df.shape

Delete row for 'Upper Rouge' Neighborhood


(102, 5)

#### Use one-hot encoding to create dataframe with all venues categories for each Neighborhood

In [317]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
# add Neighborhood column
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 
# Move column 'Neighborhood' at first place
lst = list(toronto_onehot.columns)
lst.remove('Neighborhood')
columns_order = ['Neighborhood'] + lst

toronto_onehot = toronto_onehot[columns_order]
toronto_onehot

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4371,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4372,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4373,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4374,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [318]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,Agincourt,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,...,0.000000,0.021277,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.032258,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,...,0.025641,0.000000,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,"Willowdale, Newtonbrook",0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
93,Woburn,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
94,Woodbine Heights,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
95,York Mills West,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0


In [319]:
toronto_grouped.shape

(97, 328)

#### Let's create dataframe with top 10 venue for each Neighborhood

In [320]:
ef return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(10)d

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Shopping Mall,Caribbean Restaurant,Bakery,Pizza Place,Seafood Restaurant,Latin American Restaurant,Sri Lankan Restaurant,Breakfast Spot,Lounge
1,"Alderwood, Long Branch",Pharmacy,Discount Store,Pizza Place,Park,Print Shop,Intersection,Convenience Store,Skating Rink,Shopping Mall,Donut Shop
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Ski Chalet,Ski Area,Mediterranean Restaurant,Bridal Shop,Shopping Mall,Supermarket,Sushi Restaurant,Trail
3,Bayview Village,Bank,Japanese Restaurant,Gas Station,Chinese Restaurant,Grocery Store,Park,Restaurant,Trail,Café,Intersection
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Park,Bank,Sandwich Place,Comfort Food Restaurant,Bridal Shop,Bakery,Japanese Restaurant,Juice Bar
5,Berczy Park,Coffee Shop,Café,Japanese Restaurant,Hotel,Restaurant,Beer Bar,Bakery,Cocktail Bar,Park,Creperie
6,"Birch Cliff, Cliffside West",Park,Convenience Store,College Stadium,Thai Restaurant,Skating Rink,Diner,Gym,Auto Workshop,Gym Pool,General Entertainment
7,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Bakery,Restaurant,Bar,Gift Shop,Thrift / Vintage Store,Soccer Stadium,Supermarket,Indian Restaurant
8,Business reply mail Processing Centre,Park,Coffee Shop,Pizza Place,Brewery,Sushi Restaurant,Bakery,Italian Restaurant,Fast Food Restaurant,Comic Shop,Snack Place
9,"CN Tower, King and Spadina, Railway Lands, Har...",Harbor / Marina,Coffee Shop,Café,Sushi Restaurant,Sculpture Garden,Dog Run,Track,Garden,Park,Scenic Lookout


#### Cluster into 6 groups using k-mean

In [321]:
# set number of clusters
kclusters = 6

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 4, 1, 1, 1, 3, 3, 3, 1, 3])

In [322]:
# add clustering labels
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted.drop('Cluster Labels', axis=1, inplace=True)
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

#toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].map(int)
toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,4,Park,Convenience Store,Pharmacy,Bus Stop,Shopping Mall,Cosmetics Shop,Coffee Shop,Shop & Service,Tennis Court,Chinese Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,1,Coffee Shop,Portuguese Restaurant,Intersection,Men's Store,Café,Park,Golf Course,Grocery Store,Gym / Fitness Center,Sporting Goods Shop
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3,Coffee Shop,Café,Theater,Pub,Bakery,Park,Italian Restaurant,Breakfast Spot,Diner,Restaurant
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1,Fast Food Restaurant,Clothing Store,Coffee Shop,Restaurant,Dessert Shop,Miscellaneous Shop,Fried Chicken Joint,Sushi Restaurant,Furniture / Home Store,Accessories Store
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3,Park,Sushi Restaurant,Coffee Shop,Japanese Restaurant,Gastropub,Bookstore,Yoga Studio,Bubble Tea Shop,Pizza Place,Burger Joint


In [323]:
for i in range(kclusters):
    count = toronto_merged['Cluster Labels'].value_counts()[i]
    print(f'Cluster \'{i+1}\' has {count} Neighobrhoods')

Cluster '1' has 1 Neighobrhoods
Cluster '2' has 47 Neighobrhoods
Cluster '3' has 1 Neighobrhoods
Cluster '4' has 38 Neighobrhoods
Cluster '5' has 13 Neighobrhoods
Cluster '6' has 2 Neighobrhoods


#### Visualize clusters on map

In [324]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)

In [325]:
# Add legend and show map
legend_html = """
<div style="position:fixed;
     bottom: 50px; 
     left: 50px; 
     width: 150px; 
     height:150px; 
     border:2px solid grey; 
     z-index: 9999;
     font-size:14px;">
     &nbsp;<b>K-means clusters:</b><br>
     &nbsp;<i class="fa fa-circle fa-1x" style="color:rgba(127.5, 0,      255,   1)"></i>&nbsp;1<br>
     &nbsp;<i class="fa fa-circle fa-1x" style="color:rgba(25.5,  149.9,  242.5, 1)"></i>&nbsp;2<br>
     &nbsp;<i class="fa fa-circle fa-1x" style="color:rgba(76.5,  242.5,  206.3, 1)"></i>&nbsp;3<br>
     &nbsp;<i class="fa fa-circle fa-1x" style="color:rgba(178.5, 242.5,  149.9, 1)"></i>&nbsp;4<br>
     &nbsp;<i class="fa fa-circle fa-1x" style="color:rgba(255,   149.9,  78.8,  1)"></i>&nbsp;5<br>
     &nbsp;<i class="fa fa-circle fa-1x" style="color:rgba(255,   0.03,   0.0,   1)"></i>&nbsp;6
</div>"""

map_clusters.get_root().html.add_child(folium.Element(legend_html))
map_clusters

[![Copy this link if map does no show https://ibb.co/frzBt07](https://ibb.co/frzBt07)](https://ibb.co/frzBt07)