# Question 1


In [1]:
import os
import urllib.request
import warnings

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.10.0 --yes 
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.10.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    certifi-2019.6.16          |           py36_1         149 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.10.0              |             py_0          59 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.3 MB

The following NEW packages will be

Create Beautiful Soup Object from Wikipedia site

In [2]:
# Wikipedia list of Canadian postal codes beginning with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Open the response as a context manager so the HTTP connection is closed
# as soon as the document has been parsed.
with urllib.request.urlopen(url) as page:
    soup = BeautifulSoup(page, 'html.parser')

Get the first table from the page and iterate over its rows, splitting each row's text at '\n' and appending the resulting list of fields to a list of rows

In [3]:
# The first <table> on the page; every <tr> in its body is one record.
table = soup.find_all('table')[0]
rows = table.find("tbody").find_all("tr")

# Split each row's text on newlines and keep elements 1..3 of the result.
# NOTE(review): the name `list` shadows the builtin; it is kept because the
# DataFrame-building cell reads it.
list = [tr.get_text().split('\n')[1:4] for tr in rows]


Create Dataframe using first row as columns

In [4]:
# The first parsed row supplies the column names; the remaining rows are the data.
df = pd.DataFrame(list[1:], columns=list[0])
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Get the index of rows where borough is not assigned and drop

In [5]:
# Index labels of the rows whose borough is the placeholder 'Not assigned'.
borough_missing = df['Borough'] == 'Not assigned'
na_index = df.index[borough_missing]

# Remove those rows from df in place.
df.drop(index=na_index, inplace=True)

df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


Reset the Index

In [6]:
# Renumber the rows from 0 after the drop; drop=True discards the old index
# instead of keeping it as a column.
df.reset_index(drop=True,inplace=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


Group by Borough and Postcode, joining the neighbourhood values with a comma and resetting the index

In [7]:
# One output row per (Borough, Postcode) pair; the neighbourhood names that share
# a pair are concatenated with ', '. reset_index turns the group keys back into columns.
df = df.groupby(['Borough','Postcode'])['Neighbourhood'].apply(', '.join).reset_index()

In [8]:
df.head()

Unnamed: 0,Borough,Postcode,Neighbourhood
0,Central Toronto,M4N,Lawrence Park
1,Central Toronto,M4P,Davisville North
2,Central Toronto,M4R,North Toronto West
3,Central Toronto,M4S,Davisville
4,Central Toronto,M4T,"Moore Park, Summerhill East"


Reordering the dataframe according to specs

In [9]:
# Select the three columns in the order Postcode, Borough, Neighbourhood.
df = df[['Postcode', 'Borough', 'Neighbourhood']]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M4N,Central Toronto,Lawrence Park
1,M4P,Central Toronto,Davisville North
2,M4R,Central Toronto,North Toronto West
3,M4S,Central Toronto,Davisville
4,M4T,Central Toronto,"Moore Park, Summerhill East"


Find the index of the rows where the neighbourhood is 'Not assigned', then replace each of those values with the row's borough

In [10]:
# Index labels of the rows whose neighbourhood is the placeholder 'Not assigned'.
na_index1 = df[df['Neighbourhood'] == 'Not assigned'].index

# Copy the borough name into those rows with a single vectorised assignment.
# The right-hand side is aligned on the same index labels, so each row receives
# its own borough; an empty na_index1 makes this a no-op.
df.loc[na_index1, 'Neighbourhood'] = df.loc[na_index1, 'Borough']


In [11]:
df.shape

(103, 3)

# Question 2

<i>The geocoder package wasn't working well for me, so I decided to use the CSV file instead.</i> 

In [12]:
!wget -q -O 'Geospatial_Coordinates.csv' http://cocl.us/Geospatial_data

In [13]:
df_geo = pd.read_csv('Geospatial_Coordinates.csv')

In [14]:
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
# Give the key column the same name as in df so the two frames can be merged on it.
df_geo.rename(columns={"Postal Code" : "Postcode"}, inplace=True)
# Attach the Latitude/Longitude columns to each postcode row.
df_merged = pd.merge(df, df_geo, on='Postcode')
df_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
3,M4S,Central Toronto,Davisville,43.704324,-79.38879
4,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


# Question 3

In [16]:
df_merged['Borough'].value_counts()

North York          24
Downtown Toronto    18
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East Toronto         5
York                 5
East York            5
Queen's Park         1
Mississauga          1
Name: Borough, dtype: int64

In [17]:
# Latitude/longitude used as the centre point of the folium maps.
toronto_lat = 43.6532
toronto_long = -79.3832

In [20]:
map_toronto = folium.Map(location=[toronto_lat, toronto_long], zoom_start=10)

# Add one circle marker per row of df_merged, with a popup reading
# "<neighbourhood>, <borough>".
for lat, lng, borough, neighbourhood in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Borough'], df_merged['Neighbourhood']):
    # parse_html belongs to Popup (it escapes the label text); it is not a
    # CircleMarker option, so it is no longer passed to the marker.
    label = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto


In [23]:
# Foursquare credentials are read from the environment so that no secret is
# stored in the notebook. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be written here was published with the
# notebook and should be revoked/rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Warn right away rather than letting the first API call fail with an opaque error.
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare requests will be rejected.'
    )






In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=400):
    """Collect Foursquare venues found around each given coordinate.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Consumed in parallel; each (name, lat, lng) triple triggers one request
        to the Foursquare ``venues/search`` endpoint.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 400).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Category'. When no venue is
        collected the frame is empty but still has these columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    KeyError
        If the JSON payload lacks the expected ``response``/``venues`` keys.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Category',
               ]
    endpoint = 'https://api.foursquare.com/v2/venues/search'
    records = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and escape the query string instead of formatting it by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
        }
        # A timeout keeps one stalled request from hanging the whole notebook;
        # raise_for_status surfaces API errors instead of a confusing KeyError.
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['venues']

        for v in results:
            # Venues without any category are labelled 'Undefined'.
            category = v['categories'][0]['name'] if v['categories'] else 'Undefined'
            records.append((
                name,
                lat,
                lng,
                v['name'],
                v['location']['lat'],
                v['location']['lng'],
                category,
            ))

    # Passing the column names to the constructor keeps the schema even when
    # `records` is empty (assigning .columns afterwards would raise).
    nearby_venues = pd.DataFrame(records, columns=columns)

    return nearby_venues
    

In [26]:
# Query venues around the coordinates of every row of df_merged. The borough
# name is passed as `names`, so the 'Neighborhood' column of the result holds boroughs.
toronto_venues = getNearbyVenues(names=df_merged['Borough'],
                                   latitudes=df_merged['Latitude'],
                                   longitudes=df_merged['Longitude']
                                  )

### Visualise the first 300 venues

In [27]:
# Keep only the first 300 venue rows for the map; preview the first 20 rows of the full table.
toronto_venues_short = toronto_venues.iloc[:300]
toronto_venues.head(20)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Category
0,Central Toronto,43.72802,-79.38879,Toronto French School,43.728625,-79.383707,School
1,Central Toronto,43.72802,-79.38879,Glendon Rose Garden,43.727334,-79.378222,College Quad
2,Central Toronto,43.72802,-79.38879,Lawrence Park Ravine,43.726963,-79.394382,Park
3,Central Toronto,43.72802,-79.38879,Lake,43.72791,-79.386857,Lake
4,Central Toronto,43.72802,-79.38879,Buckingham Avenue,43.728052,-79.382129,Undefined
5,Central Toronto,43.72802,-79.38879,Gallery Towers,43.73341,-79.39361,Undefined
6,Central Toronto,43.72802,-79.38879,TTC Bus #11 Bayview,43.713398,-79.383289,Bus Line
7,Central Toronto,43.72802,-79.38879,TTC Bus #124 Sunnybrook,43.69735,-79.392657,Bus Line
8,Central Toronto,43.72802,-79.38879,Toronto French School,43.728554,-79.385325,High School
9,Central Toronto,43.72802,-79.38879,Subway,43.725834,-79.401267,Sandwich Place


In [28]:
map_toronto1 = folium.Map(location=[toronto_lat, toronto_long], zoom_start=10)

# One circle marker per venue in the 300-row subset. The loop variables are named
# after what they actually hold: the venue name and the value of the
# 'Neighborhood' column. The popup text keeps its original
# "<Neighborhood value>, <venue>" order.
for lat, lng, venue, zone in zip(toronto_venues_short['Venue Latitude'], toronto_venues_short['Venue Longitude'], toronto_venues_short['Venue'], toronto_venues_short['Neighborhood']):
    # parse_html is a Popup option (escapes the label); it is not a CircleMarker
    # option, so it is no longer passed to the marker.
    label = folium.Popup('{}, {}'.format(zone, venue), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto1)

map_toronto1

In [29]:

toronto_venues.shape

(3090, 7)

In [30]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Category
0,Central Toronto,43.72802,-79.38879,Toronto French School,43.728625,-79.383707,School
1,Central Toronto,43.72802,-79.38879,Glendon Rose Garden,43.727334,-79.378222,College Quad
2,Central Toronto,43.72802,-79.38879,Lawrence Park Ravine,43.726963,-79.394382,Park
3,Central Toronto,43.72802,-79.38879,Lake,43.72791,-79.386857,Lake
4,Central Toronto,43.72802,-79.38879,Buckingham Avenue,43.728052,-79.382129,Undefined


In [31]:
toronto_venues.groupby('Neighborhood').size()

Neighborhood
Central Toronto     270
Downtown Toronto    540
East Toronto        150
East York           150
Etobicoke           360
Mississauga          30
North York          720
Queen's Park         30
Scarborough         510
West Toronto        180
York                150
dtype: int64

In [32]:
print('There are {} uniques categories.'.format(len(toronto_venues['Category'].unique())))

There are 369 uniques categories.


In [33]:
# One indicator column per venue category; the empty prefix/separator makes the
# column names equal to the bare category names.
toronto_onehot = pd.get_dummies(toronto_venues[['Category']], prefix="", prefix_sep="")

In [34]:
# Put the 'Neighborhood' values back as the first column of the one-hot frame.
# NOTE(review): DataFrame.insert raises ValueError when the column already exists,
# i.e. if this cell is re-run without rebuilding toronto_onehot, or if a venue
# category is itself named 'Neighborhood'.
toronto_onehot.insert(loc=0, column='Neighborhood', value=toronto_venues['Neighborhood'])
toronto_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Advertising Agency,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,...,Warehouse,Warehouse Store,Water Park,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
toronto_onehot.shape

(3090, 370)

In [36]:
# Averaging the 0/1 indicator columns per 'Neighborhood' value gives the share of
# that group's venues falling in each category.
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()

In [37]:
toronto_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Advertising Agency,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,...,Warehouse,Warehouse Store,Water Park,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Central Toronto,0.0,0.0,0.0,0.003704,0.0,0.0,0.003704,0.0,0.0,...,0.0,0.0,0.0,0.003704,0.003704,0.0,0.0,0.0,0.0,0.0
1,Downtown Toronto,0.0,0.0,0.001852,0.0,0.0,0.0,0.003704,0.001852,0.007407,...,0.0,0.0,0.0,0.0,0.0,0.001852,0.001852,0.003704,0.0,0.0
2,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.006667,0.013333,0.0,0.006667
3,East York,0.0,0.0,0.0,0.0,0.0,0.006667,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.006667,0.0,0.0,0.006667,0.0,0.0
4,Etobicoke,0.0,0.0,0.0,0.0,0.0,0.0,0.002778,0.0,0.002778,...,0.0,0.002778,0.002778,0.002778,0.0,0.002778,0.0,0.002778,0.0,0.0
5,Mississauga,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,North York,0.001389,0.001389,0.0,0.0,0.0,0.002778,0.001389,0.0,0.0,...,0.0,0.001389,0.0,0.0,0.0,0.0,0.001389,0.001389,0.0,0.0
7,Queen's Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Scarborough,0.0,0.003922,0.0,0.0,0.001961,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.003922,0.0,0.0,0.0,0.001961,0.001961
9,West Toronto,0.0,0.0,0.0,0.005556,0.0,0.0,0.0,0.0,0.0,...,0.005556,0.0,0.005556,0.0,0.0,0.0,0.0,0.005556,0.0,0.0


Top 5 most common venue types for each borough

In [38]:
num_top_venues = 5

# Print, for every group in toronto_grouped, its categories ranked by frequency.
for hood in toronto_grouped['Neighborhood']:
    print("----" + hood + "----")

    # Transpose the single matching row so that categories become rows.
    freq_table = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    freq_table.columns = ['venue', 'freq']

    # Skip the first row (it holds the 'Neighborhood' label), cast the
    # frequencies to float, round to 2 decimals and rank from highest to lowest.
    ranked = (
        freq_table.iloc[1:]
        .assign(freq=lambda t: t['freq'].astype(float))
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
    )
    print(ranked.head(num_top_venues))
    print('\n')

----Central Toronto----
                                      venue  freq
0                                 Undefined  0.35
1  Residential Building (Apartment / Condo)  0.04
2                                       Spa  0.03
3                          Dentist's Office  0.02
4                                  Bus Line  0.02


----Downtown Toronto----
                                      venue  freq
0                                 Undefined  0.07
1                                    Office  0.05
2  Residential Building (Apartment / Condo)  0.04
3                               Coffee Shop  0.04
4                                  Building  0.03


----East Toronto----
                 venue  freq
0            Undefined  0.21
1                 Park  0.05
2                  Spa  0.03
3  Rental Car Location  0.02
4           Playground  0.02


----East York----
             venue  freq
0        Undefined  0.20
1             Bank  0.05
2         Bus Line  0.04
3             Park  0.03
4  Laun

In [39]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped (callers pass a row whose first
    field is the 'Neighborhood' label); the remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues`` are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [41]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


def _ordinal(position):
    """Return ``position`` with its English ordinal suffix (1 -> '1st', 11 -> '11th', 21 -> '21st')."""
    # 11, 12 and 13 take 'th' although they end in 1, 2, 3; any number not ending in 1-3 takes 'th' too.
    if 11 <= position % 100 <= 13 or not 1 <= position % 10 <= 3:
        return '{}th'.format(position)
    return '{}{}'.format(position, indicators[position % 10 - 1])


# create columns according to number of top venues
# (explicit suffix logic replaces the former bare `except:` used as control flow)
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

# fill each row with that group's top categories, most frequent first
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Undefined,Residential Building (Apartment / Condo),Spa,Park,Dentist's Office,Office,Bus Line,Assisted Living,Café,Pharmacy
1,Downtown Toronto,Undefined,Office,Residential Building (Apartment / Condo),Coffee Shop,Building,Moving Target,Park,Café,Hotel,Bakery
2,East Toronto,Undefined,Park,Spa,Office,Coffee Shop,Rental Car Location,Fast Food Restaurant,Ice Cream Shop,Moving Target,Playground
3,East York,Undefined,Bank,Bus Line,Laundry Service,Park,Rental Car Location,Church,Intersection,Breakfast Spot,Building
4,Etobicoke,Undefined,Office,Bus Line,Pizza Place,Park,Residential Building (Apartment / Condo),Factory,Dentist's Office,Miscellaneous Shop,Pharmacy
5,Mississauga,Office,Undefined,Conference Room,Automotive Shop,Vegetarian / Vegan Restaurant,Gas Station,Student Center,Print Shop,Mobile Phone Shop,Chinese Restaurant
6,North York,Undefined,Office,Park,Residential Building (Apartment / Condo),Bank,Clothing Store,Gas Station,Medical Center,Government Building,Church
7,Queen's Park,Government Building,Undefined,Capitol Building,Sandwich Place,General Entertainment,Restaurant,Music Store,Sushi Restaurant,Cafeteria,Light Rail Station
8,Scarborough,Undefined,Automotive Shop,Building,Office,Bus Line,Coffee Shop,Bank,Spa,Park,Chinese Restaurant
9,West Toronto,Undefined,Office,Residential Building (Apartment / Condo),Coffee Shop,Park,Gym / Fitness Center,Building,Salon / Barbershop,Church,Dog Run


In [42]:
kclusters = 5

# Feature matrix for clustering: the per-group category frequencies without the
# label column. `columns=` replaces the positional axis argument (`drop(label, 1)`),
# which newer pandas releases no longer accept.
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# random_state is fixed so the cluster assignment is reproducible across runs.
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

kmeans.labels_[0:10]


array([1, 0, 2, 2, 2, 4, 2, 3, 2, 0], dtype=int32)

In [43]:
# Rename 'Borough' to 'Zone' directly on df_merged.
# NOTE(review): this mutates df_merged in place, so earlier cells that read
# df_merged['Borough'] fail if re-run after this one without a kernel restart.
df_merged.rename(columns={"Borough": "Zone"}, inplace=True)
df_merged.head()

Unnamed: 0,Postcode,Zone,Neighbourhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
3,M4S,Central Toronto,Davisville,43.704324,-79.38879
4,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


In [44]:
# add clustering labels
# DataFrame.insert raises ValueError when the column already exists, so remove a
# column left by a previous run first; this keeps the cell safe to re-run.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted.drop(columns='Cluster Labels', inplace=True)

neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,Central Toronto,Undefined,Residential Building (Apartment / Condo),Spa,Park,Dentist's Office,Office,Bus Line,Assisted Living,Café,Pharmacy
1,0,Downtown Toronto,Undefined,Office,Residential Building (Apartment / Condo),Coffee Shop,Building,Moving Target,Park,Café,Hotel,Bakery
2,2,East Toronto,Undefined,Park,Spa,Office,Coffee Shop,Rental Car Location,Fast Food Restaurant,Ice Cream Shop,Moving Target,Playground
3,2,East York,Undefined,Bank,Bus Line,Laundry Service,Park,Rental Car Location,Church,Intersection,Breakfast Spot,Building
4,2,Etobicoke,Undefined,Office,Bus Line,Pizza Place,Park,Residential Building (Apartment / Condo),Factory,Dentist's Office,Miscellaneous Shop,Pharmacy


In [45]:
df_merged.head()

Unnamed: 0,Postcode,Zone,Neighbourhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
3,M4S,Central Toronto,Davisville,43.704324,-79.38879
4,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


In [46]:
# NOTE: this binds a second name to the same DataFrame object; it is not a copy.
toronto_merged = df_merged
# Rename the key column to 'Zone' so it matches the key column name in df_merged.
neighborhoods_venues_sorted.rename(columns={"Neighborhood": "Zone"}, inplace=True)




In [47]:
# Attach each zone's cluster label and top venue categories to every postcode row.
# The join starts from df_merged (the frame toronto_merged was bound to) rather than
# from toronto_merged itself, so re-running this cell does not fail on columns that
# a previous run already added.
toronto_merged = df_merged.join(neighborhoods_venues_sorted.set_index('Zone'), on='Zone')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Zone,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,1,Undefined,Residential Building (Apartment / Condo),Spa,Park,Dentist's Office,Office,Bus Line,Assisted Living,Café,Pharmacy
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197,1,Undefined,Residential Building (Apartment / Condo),Spa,Park,Dentist's Office,Office,Bus Line,Assisted Living,Café,Pharmacy
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,1,Undefined,Residential Building (Apartment / Condo),Spa,Park,Dentist's Office,Office,Bus Line,Assisted Living,Café,Pharmacy
3,M4S,Central Toronto,Davisville,43.704324,-79.38879,1,Undefined,Residential Building (Apartment / Condo),Spa,Park,Dentist's Office,Office,Bus Line,Assisted Living,Café,Pharmacy
4,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,1,Undefined,Residential Building (Apartment / Condo),Spa,Park,Dentist's Office,Office,Bus Line,Assisted Living,Café,Pharmacy


In [51]:
# create map
map_clusters = folium.Map(location=[toronto_lat, toronto_long], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
# (the palette size is simply kclusters; the former x/ys helpers only provided that length)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # The original `cluster-1` indexing is kept so the rendered colours do not
    # change: label 0 wraps around to the last palette entry.
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters