# <span style="color:green"> One single notebook for all 3 parts. Please scroll down to review all 3 parts</span>
# <span style="color:blue"> Part 1 </span>
##    <span style="color:blue"> Use pandas, or the BeautifulSoup package, or any other way you are comfortable with to transform the data in the table on the Wikipedia page into the above pandas dataframe.</span>

#### Importing libraries

In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
# read and sort wiki table
web_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(web_url,'lxml')
#print(soup.prettify())

#### From the soup results, extract tabular data in class="wikitable sortable

In [3]:
wiki_table = soup.find('table',{'class':'wikitable sortable'})
#wiki_table

In [4]:
print(wiki_table.tr.text)


Postcode
Borough
Neighborhood



#### Extracting all values in tr (row tag) and separating each td value by ','

In [5]:
table_tmp = ""
for tr in wiki_table.find_all('tr'):
    row_tmp = ""
    for tds in tr.find_all('td'):
        row_tmp = row_tmp + "," + tds.text
    table_tmp = table_tmp + row_tmp[1:]
#print(table_tmp)

#### Saving wiki table and creating dataframe

In [6]:
import pandas as pd
file = open("Toronto_Neighb.csv","w")
file.write(table_tmp)

df = pd.read_csv("Toronto_Neighb.csv",header=None)
df.columns=["PostalCode","Borough","Neighbourhood"]

In [7]:
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned
9,M9A,Queen's Park,Not assigned


#### Dropping rows with a borough that is Not Assigned

In [8]:
# Get indexes where Borough ="Not assigned"
indexNA = df[ df['Borough'] =='Not assigned'].index

# Drop Not Assigned rows
df.drop(indexNA,inplace=True)

In [9]:
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Not assigned
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


#### In case of a Not Assigned Neighbourhood, the borough name will be used

In [10]:
df.loc[df['Neighbourhood'] =='Not assigned' , 'Neighbourhood'] = df['Borough']

In [11]:
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Queen's Park
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


#### Combine rows with the same (postalCode+Borough) into one row with the neighbourhoods separated by ","

In [12]:
df_tmp = df.groupby(['PostalCode','Borough'], sort=False).agg(', '.join)
df_2 = df_tmp.reset_index()
df_2.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Queen's Park
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [13]:
df_2.shape

(103, 3)

# <span style="color:blue"> Part 2 </span>
## <span style="color:blue">Use the Geography data from web to create dataframe with longitude and latitude values</span>

#### A csv file that has the geographical coordinates of each postal code from  http://cocl.us/Geospatial_data

In [14]:
!wget -q -O 'Toronto_geography.csv'  http://cocl.us/Geospatial_data
df_geo = pd.read_csv('Toronto_geography.csv')
df_geo.columns=['PostalCode','Latitude','Longitude']

In [15]:
df_geo.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [16]:
Toronto_df = pd.merge(df_2, df_geo[['PostalCode','Latitude', 'Longitude']], on='PostalCode')
Toronto_df.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M9A,Queen's Park,Queen's Park,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


# <span style="color:blue">Part 3</span>
## <span style="color:blue">Explore and cluster the neighborhoods in Toronto</span>

### Use geopy library to get the latitude and longitude values of Toronto

In [17]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import matplotlib.cm as cm # Matplotlib plotting modules
import matplotlib.colors as co
from sklearn.cluster import KMeans
import folium # map rendering

In [18]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="Toronto")
location = geolocator.geocode(address)
latitude_tor = location.latitude
longitude_tor = location.longitude
print('Geograpical coordinates of Toronto: {}, {}.'.format(latitude_tor, longitude_tor))

Geograpical coordinates of Toronto: 43.653963, -79.387207.


In [61]:
Toronto_map = folium.Map(location=[latitude_tor, longitude_tor], zoom_start=10)

# add markers
for lat, lng, borough, Neighbourhood in zip(Toronto_df['Latitude'], Toronto_df['Longitude'], Toronto_df['Borough'], Toronto_df['Neighbourhood']):
    label = '{}, {}'.format(Neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5.5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(Toronto_map)  
    
Toronto_map

### Define Foursquare Credentials and Version

In [20]:
# Define parameters for the API request URL
CLIENT_ID = 'ZCIVRZ3YVFXODE4KHXE1JD2HPAXPYESYKEZ1QANWW4OHJ2F5' # your Foursquare ID
CLIENT_SECRET = 'AADKGGASHXAEYRXNHFARXLP2MR1BEVTJQT3YSJQDYFGNBU5M' # your Foursquare Secret
VERSION = '20180604'
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ZCIVRZ3YVFXODE4KHXE1JD2HPAXPYESYKEZ1QANWW4OHJ2F5
CLIENT_SECRET:AADKGGASHXAEYRXNHFARXLP2MR1BEVTJQT3YSJQDYFGNBU5M


In [21]:
# Radius and limit of venues to get
radius=500
LIMIT=100

In [22]:
# get nearby venues of each Neighbourhood in Toronto
list_venues=[]
for ngb, lat, lng in zip(Toronto_df['Neighbourhood'],Toronto_df['Latitude'],Toronto_df['Longitude']):
    # generate API request url
    url='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        lat, lng, 
        radius, 
        LIMIT)
    # make the request
    results = requests.get(url).json()["response"]["groups"][0]["items"]
    # return only relevant information for each nearby venue
    list_venues.append([(
        ngb, lat, lng, 
        vn['venue']['name'], 
        vn['venue']['location']['lat'], 
        vn['venue']['location']['lng'],  
        vn['venue']['categories'][0]['name']) for vn in results])

Toronto_venues = pd.DataFrame([item for list_venue in list_venues for item in list_venue])
Toronto_venues.columns = ['Neighbourhood', 
    'Neighb. Lat.', 
    'Neighb. Lng.', 
    'Venue', 
    'Venue Lat.', 
    'Venue Lng.', 
    'Venue Category']

In [23]:
Toronto_venues.head(15)

Unnamed: 0,Neighbourhood,Neighb. Lat.,Neighb. Lng.,Venue,Venue Lat.,Venue Lng.,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,GTA Restoration,43.753396,-79.333477,Fireworks Store
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Parkwoods,43.753259,-79.329656,TTC stop - 44 Valley Woods,43.755402,-79.333741,Bus Stop
4,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
5,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
6,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
7,Victoria Village,43.725882,-79.315572,Memories of Africa,43.726602,-79.312427,Grocery Store
8,Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
9,Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop


### Analyzing Neighbourhoods and Each Neighbourhood

In [24]:
# Number of venues for each neighbourhood
Toronto_venues.groupby('Neighbourhood').count().head(20)

Unnamed: 0_level_0,Neighb. Lat.,Neighb. Lng.,Venue,Venue Lat.,Venue Lng.,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Agincourt,5,5,5,5,5,5
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",3,3,3,3,3,3
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",9,9,9,9,9,9
"Alderwood, Long Branch",8,8,8,8,8,8
"Bathurst Manor, Downsview North, Wilson Heights",19,19,19,19,19,19
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",25,25,25,25,25,25
Berczy Park,54,54,54,54,54,54
"Birch Cliff, Cliffside West",4,4,4,4,4,4


See some details of each Neighbourhood. One hot encoding will be applied to the 'Venue Catagory'

In [25]:
Toronto_1hot = pd.get_dummies(Toronto_venues['Venue Category'],prefix="",prefix_sep="")

#### Insert Neighbourhood column as the first column of the new dataframe

In [26]:
if 'Neighbourhood' not in Toronto_1hot.columns:
    Toronto_1hot.insert(0,'Neighbourhood',Toronto_venues['Neighbourhood'])
Toronto_1hot.head(10)

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Find first 5 most common venues of each Neighbourhood

In [27]:
# group by Neighbourhood
Toronto_grp = Toronto_1hot.groupby('Neighbourhood').mean().reset_index()
Toronto_grp.head()

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
# Compute and show the top 5 Venue of each Neighbourhood
top_num = 5
for ngb in Toronto_grp['Neighbourhood']:
    print("---- "+ngb+" ----")
    tmp = Toronto_grp[Toronto_grp['Neighbourhood'] == ngb].T.reset_index()
    tmp.columns = ['Venue','Freq.']
    #print(tmp.loc[0])
    tmp = tmp.loc[1:]
    tmp['Freq.'] = tmp['Freq.'].astype(float)
    tmp = tmp.round({'Freq.': 2})
    print(tmp.sort_values('Freq.', ascending=False).reset_index(drop=True).head(top_num))
    print('\n')

---- Adelaide, King, Richmond ----
         Venue  Freq.
0  Coffee Shop   0.08
1          Bar   0.04
2   Steakhouse   0.04
3         Café   0.04
4       Bakery   0.03


---- Agincourt ----
                       Venue  Freq.
0             Breakfast Spot    0.2
1                     Lounge    0.2
2  Latin American Restaurant    0.2
3               Skating Rink    0.2
4             Clothing Store    0.2


---- Agincourt North, L'Amoreaux East, Milliken, Steeles East ----
               Venue  Freq.
0               Park   0.33
1         Playground   0.33
2        Coffee Shop   0.33
3  Accessories Store   0.00
4        Men's Store   0.00


---- Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown ----
                 Venue  Freq.
0        Grocery Store   0.22
1             Pharmacy   0.11
2  Fried Chicken Joint   0.11
3          Pizza Place   0.11
4       Sandwich Place   0.11


---- Alderwood, Long Branch ----
            Venue  Fr

                        Venue  Freq.
0                      Bakery   0.25
1  Construction & Landscaping   0.25
2                        Park   0.25
3            Basketball Court   0.25
4          Mexican Restaurant   0.00


---- East Birchmount Park, Ionview, Kennedy Park ----
                       Venue  Freq.
0             Discount Store    0.4
1          Convenience Store    0.2
2           Department Store    0.2
3                Coffee Shop    0.2
4  Middle Eastern Restaurant    0.0


---- East Toronto ----
               Venue  Freq.
0               Park   0.33
1  Convenience Store   0.33
2        Coffee Shop   0.33
3  Accessories Store   0.00
4      Metro Station   0.00


---- Emery, Humberlea ----
                        Venue  Freq.
0              Baseball Field    1.0
1           Accessories Store    0.0
2               Metro Station    0.0
3  Modern European Restaurant    0.0
4           Mobile Phone Shop    0.0


---- Fairview, Henry Farm, Oriole ----
                  Ven

                       Venue  Freq.
0                Coffee Shop   0.10
1             Clothing Store   0.05
2             Cosmetics Shop   0.04
3                       Café   0.04
4  Middle Eastern Restaurant   0.03


---- Scarborough Village ----
                        Venue  Freq.
0                  Playground    0.5
1               Grocery Store    0.5
2           Accessories Store    0.0
3                 Men's Store    0.0
4  Modern European Restaurant    0.0


---- Silver Hills, York Mills ----
                        Venue  Freq.
0                   Cafeteria    1.0
1           Accessories Store    0.0
2               Metro Station    0.0
3  Modern European Restaurant    0.0
4           Mobile Phone Shop    0.0


---- St. James Town ----
         Venue  Freq.
0         Café   0.06
1  Coffee Shop   0.06
2   Restaurant   0.05
3     Beer Bar   0.03
4        Hotel   0.03


---- Stn A PO Boxes 25 The Esplanade ----
                Venue  Freq.
0         Coffee Shop   0.12
1         

Convert previous data into dataDrame: with Venue name and frequency

In [45]:
import numpy as np
top_num = 10

columns=['Neighbourhood']
for i in range(1,top_num+1):
    columns.append('Top {} Common Venue'.format(str(i)))
    
Tor_Ngb_venues_sorted = pd.DataFrame(columns=columns)
Tor_Ngb_venues_sorted['Neighbourhood'] = Toronto_grp['Neighbourhood']

for ind in np.arange(Toronto_grp.shape[0]):
    Tor_Ngb_venues_sorted.iloc[ind, 1:] = Toronto_grp.iloc[ind,1:].sort_values(ascending=False).index.values[0:top_num]
Tor_Ngb_venues_sorted.head()

Unnamed: 0,Neighbourhood,Top 1 Common Venue,Top 2 Common Venue,Top 3 Common Venue,Top 4 Common Venue,Top 5 Common Venue,Top 6 Common Venue,Top 7 Common Venue,Top 8 Common Venue,Top 9 Common Venue,Top 10 Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Steakhouse,Bar,Bakery,Sushi Restaurant,Burger Joint,Hotel,Asian Restaurant,Thai Restaurant
1,Agincourt,Latin American Restaurant,Breakfast Spot,Lounge,Skating Rink,Clothing Store,Yoga Studio,Eastern European Restaurant,Doner Restaurant,Donut Shop,Drugstore
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Coffee Shop,Yoga Studio,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Liquor Store,Sandwich Place,Beer Store,Pharmacy,Fast Food Restaurant,Fried Chicken Joint,Empanada Restaurant,Electronics Store
4,"Alderwood, Long Branch",Pizza Place,Gym,Coffee Shop,Pharmacy,Skating Rink,Pub,Sandwich Place,Yoga Studio,Doner Restaurant,Dim Sum Restaurant


## Clustering of Neighbourhoods
#### k-means to cluster the neighbourhoods

In [46]:
k_clst = 5 # number of clusters

Toronto_grp_clst = Toronto_grp.drop('Neighbourhood', 1)

# k-means clustering
kmeans = KMeans(n_clusters=k_clst, random_state=0).fit(Toronto_grp_clst)

# check cluster labels generated for each row in the dataframe
kmeans.labels_ 

array([4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 0, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 3, 2, 4, 4,
       4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 3, 4, 4, 4, 4, 3, 4, 4, 4,
       1, 4, 4, 4, 4, 3, 4, 3, 4, 0, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       3, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3], dtype=int32)

In [47]:
# add clustering labels
Tor_Ngb_venues_sorted.insert(0, 'Cluster_Labels', kmeans.labels_)
Toronto_merged = Toronto_df
Toronto_merged = Toronto_merged.join(Tor_Ngb_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
# drop not-available data
Toronto_merged = Toronto_merged.dropna()
Toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster_Labels,Top 1 Common Venue,Top 2 Common Venue,Top 3 Common Venue,Top 4 Common Venue,Top 5 Common Venue,Top 6 Common Venue,Top 7 Common Venue,Top 8 Common Venue,Top 9 Common Venue,Top 10 Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,3.0,Park,Food & Drink Shop,Fireworks Store,Bus Stop,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,4.0,Grocery Store,Coffee Shop,Hockey Arena,Portuguese Restaurant,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,4.0,Coffee Shop,Bakery,Pub,Park,Café,Breakfast Spot,Mexican Restaurant,Dessert Shop,Chocolate Shop,Performing Arts Venue
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763,4.0,Clothing Store,Accessories Store,Vietnamese Restaurant,Miscellaneous Shop,Coffee Shop,Gift Shop,Furniture / Home Store,Boutique,Women's Store,Diner
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,4.0,Coffee Shop,Park,Gym,Yoga Studio,Smoothie Shop,Burger Joint,Burrito Place,Sandwich Place,Café,Portuguese Restaurant


In [58]:
# create map
import matplotlib.colors as colors
map_clusters = folium.Map(location=[latitude_tor, longitude_tor], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k_clst)
ys = [i + x + (i*x)**2 for i in range(k_clst)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighbourhood'], Toronto_merged['Cluster_Labels']):
    label = folium.Popup(str(poi) + ', Cluster: ' + str(int(cluster)), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters