In [98]:
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library

# import k-means from clustering stage
from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

 #### Part 1 of the assignment 

 #### WebScraping using BeautifulSoup

In [1]:
import requests
r = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

In [2]:
r.status_code

200

In [3]:
# import BeautifulSoup and parse webpage 
from bs4 import BeautifulSoup
soup = BeautifulSoup(r.content, 'html.parser')



In [4]:
My_table = soup.find('table',{'class':'wikitable sortable'})
My_table

<table class="wikitable sortable">
<tbody><tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
</td></tr>
<tr>
<td>M4A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Regent_Park" title="Regent Park">Harbourfront</a>
</td></tr>
<tr>
<td>M6A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Lawrence_Heights" title="Lawrence Heights">Lawrence Heights</a>
</td></tr>
<tr>
<td>M6A</td>
<td><a href="/wiki/North

In [5]:
# looking for columns and rows within table body

import numpy as np

data = []

rows = My_table.find('tbody')

for row in rows:
    cols = rows.find_all('td')
    cols = [x.text.strip() for x in cols]
    data.append([x for x in cols if x])

In [6]:
import pandas as pd
tt = data[0]
len(tt)

861

In [7]:
#create 3 lists accordingly

Post_code = []
Borough =[]
Neighbourhood = []
P = np.arange(0,861,3)
B = np.arange(1,861,3)
N = np.arange(2,861,3)

for p in P:
    Post_code.append(tt[p])
for b in B:
    Borough.append(tt[b])
for n in N:
    Neighbourhood.append(tt[n])

In [8]:
# create DataFrame from lists
dt = {'Post_code':Post_code,'Borough':Borough,'Neighbourhood':Neighbourhood}
df = pd.DataFrame(dt)

In [9]:
# drop Borough with Not assigned

df['Borough'].replace("Not assigned",np.nan,inplace=True)
df.dropna(axis = 0,inplace = True)

In [10]:
df.head()

Unnamed: 0,Post_code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [11]:
# merge Neighbourhood rows 
df_1 = df.groupby(['Post_code','Borough'])['Neighbourhood'].apply(lambda x: " ,".join(x))
#df_1 = df.groupby(['Post_code'])['Neighbourhood'].apply(lambda x: " ,".join(x))

In [12]:
df_1 = df_1.to_frame()

In [13]:
df_1[df_1['Neighbourhood'] == "Not assigned"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Post_code,Borough,Unnamed: 2_level_1
M7A,Queen's Park,Not assigned


In [14]:
# set Neighbourhood with Not assigned value according to Borough
df_1[df_1['Neighbourhood'] == "Not assigned"] = "Queen's Park"

In [27]:
df_1.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Post_code,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge ,Malvern"
M1C,Scarborough,"Highland Creek ,Rouge Hill ,Port Union"
M1E,Scarborough,"Guildwood ,Morningside ,West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
M1J,Scarborough,Scarborough Village
M1K,Scarborough,"East Birchmount Park ,Ionview ,Kennedy Park"
M1L,Scarborough,"Clairlea ,Golden Mile ,Oakridge"
M1M,Scarborough,"Cliffcrest ,Cliffside ,Scarborough Village West"
M1N,Scarborough,"Birch Cliff ,Cliffside West"


In [28]:
df_1.reset_index('Borough',inplace = True)
df_1.head()
df_1.shape

(103, 2)

#### Part 2 

#### Importing geo data from csv file 

In [40]:
geo = pd.read_csv('C:\Python1\Geospatial_Coordinates.csv')

#geo.merge(df_1,left_on='Postal code',right_on='Post_code')
geo.columns = ['Post_code','Latitude','Longitude']
geo.head()
df_1 = df_1.merge(geo,how='outer',on = 'Post_code')

In [41]:
df_1.head()

Unnamed: 0,Post_code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge ,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek ,Rouge Hill ,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood ,Morningside ,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### Part 3

#### Exploring Toronto using cluster analysis and mapping 

In [35]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [43]:
# create map of Manhattan using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_1['Latitude'], df_1['Longitude'], df_1['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [44]:
CLIENT_ID = 'GEGSXOVEZ04FLIQN42EEEFDIDGCBNGIYZURUAUI52YPHXVYF' # your Foursquare ID
CLIENT_SECRET = 'HKJZD34KTLJRY4CLL1PCM5H220XZWCWSDKP5VWS5HH0DBVNW' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: GEGSXOVEZ04FLIQN42EEEFDIDGCBNGIYZURUAUI52YPHXVYF
CLIENT_SECRET:HKJZD34KTLJRY4CLL1PCM5H220XZWCWSDKP5VWS5HH0DBVNW


In [69]:
mask = df_1['Borough'].str.contains("Toronto")
toronto_data = df_1[mask == True]
toronto_data.head()

Unnamed: 0,Post_code,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West ,Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West ,India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [72]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius


In [70]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [73]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighbourhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

The Beaches
The Danforth West ,Riverdale
The Beaches West ,India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park ,Summerhill East
Deer Park ,Forest Hill SE ,Rathnelly ,South Hill ,Summerhill West
Rosedale
Cabbagetown ,St. James Town
Church and Wellesley
Harbourfront
Ryerson ,Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide ,King ,Richmond
Harbourfront East ,Toronto Islands ,Union Station
Design Exchange ,Toronto Dominion Centre
Commerce Court ,Victoria Hotel
Roselawn
Forest Hill North ,Forest Hill West
The Annex ,North Midtown ,Yorkville
Harbord ,University of Toronto
Chinatown ,Grange Park ,Kensington Market
CN Tower ,Bathurst Quay ,Island airport ,Harbourfront West ,King and Spadina ,Railway Lands ,South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place ,Underground city
Christie
Dovercourt Village ,Dufferin
Little Portugal ,Trinity
Brockton ,Exhibition Place ,Parkdale Village
High Park ,The Junction Sout

In [74]:
print(toronto_venues.shape)
toronto_venues.head()

(1685, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Glen Stewart Ravine,43.6763,-79.294784,Other Great Outdoors
4,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood


In [75]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide ,King ,Richmond",100,100,100,100,100,100
Berczy Park,55,55,55,55,55,55
"Brockton ,Exhibition Place ,Parkdale Village",22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern,16,16,16,16,16,16
"CN Tower ,Bathurst Quay ,Island airport ,Harbourfront West ,King and Spadina ,Railway Lands ,South Niagara",15,15,15,15,15,15
"Cabbagetown ,St. James Town",43,43,43,43,43,43
Central Bay Street,84,84,84,84,84,84
"Chinatown ,Grange Park ,Kensington Market",94,94,94,94,94,94
Christie,17,17,17,17,17,17
Church and Wellesley,83,83,83,83,83,83


In [76]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 232 uniques categories.


In [77]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [78]:
toronto_onehot.shape

(1685, 232)

In [79]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint
0,"Adelaide ,King ,Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0
2,"Brockton ,Exhibition Place ,Parkdale Village",0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower ,Bathurst Quay ,Island airport ,Harbo...",0.0,0.0,0.066667,0.066667,0.066667,0.133333,0.133333,0.133333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown ,St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,...,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0
7,"Chinatown ,Grange Park ,Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.010638,0.0,0.0,0.0,0.031915,0.0,0.042553,0.010638,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.012048,0.012048,0.0,0.0,0.0,0.0,0.0,0.0,0.012048,...,0.012048,0.0,0.0,0.0,0.0,0.0,0.0,0.012048,0.0,0.012048


In [80]:
toronto_grouped.shape

(38, 232)

In [81]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide ,King ,Richmond----
             venue  freq
0      Coffee Shop  0.07
1             Café  0.05
2       Steakhouse  0.04
3  Thai Restaurant  0.04
4           Bakery  0.03


----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1          Steakhouse  0.04
2                Café  0.04
3            Beer Bar  0.04
4  Seafood Restaurant  0.04


----Brockton ,Exhibition Place ,Parkdale Village----
            venue  freq
0     Coffee Shop  0.09
1            Café  0.09
2  Breakfast Spot  0.09
3     Yoga Studio  0.05
4    Intersection  0.05


----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0          Smoke Shop  0.06
1       Garden Center  0.06
2          Skate Park  0.06
3      Farmers Market  0.06
4  Light Rail Station  0.06


----CN Tower ,Bathurst Quay ,Island airport ,Harbourfront West ,King and Spadina ,Railway Lands ,South Niagara----
              venue  freq
0    Airport Lounge  0.13
1   Airport Service  0.13
2  Ai

In [82]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [94]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide ,King ,Richmond",Coffee Shop,Café,Thai Restaurant,Steakhouse,Asian Restaurant,Salad Place,Restaurant,Bar,Bakery,Burger Joint
1,Berczy Park,Coffee Shop,Beer Bar,Steakhouse,Bakery,Farmers Market,Cocktail Bar,Seafood Restaurant,Cheese Shop,Café,Breakfast Spot
2,"Brockton ,Exhibition Place ,Parkdale Village",Breakfast Spot,Coffee Shop,Café,Yoga Studio,Pet Store,Restaurant,Italian Restaurant,Intersection,Burrito Place,Bar
3,Business Reply Mail Processing Centre 969 Eastern,Skate Park,Burrito Place,Recording Studio,Fast Food Restaurant,Auto Workshop,Farmers Market,Spa,Pizza Place,Restaurant,Smoke Shop
4,"CN Tower ,Bathurst Quay ,Island airport ,Harbo...",Airport Service,Airport Terminal,Airport Lounge,Sculpture Garden,Rental Car Location,Harbor / Marina,Boat or Ferry,Bar,Airport Gate,Airport Food Court


In [95]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [96]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data
toronto_merged = neighborhoods_venues_sorted.set_index('Neighborhood').merge(toronto_data, left_on = 'Neighborhood', right_on = 'Neighbourhood')
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
#toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Post_code,Borough,Neighbourhood,Latitude,Longitude
0,0,Coffee Shop,Café,Thai Restaurant,Steakhouse,Asian Restaurant,Salad Place,Restaurant,Bar,Bakery,Burger Joint,M5H,Downtown Toronto,"Adelaide ,King ,Richmond",43.650571,-79.384568
1,0,Coffee Shop,Beer Bar,Steakhouse,Bakery,Farmers Market,Cocktail Bar,Seafood Restaurant,Cheese Shop,Café,Breakfast Spot,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
2,0,Breakfast Spot,Coffee Shop,Café,Yoga Studio,Pet Store,Restaurant,Italian Restaurant,Intersection,Burrito Place,Bar,M6K,West Toronto,"Brockton ,Exhibition Place ,Parkdale Village",43.636847,-79.428191
3,0,Skate Park,Burrito Place,Recording Studio,Fast Food Restaurant,Auto Workshop,Farmers Market,Spa,Pizza Place,Restaurant,Smoke Shop,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558
4,0,Airport Service,Airport Terminal,Airport Lounge,Sculpture Garden,Rental Car Location,Harbor / Marina,Boat or Ferry,Bar,Airport Gate,Airport Food Court,M5V,Downtown Toronto,"CN Tower ,Bathurst Quay ,Island airport ,Harbo...",43.628947,-79.39442


In [101]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [102]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,1st Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Post_code,Borough,Neighbourhood,Latitude,Longitude
0,Coffee Shop,Asian Restaurant,Salad Place,Restaurant,Bar,Bakery,Burger Joint,M5H,Downtown Toronto,"Adelaide ,King ,Richmond",43.650571,-79.384568
1,Coffee Shop,Farmers Market,Cocktail Bar,Seafood Restaurant,Cheese Shop,Café,Breakfast Spot,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
2,Breakfast Spot,Pet Store,Restaurant,Italian Restaurant,Intersection,Burrito Place,Bar,M6K,West Toronto,"Brockton ,Exhibition Place ,Parkdale Village",43.636847,-79.428191
3,Skate Park,Auto Workshop,Farmers Market,Spa,Pizza Place,Restaurant,Smoke Shop,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558
4,Airport Service,Rental Car Location,Harbor / Marina,Boat or Ferry,Bar,Airport Gate,Airport Food Court,M5V,Downtown Toronto,"CN Tower ,Bathurst Quay ,Island airport ,Harbo...",43.628947,-79.39442
5,Coffee Shop,Italian Restaurant,Pub,Bakery,Café,Liquor Store,Deli / Bodega,M4X,Downtown Toronto,"Cabbagetown ,St. James Town",43.667967,-79.367675
6,Coffee Shop,Burger Joint,Café,Salad Place,Juice Bar,Bubble Tea Shop,Japanese Restaurant,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,Café,Vietnamese Restaurant,Mexican Restaurant,Coffee Shop,Bakery,Vegetarian / Vegan Restaurant,Record Shop,M5T,Downtown Toronto,"Chinatown ,Grange Park ,Kensington Market",43.653206,-79.400049
8,Grocery Store,Italian Restaurant,Diner,Restaurant,Baby Store,Athletics & Sports,Coffee Shop,M6G,Downtown Toronto,Christie,43.669542,-79.422564
9,Sushi Restaurant,Restaurant,Mediterranean Restaurant,Gastropub,Gym,Hotel,Café,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316


In [103]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,1st Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Post_code,Borough,Neighbourhood,Latitude,Longitude
17,Park,Cupcake Shop,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store,M5P,Central Toronto,"Forest Hill North ,Forest Hill West",43.696948,-79.411307
27,Park,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store,Diner,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529


In [104]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,1st Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Post_code,Borough,Neighbourhood,Latitude,Longitude
22,Park,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [105]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,1st Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Post_code,Borough,Neighbourhood,Latitude,Longitude
28,Health & Beauty Service,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,M5N,Central Toronto,Roselawn,43.711695,-79.416936


In [106]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,1st Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Post_code,Borough,Neighbourhood,Latitude,Longitude
24,Restaurant,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store,M4T,Central Toronto,"Moore Park ,Summerhill East",43.689574,-79.38316
