# Segmenting and Clustering Neighbourhoods in Toronto

## Question 1

### Import Libraries

In [14]:
import pandas as pd
import numpy as np
import requests

### Importing Data from Wikipedia

In [15]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wiki_url = requests.get(url)
wiki_url

<Response [200]>

In [16]:
wiki_data = pd.read_html(wiki_url.text)
wiki_data

[    Postal Code           Borough  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                          Neighbourhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

In [17]:
len(wiki_data), type(wiki_data)

(3, list)

In [18]:
wiki_data = wiki_data[0]
wiki_data

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [19]:
df = wiki_data[wiki_data["Borough"] != "Not assigned"]
df


Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [20]:
df.groupby(['Postal Code']).first()

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern, Rouge"
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
...,...,...
M9N,York,Weston
M9P,Etobicoke,Westmount
M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [21]:
len(df['Postal Code'].unique())

103

In [22]:
df[df['Borough'] == 'Not assigned']

Unnamed: 0,Postal Code,Borough,Neighbourhood


In [23]:
df.shape

(103, 3)

# Question 2

In [28]:
pip install geocoder

Note: you may need to restart the kernel to use updated packages.


In [29]:
import geocoder

In [30]:
url = 'http://cocl.us/Geospatial_data'

In [31]:
df_geo = pd.read_csv(url)
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [32]:
df_geo.dtypes

Postal Code     object
Latitude       float64
Longitude      float64
dtype: object

In [33]:
df.dtypes

Postal Code      object
Borough          object
Neighbourhood    object
dtype: object

In [34]:
df.shape

(103, 3)

In [35]:
df_geo.shape

(103, 3)

In [36]:
df = df.join(df_geo.set_index('Postal Code'), on='Postal Code')
df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M4A,North York,Victoria Village,43.725882,-79.315572
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
165,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [37]:
df = df.reset_index()

In [38]:
df.drop(['index'], axis = 'columns', inplace = True)

In [39]:
df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


# Question 3

In [40]:
!conda install -c conda-forge geocoder --yes
import geocoder
from geopy.geocoders import Nominatim 

address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [41]:
import folium

In [43]:
#Creating the map of Toronto
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# adding markers to map
for latitude, longitude, borough, neighbourhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='red',
        fill=True
        ).add_to(map_Toronto)  
    
map_Toronto

In [44]:
CLIENT_ID = 'JELNUIAY01PO1WFUT31XYY0VR0UVIGKQ1XBJL3HQH45FOZKY' 
CLIENT_SECRET = 'UCSA2RS4N0QT42SGNRTFYYRFDA2AKAJWUAHJYI0XL4HKMNBZ'
VERSION = '20210130' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: JELNUIAY01PO1WFUT31XYY0VR0UVIGKQ1XBJL3HQH45FOZKY
CLIENT_SECRET:UCSA2RS4N0QT42SGNRTFYYRFDA2AKAJWUAHJYI0XL4HKMNBZ


In [45]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius
            )
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Category']
    
    return(nearby_venues)

In [46]:
venues_in_toronto = getNearbyVenues(df['Neighbourhood'], df['Latitude'], df['Longitude'])

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [47]:
venues_in_toronto.shape

(1330, 5)

In [48]:
venues_in_toronto.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,Park
1,Parkwoods,43.753259,-79.329656,649 Variety,Convenience Store
2,Parkwoods,43.753259,-79.329656,Variety Store,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,Portuguese Restaurant


In [49]:
venues_in_toronto.groupby('Neighbourhood').head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,Park
1,Parkwoods,43.753259,-79.329656,649 Variety,Convenience Store
2,Parkwoods,43.753259,-79.329656,Variety Store,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,Portuguese Restaurant
...,...,...,...,...,...
1317,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Wingporium,Wings Joint
1318,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,South St. Burger,Burger Joint
1319,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Dollarama,Discount Store
1320,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Healthy Planet,Supplement Shop


In [50]:
venues_in_toronto.groupby('Venue Category').max()

Unnamed: 0_level_0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Accessories Store,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,Ardene Shoes Outlet
Airport,Downsview,43.737473,-79.394420,Toronto Downsview Airport (YZD)
Airport Food Court,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.394420,Billy Bishop Café
Airport Gate,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.394420,Gate 8
Airport Lounge,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.394420,Porter Lounge
...,...,...,...,...
Warehouse Store,Thorncliffe Park,43.705369,-79.349372,Costco
Wine Bar,"Toronto Dominion Centre, Design Exchange",43.653206,-79.379817,The National Club
Wings Joint,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Wingporium
Women's Store,"Lawrence Manor, Lawrence Heights",43.733283,-79.419750,Want Boutique


### One Hot encoding the venue Categories

In [51]:
toronto_venue_cat = pd.get_dummies(venues_in_toronto[['Venue Category']], prefix="", prefix_sep="")
toronto_venue_cat

Unnamed: 0,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Train Station,Truck Stop,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1325,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1326,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1327,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1328,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [52]:
toronto_venue_cat['Neighbourhood'] = venues_in_toronto['Neighbourhood'] 

# moving neighborhood column to the first column
fixed_columns = [toronto_venue_cat.columns[-1]] + list(toronto_venue_cat.columns[:-1])
toronto_venue_cat = toronto_venue_cat[fixed_columns]

toronto_venue_cat.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Truck Stop,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [53]:
toronto_grouped = toronto_venue_cat.groupby('Neighbourhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Truck Stop,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0


In [54]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [55]:
import numpy as np

In [56]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Latin American Restaurant,Lounge,Chinese Restaurant,Breakfast Spot,Dance Studio,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store
1,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Skating Rink,Dance Studio,Gym,Pharmacy,Athletics & Sports,Pub,Yoga Studio,Department Store
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Diner,Bridal Shop,Sushi Restaurant,Ice Cream Shop,Intersection,Restaurant,Middle Eastern Restaurant,Deli / Bodega
3,Bayview Village,Chinese Restaurant,Bank,Japanese Restaurant,Café,Yoga Studio,Dance Studio,Drugstore,Donut Shop,Dog Run,Distribution Center
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Locksmith,Spa,Juice Bar,Liquor Store,Restaurant,Pub,Sushi Restaurant


In [57]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

In [58]:
# set number of clusters
k_num_clusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=k_num_clusters, random_state=0).fit(toronto_grouped_clustering)
kmeans

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=0, tol=0.0001, verbose=0)

In [59]:
kmeans.labels_[0:100]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 3, 0, 1, 0, 1, 1, 1, 1, 1, 4, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 0], dtype=int32)

In [60]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [61]:
toronto_merged = df

toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Park,Convenience Store,Food & Drink Shop,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner
1,M4A,North York,Victoria Village,43.725882,-79.315572,1.0,Portuguese Restaurant,Coffee Shop,Intersection,Pizza Place,Hockey Arena,Yoga Studio,Dance Studio,Deli / Bodega,Department Store,Dim Sum Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1.0,Coffee Shop,Bakery,Park,Theater,Breakfast Spot,Historic Site,French Restaurant,Farmers Market,Event Space,Distribution Center
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1.0,Clothing Store,Furniture / Home Store,Accessories Store,Boutique,Women's Store,Event Space,Miscellaneous Shop,Coffee Shop,Vietnamese Restaurant,Airport Terminal
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1.0,Coffee Shop,Sushi Restaurant,Yoga Studio,Italian Restaurant,Creperie,Diner,Fast Food Restaurant,Portuguese Restaurant,Bank,Japanese Restaurant


In [62]:
toronto_merged_nonan = toronto_merged.dropna(subset=['Cluster Labels'])

In [63]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [64]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k_num_clusters)
ys = [i + x + (i*x)**2 for i in range(k_num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged_nonan['Latitude'], toronto_merged_nonan['Longitude'], toronto_merged_nonan['Neighbourhood'], toronto_merged_nonan['Cluster Labels']):
    label = folium.Popup('Cluster ' + str(int(cluster) +1) + '\n' + str(poi) , parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)]
        ).add_to(map_clusters)
        
map_clusters

##### cluster 1

In [65]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 0, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0.0,Park,Convenience Store,Food & Drink Shop,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner
21,York,0.0,Park,Women's Store,Gluten-free Restaurant,Cupcake Shop,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
32,Scarborough,0.0,Playground,Convenience Store,Yoga Studio,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner
35,East York,0.0,Park,Convenience Store,Metro Station,Curling Ice,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
64,York,0.0,Park,Jewelry Store,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
66,North York,0.0,Park,Convenience Store,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
68,Central Toronto,0.0,Park,Jewelry Store,Trail,Sushi Restaurant,Yoga Studio,Cupcake Shop,Dog Run,Distribution Center,Discount Store,Diner
83,Central Toronto,0.0,Park,Playground,Trail,Cuban Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
85,Scarborough,0.0,Park,Playground,Intersection,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner
91,Downtown Toronto,0.0,Park,Playground,Trail,Cuban Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant


##### cluster 2

In [67]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 1, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,1.0,Portuguese Restaurant,Coffee Shop,Intersection,Pizza Place,Hockey Arena,Yoga Studio,Dance Studio,Deli / Bodega,Department Store,Dim Sum Restaurant
2,Downtown Toronto,1.0,Coffee Shop,Bakery,Park,Theater,Breakfast Spot,Historic Site,French Restaurant,Farmers Market,Event Space,Distribution Center
3,North York,1.0,Clothing Store,Furniture / Home Store,Accessories Store,Boutique,Women's Store,Event Space,Miscellaneous Shop,Coffee Shop,Vietnamese Restaurant,Airport Terminal
4,Downtown Toronto,1.0,Coffee Shop,Sushi Restaurant,Yoga Studio,Italian Restaurant,Creperie,Diner,Fast Food Restaurant,Portuguese Restaurant,Bank,Japanese Restaurant
7,North York,1.0,Gym,Coffee Shop,Café,Restaurant,Beer Store,Japanese Restaurant,Clothing Store,Supermarket,Chinese Restaurant,Caribbean Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...
96,Downtown Toronto,1.0,Restaurant,Coffee Shop,Italian Restaurant,Café,Bakery,Gastropub,Indian Restaurant,Beer Store,Japanese Restaurant,Diner
97,Downtown Toronto,1.0,Café,Coffee Shop,Restaurant,Seafood Restaurant,Hotel,Gym / Fitness Center,Concert Hall,Pizza Place,Pub,Sandwich Place
99,Downtown Toronto,1.0,Sushi Restaurant,Salon / Barbershop,Beer Bar,Pub,Indian Restaurant,Ice Cream Shop,Ethiopian Restaurant,Dance Studio,Hobby Shop,Dessert Shop
100,East Toronto,1.0,Light Rail Station,Pizza Place,Spa,Brewery,Burrito Place,Farmers Market,Fast Food Restaurant,Restaurant,Skate Park,Auto Workshop


##### cluster 3

In [68]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 2, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,North York,2.0,Restaurant,Cupcake Shop,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop


 #### cluster 4

In [69]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 3, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,3.0,Fast Food Restaurant,Yoga Studio,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant


#### cluster 5

In [70]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 4, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,North York,4.0,Baseball Field,Furniture / Home Store,Yoga Studio,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner
101,Etobicoke,4.0,Baseball Field,Yoga Studio,Dance Studio,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner
