<h1>Segmenting and Clustering Neighborhoods in Toronto</h1>
<h2>Part 1: Web scraping</h2>

<h6>1.1 Import the necessary libraries </h6>

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

<h6>1.2 Perform the web scraping and save the data in a dataframe </h6>

In [22]:
# Download the Wikipedia page listing the Toronto ("M") postal codes and
# locate its data table (tbody).
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
source = response.text
soup = BeautifulSoup(source, 'lxml')
table = soup.find('table', class_='wikitable')
if table is None:
    raise ValueError('No table with class "wikitable" was found on the page')
tabla = table.tbody or table

# Build the dataframe: one list of cell texts per table row, the first row
# being the header. Reading the <th>/<td> cells directly avoids depending on
# the exact newline layout of the row text.
data = []
for tr in tabla.find_all('tr'):
    row = [cell.get_text().strip() for cell in tr.find_all(['th', 'td'])]
    if row:
        data.append(row)
df = pd.DataFrame(data[1:], columns=data[0]).rename(columns={'Neighbourhood': 'Neighborhood'})
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


<h6>1.3 Preprocess the data</h6>

In [24]:
# Delete rows with a borough that is Not assigned.
df_filter = df.loc[df['Borough'] != 'Not assigned'].reset_index(drop=True)

# If a row has a borough but a Not assigned neighborhood, the neighborhood
# becomes the borough. A single .loc assignment writes into df_filter itself;
# the previous chained form (df_filter.iloc[i][2] = ...) could write into a
# temporary row copy and leave the frame unchanged.
sin_barrio = df_filter['Neighborhood'] == 'Not assigned'
df_filter.loc[sin_barrio, 'Neighborhood'] = df_filter.loc[sin_barrio, 'Borough']

# Group data by postcode: neighborhoods sharing a postal code and borough are
# joined into one comma-separated string.
dataset = (
    df_filter
    .groupby(['Postal Code', 'Borough'])['Neighborhood']
    .agg(', '.join)
    .reset_index()
)

In [25]:
# (rows, columns) of the grouped table
dataset.shape

(103, 3)

In [30]:
# display the grouped table (postal code, borough, joined neighborhoods)
dataset

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


<h2>Part 2: Add latitude and longitude to the dataframe "dataset"</h2>

In [127]:
# import library geocoder
import geocoder

# one ArcGIS geocoding query per postal code, in the row order of `dataset`
lat_lng = []
for postal_code in dataset['Postal Code']:
    query = '{}, Toronto, Ontario'.format(postal_code)
    lat_lng.append(geocoder.arcgis(query))

In [128]:
# split each geocoding result's [latitude, longitude] pair into two columns
latitudes, longitudes = [], []
for result in lat_lng:
    latitudes.append(result.latlng[0])
    longitudes.append(result.latlng[1])
dataset['Latitude'] = latitudes
dataset['Longitude'] = longitudes

In [129]:
# display the table, which now carries the Latitude and Longitude columns
dataset

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.808626,-79.189913
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.785779,-79.157368
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765806,-79.185284
3,M1G,Scarborough,Woburn,43.771545,-79.218135
4,M1H,Scarborough,Cedarbrae,43.768791,-79.238813
...,...,...,...,...,...
98,M9N,York,Weston,43.705496,-79.520370
99,M9P,Etobicoke,Westmount,43.696296,-79.533126
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.686887,-79.565507
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.744055,-79.581203


In [130]:
# spot-check the row (and coordinates) stored for postal code M5G
dataset[dataset['Postal Code'] == 'M5G']

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
57,M5G,Downtown Toronto,Central Bay Street,43.656072,-79.385653


In [50]:
# save the dataset to an Excel file (row index not written)
dataset.to_excel('dataset.xlsx', index=False)

<h2>Part 3: Explore and cluster the neighborhoods in Toronto.</h2>

<h6>3.1 Create a map of Toronto with neighborhoods </h6>

In [52]:
# geocode the city itself; its coordinates are used as the centre of the maps
address = 'Toronto, Ontario'
g = geocoder.arcgis(address)
latitude, longitude = g.latlng[0], g.latlng[1]
message = 'The geograpical coordinate of Toronto are {}, {}.'
print(message.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.648690000000045, -79.38543999999996.


In [53]:
import folium

# base map centred on the Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# appearance shared by every marker
marker_style = dict(
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# one circle marker per row of `dataset`, with a "neighborhood, borough" popup
for lat, lng, borough, neighborhood in zip(dataset['Latitude'], dataset['Longitude'], dataset['Borough'], dataset['Neighborhood']):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker([lat, lng], radius=5, popup=label, **marker_style)
    marker.add_to(map_toronto)

map_toronto

<h6>3.2 Let's simplify the above map and segment and cluster only boroughs that contain the word Toronto.</h6>

In [54]:
# Keep only the boroughs whose name contains the word "Toronto".
# str.contains also matches a name that *starts* with "Toronto"; the previous
# test `x.find("Toronto") > 0` rejected that case because find() returns 0.
nh_Toronto = dataset[dataset['Borough'].str.contains('Toronto', regex=False, na=False)].reset_index(drop=True)
nh_Toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.678148,-79.295349
1,M4K,East Toronto,"The Danforth West, Riverdale",43.683424,-79.354564
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668291,-79.315578
3,M4M,East Toronto,Studio District,43.648,-79.33926
4,M4N,Central Toronto,Lawrence Park,43.729455,-79.386415


In [55]:
# number of distinct boroughs and number of rows in the Toronto subset
n_boroughs = nh_Toronto['Borough'].unique().size
n_rows = len(nh_Toronto.index)
print('The dataframe has {} boroughs and {} neighborhoods.'.format(n_boroughs, n_rows))

The dataframe has 4 boroughs and 39 neighborhoods.


In [56]:
# closer-zoomed base map for the Toronto-only subset
sub_map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# appearance shared by every marker
sub_marker_style = dict(
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# one circle marker per row of `nh_Toronto`, with a "neighborhood, borough" popup
for lat, lng, borough, neighborhood in zip(nh_Toronto['Latitude'], nh_Toronto['Longitude'], nh_Toronto['Borough'], nh_Toronto['Neighborhood']):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker([lat, lng], radius=5, popup=label, **sub_marker_style)
    marker.add_to(sub_map_toronto)

sub_map_toronto

<h6>3.3 Define Foursquare Credentials and Version</h6>

In [57]:
import os

# Foursquare credentials are read from the environment so that real secrets
# never have to be typed into (and saved with) the notebook. When a variable
# is not set, the original placeholder value is used.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'your_id') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'your_secret') # your Foursquare Secret
VERSION = '20191119'  # sent as the `v` parameter of the Foursquare request

In [58]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue record.

    `row` is indexed by key: the category list is read from
    ``row['categories']`` and, when that key is missing, from
    ``row['venue.categories']``. Returns ``None`` when the list is empty.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # only a missing key triggers the fallback; other errors propagate
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

<h6>3.4 Explore neighborhoods</h6>

In [131]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT =100):
    """Query the Foursquare `venues/explore` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location to explore.
    radius : value sent as the `radius` query parameter.
    LIMIT : value sent as the `limit` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame has these columns even when no venue was returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    url = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; the timeout keeps one stalled call from hanging the loop
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without categories gets None instead of raising IndexError
                categories[0]['name'] if categories else None))

    # naming the columns at construction keeps an empty result well-formed
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [132]:
# collect the venues around every row of the Toronto subset
toronto_venues = getNearbyVenues(
    nh_Toronto['Neighborhood'],
    nh_Toronto['Latitude'],
    nh_Toronto['Longitude'],
)

The Beaches
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West,  Lawrence Park
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North & West, Forest Hill Road Park
The Annex, North Midtown, Yorkville
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Dufferin, Dovercourt Village
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
High

In [133]:
# size of the venue table, followed by a preview of its first rows
print(toronto_venues.shape)
toronto_venues.head()

(1582, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.678148,-79.295349,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.678148,-79.295349,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.678148,-79.295349,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.678148,-79.295349,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West, Riverdale",43.683424,-79.354564,Centennial College - Story Arts Centre,43.684874,-79.34914,Business Service


In [134]:
#venues for each neighborhood
toronto_venues.groupby('Neighborhood').count().reset_index()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,64,64,64,64,64,64
1,"Brockton, Parkdale Village, Exhibition Place",43,43,43,43,43,43
2,"Business reply mail Processing Centre, South C...",100,100,100,100,100,100
3,"CN Tower, King and Spadina, Railway Lands, Har...",64,64,64,64,64,64
4,Central Bay Street,57,57,57,57,57,57
5,Christie,11,11,11,11,11,11
6,Church and Wellesley,83,83,83,83,83,83
7,"Commerce Court, Victoria Hotel",100,100,100,100,100,100
8,Davisville,25,25,25,25,25,25
9,Davisville North,6,6,6,6,6,6


In [135]:
#number of categories
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 224 uniques categories.


<h6>3.5 Analyze each neighborhood</h6>

In [136]:
# one hot encoding: one indicator column per venue category, named after the category
toronto_onehot = pd.get_dummies(
    toronto_venues[['Venue Category']],
    prefix="",
    prefix_sep="",
)

# add the neighborhood name back as 'N_hood'
# ('Neighborhood' is itself a venue category, so that name is already taken)
toronto_onehot['N_hood'] = toronto_venues['Neighborhood']

# rotate the last column (the one just added) to the front
fixed_columns = list(toronto_onehot.columns)
fixed_columns.insert(0, fixed_columns.pop())
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,N_hood,Accessories Store,Afghan Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [137]:
# (rows, columns) of the one-hot frame
toronto_onehot.shape

(1582, 225)

<h6> Next, let's group the rows by neighborhood, taking the mean of the frequency of occurrence of each category </h6>

In [138]:
# mean of every indicator column per neighborhood, i.e. each category's
# share of the neighborhood's venues
toronto_grouped = (
    toronto_onehot
    .groupby('N_hood')
    .mean()
    .reset_index()
)
toronto_grouped

Unnamed: 0,N_hood,Accessories Store,Afghan Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.015625,...,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.015625
1,"Brockton, Parkdale Village, Exhibition Place",0.023256,0.0,0.0,0.023256,0.0,0.023256,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.02,0.01,0.0,0.0,0.03,0.0,0.0,...,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017544,0.017544,0.017544,0.0,0.0,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.012048,0.012048,0.0,0.0,0.012048,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.012048,0.012048,0.0,0.012048
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.04,0.01,0.0,0.0,0.01,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [139]:
# (rows, columns) of the per-neighborhood frequency frame
toronto_grouped.shape

(39, 225)

In [140]:
# Print every neighborhood followed by its 5 most frequent venue categories
num_top_venues = 5

for hood in toronto_grouped['N_hood']:
    print("----"+hood+"----")

    # turn the neighborhood's row into a two-column (venue, freq) table
    hood_rows = toronto_grouped[toronto_grouped['N_hood'] == hood]
    temp = hood_rows.T.reset_index()
    temp.columns = ['venue','freq']

    # drop the leading N_hood entry, then make the frequencies numeric and round them
    temp = temp.iloc[1:].astype({'freq': float}).round({'freq': 2})

    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Berczy Park----
          venue  freq
0   Coffee Shop  0.09
1  Cocktail Bar  0.05
2    Restaurant  0.03
3   Cheese Shop  0.03
4          Café  0.03


----Brockton, Parkdale Village, Exhibition Place----
                    venue  freq
0             Coffee Shop  0.07
1                    Café  0.07
2  Thrift / Vintage Store  0.05
3                   Diner  0.05
4               Gift Shop  0.05


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                 venue  freq
0          Coffee Shop  0.07
1                Hotel  0.05
2  Japanese Restaurant  0.03
3                 Café  0.03
4     Asian Restaurant  0.03


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
               venue  freq
0               Café  0.06
1        Coffee Shop  0.06
2               Park  0.05
3  French Restaurant  0.05
4             Lounge  0.03


----Central Bay Street----
                       v

<h6>Let's put that into a pandas dataframe </h6>

In [141]:
# First, let's write a function to sort the venues in descending order.
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of `row`, biggest first.

    The first entry of `row` is skipped before sorting; at most
    `num_top_venues` index labels are returned, as a NumPy array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [172]:
# Now let's create the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10

# Column headers: 'Neighborhood' followed by '1st Most Common Venue',
# '2nd Most Common Venue', ... The suffix is computed from the rank itself
# (11-13 always take 'th'), so it stays correct beyond 20 and no exception
# is needed as control flow.
columns = ['Neighborhood']
for rank in range(1, num_top_venues + 1):
    if 11 <= rank % 100 <= 13:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(rank % 10, 'th')
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['N_hood']

# fill the venue columns of each row with that neighborhood's top categories
for ind in range(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Lounge,Seafood Restaurant,Cheese Shop,Hotel,Restaurant,Café,Breakfast Spot,Beer Bar
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Thrift / Vintage Store,Diner,Gift Shop,Accessories Store,Italian Restaurant,Brewery,Japanese Restaurant,Korean Restaurant
2,"Business reply mail Processing Centre, South C...",Coffee Shop,Hotel,Restaurant,Café,Japanese Restaurant,Asian Restaurant,Gym,Taco Place,Tea Room,Seafood Restaurant
3,"CN Tower, King and Spadina, Railway Lands, Har...",Café,Coffee Shop,Park,French Restaurant,Speakeasy,Lounge,Restaurant,Bar,Italian Restaurant,Gym / Fitness Center
4,Central Bay Street,Coffee Shop,Japanese Restaurant,Sandwich Place,Café,Clothing Store,Plaza,Bubble Tea Shop,Middle Eastern Restaurant,Spa,Chinese Restaurant


<h6>3.6 Cluster Neighborhoods</h6>

In [173]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

# feature matrix: the category frequencies without the name column
# (`columns=` replaces the positional axis argument, which recent pandas
# versions no longer accept)
toronto_grouped_clustering = toronto_grouped.drop(columns='N_hood')

# run k-means clustering (fixed random_state for repeatable labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:100]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 3, 0, 0, 0,
       0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0])

<h6>Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.</h6>

In [174]:
# add clustering labels as the first column; when the column is already
# there (the cell was run before) it is overwritten instead of inserted,
# so re-running the cell no longer raises
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the cluster label and top venues to the postal-code/coordinate table
# by matching on the neighborhood name; rows of nh_Toronto without a match
# get NaN in the added columns
toronto_merged = nh_Toronto.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.678148,-79.295349,0,Health Food Store,Pub,Trail,Neighborhood,Yoga Studio,Donut Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
1,M4K,East Toronto,"The Danforth West, Riverdale",43.683424,-79.354564,0,Bus Line,Business Service,Park,Grocery Store,Discount Store,Yoga Studio,Eastern European Restaurant,Flower Shop,Fish Market,Fish & Chips Shop
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668291,-79.315578,0,Fast Food Restaurant,Sandwich Place,Pizza Place,Movie Theater,Liquor Store,Sushi Restaurant,Italian Restaurant,Restaurant,Intersection,Food & Drink Shop
3,M4M,East Toronto,Studio District,43.648,-79.33926,2,Baseball Field,Business Service,Night Market,Government Building,Yoga Studio,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
4,M4N,Central Toronto,Lawrence Park,43.729455,-79.386415,3,Swim School,Bus Line,Yoga Studio,Eastern European Restaurant,Food & Drink Shop,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market


<h6>Finally, let's visualize the resulting clusters</h6>

In [162]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [175]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    # a row that received no cluster label in the join cannot be colored
    if pd.isna(cluster):
        continue
    cluster = int(cluster)  # the label column becomes float when the join produced NaN
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # index the palette by the label itself rather than relying on
        # negative wrap-around for label 0
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

<h6>3.7 Examine Clusters </h6>

Cluster 1

In [176]:
# rows with cluster label 0: neighborhood name plus the label and top-venue columns
toronto_merged[toronto_merged['Cluster Labels'].eq(0)].iloc[:, [2, *range(5, toronto_merged.shape[1])]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,The Beaches,0,Health Food Store,Pub,Trail,Neighborhood,Yoga Studio,Donut Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
1,"The Danforth West, Riverdale",0,Bus Line,Business Service,Park,Grocery Store,Discount Store,Yoga Studio,Eastern European Restaurant,Flower Shop,Fish Market,Fish & Chips Shop
2,"India Bazaar, The Beaches West",0,Fast Food Restaurant,Sandwich Place,Pizza Place,Movie Theater,Liquor Store,Sushi Restaurant,Italian Restaurant,Restaurant,Intersection,Food & Drink Shop
5,Davisville North,0,Food & Drink Shop,Breakfast Spot,Department Store,Gym / Fitness Center,Gym,Park,Eastern European Restaurant,Flower Shop,Fish Market,Fish & Chips Shop
6,"North Toronto West, Lawrence Park",0,Playground,Gym Pool,Garden,Park,Dumpling Restaurant,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
7,Davisville,0,Dessert Shop,Café,Pizza Place,Sandwich Place,Coffee Shop,Italian Restaurant,Thai Restaurant,Diner,Sushi Restaurant,Seafood Restaurant
8,"Moore Park, Summerhill East",0,Gym,Convenience Store,Park,Yoga Studio,Dumpling Restaurant,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
9,"Summerhill West, Rathnelly, South Hill, Forest...",0,Light Rail Station,Coffee Shop,Supermarket,Café,Athletics & Sports,Liquor Store,Skating Rink,Yoga Studio,Ethiopian Restaurant,Flower Shop
10,Rosedale,0,Playground,Candy Store,Park,Grocery Store,Dumpling Restaurant,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
11,"St. James Town, Cabbagetown",0,Café,Coffee Shop,Pizza Place,Restaurant,Chinese Restaurant,Italian Restaurant,Bakery,Park,Pub,Caribbean Restaurant


Cluster 2

In [177]:
# rows with cluster label 1: neighborhood name plus the label and top-venue columns
toronto_merged[toronto_merged['Cluster Labels'].eq(1)].iloc[:, [2, *range(5, toronto_merged.shape[1])]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,"Forest Hill North & West, Forest Hill Road Park",1,Gym / Fitness Center,Yoga Studio,Dumpling Restaurant,Food & Drink Shop,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


Cluster 3

In [178]:
# rows with cluster label 2: neighborhood name plus the label and top-venue columns
toronto_merged[toronto_merged['Cluster Labels'].eq(2)].iloc[:, [2, *range(5, toronto_merged.shape[1])]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Studio District,2,Baseball Field,Business Service,Night Market,Government Building,Yoga Studio,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market


Cluster 4

In [179]:
# rows with cluster label 3: neighborhood name plus the label and top-venue columns
toronto_merged[toronto_merged['Cluster Labels'].eq(3)].iloc[:, [2, *range(5, toronto_merged.shape[1])]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Lawrence Park,3,Swim School,Bus Line,Yoga Studio,Eastern European Restaurant,Food & Drink Shop,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market


Cluster 5

In [182]:
# rows with cluster label 4: neighborhood name plus the label and top-venue columns
toronto_merged[toronto_merged['Cluster Labels'].eq(4)].iloc[:, [2, *range(5, toronto_merged.shape[1])]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Roselawn,4,Home Service,Ice Cream Shop,Dumpling Restaurant,Food Court,Food & Drink Shop,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
