## Segmenting and Clustering Neighborhoods in Toronto, Part 3: Explore and Cluster the Neighborhoods in Toronto

#### Recreate Merged DataFrame from Part 2

In [1]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import urllib.request
import requests

url = urllib.request.urlopen('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').read()
soup = BeautifulSoup(url, 'xml')
table = soup.find('table')

df = pd.read_html(str(table))
df = pd.DataFrame(df[0])

df = df[df['Borough'] != 'Not assigned']
df.reset_index(drop=True)

df_postalcode = pd.read_csv('https://cocl.us/Geospatial_data')

df_merged = pd.merge(left=df, right=df_postalcode, how='left', left_on='Postal Code', right_on='Postal Code')
df_merged

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


### I am American, but my mom grew up in North York and her parents have been in the same house since 1977. I am very curious to explore the Borough of North York further!

#### Create a new DataFrame for North York

In [2]:
north_york = df_merged[df_merged['Borough'] == 'North York'].reset_index(drop=True)
north_york

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073
5,M3C,North York,Don Mills,43.7259,-79.340923
6,M2H,North York,Hillcrest Village,43.803762,-79.363452
7,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259
8,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
9,M3J,North York,"Northwood Park, York University",43.76798,-79.487262


#### Importing all Libraries I might need later

In [3]:
# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 

# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

import folium # plotting library

# import k-means from clustering stage
from sklearn.cluster import KMeans

import matplotlib.cm as cm
import matplotlib.colors as colors

#### Latitude and Longitude values for my grandparents' house

In [4]:
ny_lat = 43.749905
ny_lng = -79.39283

#### Visualizing Neighborhoods in North York

In [5]:
# create map of North York using latitude and longitude values
map_ny = folium.Map(location=[ny_lat, ny_lng], zoom_start=12)

# add a red circle marker to represent my grandparents' home
folium.CircleMarker(
    [ny_lat, ny_lng],
    radius=10,
    color='red',
    popup='Old Home',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(map_ny)

# add markers to map
for lat, lng, label in zip(north_york['Latitude'], north_york['Longitude'], north_york['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False
    ).add_to(map_ny)  
    
map_ny

#### Defining Foursquare Credentials and Version

In [6]:
CLIENT_ID = 'TRQJZLZKSTRSUPBJXPJP3FKI1AWNCEFTPXGAKUMQV1HIEEND' # your Foursquare ID
CLIENT_SECRET = 'SZRGIFVBPJROPWJ54YCKR5G0BIMKI4VFTZYRSFI3U1D14ZEG' # your Foursquare Secret
VERSION = '20201210' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TRQJZLZKSTRSUPBJXPJP3FKI1AWNCEFTPXGAKUMQV1HIEEND
CLIENT_SECRET:SZRGIFVBPJROPWJ54YCKR5G0BIMKI4VFTZYRSFI3U1D14ZEG


#### Verifying the neighborhood in the dataframe closest to the house, and retrieving the latitude and longitude values. From the map this is York Mills West. To double check, my grandparents' postal code does indeed start with M2P.

In [7]:
north_york.loc[22, 'Neighbourhood']

'York Mills West'

In [8]:
neighbourhood_latitude = north_york.loc[22, 'Latitude'] # neighbourhood latitude value
neighbourhood_longitude = north_york.loc[22, 'Longitude'] # neighbourhood longitude value

neighbourhood_name = north_york.loc[22, 'Neighbourhood'] # neighbourhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of York Mills West are 43.752758299999996, -79.4000493.


#### Retrieving the top 10 venues that are in York Mills West within a radius of 1000 meters

In [9]:
LIMIT = 10 # limit of number of venues returned by Foursquare API
radius = 1000

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT)

results = requests.get(url).json()

#I won't print out "results" here because it's long and messy

#### Categorizing Venues

In [10]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Create a *pandas* DataFrame for the Venues

In [11]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

#Let's view all 10 venues
nearby_venues.head(10)

  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,Auberge du Pommier,French Restaurant,43.746962,-79.407879
1,Rabba Fine Foods,Grocery Store,43.757952,-79.408936
2,Avondale Park,Dog Run,43.757053,-79.408127
3,Swiss Chalet,Restaurant,43.747687,-79.406753
4,Petro-Canada,Gas Station,43.757319,-79.410023
5,Starbucks,Coffee Shop,43.758015,-79.409613
6,Tim Hortons,Coffee Shop,43.744928,-79.405924
7,Tournament Park,Park,43.751257,-79.399717
8,Kitchen Food Fair,Convenience Store,43.751298,-79.401393
9,Avonshire Park,Park,43.757486,-79.404466


### Now let's explore North York in even more detail

#### Create a function to repeat the same process to all the neighborhoods

In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Run the above function on each neighborhood in North York and create a new DataFrame called north_york_venues

In [13]:
north_york_venues = getNearbyVenues(names=north_york['Neighbourhood'],
                                    latitudes=north_york['Latitude'],
                                    longitudes=north_york['Longitude']
                                   )

Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills
Glencairn
Don Mills
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Fairview, Henry Farm, Oriole
Northwood Park, York University
Bayview Village
Downsview
York Mills, Silver Hills
Downsview
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview
Bedford Park, Lawrence Manor East
Humberlea, Emery
Willowdale, Willowdale East
Downsview
York Mills West
Willowdale, Willowdale West


#### Let's check the size of the resulting DataFrame, and explore the first five rows

In [14]:
print(north_york_venues.shape)
north_york_venues.head()

(222, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
2,Parkwoods,43.753259,-79.329656,Tim Hortons,43.760668,-79.326368,Café
3,Parkwoods,43.753259,-79.329656,A&W,43.760643,-79.326865,Fast Food Restaurant
4,Parkwoods,43.753259,-79.329656,Bruno's valu-mart,43.746143,-79.32463,Grocery Store


#### Let's check how many venues were returned for each neighborhood

In [15]:
north_york_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",10,10,10,10,10,10
Bayview Village,10,10,10,10,10,10
"Bedford Park, Lawrence Manor East",10,10,10,10,10,10
Don Mills,20,20,20,20,20,20
Downsview,30,30,30,30,30,30
"Fairview, Henry Farm, Oriole",10,10,10,10,10,10
Glencairn,10,10,10,10,10,10
Hillcrest Village,10,10,10,10,10,10
Humber Summit,10,10,10,10,10,10
"Humberlea, Emery",8,8,8,8,8,8


#### Let's find out how many unique categories can be curated from all the returned venues

In [16]:
print('There are {} uniques categories.'.format(len(north_york_venues['Venue Category'].unique())))

There are 80 uniques categories.


#### Analyze each neighborhood

In [17]:
# one hot encoding
north_york_onehot = pd.get_dummies(north_york_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
north_york_onehot['Neighborhood'] = north_york_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [north_york_onehot.columns[-1]] + list(north_york_onehot.columns[:-1])
north_york_onehot = north_york_onehot[fixed_columns]

north_york_onehot.head()

Unnamed: 0,Neighborhood,Airport,American Restaurant,Art Gallery,Athletics & Sports,Auto Workshop,Bagel Shop,Bakery,Bank,Baseball Field,...,Sporting Goods Shop,Sports Club,Steakhouse,Storage Facility,Supermarket,Sushi Restaurant,Theater,Toy / Game Store,Turkish Restaurant,Vietnamese Restaurant
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size:

In [18]:
north_york_onehot.shape

(222, 81)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [19]:
north_york_grouped = north_york_onehot.groupby('Neighborhood').mean().reset_index()
north_york_grouped

Unnamed: 0,Neighborhood,Airport,American Restaurant,Art Gallery,Athletics & Sports,Auto Workshop,Bagel Shop,Bakery,Bank,Baseball Field,...,Sporting Goods Shop,Sports Club,Steakhouse,Storage Facility,Supermarket,Sushi Restaurant,Theater,Toy / Game Store,Turkish Restaurant,Vietnamese Restaurant
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,...,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0
3,Don Mills,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0
4,Downsview,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.166667
5,"Fairview, Henry Farm, Oriole",0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0
6,Glencairn,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Humberlea, Emery",0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,...,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
north_york_grouped.shape

(20, 81)

#### Let's print each neighborhood along with the top 5 most common venues

In [21]:
num_top_venues = 5

for hood in north_york_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = north_york_grouped[north_york_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor, Wilson Heights, Downsview North----
                      venue  freq
0               Coffee Shop   0.2
1            Ice Cream Shop   0.1
2  Mediterranean Restaurant   0.1
3             Deli / Bodega   0.1
4                Restaurant   0.1


----Bayview Village----
                 venue  freq
0          Gas Station   0.2
1                 Bank   0.2
2  Japanese Restaurant   0.1
3           Restaurant   0.1
4        Grocery Store   0.1


----Bedford Park, Lawrence Manor East----
                venue  freq
0  Italian Restaurant   0.2
1                Café   0.1
2                 Pub   0.1
3          Bagel Shop   0.1
4    Sushi Restaurant   0.1


----Don Mills----
                 venue  freq
0  Japanese Restaurant  0.15
1                  Gym  0.10
2          Coffee Shop  0.10
3         Burger Joint  0.05
4   Italian Restaurant  0.05


----Downsview----
                   venue  freq
0  Vietnamese Restaurant  0.17
1          Grocery Store  0.07
2     Turkish Restaur

#### Let's put this into a *pandas* DataFrame

In [22]:
#function to sort venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [23]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = north_york_grouped['Neighborhood']

for ind in np.arange(north_york_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(north_york_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Deli / Bodega,Ice Cream Shop,Middle Eastern Restaurant,Mediterranean Restaurant,Bank,Restaurant,Park,Bridal Shop,Eastern European Restaurant
1,Bayview Village,Gas Station,Bank,Park,Grocery Store,Chinese Restaurant,Japanese Restaurant,Café,Restaurant,Electronics Store,Dim Sum Restaurant
2,"Bedford Park, Lawrence Manor East",Italian Restaurant,Restaurant,Coffee Shop,Sushi Restaurant,Bagel Shop,Sports Club,Indian Restaurant,Café,Pub,Vietnamese Restaurant
3,Don Mills,Japanese Restaurant,Gym,Coffee Shop,Salad Place,History Museum,Italian Restaurant,Discount Store,Pizza Place,Caribbean Restaurant,Café
4,Downsview,Vietnamese Restaurant,Pizza Place,Hotel,Turkish Restaurant,Grocery Store,Pharmacy,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint,Gas Station


#### Cluster Neighborhoods

Run _k_-means to cluster the neighborhood into 5 clusters.

In [29]:
# set number of clusters
kclusters = 5

north_york_grouped_clustering = north_york_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(north_york_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 3, 1, 1, 0, 0, 1, 0, 0, 3], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [31]:
#add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

north_york_merged = north_york

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
north_york_merged = north_york_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

#Remove all rows that have NaN for Cluster Label
north_york_merged.dropna(subset=['Cluster Labels'], axis=0, inplace=True)

#Convert Cluster Labels from float to int
north_york_merged['Cluster Labels'] = north_york_merged['Cluster Labels'].astype("int")

north_york_merged.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0,Pharmacy,Fish & Chips Shop,Park,Caribbean Restaurant,Café,Pizza Place,Fast Food Restaurant,Grocery Store,Supermarket,Discount Store
1,M4A,North York,Victoria Village,43.725882,-79.315572,3,Coffee Shop,Pizza Place,Hockey Arena,Golf Course,Playground,Intersection,Portuguese Restaurant,Sporting Goods Shop,French Restaurant,Dog Run
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,3,Restaurant,Vietnamese Restaurant,Grocery Store,Athletics & Sports,Greek Restaurant,Furniture / Home Store,Fried Chicken Joint,Boutique,Fast Food Restaurant,Electronics Store
3,M3B,North York,Don Mills,43.745906,-79.352188,1,Japanese Restaurant,Gym,Coffee Shop,Salad Place,History Museum,Italian Restaurant,Discount Store,Pizza Place,Caribbean Restaurant,Café
4,M6B,North York,Glencairn,43.709577,-79.445073,1,Grocery Store,Coffee Shop,Playground,Ice Cream Shop,Bakery,Japanese Restaurant,Pet Store,Latin American Restaurant,Electronics Store,Dim Sum Restaurant


### Visualize the Resulting Clusters

In [32]:
# create map
map_clusters = folium.Map(location=[ny_lat, ny_lng], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(north_york_merged['Latitude'], north_york_merged['Longitude'], north_york_merged['Neighbourhood'], north_york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

#### Cluster 1

In [33]:
north_york_merged.loc[north_york_merged['Cluster Labels'] == 0, north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0,Pharmacy,Fish & Chips Shop,Park,Caribbean Restaurant,Café,Pizza Place,Fast Food Restaurant,Grocery Store,Supermarket,Discount Store
6,North York,0,Pharmacy,Pizza Place,Bank,Korean Restaurant,Park,Coffee Shop,Sandwich Place,Grocery Store,Bakery,Fast Food Restaurant
8,North York,0,Salon / Barbershop,Bakery,Coffee Shop,Pharmacy,Electronics Store,Restaurant,Shopping Mall,Movie Theater,Toy / Game Store,American Restaurant
11,North York,0,Vietnamese Restaurant,Pizza Place,Hotel,Turkish Restaurant,Grocery Store,Pharmacy,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint,Gas Station
13,North York,0,Vietnamese Restaurant,Pizza Place,Hotel,Turkish Restaurant,Grocery Store,Pharmacy,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint,Gas Station
15,North York,0,Electronics Store,Pizza Place,Bakery,Medical Center,Park,Shopping Mall,Bank,Italian Restaurant,Pharmacy,Fried Chicken Joint
17,North York,0,Vietnamese Restaurant,Pizza Place,Hotel,Turkish Restaurant,Grocery Store,Pharmacy,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint,Gas Station
21,North York,0,Vietnamese Restaurant,Pizza Place,Hotel,Turkish Restaurant,Grocery Store,Pharmacy,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint,Gas Station
23,North York,0,Pharmacy,Bakery,Pizza Place,Park,Coffee Shop,Butcher,Eastern European Restaurant,Convenience Store,Grocery Store,Fried Chicken Joint


#### Cluster 2

In [35]:
north_york_merged.loc[north_york_merged['Cluster Labels'] == 1, north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,North York,1,Japanese Restaurant,Gym,Coffee Shop,Salad Place,History Museum,Italian Restaurant,Discount Store,Pizza Place,Caribbean Restaurant,Café
4,North York,1,Grocery Store,Coffee Shop,Playground,Ice Cream Shop,Bakery,Japanese Restaurant,Pet Store,Latin American Restaurant,Electronics Store,Dim Sum Restaurant
5,North York,1,Japanese Restaurant,Gym,Coffee Shop,Salad Place,History Museum,Italian Restaurant,Discount Store,Pizza Place,Caribbean Restaurant,Café
16,North York,1,Korean Restaurant,Café,Hookah Bar,Coffee Shop,Grocery Store,Middle Eastern Restaurant,Dessert Shop,Fast Food Restaurant,Discount Store,Dog Run
18,North York,1,Italian Restaurant,Restaurant,Coffee Shop,Sushi Restaurant,Bagel Shop,Sports Club,Indian Restaurant,Café,Pub,Vietnamese Restaurant
20,North York,1,Japanese Restaurant,Coffee Shop,Theater,Hotel,Grocery Store,Steakhouse,Café,Seafood Restaurant,Movie Theater,Ramen Restaurant


#### Cluster 3

In [36]:
north_york_merged.loc[north_york_merged['Cluster Labels'] == 2, north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,North York,2,Park,Pool,Vietnamese Restaurant,Fast Food Restaurant,Dessert Shop,Dim Sum Restaurant,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store


#### Cluster 4

In [37]:
north_york_merged.loc[north_york_merged['Cluster Labels'] == 3, north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,3,Coffee Shop,Pizza Place,Hockey Arena,Golf Course,Playground,Intersection,Portuguese Restaurant,Sporting Goods Shop,French Restaurant,Dog Run
2,North York,3,Restaurant,Vietnamese Restaurant,Grocery Store,Athletics & Sports,Greek Restaurant,Furniture / Home Store,Fried Chicken Joint,Boutique,Fast Food Restaurant,Electronics Store
10,North York,3,Gas Station,Bank,Park,Grocery Store,Chinese Restaurant,Japanese Restaurant,Café,Restaurant,Electronics Store,Dim Sum Restaurant
14,North York,3,Coffee Shop,Convenience Store,Bakery,Park,Dim Sum Restaurant,Gas Station,Pizza Place,Intersection,Athletics & Sports,Golf Course
19,North York,3,Convenience Store,Park,Discount Store,Auto Workshop,Storage Facility,Golf Course,Bakery,Gas Station,Fast Food Restaurant,Dim Sum Restaurant
22,North York,3,Coffee Shop,Park,Grocery Store,Gas Station,Restaurant,French Restaurant,Convenience Store,Dog Run,Eastern European Restaurant,Dessert Shop


#### Cluster 5

In [38]:
north_york_merged.loc[north_york_merged['Cluster Labels'] == 4, north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,North York,4,Coffee Shop,Deli / Bodega,Ice Cream Shop,Middle Eastern Restaurant,Mediterranean Restaurant,Bank,Restaurant,Park,Bridal Shop,Eastern European Restaurant
9,North York,4,Coffee Shop,Pizza Place,Middle Eastern Restaurant,Caribbean Restaurant,Bank,Japanese Restaurant,Restaurant,Massage Studio,Eastern European Restaurant,Dim Sum Restaurant
