### Install necessary packages

In [18]:
!pip install lxml #pd.read_html dependency
!pip install geopy 
!pip install folium #For rendering map

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 358 kB/s eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


### Import the necessary libararies for data wrangling

In [153]:
import pandas as pd #library for data processing
import numpy as np #library for vectorized data
from sklearn.cluster import KMeans

from geopy.geocoders import Nominatim # Address to lat/lng conversion
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import matplotlib.cm as cm 
import matplotlib.colors as colors

import folium #Map rendering library

print('Libraries imported')

Libraries imported


#### Read url and let pandas extract all data frames

In [3]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
dfs = pd.read_html(url)

print(len(dfs)) # Count how many dataframes pandas scraped from the URL

3


In [4]:
df = dfs[0] # Toronto neighborhood table is the first dataframe
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### Omit all the rows where Borough contains 'Not assigned'

In [5]:
label = 'Not assigned'
df['Borough'].replace(label, np.nan, inplace = True) #Replace 'Not assigned' with NaN so that pandas can recognize it
df.dropna(subset = ['Borough'],inplace = True) #Drop the NaN rows
df.reset_index(drop = True,inplace = True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [6]:
#Check to make sure that neighbourhoods column does not have any NaNs 
#If so, Neighbourhood = Borough
df['Neighbourhood'].isna().sum()

0

Let's check its shape

In [7]:
df.shape 

(103, 3)

#### Since geocoder for google is not working properly, we'll just use the csv file

In [8]:
df_coord = pd.read_csv('http://cocl.us/Geospatial_data')
df_coord.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Merge the two datasets into a new data frame

In [44]:
toronto_data = df.merge(df_coord, on = 'Postal Code')
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


#### Use geopy library to get the latitude and longitude values of Toronto Canada.

In [24]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent = 'to_explorer')
location = geolocator.geocode(address)

latitude = location.latitude
longitude = location.longitude

print('The coordinates for Toronto, Ontario are:\n Latitude: {}\n Longitude: {}'.format(latitude,longitude))


The coordinates for Toronto, Ontario are:
 Latitude: 43.6534817
 Longitude: -79.3839347


### Create a map of Toronto with neighbors superimposed on them

In [211]:
map_toronto = folium.Map(location = [latitude, longitude], zoom_start = 10)

for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighbourhood']):
    label = '{}: {}'.format(borough, neighborhood)
    label = folium.Popup(label, parse_html = True)
    
    folium.CircleMarker(
        [lat,lng],
        radius = 5,
        popup = label,
        color = 'blue',
        fill = True,
        fill_opacity = 0.7,
        parse_html = False).add_to(map_toronto)
        
map_toronto

#### To simplify the map above, let's just segment and cluster the neighborhoods in the Toronto area

Let's subset our dataframe by containing only Downtown Toronto and store it in a new dataframe

In [66]:
toronto_area = toronto_data[toronto_data['Borough'].str.find('Toronto') != -1].reset_index(drop = True)
toronto_area.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


#### Revisualize the simplified map

In [213]:
map_toronto_area = folium.Map(location = [latitude, longitude], zoom_start = 11)

for lat, lng, borough, neighborhood in zip(toronto_area['Latitude'], toronto_area['Longitude'], toronto_area['Borough'], toronto_area['Neighbourhood']):
    label = '{}: {}'.format(borough, neighborhood)
    label = folium.Popup(label, parse_html = True)
    
    folium.CircleMarker(
        [lat,lng],
        radius = 5,
        popup = label,
        color = 'blue',
        fill = True,
        fill_opacity = 0.7,
        parse_html = False).add_to(map_toronto_area)
        
map_toronto_area

#### Now we will start utilizing the Foursquare API to explore and segment the Toronto area

Define the Foursquare crendentials and version

In [70]:
CLIENT_ID = '1ELFBFF3AJOOT2R242SNC1IQAGD1CZZ5TREF3ZUD3OT22S3D'
CLIENT_SECRET = 'EZXPMER5IAK3HWLYHDILADOW5EUIEO4IMF1UAGLGCZNDPGW5'
VERSION = '20200725'

### Define a function that will get the venues across the neighborhoods of Toronto

Let's limit the requests to the top 100 venues within 700 meters of each neighborhood

In [74]:
limit = 100
radius = 700

In [75]:
def getNearbyVenues(names, latitudes, longitudes, radius = 500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [81]:
toronto_venues = getNearbyVenues(names=toronto_area['Neighbourhood'],
                                   latitudes=toronto_area['Latitude'],
                                   longitudes=toronto_area['Longitude']
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West, Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
R

#### Let's check out our new dataframe

In [82]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


### Analyze Each Neighborhood in Toronto

In [107]:
#one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep = "")

#Add neighborhood column back into dataframe
toronto_onehot.insert(0, 'Neighbourhood', toronto_venues['Neighborhood'])

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Let's checkout the dataframe size

In [108]:
toronto_onehot.shape

(1637, 235)

#### Now we will group the rows by neighborhood and get the mean of the frequency of each category

In [109]:
toronto_group = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_group.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.0,0.0,0.015625


#### Print the neighborhoods top 5 venues

In [125]:
num_top_venues = 5

for hood in toronto_group['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_group[toronto_group['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue', 'freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')


----Berczy Park----
            venue  freq
0     Coffee Shop  0.10
1        Beer Bar  0.03
2  Farmers Market  0.03
3      Restaurant  0.03
4            Café  0.03


----Brockton, Parkdale Village, Exhibition Place----
            venue  freq
0            Café  0.13
1  Breakfast Spot  0.09
2          Bakery  0.09
3     Coffee Shop  0.09
4    Intersection  0.04


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                venue  freq
0  Light Rail Station  0.12
1         Yoga Studio  0.06
2          Restaurant  0.06
3         Pizza Place  0.06
4       Burrito Place  0.06


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
              venue  freq
0   Airport Service  0.18
1    Airport Lounge  0.12
2  Airport Terminal  0.12
3   Harbor / Marina  0.06
4               Bar  0.06


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.17
1               

#### We will place that in a pandas data frame

First let us define a function to sort the values in descending order

In [126]:
def get_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending = False)
    
    return(row_categories_sorted.index.values[0:num_top_venues])

Let's create a dataframe for the top 10 venues for each neighborhood

In [130]:
num_top_venues = 10

ordinal_indicator = ['st', 'nd', 'rd']

columns = ['Neighbourhood']

for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind + 1,ordinal_indicator[ind]))
    except:
        columns.append('{}th Most Common Venus'.format(ind + 1))

neighborhoods_venues_sorted = pd.DataFrame(columns = columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_group['Neighbourhood']

for ind in np.arange(toronto_group.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = get_common_venues(toronto_group.iloc[ind,1:], num_top_venues)
    
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venus,5th Most Common Venus,6th Most Common Venus,7th Most Common Venus,8th Most Common Venus,9th Most Common Venus,10th Most Common Venus
0,Berczy Park,Coffee Shop,Café,Cocktail Bar,Bakery,Seafood Restaurant,Farmers Market,Restaurant,Cheese Shop,Beer Bar,Pharmacy
1,"Brockton, Parkdale Village, Exhibition Place",Café,Bakery,Coffee Shop,Breakfast Spot,Convenience Store,Stadium,Italian Restaurant,Restaurant,Intersection,Bar
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Yoga Studio,Auto Workshop,Gym / Fitness Center,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Comic Shop,Park
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Sculpture Garden,Airport Food Court,Airport Gate,Bar,Boat or Ferry,Coffee Shop
4,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Japanese Restaurant,Department Store,Bubble Tea Shop,Salad Place,Burger Joint,Poke Place


## Cluster Neighborhoods

In [201]:
# set number of clusters
k_clusters = 3

#drop neighbourhood column to fit dataframe in kmeans algorithm
toronto_group_clustering = toronto_group.drop('Neighbourhood', axis = 1)

#fit model
kmeans = KMeans(n_clusters = k_clusters, random_state = 0) 
kmeans.fit(toronto_group_clustering)

kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

Now we add a new dataframe with the top 10 venues per neighborhood with its associated cluster

In [202]:
#insert clusters
neighborhoods_venues_sorted.drop('Cluster Labels', 1, inplace = True)
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_area

#merge the top neighborhood venues with the toronto data

toronto_merged = toronto_merged.merge(neighborhoods_venues_sorted, on = 'Neighbourhood')
toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venus,5th Most Common Venus,6th Most Common Venus,7th Most Common Venus,8th Most Common Venus,9th Most Common Venus,10th Most Common Venus
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1,Coffee Shop,Park,Pub,Bakery,Breakfast Spot,Theater,Café,Shoe Store,Farmers Market,Restaurant
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1,Coffee Shop,Diner,Yoga Studio,Café,Bank,Bar,Beer Bar,Italian Restaurant,Burrito Place,Sandwich Place
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1,Clothing Store,Coffee Shop,Cosmetics Shop,Japanese Restaurant,Italian Restaurant,Café,Bubble Tea Shop,Bookstore,Fast Food Restaurant,Middle Eastern Restaurant
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1,Café,Coffee Shop,Restaurant,Clothing Store,Cocktail Bar,Cosmetics Shop,American Restaurant,Seafood Restaurant,Park,Moroccan Restaurant
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,Neighborhood,Health Food Store,Asian Restaurant,Pub,Trail,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dessert Shop,Donut Shop


### Finally, let's visualize the clusters

In [204]:
map_clusters = folium.Map(location = [latitude,longitude], zoom_start = 11)

colors_array = cm.rainbow(np.linspace(0,1, len(set(kmeans.labels_))))
rainbow = [colors.rgb2hex(i) for i in colors_array]

for lat, lng, nei, cluster in zip(toronto_merged['Latitude'],toronto_merged['Longitude'],toronto_merged['Neighbourhood'],toronto_merged['Cluster Labels']):
    label = '{}:\nCluster {}'.format(nei, cluster)
    label = folium.Popup(label, parse_html = True)
    
    folium.CircleMarker(
    [lat,lng],
    radius = 5,
    popup = label,
    color = rainbow[cluster - 1],
    fill = True,
    fill_color = rainbow[cluster - 1],
    fill_opacity = 0.7).add_to(map_clusters)

map_clusters
    

### Let's Examine the Clusters

#### Cluster 1

In [206]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[2] + list(range(6,toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venus,5th Most Common Venus,6th Most Common Venus,7th Most Common Venus,8th Most Common Venus,9th Most Common Venus,10th Most Common Venus
18,Lawrence Park,Park,Bus Line,Swim School,Yoga Studio,Discount Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
21,"Forest Hill North & West, Forest Hill Road Park",Park,Jewelry Store,Trail,Sushi Restaurant,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
29,"Moore Park, Summerhill East",Gym,Park,Summer Camp,Restaurant,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
33,Rosedale,Park,Playground,Trail,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


#### Cluster 2

In [207]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[2] + list(range(6,toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venus,5th Most Common Venus,6th Most Common Venus,7th Most Common Venus,8th Most Common Venus,9th Most Common Venus,10th Most Common Venus
0,"Regent Park, Harbourfront",Coffee Shop,Park,Pub,Bakery,Breakfast Spot,Theater,Café,Shoe Store,Farmers Market,Restaurant
1,"Queen's Park, Ontario Provincial Government",Coffee Shop,Diner,Yoga Studio,Café,Bank,Bar,Beer Bar,Italian Restaurant,Burrito Place,Sandwich Place
2,"Garden District, Ryerson",Clothing Store,Coffee Shop,Cosmetics Shop,Japanese Restaurant,Italian Restaurant,Café,Bubble Tea Shop,Bookstore,Fast Food Restaurant,Middle Eastern Restaurant
3,St. James Town,Café,Coffee Shop,Restaurant,Clothing Store,Cocktail Bar,Cosmetics Shop,American Restaurant,Seafood Restaurant,Park,Moroccan Restaurant
4,The Beaches,Neighborhood,Health Food Store,Asian Restaurant,Pub,Trail,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dessert Shop,Donut Shop
5,Berczy Park,Coffee Shop,Café,Cocktail Bar,Bakery,Seafood Restaurant,Farmers Market,Restaurant,Cheese Shop,Beer Bar,Pharmacy
6,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Japanese Restaurant,Department Store,Bubble Tea Shop,Salad Place,Burger Joint,Poke Place
7,Christie,Grocery Store,Café,Park,Diner,Baby Store,Italian Restaurant,Restaurant,Coffee Shop,Nightclub,Candy Store
8,"Richmond, Adelaide, King",Coffee Shop,Café,Clothing Store,Restaurant,Hotel,Thai Restaurant,Bar,Gym,Steakhouse,Deli / Bodega
9,"Dufferin, Dovercourt Village",Pharmacy,Bakery,Middle Eastern Restaurant,Music Venue,Park,Pet Store,Café,Brewery,Bar,Supermarket


#### Cluster 3

In [208]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[2] + list(range(6,toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venus,5th Most Common Venus,6th Most Common Venus,7th Most Common Venus,8th Most Common Venus,9th Most Common Venus,10th Most Common Venus
19,Roselawn,Garden,Home Service,Diner,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


#### By examining the clusters above, it is fairly apparent that cluster 1 neighbourhoods' **(red)** top venues are parks and trails, whereas cluster 2 neighborhoods' **(purple)** top venues focuses are coffee shops and cafes. Cluster 3 **(light green)** contains only one neighborhood where its top venues are Roselawns and Gardens