In [1]:
# The code was removed by Watson Studio for sharing.

# Geographical Coordinates of the Neighborhoods of Toronto

The data are on a Wikipedia table, at https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

Web scraping --> I will use the Beautiful Soup library to get the data from the table.

_NOTE: the first part is the same as the "Segmentation and Clustering of the Neighborhoods of Toronto" Notebook_

In [2]:
# import the request library
import requests
from bs4 import BeautifulSoup

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
website_url = requests.get(url).text

# Prettify() function in BeautifulSoup will enable us to view how the tags are nested in the document
soup = BeautifulSoup(website_url,'lxml')

### Extract the table using the soup.find. Then, extract all the <td> ... </td> which contain the postcodes, boroughs and neighborhoods

In [3]:
# extract the table
mytable = soup.find('table',{'class':'wikitable sortable'})
# extract the rows that start with <td>
tdALL   = mytable.find_all('td')

### Loop in the tdALL and extract the data that don't have the 'Not Assigned' in the Borough column

In [5]:
postcode = []
borough  = []
neighborhood = []
for ii in range(0,len(tdALL)-3,3):
    if "Not" not in tdALL[ii+1].text:
        postcode.append(tdALL[ii].text)
        borough.append(tdALL[ii+1].text)
        neighborhood.append(tdALL[ii+2].text)        

### Use the lists just found to create a dataframe

In [14]:
# create a dataframe with PostalCode, Borough, and Neighborhood using the lists found above
import pandas as pd
df = pd.DataFrame()
df['PostalCode']   = postcode
df['Borough']      = borough
df['Neighborhood'] = neighborhood

# strip off the '\n' from the Neighborhood column
df['Neighborhood'] = df['Neighborhood'].map(lambda x: x.rstrip('\n'))

### Create a new dataframe, grouping by PostalCode

In [56]:
# group by PostalCode
df2    = df[['PostalCode','Borough','Neighborhood']].groupby('PostalCode')
# get the arrays with the values for Boroughs and Neighborhoods, given a unique PostalCode
l1     = df2.apply(lambda x: x['Neighborhood'].unique())
l2     = df2.apply(lambda x: x['Borough'].unique())                                            
# create a dictionary with the 2 lists
d      = {'Borough':l2,'Neighborhood':l1}
dfnew  = pd.DataFrame(d)
# or:
#dfnew = pd.DataFrame({k: v for k, v in d.items()})
dfnew.reset_index(level=0, inplace=True)
dfnew.rename(columns={'index': 'PostalCode'},inplace=True)
dfnew['Borough'] = dfnew['Borough'].str.get(0)

### Add columns for Latitude and Longitude to the dfnew dataframe

In [57]:
# Create 2 new columns in the dfnew dataframe, which will be filled with the longitudes and latitudes
import numpy as np
dfnew["Latitude"]  = np.nan
dfnew["Longitude"] = np.nan

### Import the Lat and Lon from a csv file

In [18]:
df_coords = pd.read_csv(project.get_file('Geospatial_Coordinates.csv'))

### Fill the Lat and Lon columns in dfnew with values from df_coords, using the postal code as a key 

In [58]:
for postal_code in dfnew['PostalCode']:
    latitude  = df_coords.loc[df_coords['Postal Code']==postal_code]['Latitude']
    longitude = df_coords.loc[df_coords['Postal Code']==postal_code]['Longitude']
    dfnew.loc[dfnew['PostalCode']==postal_code, 'Latitude']  = latitude
    dfnew.loc[dfnew['PostalCode']==postal_code, 'Longitude'] = longitude

### Clustering the neighborhoods in Toronto

In [21]:
import json # library to handle JSON files
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  29.19 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  19.56 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  18.99 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  23.13 MB/s
Libraries imported.


#### Find the lat and lon of Toronto, in oder to create a map of the city using Folium library

In [151]:
address = 'Toronto'

geolocator  = Nominatim(user_agent="ny_explorer")
location    = geolocator.geocode(address)
latitudeTo  = location.latitude
longitudeTo = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitudeTo, longitudeTo))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [55]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitudeTo, longitudeTo], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(dfnew['Latitude'], dfnew['Longitude'], dfnew['Borough'], dfnew['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Select only the Boroughts that have "Toronto" in their name

In [65]:
boroughs = dfnew[dfnew['Borough'].str.contains('Toronto')==True].reset_index(drop=True)
print('The shape of the new dataframe is: ',boroughs.shape)
print("The dataframe looks like this: ")
boroughs.head()

The shape of the new dataframe is:  (38, 5)
The dataframe looks like this: 


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,[The Beaches],43.676357,-79.293031
1,M4K,East Toronto,"[The Danforth West, Riverdale]",43.679557,-79.352188
2,M4L,East Toronto,"[The Beaches West, India Bazaar]",43.668999,-79.315572
3,M4M,East Toronto,[Studio District],43.659526,-79.340923
4,M4N,Central Toronto,[Lawrence Park],43.72802,-79.38879


#### Now, we can use Foursquare API to explore the boroughs and segment them

In [66]:
# The code was removed by Watson Studio for sharing.

#### We want to extract 100 venues that are in each borough/neighborhoods, within a radius of 500 m 

In [92]:
# create a function that will take the name of the boroughs and their lat and lon, and will return
# the venues in a radius of 500 m
def getNearbyVenues(names,neighs, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, neigh, lat, lng in zip(names, neighs, latitudes, longitudes):            
        print(name, neigh)
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            neigh,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                             'Neighborhood',
                             'Borough Latitude', 
                             'Borough Longitude', 
                             'Venue', 
                             'Venue Latitude', 
                             'Venue Longitude', 
                             'Venue Category']
    return(nearby_venues)

In [110]:
# run the function to get 100 venues in the previously selected Boroughs
LIMIT  = 100
borough_venues = getNearbyVenues(names      = boroughs['Borough'],
                                 neighs     = boroughs['Neighborhood'],
                                 latitudes  = boroughs['Latitude'],
                                 longitudes = boroughs['Longitude'],
                                 radius     = 500
                                )


East Toronto ['The Beaches']
East Toronto ['The Danforth West' 'Riverdale']
East Toronto ['The Beaches West' 'India Bazaar']
East Toronto ['Studio District']
Central Toronto ['Lawrence Park']
Central Toronto ['Davisville North']
Central Toronto ['North Toronto West']
Central Toronto ['Davisville']
Central Toronto ['Moore Park' 'Summerhill East']
Central Toronto ['Deer Park' 'Forest Hill SE' 'Rathnelly' 'South Hill' 'Summerhill West']
Downtown Toronto ['Rosedale']
Downtown Toronto ['Cabbagetown' 'St. James Town']
Downtown Toronto ['Church and Wellesley']
Downtown Toronto ['Harbourfront' 'Regent Park']
Downtown Toronto ['Ryerson' 'Garden District']
Downtown Toronto ['St. James Town']
Downtown Toronto ['Berczy Park']
Downtown Toronto ['Central Bay Street']
Downtown Toronto ['Adelaide' 'King' 'Richmond']
Downtown Toronto ['Harbourfront East' 'Toronto Islands' 'Union Station']
Downtown Toronto ['Design Exchange' 'Toronto Dominion Centre']
Downtown Toronto ['Commerce Court' 'Victoria Hotel']

In [111]:
borough_venues.head()

Unnamed: 0,Borough,Neighborhood,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,East Toronto,[The Beaches],43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
1,East Toronto,[The Beaches],43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
2,East Toronto,[The Beaches],43.676357,-79.293031,Starbucks,43.678798,-79.298045,Coffee Shop
3,East Toronto,[The Beaches],43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,East Toronto,"[The Danforth West, Riverdale]",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [95]:
# size of the venues
print(borough_venues.shape)

(1692, 8)


In [99]:
# count by borough/neighborhood:
borough_venues['Neighborhood'] = borough_venues['Neighborhood'].apply(tuple)
borough_venues.groupby('Neighborhood')['Borough','Venue'].count()

Unnamed: 0_level_0,Borough,Venue
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
"(Adelaide, King, Richmond)",100,100
"(Berczy Park,)",56,56
"(Brockton, Exhibition Place, Parkdale Village)",18,18
"(Business Reply Mail Processing Centre 969 Eastern,)",17,17
"(CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara)",14,14
"(Cabbagetown, St. James Town)",46,46
"(Central Bay Street,)",82,82
"(Chinatown, Grange Park, Kensington Market)",100,100
"(Christie,)",16,16
"(Church and Wellesley,)",86,86


In [100]:
print('There are {} uniques categories.'.format(len(borough_venues['Venue Category'].unique())))

There are 233 uniques categories.


#### Analyze each borough/neighborhoods

In [125]:
# one hot encoding
borough_onehot   = pd.get_dummies(borough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
borough_onehot['Neighborhood'] = borough_venues['Neighborhood'] 
# find where which column is 'Neighborhood'
locNeigh         = borough_onehot.columns.get_loc('Neighborhood')
# move neighborhood column to the first column
fixed_columns    = [borough_onehot.columns[locNeigh]] + list(borough_onehot.columns[:locNeigh]) + list(borough_onehot.columns[locNeigh+1:])
borough_onehot   = borough_onehot[fixed_columns]

In [126]:
# size of the new dataframe
borough_onehot.shape

(1692, 233)

In [127]:
borough_onehot.head()

Unnamed: 0,Neighborhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,[The Beaches],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,[The Beaches],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,[The Beaches],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,[The Beaches],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"[The Danforth West, Riverdale]",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [129]:
# since Neighborhood is a ndarray, it needs to be firs converted into a tuple
borough_onehot2 = borough_onehot.copy()
borough_onehot2['Neighborhood'] = borough_onehot2['Neighborhood'].apply(tuple)
toronto_grouped = borough_onehot2.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"(Adelaide, King, Richmond)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0
1,"(Berczy Park,)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"(Brockton, Exhibition Place, Parkdale Village)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,(Business Reply Mail Processing Centre 969 Eas...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
4,"(CN Tower, Bathurst Quay, Island airport, Harb...",0.0,0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [130]:
toronto_grouped.shape

(38, 233)

#### Let's print each neighborhood along with the top 10 most common venues

In [134]:
num_top_venues = 10

for hood in toronto_grouped['Neighborhood']:
    print("----"+str(hood)+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----('Adelaide', 'King', 'Richmond')----
              venue  freq
0       Coffee Shop  0.06
1              Café  0.05
2        Steakhouse  0.04
3               Bar  0.04
4   Thai Restaurant  0.04
5  Asian Restaurant  0.03
6               Gym  0.03
7      Burger Joint  0.03
8            Bakery  0.03
9             Hotel  0.03


----('Berczy Park',)----
                venue  freq
0         Coffee Shop  0.07
1        Cocktail Bar  0.05
2          Restaurant  0.04
3                Café  0.04
4         Cheese Shop  0.04
5  Seafood Restaurant  0.04
6              Bakery  0.04
7      Farmers Market  0.04
8  Italian Restaurant  0.04
9          Steakhouse  0.04


----('Brockton', 'Exhibition Place', 'Parkdale Village')----
                    venue  freq
0          Breakfast Spot  0.11
1             Coffee Shop  0.11
2                    Café  0.11
3            Climbing Gym  0.06
4                     Bar  0.06
5  Furniture / Home Store  0.06
6    Caribbean Restaurant  0.06
7                 S

#### Save what we just found in a dataframe

In [133]:
# first, sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [171]:
# then, create the dataframe
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"(Adelaide, King, Richmond)",Coffee Shop,Café,Thai Restaurant,Bar,Steakhouse,Hotel,Burger Joint,Bakery,Gym,American Restaurant
1,"(Berczy Park,)",Coffee Shop,Cocktail Bar,Bakery,Cheese Shop,Café,Restaurant,Farmers Market,Seafood Restaurant,Steakhouse,Italian Restaurant
2,"(Brockton, Exhibition Place, Parkdale Village)",Coffee Shop,Café,Breakfast Spot,Gym,Furniture / Home Store,Convenience Store,Climbing Gym,Caribbean Restaurant,Burrito Place,Pet Store
3,(Business Reply Mail Processing Centre 969 Eas...,Yoga Studio,Garden Center,Burrito Place,Restaurant,Smoke Shop,Auto Workshop,Recording Studio,Fast Food Restaurant,Farmers Market,Garden
4,"(CN Tower, Bathurst Quay, Island airport, Harb...",Airport Lounge,Airport Service,Airport Terminal,Sculpture Garden,Airport,Airport Food Court,Airport Gate,Boutique,Plane,Harbor / Marina


### Run the k-Means algorithm to cluster the neighborhoods in 5 clusters 

In [172]:
# set number of clusters
kclusters = 5

# drop the Neighborhood column, since we will just need numbers for the model
toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int32)

#### Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [173]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = boroughs
toronto_merged['Neighborhood'] = toronto_merged['Neighborhood'].apply(tuple)

# merge toronto_grouped with borough to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() 

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,"(The Beaches,)",43.676357,-79.293031,2,Health Food Store,Coffee Shop,Pub,Yoga Studio,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
1,M4K,East Toronto,"(The Danforth West, Riverdale)",43.679557,-79.352188,2,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant,Yoga Studio,Dessert Shop,Brewery,Bubble Tea Shop,Restaurant
2,M4L,East Toronto,"(The Beaches West, India Bazaar)",43.668999,-79.315572,2,Pizza Place,Park,Ice Cream Shop,Pet Store,Pub,Movie Theater,Sandwich Place,Burrito Place,Burger Joint,Brewery
3,M4M,East Toronto,"(Studio District,)",43.659526,-79.340923,2,Café,Coffee Shop,Bakery,Gastropub,American Restaurant,Italian Restaurant,Yoga Studio,Cheese Shop,Fish Market,Juice Bar
4,M4N,Central Toronto,"(Lawrence Park,)",43.72802,-79.38879,0,Park,Swim School,Bus Line,Yoga Studio,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


## Visualize the results!

In [180]:
# create map
map_clusters = folium.Map(location=[latitudeTo, longitudeTo], zoom_start=11)

# set color scheme for the clusters
x  = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Borough'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Look at the first clusters

#### Cluster 0

In [175]:
# selec the results for the cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,0,Park,Swim School,Bus Line,Yoga Studio,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
5,Central Toronto,0,Gym,Burger Joint,Breakfast Spot,Park,Dance Studio,Sandwich Place,Hotel,Food & Drink Shop,Discount Store,Dog Run
23,Central Toronto,0,Park,Trail,Sushi Restaurant,Jewelry Store,Bus Line,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Electronics Store


_It looks like Cluster 0 is popular for sport and outdoor activities, with few restaurants_

#### Cluster 1

In [176]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,1,Health & Beauty Service,Garden,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant


#### Cluster 2

In [177]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,2,Health Food Store,Coffee Shop,Pub,Yoga Studio,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
1,East Toronto,2,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant,Yoga Studio,Dessert Shop,Brewery,Bubble Tea Shop,Restaurant
2,East Toronto,2,Pizza Place,Park,Ice Cream Shop,Pet Store,Pub,Movie Theater,Sandwich Place,Burrito Place,Burger Joint,Brewery
3,East Toronto,2,Café,Coffee Shop,Bakery,Gastropub,American Restaurant,Italian Restaurant,Yoga Studio,Cheese Shop,Fish Market,Juice Bar
6,Central Toronto,2,Clothing Store,Coffee Shop,Sporting Goods Shop,Yoga Studio,Mexican Restaurant,Sandwich Place,Salon / Barbershop,Rental Car Location,Park,Miscellaneous Shop
7,Central Toronto,2,Sandwich Place,Dessert Shop,Restaurant,Coffee Shop,Pizza Place,Italian Restaurant,Café,Sushi Restaurant,Pharmacy,Brewery
9,Central Toronto,2,Coffee Shop,Pub,American Restaurant,Light Rail Station,Sports Bar,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint,Vietnamese Restaurant
11,Downtown Toronto,2,Restaurant,Coffee Shop,Park,Pizza Place,Café,Italian Restaurant,Bakery,Pub,Market,Breakfast Spot
12,Downtown Toronto,2,Japanese Restaurant,Coffee Shop,Gay Bar,Sushi Restaurant,Restaurant,Pub,Bubble Tea Shop,Café,Gastropub,Fast Food Restaurant
13,Downtown Toronto,2,Coffee Shop,Café,Park,Pub,Bakery,Breakfast Spot,Theater,Mexican Restaurant,Farmers Market,Shoe Store


_This cluster has a little bit of everything, from restaurant and cafes, to stores, gyms and movie theaters._

#### Cluster 3

In [178]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Downtown Toronto,3,Park,Playground,Trail,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


#### Cluster 4

In [179]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Central Toronto,4,Playground,Restaurant,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
