# This notebook is for segmenting and clustering neighborhoods in the city of Toronto, Canada.

In [2]:
import json
from getpass import getpass

import numpy as np
import pandas as pd
import requests
from sklearn.cluster import KMeans

First and foremost, let us download the data table.

In [3]:
# Parse every HTML table found on the Wikipedia page of Toronto ("M") postal codes.
toronto_neigh = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

# Report what kind of object the parser handed back.
print(f'The data type is {type(toronto_neigh)}.')

The data type is <class 'list'>.


Since `read_html` returns a list, we take its first element to get the table as a dataframe.

In [4]:
# The parser returned a list; use its first element as the working DataFrame.
df_toronto = toronto_neigh[0]
# Preview the first rows.
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


We now successfully have the raw data shown as dataframe.

Let's look at the shape of our data frame before cleansing it.

In [5]:
# (rows, columns) of the raw table, before any cleaning.
df_toronto.shape

(287, 3)

Step #1: Keep only the rows whose borough is not 'Not assigned'.

In [6]:
# Keep only the rows whose borough has actually been assigned.
df_toronto_1 = df_toronto[df_toronto.Borough != 'Not assigned']

# A bare expression in the middle of a cell is silently discarded, so the
# shape has to be printed explicitly to show up in the output.
print(df_toronto_1.shape)

# Number of rows per postcode; values above 1 are postcodes that still need
# to be merged into a single row.
df_toronto_1.Postcode.value_counts()

M9V    8
M8Y    8
M5V    7
M9B    5
M4V    5
M8Z    5
M9R    4
M9C    4
M6M    4
M1V    4
M1E    3
M6L    3
M8V    3
M5H    3
M1T    3
M6K    3
M5J    3
M1P    3
M1M    3
M5T    3
M3H    3
M5R    3
M2J    3
M8X    3
M1C    3
M1L    3
M1K    3
M5M    2
M6R    2
M5X    2
      ..
M3M    1
M4S    1
M9W    1
M4J    1
M6G    1
M4A    1
M2N    1
M3L    1
M4G    1
M5E    1
M1X    1
M7A    1
M7R    1
M4E    1
M9P    1
M5N    1
M1G    1
M3N    1
M5C    1
M4P    1
M6C    1
M1H    1
M4N    1
M7Y    1
M2P    1
M5G    1
M4M    1
M4Y    1
M1S    1
M2K    1
Name: Postcode, Length: 103, dtype: int64

Step #2: Merge rows that share the same postcode (and borough), joining their neighbourhood names with commas.

In [7]:
# Collapse each (Postcode, Borough) pair into one row whose Neighbourhood is
# the comma-separated list of all neighbourhoods in that group.
df_toronto_2 = (
    df_toronto_1
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(', '.join)
    .reset_index()
)
df_toronto_2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [8]:
# (rows, columns) after merging the neighbourhoods of each postcode.
df_toronto_2.shape

(103, 3)

Step #3: For neighbourhoods that have 'Not assigned' value, assign the borough name as the neighbourhood name.

In [9]:
# Show the rows whose neighbourhood is still the 'Not assigned' placeholder.
df_toronto_2[df_toronto_2['Neighbourhood'].eq('Not assigned')]

Unnamed: 0,Postcode,Borough,Neighbourhood
93,M9A,Queen's Park,Not assigned


In [10]:
# Fill every 'Not assigned' neighbourhood with the borough name of the same
# row. Selecting by mask instead of a hard-coded row label (93) and a literal
# borough name keeps the cell correct if the source table changes.
unassigned = df_toronto_2['Neighbourhood'] == 'Not assigned'
df_toronto_2.loc[unassigned, 'Neighbourhood'] = df_toronto_2.loc[unassigned, 'Borough']

In [11]:
# Inspect row 93 to confirm its neighbourhood now carries the borough name.
df_toronto_2.loc[93]

Postcode                  M9A
Borough          Queen's Park
Neighbourhood    Queen's Park
Name: 93, dtype: object

In [12]:
# NOTE: this binds a second name to the same DataFrame object; no copy is made.
df_toronto_proc = df_toronto_2

In [13]:
# (rows, columns) of the cleaned table.
df_toronto_proc.shape

(103, 3)

---We now start the second section of the project---
Let's go ahead to install geocoder using conda.

In [47]:
!conda install -c conda-forge geocoder

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    ratelim-0.1.6              |             py_2           6 KB  conda-forge
    geocoder-1.38.1            |             py_1          53 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.4 MB

The following NEW packages will be INSTALLED:

    geocoder:        1.38.1-py_1       conda-forge
    ratelim:         0.1.6-py_2        conda-forge

The following packages will be UPDATED:

    

In [48]:
import geocoder

In [15]:
# Look up the coordinates of postal code M5G. The service may return None, so
# retry a bounded number of times rather than looping forever.
MAX_GEOCODER_ATTEMPTS = 5

lat_lng_coords = None

for _ in range(MAX_GEOCODER_ATTEMPTS):
    g = geocoder.google('{}, Toronto, Ontario'.format('M5G'))
    lat_lng_coords = g.latlng
    if lat_lng_coords is not None:
        break

if lat_lng_coords is None:
    # No result after all attempts: leave the coordinates unset instead of
    # failing on None[0].
    latitude = None
    longitude = None
    print('geocoder returned no coordinates after {} attempts.'.format(MAX_GEOCODER_ATTEMPTS))
else:
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]

lat_lng_coords

---geocoder doesn't seem to work. Let's use the given geospatial data instead.----

In [16]:
# Load the provided geospatial CSV (one latitude/longitude pair per postal code).
geo_data = pd.read_csv('http://cocl.us/Geospatial_data')

In [17]:
# Preview the first rows of the geospatial table.
geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Let's change column name 'Postal Code' to 'Postcode' before we merge the dataframes.

In [18]:
# Align the key column name with the postcode table so the two can be merged.
geo_data = geo_data.rename(columns={'Postal Code': 'Postcode'})
geo_data.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [19]:
# Attach coordinates to each postcode row, then switch the neighbourhood
# column to the 'Neighborhood' spelling used from here on.
df_toronto_merged = (
    df_toronto_proc
    .merge(geo_data, on='Postcode')
    .rename(columns={'Neighbourhood': 'Neighborhood'})
)
df_toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [22]:
# (rows, columns) after merging in the coordinates.
df_toronto_merged.shape

(103, 5)

---We now start the third section of the project---

Step #1: define Foursquare Credentials and Version

In [24]:
# Read the Foursquare credentials without echoing them: input() writes the
# typed value into the cell output, which is then saved with the notebook.
CLIENT_ID = getpass("enter your Foursquare id: ")
CLIENT_SECRET = getpass("enter your Foursquare secret key: ")
# Foursquare API version date sent with every request.
VERSION = '20200217'

enter your Foursquare id: [REDACTED]
enter your Foursquare secret key: [REDACTED]


Step #2: create a function to get nearby venues

In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100):
    """Query the Foursquare "explore" endpoint around each neighborhood.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences giving the label and coordinates of each
        neighborhood; they are consumed together with ``zip``.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.
    limit : int, default 100
        Value sent as the ``limit`` query parameter. This used to be read
        from a global variable named ``limit``; it is now an explicit
        argument with a default so the function no longer depends on
        notebook state.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        is empty (but still has these columns) when nothing was returned.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and ``VERSION``.
    Prints each neighborhood name as a progress indicator.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            limit)

        # make the GET request; the timeout keeps a stalled connection from
        # hanging the notebook, and HTTP errors surface immediately
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue without categories would make categories[0] raise
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing the column names to the constructor also works for zero rows,
    # where assigning .columns afterwards raises a length mismatch.
    return pd.DataFrame(venue_rows, columns=columns)

Step #3: generate a new dataframe to include boroughs that have 'Toronto' in their names.

In [26]:
# Restrict to the boroughs whose name contains the word 'Toronto'.
torontos_data = df_toronto_merged.loc[df_toronto_merged['Borough'].str.contains('Toronto')]
torontos_data.shape

(39, 5)

Step #4: run the getNearbyVenues on each neighborhood and create a new dataframe called torontos_venues.

In [28]:
# Search parameters. `limit` is read by getNearbyVenues as a global, so it
# must be defined before the call.
radius = 500
limit = 100

# Pass `radius` explicitly: previously it was defined here but never handed
# to the function, so editing it had no effect on the query.
torontos_venues = getNearbyVenues(names=torontos_data['Neighborhood'],
                                   latitudes=torontos_data['Latitude'],
                                   longitudes=torontos_data['Longitude'],
                                   radius=radius
                                  )

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The Junction Sout

Let's check the size of the resulting dataframe.

In [43]:
# Print (rows, columns) first, then show the first venues as the cell output.
print(torontos_venues.shape)
torontos_venues.head()

(1691, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Glen Stewart Park,43.675278,-79.294647,Park
4,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood


Let's quickly look at the quantity of venues returned for each neighbourhood.

In [44]:
# Count the non-null entries of every column within each neighborhood.
torontos_venues.groupby('Neighborhood').count()

Let's find out how many unique categories can be curated from all returned venues.

In [45]:
# Report how many distinct venue categories were returned overall.
print(f"There are {len(torontos_venues['Venue Category'].unique())} uniques categories.")

There are 228 uniques categories.


Step #5: Let's go ahead and analyze each neighborhood

In [52]:
# one hot encoding
torontos_onehot = pd.get_dummies(torontos_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
torontos_onehot['Neighborhood'] = torontos_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [torontos_onehot.columns[-1]] + list(torontos_onehot.columns[:-1])
torontos_onehot = torontos_onehot[fixed_columns]

Next, we group the rows by neighborhood and take the mean of each category's indicator column, which gives the frequency of occurrence of each category in that neighborhood.

In [53]:
# Average the indicator columns within each neighborhood; keep the
# neighborhood as an ordinary column rather than as the index.
torontos_grouped = torontos_onehot.groupby('Neighborhood', as_index=False).mean()
torontos_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,...,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.011765,0.0,0.0
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.035294,0.0,0.047059,0.011765,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.0125,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.0125,...,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0125,0.0


In [54]:
# (neighborhoods, columns) of the frequency table.
torontos_grouped.shape

(39, 228)

Let's print each neighborhood along with the top 5 most common venues.

In [55]:
num_top_venues = 5

for hood in torontos_grouped['Neighborhood']:
    print(f"----{hood}----")

    # Turn the neighborhood's single row into a two-column (venue, freq) table.
    hood_row = torontos_grouped.loc[torontos_grouped['Neighborhood'] == hood]
    freq_table = hood_row.T.reset_index()
    freq_table.columns = ['venue','freq']

    # The first entry is the neighborhood label itself, not a category.
    freq_table = freq_table.iloc[1:]
    freq_table['freq'] = freq_table['freq'].astype(float)
    freq_table = freq_table.round({'freq': 2})

    # Highest frequencies first, renumbered from 0 for display.
    ranked = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
             venue  freq
0      Coffee Shop  0.06
1  Thai Restaurant  0.04
2             Café  0.04
3       Steakhouse  0.04
4       Restaurant  0.03


----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1          Steakhouse  0.04
2                Café  0.04
3            Beer Bar  0.04
4  Seafood Restaurant  0.04


----Brockton, Exhibition Place, Parkdale Village----
               venue  freq
0               Café  0.14
1     Breakfast Spot  0.09
2        Coffee Shop  0.09
3  Convenience Store  0.05
4          Nightclub  0.05


----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0  Light Rail Station  0.11
1         Yoga Studio  0.06
2       Auto Workshop  0.06
3          Comic Shop  0.06
4         Pizza Place  0.06


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0   Airport Service  0.18
1    Airport 

Now let's put the output into a pandas dataframe.

First, we are going to define a function to sort the venues in descending order.

In [56]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``, best first.

    The first element of ``row`` is skipped (callers pass a row whose first
    entry is the neighborhood name); the remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues``
    of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

Let's create a new dataframe and display the top 10 venues for each neighborhood.

In [58]:
num_top_venues = 10

# ordinal suffixes for the first three ranks; every later rank uses 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']

for ind in np.arange(num_top_venues):
    # Choose the suffix with an explicit test instead of catching the
    # IndexError with a bare `except:`, which would also hide real errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = torontos_grouped['Neighborhood']

# fill the ranked venue categories row by row
for ind in np.arange(torontos_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(torontos_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Steakhouse,Thai Restaurant,Café,Burger Joint,Restaurant,Bakery,Bar,Cosmetics Shop,Sushi Restaurant
1,Berczy Park,Coffee Shop,Steakhouse,Farmers Market,Seafood Restaurant,Cocktail Bar,Bakery,Beer Bar,Cheese Shop,Café,Greek Restaurant
2,"Brockton, Exhibition Place, Parkdale Village",Café,Breakfast Spot,Coffee Shop,Performing Arts Venue,Bar,Bakery,Intersection,Italian Restaurant,Restaurant,Burrito Place
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Auto Workshop,Comic Shop,Park,Pizza Place,Butcher,Burrito Place,Restaurant,Brewery
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Coffee Shop,Boutique,Rental Car Location,Bar,Harbor / Marina,Sculpture Garden


Step #6: Use K-means to cluster the neighborhood into 5 clusters

In [64]:
kclusters = 5

# Features only: drop the label column. The keyword form is required because
# the positional `axis` argument of DataFrame.drop was removed in pandas 2.0.
torontos_grouped_clustering = torontos_grouped.drop(columns='Neighborhood')

# run k-means clustering; random_state fixes the initialisation and n_init is
# pinned because its default differs between scikit-learn versions
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(torontos_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:50]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0,
       3, 0, 1, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 4, 4, 0], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [65]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

torontos_merged = torontos_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
torontos_merged = torontos_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

torontos_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Park,Trail,Health Food Store,Pub,Cupcake Shop,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Italian Restaurant,Coffee Shop,Ice Cream Shop,Furniture / Home Store,Frozen Yogurt Shop,Pub,Pizza Place,Liquor Store,Juice Bar
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,4,Sandwich Place,Food & Drink Shop,Light Rail Station,Brewery,Liquor Store,Burger Joint,Italian Restaurant,Burrito Place,Pub,Ice Cream Shop
43,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Bakery,Italian Restaurant,Brewery,American Restaurant,Gastropub,Yoga Studio,Clothing Store,Bookstore
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,3,Park,Swim School,Bus Line,Dance Studio,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store,Diner


We shall visualize the resulting clusters.

In [66]:
# Install a pinned folium release from conda-forge (--yes skips the
# confirmation prompt), then import it for the map below.
!conda install -c conda-forge folium=0.5.0 --yes
import folium

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.0.1               |             py_0         575 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.0 MB

The following NEW packages will be 

In [69]:
# create map
toronto_coort = (43.6532, -79.3832)
latitude = toronto_coort[0]
longitude = toronto_coort[1]

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

import matplotlib.cm as cm
import matplotlib.colors as colors

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(torontos_merged['Latitude'], torontos_merged['Longitude'], torontos_merged['Neighborhood'], torontos_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Step #7: Examine clusters one by one

In [70]:
# Cluster label 0 (the first of the five clusters).
# Columns shown: Borough (position 1), then the cluster label and ranked venues (position 5 onward).

torontos_merged.loc[torontos_merged['Cluster Labels'] == 0, torontos_merged.columns[[1] + list(range(5, torontos_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
41,East Toronto,0,Greek Restaurant,Italian Restaurant,Coffee Shop,Ice Cream Shop,Furniture / Home Store,Frozen Yogurt Shop,Pub,Pizza Place,Liquor Store,Juice Bar
43,East Toronto,0,Café,Coffee Shop,Bakery,Italian Restaurant,Brewery,American Restaurant,Gastropub,Yoga Studio,Clothing Store,Bookstore
46,Central Toronto,0,Clothing Store,Coffee Shop,Yoga Studio,Café,Diner,Salon / Barbershop,Dessert Shop,Spa,Restaurant,Sporting Goods Shop
47,Central Toronto,0,Sandwich Place,Dessert Shop,Pizza Place,Coffee Shop,Gym,Italian Restaurant,Café,Sushi Restaurant,Pharmacy,Brewery
49,Central Toronto,0,Pub,Coffee Shop,American Restaurant,Restaurant,Sushi Restaurant,Sports Bar,Pizza Place,Fried Chicken Joint,Supermarket,Liquor Store
51,Downtown Toronto,0,Bakery,Coffee Shop,Café,Restaurant,Italian Restaurant,Pizza Place,Pub,Pet Store,Pharmacy,Playground
52,Downtown Toronto,0,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Fast Food Restaurant,Gastropub,Café,Gym,Hotel
53,Downtown Toronto,0,Coffee Shop,Bakery,Pub,Park,Café,Breakfast Spot,Mexican Restaurant,Theater,Beer Store,Hotel
54,Downtown Toronto,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Japanese Restaurant,Italian Restaurant,Lingerie Store,Ice Cream Shop,Middle Eastern Restaurant,Electronics Store
55,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Breakfast Spot,Beer Bar,Clothing Store,Bakery,Cosmetics Shop,Hotel,American Restaurant


Let's label Cluster 0 'Coffee Lover Heaven'.

In [71]:
# Cluster label 1 (the second of the five clusters).
# Columns shown: Borough (position 1), then the cluster label and ranked venues (position 5 onward).

torontos_merged.loc[torontos_merged['Cluster Labels'] == 1, torontos_merged.columns[[1] + list(range(5, torontos_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
48,Central Toronto,1,Park,Playground,Cupcake Shop,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store,Diner,Dim Sum Restaurant
50,Downtown Toronto,1,Park,Playground,Trail,Cupcake Shop,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store,Diner


Let's label Cluster 1 'Family Friendly'.

In [72]:
# Cluster label 2 (the third of the five clusters).
# Columns shown: Borough (position 1), then the cluster label and ranked venues (position 5 onward).

torontos_merged.loc[torontos_merged['Cluster Labels'] == 2, torontos_merged.columns[[1] + list(range(5, torontos_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,Central Toronto,2,Garden,Women's Store,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store,Diner


Let's label Cluster 2 'Female Friendly'.

In [73]:
# Cluster label 3 (the fourth of the five clusters).
# Columns shown: Borough (position 1), then the cluster label and ranked venues (position 5 onward).

torontos_merged.loc[torontos_merged['Cluster Labels'] == 3, torontos_merged.columns[[1] + list(range(5, torontos_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
44,Central Toronto,3,Park,Swim School,Bus Line,Dance Studio,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store,Diner


Let's label Cluster 3 'Budget Friendly'.

In [74]:
# Cluster label 4 (the fifth of the five clusters).
# Columns shown: Borough (position 1), then the cluster label and ranked venues (position 5 onward).

torontos_merged.loc[torontos_merged['Cluster Labels'] == 4, torontos_merged.columns[[1] + list(range(5, torontos_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,East Toronto,4,Park,Trail,Health Food Store,Pub,Cupcake Shop,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store
42,East Toronto,4,Sandwich Place,Food & Drink Shop,Light Rail Station,Brewery,Liquor Store,Burger Joint,Italian Restaurant,Burrito Place,Pub,Ice Cream Shop
45,Central Toronto,4,Park,Gym,Breakfast Spot,Sandwich Place,Food & Drink Shop,Department Store,Hotel,Dance Studio,Donut Shop,Doner Restaurant
64,Central Toronto,4,Park,Jewelry Store,Trail,Sushi Restaurant,Dance Studio,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store


Let's label Cluster 4 'Outdoor Friendly'.

---This concludes the project---