# The Capstone Project Notebook

#### This notebook will be used for Segmenting and Clustering Toronto Neighborhoods Assignment

In [92]:
# let's download all the required dependencies
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analysis 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
from pandas.io.json import json_normalize # Utility function json_normalize for flattening semi-structured JSON objects
from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [93]:
print("Hello Capstone Project Course!")

Hello Capstone Project Course!


## Segmenting and Clustering Toronto Neighborhoods

In [94]:
import requests

In [95]:
from bs4 import BeautifulSoup

### Scrape the Wikipedia page
#### Online articles regarding web scraping that I found very helpful, and that I used to scrape the required Wikipedia page

##### Web scraping Wikipedia Tables using BeautifulSoup and Python
https://medium.com/analytics-vidhya/web-scraping-wiki-tables-using-beautifulsoup-and-python-6b9ea26d8722

##### Scraping a Wikipedia table using Python
https://qxf2.com/blog/web-scraping-using-python/

##### Find Child Elements Using BeautifulSoup
http://carrefax.com/new-blog/2018/9/5/find-child-elements-using-beautifulsoup

In [96]:
wikiPostalCodes = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(wikiPostalCodes,'lxml') 

In [97]:
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className = document.documentElement.className.replace( /(^|\s)client-nojs(\s|$)/, "$1client-js$2" );
  </script>
  <script>
   (window.RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":890001695,"wgRevisionId":890001695,"wgArticleId":539066,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wg

In [98]:
My_table = soup.find('table',{'class':'wikitable sortable'})
My_table

<table class="wikitable sortable">
<tbody><tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
</td></tr>
<tr>
<td>M4A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Harbourfront_(Toronto)" title="Harbourfront (Toronto)">Harbourfront</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Regent_Park" title="Regent Park">Regent Park</a>
</td></tr>
<tr>
<td>M6A</td>

In [99]:
rows = My_table.findAll('tr')
rows

[<tr>
 <th>Postcode</th>
 <th>Borough</th>
 <th>Neighbourhood
 </th></tr>, <tr>
 <td>M1A</td>
 <td>Not assigned</td>
 <td>Not assigned
 </td></tr>, <tr>
 <td>M2A</td>
 <td>Not assigned</td>
 <td>Not assigned
 </td></tr>, <tr>
 <td>M3A</td>
 <td><a href="/wiki/North_York" title="North York">North York</a></td>
 <td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
 </td></tr>, <tr>
 <td>M4A</td>
 <td><a href="/wiki/North_York" title="North York">North York</a></td>
 <td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
 </td></tr>, <tr>
 <td>M5A</td>
 <td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
 <td><a href="/wiki/Harbourfront_(Toronto)" title="Harbourfront (Toronto)">Harbourfront</a>
 </td></tr>, <tr>
 <td>M5A</td>
 <td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
 <td><a href="/wiki/Regent_Park" title="Regent Park">Regent Park</a>
 </td></tr>, <tr>
 <td>M6A</td>
 <td

In [100]:
parsed_data = []

In [101]:
for row in rows:
    children = row.findChildren(recursive=False)
    row_text = []
    for child in children: 
        clean_text = child.text 
        clean_text = clean_text.split('&#91;')[0] # This is to discard reference/citation links
        clean_text = clean_text.split('&#160;')[-1] # This is to clean the header row of the sort icons
        clean_text = clean_text.strip()
        row_text.append(clean_text)
    parsed_data.append(row_text)

In [102]:
parsed_data[:10]

[['Postcode', 'Borough', 'Neighbourhood'],
 ['M1A', 'Not assigned', 'Not assigned'],
 ['M2A', 'Not assigned', 'Not assigned'],
 ['M3A', 'North York', 'Parkwoods'],
 ['M4A', 'North York', 'Victoria Village'],
 ['M5A', 'Downtown Toronto', 'Harbourfront'],
 ['M5A', 'Downtown Toronto', 'Regent Park'],
 ['M6A', 'North York', 'Lawrence Heights'],
 ['M6A', 'North York', 'Lawrence Manor'],
 ['M7A', "Queen's Park", 'Not assigned']]

### Load data into a pandas DataFrame

In [103]:
# Define the dataframe columns
column_names = ['PostalCode', 'Borough', 'Neighborhood']

# Instantiate and populate the dataframe
df = pd.DataFrame(parsed_data[1:], columns=column_names)

# Examine the resulting dataframe
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [104]:
# Quickly examine the tail of the resulting dataframe
df.tail()

Unnamed: 0,PostalCode,Borough,Neighborhood
283,M8Z,Etobicoke,Mimico NW
284,M8Z,Etobicoke,The Queensway West
285,M8Z,Etobicoke,Royal York South West
286,M8Z,Etobicoke,South of Bloor
287,M9Z,Not assigned,Not assigned


In [105]:
df.shape

(288, 3)

1. Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
2. If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
3. More than one neighborhood can exist in one postal code area.

In [106]:
# Process the cells that have an assigned borough. Ignore cells with a borough that is not assigned.
df.drop(df[df['Borough']=='Not assigned'].index, inplace=True)
df.reset_index(inplace=True, drop=True)
print("The new number of rows in dataframe after dropping unassigned boroughs:", df.shape[0])

The new number of rows in dataframe after dropping unassigned boroughs: 211


In [107]:
# The neighborhood will be the same as the borough if a cell has a borough but a Not assigned neighborhood.
df['Neighborhood'].where(df['Neighborhood'] != 'Not assigned', df['Borough'], inplace=True)

In [108]:
# More than one neighborhood can exist in one postal code area. 
# Combined the rows into one row with the neighborhoods separated with a comma
df=df.groupby("PostalCode").agg(lambda x:','.join(set(x)))

In [109]:
df=df.reset_index()

In [110]:
df.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern,Rouge"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Morningside,Guildwood,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Ionview,Kennedy Park,East Birchmount Park"
7,M1L,Scarborough,"Golden Mile,Clairlea,Oakridge"
8,M1M,Scarborough,"Cliffside,Scarborough Village West,Cliffcrest"
9,M1N,Scarborough,"Cliffside West,Birch Cliff"


In [111]:
print("The number of rows in dataframe:", df.shape[0])
df.shape

The number of rows in dataframe: 103


(103, 3)

In [112]:
df_copy = df.copy() # Make a copy of the dataframe

### Get Geographical coordinates using the csv file provided by the instructor

In [113]:
geodata = pd.read_csv('https://cocl.us/Geospatial_data')
geodata.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [114]:
geodata.rename(index=str, columns={"Postal Code":"PostalCode"},inplace=True)
geodata.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [115]:
# Merge the original df_copy with geodata
df = df_copy.merge(geodata, how='inner', on='PostalCode')

In [116]:
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Morningside,Guildwood,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Ionview,Kennedy Park,East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile,Clairlea,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside,Scarborough Village West,Cliffcrest",43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West,Birch Cliff",43.692657,-79.264848


### Explore and Cluster Toronto neighborhoods

In [117]:
# Check how many boroughs and neighborhoods there are
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


#### The following replicates the code and analysis that was done in the New York City data lab

#### Create a map of Toronto with neighborhoods superimposed on top

In [118]:
import folium # Forgot to import Folium visualization library - import it

## Let's segment and cluster only Etobicoke

In [119]:
etobicoke_data = df[df['Borough'] == 'Etobicoke'].reset_index(drop=True)
etobicoke_data.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M8V,Etobicoke,"Mimico South,New Toronto,Humber Bay Shores",43.605647,-79.501321
1,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484
2,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",43.653654,-79.506944
3,M8Y,Etobicoke,"Sunnylea,Kingsway Park South East,Mimico NE,Th...",43.636258,-79.498509
4,M8Z,Etobicoke,"Kingsway Park South West,The Queensway West,Mi...",43.628841,-79.520999
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M9B,Etobicoke,"Princess Gardens,West Deane Park,Martin Grove,...",43.650943,-79.554724
7,M9C,Etobicoke,"Markland Wood,Old Burnhamthorpe,Bloordale Gard...",43.643515,-79.577201
8,M9P,Etobicoke,Westmount,43.696319,-79.532242
9,M9R,Etobicoke,"Richview Gardens,Kingsview Village,St. Phillip...",43.688905,-79.554724


In [120]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [121]:
# Use geopy library to get the latitude and longitude values of Etobicoke.
address = 'Etobicoke, Ontario'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Etobicoke are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Etobicoke are 43.67145915, -79.5524920661167.


In [122]:
# Create a map of Etobicoke using Latitude and Longitude values
map_etobicoke = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add markers to map
for lat, lng, label in zip(etobicoke_data['Latitude'], etobicoke_data['Longitude'], etobicoke_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_etobicoke)  
    
map_etobicoke

In [123]:
# Next, I am going to start utilizing the Foursquare API to explore the neighborhoods and segment them.
CLIENT_ID = 'RDZDO5MSITL4N20HVKR2WPZ1RHFP3JVEI1OZHZLTRJC1MYMX' # Foursquare ID
CLIENT_SECRET = 'WB3GMUZTGTJCTXN0CSCJGG54F0UBXRE1IWC5WMDJ1KSCAM4X' # Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Credentails:
CLIENT_ID: RDZDO5MSITL4N20HVKR2WPZ1RHFP3JVEI1OZHZLTRJC1MYMX
CLIENT_SECRET:WB3GMUZTGTJCTXN0CSCJGG54F0UBXRE1IWC5WMDJ1KSCAM4X


### Explore the first neighborhoods in the dataframe

In [124]:
etobicoke_data.loc[0, 'Neighborhood'] # Get the neighborhood name

'Mimico South,New Toronto,Humber Bay Shores'

In [125]:
# Get the neighborhoods' latitude and longitude values
neighborhood_latitude = etobicoke_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = etobicoke_data.loc[0, 'Longitude'] # neighborhood longitude value
neighborhood_name = etobicoke_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Mimico South,New Toronto,Humber Bay Shores are 43.6056466, -79.50132070000001.


In [126]:
# Let's get the top 100 venues that are in New Toronto, Mimico South, Humber Bay Shores within a radius of 500 meters.
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id=RDZDO5MSITL4N20HVKR2WPZ1RHFP3JVEI1OZHZLTRJC1MYMX&client_secret=WB3GMUZTGTJCTXN0CSCJGG54F0UBXRE1IWC5WMDJ1KSCAM4X&v=20180605&ll=43.6056466,-79.50132070000001&radius=500&limit=100'.format(CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=RDZDO5MSITL4N20HVKR2WPZ1RHFP3JVEI1OZHZLTRJC1MYMX&client_secret=WB3GMUZTGTJCTXN0CSCJGG54F0UBXRE1IWC5WMDJ1KSCAM4X&v=20180605&ll=43.6056466,-79.50132070000001&radius=500&limit=100'

In [127]:
# Send the GET request and examine the results
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5cbfc52ddd5797241412935f'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 16,
  'suggestedBounds': {'ne': {'lat': 43.6101466045, 'lng': -79.49511771930959},
   'sw': {'lat': 43.6011465955, 'lng': -79.50752368069043}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b119977f964a520488023e3',
       'name': 'LCBO',
       'location': {'address': '2762 Lake Shore Blvd W',
        'crossStreet': 'btwn 1st & 2nd St',
        'lat': 43.60228082768786,
        'lng': -79.4993016827402,
        'labeledLatLngs': [{'label': 'display',
          'lat':

In [128]:
# All the information is in the items key. Let's borrow the get_category_type function from the Foursquare lab.
# Function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [129]:
# Clean the json and structure in into a pandas dataframe.
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,LCBO,Liquor Store,43.602281,-79.499302
1,Domino's Pizza,Pizza Place,43.601676,-79.500908
2,New Toronto Fish & Chips,Restaurant,43.601849,-79.503281
3,Delicia Bakery & Pastry,Bakery,43.601403,-79.503012
4,Lucky Dice Restaurant,Café,43.601392,-79.503056


In [130]:
# How many values were returned by Foursquare?
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

16 venues were returned by Foursquare.


### Explore Neighborhoods in Etobicoke

In [131]:
# Use the function from the lab to repeat the same process to all the neighborhoods in Etobicoke
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [132]:
# Run the above function on each neighborhood and create a new dataframe called etobicoke_venues.
etobicoke_venues = getNearbyVenues(names=etobicoke_data['Neighborhood'],
                                   latitudes=etobicoke_data['Latitude'],
                                   longitudes=etobicoke_data['Longitude']
                                  )

Mimico South,New Toronto,Humber Bay Shores
Alderwood,Long Branch
The Kingsway,Montgomery Road,Old Mill North
Sunnylea,Kingsway Park South East,Mimico NE,The Queensway East,Old Mill South,Humber Bay,King's Mill Park,Royal York South East
Kingsway Park South West,The Queensway West,Mimico NW,Royal York South West,South of Bloor
Islington Avenue
Princess Gardens,West Deane Park,Martin Grove,Islington,Cloverdale
Markland Wood,Old Burnhamthorpe,Bloordale Gardens,Eringate
Westmount
Richview Gardens,Kingsview Village,St. Phillips,Martin Grove Gardens
Mount Olive,South Steeles,Thistletown,Silverstone,Beaumond Heights,Albion Gardens,Humbergate,Jamestown
Northwest


In [133]:
# Check the size of the resulting dataframe
print(etobicoke_venues.shape)
etobicoke_venues.head()

(74, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Mimico South,New Toronto,Humber Bay Shores",43.605647,-79.501321,LCBO,43.602281,-79.499302,Liquor Store
1,"Mimico South,New Toronto,Humber Bay Shores",43.605647,-79.501321,Domino's Pizza,43.601676,-79.500908,Pizza Place
2,"Mimico South,New Toronto,Humber Bay Shores",43.605647,-79.501321,New Toronto Fish & Chips,43.601849,-79.503281,Restaurant
3,"Mimico South,New Toronto,Humber Bay Shores",43.605647,-79.501321,Delicia Bakery & Pastry,43.601403,-79.503012,Bakery
4,"Mimico South,New Toronto,Humber Bay Shores",43.605647,-79.501321,Lucky Dice Restaurant,43.601392,-79.503056,Café


In [134]:
# How many venues were returned for each neighborhood
etobicoke_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alderwood,Long Branch",9,9,9,9,9,9
"Kingsway Park South West,The Queensway West,Mimico NW,Royal York South West,South of Bloor",14,14,14,14,14,14
"Markland Wood,Old Burnhamthorpe,Bloordale Gardens,Eringate",7,7,7,7,7,7
"Mimico South,New Toronto,Humber Bay Shores",16,16,16,16,16,16
"Mount Olive,South Steeles,Thistletown,Silverstone,Beaumond Heights,Albion Gardens,Humbergate,Jamestown",8,8,8,8,8,8
Northwest,3,3,3,3,3,3
"Princess Gardens,West Deane Park,Martin Grove,Islington,Cloverdale",1,1,1,1,1,1
"Richview Gardens,Kingsview Village,St. Phillips,Martin Grove Gardens",3,3,3,3,3,3
"Sunnylea,Kingsway Park South East,Mimico NE,The Queensway East,Old Mill South,Humber Bay,King's Mill Park,Royal York South East",3,3,3,3,3,3
"The Kingsway,Montgomery Road,Old Mill North",3,3,3,3,3,3


In [135]:
# Find out how many unique categories can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(etobicoke_venues['Venue Category'].unique())))

There are 41 uniques categories.


In [136]:
etobicoke_venues.tail()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
69,"Mount Olive,South Steeles,Thistletown,Silverst...",43.739416,-79.588437,Sunny Foodmart,43.74184,-79.590561,Grocery Store
70,"Mount Olive,South Steeles,Thistletown,Silverst...",43.739416,-79.588437,McDonald's,43.741635,-79.584446,Fast Food Restaurant
71,Northwest,43.706748,-79.594054,Economy Rent A Car,43.708471,-79.589943,Rental Car Location
72,Northwest,43.706748,-79.594054,Logistics Distribution,43.707554,-79.589252,Bar
73,Northwest,43.706748,-79.594054,Saand Rexdale,43.705072,-79.598725,Drugstore


### Analyze Each Neighborhood

In [137]:
# one hot encoding
etobicoke_onehot = pd.get_dummies(etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
etobicoke_onehot['Neighborhood'] = etobicoke_venues['Neighborhood']

# move neighborhood column to the first column
fixed_columns = [etobicoke_onehot.columns[-1]] + list(etobicoke_onehot.columns[:-1])
etobicoke_onehot = etobicoke_onehot[fixed_columns]

etobicoke_grouped = etobicoke_onehot.groupby('Neighborhood').mean().reset_index()
etobicoke_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Bakery,Bank,Bar,Baseball Field,Beer Store,Burger Joint,Burrito Place,Café,Chinese Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Discount Store,Drugstore,Fast Food Restaurant,Flower Shop,Fried Chicken Joint,Grocery Store,Gym,Intersection,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Park,Pet Store,Pharmacy,Pizza Place,Pool,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Seafood Restaurant,Skating Rink,Social Club,Supplement Shop,Tanning Salon,Wings Joint
0,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.222222,0.111111,0.111111,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0
1,"Kingsway Park South West,The Queensway West,Mi...",0.0,0.071429,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.071429,0.071429,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.071429,0.071429,0.071429,0.071429
2,"Markland Wood,Old Burnhamthorpe,Bloordale Gard...",0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Mimico South,New Toronto,Humber Bay Shores",0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0625,0.0,0.0625,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0625,0.0625,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0
4,"Mount Olive,South Steeles,Thistletown,Silverst...",0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0


In [138]:
# Examine the new dataframe size
etobicoke_onehot.shape

(74, 42)

In [139]:
# Let's confirm the new size
etobicoke_grouped.shape

(11, 42)

In [140]:
etobicoke_grouped

Unnamed: 0,Neighborhood,American Restaurant,Bakery,Bank,Bar,Baseball Field,Beer Store,Burger Joint,Burrito Place,Café,Chinese Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Discount Store,Drugstore,Fast Food Restaurant,Flower Shop,Fried Chicken Joint,Grocery Store,Gym,Intersection,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Park,Pet Store,Pharmacy,Pizza Place,Pool,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Seafood Restaurant,Skating Rink,Social Club,Supplement Shop,Tanning Salon,Wings Joint
0,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.222222,0.111111,0.111111,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0
1,"Kingsway Park South West,The Queensway West,Mi...",0.0,0.071429,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.071429,0.071429,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.071429,0.071429,0.071429,0.071429
2,"Markland Wood,Old Burnhamthorpe,Bloordale Gard...",0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Mimico South,New Toronto,Humber Bay Shores",0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0625,0.0,0.0625,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0625,0.0625,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0
4,"Mount Olive,South Steeles,Thistletown,Silverst...",0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0
5,Northwest,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Princess Gardens,West Deane Park,Martin Grove,...",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Richview Gardens,Kingsview Village,St. Phillip...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Sunnylea,Kingsway Park South East,Mimico NE,Th...",0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"The Kingsway,Montgomery Road,Old Mill North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [141]:
# Let's print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in etobicoke_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = etobicoke_grouped[etobicoke_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alderwood,Long Branch----
          venue  freq
0   Pizza Place  0.22
1   Coffee Shop  0.11
2           Pub  0.11
3           Gym  0.11
4  Skating Rink  0.11


----Kingsway Park South West,The Queensway West,Mimico NW,Royal York South West,South of Bloor----
            venue  freq
0     Wings Joint  0.07
1  Sandwich Place  0.07
2          Bakery  0.07
3             Gym  0.07
4   Grocery Store  0.07


----Markland Wood,Old Burnhamthorpe,Bloordale Gardens,Eringate----
               venue  freq
0       Liquor Store  0.14
1  Convenience Store  0.14
2               Park  0.14
3         Beer Store  0.14
4           Pharmacy  0.14


----Mimico South,New Toronto,Humber Bay Shores----
                 venue  freq
0                 Café  0.12
1  American Restaurant  0.06
2          Pizza Place  0.06
3   Mexican Restaurant  0.06
4         Liquor Store  0.06


----Mount Olive,South Steeles,Thistletown,Silverstone,Beaumond Heights,Albion Gardens,Humbergate,Jamestown----
                 venue

In [142]:
# Let's put that into a pandas dataframe
# Use the function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [143]:
# Create the new dataframe and display the top 10 venues for each neighborhood
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = etobicoke_grouped['Neighborhood']

for ind in np.arange(etobicoke_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(etobicoke_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alderwood,Long Branch",Pizza Place,Gym,Skating Rink,Sandwich Place,Coffee Shop,Pub,Pool,Pharmacy,Café,Drugstore
1,"Kingsway Park South West,The Queensway West,Mi...",Wings Joint,Burger Joint,Gym,Tanning Salon,Fast Food Restaurant,Discount Store,Convenience Store,Flower Shop,Burrito Place,Sandwich Place
2,"Markland Wood,Old Burnhamthorpe,Bloordale Gard...",Pizza Place,Liquor Store,Beer Store,Convenience Store,Park,Café,Pharmacy,Wings Joint,Fast Food Restaurant,Drugstore
3,"Mimico South,New Toronto,Humber Bay Shores",Café,American Restaurant,Pizza Place,Flower Shop,Fried Chicken Joint,Gym,Liquor Store,Mexican Restaurant,Pet Store,Pharmacy
4,"Mount Olive,South Steeles,Thistletown,Silverst...",Grocery Store,Fried Chicken Joint,Pharmacy,Fast Food Restaurant,Sandwich Place,Beer Store,Pizza Place,Flower Shop,Drugstore,Discount Store


### Cluster Neighborhoods

In [144]:
# Run k-means to cluster the neighborhood into 5 clusters
# set number of clusters
kclusters = 5

etobicoke_grouped_clustering = etobicoke_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(etobicoke_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 3, 2, 1, 4, 4], dtype=int32)

#### Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [145]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

etobicoke_merged = etobicoke_data

# merge etobicoke_grouped with etobicoke_data to add latitude/longitude for each neighborhood
etobicoke_merged = etobicoke_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

etobicoke_merged.head() # check the last columns

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M8V,Etobicoke,"Mimico South,New Toronto,Humber Bay Shores",43.605647,-79.501321,0.0,Café,American Restaurant,Pizza Place,Flower Shop,Fried Chicken Joint,Gym,Liquor Store,Mexican Restaurant,Pet Store,Pharmacy
1,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484,0.0,Pizza Place,Gym,Skating Rink,Sandwich Place,Coffee Shop,Pub,Pool,Pharmacy,Café,Drugstore
2,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",43.653654,-79.506944,4.0,Pool,River,Park,Chinese Restaurant,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping
3,M8Y,Etobicoke,"Sunnylea,Kingsway Park South East,Mimico NE,Th...",43.636258,-79.498509,4.0,Pool,Baseball Field,Construction & Landscaping,Wings Joint,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store
4,M8Z,Etobicoke,"Kingsway Park South West,The Queensway West,Mi...",43.628841,-79.520999,0.0,Wings Joint,Burger Joint,Gym,Tanning Salon,Fast Food Restaurant,Discount Store,Convenience Store,Flower Shop,Burrito Place,Sandwich Place


In [146]:
# Ignore/drop NaNs
etobicoke_merged.dropna(axis=0, how='any',inplace=True)
etobicoke_merged.reset_index(inplace=True, drop=True)
print("Number of rows after dropping NaNs:", len(etobicoke_merged))
print("Number of NaNs:", etobicoke_merged.isna().sum())

Number of rows after dropping NaNs: 11
Number of NaNs: PostalCode                0
Borough                   0
Neighborhood              0
Latitude                  0
Longitude                 0
Cluster Labels            0
1st Most Common Venue     0
2nd Most Common Venue     0
3rd Most Common Venue     0
4th Most Common Venue     0
5th Most Common Venue     0
6th Most Common Venue     0
7th Most Common Venue     0
8th Most Common Venue     0
9th Most Common Venue     0
10th Most Common Venue    0
dtype: int64


### Let's visualize the resulting clusters

In [147]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(etobicoke_merged['Latitude'], etobicoke_merged['Longitude'], etobicoke_merged['Neighborhood'], etobicoke_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

In [148]:
etobicoke_merged.groupby('Cluster Labels').count()

Unnamed: 0_level_0,PostalCode,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0.0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
1.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
2.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
3.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
4.0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2


#### Cluster 1

In [149]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 0, etobicoke_merged.columns[[2] + list(range(6, etobicoke_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Mimico South,New Toronto,Humber Bay Shores",Café,American Restaurant,Pizza Place,Flower Shop,Fried Chicken Joint,Gym,Liquor Store,Mexican Restaurant,Pet Store,Pharmacy
1,"Alderwood,Long Branch",Pizza Place,Gym,Skating Rink,Sandwich Place,Coffee Shop,Pub,Pool,Pharmacy,Café,Drugstore
4,"Kingsway Park South West,The Queensway West,Mi...",Wings Joint,Burger Joint,Gym,Tanning Salon,Fast Food Restaurant,Discount Store,Convenience Store,Flower Shop,Burrito Place,Sandwich Place
6,"Markland Wood,Old Burnhamthorpe,Bloordale Gard...",Pizza Place,Liquor Store,Beer Store,Convenience Store,Park,Café,Pharmacy,Wings Joint,Fast Food Restaurant,Drugstore
7,Westmount,Pizza Place,Intersection,Sandwich Place,Middle Eastern Restaurant,Coffee Shop,Chinese Restaurant,Baseball Field,Beer Store,Burger Joint,Burrito Place
9,"Mount Olive,South Steeles,Thistletown,Silverst...",Grocery Store,Fried Chicken Joint,Pharmacy,Fast Food Restaurant,Sandwich Place,Beer Store,Pizza Place,Flower Shop,Drugstore,Discount Store


#### Cluster 2

In [150]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 1, etobicoke_merged.columns[[2] + list(range(6, etobicoke_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,"Richview Gardens,Kingsview Village,St. Phillip...",Mobile Phone Shop,Park,Pizza Place,Wings Joint,Grocery Store,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store


#### Cluster 3

In [151]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 2, etobicoke_merged.columns[[2] + list(range(6, etobicoke_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,"Princess Gardens,West Deane Park,Martin Grove,...",Bank,Wings Joint,Coffee Shop,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping


#### Cluster 4

In [152]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 3, etobicoke_merged.columns[[2] + list(range(6, etobicoke_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Northwest,Bar,Drugstore,Rental Car Location,Wings Joint,Coffee Shop,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Discount Store,Convenience Store


#### Cluster 5

In [153]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 4, etobicoke_merged.columns[[2] + list(range(6, etobicoke_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"The Kingsway,Montgomery Road,Old Mill North",Pool,River,Park,Chinese Restaurant,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping
3,"Sunnylea,Kingsway Park South East,Mimico NE,Th...",Pool,Baseball Field,Construction & Landscaping,Wings Joint,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store
