### This notebook contains various sections of the code that will be implemented to achieve goals laid out in Coursera IBM Data Science Professional Certicate course. 

In [143]:
import pandas as pd
import numpy as np
import os
import requests
from bs4 import BeautifulSoup
from tabulate import tabulate
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium 
from pandas.io.json import json_normalize

In [144]:
print("Hello Capstone Project!")

Hello Capstone Project!


## Web scraping page to get Toronto burough info

In [145]:
# sending in a request to wikipedia webpage
res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

# creating a BeautifulSoup object
soup = BeautifulSoup(res.content,'lxml')

# parsing the table
table = soup.find_all('table')[0] 

# converting the output to a pandas dataframe
df = pd.read_html(str(table))[0]
df.head(6)

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights


In [146]:
# filling Null neighborhood values with borough value
df['Neighborhood'] = df['Neighborhood'].fillna(df['Borough'])

# dropping rows with Borough = Not assigned
indexNames = df[ df['Borough'] == 'Not assigned' ].index
 
# Delete these row indexes from dataFrame
df.drop(indexNames , inplace=True)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [147]:
df.rename({"Postal code": "Postal Code"}, inplace = True, axis = 1)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [148]:
df.shape

(103, 3)

## Getting the shape file or Latitude and Longitude information for the locations in the above dataframe

#### Couldn't install the package. Installation failed everytime. 

In [149]:
# importing shape file
locDf = pd.read_csv("Geospatial_Coordinates.csv")

In [150]:
locDf.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [151]:
# joining shape file data frame to neighborhood dataframe
newDf = df.join(locDf.set_index('Postal Code'), on='Postal Code')

In [152]:
newDf.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M4A,North York,Victoria Village,43.725882,-79.315572
4,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
5,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494


In [153]:
# dropping rows with Borough is Null
indexNames = newDf[ newDf['Borough'].isnull()].index
 
# Delete these row indexes from dataFrame
newDf.drop(indexNames , inplace=True)

In [154]:
# filling Null neighborhood values with borough value
newDf['Neighborhood'] = newDf['Neighborhood'].fillna(newDf['Borough'])

In [155]:
# dropping rows with Latitude is Null
indexNames = newDf[ newDf['Latitude'].isnull()].index
 
# Delete these row indexes from dataFrame
newDf.drop(indexNames , inplace=True)

# dropping rows with Longitude is Null
indexNames = newDf[ newDf['Longitude'].isnull()].index
 
# Delete these row indexes from dataFrame
newDf.drop(indexNames , inplace=True)

In [156]:
newDf.shape

(103, 5)

## Exploring and Clustering Toronto neighborhoods

In [157]:
#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

In [158]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [159]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(newDf['Latitude'], newDf['Longitude'], newDf['Borough'], newDf['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [160]:
# creating a subset with only Downtown Toronto Borough
torontoNeighborhoods = newDf[newDf['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
torontoNeighborhoods.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [161]:
address = 'Downtown Toronto, ON'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 43.6563221, -79.3809161.


In [162]:
# create map of Toronto using latitude and longitude values
map_dwtoronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(torontoNeighborhoods['Latitude'], torontoNeighborhoods['Longitude'], torontoNeighborhoods['Borough'], torontoNeighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_dwtoronto)  
    
map_dwtoronto

## Using Foursquare 

In [163]:
CLIENT_ID = '32R31NFZEJX05XSVG15ARLETKBKEMJG3LH4KNAD1ZBQL405Z'
CLIENT_SECRET = 'NAJI0UF1UPOCQUDYC3MF0SBIAY3ELS4APRLRMBBMNEUONAV4'
VERSION = '20180605' 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 32R31NFZEJX05XSVG15ARLETKBKEMJG3LH4KNAD1ZBQL405Z
CLIENT_SECRET:NAJI0UF1UPOCQUDYC3MF0SBIAY3ELS4APRLRMBBMNEUONAV4


In [164]:
# get neighborhood's name
torontoNeighborhoods.loc[0, 'Neighborhood']

# get neighborhood's coordinates
neighborhood_latitude = torontoNeighborhoods.loc[0, 'Latitude'] 
neighborhood_longitude = torontoNeighborhoods.loc[0, 'Longitude'] 

neighborhood_name = torontoNeighborhoods.loc[0, 'Neighborhood'] 

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Regent Park / Harbourfront are 43.6542599, -79.3606359.


In [165]:
LIMIT = 100
radius = 500
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url 

'https://api.foursquare.com/v2/venues/explore?&client_id=32R31NFZEJX05XSVG15ARLETKBKEMJG3LH4KNAD1ZBQL405Z&client_secret=NAJI0UF1UPOCQUDYC3MF0SBIAY3ELS4APRLRMBBMNEUONAV4&v=20180605&ll=43.6542599,-79.3606359&radius=500&limit=100'

In [166]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e9e4144b1cac0001cae811a'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 48,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
 

In [167]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [168]:
# printing json data into a dataframe
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) 

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149


In [169]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

48 venues were returned by Foursquare.


In [172]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [173]:
dwtoronto_venues = getNearbyVenues(names=torontoNeighborhoods['Neighborhood'],
                                   latitudes=torontoNeighborhoods['Latitude'],
                                   longitudes=torontoNeighborhoods['Longitude']
                                  )

Regent Park / Harbourfront
Queen's Park / Ontario Provincial Government
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond / Adelaide / King
Harbourfront East / Union Station / Toronto Islands
Toronto Dominion Centre / Design Exchange
Commerce Court / Victoria Hotel
University of Toronto / Harbord
Kensington Market / Chinatown / Grange Park
CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport
Rosedale
Stn A PO Boxes
St. James Town / Cabbagetown
First Canadian Place / Underground city
Church and Wellesley


In [174]:
dwtoronto_venues.shape

(1228, 7)

In [175]:
dwtoronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Regent Park / Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Regent Park / Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Regent Park / Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,Regent Park / Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Regent Park / Harbourfront,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [176]:
print('There are {} uniques categories.'.format(len(dwtoronto_venues['Venue Category'].unique())))

There are 205 uniques categories.


## Analyze Neighborhoods

In [177]:
# one hot encoding
toronto_onehot = pd.get_dummies(dwtoronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = dwtoronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Thai Restaurant,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [178]:
toronto_onehot.shape

(1228, 205)

In [179]:
dwtoronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
dwtoronto_grouped.shape

(19, 205)

In [180]:
num_top_venues = 5

for hood in dwtoronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = dwtoronto_grouped[dwtoronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0         Coffee Shop  0.07
1            Beer Bar  0.04
2  Italian Restaurant  0.04
3      Farmers Market  0.04
4                Café  0.04


----CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport----
                venue  freq
0     Airport Service  0.20
1      Airport Lounge  0.13
2             Airport  0.07
3    Sculpture Garden  0.07
4  Airport Food Court  0.07


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.20
1  Italian Restaurant  0.06
2      Sandwich Place  0.06
3                Café  0.05
4     Bubble Tea Shop  0.03


----Christie----
           venue  freq
0  Grocery Store  0.21
1           Café  0.16
2    Coffee Shop  0.11
3           Park  0.11
4      Nightclub  0.05


----Church and Wellesley----
                 venue  freq
0     Sushi Restaurant  0.07
1          Coffee Shop  0.07
2  Japanese Restaurant  0.06
3           Restaura

In [181]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [182]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = dwtoronto_grouped['Neighborhood']

for ind in np.arange(dwtoronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(dwtoronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Café,Cheese Shop,Farmers Market,Bakery,Beer Bar,Restaurant,Italian Restaurant,Seafood Restaurant,Cocktail Bar
1,CN Tower / King and Spadina / Railway Lands / ...,Airport Service,Airport Lounge,Harbor / Marina,Boutique,Plane,Boat or Ferry,Sculpture Garden,Bar,Airport Terminal,Airport Gate
2,Central Bay Street,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Japanese Restaurant,Salad Place,Fried Chicken Joint,Restaurant,Middle Eastern Restaurant,Bubble Tea Shop
3,Christie,Grocery Store,Café,Coffee Shop,Park,Candy Store,Diner,Italian Restaurant,Restaurant,Baby Store,Athletics & Sports
4,Church and Wellesley,Sushi Restaurant,Coffee Shop,Japanese Restaurant,Restaurant,Gastropub,Pub,Hotel,Dance Studio,Yoga Studio,Burger Joint


## Cluster Neighborhoods

In [183]:
# set number of clusters
kclusters = 5

dwtoronto_grouped_clustering = dwtoronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dwtoronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 2, 0, 4, 1, 1, 1, 1, 1, 1])

In [184]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

dwtoronto_merged = torontoNeighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
dwtoronto_merged = dwtoronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

dwtoronto_merged.head() 

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636,0,Coffee Shop,Pub,Park,Bakery,Restaurant,Breakfast Spot,Café,Theater,Shoe Store,Performing Arts Venue
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494,0,Coffee Shop,Sushi Restaurant,Diner,Yoga Studio,College Cafeteria,Sandwich Place,Restaurant,Burger Joint,Burrito Place,Café
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1,Clothing Store,Coffee Shop,Café,Japanese Restaurant,Cosmetics Shop,Restaurant,Bubble Tea Shop,Middle Eastern Restaurant,Bookstore,Ramen Restaurant
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1,Café,Coffee Shop,Gastropub,Cocktail Bar,Hotel,American Restaurant,Italian Restaurant,Clothing Store,Art Gallery,Restaurant
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,1,Coffee Shop,Café,Cheese Shop,Farmers Market,Bakery,Beer Bar,Restaurant,Italian Restaurant,Seafood Restaurant,Cocktail Bar


In [185]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(dwtoronto_merged['Latitude'], dwtoronto_merged['Longitude'], dwtoronto_merged['Neighborhood'], dwtoronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [188]:
# Cluster 1
dwtoronto_merged.loc[dwtoronto_merged['Cluster Labels'] == 0, dwtoronto_merged.columns[[1] + list(range(5, dwtoronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0,Coffee Shop,Pub,Park,Bakery,Restaurant,Breakfast Spot,Café,Theater,Shoe Store,Performing Arts Venue
1,Downtown Toronto,0,Coffee Shop,Sushi Restaurant,Diner,Yoga Studio,College Cafeteria,Sandwich Place,Restaurant,Burger Joint,Burrito Place,Café
5,Downtown Toronto,0,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Japanese Restaurant,Salad Place,Fried Chicken Joint,Restaurant,Middle Eastern Restaurant,Bubble Tea Shop


In [189]:
# Cluster 2
dwtoronto_merged.loc[dwtoronto_merged['Cluster Labels'] == 1, dwtoronto_merged.columns[[1] + list(range(5, dwtoronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,1,Clothing Store,Coffee Shop,Café,Japanese Restaurant,Cosmetics Shop,Restaurant,Bubble Tea Shop,Middle Eastern Restaurant,Bookstore,Ramen Restaurant
3,Downtown Toronto,1,Café,Coffee Shop,Gastropub,Cocktail Bar,Hotel,American Restaurant,Italian Restaurant,Clothing Store,Art Gallery,Restaurant
4,Downtown Toronto,1,Coffee Shop,Café,Cheese Shop,Farmers Market,Bakery,Beer Bar,Restaurant,Italian Restaurant,Seafood Restaurant,Cocktail Bar
7,Downtown Toronto,1,Coffee Shop,Café,Restaurant,Hotel,Clothing Store,Thai Restaurant,Gym,American Restaurant,Deli / Bodega,Seafood Restaurant
8,Downtown Toronto,1,Coffee Shop,Aquarium,Italian Restaurant,Restaurant,Café,Hotel,Sporting Goods Shop,Fried Chicken Joint,Scenic Lookout,Brewery
9,Downtown Toronto,1,Coffee Shop,Hotel,Café,Restaurant,Japanese Restaurant,American Restaurant,Salad Place,Seafood Restaurant,Sporting Goods Shop,Beer Bar
10,Downtown Toronto,1,Coffee Shop,Restaurant,Café,Hotel,Gym,American Restaurant,Deli / Bodega,Japanese Restaurant,Seafood Restaurant,Italian Restaurant
11,Downtown Toronto,1,Café,Yoga Studio,Bakery,Restaurant,Bookstore,Japanese Restaurant,Bar,Italian Restaurant,Theater,Bank
12,Downtown Toronto,1,Café,Coffee Shop,Vietnamese Restaurant,Mexican Restaurant,Grocery Store,Bakery,Gaming Cafe,Vegetarian / Vegan Restaurant,Dessert Shop,Bar
15,Downtown Toronto,1,Coffee Shop,Italian Restaurant,Seafood Restaurant,Café,Hotel,Japanese Restaurant,Beer Bar,Restaurant,Breakfast Spot,Cocktail Bar


In [190]:
# Cluster 3
dwtoronto_merged.loc[dwtoronto_merged['Cluster Labels'] == 2, dwtoronto_merged.columns[[1] + list(range(5, dwtoronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Downtown Toronto,2,Airport Service,Airport Lounge,Harbor / Marina,Boutique,Plane,Boat or Ferry,Sculpture Garden,Bar,Airport Terminal,Airport Gate


In [191]:
# Cluster 4
dwtoronto_merged.loc[dwtoronto_merged['Cluster Labels'] == 3, dwtoronto_merged.columns[[1] + list(range(5, dwtoronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Downtown Toronto,3,Park,Trail,Playground,Women's Store,Cupcake Shop,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store


In [192]:
# Cluster 5
dwtoronto_merged.loc[dwtoronto_merged['Cluster Labels'] == 4, dwtoronto_merged.columns[[1] + list(range(5, dwtoronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Downtown Toronto,4,Grocery Store,Café,Coffee Shop,Park,Candy Store,Diner,Italian Restaurant,Restaurant,Baby Store,Athletics & Sports
