## This is an example Jupyter notebook to be used in the capstone project of the Applied Data Science capstone course of IBM specialization from Coursera

In [1]:
import pandas as pd
import numpy as np

!conda install -c anaconda lxml --yes
!conda install -c conda-forge geocoder --yes

import geocoder

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - lxml


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2020.1.1   |                0         132 KB  anaconda
    certifi-2020.4.5.2         |           py36_0         160 KB  anaconda
    libxslt-1.1.33             |       h7d1a2b0_0         577 KB  anaconda
    lxml-4.5.1                 |   py36hefd8a0e_0         1.4 MB  anaconda
    openssl-1.1.1g             |       h7b6447c_0         3.8 MB  anaconda
    ------------------------------------------------------------
                                           Total:         6.0 MB

The following NEW packages will be INSTALLED:

  libxslt            anaconda/linux-64::libxslt-1.1.33-h7d1a2b0_0
  lxml               anaconda/linu

## Part 1: Importing the data from wikipedia page to a dataframe

In [2]:
#Reading the table from wikipedia
df = pd.read_html('http://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
df = df[0]
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [3]:
#Removing Not Assigned Borough entries
indexValue = df[df['Borough'] == 'Not assigned'].index
df.drop(indexValue, 0, inplace = True)
df = df.reset_index(drop = True)
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [4]:
#Number of lines and features of the dataframe
df.shape

(103, 3)

## Part 2: Using geospatial data to find the latitude and longitude of a given postal code

In [7]:
#Taking only the postal codes from the dataframe
postal_codes = df['Postal Code']
postal_codes.head()

0    M3A
1    M4A
2    M5A
3    M6A
4    M7A
Name: Postal Code, dtype: object

In [8]:
!wget -q -O 'geospatial_data.csv' http://cocl.us/Geospatial_data
geo_data = pd.read_csv('geospatial_data.csv')
geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:
#List where latitude and longitude will be stored
lat_long = []

#interating over the postal codes to find the latitude and longitude in the geospatial data
for postal_code in postal_codes:
    latitude = geo_data[geo_data['Postal Code'] == postal_code]['Latitude'].item()
    longitude = geo_data[geo_data['Postal Code'] == postal_code]['Longitude'].item()
    
    lat_long.append([latitude,longitude])

#Transforming the latitude and longitude list into a dataframe    
lat_long_df = pd.DataFrame(lat_long, columns = ['Latitude','Longitude'])    

#Merging the two dataframes 
df_ll = pd.concat([df, lat_long_df],axis = 1)
df_ll.head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


## Part 3: Exploring and clustering the neighborhoods in Toronto

In [10]:
#Picking only the Toronto area from the dataframe and droping the Postal Code column
toronto = df_ll[df_ll['Borough'].str.contains('Toronto')].drop('Postal Code', 1).reset_index(drop = True)
toronto.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,Downtown Toronto,St. James Town,43.651494,-79.375418
4,East Toronto,The Beaches,43.676357,-79.293031


In [11]:
#Defining Foursquare credentials
CLIENT_ID = 'PAQ5TNLWASWMWRVUIMWAG0OXGQF1K3XTW5TLJS3UMGCHT2UF' 
CLIENT_SECRET = 'EXNMUSGCWF3I0YTQBMK1MDJPTGI24LRCAZC001T3PAIRCBW5' 
VERSION = '20180605' 
LIMIT = 100

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PAQ5TNLWASWMWRVUIMWAG0OXGQF1K3XTW5TLJS3UMGCHT2UF
CLIENT_SECRET:EXNMUSGCWF3I0YTQBMK1MDJPTGI24LRCAZC001T3PAIRCBW5


In [12]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [13]:
#Funtion that search for nearby venues within 500m radius from a given location
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [14]:
#Runnig the function to get venues around Toronto neighborhoods
toronto_venues = getNearbyVenues(names = toronto['Neighborhood'],
                                   latitudes = toronto['Latitude'],
                                   longitudes = toronto['Longitude']
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West, Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
R

In [15]:
print(toronto_venues.shape)
toronto_venues.head()

(1627, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park, Harbourfront",43.65426,-79.360636,Corktown Common,43.655618,-79.356211,Park


In [16]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 240 uniques categories.


In [17]:
#Generating a column for each category
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']])

toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]
toronto_onehot.head()

Unnamed: 0,Neighborhood,Venue Category_Afghan Restaurant,Venue Category_Airport,Venue Category_Airport Food Court,Venue Category_Airport Gate,Venue Category_Airport Lounge,Venue Category_Airport Service,Venue Category_Airport Terminal,Venue Category_American Restaurant,Venue Category_Antique Shop,...,Venue Category_Toy / Game Store,Venue Category_Trail,Venue Category_Train Station,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Video Game Store,Venue Category_Vietnamese Restaurant,Venue Category_Wine Bar,Venue Category_Wine Shop,Venue Category_Women's Store,Venue Category_Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
toronto_onehot.shape

(1627, 241)

In [19]:
#Grouping the neighborhoods with respect to the categories and taking the mean
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Venue Category_Afghan Restaurant,Venue Category_Airport,Venue Category_Airport Food Court,Venue Category_Airport Gate,Venue Category_Airport Lounge,Venue Category_Airport Service,Venue Category_Airport Terminal,Venue Category_American Restaurant,Venue Category_Antique Shop,...,Venue Category_Toy / Game Store,Venue Category_Trail,Venue Category_Train Station,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Video Game Store,Venue Category_Vietnamese Restaurant,Venue Category_Wine Bar,Venue Category_Wine Shop,Venue Category_Women's Store,Venue Category_Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0625,0.0625,0.0625,0.125,0.1875,0.0625,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.014925,0.0,0.0,0.014925,0.0,0.0,0.014925


In [32]:
#Clustering the neighborhoods using KMeans
k = 3
toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters = k, n_init = 10, random_state = 0).fit(toronto_grouped_clustering)

kmeans.labels_

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 1, 2,
       2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2], dtype=int32)

In [34]:
#Getting the most common venues of a neighborhood
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe with top 10 places in order
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Venue Category_Coffee Shop,Venue Category_Cocktail Bar,Venue Category_Seafood Restaurant,Venue Category_Bakery,Venue Category_Restaurant,Venue Category_Cheese Shop,Venue Category_Café,Venue Category_Beer Bar,Venue Category_Jazz Club,Venue Category_Hotel
1,"Brockton, Parkdale Village, Exhibition Place",Venue Category_Café,Venue Category_Performing Arts Venue,Venue Category_Breakfast Spot,Venue Category_Coffee Shop,Venue Category_Bakery,Venue Category_Stadium,Venue Category_Burrito Place,Venue Category_Restaurant,Venue Category_Climbing Gym,Venue Category_Pet Store
2,"Business reply mail Processing Centre, South C...",Venue Category_Light Rail Station,Venue Category_Yoga Studio,Venue Category_Garden Center,Venue Category_Skate Park,Venue Category_Restaurant,Venue Category_Recording Studio,Venue Category_Pizza Place,Venue Category_Park,Venue Category_Garden,Venue Category_Spa
3,"CN Tower, King and Spadina, Railway Lands, Har...",Venue Category_Airport Service,Venue Category_Airport Lounge,Venue Category_Boutique,Venue Category_Harbor / Marina,Venue Category_Coffee Shop,Venue Category_Boat or Ferry,Venue Category_Rental Car Location,Venue Category_Sculpture Garden,Venue Category_Airport Terminal,Venue Category_Airport Gate
4,Central Bay Street,Venue Category_Coffee Shop,Venue Category_Italian Restaurant,Venue Category_Japanese Restaurant,Venue Category_Sandwich Place,Venue Category_Café,Venue Category_Salad Place,Venue Category_Dessert Shop,Venue Category_Middle Eastern Restaurant,Venue Category_Thai Restaurant,Venue Category_Department Store


In [35]:
#Adding cluster labels to neighborhood dataframe
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# merge toronto_grouped with toronto data to add latitude/longitude for each neighborhood with its top 10 venues
toronto_merged = toronto.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() 

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Venue Category_Coffee Shop,Venue Category_Pub,Venue Category_Bakery,Venue Category_Park,Venue Category_Breakfast Spot,Venue Category_Café,Venue Category_Theater,Venue Category_Yoga Studio,Venue Category_Farmers Market,Venue Category_Restaurant
1,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2,Venue Category_Coffee Shop,Venue Category_Sushi Restaurant,Venue Category_Bank,Venue Category_Bar,Venue Category_Beer Bar,Venue Category_Smoothie Shop,Venue Category_Sandwich Place,Venue Category_Burrito Place,Venue Category_Café,Venue Category_Park
2,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Venue Category_Clothing Store,Venue Category_Coffee Shop,Venue Category_Cosmetics Shop,Venue Category_Bubble Tea Shop,Venue Category_Middle Eastern Restaurant,Venue Category_Café,Venue Category_Italian Restaurant,Venue Category_Japanese Restaurant,Venue Category_Tea Room,Venue Category_Bookstore
3,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Venue Category_Coffee Shop,Venue Category_Café,Venue Category_Cocktail Bar,Venue Category_American Restaurant,Venue Category_Gastropub,Venue Category_Restaurant,Venue Category_Italian Restaurant,Venue Category_Creperie,Venue Category_Lingerie Store,Venue Category_Moroccan Restaurant
4,East Toronto,The Beaches,43.676357,-79.293031,0,Venue Category_Neighborhood,Venue Category_Health Food Store,Venue Category_Pub,Venue Category_Trail,Venue Category_Yoga Studio,Venue Category_Dog Run,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center


In [41]:
# create map
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2  for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, neigh, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(neigh) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(toronto_map)
       
toronto_map

### Exploring each cluster further

In [42]:
#Cluster 0
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5,toronto_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,The Beaches,Venue Category_Neighborhood,Venue Category_Health Food Store,Venue Category_Pub,Venue Category_Trail,Venue Category_Yoga Studio,Venue Category_Dog Run,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center
18,Lawrence Park,Venue Category_Park,Venue Category_Swim School,Venue Category_Bus Line,Venue Category_Dim Sum Restaurant,Venue Category_Event Space,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Dumpling Restaurant,Venue Category_Donut Shop
20,Davisville North,Venue Category_Hotel,Venue Category_Gym,Venue Category_Department Store,Venue Category_Sandwich Place,Venue Category_Breakfast Spot,Venue Category_Food & Drink Shop,Venue Category_Park,Venue Category_General Travel,Venue Category_Greek Restaurant,Venue Category_Eastern European Restaurant
21,"Forest Hill North & West, Forest Hill Road Park",Venue Category_Park,Venue Category_Jewelry Store,Venue Category_Trail,Venue Category_Sushi Restaurant,Venue Category_Dessert Shop,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Dumpling Restaurant,Venue Category_Donut Shop
33,Rosedale,Venue Category_Park,Venue Category_Playground,Venue Category_Trail,Venue Category_Department Store,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Dumpling Restaurant,Venue Category_Donut Shop,Venue Category_Doner Restaurant


- Cluster 0 is the park and outdoors activities cluster

In [43]:
#Cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5,toronto_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,"Moore Park, Summerhill East",Venue Category_Tennis Court,Venue Category_Yoga Studio,Venue Category_Dessert Shop,Venue Category_Event Space,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Dumpling Restaurant,Venue Category_Donut Shop,Venue Category_Doner Restaurant


- Cluster 1 is the sports and events cluster

In [44]:
#Cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5,toronto_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Regent Park, Harbourfront",Venue Category_Coffee Shop,Venue Category_Pub,Venue Category_Bakery,Venue Category_Park,Venue Category_Breakfast Spot,Venue Category_Café,Venue Category_Theater,Venue Category_Yoga Studio,Venue Category_Farmers Market,Venue Category_Restaurant
1,"Queen's Park, Ontario Provincial Government",Venue Category_Coffee Shop,Venue Category_Sushi Restaurant,Venue Category_Bank,Venue Category_Bar,Venue Category_Beer Bar,Venue Category_Smoothie Shop,Venue Category_Sandwich Place,Venue Category_Burrito Place,Venue Category_Café,Venue Category_Park
2,"Garden District, Ryerson",Venue Category_Clothing Store,Venue Category_Coffee Shop,Venue Category_Cosmetics Shop,Venue Category_Bubble Tea Shop,Venue Category_Middle Eastern Restaurant,Venue Category_Café,Venue Category_Italian Restaurant,Venue Category_Japanese Restaurant,Venue Category_Tea Room,Venue Category_Bookstore
3,St. James Town,Venue Category_Coffee Shop,Venue Category_Café,Venue Category_Cocktail Bar,Venue Category_American Restaurant,Venue Category_Gastropub,Venue Category_Restaurant,Venue Category_Italian Restaurant,Venue Category_Creperie,Venue Category_Lingerie Store,Venue Category_Moroccan Restaurant
5,Berczy Park,Venue Category_Coffee Shop,Venue Category_Cocktail Bar,Venue Category_Seafood Restaurant,Venue Category_Bakery,Venue Category_Restaurant,Venue Category_Cheese Shop,Venue Category_Café,Venue Category_Beer Bar,Venue Category_Jazz Club,Venue Category_Hotel
6,Central Bay Street,Venue Category_Coffee Shop,Venue Category_Italian Restaurant,Venue Category_Japanese Restaurant,Venue Category_Sandwich Place,Venue Category_Café,Venue Category_Salad Place,Venue Category_Dessert Shop,Venue Category_Middle Eastern Restaurant,Venue Category_Thai Restaurant,Venue Category_Department Store
7,Christie,Venue Category_Grocery Store,Venue Category_Café,Venue Category_Park,Venue Category_Restaurant,Venue Category_Candy Store,Venue Category_Baby Store,Venue Category_Diner,Venue Category_Italian Restaurant,Venue Category_Coffee Shop,Venue Category_Nightclub
8,"Richmond, Adelaide, King",Venue Category_Coffee Shop,Venue Category_Café,Venue Category_Restaurant,Venue Category_Thai Restaurant,Venue Category_Gym,Venue Category_Deli / Bodega,Venue Category_Hotel,Venue Category_Steakhouse,Venue Category_Pizza Place,Venue Category_Sushi Restaurant
9,"Dufferin, Dovercourt Village",Venue Category_Pharmacy,Venue Category_Bakery,Venue Category_Brewery,Venue Category_Middle Eastern Restaurant,Venue Category_Bar,Venue Category_Café,Venue Category_Furniture / Home Store,Venue Category_Bank,Venue Category_Music Venue,Venue Category_Park
10,"Harbourfront East, Union Station, Toronto Islands",Venue Category_Coffee Shop,Venue Category_Aquarium,Venue Category_Hotel,Venue Category_Café,Venue Category_Restaurant,Venue Category_Scenic Lookout,Venue Category_Sporting Goods Shop,Venue Category_Brewery,Venue Category_Fried Chicken Joint,Venue Category_Park


- Cluster 2 is the Coffe shop, restaurants and pubs cluster