# Segmenting and Clustering Neighborhoods in Toronto   
Peer-graded assignment, part 3   
Coursera Applied data science capstone

# Import libraries

In [8]:
import pandas as pd
import numpy as np

from geopy.geocoders import Nominatim
import folium
from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# 3 Explore and cluster neighborhoods in Toronto

In [9]:
# load the combined dataframe did in previous section:
df = pd.read_csv('../data/df_combined.csv')
df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


### Explore Borough and neighborhood in dataframe

In [10]:
print(f"There are total of {df.Borough.nunique()} boroughs and {df.Neighborhood.nunique()} neighborhoods")

There are total of 10 boroughs and 99 neighborhoods


In [11]:
# List the number of neighborhood for each borough
df.groupby('Borough').size()

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
East York            5
Etobicoke           12
Mississauga          1
North York          24
Scarborough         17
West Toronto         6
York                 5
dtype: int64

Choose the borough with highest number of neighborhood, i.e. North York to segment and cluster. Thus, create a new dataframe of North York

In [12]:
northyork_df = df[df['Borough'] == 'North York'].reset_index(drop = True)
northyork_df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073
5,M3C,North York,Don Mills,43.7259,-79.340923
6,M2H,North York,Hillcrest Village,43.803762,-79.363452
7,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259
8,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
9,M3J,North York,"Northwood Park, York University",43.76798,-79.487262


Get the geographical coordinates of North York

In [14]:
address = 'North York, Toronto, Ontario'

geolocator = Nominatim(user_agent = "toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print('The geograpical coordinate of North York are {:.2f}, {:.2f}'.format(latitude, longitude))

The geograpical coordinate of North York are 43.75, -79.45


Visualize neighborhood in North York 

In [15]:
# create map of North York using latitude and longitude values
map_northyork = folium.Map(location = [latitude, longitude], zoom_start = 11)

# add marker to map on all the neighborhoods
for lat,lng,label in zip(df.Latitude, df.Longitude, df.Neighborhood):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = label,
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.6,
        parse_html = False).add_to(map_northyork)
    
#visualize map
map_northyork

### Define Foursquare Credentials and Version

In [22]:
CLIENT_ID = 'JWDXHM0IORPKPQWHUFSAN0NS3F5IU1RUD42IMK43IX2FX1FD'
CLIENT_SECRET = 'B1OP2PZ2GPDUMRS0I0UOZRLQELLKER4I5HHHARJ4MSFAR1C3'
VERSION = '20200520'

print('Your credentials: ')
print('CLIENT_ID: '+ CLIENT_ID)
print('CLIENT_SECRET: '+ CLIENT_SECRET)

Your credentials: 
CLIENT_ID: JWDXHM0IORPKPQWHUFSAN0NS3F5IU1RUD42IMK43IX2FX1FD
CLIENT_SECRET: B1OP2PZ2GPDUMRS0I0UOZRLQELLKER4I5HHHARJ4MSFAR1C3


In [16]:
# FUNCTION that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
    
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Function to get top 100 venues that are within 700 meters for all neighborhoods in North York

In [17]:
def getNearbyVenues(names, latitudes, longitudes, radius=700):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Run the above function on each neighborhood and create a new dataframe called northyork_venues

In [18]:
northyork_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073


In [28]:
LIMIT = 700
northyork_venues = getNearbyVenues(names = northyork_df.Neighborhood,
                                   latitudes = northyork_df.Latitude,
                                   longitudes = northyork_df.Longitude)

Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills
Glencairn
Don Mills
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Fairview, Henry Farm, Oriole
Northwood Park, York University
Bayview Village
Downsview
York Mills, Silver Hills
Downsview
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview
Bedford Park, Lawrence Manor East
Humberlea, Emery
Willowdale, Willowdale East
Downsview
York Mills West
Willowdale, Willowdale West


In [29]:
print(f" Total list of nearby venues: {northyork_venues.shape[0]}")
northyork_venues.head()

 Total list of nearby venues: 361


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.753259,-79.329656,Corrosion Service Company Limited,43.752432,-79.334661,Construction & Landscaping
3,Parkwoods,43.753259,-79.329656,TTC Stop #8381,43.74841,-79.32627,Bus Stop
4,Parkwoods,43.753259,-79.329656,Three Valleys Park,43.751195,-79.337356,Park


Number of venues returned for each neighborhood

In [30]:
northyork_venues.groupby('Neighborhood')['Venue'].count()

Neighborhood
Bathurst Manor, Wilson Heights, Downsview North    24
Bayview Village                                     7
Bedford Park, Lawrence Manor East                  30
Don Mills                                          32
Downsview                                          35
Fairview, Henry Farm, Oriole                       63
Glencairn                                          17
Hillcrest Village                                   5
Humber Summit                                       4
Humberlea, Emery                                    5
Lawrence Manor, Lawrence Heights                   27
North Park, Maple Leaf Park, Upwood Park            5
Northwood Park, York University                    11
Parkwoods                                           5
Victoria Village                                    8
Willowdale, Newtonbrook                             4
Willowdale, Willowdale East                        64
Willowdale, Willowdale West                         8
York Mills West

In [31]:
print("Thre are {} uniques categories".format(northyork_venues['Venue Category'].nunique()))

Thre are 119 uniques categories


In [32]:
# export to csv
#northyork_venues.to_csv('../data/northyork_venues.csv', index = False)

### Analyze each neighborhood

In [20]:
# one hot encoding
northyork_onehot = pd.get_dummies(northyork_venues[['Venue Category']], prefix = "", prefix_sep="")

# add neighborhood column back to dataframe
northyork_onehot['Neighborhood'] = northyork_venues['Neighborhood']

# move neighborhood column to the first column
fixed_columns = [northyork_onehot.columns[-1]] + list(northyork_onehot.columns[:-1])
northyork_onehot = northyork_onehot[fixed_columns]

northyork_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,...,Tea Room,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Video Store,Vietnamese Restaurant,Women's Store
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### group rows by neighborhood and taking the mean of the frequency of occurance of each category

In [21]:
northyork_grouped = northyork_onehot.groupby('Neighborhood').mean().reset_index()
northyork_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,...,Tea Room,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Video Store,Vietnamese Restaurant,Women's Store
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.033333,...,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0
3,Don Mills,0.0,0.0,0.0,0.03125,0.0,0.0625,0.03125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Downsview,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0
5,"Fairview, Henry Farm, Oriole",0.015873,0.0,0.015873,0.0,0.0,0.015873,0.0,0.0,0.015873,...,0.015873,0.0,0.0,0.015873,0.031746,0.0,0.015873,0.0,0.0,0.015873
6,Glencairn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Humber Summit,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Humberlea, Emery",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Function to sort the frequency occurance of the venues for each neighborhood

In [22]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]   

Create a new dataframe and display the top 10 venues fo reach neighborhood

In [23]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
        
# creat new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns = columns)
neighborhoods_venues_sorted['Neighborhood'] = northyork_grouped['Neighborhood']

for ind in np.arange(northyork_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind,1:] = return_most_common_venues(northyork_grouped.iloc[ind,:], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Park,Bank,Coffee Shop,Mobile Phone Shop,Restaurant,Pizza Place,Pharmacy,Gift Shop,Chinese Restaurant,Middle Eastern Restaurant
1,Bayview Village,Bank,Skating Rink,Grocery Store,Chinese Restaurant,Café,Japanese Restaurant,Department Store,Discount Store,Diner,Dim Sum Restaurant
2,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Restaurant,Bank,Cosmetics Shop,Liquor Store,Café,Pharmacy,Pizza Place
3,Don Mills,Japanese Restaurant,Gym,Coffee Shop,Asian Restaurant,Restaurant,Beer Store,Chinese Restaurant,Intersection,Dim Sum Restaurant,Office
4,Downsview,Vietnamese Restaurant,Sandwich Place,Coffee Shop,Grocery Store,Gas Station,Park,Pharmacy,Pizza Place,Business Service,Falafel Restaurant


### Cluster Neighborhoods   
Run k-means to cluster the neighborhood into 3 clusters

In [24]:
# set number of clusters
kclusters = 3

northyork_grouped_clustering = northyork_grouped.drop('Neighborhood', axis =1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state =42)
kmeans.fit(northyork_grouped_clustering)

# check cluster labels generated fro each row in the dataframe
# Majority fall under Cluster == 1
kmeans.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 2],
      dtype=int32)

Create a new dataframe that includes the clusters (via kmeans) as well as the top 10 venues for each neighborhood.

In [25]:
# add clustering labels
neighborhoods_venues_sorted.insert(0,'Cluster Labels', kmeans.labels_)
neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,"Bathurst Manor, Wilson Heights, Downsview North",Park,Bank,Coffee Shop,Mobile Phone Shop,Restaurant,Pizza Place,Pharmacy,Gift Shop,Chinese Restaurant,Middle Eastern Restaurant
1,1,Bayview Village,Bank,Skating Rink,Grocery Store,Chinese Restaurant,Café,Japanese Restaurant,Department Store,Discount Store,Diner,Dim Sum Restaurant
2,1,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Restaurant,Bank,Cosmetics Shop,Liquor Store,Café,Pharmacy,Pizza Place
3,1,Don Mills,Japanese Restaurant,Gym,Coffee Shop,Asian Restaurant,Restaurant,Beer Store,Chinese Restaurant,Intersection,Dim Sum Restaurant,Office
4,1,Downsview,Vietnamese Restaurant,Sandwich Place,Coffee Shop,Grocery Store,Gas Station,Park,Pharmacy,Pizza Place,Business Service,Falafel Restaurant


In [26]:
northyork_merged = northyork_df

# merge northyork grouped with northyork_df to add latitude/longitude fo each neighborhood
northyork_merged = northyork_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on = 'Neighborhood')
northyork_merged

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0,Park,Construction & Landscaping,Bus Stop,Food & Drink Shop,Electronics Store,Community Center,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
1,M4A,North York,Victoria Village,43.725882,-79.315572,1,Hockey Arena,French Restaurant,Coffee Shop,Pizza Place,Playground,Portuguese Restaurant,Sporting Goods Shop,Park,Food Court,Department Store
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1,Clothing Store,Vietnamese Restaurant,Furniture / Home Store,Fast Food Restaurant,Coffee Shop,Miscellaneous Shop,Café,Cheese Shop,Grocery Store,Park
3,M3B,North York,Don Mills,43.745906,-79.352188,1,Japanese Restaurant,Gym,Coffee Shop,Asian Restaurant,Restaurant,Beer Store,Chinese Restaurant,Intersection,Dim Sum Restaurant,Office
4,M6B,North York,Glencairn,43.709577,-79.445073,1,Pizza Place,Grocery Store,Restaurant,Bus Line,Pub,Coffee Shop,Latin American Restaurant,Japanese Restaurant,Italian Restaurant,Playground
5,M3C,North York,Don Mills,43.7259,-79.340923,1,Japanese Restaurant,Gym,Coffee Shop,Asian Restaurant,Restaurant,Beer Store,Chinese Restaurant,Intersection,Dim Sum Restaurant,Office
6,M2H,North York,Hillcrest Village,43.803762,-79.363452,1,Pharmacy,Bakery,Restaurant,Sandwich Place,Chinese Restaurant,Gas Station,Diner,Comfort Food Restaurant,Community Center,Construction & Landscaping
7,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,1,Park,Bank,Coffee Shop,Mobile Phone Shop,Restaurant,Pizza Place,Pharmacy,Gift Shop,Chinese Restaurant,Middle Eastern Restaurant
8,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,1,Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Food Court,Electronics Store,Toy / Game Store,Bank,Park,Baseball Field
9,M3J,North York,"Northwood Park, York University",43.76798,-79.487262,1,Coffee Shop,Bar,Massage Studio,Fast Food Restaurant,Furniture / Home Store,Miscellaneous Shop,Caribbean Restaurant,Japanese Restaurant,Metro Station,Sushi Restaurant


### Visualize the resulting clusters

In [27]:
#create map
map_clusters = folium.Map(location = [latitude, longitude], zoom_start = 11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map for each clusters
markers_colors = []
for lat, lng, poi, cluster in zip(northyork_merged['Latitude'], northyork_merged['Longitude'], northyork_merged['Neighborhood'], northyork_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + 'Cluster' + str(cluster), parse_html = True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = label,
        color = rainbow[cluster-1],
        fill=True,
        fill_color = rainbow[cluster-1],
        fill_opacity = 0.6,
        parse_html = False).add_to(map_clusters)


#display map
map_clusters

### Examine Clusters

In [28]:
# Cluster == 0: park & trail
northyork_merged.loc[northyork_merged['Cluster Labels'] == 0, northyork_merged.columns[[2] + list(range(6,northyork_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,Park,Construction & Landscaping,Bus Stop,Food & Drink Shop,Electronics Store,Community Center,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
16,"Willowdale, Newtonbrook",Park,Trail,Coffee Shop,Electronics Store,Comfort Food Restaurant,Community Center,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
19,"Humberlea, Emery",Park,Convenience Store,Gas Station,Baseball Field,Discount Store,Event Space,Community Center,Construction & Landscaping,Cosmetics Shop,Deli / Bodega
22,York Mills West,Park,Convenience Store,Tennis Court,Intersection,Electronics Store,Comfort Food Restaurant,Community Center,Construction & Landscaping,Cosmetics Shop,Deli / Bodega


In [29]:
# Cluster == 1 (Majority): City center restaurant and shops
northyork_merged.loc[northyork_merged['Cluster Labels'] == 1, northyork_merged.columns[[2] + list(range(6,northyork_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Victoria Village,Hockey Arena,French Restaurant,Coffee Shop,Pizza Place,Playground,Portuguese Restaurant,Sporting Goods Shop,Park,Food Court,Department Store
2,"Lawrence Manor, Lawrence Heights",Clothing Store,Vietnamese Restaurant,Furniture / Home Store,Fast Food Restaurant,Coffee Shop,Miscellaneous Shop,Café,Cheese Shop,Grocery Store,Park
3,Don Mills,Japanese Restaurant,Gym,Coffee Shop,Asian Restaurant,Restaurant,Beer Store,Chinese Restaurant,Intersection,Dim Sum Restaurant,Office
4,Glencairn,Pizza Place,Grocery Store,Restaurant,Bus Line,Pub,Coffee Shop,Latin American Restaurant,Japanese Restaurant,Italian Restaurant,Playground
5,Don Mills,Japanese Restaurant,Gym,Coffee Shop,Asian Restaurant,Restaurant,Beer Store,Chinese Restaurant,Intersection,Dim Sum Restaurant,Office
6,Hillcrest Village,Pharmacy,Bakery,Restaurant,Sandwich Place,Chinese Restaurant,Gas Station,Diner,Comfort Food Restaurant,Community Center,Construction & Landscaping
7,"Bathurst Manor, Wilson Heights, Downsview North",Park,Bank,Coffee Shop,Mobile Phone Shop,Restaurant,Pizza Place,Pharmacy,Gift Shop,Chinese Restaurant,Middle Eastern Restaurant
8,"Fairview, Henry Farm, Oriole",Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Food Court,Electronics Store,Toy / Game Store,Bank,Park,Baseball Field
9,"Northwood Park, York University",Coffee Shop,Bar,Massage Studio,Fast Food Restaurant,Furniture / Home Store,Miscellaneous Shop,Caribbean Restaurant,Japanese Restaurant,Metro Station,Sushi Restaurant
10,Bayview Village,Bank,Skating Rink,Grocery Store,Chinese Restaurant,Café,Japanese Restaurant,Department Store,Discount Store,Diner,Dim Sum Restaurant


In [30]:
# Cluster == 2: Shopping 
northyork_merged.loc[northyork_merged['Cluster Labels'] == 2, northyork_merged.columns[[2] + list(range(6,northyork_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,"York Mills, Silver Hills",Martial Arts Dojo,Women's Store,Health Food Store,Community Center,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop
