# 1. Table from web as Data Frame

### Import Packages
I am going to utilize BeautifulSoup, request as web table scraping tool

In [1]:
import pandas as pd
from bs4 import BeautifulSoup as bs
import urllib.request
import re

### Web Scraping

In [2]:
source = urllib.request.urlopen('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').read()
soup = bs(source, 'lxml')

### Using 'find, find_all' from BeautifulSoup to get rows and columns

In [3]:
table = soup.table
table = soup.find('table')
table_rows = table.find_all('tr')

df_table = []

for tr in table_rows:
    td = tr.find_all('td')
    row = [i.text for i in td]
    df_table.append(row)

### list into Data Frame and merge into one column

In [4]:
# set column names
df = pd.DataFrame(df_table, columns=['1', '2', '3', '4', '5', '6', '7', '8', '9'])

# replace '\n' string into ''
df = df.replace('\n','', regex=True)

# merge all column values into one column
df_concat = pd.concat([df, df.unstack().reset_index(drop=True).rename('10')], axis=1)

### split value by character length, and ignoring 'Not assigned' value

In [5]:
# Splitting value by character length into 2 columns
df_concat['PostalCode'] = df_concat['10'].str[:3].replace('\n','', regex=True)
df_concat['BN'] = df_concat['10'].str[3:]

# Select 2 columns only for easy preprocessing
dfs = df_concat[['PostalCode', 'BN']]

# Ignoring specific string value
dfs = dfs[~(dfs['BN'] == "Not assigned")]

### split value by special character, and replace special character to others

In [6]:
# split value by spacieal character - first column
dfs['Borough'] = dfs['BN'].str.split('\(', expand=True)[0]

# split value by special character - second column, replace special character to other
dfs['Neighborhood'] = dfs['BN'].str.split('\(', expand=True)[1].replace(' /', ',', regex=True).replace('\)', '', regex=True)

# choose 3 columns as new Data Frame
dfs = dfs[['PostalCode', 'Borough', 'Neighborhood']]

In [7]:
# dfs[dfs['Neighborhood'].isna()]

dfs[dfs['Borough'].str.contains('Queen') == True]

# None
# Enclave of L4W
# Enclave of M4L

Unnamed: 0,PostalCode,Borough,Neighborhood
120,M7A,Queen's Park / Ontario Provincial Government,


## Result 

In [8]:
dfs.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
1,M1B,Scarborough,"Malvern, Rouge"
2,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
3,M1E,Scarborough,"Guildwood, Morningside, West Hill"
4,M1G,Scarborough,Woburn
5,M1H,Scarborough,Cedarbrae
6,M1J,Scarborough,Scarborough Village
7,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
8,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
9,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
10,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [9]:
dfs.shape

(103, 3)

# 2. Get the geographical coordinates of each postal code

### read csv from the url link

In [10]:
geo_df = pd.read_csv('http://cocl.us/Geospatial_data')

In [11]:
print(geo_df.shape)
geo_df.head()

(103, 3)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge two DataFrame into one DataFrame, 'Postal Code' as the Key value

In [12]:
dfs_geo = pd.merge(dfs, geo_df, how='left', left_on='PostalCode', right_on='Postal Code').drop(['Postal Code'], axis=1)

## Result

In [13]:
dfs_geo.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [14]:
dfs_geo.shape

(103, 5)

# 3. Segmenting and Clustering Neighborhoods in Toronto

In [15]:
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import numpy as np

# !conda install -c conda-forge folium=0.5.0 --yes
import folium
print('Libraries imported.')

Libraries imported.


### Understand about dataset - how many boroughs and neighborhods?

In [16]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(dfs_geo['Borough'].unique()),
        dfs_geo.shape[0]
    )
)

The dataframe has 15 boroughs and 103 neighborhoods.


### Get the geolocation of Toronto

In [17]:
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


### Visualize neighborhood into toronto folium map

In [18]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(dfs_geo['Latitude'], dfs_geo['Longitude'], dfs_geo['Borough'], dfs_geo['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Set variable of Foursquare Credential 

In [19]:
# @hidden_cell
CLIENT_ID = 'Z3YXZ4HYRYM2K4YNBYBCRAD01CAOLNLOSYB2JK3SNGLGYJ3N'# Foursquare ID
# Z3YXZ4HYRYM2K4YNBYBCRAD01CAOLNLOSYB2JK3SNGLGYJ3N # google
# AQYAOEFJFWC12AHJDJ1U05SKTTMUOTNG2BYYTFGKIDSDUTUN # icloud
CLIENT_SECRET = 'JYSUOFMK0K3IY5XH4BD5KTXK344D1PVDOILL0MUWZCKZABK5' # Foursquare Secret
# JYSUOFMK0K3IY5XH4BD5KTXK344D1PVDOILL0MUWZCKZABK5 # google
# UUQSDTYH1J2TFESOEB4LWNJBSLTG1PNRIOCZ0W5FJRALMBF1 # icloud
VERSION = '20180605' # Foursquare API version

print('Credentails')

Credentails


### Generate python function
* empty list to store data
* API request with credential above
* GET request
* from the result, extract required data and store into emyty list
* modify list into Data Frame, and set the column name

In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### apply above function into all Neighborhood data

In [21]:
venues = getNearbyVenues(names=dfs_geo['Neighborhood'],
                                   latitudes=dfs_geo['Latitude'],
                                   longitudes=dfs_geo['Longitude']
                        )

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt 
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
WillowdaleSouth
York Mills West
WillowdaleWest
Parkwoods
Don MillsNorth
Don MillsSouth
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
DownsviewEast  
DownsviewWest
DownsviewCentral
DownsviewNorthwest
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
The Danforth  East
The Danforth West, Riverdale
India

In [22]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 273 uniques categories.


### One hot encoing of all venue categories, and insert Neighborhood name at the first column

In [23]:
# one hot encoding
toronto_onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)

(2226, 273)


### To get the frequency of Neighborhood, proceed groupby and calculate mean data

In [24]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
print(toronto_grouped.shape)

(99, 273)


### Create python function to sort the venues in descending order

In [25]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### create the new dataframe and display the top 10 venues for each neighborhood
* Give number of top venues which want to get
* set indicator for 1st, 2nd, 3rd, and so on
* set column names with indicator and add string into the columns list
* create new dataframe from grouped dataframe and new columns
* slice dataframe into number of top venues

In [26]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Skating Rink,Latin American Restaurant,Clothing Store,Breakfast Spot,Lounge,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
1,"Alderwood, Long Branch",Pizza Place,Skating Rink,Sandwich Place,Pool,Pharmacy,Pub,Gym,Coffee Shop,Dog Run,Distribution Center
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Fried Chicken Joint,Frozen Yogurt Shop,Sushi Restaurant,Ice Cream Shop,Deli / Bodega,Pizza Place,Pharmacy,Middle Eastern Restaurant
3,Bayview Village,Chinese Restaurant,Café,Japanese Restaurant,Bank,Women's Store,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Sandwich Place,Coffee Shop,Restaurant,Pharmacy,Juice Bar,Liquor Store,Indian Restaurant,Ice Cream Shop,Fast Food Restaurant


### Set the number of clusters, drop the unnecessary column, and fitting the data into K-means

In [37]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=26).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 3, 0, 0, 0, 0, 0, 1], dtype=int32)

### insert the above label result into top number venue dataframe, and get the geolocation data from dfs

In [38]:
# incase reset cluster number above
# neighborhoods_venues_sorted.drop('Cluster Labels', axis=1, inplace=True)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

dfs_merged = dfs_geo

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
dfs_merged = dfs_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
dfs_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,3.0,Fast Food Restaurant,Women's Store,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,2.0,Bar,Women's Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Event Space
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0.0,Electronics Store,Medical Center,Spa,Mexican Restaurant,Intersection,Rental Car Location,Bank,Diner,Department Store,Dessert Shop
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,Coffee Shop,Korean Restaurant,Soccer Field,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Fried Chicken Joint,Bakery,Bank,Athletics & Sports,Hakka Restaurant,Caribbean Restaurant,Thai Restaurant,Gas Station,Discount Store,Dessert Shop


### Removed NA value from the data frame, and reset the columns data type into integer

In [39]:
dfs_merged = dfs_merged[dfs_merged['Cluster Labels'].notna()]
dfs_merged['Cluster Labels'] = dfs_merged['Cluster Labels'].astype(int)

### Same as above, visualized clusted data with folium

In [40]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))

rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(dfs_merged['Latitude'], dfs_merged['Longitude'], dfs_merged['Neighborhood'], dfs_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 1st Cluster - Date place near by

In [41]:
dfs_merged.loc[dfs_merged['Cluster Labels'] == 0, dfs_merged.columns[[1] + list(range(5, dfs_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,0,Electronics Store,Medical Center,Spa,Mexican Restaurant,Intersection,Rental Car Location,Bank,Diner,Department Store,Dessert Shop
3,Scarborough,0,Coffee Shop,Korean Restaurant,Soccer Field,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
4,Scarborough,0,Fried Chicken Joint,Bakery,Bank,Athletics & Sports,Hakka Restaurant,Caribbean Restaurant,Thai Restaurant,Gas Station,Discount Store,Dessert Shop
5,Scarborough,0,Construction & Landscaping,Playground,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Women's Store
6,Scarborough,0,Discount Store,Convenience Store,Hobby Shop,Department Store,Coffee Shop,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant
7,Scarborough,0,Bakery,Bus Line,Bus Station,Ice Cream Shop,Metro Station,Soccer Field,Park,Intersection,Discount Store,Dim Sum Restaurant
8,Scarborough,0,Skating Rink,Movie Theater,American Restaurant,Motel,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Eastern European Restaurant,Dance Studio
9,Scarborough,0,Skating Rink,College Stadium,Café,General Entertainment,Distribution Center,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
10,Scarborough,0,Indian Restaurant,Chinese Restaurant,Vietnamese Restaurant,Pet Store,Women's Store,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
11,Scarborough,0,Smoke Shop,Bakery,Shopping Mall,Breakfast Spot,Sandwich Place,Auto Garage,Concert Hall,Construction & Landscaping,College Stadium,Eastern European Restaurant


## 2nd Cluster - Park near by

In [42]:
dfs_merged.loc[dfs_merged['Cluster Labels'] == 1, dfs_merged.columns[[1] + list(range(5, dfs_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,North York,1,Park,Convenience Store,Bank,Women's Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
25,North York,1,Park,Food & Drink Shop,Women's Store,Distribution Center,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
30,North York,1,Park,Airport,Women's Store,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center
40,East YorkEast Toronto,1,Park,Metro Station,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Women's Store
50,Downtown Toronto,1,Park,Trail,Playground,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
74,York,1,Park,Women's Store,Market,Distribution Center,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
79,North York,1,Construction & Landscaping,Park,Bakery,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Women's Store
90,Etobicoke,1,River,Park,Women's Store,Distribution Center,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
100,Etobicoke,1,Park,Pizza Place,Sandwich Place,Women's Store,Discount Store,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner


## 3rd Cluster - Bar near by

In [43]:
dfs_merged.loc[dfs_merged['Cluster Labels'] == 2, dfs_merged.columns[[1] + list(range(5, dfs_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,2,Bar,Women's Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Event Space


## 4th Cluster - Restaurant near by

In [44]:
dfs_merged.loc[dfs_merged['Cluster Labels'] == 3, dfs_merged.columns[[1] + list(range(5, dfs_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,3,Fast Food Restaurant,Women's Store,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant
13,Scarborough,3,Pizza Place,Noodle House,Pharmacy,Italian Restaurant,Intersection,Fast Food Restaurant,Bank,Convenience Store,Chinese Restaurant,Thai Restaurant
15,Scarborough,3,Fast Food Restaurant,Chinese Restaurant,Pharmacy,Nail Salon,Grocery Store,Bank,Coffee Shop,Breakfast Spot,Sandwich Place,Pizza Place
19,North York,3,Chinese Restaurant,Café,Japanese Restaurant,Bank,Women's Store,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
24,North York,3,Pizza Place,Grocery Store,Bank,Coffee Shop,Home Service,Pharmacy,Discount Store,Distribution Center,Department Store,Dessert Shop
35,East York,3,Pizza Place,Pharmacy,Intersection,Breakfast Spot,Bank,Gym / Fitness Center,Fast Food Restaurant,Gastropub,Athletics & Sports,Café
80,York,3,Skating Rink,Fast Food Restaurant,Discount Store,Sandwich Place,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Distribution Center,Doner Restaurant
96,North York,3,Empanada Restaurant,Shopping Mall,Pizza Place,Distribution Center,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
98,York,3,Convenience Store,Women's Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Deli / Bodega
99,Etobicoke,3,Pizza Place,Middle Eastern Restaurant,Discount Store,Coffee Shop,Sandwich Place,Chinese Restaurant,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant


## 5th Gift Shop 

In [45]:
dfs_merged.loc[dfs_merged['Cluster Labels'] == 4, dfs_merged.columns[[1] + list(range(5, dfs_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
94,Etobicoke,4,Gift Shop,Women's Store,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant
