# IBM Applied Data Science Capstone
In this project, the various neighbourhoods of Singapore would be analysed to provide a high-level overview of each neighberhood, with the hope to help expats moving to Singapore to search for their housing options more efficiently. Data are be webscraped from URA website, neighbhourhood coordiantes are fetched through Geocoder, and K-means clustering is used to group the neighbhourhoods together 

### Install and Import Required Pacakges

In [906]:
!pip install geocoder
import pandas as pd
import requests
import numpy as np
from bs4 import BeautifulSoup
import geocoder
!pip install geopy
!pip install folium

import folium
import requests 
import json 
import matplotlib.cm as cm
import matplotlib.colors as colors
import pandas as pd

from pandas.io.json import json_normalize 
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim 
!conda install -c conda-forge geopy 

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



### Webscraping from URA Website

In [907]:
url = 'https://www.ura.gov.sg/realEstateIIWeb/resources/misc/list_of_postal_districts.htm'

URA_data = requests.get(url).text

df = pd.read_html(URA_data)[0]
df = df.drop([0])
df = df.drop(df.columns[1], axis =1)
df = df.drop(df.columns[0], axis =1)
df.columns = ["locations"]
df

Unnamed: 0,locations
1,"Raffles Place, Cecil, Marina, People's Park"
2,"Anson, Tanjong Pagar"
3,"Queenstown, Tiong Bahru"
4,"Telok Blangah, Harbourfront"
5,"Pasir Panjang, Hong Leong Garden, Clementi Ne..."
6,"High Street, Beach Road (part)"
7,"Middle Road, Golden Mile"
8,Little India
9,"Orchard, Cairnhill, River Valley"
10,"Ardmore, Bukit Timah, Holland Road, Tanglin"


### Split Location Column by Delimiter and Join to Form a List of all Neighbourhoods in Singapore

In [984]:
df1 = df.locations.str.split(',', expand = True)
df1 = df1.stack().reset_index()
locations = df1[df1.columns[2]]
locations

0     Raffles  Place
1              Cecil
2             Marina
3      People's Park
4              Anson
           ...      
63    Upper  Thomson
64        Springleaf
65            Yishun
66         Sembawang
67           Seletar
Name: 0, Length: 68, dtype: object

### Get coordinates from geocoder

In [909]:
def get_coord(locations):
    lati_long_coords = None
    while(lati_long_coords is None):
        g = geocoder.arcgis('{}, Singapore'.format(locations))
        lati_long_coords = g.latlng
    return lati_long_coords

get_coord('Jurong') #test for one Neighbhourhood

[1.3208800000000451, 103.74532000000005]

In [910]:
coord = np.zeros((len(locations),2))
pd.DataFrame(coord)

for i, val in enumerate(locations):
    print(val)
    temp = get_coord(val)
    print(temp)
    coord[i,0] = temp[0]
    coord[i,1] = temp[1]

Raffles  Place
[1.2818900000000326, 103.84912000000008]
 Cecil
[1.2904100000000653, 103.85211000000004]
 Marina
[1.292850000000044, 103.76272000000006]
 People's Park
[1.2844400000000746, 103.84194000000008]
Anson
[1.2904100000000653, 103.85211000000004]
  Tanjong Pagar
[1.2788900000000467, 103.84539000000007]
Queenstown
[1.299660000000074, 103.80172000000005]
  Tiong Bahru
[1.2895300000000702, 103.83208000000008]
Telok  Blangah
[1.2750900000000343, 103.81980000000004]
 Harbourfront
[1.2652000000000498, 103.82010000000008]
Pasir  Panjang
[1.2922900000000368, 103.76819000000006]
 Hong Leong Garden
[1.3194400000000428, 103.75472000000008]
 Clementi New Town
[1.3158300000000622, 103.76472000000007]
High  Street
[1.2906187965025282, 103.84945092902026]
 Beach Road (part)
[1.2996691160153269, 103.85930982393218]
Middle  Road
[1.2994617619149866, 103.8528473768238]
 Golden Mile
[1.3023200000000656, 103.86390000000006]
Little  India
[1.3110700000000293, 103.85483000000005]
Orchard
[1.30109000

### Join coordinate data with location/neighbourhood data from URA

In [991]:
coord = pd.DataFrame(coord)
dfmerged = coord.merge(locations, left_index = True, right_index = True)
dfmerged.columns = ['lat','lng','location']
dfmerged.head()

Unnamed: 0,lat,lng,location
0,1.28189,103.84912,Raffles Place
1,1.29041,103.85211,Cecil
2,1.29285,103.76272,Marina
3,1.28444,103.84194,People's Park
4,1.29041,103.85211,Anson


### Get general coordinates of Singapore

In [912]:
address = 'Singapore'
geolocator = Nominatim(user_agent = "max")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Singapore are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Singapore are 1.357107, 103.8194992.


### Plot the neighbourhoods onto map for verification

In [913]:
map_sg = folium.Map(location=[latitude, longitude], zoom_start=11)
for lat, lng,location in zip(dfmerged['lat'], dfmerged['lng'], dfmerged['location']):
    label = '{}'.format(location)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#87cefa',
        fill_opacity=0.5,
        parse_html=False).add_to(map_sg)
map_sg

### Foursquare API Credentials

In [914]:
CLIENT_ID = 'VGOBE1V4EPQP5MA0LJXHVX0N11RJUCMFJPZRKCQJJ3T1M1CR' # your Foursquare ID
CLIENT_SECRET = 'HA2C5KAIA2MVSYNSYHGLE5GDOX4YHUYVI5BYOC4EXVMEBQ5Z' # your Foursquare Secret
ACCESS_TOKEN = 'Z2U3PVNQFSIUOS0LCYPKSWAEWWS5HP5W3TDDWFQX0BCWWLI3' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: VGOBE1V4EPQP5MA0LJXHVX0N11RJUCMFJPZRKCQJJ3T1M1CR
CLIENT_SECRET:HA2C5KAIA2MVSYNSYHGLE5GDOX4YHUYVI5BYOC4EXVMEBQ5Z


### Get Venue Reccomendation from Foursquare for Each Neighbourhood

In [915]:
def getNearbyVenues(names, latitudes, longitudes, radius=3000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)
        
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([( name, lat, lng, v['venue']['name'], v['venue']['location']['lat'], v['venue']['location']['lng'], v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    
    return(nearby_venues)

In [916]:
sg_venues = getNearbyVenues(names=dfmerged['location'], latitudes = dfmerged['lat'], longitudes = dfmerged['lng'])

Raffles  Place
 Cecil
 Marina
 People's Park
Anson
  Tanjong Pagar
Queenstown
  Tiong Bahru
Telok  Blangah
 Harbourfront
Pasir  Panjang
 Hong Leong Garden
 Clementi New Town
High  Street
 Beach Road (part)
Middle  Road
 Golden Mile
Little  India
Orchard
  Cairnhill
 River Valley
Ardmore
  Bukit Timah
 Holland Road
 Tanglin
Watten  Estate
 Novena
 Thomson
Balestier
  Toa Payoh
 Serangoon
Macpherson
  Braddell
Geylang
  Eunos
Katong
  Joo Chiat
 Amber Road
Bedok
  Upper East Coast
 Eastwood
 Kew Drive
Loyang
  Changi
Tampines
  Pasir Ris
Serangoon  Garden
 Hougang
 Punggol
Bishan
  Ang Mo Kio
Upper  Bukit Timah
 Clementi Park
 Ulu Pandan
Jurong
Hillview
  Dairy Farm
 Bukit Panjang
 Choa Chu Kang
Lim  Chu Kang
 Tengah
Kranji
  Woodgrove
Upper  Thomson
 Springleaf
Yishun
  Sembawang
Seletar


### Grouby to Count the Venues in each Neighbourhood

In [986]:
sg_venues.groupby('Neighborhood').count()
sg_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Raffles Place,1.28189,103.84912,Luke's Oyster Bar & Chop House,1.282459,103.847240,Seafood Restaurant
1,Raffles Place,1.28189,103.84912,Amoy Hotel,1.283118,103.848539,Hotel
2,Raffles Place,1.28189,103.84912,Napoleon Food & Wine Bar,1.279925,103.847333,Wine Bar
3,Raffles Place,1.28189,103.84912,Yen Yakiniku,1.281074,103.845743,Japanese Restaurant
4,Raffles Place,1.28189,103.84912,Ritual Gym,1.285965,103.848651,Gym
...,...,...,...,...,...,...,...
2007,Seletar,1.41000,103.87417,Seletar Corner,1.386256,103.873539,Coffee Shop
2008,Seletar,1.41000,103.87417,Burger King,1.391067,103.875760,Fast Food Restaurant
2009,Seletar,1.41000,103.87417,Savoury,1.397798,103.873236,Café
2010,Seletar,1.41000,103.87417,Shaw Theatres,1.391328,103.876035,Multiplex


In [918]:
print('There are {} uniques categories.'.format(len(sg_venues['Venue Category'].unique())))

There are 197 uniques categories.


In [919]:
sg_onehot = pd.get_dummies(sg_venues[['Venue Category']], prefix="", prefix_sep="")

sg_onehot['Neighborhood'] = sg_venues['Neighborhood'] 

fixed_columns = [sg_onehot.columns[-1]] + list(sg_onehot.columns[:-1])
sg_onehot = sg_onehot[fixed_columns]

sg_onehot.head()

Unnamed: 0,Neighborhood,Airport,Airport Lounge,Airport Service,American Restaurant,Aquarium,Art Gallery,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,...,Water Park,Waterfall,Waterfront,Whisky Bar,Wine Bar,Wings Joint,Yoga Studio,Yunnan Restaurant,Zoo,Zoo Exhibit
0,Raffles Place,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Raffles Place,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Raffles Place,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,Raffles Place,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Raffles Place,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Use Mean to Calculate the Frequency of Venue Categories in Each Neighbourhood

In [992]:
sg_grouped = sg_onehot.groupby('Neighborhood').mean().reset_index()
sg_grouped.head()

Unnamed: 0,Neighborhood,Airport,Airport Lounge,Airport Service,American Restaurant,Aquarium,Art Gallery,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,...,Water Park,Waterfall,Waterfront,Whisky Bar,Wine Bar,Wings Joint,Yoga Studio,Yunnan Restaurant,Zoo,Zoo Exhibit
0,Ang Mo Kio,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Braddell,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0
2,Bukit Timah,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Cairnhill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0
4,Changi,0.066667,0.066667,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0


### Get the Top Venus Categories in Each Neighhood

In [921]:
num_top_venues = 5

for hood in sg_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = sg_grouped[sg_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----  Ang Mo Kio----
                venue  freq
0  Chinese Restaurant  0.20
1                Park  0.10
2                 Spa  0.07
3         Coffee Shop  0.07
4              Bakery  0.07


----  Braddell----
                venue  freq
0  Chinese Restaurant  0.27
1      Ice Cream Shop  0.10
2         Supermarket  0.07
3         Coffee Shop  0.07
4         Snack Place  0.07


----  Bukit Timah----
                venue  freq
0  Italian Restaurant  0.13
1   Korean Restaurant  0.10
2   Indian Restaurant  0.07
3      Scenic Lookout  0.07
4              Bakery  0.07


----  Cairnhill----
                 venue  freq
0                Hotel  0.13
1       Clothing Store  0.10
2  Japanese Restaurant  0.07
3       Cosmetics Shop  0.07
4     Asian Restaurant  0.07


----  Changi----
                venue  freq
0             Airport  0.07
1  Chinese Restaurant  0.07
2      Ice Cream Shop  0.07
3      Airport Lounge  0.07
4               Hotel  0.07


----  Dairy Farm----
                venue  f

                venue  freq
0  Chinese Restaurant  0.10
1           BBQ Joint  0.10
2                 Spa  0.07
3   Indian Restaurant  0.07
4                Park  0.07


----Geylang----
                venue  freq
0          Food Court  0.10
1  Seafood Restaurant  0.07
2                Café  0.07
3     Thai Restaurant  0.07
4               Trail  0.03


----High  Street----
                   venue  freq
0                  Hotel  0.20
1                   Park  0.07
2    Japanese Restaurant  0.07
3  Performing Arts Venue  0.07
4            Event Space  0.03


----Hillview----
                venue  freq
0     Nature Preserve  0.10
1  Italian Restaurant  0.10
2          Food Court  0.10
3      Scenic Lookout  0.07
4   Korean Restaurant  0.07


----Jurong----
                    venue  freq
0                  Bakery  0.13
1      Chinese Restaurant  0.10
2     Japanese Restaurant  0.07
3      Dim Sum Restaurant  0.07
4  Furniture / Home Store  0.07


----Katong----
         venue  freq
0  

In [922]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [1027]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = sg_grouped['Neighborhood']

for ind in np.arange(sg_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(sg_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ang Mo Kio,Chinese Restaurant,Park,Spa,Coffee Shop,Bakery,Asian Restaurant,BBQ Joint,Dog Run,Ramen Restaurant,Noodle House
1,Braddell,Chinese Restaurant,Ice Cream Shop,Supermarket,Coffee Shop,Snack Place,Indian Restaurant,Pool,Scenic Lookout,Convenience Store,Seafood Restaurant
2,Bukit Timah,Italian Restaurant,Korean Restaurant,Indian Restaurant,Scenic Lookout,Bakery,Park,Dessert Shop,Food Court,Sandwich Place,Electronics Store
3,Cairnhill,Hotel,Clothing Store,Japanese Restaurant,Cosmetics Shop,Asian Restaurant,Bakery,Chinese Restaurant,Boutique,Tea Room,Sushi Restaurant
4,Changi,Airport,Chinese Restaurant,Ice Cream Shop,Airport Lounge,Hotel,Sandwich Place,Public Art,Hobby Shop,Garden,Spa


### K-means Clustering

In [1028]:
kclusters = 8

sg_grouped_clustering = sg_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(sg_grouped_clustering)

kmeans.labels_[0:10] 

array([4, 4, 1, 6, 3, 0, 0, 3, 3, 5], dtype=int32)

In [1029]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

df_merged = dfmerged.rename(columns={'location':'Neighborhood'})

df_merged = df_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

df_merged.head(68)

Unnamed: 0,lat,lng,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1.281890,103.849120,Raffles Place,2,Hotel,Japanese Restaurant,Yoga Studio,Coffee Shop,Waterfront,Gym / Fitness Center,Bookstore,Dessert Shop,Sandwich Place,Seafood Restaurant
1,1.290410,103.852110,Cecil,2,Hotel,Event Space,Restaurant,Performing Arts Venue,Salad Place,Massage Studio,Park,Spanish Restaurant,French Restaurant,Cocktail Bar
2,1.292850,103.762720,Marina,1,Italian Restaurant,Sandwich Place,French Restaurant,Park,Bakery,Snack Place,Fried Chicken Joint,Sushi Restaurant,Café,Chinese Restaurant
3,1.284440,103.841940,People's Park,3,Seafood Restaurant,Hotel,Japanese Restaurant,Spanish Restaurant,Wine Bar,Bakery,Steakhouse,Gym / Fitness Center,Gym,Buddhist Temple
4,1.290410,103.852110,Anson,2,Hotel,Event Space,Restaurant,Performing Arts Venue,Salad Place,Massage Studio,Park,Spanish Restaurant,French Restaurant,Cocktail Bar
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,1.350250,103.835770,Upper Thomson,4,Park,Thai Restaurant,BBQ Joint,Spa,Snack Place,Food Court,Bakery,Café,Vegetarian / Vegan Restaurant,Chinese Restaurant
64,1.396726,103.821265,Springleaf,4,Chinese Restaurant,Park,Zoo Exhibit,Food Court,Café,Lake,Coffee Shop,Indian Restaurant,Fishing Spot,Italian Restaurant
65,1.436210,103.835820,Yishun,5,Chinese Restaurant,Indian Restaurant,Thai Restaurant,Café,Fried Chicken Joint,Coffee Shop,Food Court,Cantonese Restaurant,Park,Bubble Tea Shop
66,1.447940,103.818910,Sembawang,5,Chinese Restaurant,Indian Restaurant,Thai Restaurant,Coffee Shop,Asian Restaurant,Vegetarian / Vegan Restaurant,Café,Park,Bubble Tea Shop,Supermarket


### Plot the Clusters on Map

In [1044]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(df_merged['lat'], df_merged['lng'], df_merged['Neighborhood'],df_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker([lat, lon], radius=5, popup=label, color=rainbow[cluster-1], fill=True, fill_color=rainbow[cluster-1], fill_opacity=0.7).add_to(map_clusters)
map_clusters

### Cluster 0

In [1038]:
df_merged.loc[df_merged['Cluster Labels'] == 0, df_merged.columns[[2] + list(range(4, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
33,Geylang,Food Court,Seafood Restaurant,Café,Thai Restaurant,Trail,Cantonese Restaurant,Climbing Gym,Soup Place,Burger Joint,Sporting Goods Shop
34,Eunos,Asian Restaurant,Vegetarian / Vegan Restaurant,Coffee Shop,Food Court,Restaurant,Noodle House,Café,Snack Place,Motorcycle Shop,Seafood Restaurant
48,Punggol,Sandwich Place,Park,Trail,Coffee Shop,Supermarket,Chinese Restaurant,Seafood Restaurant,Fish & Chips Shop,Asian Restaurant,Gym
51,Upper Bukit Timah,Italian Restaurant,Coffee Shop,Nature Preserve,Café,Fast Food Restaurant,Park,Vegetarian / Vegan Restaurant,Food Court,Bowling Alley,College Cafeteria
55,Hillview,Nature Preserve,Italian Restaurant,Food Court,Scenic Lookout,Korean Restaurant,Lake,Park,Beer Bar,Dim Sum Restaurant,Electronics Store
56,Dairy Farm,Italian Restaurant,Nature Preserve,Park,Food Court,Supermarket,Coffee Shop,Scenic Lookout,Lake,Fast Food Restaurant,Bistro
57,Bukit Panjang,Coffee Shop,Italian Restaurant,Café,Park,Nature Preserve,Fast Food Restaurant,Vegetarian / Vegan Restaurant,Noodle House,Food Court,Supermarket
58,Choa Chu Kang,Coffee Shop,Asian Restaurant,Sandwich Place,Fast Food Restaurant,Supermarket,Park,Bowling Alley,Golf Course,Café,Chinese Restaurant
60,Tengah,Food Court,Fast Food Restaurant,Farmers Market,Coffee Shop,Bagel Shop,Café,Supermarket,Grocery Store,Gun Range,College Cafeteria
61,Kranji,Food Court,Fast Food Restaurant,Asian Restaurant,Grocery Store,Park,Café,Go Kart Track,Palace,Racetrack,Gift Shop


### Cluster 1

In [1032]:
df_merged.loc[df_merged['Cluster Labels'] == 1, df_merged.columns[[2] + list(range(4, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Marina,Italian Restaurant,Sandwich Place,French Restaurant,Park,Bakery,Snack Place,Fried Chicken Joint,Sushi Restaurant,Café,Chinese Restaurant
10,Pasir Panjang,Japanese Restaurant,Italian Restaurant,Sandwich Place,French Restaurant,Snack Place,Park,Bakery,Café,Thai Restaurant,Museum
11,Hong Leong Garden,Bakery,Chinese Restaurant,Japanese Restaurant,French Restaurant,Dim Sum Restaurant,Soup Place,Snack Place,Clothing Store,Coffee Shop,Skating Rink
12,Clementi New Town,Bakery,French Restaurant,Dim Sum Restaurant,Chinese Restaurant,Asian Restaurant,Thai Restaurant,Soup Place,Snack Place,Coffee Shop,Seafood Restaurant
22,Bukit Timah,Italian Restaurant,Korean Restaurant,Indian Restaurant,Scenic Lookout,Bakery,Park,Dessert Shop,Food Court,Sandwich Place,Electronics Store
23,Holland Road,Bakery,Pizza Place,Italian Restaurant,Indian Restaurant,Japanese Restaurant,Thai Restaurant,Dim Sum Restaurant,Dessert Shop,Shabu-Shabu Restaurant,Pet Store
25,Watten Estate,Pizza Place,Malay Restaurant,Italian Restaurant,Café,Garden,Bistro,Monument / Landmark,Halal Restaurant,Botanical Garden,French Restaurant
52,Clementi Park,Bakery,French Restaurant,Dessert Shop,Trail,Chinese Restaurant,Outlet Store,Café,Clothing Store,Pet Store,Furniture / Home Store
53,Ulu Pandan,Bakery,Italian Restaurant,French Restaurant,Pizza Place,Historic Site,Spa,Chinese Restaurant,Fried Chicken Joint,Gym,Coffee Shop
54,Jurong,Bakery,Chinese Restaurant,Japanese Restaurant,Dim Sum Restaurant,Furniture / Home Store,Café,Gym,Seafood Restaurant,Skating Rink,Food Court


### Cluster 2

In [1039]:
df_merged.loc[df_merged['Cluster Labels'] == 2, df_merged.columns[[2] + list(range(4, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Raffles Place,Hotel,Japanese Restaurant,Yoga Studio,Coffee Shop,Waterfront,Gym / Fitness Center,Bookstore,Dessert Shop,Sandwich Place,Seafood Restaurant
1,Cecil,Hotel,Event Space,Restaurant,Performing Arts Venue,Salad Place,Massage Studio,Park,Spanish Restaurant,French Restaurant,Cocktail Bar
4,Anson,Hotel,Event Space,Restaurant,Performing Arts Venue,Salad Place,Massage Studio,Park,Spanish Restaurant,French Restaurant,Cocktail Bar
13,High Street,Hotel,Park,Japanese Restaurant,Performing Arts Venue,Event Space,Cocktail Bar,Seafood Restaurant,Comfort Food Restaurant,Concert Hall,Restaurant
14,Beach Road (part),Hotel,Event Space,Café,Bakery,Lounge,Salon / Barbershop,Japanese Restaurant,Pool,Food Court,Shopping Mall
15,Middle Road,Hotel,Café,Whisky Bar,Bakery,Shopping Mall,Other Repair Shop,Chinese Restaurant,Gaming Cafe,Park,Museum
16,Golden Mile,Hotel,Café,Bakery,Thai Restaurant,Ice Cream Shop,Park,Italian Restaurant,Food Court,Multiplex,Burger Joint
24,Tanglin,Hotel,Garden,Chinese Restaurant,Italian Restaurant,Spanish Restaurant,Butcher,Park,Performing Arts Venue,Cocktail Bar,Seafood Restaurant


### Cluster 3

In [1034]:
df_merged.loc[df_merged['Cluster Labels'] == 3, df_merged.columns[[2] + list(range(4, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,People's Park,Seafood Restaurant,Hotel,Japanese Restaurant,Spanish Restaurant,Wine Bar,Bakery,Steakhouse,Gym / Fitness Center,Gym,Buddhist Temple
5,Tanjong Pagar,Coffee Shop,Hotel,Spanish Restaurant,Chinese Restaurant,Bakery,Dessert Shop,Seafood Restaurant,Street Food Gathering,Buddhist Temple,Café
6,Queenstown,Café,Chinese Restaurant,Bakery,Modern European Restaurant,Boutique,Greek Restaurant,Butcher,Sporting Goods Shop,Optical Shop,Japanese Restaurant
17,Little India,Indian Restaurant,Hotel,Chinese Restaurant,Vegetarian / Vegan Restaurant,Café,Ice Cream Shop,Bakery,Burger Joint,Gaming Cafe,Cocktail Bar
28,Balestier,Chinese Restaurant,Bakery,Dessert Shop,Ice Cream Shop,Hotel,Snack Place,Restaurant,Pool,Supermarket,Coffee Shop
35,Katong,Trail,Coffee Shop,Bakery,Café,Salad Place,Pub,Deli / Bodega,Dim Sum Restaurant,Restaurant,Dumpling Restaurant
36,Joo Chiat,Bakery,Coffee Shop,Food Court,Vegetarian / Vegan Restaurant,Café,Restaurant,Seafood Restaurant,Salad Place,Deli / Bodega,Japanese Restaurant
37,Amber Road,Café,Coffee Shop,Bakery,Trail,Italian Restaurant,Beach,Snack Place,Seafood Restaurant,Food Court,Pizza Place
43,Changi,Airport,Chinese Restaurant,Ice Cream Shop,Airport Lounge,Hotel,Sandwich Place,Public Art,Hobby Shop,Garden,Spa
44,Tampines,Coffee Shop,Chinese Restaurant,Dessert Shop,Park,Bakery,Scandinavian Restaurant,Supermarket,Gourmet Shop,Bubble Tea Shop,Café


### Cluster 4

In [1040]:
df_merged.loc[df_merged['Cluster Labels'] == 4, df_merged.columns[[2] + list(range(4, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
27,Thomson,Park,Chinese Restaurant,Food Court,Asian Restaurant,BBQ Joint,Spa,Café,Thai Restaurant,Nature Preserve,Trail
29,Toa Payoh,Chinese Restaurant,Bakery,Snack Place,Indian Restaurant,Supermarket,Asian Restaurant,BBQ Joint,Convenience Store,Tea Room,Food Court
30,Serangoon,Chinese Restaurant,Ice Cream Shop,Coffee Shop,Noodle House,Dessert Shop,Vegetarian / Vegan Restaurant,Snack Place,Bubble Tea Shop,Clothing Store,Szechuan Restaurant
31,Macpherson,Asian Restaurant,Chinese Restaurant,Food Court,Vegetarian / Vegan Restaurant,BBQ Joint,Furniture / Home Store,Climbing Gym,Soup Place,Soccer Field,Snack Place
32,Braddell,Chinese Restaurant,Ice Cream Shop,Supermarket,Coffee Shop,Snack Place,Indian Restaurant,Pool,Scenic Lookout,Convenience Store,Seafood Restaurant
46,Serangoon Garden,BBQ Joint,Food Court,Chinese Restaurant,Ice Cream Shop,Asian Restaurant,Italian Restaurant,Scenic Lookout,Park,Clothing Store,Snack Place
47,Hougang,Chinese Restaurant,Bakery,Food Court,Vegetarian / Vegan Restaurant,Snack Place,Dessert Shop,Coffee Shop,Bookstore,Noodle House,Historic Site
49,Bishan,Chinese Restaurant,BBQ Joint,Spa,Indian Restaurant,Park,Snack Place,Bakery,Food Court,Café,Clothing Store
50,Ang Mo Kio,Chinese Restaurant,Park,Spa,Coffee Shop,Bakery,Asian Restaurant,BBQ Joint,Dog Run,Ramen Restaurant,Noodle House
63,Upper Thomson,Park,Thai Restaurant,BBQ Joint,Spa,Snack Place,Food Court,Bakery,Café,Vegetarian / Vegan Restaurant,Chinese Restaurant


### Cluster 5

In [1041]:
df_merged.loc[df_merged['Cluster Labels'] == 5, df_merged.columns[[2] + list(range(4, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
38,Bedok,Dessert Shop,Chinese Restaurant,Café,Beach,Indian Restaurant,Food Court,Asian Restaurant,Noodle House,Thai Restaurant,Italian Restaurant
39,Upper East Coast,Dessert Shop,Beach,Asian Restaurant,Chinese Restaurant,Thai Restaurant,Noodle House,Café,Seafood Restaurant,Fried Chicken Joint,Coffee Shop
40,Eastwood,Dessert Shop,Café,Golf Course,Beach,Food Court,Thai Restaurant,Asian Restaurant,Noodle House,Indian Restaurant,Harbor / Marina
41,Kew Drive,Dessert Shop,Chinese Restaurant,Café,Beach,Indian Restaurant,Food Court,Asian Restaurant,Noodle House,Thai Restaurant,Italian Restaurant
42,Loyang,Café,Airport,Sandwich Place,Indian Restaurant,Candy Store,Chinese Restaurant,Spa,Coffee Shop,Smoke Shop,Shopping Mall
65,Yishun,Chinese Restaurant,Indian Restaurant,Thai Restaurant,Café,Fried Chicken Joint,Coffee Shop,Food Court,Cantonese Restaurant,Park,Bubble Tea Shop
66,Sembawang,Chinese Restaurant,Indian Restaurant,Thai Restaurant,Coffee Shop,Asian Restaurant,Vegetarian / Vegan Restaurant,Café,Park,Bubble Tea Shop,Supermarket


### Cluster 6

In [1036]:
df_merged.loc[df_merged['Cluster Labels'] == 6, df_merged.columns[[2] + list(range(4, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Tiong Bahru,Japanese Restaurant,Café,Yoga Studio,Whisky Bar,Bookstore,Burger Joint,Cupcake Shop,Greek Restaurant,Grocery Store,Park
8,Telok Blangah,Chinese Restaurant,Trail,Scenic Lookout,Park,Dessert Shop,Dim Sum Restaurant,Chocolate Shop,Clothing Store,Cupcake Shop,Department Store
9,Harbourfront,Theme Park,Clothing Store,Theme Park Ride / Attraction,Trail,Shopping Mall,Chocolate Shop,German Restaurant,Souvenir Shop,Café,Snack Place
18,Orchard,Japanese Restaurant,Clothing Store,Hotel,Coffee Shop,Boutique,Tea Room,Sushi Restaurant,Bubble Tea Shop,Speakeasy,Chinese Restaurant
19,Cairnhill,Hotel,Clothing Store,Japanese Restaurant,Cosmetics Shop,Asian Restaurant,Bakery,Chinese Restaurant,Boutique,Tea Room,Sushi Restaurant
20,River Valley,Japanese Restaurant,Clothing Store,Yoga Studio,Sushi Restaurant,Tea Room,Hotel,Bakery,Cosmetics Shop,Speakeasy,Boutique
21,Ardmore,Hotel,Sushi Restaurant,Japanese Restaurant,Italian Restaurant,Scenic Lookout,Speakeasy,Park,Chinese Restaurant,Chocolate Shop,Cocktail Bar
26,Novena,Indian Restaurant,Hotel,Clothing Store,Chinese Restaurant,Bakery,Japanese Restaurant,Vegetarian / Vegan Restaurant,Speakeasy,Snack Place,Supermarket


### Cluster 7

In [1042]:
df_merged.loc[df_merged['Cluster Labels'] == 7, df_merged.columns[[2] + list(range(4, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
59,Lim Chu Kang,Farm,Harbor / Marina,Zoo Exhibit,Arts & Entertainment,Cafeteria,Asian Restaurant,Nature Preserve,Public Art,Pet Service,Optical Shop
