#### Get data from wikipedia

In [1]:
# Get Data from wikipedia
import pandas as pd
import requests

url = "https://id.wikipedia.org/wiki/Daftar_kecamatan_dan_kelurahan_di_Daerah_Khusus_Ibukota_Jakarta"

r = requests.get(url)
df_list = pd.read_html(r.text) # this parses all the tables in webpages to a list


In [2]:
borough_name = ['Jakarta Pusat', 'Jakarta Utara', 'Jakarta Timur', 'Jakarta Selatan', 'Jakarta Barat', 'Kepulauan Seribu']

#### We concatenate all dataframe in df_list

We drop Kepulauan Seribu borough because it's not a business borough

In [3]:
list_neighborhood = []
for i in range(1,6):
    print(i)
    temp = df_list[i].dropna()
    try:
        col_to_drop = ['Kode Kemendagri', 'Jumlah Kelurahan', 'Daftar Kelurahan']
        temp = temp.drop(col_to_drop, axis=1)
    except:
        col_to_drop = ['Kemendagri', 'Jumlah Kelurahan', 'Daftar Kelurahan']
        temp = temp.drop(col_to_drop, axis=1)
    temp.rename(columns={'Kecamatan':'Neighborhood'}, inplace=True)
    temp['Borough'] = borough_name[i-1]
    cols = temp.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    temp = temp[cols]
    list_neighborhood.append(temp)

1
2
3
4
5


Print the dataframe

In [4]:
df = pd.concat(list_neighborhood, ignore_index=True)
df

Unnamed: 0,Borough,Neighborhood
0,Jakarta Pusat,Cempaka Putih
1,Jakarta Pusat,Gambir
2,Jakarta Pusat,Johar Baru
3,Jakarta Pusat,Kemayoran
4,Jakarta Pusat,Menteng
5,Jakarta Pusat,Sawah Besar
6,Jakarta Pusat,Senen
7,Jakarta Pusat,Tanah Abang
8,Jakarta Utara,Cilincing
9,Jakarta Utara,Kelapa Gading


#### Use geopy to find latitude and longitude for each corresponding neighborhood

In [5]:
import geopy
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="jakarta_agent")
lat_list, long_list = [],[]

for name, brgh in zip(df.Neighborhood, df.Borough):
    geolocator = Nominatim(user_agent="jakarta_agent")
    temp = name + ', ' + brgh
    location = geolocator.geocode(temp, timeout=3)
    lat_list.append(location.latitude)
    long_list.append(location.longitude)

df['Latitude'] = lat_list
df['Longitude'] = long_list

df

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Jakarta Pusat,Cempaka Putih,-6.181214,106.868548
1,Jakarta Pusat,Gambir,-6.171163,106.816744
2,Jakarta Pusat,Johar Baru,-6.183125,106.855332
3,Jakarta Pusat,Kemayoran,-6.162546,106.85689
4,Jakarta Pusat,Menteng,-6.195026,106.832224
5,Jakarta Pusat,Sawah Besar,-6.155891,106.83358
6,Jakarta Pusat,Senen,-6.184971,106.843235
7,Jakarta Pusat,Tanah Abang,-6.205258,106.8095
8,Jakarta Utara,Cilincing,-6.129015,106.944454
9,Jakarta Utara,Kelapa Gading,-6.159938,106.902483


#### Use folium to show the map of Jakarta and its neighborhood spot

In [6]:
import folium

#Longitude a d latitude of Jakarta
latitude = -6.21462 
longitude = 106.84513

In [7]:
#Create map of toronto
map_jakarta = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_jakarta)  
    
map_jakarta

#### Insert credential

In [8]:
# Insert credential
CLIENT_ID = 'VOWLFKJCLNN5HY3KIKLHXCVRUU5SNKCW3LE13UZSAMM1WOQJ' # your Foursquare ID
CLIENT_SECRET = 'AIALEK502TNFTN4TFYDIFGY3X4AQSJ0GQ0QRQNV5C5EBARDA' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
LIMIT = 100
radius = 500 # d

Your credentails:
CLIENT_ID: VOWLFKJCLNN5HY3KIKLHXCVRUU5SNKCW3LE13UZSAMM1WOQJ
CLIENT_SECRET:AIALEK502TNFTN4TFYDIFGY3X4AQSJ0GQ0QRQNV5C5EBARDA


#### Define function to get venue near the given latitude and longitude of neighborhood

In [9]:
def getnearbyvenues(name, brgh, lat, lng,  radius=500):    
    print(name)

    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        lat, 
        lng, 
        radius, 
        LIMIT)

    results = requests.get(url).json()["response"]['groups'][0]['items']
    return [(name,
            brgh,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results]


#### Column name of Dataframe

In [10]:
col = ['Neighborhood', 'Borough',
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']

#### Looping over dataframe to get venue near given latitude and longitude of neighborhood

In [11]:
res = []
for name, brgh, lat, long in zip(df['Neighborhood'], df['Borough'], df['Latitude'], df['Longitude']):
    try:
        res.append(getnearbyvenues(name, brgh, lat, long))
    except:
        print('Error in Neighborhood: ', name)


Cempaka Putih
Gambir
Johar Baru
Kemayoran
Menteng
Sawah Besar
Senen
Tanah Abang
Cilincing
Kelapa Gading
Koja
Pademangan
Penjaringan
Tanjung Priok
Cakung
Cipayung
Ciracas
Duren Sawit
Jatinegara
Kramat Jati
Makasar
Matraman
Pasar Rebo
Pulo Gadung
Cilandak
Jagakarsa
Kebayoran Baru
Kebayoran Lama
Mampang Prapatan
Pancoran
Pasar Minggu
Pesanggrahan
Setiabudi
Tebet
Cengkareng
Grogol Petamburan
Taman Sari
Tambora
Kebon Jeruk
Kalideres
Palmerah
Kembangan


#### Convert the reault to dataframe

In [12]:
jakarta_venues = pd.DataFrame([item for sublist in res for item in sublist], columns = col)
jakarta_venues.head()

Unnamed: 0,Neighborhood,Borough,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Cempaka Putih,Jakarta Pusat,-6.181214,106.868548,Mie Aceh Bungong Cempaka,-6.180464,106.871099,Acehnese Restaurant
1,Cempaka Putih,Jakarta Pusat,-6.181214,106.868548,Bika Ambon Medan Sylvie,-6.181566,106.866629,Bakery
2,Cempaka Putih,Jakarta Pusat,-6.181214,106.868548,Pizza Hut,-6.181644,106.872073,Pizza Place
3,Cempaka Putih,Jakarta Pusat,-6.181214,106.868548,Pizza Hut,-6.179067,106.866189,Pizza Place
4,Cempaka Putih,Jakarta Pusat,-6.181214,106.868548,Arcici Swiming Pool™,-6.184377,106.867813,Pool


#### Count each Neighborhood

In [13]:
jakarta_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Borough,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Cakung,4,4,4,4,4,4,4
Cempaka Putih,7,7,7,7,7,7,7
Cengkareng,3,3,3,3,3,3,3
Cilandak,4,4,4,4,4,4,4
Cilincing,3,3,3,3,3,3,3
Cipayung,1,1,1,1,1,1,1
Ciracas,2,2,2,2,2,2,2
Duren Sawit,5,5,5,5,5,5,5
Gambir,16,16,16,16,16,16,16
Grogol Petamburan,36,36,36,36,36,36,36


In [14]:
print('There are {} uniques categories.'.format(len(jakarta_venues['Venue Category'].unique())))

There are 136 uniques categories.


#### Convert categorical variable into numerical variable using one hot encoder

In [15]:
jakarta_onehot = pd.get_dummies(jakarta_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
jakarta_onehot['Neighborhood'] = jakarta_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [jakarta_onehot.columns[-1]] + list(jakarta_onehot.columns[:-1])
jakarta_onehot = jakarta_onehot[fixed_columns]

jakarta_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Acehnese Restaurant,Airport Terminal,American Restaurant,Arcade,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,...,Supermarket,Sushi Restaurant,Thai Restaurant,Theme Park,Theme Park Ride / Attraction,Track Stadium,Train Station,University,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Group by mean each Neighborhood

In [16]:
jakarta_grouped = jakarta_onehot.groupby('Neighborhood').mean().reset_index()
jakarta_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Acehnese Restaurant,Airport Terminal,American Restaurant,Arcade,Art Gallery,Art Museum,Asian Restaurant,...,Supermarket,Sushi Restaurant,Thai Restaurant,Theme Park,Theme Park Ride / Attraction,Track Stadium,Train Station,University,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Cakung,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Cempaka Putih,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Cengkareng,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Cilandak,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Cilincing,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Cipayung,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Ciracas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Duren Sawit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Gambir,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Grogol Petamburan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,...,0.0,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0


#### We want to see top 5 venues every Neighborhood

In [17]:
num_top_venues = 5

for hood in jakarta_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = jakarta_grouped[jakarta_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Cakung----
                      venue  freq
0                Soup Place  0.25
1                    Lounge  0.25
2               Gas Station  0.25
3               Yoga Studio  0.00
4  Mediterranean Restaurant  0.00


----Cempaka Putih----
                       venue  freq
0                Pizza Place  0.29
1                       Pool  0.14
2        Acehnese Restaurant  0.14
3  Indonesian Meatball Place  0.14
4                     Bakery  0.14


----Cengkareng----
                venue  freq
0          Food Truck  0.33
1         Coffee Shop  0.33
2     Bubble Tea Shop  0.33
3           Nightclub  0.00
4  Miscellaneous Shop  0.00


----Cilandak----
                     venue  freq
0               Food Truck  0.25
1               Soup Place  0.25
2              Art Gallery  0.25
3  Health & Beauty Service  0.25
4              Yoga Studio  0.00


----Cilincing----
           venue  freq
0           Park  0.33
1  Shopping Mall  0.33
2          Diner  0.33
3    Yoga Studio  0.00
4   Mu



----Tebet----
                   venue  freq
0  Indonesian Restaurant  0.15
1                   Café  0.08
2                 Bakery  0.07
3       Asian Restaurant  0.07
4            Karaoke Bar  0.05




#### define function to return most common venues


In [18]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [19]:
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhood_sorted = pd.DataFrame(columns=columns)
neighborhood_sorted['Neighborhood'] = jakarta_grouped['Neighborhood']

for ind in np.arange(jakarta_grouped.shape[0]):
    neighborhood_sorted.iloc[ind, 1:] = return_most_common_venues(jakarta_grouped.iloc[ind, :], num_top_venues)

neighborhood_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Cakung,Gas Station,Lounge,Soup Place,Food & Drink Shop,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop,Diner,Vietnamese Restaurant
1,Cempaka Putih,Pizza Place,Acehnese Restaurant,Indonesian Meatball Place,Pool,Bakery,BBQ Joint,Vietnamese Restaurant,Diner,Farmers Market,Electronics Store
2,Cengkareng,Coffee Shop,Bubble Tea Shop,Food Truck,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Food & Drink Shop,Convenience Store
3,Cilandak,Food Truck,Health & Beauty Service,Soup Place,Art Gallery,Diner,Fast Food Restaurant,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop
4,Cilincing,Park,Shopping Mall,Diner,Vietnamese Restaurant,Dim Sum Restaurant,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop,Dessert Shop


#### Clustering

In [20]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5

jakarta_grouped_clustering = jakarta_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(jakarta_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 4, 3, 3, 4, 3, 4, 4, 0, 4])

#### Merge jakarta_grouped with jakarta_data to add latitude/longitude for each neighborhood

We pass the parameter how = 'inner' to remove NaN or infinity number from previous grouping by mean

In [21]:
neighborhood_sorted.insert(0, 'Cluster Labels', kmeans.labels_)


In [22]:
df_merged = df.copy()
df_merged = df_merged.join(neighborhood_sorted.set_index('Neighborhood'), on='Neighborhood', how='inner').reset_index(drop=True)

df_merged.tail() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,Jakarta Barat,Tambora,-6.146614,106.801046,3,Food Truck,Garden,Convenience Store,Train Station,Department Store,Indonesian Restaurant,Miscellaneous Shop,Farmers Market,Electronics Store,Dumpling Restaurant
38,Jakarta Barat,Kebon Jeruk,-6.192572,106.769725,4,Noodle House,Concert Hall,Asian Restaurant,Café,Food Truck,Juice Bar,Bakery,Indonesian Restaurant,Coffee Shop,Auto Dealership
39,Jakarta Barat,Kalideres,-6.137006,106.701594,3,Ski Area,Convenience Store,Bookstore,Food Truck,Soccer Field,Fried Chicken Joint,Indonesian Restaurant,Donut Shop,Farmers Market,Electronics Store
40,Jakarta Barat,Palmerah,-6.191002,106.794363,4,Coffee Shop,Pizza Place,Noodle House,Asian Restaurant,Multiplex,Restaurant,Medical Center,Convenience Store,Gym,Hotel
41,Jakarta Barat,Kembangan,-6.191395,106.740586,4,Asian Restaurant,Japanese Restaurant,Chinese Restaurant,Coffee Shop,Café,Clothing Store,Food Court,Korean Restaurant,Pizza Place,Department Store


#### Check the shape of data

In [23]:
df_merged.shape

(42, 15)

#### Plot the segmentation and cluster

In [24]:
import matplotlib.cm as cm
import matplotlib.colors as colors
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Neighborhood'], df_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Get insight from the data

#### 1. Check the first cluster

We sort the 1st most popular venues from cluster 1

In [25]:
df_merged.loc[df_merged['Cluster Labels'] == 0, df_merged.columns[[1] + list(range(5, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Gambir,Indonesian Restaurant,Seafood Restaurant,Café,History Museum,Hotel,Clothing Store,Office,Food Truck,Breakfast Spot,Bakery
6,Senen,Hotel,Indonesian Restaurant,University,History Museum,Dance Studio,Noodle House,Art Gallery,Donut Shop,Vietnamese Restaurant,Diner
7,Tanah Abang,Indonesian Restaurant,Coffee Shop,Seafood Restaurant,Soup Place,Noodle House,Food Truck,Javanese Restaurant,Japanese Restaurant,Fruit & Vegetable Store,Pizza Place
9,Kelapa Gading,Indonesian Restaurant,Asian Restaurant,Chinese Restaurant,Japanese Restaurant,Korean Restaurant,Steakhouse,Seafood Restaurant,Javanese Restaurant,Food Court,Coffee Shop
10,Koja,Indonesian Restaurant,Pizza Place,Arcade,Vietnamese Restaurant,Dim Sum Restaurant,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop,Diner
20,Makasar,Indonesian Restaurant,Concert Hall,Airport Terminal,Chinese Restaurant,Asian Restaurant,Deli / Bodega,Dance Studio,Cupcake Shop,Department Store,Food Court
21,Matraman,Coffee Shop,Snack Place,Pizza Place,Indonesian Restaurant,Dim Sum Restaurant,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop,Diner
22,Pasar Rebo,Yoga Studio,Bakery,Noodle House,Indonesian Restaurant,Diner,Fast Food Restaurant,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop
23,Pulo Gadung,Indonesian Restaurant,Convenience Store,Athletics & Sports,Fast Food Restaurant,Pizza Place,Café,Hardware Store,Sandwich Place,Breakfast Spot,Bubble Tea Shop
28,Mampang Prapatan,Noodle House,Indonesian Restaurant,Fast Food Restaurant,Bakery,Vietnamese Restaurant,Diner,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop


We see that most of popular venue of cluster 1 belong to Indonesian.

#### 2. Check the second cluster

In [26]:
df_merged.loc[df_merged['Cluster Labels'] == 1, df_merged.columns[[1] + list(range(5, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
31,Pesanggrahan,Pizza Place,Dim Sum Restaurant,Fast Food Restaurant,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop,Diner,Dessert Shop,College Academic Building


From the above table we can see that the majority 1st most common venue for cluster 2 is Pizza Place. Hence, we conclude that cluster 2 belongs to Pizza Place.

#### 3. Check the third cluster

In [27]:
df_merged.loc[df_merged['Cluster Labels'] == 2, df_merged.columns[[1] + list(range(5, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Tanjung Priok,Bakery,BBQ Joint,Vietnamese Restaurant,Food Truck,Food & Drink Shop,Fast Food Restaurant,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop


From the above table we can see that the all the 1st most common venue for cluster 3 is Bakery. Hence, we conclude that cluster 3 belongs to Bakery.

#### 4. Check the fourth cluster

In [28]:
df_merged.loc[df_merged['Cluster Labels'] == 3, df_merged.columns[[1] + list(range(5, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Johar Baru,Food Truck,Arcade,Café,Department Store,Food & Drink Shop,Convenience Store,Gym / Fitness Center,Gym,Cupcake Shop,Dance Studio
15,Cipayung,Food Truck,Food Court,Convenience Store,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner
18,Jatinegara,Garden,Satay Restaurant,Food Truck,Gym,Italian Restaurant,Vietnamese Restaurant,Electronics Store,Dumpling Restaurant,Donut Shop,Diner
24,Cilandak,Food Truck,Health & Beauty Service,Soup Place,Art Gallery,Diner,Fast Food Restaurant,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop
25,Jagakarsa,Pharmacy,Department Store,Convenience Store,Food Truck,Acehnese Restaurant,Health & Beauty Service,Hardware Store,History Museum,Cupcake Shop,Dance Studio
34,Cengkareng,Coffee Shop,Bubble Tea Shop,Food Truck,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Food & Drink Shop,Convenience Store
37,Tambora,Food Truck,Garden,Convenience Store,Train Station,Department Store,Indonesian Restaurant,Miscellaneous Shop,Farmers Market,Electronics Store,Dumpling Restaurant
39,Kalideres,Ski Area,Convenience Store,Bookstore,Food Truck,Soccer Field,Fried Chicken Joint,Indonesian Restaurant,Donut Shop,Farmers Market,Electronics Store


From the above table we can see that the majority of 1st most common venue is Food Truck. Hence, we conclude that cluster 4 belongs to Food Truck.

#### 5. Check the fifth cluster

In [29]:
df_merged.loc[df_merged['Cluster Labels'] == 4, df_merged.columns[[1] + list(range(5, df_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Cempaka Putih,Pizza Place,Acehnese Restaurant,Indonesian Meatball Place,Pool,Bakery,BBQ Joint,Vietnamese Restaurant,Diner,Farmers Market,Electronics Store
3,Kemayoran,Noodle House,Arcade,Spa,Vietnamese Restaurant,Dim Sum Restaurant,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop,Diner
4,Menteng,Food Truck,Indonesian Restaurant,Coffee Shop,Park,Deli / Bodega,Department Store,Miscellaneous Shop,Dessert Shop,Massage Studio,Electronics Store
5,Sawah Besar,Indonesian Restaurant,Convenience Store,Noodle House,Karaoke Bar,Asian Restaurant,Pet Store,Diner,Automotive Shop,Camera Store,Chinese Restaurant
8,Cilincing,Park,Shopping Mall,Diner,Vietnamese Restaurant,Dim Sum Restaurant,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop,Dessert Shop
11,Pademangan,Hotel,Food Truck,Theme Park,Shopping Mall,Bowling Alley,Athletics & Sports,Asian Restaurant,Seafood Restaurant,Department Store,Dessert Shop
12,Penjaringan,Pharmacy,Food Truck,Theme Park,Indonesian Restaurant,Restaurant,Dessert Shop,Dim Sum Restaurant,Electronics Store,Dumpling Restaurant,Donut Shop
14,Cakung,Gas Station,Lounge,Soup Place,Food & Drink Shop,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop,Diner,Vietnamese Restaurant
16,Ciracas,Coffee Shop,Metro Station,Diner,Fast Food Restaurant,Farmers Market,Electronics Store,Dumpling Restaurant,Donut Shop,Dim Sum Restaurant,Concert Hall
17,Duren Sawit,Indonesian Meatball Place,Coffee Shop,Mediterranean Restaurant,Convenience Store,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Food & Drink Shop,Dim Sum Restaurant
