**Step 1** : Import relevant libraries.

In [1]:
import requests # library to handle requests

import pandas as pd # library for data analysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import numpy as np # library to handle data in a vectorized manner

import random # library for random number generation

!pip install geocoder
import geocoder
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values


# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# library for transforming a JSON file into a pandas DataFrame
from pandas.io.json import json_normalize

from bs4 import BeautifulSoup # library to parse HTML and XML documents

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# plotting library
!pip -q install folium
import folium 

print('Libraries imported.')

Libraries imported.


**Step 2a** : Scrape the data from the source and transform it into a DataFrame. We start with Selangor.

In [2]:
# send the GET request (with a timeout so a stalled connection cannot hang the notebook)
response = requests.get("https://en.wikipedia.org/wiki/Category:Townships_in_Selangor", timeout=30)
# stop here on an HTTP error instead of handing an error page to the parser
response.raise_for_status()
data = response.text

In [3]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

In [4]:
# create a list to store neighborhood data
Sgor_List = []

In [5]:
# collect the text of every list item inside the first "mw-category" block
category_section = soup.find_all("div", class_="mw-category")[0]
Sgor_List.extend(item.text for item in category_section.findAll("li"))

In [6]:
# create a new DataFrame from the list
Sgor_df = pd.DataFrame({"Neighborhood": Sgor_List})

Sgor_df.head()

Unnamed: 0,Neighborhood
0,Alam Budiman
1,Ara Damansara
2,Balakong
3,Bandar Baru Bangi
4,Bandar Baru Klang


In [7]:
Sgor_df.shape

(67, 1)

In [8]:
Sgor_df.dtypes

Neighborhood    object
dtype: object

In [9]:
# define a function to get coordinates
def get_latlng(neighborhood, state='Selangor', max_attempts=10):
    """Geocode a neighborhood with ArcGIS and return its ``latlng`` value.

    Parameters
    ----------
    neighborhood : str
        Neighborhood name; it is combined with ``state`` and "Malaysia" to
        build the geocoding query.
    state : str, optional
        State or federal territory appended to the query. Defaults to
        ``'Selangor'``.
    max_attempts : int, optional
        Number of lookups to try before giving up. Defaults to 10.

    Returns
    -------
    list
        The first non-``None`` ``latlng`` reported by ``geocoder.arcgis``.

    Raises
    ------
    RuntimeError
        If none of the attempts yields coordinates.
    """
    query = '{}, {}, Malaysia'.format(neighborhood, state)
    # retry a bounded number of times: a name the service cannot resolve
    # must not keep the notebook looping forever
    for _ in range(max_attempts):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords
    raise RuntimeError(
        'No coordinates found for {!r} after {} attempts.'.format(query, max_attempts))

In [10]:
# call the function to get the coordinates, store in a new list using list comprehension
coordinates = [ get_latlng(neighborhood) for neighborhood in Sgor_df["Neighborhood"].tolist() ]

In [11]:
# view samples of coordinates list.
coordinates[0:9]

[[3.143730000000062, 101.47958000000006],
 [3.122850000000028, 101.58796000000007],
 [3.0413900000000353, 101.77131000000008],
 [2.962690000000066, 101.76102000000009],
 [3.0625700000000506, 101.46816000000007],
 [2.991300000000024, 101.43755000000004],
 [2.995900000000063, 101.43776000000008],
 [3.328510000000051, 101.52823000000006],
 [3.0556300000000647, 101.64860000000004]]

In [12]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
coordinates_df = pd.DataFrame(coordinates, columns=['Latitude', 'Longitude'])
coordinates_df.head()

Unnamed: 0,Latitude,Longitude
0,3.14373,101.47958
1,3.12285,101.58796
2,3.04139,101.77131
3,2.96269,101.76102
4,3.06257,101.46816


In [13]:
# copy both coordinate columns (aligned on the row index) into the Selangor table
for coord_column in ('Latitude', 'Longitude'):
    Sgor_df[coord_column] = coordinates_df[coord_column]
Sgor_df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Alam Budiman,3.14373,101.47958
1,Ara Damansara,3.12285,101.58796
2,Balakong,3.04139,101.77131
3,Bandar Baru Bangi,2.96269,101.76102
4,Bandar Baru Klang,3.06257,101.46816


In [14]:
Sgor_df['State'] = "Selangor"

In [15]:
Sgor_df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,State
0,Alam Budiman,3.14373,101.47958,Selangor
1,Ara Damansara,3.12285,101.58796,Selangor
2,Balakong,3.04139,101.77131,Selangor
3,Bandar Baru Bangi,2.96269,101.76102,Selangor
4,Bandar Baru Klang,3.06257,101.46816,Selangor


In [16]:
# save the DataFrame as CSV file
Sgor_df.to_csv("Sgor_df.csv", index=False)

**Step 2b** : Do the same for Kuala Lumpur.

In [17]:
# send the GET request (with a timeout so a stalled connection cannot hang the notebook)
response = requests.get("https://en.wikipedia.org/wiki/Category:Suburbs_in_Kuala_Lumpur", timeout=30)
# stop here on an HTTP error instead of handing an error page to the parser
response.raise_for_status()
data = response.text

In [18]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

In [19]:
# create a list to store neighborhood data
KL_List = []

In [20]:
# collect the text of every list item inside the first "mw-category" block
category_section = soup.find_all("div", class_="mw-category")[0]
KL_List.extend(item.text for item in category_section.findAll("li"))

In [21]:
# create a new DataFrame from the list
KL_df = pd.DataFrame({"Neighborhood": KL_List})

KL_df.head()

Unnamed: 0,Neighborhood
0,Alam Damai
1,"Ampang, Kuala Lumpur"
2,Bandar Menjalara
3,Bandar Sri Permaisuri
4,Bandar Tasik Selatan


In [22]:
KL_df.shape

(71, 1)

In [23]:
KL_df.dtypes

Neighborhood    object
dtype: object

In [24]:
# define a function to get coordinates
def get_latlng(neighborhood, state='Kuala Lumpur', max_attempts=10):
    """Geocode a neighborhood with ArcGIS and return its ``latlng`` value.

    Parameters
    ----------
    neighborhood : str
        Neighborhood name; it is combined with ``state`` and "Malaysia" to
        build the geocoding query.
    state : str, optional
        State or federal territory appended to the query. Defaults to
        ``'Kuala Lumpur'``.
    max_attempts : int, optional
        Number of lookups to try before giving up. Defaults to 10.

    Returns
    -------
    list
        The first non-``None`` ``latlng`` reported by ``geocoder.arcgis``.

    Raises
    ------
    RuntimeError
        If none of the attempts yields coordinates.
    """
    query = '{}, {}, Malaysia'.format(neighborhood, state)
    # retry a bounded number of times: a name the service cannot resolve
    # must not keep the notebook looping forever
    for _ in range(max_attempts):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords
    raise RuntimeError(
        'No coordinates found for {!r} after {} attempts.'.format(query, max_attempts))

In [25]:
# call the function to get the coordinates, store in a new list using list comprehension
coordinates = [ get_latlng(neighborhood) for neighborhood in KL_df["Neighborhood"].tolist() ]

In [26]:
# view samples of coordinates list.
coordinates[0:9]

[[3.0576900000000364, 101.74388000000005],
 [3.148494115588384, 101.69672876508707],
 [3.1903500000000236, 101.62545000000006],
 [3.1039100000000417, 101.71226000000007],
 [3.072750000000042, 101.71461000000005],
 [3.08280000000002, 101.72281000000004],
 [3.1292000000000257, 101.67844000000008],
 [3.1292000000000257, 101.67844000000008],
 [3.111020000000053, 101.66283000000004]]

In [27]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
coordinates_df = pd.DataFrame(coordinates, columns=['Latitude', 'Longitude'])
coordinates_df.head()

Unnamed: 0,Latitude,Longitude
0,3.05769,101.74388
1,3.148494,101.696729
2,3.19035,101.62545
3,3.10391,101.71226
4,3.07275,101.71461


In [28]:
# copy both coordinate columns (aligned on the row index) into the Kuala Lumpur table
for coord_column in ('Latitude', 'Longitude'):
    KL_df[coord_column] = coordinates_df[coord_column]
KL_df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Alam Damai,3.05769,101.74388
1,"Ampang, Kuala Lumpur",3.148494,101.696729
2,Bandar Menjalara,3.19035,101.62545
3,Bandar Sri Permaisuri,3.10391,101.71226
4,Bandar Tasik Selatan,3.07275,101.71461


In [29]:
KL_df['State'] = "Kuala Lumpur"

In [30]:
KL_df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,State
0,Alam Damai,3.05769,101.74388,Kuala Lumpur
1,"Ampang, Kuala Lumpur",3.148494,101.696729,Kuala Lumpur
2,Bandar Menjalara,3.19035,101.62545,Kuala Lumpur
3,Bandar Sri Permaisuri,3.10391,101.71226,Kuala Lumpur
4,Bandar Tasik Selatan,3.07275,101.71461,Kuala Lumpur


**Step 3** : Merge both Selangor and Kuala Lumpur dataframes.

In [31]:
# Merge two tables
frames = [KL_df,Sgor_df]
# renumber the rows so the combined table has a unique index instead of
# repeating the labels of both source tables
merged_df = pd.concat(frames, ignore_index=True)
merged_df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,State
0,Alam Damai,3.05769,101.74388,Kuala Lumpur
1,"Ampang, Kuala Lumpur",3.148494,101.696729,Kuala Lumpur
2,Bandar Menjalara,3.19035,101.62545,Kuala Lumpur
3,Bandar Sri Permaisuri,3.10391,101.71226,Kuala Lumpur
4,Bandar Tasik Selatan,3.07275,101.71461,Kuala Lumpur


In [32]:
merged_df = merged_df[["Neighborhood","State","Latitude","Longitude"]]
merged_df.head()

Unnamed: 0,Neighborhood,State,Latitude,Longitude
0,Alam Damai,Kuala Lumpur,3.05769,101.74388
1,"Ampang, Kuala Lumpur",Kuala Lumpur,3.148494,101.696729
2,Bandar Menjalara,Kuala Lumpur,3.19035,101.62545
3,Bandar Sri Permaisuri,Kuala Lumpur,3.10391,101.71226
4,Bandar Tasik Selatan,Kuala Lumpur,3.07275,101.71461


In [33]:
# download csv file in case you prefer to work on local machine.

from IPython.display import HTML
import base64 

def create_download_link( new_df, title = "Download CSV file", filename = "merged_df.csv"):
    """Build an HTML link that downloads ``new_df`` as a CSV file.

    The frame is written to CSV without its index, base64-encoded and
    embedded in a ``data:`` URI, so no file is written to disk.

    Parameters
    ----------
    new_df : pandas.DataFrame
        Frame to export.
    title : str, optional
        Text of the link.
    filename : str, optional
        File name placed in the link's ``download`` attribute.

    Returns
    -------
    IPython.display.HTML
        The anchor element wrapped for display in the notebook.
    """
    # serialise the frame that was passed in, not the global merged_df
    csv = new_df.to_csv(index=False)
    payload = base64.b64encode(csv.encode()).decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    html = html.format(payload=payload, title=title, filename=filename)
    return HTML(html)

create_download_link(merged_df)

**Step 4** : Create a map for data visualisation.

In [34]:
# get the coordinates of Kuala Lumpur (Note: Kuala Lumpur Federal Territory is surrounded by Selangor state.)
address = 'Kuala Lumpur, Malaysia'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
# geocode() gives None when nothing matches; fail with a clear message
# instead of an AttributeError on the next line
if location is None:
    raise ValueError('Nominatim returned no result for {!r}.'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Kuala Lumpur, Malaysia is {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Kuala Lumpur, Malaysia is 3.1516964, 101.6942371.


In [35]:
# build the base map centred on the coordinates looked up above
# (the name `map` is kept because the save cell below refers to it)
map = folium.Map(location=[latitude, longitude], zoom_start=10.5)

# appearance shared by every neighborhood marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7)

# one circle marker per neighborhood, with the name as its popup
for lat, lng, neighborhood in zip(merged_df['Latitude'], merged_df['Longitude'], merged_df['Neighborhood']):
    popup = folium.Popup('{}'.format(neighborhood), parse_html=True)
    folium.CircleMarker([lat, lng], popup=popup, **marker_style).add_to(map)

map

In [36]:
# save the map as HTML file
map.save('map.html')

**Step 5** : Use Foursquare API to explore the neighborhoods.

In [37]:
# define Foursquare Credentials and Version
# The credentials are read from environment variables so they are never
# stored in the notebook; both fall back to '' when the variable is unset.
import os

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# report only whether each credential is present; never echo the values
# into the saved cell output
print('Your credentails:')
print('CLIENT_ID: ' + ('<set>' if CLIENT_ID else '<not set>'))
print('CLIENT_SECRET:' + ('<set>' if CLIENT_SECRET else '<not set>'))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [38]:
# let's get the top 50 venues that are within a radius of 10,000 meters.

radius = 10000
LIMIT = 50

venues = []

for lat, long, neighborhood in zip(merged_df['Latitude'], merged_df['Longitude'], merged_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request (with a timeout so one stalled call cannot hang the loop)
    response = requests.get(url, timeout=30)
    if not response.ok:
        # the URL is deliberately left out of the message: it contains the API credentials
        raise RuntimeError('Foursquare request for {!r} failed with HTTP status {}.'.format(
            neighborhood, response.status_code))
    
    # tolerate a response without groups/items instead of failing with KeyError/IndexError
    groups = response.json()["response"].get('groups') or []
    results = groups[0].get('items', []) if groups else []
    
    # return only relevant information for each nearby venue
    for venue in results:
        details = venue['venue']
        categories = details.get('categories') or []
        if not categories:
            # a venue without any category cannot be one-hot encoded later; skip it
            # rather than raising IndexError on categories[0]
            continue
        venues.append((
            neighborhood,
            lat, 
            long, 
            details['name'], 
            details['location']['lat'], 
            details['location']['lng'],  
            categories[0]['name']))

In [39]:
# convert the venues list into a new DataFrame; passing the column names to the
# constructor also works when `venues` is empty (assigning them afterwards would
# raise a length-mismatch ValueError on a frame with no columns)
venues_df = pd.DataFrame(
    venues,
    columns=['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory'])

print('{} venues were returned by Foursquare.'.format(venues_df.shape[0]))
venues_df.head()

6894 venues were returned by Foursquare.


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Alam Damai,3.05769,101.74388,Pengedar Shaklee Kuala Lumpur,3.061235,101.740696,Supplement Shop
1,Alam Damai,3.05769,101.74388,Jc Deli 皆喜食坊,3.058397,101.74856,Food & Drink Shop
2,Alam Damai,3.05769,101.74388,Minang Tomyam,3.057185,101.749812,Seafood Restaurant
3,Alam Damai,3.05769,101.74388,Suakasih Bdr tun Hussein onn,3.034633,101.757606,Garden
4,Alam Damai,3.05769,101.74388,Alpha Brew Coffee,3.037045,101.765406,Brewery


In [40]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alam Budiman,50,50,50,50,50,50
Alam Damai,50,50,50,50,50,50
"Ampang, Kuala Lumpur",50,50,50,50,50,50
Ara Damansara,50,50,50,50,50,50
Balakong,50,50,50,50,50,50
Bandar Baru Bangi,50,50,50,50,50,50
Bandar Baru Klang,50,50,50,50,50,50
Bandar Botanic,50,50,50,50,50,50
Bandar Bukit Tinggi,50,50,50,50,50,50
Bandar Country Homes,50,50,50,50,50,50


In [41]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 222 uniques categories.


In [42]:
# print out the list of categories
venues_df['VenueCategory'].unique()[:50]

array(['Supplement Shop', 'Food & Drink Shop', 'Seafood Restaurant',
       'Garden', 'Brewery', 'Noodle House', 'Tea Room',
       'Chinese Restaurant', 'Food Truck', 'Trail', 'Spa', 'Steakhouse',
       'Pet Store', 'Outlet Store', 'Massage Studio', 'Bubble Tea Shop',
       'Bike Shop', 'Asian Restaurant', 'Café', 'Scenic Lookout',
       'Thai Restaurant', 'Sporting Goods Shop',
       'Middle Eastern Restaurant', 'Ice Cream Shop', 'Coffee Shop',
       'Burger Joint', 'BBQ Joint', 'Vegetarian / Vegan Restaurant',
       'Malay Restaurant', 'Pizza Place', 'Surf Spot', 'Speakeasy',
       'Hostel', 'Latin American Restaurant', 'Hotel', 'Hotel Pool',
       'Museum', 'Indian Restaurant', 'Bookstore', 'Resort', 'Building',
       'Shopping Mall', 'Hotel Bar', 'Japanese Restaurant',
       'Udon Restaurant', 'Beer Bar', 'Cosmetics Shop', 'Jewelry Store',
       'Park', 'Juice Bar'], dtype=object)

In [43]:
# check if "Pizza Place" is one of the unique categories.
"Pizza Place" in venues_df['VenueCategory'].unique()

True

**Step 6** : Analyze each neighborhood.

In [44]:
# one indicator column per venue category, named exactly after the category
merged_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# attach the neighborhood of each venue; the column is called 'Neighborhoods'
# (plural) because 'Neighborhood' is itself one of the venue categories
merged_onehot['Neighborhoods'] = venues_df['Neighborhood']

# rotate the last column (the one just added) to the front
*leading_columns, trailing_column = merged_onehot.columns
fixed_columns = [trailing_column, *leading_columns]
merged_onehot = merged_onehot[fixed_columns]

print(merged_onehot.shape)
merged_onehot.head()

(6894, 223)


Unnamed: 0,Neighborhoods,Accessories Store,African Restaurant,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Automotive Shop,BBQ Joint,Bakery,Bar,Beach,Bed & Breakfast,Beer Bar,Beer Garden,Bike Shop,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Buffet,Building,Burger Joint,Burrito Place,Café,Camera Store,Campground,Carpet Store,Cave,Chaat Place,Chinese Breakfast Place,Chinese Restaurant,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Administrative Building,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Cycle Studio,Dance Studio,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dive Shop,Donut Shop,Dry Cleaner,Eastern European Restaurant,Electronics Store,Farm,Farmers Market,Fast Food Restaurant,Field,Fish & Chips Shop,Fish Market,Fishing Spot,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,Forest,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Fujian Restaurant,Furniture / Home Store,Garden,Gas Station,Gift Shop,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hainan Restaurant,Halal Restaurant,Harbor / Marina,Hardware Store,Health & Beauty Service,Hill,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hostel,Hotel,Hotel Bar,Hotel Pool,Hotpot Restaurant,Housing Development,Hunan Restaurant,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Island,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karaoke Bar,Kids Store,Korean Restaurant,Lake,Latin American Restaurant,Lighthouse,Lounge,Malay Restaurant,Market,Massage Studio,Mediterranean Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Monument / Landmark,Mosque,Motorcycle Shop,Mountain,Movie Theater,Multiplex,Museum,Music Venue,Nail Salon,Nature 
Preserve,Neighborhood,Night Market,Nightclub,Noodle House,Other Great Outdoors,Other Nightlife,Outdoors & Recreation,Outlet Store,Paintball Field,Pakistani Restaurant,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Service,Pet Store,Pharmacy,Photography Lab,Piano Bar,Pier,Pizza Place,Playground,Plaza,Pool,Pool Hall,Portuguese Restaurant,Print Shop,Pub,Racetrack,Recreation Center,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,Rock Climbing Spot,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Soup Place,South American Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stables,Stationery Store,Steakhouse,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Taco Place,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park,Thrift / Vintage Store,Toy / Game Store,Track Stadium,Trail,Udon Restaurant,Vape Store,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Water Park,Wine Bar,Women's Store,Yoga Studio,Zoo
0,Alam Damai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Alam Damai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Alam Damai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Alam Damai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Alam Damai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Next, let's group the rows by neighborhood and take the mean of the frequency of occurrence of each category.

In [45]:
# average the one-hot rows per neighborhood, keeping the neighborhood as a regular column
merged_grouped = merged_onehot.groupby("Neighborhoods", as_index=False).mean()

print(merged_grouped.shape)
merged_grouped

(138, 223)


Unnamed: 0,Neighborhoods,Accessories Store,African Restaurant,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Automotive Shop,BBQ Joint,Bakery,Bar,Beach,Bed & Breakfast,Beer Bar,Beer Garden,Bike Shop,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Buffet,Building,Burger Joint,Burrito Place,Café,Camera Store,Campground,Carpet Store,Cave,Chaat Place,Chinese Breakfast Place,Chinese Restaurant,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Administrative Building,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Cycle Studio,Dance Studio,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dive Shop,Donut Shop,Dry Cleaner,Eastern European Restaurant,Electronics Store,Farm,Farmers Market,Fast Food Restaurant,Field,Fish & Chips Shop,Fish Market,Fishing Spot,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,Forest,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Fujian Restaurant,Furniture / Home Store,Garden,Gas Station,Gift Shop,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hainan Restaurant,Halal Restaurant,Harbor / Marina,Hardware Store,Health & Beauty Service,Hill,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hostel,Hotel,Hotel Bar,Hotel Pool,Hotpot Restaurant,Housing Development,Hunan Restaurant,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Island,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karaoke Bar,Kids Store,Korean Restaurant,Lake,Latin American Restaurant,Lighthouse,Lounge,Malay Restaurant,Market,Massage Studio,Mediterranean Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Monument / Landmark,Mosque,Motorcycle Shop,Mountain,Movie Theater,Multiplex,Museum,Music Venue,Nail Salon,Nature 
Preserve,Neighborhood,Night Market,Nightclub,Noodle House,Other Great Outdoors,Other Nightlife,Outdoors & Recreation,Outlet Store,Paintball Field,Pakistani Restaurant,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Service,Pet Store,Pharmacy,Photography Lab,Piano Bar,Pier,Pizza Place,Playground,Plaza,Pool,Pool Hall,Portuguese Restaurant,Print Shop,Pub,Racetrack,Recreation Center,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,Rock Climbing Spot,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Soup Place,South American Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stables,Stationery Store,Steakhouse,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Taco Place,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park,Thrift / Vintage Store,Toy / Game Store,Track Stadium,Trail,Udon Restaurant,Vape Store,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Water Park,Wine Bar,Women's Store,Yoga Studio,Zoo
0,Alam Budiman,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.02,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.02,0.0,0.02,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.06,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Alam Damai,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.02,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.12,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Ampang, Kuala Lumpur",0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.26,0.04,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Ara Damansara,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Balakong,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.02,0.16,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.08,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Bandar Baru Bangi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.08,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0
6,Bandar Baru Klang,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.1,0.0,0.0,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.02,0.12,0.0,0.0,0.0,0.06,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.02,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Bandar Botanic,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.16,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.02,0.0,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Bandar Bukit Tinggi,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.14,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.02,0.0,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Bandar Country Homes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.16,0.0,0.0,0.0,0.1,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0


In [46]:
# number of neighborhoods whose "Pizza Place" value is greater than zero
merged_grouped.loc[merged_grouped["Pizza Place"] > 0].shape[0]

25

We now create a dataframe for "Pizza Place" only.

In [47]:
# keep every row, but only the neighborhood name and its "Pizza Place" value
pizza_df = merged_grouped.loc[:, ["Neighborhoods", "Pizza Place"]]
pizza_df.head()

Unnamed: 0,Neighborhoods,Pizza Place
0,Alam Budiman,0.02
1,Alam Damai,0.02
2,"Ampang, Kuala Lumpur",0.0
3,Ara Damansara,0.0
4,Balakong,0.0


**Step 8** : Cluster the neighborhoods.

In [48]:
# set number of clusters
kclusters = 3

# cluster on the "Pizza Place" column alone: drop the name column by keyword,
# because the positional axis form `drop([...], 1)` is rejected by pandas 2.0+
pizza_clustering = pizza_df.drop(columns=["Neighborhoods"])

# run k-means clustering (random_state is fixed so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(pizza_clustering)

# check cluster labels generated for the first ten rows of the dataframe
kmeans.labels_[0:10]

array([1, 1, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int32)

In [49]:
# build a new dataframe from pizza_df with one extra column holding the
# k-means cluster label of each row (pizza_df itself is left untouched)
pizza_df2 = pizza_df.assign(**{"Cluster Labels": kmeans.labels_})

In [50]:
# switch to the singular column name "Neighborhood", the key used when joining with merged_df
pizza_df2 = pizza_df2.rename(columns={"Neighborhoods": "Neighborhood"})
pizza_df2.head()

Unnamed: 0,Neighborhood,Pizza Place,Cluster Labels
0,Alam Budiman,0.02,1
1,Alam Damai,0.02,1
2,"Ampang, Kuala Lumpur",0.0,0
3,Ara Damansara,0.0,0
4,Balakong,0.0,0


In [51]:
# check the dimensions (rows, columns) of the labelled dataframe
pizza_df2.shape

(138, 3)

In [52]:
# look up each neighborhood in merged_df (re-indexed by neighborhood name) to
# bring in its remaining columns, including latitude/longitude
pizza_df3 = pizza_df2.join(
    other=merged_df.set_index("Neighborhood"),
    on="Neighborhood",
    how="left",
)
print(pizza_df3.shape)
pizza_df3.head()

(138, 6)


Unnamed: 0,Neighborhood,Pizza Place,Cluster Labels,State,Latitude,Longitude
0,Alam Budiman,0.02,1,Selangor,3.14373,101.47958
1,Alam Damai,0.02,1,Kuala Lumpur,3.05769,101.74388
2,"Ampang, Kuala Lumpur",0.0,0,Kuala Lumpur,3.148494,101.696729
3,Ara Damansara,0.0,0,Selangor,3.12285,101.58796
4,Balakong,0.0,0,Selangor,3.04139,101.77131


In [53]:
# order the rows from the highest cluster label down to the lowest
pizza_df3 = pizza_df3.sort_values(by="Cluster Labels", ascending=False)
pizza_df3

Unnamed: 0,Neighborhood,Pizza Place,Cluster Labels,State,Latitude,Longitude
89,Puncak Alam,0.04,2,Selangor,3.23719,101.42399
90,Puncak Jalil,0.04,2,Selangor,3.01222,101.67573
36,Bukit Beruntung,0.04,2,Selangor,3.42623,101.55695
0,Alam Budiman,0.02,1,Selangor,3.14373,101.47958
45,Bukit Sentosa,0.02,1,Selangor,2.91936,101.75022
23,Bandar Tasik Selatan,0.02,1,Kuala Lumpur,3.07275,101.71461
31,Batu Arang,0.02,1,Selangor,3.3181,101.47202
107,Sungai Besi,0.02,1,Kuala Lumpur,3.04997,101.70603
43,Bukit Rahman Putra,0.02,1,Selangor,3.22885,101.55704
44,Bukit Rimau,0.02,1,Selangor,2.99436,101.52628


Visualise the cluster results.

In [54]:
# create map centred on [latitude, longitude] (both defined outside this cell)
pizza_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per
# cluster, converted to hex strings so they can be passed to folium
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add one circle marker per neighborhood, coloured by its cluster label
for lat, lon, poi, cluster in zip(pizza_df3['Latitude'], pizza_df3['Longitude'], pizza_df3['Neighborhood'], pizza_df3['Cluster Labels']):
    # cluster labels start at 0, so they index the colour list directly
    cluster_color = rainbow[cluster]
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(pizza_map)

pizza_map

In [55]:
# save the map as an HTML file named pizza_map.html (relative path)
pizza_map.save('pizza_map.html')

**Step 8** : Examine clusters.

In [56]:
# cluster 2: rows whose k-means label equals 2
pizza_df3[pizza_df3["Cluster Labels"].eq(2)]

Unnamed: 0,Neighborhood,Pizza Place,Cluster Labels,State,Latitude,Longitude
89,Puncak Alam,0.04,2,Selangor,3.23719,101.42399
90,Puncak Jalil,0.04,2,Selangor,3.01222,101.67573
36,Bukit Beruntung,0.04,2,Selangor,3.42623,101.55695


In [57]:
# cluster 1: rows whose k-means label equals 1
pizza_df3[pizza_df3["Cluster Labels"].eq(1)]

Unnamed: 0,Neighborhood,Pizza Place,Cluster Labels,State,Latitude,Longitude
0,Alam Budiman,0.02,1,Selangor,3.14373,101.47958
45,Bukit Sentosa,0.02,1,Selangor,2.91936,101.75022
23,Bandar Tasik Selatan,0.02,1,Kuala Lumpur,3.07275,101.71461
31,Batu Arang,0.02,1,Selangor,3.3181,101.47202
107,Sungai Besi,0.02,1,Kuala Lumpur,3.04997,101.70603
43,Bukit Rahman Putra,0.02,1,Selangor,3.22885,101.55704
44,Bukit Rimau,0.02,1,Selangor,2.99436,101.52628
104,Southville City,0.02,1,Selangor,2.90011,101.76345
19,Bandar Sungai Buaya,0.02,1,Selangor,3.37435,101.52979
96,Saujana Impian,0.02,1,Selangor,3.012076,101.797168


In [58]:
# cluster 0: rows whose k-means label equals 0
pizza_df3[pizza_df3["Cluster Labels"].eq(0)]

Unnamed: 0,Neighborhood,Pizza Place,Cluster Labels,State,Latitude,Longitude
119,Taman Len Seng,0.0,0,Kuala Lumpur,3.06908,101.74287
118,Taman Keramat,0.0,0,Selangor,3.1716,101.74245
97,Segambut,0.0,0,Kuala Lumpur,3.18639,101.6681
131,Taman Tun Dr Ismail,0.0,0,Kuala Lumpur,3.15283,101.62271
95,Salak South,0.0,0,Kuala Lumpur,3.08102,101.69724
94,Putrajaya,0.0,0,Kuala Lumpur,3.125843,101.718509
132,Taman U-Thant,0.0,0,Kuala Lumpur,3.1577,101.72452
133,Taman Wahyu,0.0,0,Kuala Lumpur,3.2224,101.67173
92,Pusat Bandar Puchong,0.0,0,Selangor,3.03404,101.61551
91,Puncak Perdana,0.0,0,Selangor,3.04749,101.76539
