# Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto

### 1. Import libraries

In [14]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from bs4 import BeautifulSoup # for scraping html docs
import urllib.request # for opening url 

print('Libraries imported.')

Libraries imported.


### 2. Obtain BTS station names and their location data

I prepared a file of BTS station names in Bangkok and their latitudes and longitudes because it would be really complicated to do that over geopy or other tools and it would return strange results because there are other places with the same names. 

In [15]:
BTS_coords = pd.read_csv('BTS_coords.csv')
BTS_coords.head()

Unnamed: 0,BTS Station,"Latitude, Longitude"
0,Mo Chit,"13.8025945, 100.5537947"
1,Saphan Khwai,"13.7938461, 100.5497489"
2,Ari,"13.7796580, 100.5446153"
3,Sanam Pao,"13.7726146, 100.5420928"
4,Victory Monument,"13.7627647, 100.5370685"


In [16]:
coords = pd.DataFrame(BTS_coords['Latitude, Longitude'].str.split(',',1).tolist(),columns = ['Latitude',' Longitude'])

In [17]:
coords.head()

Unnamed: 0,Latitude,Longitude
0,13.8025945,100.5537947
1,13.7938461,100.5497489
2,13.779658,100.5446153
3,13.7726146,100.5420928
4,13.7627647,100.5370685


In [18]:
BTS_data = pd.DataFrame()
BTS_data['Station'] = BTS_coords['BTS Station']
BTS_data[['Latitude','Longitude']] = coords.astype(float)
BTS_data.head()

Unnamed: 0,Station,Latitude,Longitude
0,Mo Chit,13.802594,100.553795
1,Saphan Khwai,13.793846,100.549749
2,Ari,13.779658,100.544615
3,Sanam Pao,13.772615,100.542093
4,Victory Monument,13.762765,100.537069


In [19]:
print(BTS_data.shape)

(35, 3)


### 4. Visualize our result dataset  

Create a map of Toronto with Postal code points on top

In [20]:
latitude = 13.7563
longitude = 100.5018

# create map of Toronto using latitude and longitude values
map_bkk = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, station in zip(BTS_data['Latitude'], BTS_data['Longitude'], BTS_data['Station']):
    label = station
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bkk)  
    
map_bkk

### 5. Explore the neighborhoods around BTS stations

#### Let's use the getNearbyVenues function from the previous lab to get data from foursquare

In [21]:
CLIENT_ID = 'FRA1RCJO1PWXFIER42UNGKYNTWL02PX4JOUIBTGU3EAIBW4W' # your Foursquare ID
CLIENT_SECRET = 'MMRD0HSW4Y2URDU4V4PLIRM00BVRCRWVXFRBHQQ3VFICMCUK' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100
radius = 500

In [22]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Station', 
                  'Station Latitude', 
                  'Station Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Now use it to get the data 

In [23]:
bkk_venues = getNearbyVenues(names=BTS_data['Station'],
                                   latitudes=BTS_data['Latitude'],
                                   longitudes=BTS_data['Longitude']
                                  )

Mo Chit
Saphan Khwai
Ari
Sanam Pao
Victory Monument
Phaya Thai
Ratchathewi
Siam
Chit Lom
Phloen Chit
Nana
Asok
Phrom Phong
Thong Lo
Ekkamai
Phra Khanong
On Nut
Bang Chak
Punnawithi
Udom Suk
Bang Na
Bearing
Samrong
National Stadium
Ratchadamri
Sala Daeng
Chong Nonsi
Surasak
Saphan Taksin
Krung Thonburi
Wongwian Yai
Pho Nimit
Talat Phlu
Wutthakat
Bang Wa


In [24]:
bkk_venues.head()

Unnamed: 0,Station,Station Latitude,Station Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Mo Chit,13.802594,100.553795,Chatuchak Market Section7 Art Zone (โครงการ 7),13.802202,100.551757,Art Gallery
1,Mo Chit,13.802594,100.553795,VIVA Bar,13.80177,100.551753,Bar
2,Mo Chit,13.802594,100.553795,BTS Mo Chit (N8) (BTS หมอชิต),13.802749,100.553848,Train Station
3,Mo Chit,13.802594,100.553795,Three Brothers Barber Shop,13.802349,100.553201,Salon / Barbershop
4,Mo Chit,13.802594,100.553795,Koko Drip,13.802332,100.551693,Coffee Shop


In [25]:
bkk_venues = bkk_venues[bkk_venues['Venue Category']!='Train Station']
bkk_venues.head(20)

Unnamed: 0,Station,Station Latitude,Station Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Mo Chit,13.802594,100.553795,Chatuchak Market Section7 Art Zone (โครงการ 7),13.802202,100.551757,Art Gallery
1,Mo Chit,13.802594,100.553795,VIVA Bar,13.80177,100.551753,Bar
3,Mo Chit,13.802594,100.553795,Three Brothers Barber Shop,13.802349,100.553201,Salon / Barbershop
4,Mo Chit,13.802594,100.553795,Koko Drip,13.802332,100.551693,Coffee Shop
5,Mo Chit,13.802594,100.553795,Jeed-Jard (จี๊ด-จ๊าด),13.801513,100.551771,Som Tum Restaurant
6,Mo Chit,13.802594,100.553795,Section 5 (โครงการ 5),13.800017,100.551862,Clothing Store
7,Mo Chit,13.802594,100.553795,จตุจักร ตลาดต้นไม้,13.799622,100.550524,Garden Center
8,Mo Chit,13.802594,100.553795,Chatuchak Park (สวนจตุจักร),13.806633,100.555776,Park
9,Mo Chit,13.802594,100.553795,Children's Discovery Museum (พิพิธภัณฑ์เด็กแห่...,13.803349,100.551002,Museum
10,Mo Chit,13.802594,100.553795,สวนสุขภาพเฉลิมพระเกียรติ ๓๖ พรรษา,13.803664,100.553749,Park


Let's see how many venues were returned for each postalcode

In [26]:
bkk_venues.groupby('Station').count()

Unnamed: 0_level_0,Station Latitude,Station Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ari,77,77,77,77,77,77
Asok,100,100,100,100,100,100
Bang Chak,20,20,20,20,20,20
Bang Na,9,9,9,9,9,9
Bang Wa,8,8,8,8,8,8
Bearing,9,9,9,9,9,9
Chit Lom,99,99,99,99,99,99
Chong Nonsi,99,99,99,99,99,99
Ekkamai,88,88,88,88,88,88
Krung Thonburi,13,13,13,13,13,13


In [27]:
print('There are {} uniques categories.'.format(len(bkk_venues['Venue Category'].unique())))

There are 198 uniques categories.


### 6. Analyze each neighborhood

First we apply one hot encoding

In [28]:
# one hot encoding
bkk_onehot = pd.get_dummies(bkk_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
bkk_onehot['Station'] = bkk_venues['Station'] 

# move neighborhood column to the first column
fixed_columns = [bkk_onehot.columns[-1]] + list(bkk_onehot.columns[:-1])
bkk_onehot = bkk_onehot[fixed_columns]

bkk_onehot.head()

Unnamed: 0,Station,Accessories Store,American Restaurant,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Automotive Shop,BBQ Joint,Badminton Court,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bike Shop,Bistro,Board Shop,Bookstore,Boutique,Breakfast Spot,Bubble Tea Shop,Buddhist Temple,Buffet,Burger Joint,Bus Station,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Cantonese Restaurant,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comic Shop,Concert Hall,Convenience Store,Convention Center,Cosmetics Shop,Coworking Space,Creperie,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Donburi Restaurant,Donut Shop,Drugstore,Duty-free Shop,Eastern European Restaurant,Electronics Store,Factory,Farm,Farmers Market,Fast Food Restaurant,Flea Market,Food,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,German Restaurant,Gift Shop,Golf Driving Range,Grocery Store,Gun Range,Gym / Fitness Center,Halal Restaurant,Health & Beauty Service,Historic Site,Hookah Bar,Hostel,Hotel,Hotel Bar,Hotel Pool,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Kaiseki Restaurant,Karaoke Bar,Korean Restaurant,Library,Lounge,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monjayaki Restaurant,Movie Theater,Multiplex,Museum,Music Venue,Nail Salon,New American Restaurant,Night Market,Nightclub,Noodle House,Optical Shop,Organic Grocery,Other Nightlife,Palace,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Perfume Shop,Pet Café,Pet Store,Pharmacy,Pilates Studio,Pizza Place,Planetarium,Playground,Plaza,Pool,Pub,Ramen Restaurant,Record Shop,Residential Building (Apartment / Condo),Resort,Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Science Museum,Seafood Restaurant,Shabu-Shabu Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Shrine,Skating Rink,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Som Tum Restaurant,Soup Place,South American Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Club,Stadium,Steakhouse,Sukiyaki Restaurant,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park,Tonkatsu Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Women's Store,Yoga Studio,Yoshoku Restaurant
0,Mo Chit,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Mo Chit,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Mo Chit,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Mo Chit,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Mo Chit,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Then let's group them and take the mean of the frequency of occurrence of each category

In [29]:
bkk_grouped = bkk_onehot.groupby('Station').mean().reset_index()
bkk_grouped

Unnamed: 0,Station,Accessories Store,American Restaurant,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Automotive Shop,BBQ Joint,Badminton Court,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bike Shop,Bistro,Board Shop,Bookstore,Boutique,Breakfast Spot,Bubble Tea Shop,Buddhist Temple,Buffet,Burger Joint,Bus Station,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Cantonese Restaurant,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comic Shop,Concert Hall,Convenience Store,Convention Center,Cosmetics Shop,Coworking Space,Creperie,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Donburi Restaurant,Donut Shop,Drugstore,Duty-free Shop,Eastern European Restaurant,Electronics Store,Factory,Farm,Farmers Market,Fast Food Restaurant,Flea Market,Food,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,German Restaurant,Gift Shop,Golf Driving Range,Grocery Store,Gun Range,Gym / Fitness Center,Halal Restaurant,Health & Beauty Service,Historic Site,Hookah Bar,Hostel,Hotel,Hotel Bar,Hotel Pool,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Kaiseki Restaurant,Karaoke Bar,Korean Restaurant,Library,Lounge,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monjayaki Restaurant,Movie Theater,Multiplex,Museum,Music Venue,Nail Salon,New American Restaurant,Night Market,Nightclub,Noodle House,Optical Shop,Organic Grocery,Other Nightlife,Palace,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Perfume Shop,Pet Café,Pet Store,Pharmacy,Pilates Studio,Pizza Place,Planetarium,Playground,Plaza,Pool,Pub,Ramen Restaurant,Record Shop,Residential Building (Apartment / Condo),Resort,Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Science Museum,Seafood Restaurant,Shabu-Shabu Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Shrine,Skating Rink,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Som Tum Restaurant,Soup Place,South American Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Club,Stadium,Steakhouse,Sukiyaki Restaurant,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park,Tonkatsu Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Women's Store,Yoga Studio,Yoshoku Restaurant
0,Ari,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.0,0.0,0.038961,0.0,0.0,0.0,0.051948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.012987,0.116883,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.12987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051948,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.025974,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.038961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.038961,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.012987,0.0,0.0,0.0,0.012987,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064935,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.012987,0.051948,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.012987,0.012987,0.0,0.0,0.0,0.0,0.0
1,Asok,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.14,0.02,0.0,0.01,0.01,0.0,0.0,0.02,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.02,0.0,0.06,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0
2,Bang Chak,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bang Na,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bang Wa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Bearing,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Chit Lom,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.010101,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.010101,0.0,0.020202,0.0,0.0,0.0,0.010101,0.0,0.010101,0.0,0.030303,0.0,0.0,0.010101,0.10101,0.0,0.0,0.010101,0.0,0.0,0.020202,0.0,0.0,0.0,0.010101,0.0,0.010101,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.131313,0.030303,0.0,0.0,0.010101,0.0,0.0,0.030303,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.030303,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040404,0.0,0.010101,0.0,0.0,0.010101,0.0,0.010101,0.010101,0.0,0.0,0.020202,0.0,0.070707,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.030303,0.0
7,Chong Nonsi,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.020202,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.0,0.0,0.10101,0.0,0.0,0.0,0.050505,0.0,0.0,0.010101,0.080808,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.010101,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.050505,0.0,0.0,0.0,0.0,0.010101,0.080808,0.010101,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.070707,0.0,0.010101,0.0,0.0,0.0,0.010101,0.0,0.030303,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.010101,0.010101,0.0,0.010101,0.0,0.030303,0.010101,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.020202,0.010101,0.0,0.0,0.0,0.060606,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.010101,0.0
8,Ekkamai,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.034091,0.022727,0.011364,0.011364,0.022727,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.056818,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.034091,0.0,0.0,0.0,0.011364,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.034091,0.011364,0.0,0.011364,0.0,0.011364,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.011364,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034091,0.022727,0.0,0.0,0.011364,0.0,0.102273,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.011364,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.011364,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.011364,0.011364,0.011364,0.0,0.0,0.022727,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.011364,0.0,0.022727,0.022727,0.0,0.0,0.0,0.0,0.056818,0.0,0.022727,0.0,0.0,0.011364,0.0,0.022727,0.0,0.0,0.0,0.011364
9,Krung Thonburi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.230769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Now we get each BTS station along with the top 5 most common venues

In [30]:
num_top_venues = 5

for hood in bkk_grouped['Station']:
    print("----" + hood + "----")
    temp = bkk_grouped[bkk_grouped['Station'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Ari----
                venue  freq
0         Coffee Shop  0.13
1                Café  0.12
2     Thai Restaurant  0.09
3  Som Tum Restaurant  0.06
4        Dessert Shop  0.05


----Asok----
               venue  freq
0              Hotel  0.14
1        Coffee Shop  0.09
2  Korean Restaurant  0.08
3    Thai Restaurant  0.06
4                Spa  0.06


----Bang Chak----
                venue  freq
0        Noodle House  0.15
1   Convenience Store  0.10
2  Chinese Restaurant  0.10
3                Café  0.10
4  Italian Restaurant  0.05


----Bang Na----
                   venue  freq
0            Coffee Shop  0.22
1    Japanese Restaurant  0.11
2      Hotpot Restaurant  0.11
3  Australian Restaurant  0.11
4   Fast Food Restaurant  0.11


----Bang Wa----
               venue  freq
0  Convenience Store  0.38
1    Thai Restaurant  0.25
2       Noodle House  0.12
3        Pizza Place  0.12
4             Market  0.12


----Bearing----
                    venue  freq
0             Coffee 

Then we put those into pandas dataframe

In [31]:
# Use the function from the previous lab to get most common venues

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Station']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
stations_venues_sorted = pd.DataFrame(columns=columns)
stations_venues_sorted['Station'] = bkk_grouped['Station']

for ind in np.arange(bkk_grouped.shape[0]):
    stations_venues_sorted.iloc[ind, 1:] = return_most_common_venues(bkk_grouped.iloc[ind, :], num_top_venues)

stations_venues_sorted

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ari,Coffee Shop,Café,Thai Restaurant,Som Tum Restaurant,Sushi Restaurant,Bar,Dessert Shop,Japanese Restaurant,Noodle House,BBQ Joint
1,Asok,Hotel,Coffee Shop,Korean Restaurant,Spa,Thai Restaurant,Dessert Shop,Gym / Fitness Center,Burger Joint,Restaurant,Ramen Restaurant
2,Bang Chak,Noodle House,Café,Chinese Restaurant,Convenience Store,Hotpot Restaurant,Thai Restaurant,Flea Market,Boutique,Bike Shop,Coffee Shop
3,Bang Na,Coffee Shop,Gun Range,Hotpot Restaurant,Japanese Restaurant,Fast Food Restaurant,Australian Restaurant,Shopping Mall,Grocery Store,Duty-free Shop,Food
4,Bang Wa,Convenience Store,Thai Restaurant,Market,Pizza Place,Noodle House,Yoshoku Restaurant,Food,Flea Market,Fast Food Restaurant,Farmers Market
5,Bearing,Coffee Shop,Bus Station,Hotel,Furniture / Home Store,Convention Center,Convenience Store,Noodle House,Steakhouse,Yoshoku Restaurant,Electronics Store
6,Chit Lom,Hotel,Coffee Shop,Thai Restaurant,Spa,Shopping Mall,Gym / Fitness Center,Italian Restaurant,Bakery,Restaurant,Yoga Studio
7,Chong Nonsi,Café,Hotel,Coffee Shop,Japanese Restaurant,Thai Restaurant,Chinese Restaurant,Gym / Fitness Center,Lounge,Restaurant,Noodle House
8,Ekkamai,Japanese Restaurant,Café,Thai Restaurant,Ramen Restaurant,Coffee Shop,Dessert Shop,Hotpot Restaurant,Bakery,Gym / Fitness Center,Bar
9,Krung Thonburi,Convenience Store,Food Truck,Thai Restaurant,Café,Factory,Seafood Restaurant,Bed & Breakfast,Badminton Court,Coffee Shop,Food Stand


Seems like there are many areas that has a lot of Coffee Shops and Cafes

### 7. Clustering Postalcode Areas

Run k-means clustering on toronto_grouped data into 5 clusters

In [87]:
# set number of clusters
kclusters = 7

bkk_grouped_clustering = bkk_grouped.drop('Station', 1)

# run k-means clustering
kmeans = KMeans(init = "k-means++", n_clusters=kclusters, n_init = 12, random_state=0).fit(bkk_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 3, 4, 0, 3, 0, 4, 0, 0, 5], dtype=int32)

Now let's create the resulting dataframe that includes clustering results

In [88]:
bkk_cluster = BTS_data

# add clustering labels
bkk_cluster['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
bkk_cluster = bkk_cluster.join(stations_venues_sorted.set_index('Station'), on='Station')

bkk_cluster.head() # check the last columns!

Unnamed: 0,Station,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Mo Chit,13.802594,100.553795,2,Bar,Som Tum Restaurant,Coffee Shop,Fast Food Restaurant,Park,Bubble Tea Shop,BBQ Joint,Thai Restaurant,Noodle House,Bus Station
1,Saphan Khwai,13.793846,100.549749,3,Thai Restaurant,Coffee Shop,Som Tum Restaurant,Noodle House,Asian Restaurant,Farmers Market,Bakery,Dessert Shop,Diner,Seafood Restaurant
2,Ari,13.779658,100.544615,4,Coffee Shop,Café,Thai Restaurant,Som Tum Restaurant,Sushi Restaurant,Bar,Dessert Shop,Japanese Restaurant,Noodle House,BBQ Joint
3,Sanam Pao,13.772615,100.542093,0,Hotel,Café,Thai Restaurant,Coffee Shop,Sushi Restaurant,BBQ Joint,Convenience Store,Bar,Wine Bar,Steakhouse
4,Victory Monument,13.762765,100.537069,3,Steakhouse,Noodle House,Hotel,Café,Coffee Shop,Duty-free Shop,Snack Place,Chinese Restaurant,Restaurant,Park


### 8. Visualize the resulting clusters

In [89]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bkk_cluster['Latitude'], bkk_cluster['Longitude'], bkk_cluster['Station'], bkk_cluster['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Let's have a look at each cluster

Cluster 1

In [90]:
bkk_cluster.loc[bkk_cluster['Cluster Labels'] == 0, bkk_cluster.columns[[0] + list(range(4, bkk_cluster.shape[1]))]]

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Sanam Pao,Hotel,Café,Thai Restaurant,Coffee Shop,Sushi Restaurant,BBQ Joint,Convenience Store,Bar,Wine Bar,Steakhouse
5,Phaya Thai,Hotel,Café,Restaurant,Hostel,Convenience Store,Coffee Shop,Thai Restaurant,Noodle House,Pub,Bed & Breakfast
7,Siam,Coffee Shop,Dessert Shop,Cosmetics Shop,Shopping Mall,Thai Restaurant,Japanese Restaurant,Hotpot Restaurant,Movie Theater,Bakery,Café
8,Chit Lom,Hotel,Coffee Shop,Thai Restaurant,Spa,Shopping Mall,Gym / Fitness Center,Italian Restaurant,Bakery,Restaurant,Yoga Studio
10,Nana,Hotel,Korean Restaurant,Coffee Shop,Middle Eastern Restaurant,Japanese Restaurant,Gym / Fitness Center,Hotel Bar,Thai Restaurant,Indian Restaurant,Men's Store
15,Phra Khanong,Coffee Shop,Noodle House,Art Gallery,Bistro,Convenience Store,Japanese Restaurant,Restaurant,Spa,Market,Ramen Restaurant
17,Bang Chak,Noodle House,Café,Chinese Restaurant,Convenience Store,Hotpot Restaurant,Thai Restaurant,Flea Market,Boutique,Bike Shop,Coffee Shop
18,Punnawithi,Thai Restaurant,Bakery,Food Truck,Coffee Shop,Shopping Mall,Furniture / Home Store,Noodle House,Chinese Restaurant,Supermarket,Korean Restaurant
19,Udom Suk,Noodle House,Convenience Store,Café,Shopping Plaza,Som Tum Restaurant,Shabu-Shabu Restaurant,Farmers Market,Bistro,Coffee Shop,Optical Shop
25,Sala Daeng,Coffee Shop,Japanese Restaurant,BBQ Joint,Gay Bar,Ramen Restaurant,Hotel,Spa,Japanese Curry Restaurant,Bakery,Dessert Shop


Cluster 2

In [91]:
bkk_cluster.loc[bkk_cluster['Cluster Labels'] == 1, bkk_cluster.columns[[0] + list(range(4, bkk_cluster.shape[1]))]]

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,Krung Thonburi,Convenience Store,Food Truck,Thai Restaurant,Café,Factory,Seafood Restaurant,Bed & Breakfast,Badminton Court,Coffee Shop,Food Stand


Cluster 3

In [92]:
bkk_cluster.loc[bkk_cluster['Cluster Labels'] == 2, bkk_cluster.columns[[0] + list(range(4, bkk_cluster.shape[1]))]]

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Mo Chit,Bar,Som Tum Restaurant,Coffee Shop,Fast Food Restaurant,Park,Bubble Tea Shop,BBQ Joint,Thai Restaurant,Noodle House,Bus Station
14,Ekkamai,Japanese Restaurant,Café,Thai Restaurant,Ramen Restaurant,Coffee Shop,Dessert Shop,Hotpot Restaurant,Bakery,Gym / Fitness Center,Bar
20,Bang Na,Coffee Shop,Gun Range,Hotpot Restaurant,Japanese Restaurant,Fast Food Restaurant,Australian Restaurant,Shopping Mall,Grocery Store,Duty-free Shop,Food
22,Samrong,Som Tum Restaurant,Convenience Store,Coffee Shop,Shabu-Shabu Restaurant,Shopping Mall,Buffet,Shrine,Skating Rink,Electronics Store,Soccer Field
23,National Stadium,Coffee Shop,Hotel,Café,Dessert Shop,Clothing Store,Ice Cream Shop,Thai Restaurant,Japanese Restaurant,Korean Restaurant,Shopping Mall


Cluster 4

In [93]:
bkk_cluster.loc[bkk_cluster['Cluster Labels'] == 3, bkk_cluster.columns[[0] + list(range(4, bkk_cluster.shape[1]))]]

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Saphan Khwai,Thai Restaurant,Coffee Shop,Som Tum Restaurant,Noodle House,Asian Restaurant,Farmers Market,Bakery,Dessert Shop,Diner,Seafood Restaurant
4,Victory Monument,Steakhouse,Noodle House,Hotel,Café,Coffee Shop,Duty-free Shop,Snack Place,Chinese Restaurant,Restaurant,Park
12,Phrom Phong,Japanese Restaurant,Café,Hotel,Coffee Shop,Supermarket,Shopping Mall,Thai Restaurant,Restaurant,Ramen Restaurant,Massage Studio
13,Thong Lo,Thai Restaurant,Noodle House,Hotel,Coffee Shop,BBQ Joint,Bar,Café,Korean Restaurant,Dessert Shop,Hotel Bar
21,Bearing,Coffee Shop,Bus Station,Hotel,Furniture / Home Store,Convention Center,Convenience Store,Noodle House,Steakhouse,Yoshoku Restaurant,Electronics Store
24,Ratchadamri,Hotel,Italian Restaurant,Spa,Restaurant,Japanese Restaurant,Café,Steakhouse,Chinese Restaurant,Buffet,French Restaurant
26,Chong Nonsi,Café,Hotel,Coffee Shop,Japanese Restaurant,Thai Restaurant,Chinese Restaurant,Gym / Fitness Center,Lounge,Restaurant,Noodle House
27,Surasak,Café,Thai Restaurant,Hotel,Som Tum Restaurant,Spa,Chinese Restaurant,Italian Restaurant,Coffee Shop,Library,Modern European Restaurant
28,Saphan Taksin,Hotel,Noodle House,Thai Restaurant,Asian Restaurant,Café,Chinese Restaurant,Coffee Shop,Italian Restaurant,Cocktail Bar,Buddhist Temple


Cluster 5

In [94]:
bkk_cluster.loc[bkk_cluster['Cluster Labels'] == 4, bkk_cluster.columns[[0] + list(range(4, bkk_cluster.shape[1]))]]

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Ari,Coffee Shop,Café,Thai Restaurant,Som Tum Restaurant,Sushi Restaurant,Bar,Dessert Shop,Japanese Restaurant,Noodle House,BBQ Joint
6,Ratchathewi,Hotel,Hostel,Convenience Store,Massage Studio,Café,Bar,Sushi Restaurant,Pub,Farmers Market,Som Tum Restaurant
11,Asok,Hotel,Coffee Shop,Korean Restaurant,Spa,Thai Restaurant,Dessert Shop,Gym / Fitness Center,Burger Joint,Restaurant,Ramen Restaurant
30,Wongwian Yai,Convenience Store,Thai Restaurant,Coffee Shop,Café,Hotpot Restaurant,Seafood Restaurant,Noodle House,Asian Restaurant,Pool,Market
32,Talat Phlu,Japanese Restaurant,Coffee Shop,Hotpot Restaurant,Restaurant,Steakhouse,Fried Chicken Joint,Fast Food Restaurant,Ice Cream Shop,Convenience Store,Donut Shop


Cluster 6

In [95]:
bkk_cluster.loc[bkk_cluster['Cluster Labels'] == 5, bkk_cluster.columns[[0] + list(range(4, bkk_cluster.shape[1]))]]

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Phloen Chit,Hotel,Café,Gym / Fitness Center,Restaurant,Japanese Restaurant,Coffee Shop,Hotel Bar,Middle Eastern Restaurant,French Restaurant,Bakery
33,Wutthakat,Convenience Store,Restaurant,Health & Beauty Service,Yoshoku Restaurant,Drugstore,Food,Flea Market,Fast Food Restaurant,Farmers Market,Farm


Cluster 7

In [96]:
bkk_cluster.loc[bkk_cluster['Cluster Labels'] == 6, bkk_cluster.columns[[0] + list(range(4, bkk_cluster.shape[1]))]]

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,On Nut,Convenience Store,Coffee Shop,Fast Food Restaurant,Hotel,Spa,Pharmacy,Hotpot Restaurant,Ice Cream Shop,Italian Restaurant,Café
34,Bang Wa,Convenience Store,Thai Restaurant,Market,Pizza Place,Noodle House,Yoshoku Restaurant,Food,Flea Market,Fast Food Restaurant,Farmers Market


Let's group these stations together into clusters and see what's the most common venues for each cluster

In [97]:
bkk_cluster_label = bkk_cluster[['Station','Cluster Labels']]
bkk_cluster_label.sort_values(by='Station', inplace=True)
bkk_cluster_label.reset_index(inplace=True,drop='True')
bkk_cluster_label.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,Station,Cluster Labels
0,Ari,4
1,Asok,4
2,Bang Chak,0
3,Bang Na,2
4,Bang Wa,6


In [98]:
new_columns = bkk_grouped.columns.tolist()
bkk_grouped['Cluster Labels'] = bkk_cluster_label['Cluster Labels']


In [99]:
new_columns.pop(0)

'Station'

In [107]:
grouped_cluster = bkk_grouped[['Station']+ ['Cluster Labels']+new_columns]

In [109]:
grouped_cluster.drop('Station',1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [110]:
grouped_cluster.head()

Unnamed: 0,Cluster Labels,Accessories Store,American Restaurant,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Automotive Shop,BBQ Joint,Badminton Court,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bike Shop,Bistro,Board Shop,Bookstore,Boutique,Breakfast Spot,Bubble Tea Shop,Buddhist Temple,Buffet,Burger Joint,Bus Station,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Cantonese Restaurant,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comic Shop,Concert Hall,Convenience Store,Convention Center,Cosmetics Shop,Coworking Space,Creperie,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Donburi Restaurant,Donut Shop,Drugstore,Duty-free Shop,Eastern European Restaurant,Electronics Store,Factory,Farm,Farmers Market,Fast Food Restaurant,Flea Market,Food,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,German Restaurant,Gift Shop,Golf Driving Range,Grocery Store,Gun Range,Gym / Fitness Center,Halal Restaurant,Health & Beauty Service,Historic Site,Hookah Bar,Hostel,Hotel,Hotel Bar,Hotel Pool,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Kaiseki Restaurant,Karaoke Bar,Korean Restaurant,Library,Lounge,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monjayaki Restaurant,Movie Theater,Multiplex,Museum,Music Venue,Nail Salon,New American Restaurant,Night Market,Nightclub,Noodle House,Optical Shop,Organic Grocery,Other Nightlife,Palace,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Perfume Shop,Pet Café,Pet Store,Pharmacy,Pilates Studio,Pizza Place,Planetarium,Playground,Plaza,Pool,Pub,Ramen Restaurant,Record Shop,Residential Building (Apartment / Condo),Resort,Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Science Museum,Seafood Restaurant,Shabu-Shabu Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Shrine,Skating Rink,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Som Tum Restaurant,Soup Place,South American Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Club,Stadium,Steakhouse,Sukiyaki Restaurant,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park,Tonkatsu Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Women's Store,Yoga Studio,Yoshoku Restaurant
0,4,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.0,0.0,0.038961,0.0,0.0,0.0,0.051948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.012987,0.116883,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.12987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051948,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.025974,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.038961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.038961,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.012987,0.0,0.0,0.0,0.012987,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064935,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.012987,0.051948,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.012987,0.012987,0.0,0.0,0.0,0.0,0.0
1,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.14,0.02,0.0,0.01,0.01,0.0,0.0,0.02,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.02,0.0,0.06,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0
2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [113]:
grouped_cluster = grouped_cluster.groupby('Cluster Labels').sum()

In [115]:
grouped_cluster.reset_index(inplace=True)
grouped_cluster.head()

Unnamed: 0,Cluster Labels,Accessories Store,American Restaurant,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Automotive Shop,BBQ Joint,Badminton Court,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bike Shop,Bistro,Board Shop,Bookstore,Boutique,Breakfast Spot,Bubble Tea Shop,Buddhist Temple,Buffet,Burger Joint,Bus Station,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Cantonese Restaurant,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comic Shop,Concert Hall,Convenience Store,Convention Center,Cosmetics Shop,Coworking Space,Creperie,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Donburi Restaurant,Donut Shop,Drugstore,Duty-free Shop,Eastern European Restaurant,Electronics Store,Factory,Farm,Farmers Market,Fast Food Restaurant,Flea Market,Food,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,German Restaurant,Gift Shop,Golf Driving Range,Grocery Store,Gun Range,Gym / Fitness Center,Halal Restaurant,Health & Beauty Service,Historic Site,Hookah Bar,Hostel,Hotel,Hotel Bar,Hotel Pool,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Kaiseki Restaurant,Karaoke Bar,Korean Restaurant,Library,Lounge,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monjayaki Restaurant,Movie Theater,Multiplex,Museum,Music Venue,Nail Salon,New American Restaurant,Night Market,Nightclub,Noodle House,Optical Shop,Organic Grocery,Other Nightlife,Palace,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Perfume Shop,Pet Café,Pet Store,Pharmacy,Pilates Studio,Pizza Place,Planetarium,Playground,Plaza,Pool,Pub,Ramen Restaurant,Record Shop,Residential Building (Apartment / Condo),Resort,Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Science Museum,Seafood Restaurant,Shabu-Shabu Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Shrine,Skating Rink,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Som Tum Restaurant,Soup Place,South American Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Club,Stadium,Steakhouse,Sukiyaki Restaurant,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park,Tonkatsu Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Women's Store,Yoga Studio,Yoshoku Restaurant
0,0,0.010101,0.011111,0.010101,0.0625,0.0,0.0,0.072551,0.03125,0.0,0.047619,0.210636,0.0,0.010101,0.315384,0.119854,0.03125,0.03125,0.0,0.0,0.05,0.098214,0.0,0.020202,0.101515,0.0,0.045928,0.0,0.045518,0.039513,0.0,0.0,0.508321,0.0,0.010101,0.0,0.215769,0.012658,0.030303,0.021212,0.834948,0.0,0.0,0.010101,0.933783,0.010101,0.060606,0.0,0.0,0.0,0.010101,0.03125,0.010101,0.041414,0.138857,0.080751,0.05,0.0,0.012658,0.011111,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.035714,0.063244,0.05,0.05,0.035827,0.0,0.295238,0.021212,0.070826,0.010101,0.047619,0.0,0.0,0.028283,0.044444,0.011111,0.0,0.0,0.0,0.0,0.175254,0.028283,0.0,0.0,0.0,0.134273,0.699245,0.068278,0.0,0.192214,0.041414,0.025316,0.010101,0.139787,0.043434,0.207545,0.0,0.011111,0.011111,0.0,0.0,0.184712,0.0,0.080808,0.03125,0.086041,0.0,0.0,0.025316,0.012658,0.050633,0.0,0.0,0.010101,0.0,0.030303,0.010101,0.015625,0.029412,0.0,0.0,0.03125,0.022759,1.004164,0.035714,0.010101,0.029412,0.0,0.026736,0.0,0.0,0.010101,0.020202,0.0,0.0,0.046825,0.0,0.078789,0.0,0.015625,0.020202,0.0,0.03125,0.091319,0.0,0.0,0.0,0.224172,0.012658,0.047948,0.0,0.0,0.0,0.069816,0.077065,0.0,0.158849,0.066964,0.010101,0.0,0.015625,0.010101,0.0,0.0,0.083663,0.0,0.0,0.117646,0.0,0.010101,0.029412,0.0,0.077897,0.0,0.104249,0.090137,0.0,0.011111,0.042961,0.0,0.52045,0.0,0.0,0.044981,0.0,0.020202,0.011111,0.0,0.068806,0.0,0.041414,0.021212
1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.230769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,0.0,0.010101,0.0,0.022727,0.010101,0.010101,0.021465,0.0,0.111111,0.0,0.102694,0.0,0.0,0.066919,0.090909,0.021465,0.021465,0.022727,0.0,0.0,0.0,0.010101,0.081229,0.010101,0.0,0.056818,0.0,0.037037,0.0,0.045455,0.0,0.152778,0.011364,0.010101,0.0,0.020202,0.0,0.05303,0.0,0.422138,0.010101,0.010101,0.022727,0.071128,0.0,0.05303,0.0,0.0,0.022727,0.0,0.0,0.011364,0.047138,0.074495,0.011364,0.0,0.011364,0.0,0.011364,0.0,0.011364,0.0,0.0,0.037037,0.0,0.0,0.011364,0.203704,0.0,0.0,0.031566,0.0,0.022727,0.021465,0.057239,0.0,0.0,0.022727,0.022727,0.0,0.0,0.0,0.010101,0.011364,0.111111,0.111111,0.042929,0.032828,0.010101,0.0,0.0,0.020202,0.060606,0.0,0.0,0.182239,0.090067,0.0,0.010101,0.011364,0.010101,0.280724,0.0,0.0,0.0,0.0,0.011364,0.05303,0.0,0.0,0.0,0.059764,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048401,0.042929,0.0,0.0,0.011364,0.0,0.0,0.093855,0.0,0.0,0.0,0.010101,0.0,0.045455,0.0,0.0,0.010101,0.0,0.022727,0.0,0.0,0.047138,0.011364,0.011364,0.010101,0.010101,0.0,0.112795,0.010101,0.0,0.0,0.021465,0.0,0.0,0.022727,0.011364,0.011364,0.034091,0.047138,0.020202,0.246633,0.0,0.037037,0.048401,0.0,0.010101,0.037037,0.010101,0.15362,0.0,0.0,0.021465,0.0,0.0,0.0,0.020202,0.068603,0.0,0.022727,0.022727,0.0,0.0,0.010101,0.0,0.169613,0.0,0.022727,0.0,0.0,0.011364,0.0,0.022727,0.0,0.0,0.0,0.021465
3,3,0.033911,0.0,0.0,0.022727,0.0,0.0,0.242698,0.0,0.0,0.0,0.105236,0.0,0.0,0.090711,0.165407,0.0,0.010101,0.0,0.0,0.0,0.082463,0.0,0.020202,0.030303,0.0,0.020202,0.022727,0.044584,0.010101,0.111111,0.0,0.520236,0.0,0.0,0.050505,0.230009,0.0,0.0,0.053236,0.72444,0.0,0.0,0.0,0.111111,0.111111,0.010101,0.010101,0.017241,0.0,0.0,0.0,0.010101,0.020202,0.111119,0.0,0.04,0.0,0.0,0.0,0.010101,0.0,0.02381,0.020408,0.0,0.0,0.0,0.08,0.0,0.010101,0.0,0.044012,0.0,0.0,0.085194,0.010101,0.0,0.111111,0.010101,0.0,0.010101,0.0,0.0,0.0,0.0,0.017241,0.0,0.144792,0.032828,0.02381,0.0,0.0,0.110707,0.853395,0.156107,0.027778,0.073911,0.046537,0.017241,0.0,0.265271,0.010101,0.237356,0.02381,0.010101,0.010101,0.020408,0.010101,0.074727,0.027778,0.080808,0.0,0.081455,0.0,0.022727,0.0,0.0,0.0,0.0,0.027778,0.0,0.010101,0.020202,0.020202,0.04,0.0,0.010101,0.0,0.0,0.0,0.545832,0.0,0.0,0.0,0.0,0.0,0.033911,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.039969,0.010101,0.060812,0.0,0.010101,0.0,0.158867,0.010101,0.010101,0.010101,0.0,0.0,0.050101,0.04,0.0,0.050505,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.207345,0.0,0.0,0.235838,0.0,0.0,0.017241,0.0,0.28505,0.0,0.100575,0.050711,0.010101,0.0,0.051152,0.020408,0.721622,0.044218,0.0,0.0,0.027778,0.030509,0.02381,0.027778,0.066739,0.04,0.010101,0.0
4,4,0.0,0.027027,0.0,0.0,0.0,0.0,0.095125,0.0,0.0,0.0,0.094856,0.045895,0.0,0.043256,0.10846,0.0,0.033256,0.0,0.0,0.0,0.018868,0.0,0.018868,0.0,0.0,0.055895,0.0,0.02,0.083001,0.018868,0.012987,0.304759,0.0,0.0,0.0,0.031855,0.0,0.0,0.0,0.407838,0.0,0.0,0.0,0.296692,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.105204,0.018868,0.0,0.0,0.0,0.012987,0.037736,0.0,0.0,0.0,0.018868,0.027027,0.0,0.023256,0.037736,0.0,0.0,0.028868,0.0,0.0,0.01,0.060723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.074842,0.023256,0.018868,0.0,0.0,0.11601,0.302174,0.02,0.0,0.120658,0.047736,0.0,0.0,0.02,0.022987,0.157689,0.01,0.0,0.0,0.0,0.0,0.103256,0.0,0.0,0.027027,0.079767,0.0,0.0,0.0,0.01,0.0,0.018868,0.0,0.0,0.0,0.0,0.028868,0.023256,0.012987,0.0,0.0,0.0,0.01,0.126271,0.0,0.0,0.031855,0.023256,0.018868,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.01,0.038868,0.0,0.0,0.0,0.037027,0.078367,0.038868,0.0,0.0,0.0,0.103979,0.01,0.0,0.012987,0.0,0.0,0.054054,0.092138,0.0,0.041855,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.098191,0.018868,0.0,0.091855,0.0,0.0,0.0,0.0,0.111006,0.0,0.041855,0.09846,0.0,0.01,0.02,0.0,0.301141,0.0,0.0,0.0,0.0,0.022987,0.012987,0.01,0.0,0.0,0.0,0.0


In [116]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Cluster Labels']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
clusters_venues_sorted = pd.DataFrame(columns=columns)
clusters_venues_sorted['Cluster Labels'] = grouped_cluster['Cluster Labels']

for ind in np.arange(grouped_cluster.shape[0]):
    clusters_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped_cluster.iloc[ind, :], num_top_venues)

clusters_venues_sorted

Unnamed: 0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,Noodle House,Convenience Store,Coffee Shop,Hotel,Thai Restaurant,Café,Bakery,Food Truck,Restaurant,Chinese Restaurant
1,1,Convenience Store,Food Truck,Thai Restaurant,Café,Factory,Seafood Restaurant,Bed & Breakfast,Badminton Court,Coffee Shop,Food Stand
2,2,Coffee Shop,Japanese Restaurant,Shopping Mall,Fast Food Restaurant,Hotpot Restaurant,Thai Restaurant,Som Tum Restaurant,Café,Ramen Restaurant,Gun Range
3,3,Hotel,Coffee Shop,Thai Restaurant,Noodle House,Café,Steakhouse,Italian Restaurant,Asian Restaurant,Japanese Restaurant,Spa
4,4,Coffee Shop,Café,Hotel,Thai Restaurant,Convenience Store,Japanese Restaurant,Noodle House,Hotpot Restaurant,Hostel,Steakhouse
5,5,Convenience Store,Restaurant,Hotel,Health & Beauty Service,Café,Japanese Restaurant,Gym / Fitness Center,Coffee Shop,Middle Eastern Restaurant,Hotel Bar
6,6,Convenience Store,Thai Restaurant,Pizza Place,Noodle House,Market,Coffee Shop,Hotel,Fast Food Restaurant,Hotpot Restaurant,Café


In [None]:
We can see that cluster number 3 (index=2)