In [1]:
!pip install beautifulsoup4 geopy folium==0.5.0 geocoder

Collecting folium==0.5.0
[?25l  Downloading https://files.pythonhosted.org/packages/07/37/456fb3699ed23caa0011f8b90d9cad94445eddc656b601e6268090de35f5/folium-0.5.0.tar.gz (79kB)
[K     |████████████████████████████████| 81kB 8.0MB/s eta 0:00:011
[?25hCollecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 9.1MB/s eta 0:00:01
Collecting branca (from folium==0.5.0)
  Downloading https://files.pythonhosted.org/packages/81/6d/31c83485189a2521a75b4130f1fee5364f772a0375f81afff619004e5237/branca-0.4.0-py3-none-any.whl
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Building wheels for collected packages: folium
  Building wheel for folium (setup.py) ... [?25ldone
[?2

# 1. Import Library

In [2]:
import numpy as np 
import pandas as pd
import requests
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

from bs4 import BeautifulSoup
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium

# 2.Scrap data from Wikipedia to Dataframe

In [3]:
page = requests.get("https://en.wikipedia.org/wiki/Phra_Nakhon_Si_Ayutthaya_Province").text
soup = BeautifulSoup(page, "html.parser")
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Phra Nakhon Si Ayutthaya Province - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"XowC0ApAAL8AAn3xZvYAAABB","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Phra_Nakhon_Si_Ayutthaya_Province","wgTitle":"Phra Nakhon Si Ayutthaya Province","wgCurRevisionId":945850206,"wgRevisionId":945850206,"wgArticleId":353987,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["CS1 Thai-language sources (th)","CS1 errors: missing periodical","CS1: Julian–Gregorian uncertainty","CS

In [4]:
#create list of distinct
districtList = []

In [5]:
for row in soup.find_all("table", class_="")[0].findAll("a"):
    print(row.text)
    districtList.append(row.text)

Phra Nakhon Si Ayutthaya
Tha Ruea
Nakhon Luang
Bang Sai (1404)
Bang Ban
Bang Pa-in
Bang Pahan
Phak Hai
Phachi
Lat Bua Luang
Wang Noi
Sena
Bang Sai (1413)
Uthai
Maha Rat
Ban Phraek


In [6]:
# create a new DataFrame from the list
ayt_df = pd.DataFrame({"Neighborhood": districtList})

ayt_df

Unnamed: 0,Neighborhood
0,Phra Nakhon Si Ayutthaya
1,Tha Ruea
2,Nakhon Luang
3,Bang Sai (1404)
4,Bang Ban
5,Bang Pa-in
6,Bang Pahan
7,Phak Hai
8,Phachi
9,Lat Bua Luang


In [7]:
ayt_df.shape

(16, 1)

# 3.Get the geographical coordinates

In [8]:
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinatesz
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Ayutthaya , Thailand'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [9]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in ayt_df["Neighborhood"].tolist() ]

In [10]:
coords

[[14.333951748566847, 100.611089393996],
 [14.551643898973737, 100.72760799459246],
 [14.46431288648705, 100.605739010545],
 [14.319430444775492, 100.30018856873443],
 [14.373931781911892, 100.48586464374881],
 [14.215502745952506, 100.57795346053229],
 [14.462617728324403, 100.5449112574093],
 [14.458821581523694, 100.36934314714182],
 [14.44901045285593, 100.72823566287434],
 [14.165790713227352, 100.30765836989048],
 [14.22675267985096, 100.71542141328888],
 [14.327254927220906, 100.40446233281557],
 [14.319430444775492, 100.30018856873443],
 [14.362623263800401, 100.67115224735096],
 [14.534210882597051, 100.5265710577346],
 [14.647845820710131, 100.58251181510389]]

In [11]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [12]:
# merge the coordinates into the original dataframe
ayt_df['Latitude'] = df_coords['Latitude']
ayt_df['Longitude'] = df_coords['Longitude']

In [13]:
#check neighborhoods and coordinates
print(ayt_df.shape)
ayt_df

(16, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Phra Nakhon Si Ayutthaya,14.333952,100.611089
1,Tha Ruea,14.551644,100.727608
2,Nakhon Luang,14.464313,100.605739
3,Bang Sai (1404),14.31943,100.300189
4,Bang Ban,14.373932,100.485865
5,Bang Pa-in,14.215503,100.577953
6,Bang Pahan,14.462618,100.544911
7,Phak Hai,14.458822,100.369343
8,Phachi,14.44901,100.728236
9,Lat Bua Luang,14.165791,100.307658


In [14]:
# save the DataFrame as CSV file
ayt_df.to_csv("ayt_df.csv", index=False)

# 4. Define Foursquare Credential

In [15]:
CLIENT_ID = "3NW42DZS4D4PM2TG0IZK01F24OJCFVYHVT4XLKAHTJ5E5SN5"    #Foursquare ID
CLIENT_SECRET = "DY2YTWPVTN5IFVHZLWP5K1JIJ44IX55CXTKMUNQZ13BTHIHQ"    #Foursquare SECRET 
VERSION = "20180605"    #Foursquare Version
LIMIT =30
print('Your credentails:')
print('Foursquare_ID: ' + CLIENT_ID)
print('Foursquare_Secret:' + CLIENT_SECRET)

Your credentails:
Foursquare_ID: 3NW42DZS4D4PM2TG0IZK01F24OJCFVYHVT4XLKAHTJ5E5SN5
Foursquare_Secret:DY2YTWPVTN5IFVHZLWP5K1JIJ44IX55CXTKMUNQZ13BTHIHQ


In [16]:
address = 'Ayutthaya, Thailand'
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address) 
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Ayutthaya are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Ayutthaya are 14.3972206, 100.53194461258316.


In [17]:
# create map of Ayutthaya using latitude and longitude values
map_ayt = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(ayt_df['Latitude'], ayt_df['Longitude'], ayt_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_ayt)  
    
map_ayt

In [18]:
# save the map as HTML file
map_ayt.save('map_ayt.html')

# 5. Use the Foursquare API to explore the neighborhoods

 let's get the top 100 venues that are within a radius of 1000 meters.

In [19]:
radius = 1000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(ayt_df['Latitude'], ayt_df['Longitude'], ayt_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [20]:
#convert the venues list to new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df

(134, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Phra Nakhon Si Ayutthaya,14.333952,100.611089,Starbucks (สตาร์บัคส์),14.335298,100.61106,Coffee Shop
1,Phra Nakhon Si Ayutthaya,14.333952,100.611089,Ayutthaya City Park Cineplex (อยุธยาซิตี้พาร์ค...,14.336897,100.609972,Multiplex
2,Phra Nakhon Si Ayutthaya,14.333952,100.611089,Robinson (โรบินสัน),14.335253,100.61166,Department Store
3,Phra Nakhon Si Ayutthaya,14.333952,100.611089,Hotpot Buffet,14.335561,100.610752,Hotpot Restaurant
4,Phra Nakhon Si Ayutthaya,14.333952,100.611089,MK (เอ็มเค),14.336183,100.609856,Hotpot Restaurant
5,Phra Nakhon Si Ayutthaya,14.333952,100.611089,Swensen's (สเวนเซ่นส์),14.335381,100.61073,Ice Cream Shop
6,Phra Nakhon Si Ayutthaya,14.333952,100.611089,Fuji (ฟูจิ),14.336052,100.6107,Japanese Restaurant
7,Phra Nakhon Si Ayutthaya,14.333952,100.611089,B2S (บีทูเอส),14.335329,100.611007,Bookstore
8,Phra Nakhon Si Ayutthaya,14.333952,100.611089,Sizzler,14.33554,100.61059,Salad Place
9,Phra Nakhon Si Ayutthaya,14.333952,100.611089,Nikuya (นิกุยะ),14.335604,100.610626,BBQ Joint


Let's check how many venues were returned for each district

In [21]:
venues_df.groupby(['Neighborhood']).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ban Phraek,6,6,6,6,6,6
Bang Ban,5,5,5,5,5,5
Bang Pa-in,8,8,8,8,8,8
Bang Pahan,16,16,16,16,16,16
Bang Sai (1404),2,2,2,2,2,2
Bang Sai (1413),2,2,2,2,2,2
Lat Bua Luang,4,4,4,4,4,4
Maha Rat,4,4,4,4,4,4
Nakhon Luang,9,9,9,9,9,9
Phachi,6,6,6,6,6,6


Let's find out how many unique categories can be curated from all the returned venues

In [22]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 54 uniques categories.


In [23]:
# print out the list of categories
venues_df['VenueCategory'].unique()[:52]

array(['Coffee Shop', 'Multiplex', 'Department Store',
       'Hotpot Restaurant', 'Ice Cream Shop', 'Japanese Restaurant',
       'Bookstore', 'Salad Place', 'BBQ Joint', 'Shopping Mall',
       'Fast Food Restaurant', 'Mobile Phone Shop', 'Noodle House',
       'Steakhouse', 'Pizza Place', 'Thai Restaurant', 'Supermarket',
       'Ramen Restaurant', 'Internet Cafe', 'Gift Shop',
       'Asian Restaurant', 'Convenience Store', 'Furniture / Home Store',
       'Seafood Restaurant', 'Pool', 'Other Great Outdoors',
       'Som Tum Restaurant', 'Hotel', 'Restaurant', 'Diner',
       'History Museum', 'Comfort Food Restaurant', 'Food & Drink Shop',
       'Southern / Soul Food Restaurant', 'Flea Market', 'Café',
       'Bus Station', 'Intersection', 'Soup Place', 'Farmers Market',
       'Train Station', 'Gourmet Shop', 'Sporting Goods Shop', 'Pharmacy',
       'Market', 'Food Court', 'Bakery', 'Dessert Shop', 'Rest Area',
       'Whisky Bar', 'Shop & Service', 'Grocery Store'], dtype=obje

In [24]:
# check if the results contain "Som Tum Restaurant"
"Restaurant" in venues_df['VenueCategory'].unique()

True

# 6. Analyze Each District

In [25]:
# one hot encoding
ayt_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
ayt_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [ayt_onehot.columns[-1]] + list(ayt_onehot.columns[:-1])
ayt_onehot = ayt_onehot[fixed_columns]

print(ayt_onehot.shape)
ayt_onehot.head()

(134, 55)


Unnamed: 0,Neighborhoods,Asian Restaurant,BBQ Joint,Bakery,Bookstore,Bus Station,Café,Candy Store,Coffee Shop,Comfort Food Restaurant,Convenience Store,Department Store,Dessert Shop,Diner,Farmers Market,Fast Food Restaurant,Flea Market,Food & Drink Shop,Food Court,Fried Chicken Joint,Furniture / Home Store,Gift Shop,Gourmet Shop,Grocery Store,History Museum,Hotel,Hotpot Restaurant,Ice Cream Shop,Internet Cafe,Intersection,Japanese Restaurant,Market,Mobile Phone Shop,Multiplex,Noodle House,Other Great Outdoors,Pharmacy,Pizza Place,Pool,Ramen Restaurant,Rest Area,Restaurant,Salad Place,Seafood Restaurant,Shop & Service,Shopping Mall,Som Tum Restaurant,Soup Place,Southern / Soul Food Restaurant,Sporting Goods Shop,Steakhouse,Supermarket,Thai Restaurant,Train Station,Whisky Bar
0,Phra Nakhon Si Ayutthaya,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Phra Nakhon Si Ayutthaya,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Phra Nakhon Si Ayutthaya,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Phra Nakhon Si Ayutthaya,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Phra Nakhon Si Ayutthaya,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


group rows by district and by taking the mean of frequency on each category

In [26]:
ayt_grouped = ayt_onehot.groupby(['Neighborhoods']).mean().reset_index()

print(ayt_grouped.shape)
ayt_grouped

(16, 55)


Unnamed: 0,Neighborhoods,Asian Restaurant,BBQ Joint,Bakery,Bookstore,Bus Station,Café,Candy Store,Coffee Shop,Comfort Food Restaurant,Convenience Store,Department Store,Dessert Shop,Diner,Farmers Market,Fast Food Restaurant,Flea Market,Food & Drink Shop,Food Court,Fried Chicken Joint,Furniture / Home Store,Gift Shop,Gourmet Shop,Grocery Store,History Museum,Hotel,Hotpot Restaurant,Ice Cream Shop,Internet Cafe,Intersection,Japanese Restaurant,Market,Mobile Phone Shop,Multiplex,Noodle House,Other Great Outdoors,Pharmacy,Pizza Place,Pool,Ramen Restaurant,Rest Area,Restaurant,Salad Place,Seafood Restaurant,Shop & Service,Shopping Mall,Som Tum Restaurant,Soup Place,Southern / Soul Food Restaurant,Sporting Goods Shop,Steakhouse,Supermarket,Thai Restaurant,Train Station,Whisky Bar
0,Ban Phraek,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.166667,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bang Ban,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0
2,Bang Pa-in,0.0,0.125,0.0,0.0,0.125,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.125,0.125,0.0,0.0
3,Bang Pahan,0.125,0.0,0.0,0.0,0.0625,0.0,0.0,0.0625,0.0,0.1875,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1875,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0
4,Bang Sai (1404),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Bang Sai (1413),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Lat Bua Luang,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
7,Maha Rat,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Nakhon Luang,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Phachi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.166667,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0


In [27]:
len(ayt_grouped[ayt_grouped["Restaurant"] > 0])

3

Create a new DataFrame for Som Tum Restaurant data only

In [28]:
ayt_stRes = ayt_grouped[['Neighborhoods', 'Restaurant']]

In [29]:
ayt_stRes

Unnamed: 0,Neighborhoods,Restaurant
0,Ban Phraek,0.0
1,Bang Ban,0.0
2,Bang Pa-in,0.0
3,Bang Pahan,0.0625
4,Bang Sai (1404),0.0
5,Bang Sai (1413),0.0
6,Lat Bua Luang,0.0
7,Maha Rat,0.0
8,Nakhon Luang,0.111111
9,Phachi,0.0


# 7.Cluster Neighborhoods

Using k-means to cluster the neighborhoods in Ayuthaya into 3 clusters.

In [30]:
# set number of clusters
kclusters = 3

ayt_clustering = ayt_grouped.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ayt_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 2, 1, 0, 0, 2, 1, 2, 1], dtype=int32)

In [31]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
ayt_merged = ayt_stRes.copy()

# add clustering labels
ayt_merged["Cluster Labels"] = kmeans.labels_


In [32]:
ayt_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
ayt_merged.head()

Unnamed: 0,Neighborhood,Restaurant,Cluster Labels
0,Ban Phraek,0.0,2
1,Bang Ban,0.0,1
2,Bang Pa-in,0.0,2
3,Bang Pahan,0.0625,1
4,Bang Sai (1404),0.0,0


In [33]:
# merge ayt_grouped with ayt_data to add latitude/longitude for each neighborhood
ayt_merged =ayt_merged.join(ayt_df.set_index("Neighborhood"), on="Neighborhood")

print(ayt_merged.shape)
ayt_merged

(16, 5)


Unnamed: 0,Neighborhood,Restaurant,Cluster Labels,Latitude,Longitude
0,Ban Phraek,0.0,2,14.647846,100.582512
1,Bang Ban,0.0,1,14.373932,100.485865
2,Bang Pa-in,0.0,2,14.215503,100.577953
3,Bang Pahan,0.0625,1,14.462618,100.544911
4,Bang Sai (1404),0.0,0,14.31943,100.300189
5,Bang Sai (1413),0.0,0,14.31943,100.300189
6,Lat Bua Luang,0.0,2,14.165791,100.307658
7,Maha Rat,0.0,1,14.534211,100.526571
8,Nakhon Luang,0.111111,2,14.464313,100.605739
9,Phachi,0.0,1,14.44901,100.728236


In [34]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ayt_merged['Latitude'], ayt_merged['Longitude'], ayt_merged['Neighborhood'], ayt_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [35]:
#Save the map as HTML file
map_clusters.save("Ayt_map_clusters.html")

# 8.Examine Clusters

Cluster 0

In [36]:
# ayt_merged.loc[ayt_merged['Cluster Labels'] == 0, ayt_merged.columns[[1] + list(range(5, ayt_merged.shape[1]))]]
ayt_merged.loc[ayt_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Restaurant,Cluster Labels,Latitude,Longitude
4,Bang Sai (1404),0.0,0,14.31943,100.300189
5,Bang Sai (1413),0.0,0,14.31943,100.300189


Cluster 1

In [37]:
ayt_merged.loc[ayt_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Restaurant,Cluster Labels,Latitude,Longitude
1,Bang Ban,0.0,1,14.373932,100.485865
3,Bang Pahan,0.0625,1,14.462618,100.544911
7,Maha Rat,0.0,1,14.534211,100.526571
9,Phachi,0.0,1,14.44901,100.728236
14,Uthai,0.333333,1,14.362623,100.671152


Cluster 2

In [38]:
ayt_merged.loc[ayt_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Restaurant,Cluster Labels,Latitude,Longitude
0,Ban Phraek,0.0,2,14.647846,100.582512
2,Bang Pa-in,0.0,2,14.215503,100.577953
6,Lat Bua Luang,0.0,2,14.165791,100.307658
8,Nakhon Luang,0.111111,2,14.464313,100.605739
10,Phak Hai,0.0,2,14.458822,100.369343
11,Phra Nakhon Si Ayutthaya,0.0,2,14.333952,100.611089
12,Sena,0.0,2,14.327255,100.404462
13,Tha Ruea,0.0,2,14.551644,100.727608
15,Wang Noi,0.0,2,14.226753,100.715421
