# CAPSTONE PROJECT: BATTLE OF THE NEIGHBORHOODS  
## Bali Indonesia Venue Recommendation for Food Lover Tourist

____

### Preparing Dataset

In [127]:
# Import Library
import pandas as pd
import numpy as np

In [193]:
# Get 4 popular Bali Location
Bali_Area = pd.DataFrame(data = {'Regency':['Denpasar','Tabanan','Gianyar','Klungkung','Bangli']})
Bali_Area

Unnamed: 0,Regency
0,Denpasar
1,Tabanan
2,Gianyar
3,Klungkung
4,Bangli


In [194]:
df_Bali = Bali_Area.iloc[:,0]
df_Bali

0     Denpasar
1      Tabanan
2      Gianyar
3    Klungkung
4       Bangli
Name: Regency, dtype: object

In [195]:
df_Bali[1]

'Tabanan'

### Getting Coordinate for Bali City and Regency

In [196]:
# Import Library for Geocoding

#mapping tools
# !pip install geopy 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# !pip install folium
import folium # map rendering library

#other library
from sklearn.cluster import KMeans
import json
import requests
import matplotlib.cm as cm
import matplotlib.colors as colors

In [200]:
# Make Function to generate coordinates

def getlatlong(address):
    geolocator = Nominatim()
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    return latitude,longitude

In [201]:
# Capture Latitudes and Longitudes

df_Bali = Bali_Area.iloc[:,0]

latitudes = {}
longitudes = {}

for i in range(0,5):
    latitudes[i],longitudes[i] = getlatlong(df_Bali[i])
    print(df_Bali[i], latitudes[i], longitudes[i])
    

  after removing the cwd from sys.path.


Denpasar -8.6524973 115.2191175
Tabanan -8.5392306 115.1265683
Gianyar -8.5482357 115.32605401537212
Klungkung -8.5350173 115.4032763
Bangli -8.4603116 115.3535198


In [202]:
# Combining Regency, Latitude and Longitude Data to create final dataset
df_Bali_Final = pd.DataFrame (data = {'Regency':df_Bali, 'Lat':latitudes,'Long':longitudes})
df_Bali_Final

Unnamed: 0,Regency,Lat,Long
0,Denpasar,-8.652497,115.219117
1,Tabanan,-8.539231,115.126568
2,Gianyar,-8.548236,115.326054
3,Klungkung,-8.535017,115.403276
4,Bangli,-8.460312,115.35352


In [205]:
# create map of Bali using latitude and longitude values
map_Bali = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, Regency in zip(df_Bali_Final['Lat'], 
                             df_Bali_Final['Long'], 
                             df_Bali_Final['Regency']):
  label = '{}'.format(Regency)
  label = folium.Popup(label, parse_html=True)
  folium.CircleMarker(
      [lat, lng],
      radius=5,
      popup=label,
      color='blue',
      fill=True,
      fill_color='#3186cc',
      fill_opacity=0.7).add_to(map_Bali) 
    
map_Bali

In [None]:
{'Denpasar','Tabana','Gianyar','Klungkung','Bangli'}

### Get Food Venue Around

### Setup Credential

In [206]:
# Setup Credential

CLIENT_ID = '2MD2TJL1K0FC1CPALRMURS3DOMMNTK1TA0YFS2ORISNG3YZX' # your Foursquare ID
CLIENT_SECRET = 'BCGPQPXRYKEFLG1I0N4RCLG5GS0UVLVE5DZQCZOKI5ICU0GE' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 2MD2TJL1K0FC1CPALRMURS3DOMMNTK1TA0YFS2ORISNG3YZX
CLIENT_SECRET:BCGPQPXRYKEFLG1I0N4RCLG5GS0UVLVE5DZQCZOKI5ICU0GE


Create a function to search food category around

In [229]:
def getNearbyVenuesCategory(names, latitudes, longitudes, radius=500, LIMIT = 100, category_id = '4bf58dd8d48988d143941735'):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&categoryId={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT,
            category_id)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Regency', 
                  'Regency Latitude', 
                  'Regency Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [251]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Regency', 
                  'Regency Latitude', 
                  'Regency Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [252]:
Bali_venues = getNearbyVenues(df_Bali_Final.Regency,
                              df_Bali_Final.Lat,
                              df_Bali_Final.Long)

In [253]:
print(Bali_venues.shape)
Bali_venues.head()

(49, 7)


Unnamed: 0,Regency,Regency Latitude,Regency Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Denpasar,-8.652497,115.219117,Dapoer Pemuda,-8.653717,115.217061,Indonesian Restaurant
1,Denpasar,-8.652497,115.219117,Kober Mie Setan Kaliasem,-8.65508,115.218596,Noodle House
2,Denpasar,-8.652497,115.219117,Voltvet Eatery & Coffee,-8.653594,115.21702,Café
3,Denpasar,-8.652497,115.219117,Warung Wardani,-8.651159,115.215957,Indonesian Restaurant
4,Denpasar,-8.652497,115.219117,Patung Catur Muka,-8.656036,115.216993,Monument / Landmark


In [255]:
# Get Total Venue in each neighborhood

Bali_venues.groupby("Regency").Venue.count().sort_values(ascending=False).head()

Regency
Denpasar     28
Klungkung    11
Tabanan       4
Gianyar       3
Bangli        3
Name: Venue, dtype: int64

In [256]:
# Total Unique Category in neighborhood
print('There are {} uniques categories.'.format(len(Bali_venues['Venue Category'].unique())))
print('')
print(Bali_venues['Venue Category'].unique())

There are 34 uniques categories.

['Indonesian Restaurant' 'Noodle House' 'Café' 'Monument / Landmark'
 'Coffee Shop' 'Satay Restaurant' 'Historic Site' 'Gym' 'Hotel'
 'BBQ Joint' 'Bike Shop' 'Pizza Place' 'Bakery' 'Seafood Restaurant'
 'Pet Store' 'Middle Eastern Restaurant' 'Ice Cream Shop'
 'Miscellaneous Shop' 'Asian Restaurant' 'Convenience Store'
 'Video Game Store' 'Breakfast Spot' 'Chinese Restaurant' 'Farmers Market'
 'Park' 'History Museum' 'Department Store' 'Night Market' 'Field'
 'Food Truck' 'Javanese Restaurant' 'General Entertainment' 'Snack Place'
 'Flea Market']


#### One Hot Encoding in category data

Let us now take the venue category information and create a dataframe with a one hot enconding of these data.

In [258]:
# one hot encoding
Bali_onehot = pd.get_dummies(Bali_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Bali_onehot['Regency'] = Bali_venues['Regency'] 

# move neighborhood column to the first column
fixed_columns = [Bali_onehot.columns[-1]] + list(Bali_onehot.columns[:-1])
Bali_onehot = Bali_onehot[fixed_columns]

Bali_onehot.head()

Unnamed: 0,Regency,Asian Restaurant,BBQ Joint,Bakery,Bike Shop,Breakfast Spot,Café,Chinese Restaurant,Coffee Shop,Convenience Store,...,Monument / Landmark,Night Market,Noodle House,Park,Pet Store,Pizza Place,Satay Restaurant,Seafood Restaurant,Snack Place,Video Game Store
0,Denpasar,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Denpasar,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
2,Denpasar,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Denpasar,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Denpasar,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [259]:
Bali_onehot.shape

(49, 35)

Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [260]:
Bali_grouped = Bali_onehot.groupby('Regency').mean().reset_index()
Bali_grouped

Unnamed: 0,Regency,Asian Restaurant,BBQ Joint,Bakery,Bike Shop,Breakfast Spot,Café,Chinese Restaurant,Coffee Shop,Convenience Store,...,Monument / Landmark,Night Market,Noodle House,Park,Pet Store,Pizza Place,Satay Restaurant,Seafood Restaurant,Snack Place,Video Game Store
0,Bangli,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0
1,Denpasar,0.035714,0.035714,0.035714,0.035714,0.0,0.035714,0.0,0.107143,0.035714,...,0.035714,0.0,0.107143,0.0,0.035714,0.035714,0.035714,0.035714,0.0,0.035714
2,Gianyar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,...,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0
3,Klungkung,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,...,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Tabanan,0.0,0.0,0.25,0.0,0.25,0.0,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Clustering

We Start by finding top 3 venue in each Neighborhood

In [261]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [278]:
num_top_venues = 3

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Regency']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Regency'] = Bali_grouped['Regency']

for ind in np.arange(Bali_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Bali_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Regency,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,Bangli,Gym,Flea Market,Snack Place
1,Denpasar,Indonesian Restaurant,Coffee Shop,Noodle House
2,Gianyar,Convenience Store,Park,Video Game Store
3,Klungkung,History Museum,Historic Site,Night Market
4,Tabanan,Bakery,Breakfast Spot,Chinese Restaurant


In [279]:
# set number of clusters
kclusters = 3

Bali_grouped_clustering = Bali_grouped.drop('Regency', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Bali_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:5]

array([2, 0, 1, 0, 0], dtype=int32)

In [280]:
Bali_merged = df_Bali_Final

# add clustering labels
Bali_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Bali_merged = Bali_merged.join(neighborhoods_venues_sorted.set_index('Regency'), on='Regency')

Bali_merged.head()

Unnamed: 0,Regency,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,Denpasar,-8.652497,115.219117,2,Indonesian Restaurant,Coffee Shop,Noodle House
1,Tabanan,-8.539231,115.126568,0,Bakery,Breakfast Spot,Chinese Restaurant
2,Gianyar,-8.548236,115.326054,1,Convenience Store,Park,Video Game Store
3,Klungkung,-8.535017,115.403276,0,History Museum,Historic Site,Night Market
4,Bangli,-8.460312,115.35352,0,Gym,Flea Market,Snack Place


In [282]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Bali_merged['Lat'], Bali_merged['Long'], Bali_merged['Regency'], Bali_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Describe Each Cluster

In [294]:
Bali_merged.loc[Bali_merged['Cluster Labels'] == 0, Bali_merged.columns[[0] + list(range(3, Bali_merged.shape[1]))]]

Unnamed: 0,Regency,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
1,Tabanan,0,Bakery,Breakfast Spot,Chinese Restaurant
3,Klungkung,0,History Museum,Historic Site,Night Market
4,Bangli,0,Gym,Flea Market,Snack Place


In [295]:
Bali_merged.loc[Bali_merged['Cluster Labels'] == 1, Bali_merged.columns[[0] + list(range(3, Bali_merged.shape[1]))]]

Unnamed: 0,Regency,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
2,Gianyar,1,Convenience Store,Park,Video Game Store


In [296]:
Bali_merged.loc[Bali_merged['Cluster Labels'] == 2, Bali_merged.columns[[0] + list(range(3, Bali_merged.shape[1]))]]

Unnamed: 0,Regency,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,Denpasar,2,Indonesian Restaurant,Coffee Shop,Noodle House


### Summary

Based on the clustering result, We can suggest to tourist to stay in Denpasar. Because this area categories as area with the have the most of Indonesian Restaurant. So the tourist can experienced the local food on their choices. Example of the Indonesian Restaurant that can be visit as below:

In [307]:
Denpasar_Food = Bali_venues[((Bali_venues['Regency'] == 'Denpasar') & (Bali_venues['Venue Category'] == 'Indonesian Restaurant'))]
Denpasar_Food

Unnamed: 0,Regency,Regency Latitude,Regency Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Denpasar,-8.652497,115.219117,Dapoer Pemuda,-8.653717,115.217061,Indonesian Restaurant
3,Denpasar,-8.652497,115.219117,Warung Wardani,-8.651159,115.215957,Indonesian Restaurant
8,Denpasar,-8.652497,115.219117,Depot Karna Sari,-8.654098,115.216454,Indonesian Restaurant
25,Denpasar,-8.652497,115.219117,Warung Adnyana,-8.649157,115.21942,Indonesian Restaurant


In [316]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)

# add markers to the map
markers_colors = []
for lat, lon, poi in zip(Denpasar_Food['Venue Latitude'], Denpasar_Food['Venue Longitude'], Denpasar_Food['Venue']):
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### End