# Capstone Project: 
# Best Places to Travel & Eat in London for First-time Visitor

## 1. Introduction 

In [None]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
import csv # library to handle csv files

import geocoder as geocoder
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

from bs4 import BeautifulSoup
import requests

print('Libraries imported.')

## 2. Data Collection
### 2.1. Get the recommended-places-to-visit Data

In [2]:
#Get the data with beautifulsoup  https://foursquare.com/gmissawa/list/places-to-visit-in-london
df_venue = pd.read_csv("df_venue.csv")
df_venue.sort_values('rating', ascending=False, inplace = True)
df_venue.head()

Unnamed: 0.1,Unnamed: 0,location.lat,location.lng,location.postalCode,name,rating,likes.count
18,16,51.507781,-0.162392,W2 2TP,Hyde Park,9.6,11445
14,27,51.477521,0.000858,SE10 9NF,Greenwich Park,9.6,1358
72,45,51.689539,-0.419315,WD25 7LR,Warner Bros. Studio Tour London - The Making o...,9.6,2837
68,7,51.50555,-0.075338,SE1 2UP,Tower Bridge,9.5,5471
40,15,51.501122,-0.177417,SW7 2AP,Royal Albert Hall,9.5,2536


In [3]:
print(df_venue.shape)
print('The dataframe has {} locations'.format(
        len(df_venue['name'].unique()),))

(78, 7)
The dataframe has 78 locations


### Create a map of London with the recommended places using latitude and longitude values

In [180]:
for lat1, lng1, name in zip(df_venue['location.lat'], df_venue['location.lng'], df_venue['name']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat1, lng1],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#FF5733',
        fill_opacity=0.7,
        parse_html=False).add_to(map_london)  

map_london

## 3. Methodology

### 3.1. Based on current location to recommend a place to go

In [4]:
###################
#Define a function to calculate the distance between two points based on longitude and latitude
def calculate_distance(lat1, lon1, lat2, lon2):

    from math import sin, cos, sqrt, atan2, radians

    # approximate radius of earth in km
    R = 6373.0

    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c

    #print("Result:", distance)

    return distance
    

In [5]:
# Use a random start point 
# I used a highest-ranked hotel location as the starting point
current_lat = 51.5168
current_log = 0.1360

In [85]:
# Calculate the distnace based on the latitude and longitude
for i in range (len(df_venue)):
    place = df_venue.iloc[i, :]
    lat2 = place['location.lat']
    log2 = place['location.lng']
    df_venue.loc[i,'distance'] = calculate_distance(current_lat, current_log, lat2, log2)

# Sort the dataframe by distance
df_venue_sort = df_venue.sort_values(by='distance', ascending=True)

df_venue_sort.head(10)

Unnamed: 0.1,Unnamed: 0,location.lat,location.lng,location.postalCode,name,rating,likes.count,distance
1,19,51.512841,-0.127896,WC2H 9ND,Ambassadors Theatre,8.1,73,10.327738
56,35,51.506912,-0.194801,W8 7LN,The Churchill Arms,8.9,718,10.445538
3,32,51.5198,-0.093969,EC2Y 8DS,Barbican Art Gallery,9.2,273,14.683319
18,16,51.507781,-0.162392,W2 2TP,Hyde Park,9.6,11445,14.724101
61,1,51.503287,-0.119594,SE1 7PB,The London Eye,9.2,6506,15.099482
59,25,51.511718,-0.125695,WC2E 9EB,The Lamb & Flag,8.5,454,15.388385
52,33,51.17906,-1.826198,SP4 7DE,Stonehenge,8.7,1695,15.712717
72,45,51.689539,-0.419315,WD25 7LR,Warner Bros. Studio Tour London - The Making o...,9.6,2837,15.88481
16,29,51.403235,-0.3374,KT8 9AU,Hampton Court Palace,9.3,775,15.918907
21,12,51.531371,-0.126022,NW1 2QP,London St Pancras International Railway Statio...,8.9,2917,15.920634


In [7]:
# Make a recommendation
print("The closest place to go is: ", df_venue_sort.iloc[0,:]['name'])
print("The rating is: ", df_venue_sort.iloc[0,:]['rating'])

#Save the latitude and longitude of this place for restaurant recommendation

The closest place to go is:  Ambassadors Theatre
The rating is:  8.1


### 3.1. Recommend a restaurant

In [21]:
#0.Define Foursquare 
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EL0QYZH3NAVAKDR2M5EGMSKEFEM3BXPO2CHPEMNR4GDAF3VL
CLIENT_SECRET:MYTF243JWF3F1RLYHNLQQXNQHBHMYFKEJJWAG0QKN0TTJRRS


In [22]:
#1.Based on the venue just visited
venue_latitude = df_venue_sort.iloc[0,:]['location.lat']
venue_longitude = df_venue_sort.iloc[0,:]['location.lng']

venue_name = df_venue_sort.iloc[0,:]['name']

print('Latitude and longitude values of {} are {}, {}.'.format(venue_name, 
                                                               venue_latitude, 
                                                               venue_longitude))

Latitude and longitude values of Ambassadors Theatre are 51.512841064253024, -0.1278958930930689.


In [23]:
#2.Recommend food related venue in a radius of 100 around the most-liked place

#Foursquare service for recommendation
LIMIT = 50 # limit of number of venues returned by Foursquare API
radius = 100 # define radius
intent = 'food'
sortByDistance = 'true'

#GET https://api.foursquare.com/v2/search/recommendations

#1. Write the url with important parameters:
url_r = 'https://api.foursquare.com/v2/search/recommendations?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&intent={}&sortByDistance={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION,    
    venue_latitude,
    venue_longitude,
    radius,
    LIMIT,
    intent,
    sortByDistance)

url_r

'https://api.foursquare.com/v2/search/recommendations?&client_id=EL0QYZH3NAVAKDR2M5EGMSKEFEM3BXPO2CHPEMNR4GDAF3VL&client_secret=MYTF243JWF3F1RLYHNLQQXNQHBHMYFKEJJWAG0QKN0TTJRRS&v=20180605&ll=51.512841064253024,-0.1278958930930689&radius=100&limit=50&intent=food&sortByDistance=true'

In [24]:
recommend = requests.get(url_r).json()
# recommend
venues = recommend['response']['group']['results']
venues

[{'displayType': 'venue',
  'id': '5e16eeb4e79aa500072e8bc4',
  'photo': {'createdAt': 1373318483,
   'height': 960,
   'id': '51db2d53498ebf7acc10f8df',
   'prefix': 'https://fastly.4sqi.net/img/general/',
   'suffix': '/2519409_KnZ4IMDhzB62VOvswk8and-Ni3CUvOCaOwn2rFHzV2g.jpg',
   'visibility': 'public',
   'width': 720},
  'snippets': {'count': 1,
   'items': [{'detail': {'object': {'agreeCount': 0,
       'canonicalUrl': 'https://foursquare.com/item/573ce3ea498e4b8918686d6b',
       'createdAt': 1463608298,
       'disagreeCount': 0,
       'id': '573ce3ea498e4b8918686d6b',
       'logView': True,
       'text': 'For starters The barbeque squid salad worth chorizo which was simply incredible. Mains was the herb roasted monkfish - hands down best monkfish I have ever had!',
       'todo': {'count': 0},
       'type': 'user',
       'user': {'firstName': 'Becky',
        'id': '73231888',
        'lastName': 'J',
        'photo': {'prefix': 'https://fastly.4sqi.net/img/user/',
       

### 3.4. Cluster the restaurant into 5 clusters Recommed based on ratings/distances/likes (three lists recommendation)

In [44]:
nearby_food = json_normalize(venues) # flatten JSON
nearby_food

# filter columns
filtered_columns = ['venue.name', 'venue.categories','venue.location.distance', 'venue.location.lat', 'venue.location.lng','venue.id' ]
nearby_food_filtered =nearby_food.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
nearby_food_filtered['categories'] = nearby_food_filtered.apply(get_category_type, axis=1)

nearby_food_filtered.head(20)
#nearby_venues.shape

#keep the index of the venues for later use
nearby_food_filtered = nearby_food_filtered.rename_axis('SerialNum').reset_index()

nearby_food_filtered.columns = ['serialNum', 'name','categories_', 'distance', 'lat', 'lng', 'id', 'categories']

In [45]:
nearby_food_filtered

Unnamed: 0,serialNum,name,categories_,distance,lat,lng,id,categories
0,0,The Ivy Restaurant,"[{'id': '52e81612bcbc57f1066b7a05', 'name': 'E...",7,51.5128,-0.127981,4b5344aef964a5200c9527e3,English Restaurant
1,1,Tredwells,"[{'id': '52e81612bcbc57f1066b7a05', 'name': 'E...",74,51.512276,-0.127321,54172b0f498e587dba2832f7,English Restaurant
2,2,Dishoom,"[{'id': '4bf58dd8d48988d10f941735', 'name': 'I...",82,51.51243,-0.126908,4c31c371a0ced13a150d146e,Indian Restaurant
3,3,Chick 'n' Sours,"[{'id': '4d4ae6fc7a7b7dea34424761', 'name': 'F...",89,51.513624,-0.128159,57e6cee7498ec100a83cfd3c,Fried Chicken Joint
4,4,Fabrique,"[{'id': '4bf58dd8d48988d16a941735', 'name': 'B...",89,51.51352,-0.128575,560d0db9498e57efd528f261,Bakery
5,5,Bill's Restaurant,"[{'id': '52e81612bcbc57f1066b7a05', 'name': 'E...",95,51.512555,-0.1266,4d440bf1c3e5f04d0a219620,English Restaurant
6,6,P. F. Chang's,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",98,51.511958,-0.127846,5980dcd02e26804a157bb9a2,Chinese Restaurant
7,7,Shake Shack Soho,"[{'id': '4bf58dd8d48988d16c941735', 'name': 'B...",98,51.513313,-0.129103,59e85fbd6e4650734436499a,Burger Joint


In [26]:
# Again, recommend the closest restaurant
print("The closest place to go is: ", nearby_food.iloc[0,:]['venue.name'])

#Save the latitude and longitude of this place for restaurant recommendation

The closest place to go is:  The Ivy Restaurant


In [14]:
################
#Foursquare API get the details of the venue based on the ID
def get_url_detail(venue_id):
    url_d = 'https://api.foursquare.com/v2/venues/{}?&client_id={}&client_secret={}&v={}'.format(
        venue_id,
        CLIENT_ID, 
        CLIENT_SECRET,
        VERSION)
    venue_detail = requests.get(url_d).json()
    
    return venue_detail

In [36]:
################
#This is how to get the details of one food venue
def get_one_venue(venue_detail):
    
    a = venue_detail['response']['venue']['name']
    b = venue_detail['response']['venue']['rating']
    c = venue_detail['response']['venue']['likes']['count']
    d = venue_detail['response']['venue']['price']['tier']
    list_ = [a, b, c, d]
    
    return list_

In [37]:
#Create a new dataframe that with name and ratings etc. 
#To combine with the previous one with location information
# filter columns
food_venue_details = []
for i in range (len(nearby_food_filtered)):
    venue_row = nearby_food_filtered.iloc[i, :]
    id_ = venue_row['venue.id']
    #print(id_)
    venue_detail = get_url_detail(id_)

    list_of_venues = get_one_venue(venue_detail)
    print(list_of_venues)
    
    food_venue_details.append(list_of_venues)

    #food_venue_details


['The Ivy Restaurant', 8.5, 171, 3]
['Tredwells', 7.6, 57, 4]
['Dishoom', 9.2, 2239, 2]
["Chick 'n' Sours", 8.0, 146, 1]
['Fabrique', 9.2, 128, 1]
["Bill's Restaurant", 7.9, 246, 2]
["P. F. Chang's", 8.0, 43, 1]
['Shake Shack Soho', 7.8, 49, 1]


In [38]:
food_venue_details

[['The Ivy Restaurant', 8.5, 171, 3],
 ['Tredwells', 7.6, 57, 4],
 ['Dishoom', 9.2, 2239, 2],
 ["Chick 'n' Sours", 8.0, 146, 1],
 ['Fabrique', 9.2, 128, 1],
 ["Bill's Restaurant", 7.9, 246, 2],
 ["P. F. Chang's", 8.0, 43, 1],
 ['Shake Shack Soho', 7.8, 49, 1]]

In [40]:
cols = ['name','rating','likes_count','price_tier']
df_food_venue_details = pd.DataFrame(food_venue_details, columns=cols)
df_food_venue_details

Unnamed: 0,name,rating,likes_count,price_tier
0,The Ivy Restaurant,8.5,171,3
1,Tredwells,7.6,57,4
2,Dishoom,9.2,2239,2
3,Chick 'n' Sours,8.0,146,1
4,Fabrique,9.2,128,1
5,Bill's Restaurant,7.9,246,2
6,P. F. Chang's,8.0,43,1
7,Shake Shack Soho,7.8,49,1


In [57]:
# Combine all the information of the food venues together
# Merging df_food_venue_details with nearby_food_filtered
df_food = pd.merge(df_food_venue_details, nearby_food_filtered, on='name')
#df_food
df_food_clean = df_food.drop(['categories_','id','serialNum'], axis = 1)

### 3.4. Cluster the restaurant into 5 clusters Recommed based on location, number of likes and category 

In [51]:
#Prepare the dataframe for clustering (string cannot be read. Use onehot coding)
#make index a column for later retrieve the name of restaurant
#Drop column categories for k clustering
df_cluster = df_food.drop(['name','categories_','categories','id','serialNum'], axis = 1)
df_cluster

Unnamed: 0,rating,likes_count,price_tier,distance,lat,lng
0,8.5,171,3,7,51.5128,-0.127981
1,7.6,57,4,74,51.512276,-0.127321
2,9.2,2239,2,82,51.51243,-0.126908
3,8.0,146,1,89,51.513624,-0.128159
4,9.2,128,1,89,51.51352,-0.128575
5,7.9,246,2,95,51.512555,-0.1266
6,8.0,43,1,98,51.511958,-0.127846
7,7.8,49,1,98,51.513313,-0.129103


In [52]:
# set number of clusters
kclusters = 3

#manhattan_grouped_clustering = manhattan_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(df_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 2, 1, 0, 0, 0, 2, 2], dtype=int32)

In [59]:
# add clustering labels
df_food_clean.insert(0, 'Cluster Labels', kmeans.labels_)

In [84]:
df_food_clean

Unnamed: 0,Cluster Labels,name,rating,likes_count,price_tier,distance,lat,lng,categories
0,0,The Ivy Restaurant,8.5,171,3,7,51.5128,-0.127981,English Restaurant
1,2,Tredwells,7.6,57,4,74,51.512276,-0.127321,English Restaurant
2,1,Dishoom,9.2,2239,2,82,51.51243,-0.126908,Indian Restaurant
3,0,Chick 'n' Sours,8.0,146,1,89,51.513624,-0.128159,Fried Chicken Joint
4,0,Fabrique,9.2,128,1,89,51.51352,-0.128575,Bakery
5,0,Bill's Restaurant,7.9,246,2,95,51.512555,-0.1266,English Restaurant
6,2,P. F. Chang's,8.0,43,1,98,51.511958,-0.127846,Chinese Restaurant
7,2,Shake Shack Soho,7.8,49,1,98,51.513313,-0.129103,Burger Joint


In [79]:
# create map
map_clusters = folium.Map(location=[venue_latitude, venue_longitude], zoom_start=16)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df_food_clean['lat'], df_food_clean['lng'], df_food_clean['name'], df_food_clean['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=1).add_to(map_clusters)
       
map_clusters

### 3.4b. Study the clusters

In [78]:
df_food_clean.sort_values(by=['Cluster Labels'])

Unnamed: 0,Cluster Labels,name,rating,likes_count,price_tier,distance,lat,lng,categories
0,0,The Ivy Restaurant,8.5,171,3,7,51.5128,-0.127981,English Restaurant
3,0,Chick 'n' Sours,8.0,146,1,89,51.513624,-0.128159,Fried Chicken Joint
4,0,Fabrique,9.2,128,1,89,51.51352,-0.128575,Bakery
5,0,Bill's Restaurant,7.9,246,2,95,51.512555,-0.1266,English Restaurant
2,1,Dishoom,9.2,2239,2,82,51.51243,-0.126908,Indian Restaurant
1,2,Tredwells,7.6,57,4,74,51.512276,-0.127321,English Restaurant
6,2,P. F. Chang's,8.0,43,1,98,51.511958,-0.127846,Chinese Restaurant
7,2,Shake Shack Soho,7.8,49,1,98,51.513313,-0.129103,Burger Joint


In [88]:
# Normalize the dataframe and do clustering again
#Normalize the dataframe
from sklearn import preprocessing

x = df_cluster
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
df_cluster_n = pd.DataFrame(x_scaled)

df_cluster_n

Unnamed: 0,0,1,2,3,4,5
0,0.5625,0.058288,0.666667,0.0,0.505006,0.448026
1,0.0,0.006375,1.0,0.736264,0.190627,0.711905
2,1.0,1.0,0.333333,0.824176,0.283067,0.876893
3,0.25,0.046903,0.0,0.901099,1.0,0.377124
4,1.0,0.038707,0.0,0.901099,0.937351,0.210889
5,0.1875,0.092441,0.333333,0.967033,0.357891,1.0
6,0.25,0.0,0.0,1.0,0.0,0.502266
7,0.125,0.002732,0.0,1.0,0.813213,0.0


In [89]:
# Clustering again
# set number of clusters
kclusters = 3

#manhattan_grouped_clustering = manhattan_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(df_cluster_n)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 2, 0, 0, 1, 1, 0], dtype=int32)

In [96]:
#df_food_venue_details.insert(0, 'Cluster Labels', kmeans.labels_)
df_food_venue_details.sort_values(by=['Cluster Labels'])

Unnamed: 0,Cluster Labels,name,rating,likes_count,price_tier
3,0,Chick 'n' Sours,8.0,146,1
4,0,Fabrique,9.2,128,1
7,0,Shake Shack Soho,7.8,49,1
0,1,The Ivy Restaurant,8.5,171,3
1,1,Tredwells,7.6,57,4
5,1,Bill's Restaurant,7.9,246,2
6,1,P. F. Chang's,8.0,43,1
2,2,Dishoom,9.2,2239,2
