First, we'll import the packages we need and set some variables

In [1]:
# Import Packages
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.action_chains import ActionChains

import folium
import requests

In [2]:
api_key = '' #  API Key removed for Github copy.
api_secret = '' #  API Key removed for Github copy.
version = '20210101'
limit = 50
radius = 3000
city = 'Seattle, WA'

Next, create and set our map.

In [3]:
geolocator = Nominatim(user_agent='seattle')
location = geolocator.geocode(city)

lat = location.latitude + .005  # '+ .005' is to move the map NE a bit for our purposes.
lng = location.longitude + .005

map_seattle = folium.Map(location=[lat,lng], zoom_start=13)

We then make our API call to Foursquare

In [4]:
restaurants = []
url = 'https://api.foursquare.com/v2/venues/search?ll={},{}&client_id={}&client_secret={}&v={}&limit={}'.format(
        lat,
        lng,
        api_key,
        api_secret,
        version,
        limit)
response = requests.get(url).json()['response']['venues']

First we need to identify the category of our restaurant so we can pull information on competitors.

In [5]:
categories = requests.get('https://api.foursquare.com/v2/venues/categories?client_id={}&client_secret={}&v={}'.format(
                            api_key,
                            api_secret,
                            version)).json()['response']['categories']

For our purposes, we will use the following categories:

    Mediterranean Restaurant 4bf58dd8d48988d1c0941735
    Middle Eastern Restaurant 4bf58dd8d48988d115941735
    Turkish Restaurant 4f04af1f2fb6e1c99f3db0bb

In [7]:
restaurant_type_ids = ['4bf58dd8d48988d1c0941735', '4bf58dd8d48988d115941735', '4f04af1f2fb6e1c99f3db0bb']

Now we will search for each category to create a list of restaurants

In [8]:
restaurants = []
for type_id in restaurant_type_ids:
    url = 'https://api.foursquare.com/v2/venues/search?ll={},{}&client_id={}&client_secret={}&categoryId={}&v={}&limit={}&radius={}'.format(
            lat, lng, api_key, api_secret, type_id, version, limit, radius)
    
    response = requests.get(url).json()['response']['venues']
    for restaurant in response:
        restaurants.append(restaurant)

Example of our restaurant data

In [9]:
restaurants[0]

{'id': '5d099a39838e59002c7c6895',
 'name': 'Garlic Crush',
 'location': {'address': '1417 Broadway',
  'crossStreet': 'Union',
  'lat': 47.613642,
  'lng': -122.32106,
  'labeledLatLngs': [{'label': 'display',
    'lat': 47.613642,
    'lng': -122.32106}],
  'distance': 613,
  'postalCode': '98122',
  'cc': 'US',
  'city': 'Seattle',
  'state': 'WA',
  'country': 'United States',
  'formattedAddress': ['1417 Broadway (Union)',
   'Seattle, WA 98122',
   'United States']},
 'categories': [{'id': '4bf58dd8d48988d1c0941735',
   'name': 'Mediterranean Restaurant',
   'pluralName': 'Mediterranean Restaurants',
   'shortName': 'Mediterranean',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/mediterranean_',
    'suffix': '.png'},
   'primary': True}],
 'referralId': 'v-1615925825',
 'hasPerk': False}

Having our list of restaurants, we will now sort them into clusters and add them to the map.

In [10]:
res_data = []
for restaurant in restaurants:
    res_data.append([restaurant['location']['lat'], restaurant['location']['lng'], restaurant['name']])
    
coords = []
for x, y, res in res_data:
    coords.append([x, y])

k_means = KMeans(init="k-means++", n_clusters=6, n_init=20)
k_means.fit(coords)
k_means_cluster_centers = k_means.cluster_centers_
k_means_labels = k_means.labels_

colors = {'0' : 'green', '1' : 'red', '2' : 'blue', '3' : 'orange', '4' : 'purple', '5' : 'black'}
for i, cluster in enumerate(k_means_labels): #  asign a color to each restaurant based on cluster
    res_data[i].append(cluster)

for i, data in enumerate(res_data):
    label = folium.Popup(data[2], parse_html=True)
    folium.CircleMarker([data[0], data[1]], radius=5, popup=label, 
                        color=colors['{}'.format(data[3])]).add_to(map_seattle)

Due to permissions, the map will not display here on Github. I have included it as a jpg in the repository so you can view it

In [11]:
map_seattle

We will next scrape the reviews for each of these restaurants from Yelp. First we set up the browser

In [12]:
options = Options()
options.headless = True

browser = webdriver.Firefox(options=options)

This function, get_reviews(), will scrape yelp and return the # of reviews for a restaurant

In [13]:
def get_reviews(name):
    
    browser.get('https://www.yelp.com/search?find_desc={}&find_loc=Downtown%2C+Seattle%2C+WA'.format(name.replace(' ', '+')))
    
    for b in browser.find_elements_by_tag_name('div[class^=\' mainAttributes\']'):
        if b.text[0:2] == '1.':
            return int(b.text.split('\n')[1])
        

In [14]:
for restaurant in res_data:
    name = restaurant[2]
    reviews = get_reviews(name)
    try:
        if reviews > 0:
            restaurant.append(reviews)
    except:
        restaurant.append(0)

Sample of our data, (longitude, latitude, name, color, #ofReviews) 

In [15]:
for restaurant in res_data[0:5]:
    print(restaurant)

[47.613642, -122.32106, 'Garlic Crush', 1, 70]
[47.624748, -122.357256, 'Mint & Olive', 4, 375]
[47.604019, -122.3315, 'Gyro Express', 2, 19]
[47.604563, -122.33351, 'Mado Oven', 2, 0]
[47.599464000000005, -122.303067, 'Mediterranean Mix', 3, 284]


We will then figure out which cluster has the highest average # of reviews

In [16]:
averages = [0] * 6  #  each list entry here represents a cluster #
counts = [0] * 6  # records the number of restaurants in each cluster

for restaurant in res_data:
    averages[restaurant[3]] += restaurant[4]
    counts[restaurant[3]] += 1
for i, cluster in enumerate(averages):
    averages[i] = int(cluster / counts[i])
averages

[318, 255, 142, 181, 292, 120]

In this case, cluster 0 has the highest # of reviews relative to the number of restaurants in the area. This is the downtown area.

This project only shows a limited part of the picture. For example, the rental costs are bound to be much higher in location [0]. As such, a complete model would need to incorporate factors such as this.