In [1]:
#!/usr/bin/python3

import json
import os
import time

import requests

# "inspired by" https://stevesie.com/apps/grubhub-api
# his site gets the parameter names wrong but the approach is fine
# basically, grubhub.com uses an "unofficial" API to load restaurants and reviews
# you can use your browser's dev tools to sniff its web requests and see how the API works

###############################
# Setup
###############################
# This is the API key, sent as the "authorization" request header.
# Set the GRUBHUB_AUTHORIZATION environment variable (full value, including
# the "Bearer " prefix) to use a fresh key without editing this notebook;
# the literal below is only the fallback when that variable is unset.
# If you need a new one, follow these steps:
# - Load some restaurant's Grubhub page in Chrome
# - Right-click somewhere, select "Inspect"
# - Switch to the "Network" tab
# - Ctrl+R to refresh
# - In the "Filter" bar, put "/ratings/search/restaurant/"
# - Find one in the list with "Request Method: GET"
# - Scroll down to the "Request Headers" section, find "authorization"
authorization = os.environ.get(
    "GRUBHUB_AUTHORIZATION",
    "Bearer f930e11c-0d1a-4f34-bba4-30592206a1d2",
)


# This is a magic string that tells grubhub you're pretending to use Chrome
# If you want to know why, https://webaim.org/blog/user-agent-string-history/ is funny and basically correct
user_agent = "Mozilla/5.0 (X11; CrOS x86_64 13816.34.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.51 Safari/537.36"

# Unsure if necessary but makes Grubhub give you JSON instead of, like, xml
accept = "application/json"

# Headers shared by every request made in this notebook.
headers = {
    "authorization": authorization,
    "User-Agent": user_agent,
    "Accept": accept,
}

###############################
# Search restaurants in area
###############################
search_restaurants = "https://api-gtm.grubhub.com/restaurants/search"

# Centre point and radius of the search area.
# NOTE(review): the coordinates look like Midtown Manhattan, New York, and the
# unit of "radius" is undocumented (miles?) -- confirm both before relying on them.
search_params = {
    "latitude": "40.760280",
    "longitude": "-73.976918",
    "radius": 25,
}

# Always pass a timeout: without one, requests waits indefinitely on a
# connection that never answers and the cell appears to hang.
search_response = requests.get(
    search_restaurants,
    headers=headers,
    params=search_params,
    timeout=10,
)
# Stop here on an HTTP error status (for example an expired authorization
# value) instead of carrying an error body into later cells.
search_response.raise_for_status()

# Keep the decoded body so it can be inspected in later cells.
search_results = json.loads(search_response.content)

# Sleep so that we don't get rate limited / bot-detected
time.sleep(2)

###############################
# Search reviews for a specific restaurant
###############################
reviews_for_restaurant = "https://api-gtm.grubhub.com/ratings/search/restaurant/{{restaurant_id}}"

aloha_plates = "2396923" # I just know this ID from looking at browser dev tools
reviews_for_aloha_plates = reviews_for_restaurant.replace("{{restaurant_id}}", aloha_plates)


def fetch_review_page(url, params):
    """Request one page of reviews and return the ``requests.Response``.

    Args:
        url: Reviews endpoint with the restaurant id already filled in.
        params: Query parameters for the page (``pageSize`` and ``pageNum``).

    Returns:
        The response object for a successful request.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within the timeout.
    """
    # A timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(url, headers=headers, params=params, timeout=10)
    response.raise_for_status()
    return response


# grubhub.com seems to use 35 for `pageSize`, you can try different numbers
review_page_1_params = {
    "pageSize": 10,
    "pageNum": 1,
}

review_page_1_response = fetch_review_page(reviews_for_aloha_plates, review_page_1_params)
review_page_1_json = json.loads(review_page_1_response.content)

# Sleep so that we don't get rate limited / bot-detected
time.sleep(2)

# Same page size as page 1; only the page number changes.
review_page_2_params = review_page_1_params.copy()
review_page_2_params['pageNum'] = 2

review_page_2_response = fetch_review_page(reviews_for_aloha_plates, review_page_2_params)
review_page_2_json = json.loads(review_page_2_response.content)

In [2]:
# Peek at the 'content' field of the first review on page 2 to see how the payload is nested.
review_page_2_json['reviews']['review'][0]['content']

'Great food, great people'

In [3]:
# 'star_rating' field of the same first review on page 2.
review_page_2_json['reviews']['review'][0]['star_rating']

5

In [4]:
# 'sentiment' field of the same first review on page 2.
review_page_2_json['reviews']['review'][0]['sentiment']

'Positive'

In [5]:
# Number of reviews returned on page 2.
len(review_page_2_json['reviews']['review'])

10

In [6]:
# Parallel accumulators: entry k of each list describes the same review
# (its text, its star rating and its sentiment tag).
text_list, star_list, tag_list = [], [], []

In [7]:
# Append the text, star rating and sentiment tag of every page 2 review,
# in the order the reviews appear in the response.
page_2_reviews = review_page_2_json['reviews']['review']
for review in page_2_reviews:
    text_list.append(review['content'])
    star_list.append(review['star_rating'])
    tag_list.append(review['sentiment'])

0
1
2
3
4
5
6
7
8
9


In [8]:
text_list

['Great food, great people',
 'Food was partially frozen and texture was bad food was all cold',
 'Would have been better with two more tacos',
 "It's very good.  Macaroni salad is expensive.  Delivery and service fees make it all very expensive so it's an infrequent treat.",
 "Great food! If the order wasn't completed so quickly, I would have thought it came straight from Maui!!",
 "Please check my address. I made sure this is what is showing on my end: 855 S 100th St UNIT ABOVE GARAGES (to the right) Burien, WA 98168\n\nThis is the second driver in a row that has called to ask where to deliver. And I repeat the above info, UNIT ABOVE GARAGES (to the right). That's all I say, and they find it. So if this does not show on the drivers view, please include it going forward. All other drivers are able to locate and deliver without calling or having me come out of my home to find them. ",
 'Always good and we appreciate the service very much.',
 'nice options. nice taste.',
 'When I go in 

In [11]:
star_list

[5, 2, 3, 5, 5, 3, 5, 5, 5, 5]

In [12]:
tag_list

['Positive',
 'Negative',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Neutral']

In [13]:
# Full 'content' of the review at index 5 on page 2.
review_page_2_json['reviews']['review'][5]['content']

"Please check my address. I made sure this is what is showing on my end: 855 S 100th St UNIT ABOVE GARAGES (to the right) Burien, WA 98168\n\nThis is the second driver in a row that has called to ask where to deliver. And I repeat the above info, UNIT ABOVE GARAGES (to the right). That's all I say, and they find it. So if this does not show on the drivers view, please include it going forward. All other drivers are able to locate and deliver without calling or having me come out of my home to find them. "

In [14]:
# Append the page 1 reviews to the same accumulator lists.
# Iterate over page 1's own review list: bounding the loop by the length of
# page 2 would raise IndexError when page 1 is shorter and silently skip
# reviews when page 1 is longer.
for review in review_page_1_json['reviews']['review']:
    text_list.append(review['content'])
    star_list.append(review['star_rating'])
    tag_list.append(review['sentiment'])

In [15]:
star_list

[5, 2, 3, 5, 5, 3, 5, 5, 5, 5, 3, 4, 5, 5, 4, 5, 3, 5, 5, 4]

In [17]:
tag_list

['Positive',
 'Negative',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Negative',
 'Positive',
 'Positive',
 'Positive']

In [16]:
text_list

['Great food, great people',
 'Food was partially frozen and texture was bad food was all cold',
 'Would have been better with two more tacos',
 "It's very good.  Macaroni salad is expensive.  Delivery and service fees make it all very expensive so it's an infrequent treat.",
 "Great food! If the order wasn't completed so quickly, I would have thought it came straight from Maui!!",
 "Please check my address. I made sure this is what is showing on my end: 855 S 100th St UNIT ABOVE GARAGES (to the right) Burien, WA 98168\n\nThis is the second driver in a row that has called to ask where to deliver. And I repeat the above info, UNIT ABOVE GARAGES (to the right). That's all I say, and they find it. So if this does not show on the drivers view, please include it going forward. All other drivers are able to locate and deliver without calling or having me come out of my home to find them. ",
 'Always good and we appreciate the service very much.',
 'nice options. nice taste.',
 'When I go in 