# Burpple Restaurant Reviews Scraper

In [None]:
import requests as r
from bs4 import BeautifulSoup
from selenium import webdriver 
from selenium.webdriver.common.by import By 
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC 
from selenium.common.exceptions import TimeoutException
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
# from fake_useragent import UserAgent

import json
import re
import numpy as np
import pandas as pd
import pickle
import pprint
pp = pprint.PrettyPrinter(indent=2)

from threading import Thread
import time
from random import choice
import logging
logging.basicConfig(level = logging.INFO)

# bs4 requests
requests = r.Session()
retries = Retry(total=5,
                backoff_factor=2,
                status_forcelist=[ 500, 502, 503, 504 ],
                connect=10)
requests.mount("https://", HTTPAdapter(max_retries=retries))

## Defining Functions 

In [None]:
def scrape(url, name, class_):
    """Scrapes `url` using requests and bs4 for a `class_` with the specific`name`,
        and returns the scraped section of interest, `scraped` as a result_set. 

    Uses randomized User-Agents as headers for the scraping to avoid being blocked by the site.
    """

    desktop_agents = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
             'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
             'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
             'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14',
             'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
             'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
             'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
             'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
             'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
             'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0']

    def random_headers():
        return {'User-Agent': choice(desktop_agents),
                'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'}

    r = requests.get(url, timeout=10, headers=random_headers())
    soup = BeautifulSoup(r.text, 'html.parser')            
    scraped = soup.find_all(name, class_=class_)
    if scraped:
        return scraped
    raise Exception

In [None]:
def extractRestaurants(results_dict, scraped_data):
    """Scraping from the Search Restaurants page 
    Alternatively, can build another for scraping from: https://www.burpple.com/sg/hot/hawker#hot100-ref
    """
    
    for u in scraped_data:
        rest_name = u.find('span', class_='searchVenue-header-name-name headingMedium').text.replace('\n','')
#         if rest_name == 'Burpple Guides': continue # exception 

        try:
            num_reviews = u.find('span', class_='searchVenue-header-reviews').text.replace('\n','')
            location = u.find('span',class_='searchVenue-header-locationDistancePrice-location').text
            lat = u.find('span',class_='searchVenue-header-locationDistancePrice-distance').get('data-latitude')
            long = u.find('span',class_='searchVenue-header-locationDistancePrice-distance').get('data-longitude')
            price = u.find('span',class_='searchVenue-header-locationDistancePrice-price').text.replace(' · ','')
        except AttributeError as e:
            print('Stats not found:', e) 
            # e.g. no reviews given yet.
        
        restaurantpage = u.find('a').get('href')

        results_dict[rest_name] = {
                        'Link' :  restaurantpage,
                        'Location' : location,
                        'Latitude' : lat,
                        'Longitude' : long,
                        'PriceRange' : price,
                        'NumReviews' : '{}'.format(num_reviews.split(' R')[0]),
                        'Reviews' : {},
                        'DupeCount' : 0,
                        'Scraped' : False
                       }
        
        # add list of categories to dict if it exists
        cats = u.find('span', class_='searchVenue-header-categories')
        if cats: 
            results_dict[rest_name]['Categories'] = cats.text.split(',')

        # add list of tags to dict if exists
        tags = u.find('div', class_='searchVenue-reasonTags card-item')
        if tags: 
            results_dict[rest_name]['Tags'] = [tag.text.replace(r'\\n|"| · ','') for tag in tags.find_all('div',class_=re.compile('^label--pill'))]

    return results_dict

In [None]:
def getRestaurantReviews2(soup, restaurant):
    """
    """
    review_dict = restaurant['Reviews'] # At this point should be empty 
    # soup contains the find_all() of the "food body" on a Restaurant's review timeline
    if not soup: 
        return
    for s in soup:
        # Get review info 
        review_title = s.find('div', class_='food-description-title').text
        review_link = s.find('div', class_='food-description-title').find('a').get('href').split('?')[0]

        if review_title in review_dict: 
            # check for duplicates
            if not 'DupeCount' in restaurant:
                restaurant['DupeCount'] = 0
            restaurant['DupeCount'] += 1
            continue

        review_desc = s.find('div', class_='food-description-body')
        review_date = getDate(s.find_all('div', class_='card-item-set--link-details-wrapper'))
    
        # Get reviewER info. Link up to the other side later.
        user_name = s.find('div',class_='card-item-set--link-title').text
        user_link = s.find('div',class_='card-item-set--link-title').find('a').get('href')
    
        review_dict[review_title] = {'ReviewLink': review_link,
                                     'Reviewer': {user_name: {'Link':user_link}}}
        if review_desc: 
            review_dict[review_title]['Review'] = review_desc.text.replace('\n','')
        if review_date: 
            review_dict[review_title]['ReviewDate'] = review_date.replace('\n','')
        
        
        
#     return review_dict # no need to return since we are writing directly to it

In [None]:
def getDate(soup):
    for s in soup:
        dt = s.find('div',class_='card-item-set--link-subtitle')
        if dt: 
            return dt.text
    return '' # no date found for this review

## Scraping
### 1. Get Restaurants from Search Page 
Define number of pages to search through - each page has 12 restaurants

In [None]:
error_list = []
results_dict = {}
num_pages = 100

In [None]:
# collect basic restaurant information from the search page
def getRestaurantsFromSearchPage(pg, results_dict, error_list):
    restaurants_url = f'https://www.burpple.com/search/sg?offset={pg*12}&open_now=false&price_from=0&price_to=90&q=' 
    restaurants_soup = scrape(restaurants_url,name='div',class_='searchVenue card feed-item') #searchVenue-header card-item card-item--header for just the restau info, no tags
    if restaurants_soup:
        extractRestaurants(results_dict, restaurants_soup)
    else:
        error_list.append(restaurants_url)
    logging.info("results_dict_len=%s", len(results_dict))
    
# Threading to improve speed
thread_list = []
for pg in range(num_pages):      
    thread = Thread(target=getRestaurantsFromSearchPage, kwargs={"pg": pg, "results_dict": results_dict, "error_list": error_list})
    thread.start()
    time.sleep(1)
    thread_list.append(thread)
[t.join() for t in thread_list]
    
print(len(results_dict),' restaurants have been scraped')

with open('scraped_{}_restaurants.json'.format(len(results_dict)), 'w') as f:
    json.dump(results_dict, f, indent=4, ensure_ascii=False)

In [None]:
# Subsequent load:
with open('scraped_1164_restaurants.json', 'r') as f:
    results_dict = json.load(f)

In [None]:
len(results_dict)

### 2. Extract Reviews from Restaurant Pages
(using Selenium) From each restaurant, scrape 

In [None]:
# # Test with a single restaurant first:
# sample_dict = {} 
# for k, v in results_dict.items(): 
#     if k == 'Bacha Coffee':
#         sample_dict[k] = v

# pprint.pprint(sample_dict)
# restaurant_url = sample_dict['Bacha Coffee']['Link'].split('?')[0] #'https://www.burpple.com/bacha-coffee'

In [220]:
def loadFullPage(browser, expected_pages=1, timeout=20):
    """Expands by clicking"LOAD MORE" for each restaurant page 
    based on the expected number of pages.
    """
    
    try: 
        count=0
        while count <= expected_pages:
            button = browser.find_element_by_xpath('//a[@id="load-more-reviews"]')
            if button.get_attribute('style') != 'visibility: hidden;': # i.e. while load more is visible
                WebDriverWait(browser, timeout).until(EC.element_to_be_clickable((By.ID, 'load-more-reviews')))
                time.sleep(1.5)
                browser.execute_script("arguments[0].click();", button)
                count+=1
                #logging.info('Clicked: %s',count)
        
        logging.info('Completed clicking %s times', count)
        
    except Exception as e:
        logging.info(e)

In [221]:
def getRestaurantReviews(browser, dt_list, restaurant_link, error_list):
    """Extracts restaurant reviews from the current page of the browser and 
        adds it as a pandas dataframe to the dt_list. 
        Uses restaurantname to create an additional column in the dt.
        
    inputs: 
        element from selenium
        masterTable (Pandas DataFrame) to add the scraped data table onto. 
    """
    try:
        # Need to chain the xpath elements to avoid getting the foodcard from top dishes section.
        time.sleep(1) 
        review = browser.find_elements_by_xpath("//div[@class='collection-feed collection-feed--reviews']" + "//div[@class='food-description']")
        reviewer_cards = browser.find_elements_by_xpath("//div[@class='collection-feed collection-feed--reviews']" + "//div[@class='card-item-set--link-details']")

        titles = []
        bodies = []
        for rev in review:
            title = rev.text.split('\n', 1)[0]
            try:
                body = rev.text.split('\n', 1)[1].replace('\n',' ')
            except:
                 #logging.info('No review body found.')
                body = ''
            titles.append(title)
            bodies.append(body)

        # Get reviewer info
        reviewer_names = [card.text.split('\n')[0] for card in reviewer_cards]
        reviewer_levels = [re.search('Level ([0-9]{1,}) Burppler', card.text).group(1) if card.text.split('\n')[0] != 'Burpple Guides' else "" for card in reviewer_cards ]
        reviewer_numreviews = [re.search('([0-9]{1,}) Reviews', card.text).group(1) if card.text.split('\n')[0] != 'Burpple Guides' else "" for card in reviewer_cards]
        review_datetime = [card.text.split('\n')[-1].split(' · ')[0] for card in reviewer_cards]    

        logging.info([len(ls) for ls in [titles, bodies, reviewer_cards, reviewer_names, reviewer_levels, reviewer_numreviews, review_datetime]])

        #### Getting an error here because of the axis not being of the same legngth or something. :-(
        currentDT = pd.DataFrame({
            'Title' : titles,
            'Body' : bodies,
            'Reviewer' : reviewer_names,
            'Reviewer_Level' : reviewer_levels,
            'Reviewer_NumReviews': reviewer_numreviews,
            'ReviewDateTime': review_datetime
        })
        currentDT['Restaurant'] = restaurant_link
        currentDT = currentDT.drop_duplicates()
        dt_list.append(currentDT)

        logging.info('added %s reviews to the main data table', len(titles))
    except:
        logging.info('Error for restaurant %s', restaurant_link)
        error_list.add(restaurant_link)
    return len(titles)

In [279]:
def _get_reviews(restaurant, result_list, error_list):
    """Runs selenium to get the browser up. 
    Calls `loadFullPage()` and `getRestaurantReviews()` on the loaded browser.
    """
    try:
        restaurant_link = restaurant['Link']
        option = webdriver.ChromeOptions()
        option.add_argument("--headless") 
        option.add_argument("--incognito")
        option.add_argument('--no-sandbox')
        option.add_argument("--disable-popup-blocking")
        
        browser = webdriver.Chrome(executable_path='./scraping/chromedriver', options=option)
        browser.get('http://www.burpple.com' + restaurant_link)
        loadFullPage(browser, int(restaurant['NumReviews'])//6)
        restaurant['NumScraped'] = getRestaurantReviews(browser, result_list, restaurant_link, error_list)
        if restaurant['NumScraped'] == len(result_list): restaurant['Scraped'] = True
        logging.info("Number scraped: %s", restaurant['NumScraped'])
        logging.info("Current restaurant count: %s", len(result_list))
        browser.close()
        
    except Exception:
        logging.exception("error_in_thread")

In [233]:
def getReviews(restaurant, result_list, error_list):
    """ for the specified restaurant, check whether the restaurant is already scraped.
    If it's not scraped and there is at least one review, start the scraper and flag when it's done.
    """
    link = restaurant['Link']
    if restaurant['Scraped']: 
        logging.info('Skipping restaurant %s; already scraped', link)
    else:
        num_reviews = int(restaurant['NumReviews'])
        logging.info("num_reviews=%s", num_reviews)
        if  num_reviews > 0 :
            _get_reviews(restaurant, result_list, error_list)
            restaurant['Scraped'] =True

# Threading

To make it faster

In [300]:
count= 0 
for k, v in results_dict.items():
    if not v['Link'] in scraped_restaurants:
#         print(k + ": " + v['Scraped'])
        if v['Scraped']: count +=1
#         print(k, v['Scraped']) 
#         v['Scraped'] = False
print(count)

239


In [296]:
for restaurant in results_dict.values():
    link_ = restaurant['Link']
    if not v['Scraped']:
        logging.info('Scraping %s', link_)
        getReviews(restaurant, result_list, review_errors)

INFO:root:Scraping /babasan-by-uncle-kiisu
INFO:root:Skipping restaurant /babasan-by-uncle-kiisu; already scraped
INFO:root:Scraping /enjoy-eating-house-and-bar
INFO:root:Skipping restaurant /enjoy-eating-house-and-bar; already scraped
INFO:root:Scraping /wu-pao-chun-bakery
INFO:root:num_reviews=79
INFO:root:Completed clicking 14 times
INFO:root:[84, 84, 84, 84, 84, 84, 84]
INFO:root:added 84 reviews to the main data table
INFO:root:Number scraped: 84
INFO:root:Current restaurant count: 1
INFO:root:Scraping /thesaltedplumsg
INFO:root:Skipping restaurant /thesaltedplumsg; already scraped
INFO:root:Scraping /rakki-bowl
INFO:root:Skipping restaurant /rakki-bowl; already scraped
INFO:root:Scraping /elemen
INFO:root:Skipping restaurant /elemen; already scraped
INFO:root:Scraping /pasta-e-formaggio-marina-square
INFO:root:Skipping restaurant /pasta-e-formaggio-marina-square; already scraped
INFO:root:Scraping /se7enth
INFO:root:Skipping restaurant /se7enth; already scraped
INFO:root:Scraping

INFO:root:Completed clicking 62 times
INFO:root:[372, 372, 372, 372, 372, 372, 372]
INFO:root:added 372 reviews to the main data table
INFO:root:Number scraped: 372
INFO:root:Current restaurant count: 21
INFO:root:Scraping /hopii-singapore
INFO:root:num_reviews=4
INFO:root:Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@id="load-more-reviews"]"}
  (Session info: headless chrome=78.0.3904.87)

INFO:root:[5, 5, 5, 5, 5, 5, 5]
INFO:root:added 5 reviews to the main data table
INFO:root:Number scraped: 5
INFO:root:Current restaurant count: 22
INFO:root:Scraping /red-sparrow
INFO:root:num_reviews=38
INFO:root:Completed clicking 7 times
INFO:root:[42, 42, 42, 42, 42, 42, 42]
INFO:root:added 42 reviews to the main data table
INFO:root:Number scraped: 42
INFO:root:Current restaurant count: 23
INFO:root:Scraping /kooks-creamery
INFO:root:num_reviews=185
INFO:root:Completed clicking 31 times
INFO:root:[192, 192, 192, 192, 192, 192, 192]
INFO:root:added 192 r

INFO:root:Current restaurant count: 49
INFO:root:Scraping /108-matcha-saro
INFO:root:num_reviews=84
INFO:root:Completed clicking 15 times
INFO:root:[90, 90, 90, 90, 90, 90, 90]
INFO:root:added 90 reviews to the main data table
INFO:root:Number scraped: 90
INFO:root:Current restaurant count: 50
INFO:root:Scraping /dual
INFO:root:num_reviews=45
INFO:root:Completed clicking 8 times
INFO:root:[48, 48, 48, 48, 48, 48, 48]
INFO:root:added 48 reviews to the main data table
INFO:root:Number scraped: 48
INFO:root:Current restaurant count: 51
INFO:root:Scraping /tenten
INFO:root:num_reviews=141
INFO:root:Completed clicking 24 times
INFO:root:[144, 144, 144, 144, 144, 144, 144]
INFO:root:added 144 reviews to the main data table
INFO:root:Number scraped: 144
INFO:root:Current restaurant count: 52
INFO:root:Scraping /keong-saik-bakery
INFO:root:num_reviews=202
INFO:root:Completed clicking 34 times
INFO:root:[204, 204, 204, 204, 204, 204, 204]
INFO:root:added 204 reviews to the main data table
INFO:

INFO:root:Scraping /laderach-swiss-chocolatier
INFO:root:num_reviews=15
INFO:root:Completed clicking 3 times
INFO:root:[15, 15, 15, 15, 15, 15, 15]
INFO:root:added 15 reviews to the main data table
INFO:root:Number scraped: 15
INFO:root:Current restaurant count: 79
INFO:root:Scraping /hatter-street-bakehouse-cafe
INFO:root:num_reviews=290
INFO:root:Completed clicking 49 times
INFO:root:[294, 294, 294, 294, 294, 294, 294]
INFO:root:added 294 reviews to the main data table
INFO:root:Number scraped: 294
INFO:root:Current restaurant count: 80
INFO:root:Scraping /cluck-cluck-rochor-nafa
INFO:root:num_reviews=8
INFO:root:Completed clicking 2 times
INFO:root:[8, 8, 8, 8, 8, 8, 8]
INFO:root:added 8 reviews to the main data table
INFO:root:Number scraped: 8
INFO:root:Current restaurant count: 81
INFO:root:Scraping /dink-dink-thai-street-cafe
INFO:root:num_reviews=19
INFO:root:Completed clicking 4 times
INFO:root:[24, 24, 24, 24, 24, 24, 24]
INFO:root:added 24 reviews to the main data table
INFO

INFO:root:Scraping /five-izakaya-bar-ccp
INFO:root:num_reviews=27
INFO:root:Completed clicking 5 times
INFO:root:[30, 30, 30, 30, 30, 30, 30]
INFO:root:added 30 reviews to the main data table
INFO:root:Number scraped: 30
INFO:root:Current restaurant count: 108
INFO:root:Scraping /ramen-champion
INFO:root:num_reviews=340
INFO:root:Completed clicking 57 times
INFO:root:[342, 342, 342, 342, 342, 342, 342]
INFO:root:added 342 reviews to the main data table
INFO:root:Number scraped: 342
INFO:root:Current restaurant count: 109
INFO:root:Scraping /brine-singapore
INFO:root:num_reviews=136
INFO:root:Completed clicking 23 times
INFO:root:[138, 138, 138, 138, 138, 138, 138]
INFO:root:added 138 reviews to the main data table
INFO:root:Number scraped: 138
INFO:root:Current restaurant count: 110
INFO:root:Scraping /kanpai-izakaya-bar
INFO:root:num_reviews=29
INFO:root:Completed clicking 5 times
INFO:root:[34, 34, 34, 34, 34, 34, 34]
INFO:root:added 34 reviews to the main data table
INFO:root:Number

INFO:root:[2, 2, 2, 2, 2, 2, 2]
INFO:root:added 2 reviews to the main data table
INFO:root:Number scraped: 2
INFO:root:Current restaurant count: 138
INFO:root:Scraping /lau-wang-claypot-delights-punggol
INFO:root:num_reviews=10
INFO:root:Completed clicking 2 times
INFO:root:[11, 11, 11, 11, 11, 11, 11]
INFO:root:added 11 reviews to the main data table
INFO:root:Number scraped: 11
INFO:root:Current restaurant count: 139
INFO:root:Scraping /ramen-nagi-singapore
INFO:root:num_reviews=241
INFO:root:Completed clicking 41 times
INFO:root:[246, 246, 246, 246, 246, 246, 246]
INFO:root:added 246 reviews to the main data table
INFO:root:Number scraped: 246
INFO:root:Current restaurant count: 140
INFO:root:Scraping /les-amis
INFO:root:num_reviews=239
INFO:root:Completed clicking 40 times
INFO:root:[246, 246, 246, 246, 246, 246, 246]
INFO:root:added 246 reviews to the main data table
INFO:root:Number scraped: 246
INFO:root:Current restaurant count: 141
INFO:root:Scraping /marusaya
INFO:root:num_re

INFO:root:Scraping /menage-cafe
INFO:root:num_reviews=6
INFO:root:Completed clicking 2 times
INFO:root:[6, 6, 6, 6, 6, 6, 6]
INFO:root:added 6 reviews to the main data table
INFO:root:Number scraped: 6
INFO:root:Current restaurant count: 169
INFO:root:Scraping /narrative-coffee-stand
INFO:root:num_reviews=32
INFO:root:Completed clicking 6 times
INFO:root:[34, 34, 34, 34, 34, 34, 34]
INFO:root:added 34 reviews to the main data table
INFO:root:Number scraped: 34
INFO:root:Current restaurant count: 170
INFO:root:Scraping /be-frank
INFO:root:num_reviews=45
INFO:root:Completed clicking 8 times
INFO:root:[48, 48, 48, 48, 48, 48, 48]
INFO:root:added 48 reviews to the main data table
INFO:root:Number scraped: 48
INFO:root:Current restaurant count: 171
INFO:root:Scraping /som-tam
INFO:root:num_reviews=190
INFO:root:Completed clicking 32 times
INFO:root:[192, 192, 192, 192, 192, 192, 192]
INFO:root:added 192 reviews to the main data table
INFO:root:Number scraped: 192
INFO:root:Current restauran

INFO:root:[30, 30, 30, 30, 30, 30, 30]
INFO:root:added 30 reviews to the main data table
INFO:root:Number scraped: 30
INFO:root:Current restaurant count: 199
INFO:root:Scraping /4u-bar-kitchen
INFO:root:num_reviews=14
INFO:root:Completed clicking 3 times
INFO:root:[14, 14, 14, 14, 14, 14, 14]
INFO:root:added 14 reviews to the main data table
INFO:root:Number scraped: 14
INFO:root:Current restaurant count: 200
INFO:root:Scraping /springleaf-jcube
INFO:root:num_reviews=18
INFO:root:Completed clicking 4 times
INFO:root:[24, 24, 24, 24, 24, 24, 24]
INFO:root:added 24 reviews to the main data table
INFO:root:Number scraped: 24
INFO:root:Current restaurant count: 201
INFO:root:Scraping /shin-minori-japanese-restaurant
INFO:root:num_reviews=140
INFO:root:Completed clicking 24 times
INFO:root:[144, 144, 150, 150, 150, 150, 150]
INFO:root:Error for restaurant /shin-minori-japanese-restaurant
INFO:root:Number scraped: 144
INFO:root:Current restaurant count: 201
INFO:root:Scraping /the-clueless-g

INFO:root:Scraping /thegoodboyscafe
INFO:root:num_reviews=29
INFO:root:Completed clicking 5 times
INFO:root:[36, 36, 36, 36, 36, 36, 36]
INFO:root:added 36 reviews to the main data table
INFO:root:Number scraped: 36
INFO:root:Current restaurant count: 227
INFO:root:Scraping /beach-road-scissor-cut-curry-rice
INFO:root:num_reviews=210
INFO:root:Completed clicking 36 times
INFO:root:[216, 216, 216, 216, 216, 216, 216]
INFO:root:added 216 reviews to the main data table
INFO:root:Number scraped: 216
INFO:root:Current restaurant count: 228
INFO:root:Scraping /aroma-1
INFO:root:num_reviews=58


KeyboardInterrupt: 

In [322]:
# review_errors = set()
thread_list = []
# result_list = []

for restaurant in results_dict.values():
    if restaurant['Link'] not in scraped_restaurants:
        try:
            thread = Thread(target=getReviews, 
                            kwargs={"restaurant" : restaurant, 
                                    "result_list" : result_list,
                                    "error_list" : review_errors}, 
                            daemon=True)
            thread.start()
            time.sleep(0.5)
            thread_list.append(thread)
        except Exception:
            review_errors.add(link)
            logging.exception("error for restaurant %s",restaurant['Link'])

    [t.join() for t in thread_list]
    saveFile(first45)

INFO:root:num_reviews=0
INFO:root:Skipping restaurant /the-mad-sailors; already scraped
INFO:root:Skipping restaurant /rookery-1; already scraped
INFO:root:Skipping restaurant /nassim-hill-bakery-bistro-bar; already scraped
INFO:root:Skipping restaurant /whelers-yard; already scraped
INFO:root:num_reviews=0
INFO:root:Skipping restaurant /oberstrasse; already scraped
INFO:root:Skipping restaurant /una-una; already scraped
INFO:root:Skipping restaurant /atlas-coffeehouse; already scraped
INFO:root:Skipping restaurant /supply-demand-orchard-gateway; already scraped
INFO:root:Skipping restaurant /hvala-chijmes; already scraped
INFO:root:Skipping restaurant /shin-minori-japanese-restaurant; already scraped
INFO:root:Skipping restaurant /sun-ray-cafe; already scraped
INFO:root:num_reviews=0
INFO:root:num_reviews=0
INFO:root:num_reviews=58
INFO:root:Completed clicking 10 times
INFO:root:[66, 66, 66, 66, 66, 66, 66]
INFO:root:added 66 reviews to the main data table
INFO:root:Number scraped: 66

INFO:root:added 36 reviews to the main data table
INFO:root:Number scraped: 36
INFO:root:Current restaurant count: 260
INFO:root:num_reviews=165
INFO:root:Completed clicking 28 times
INFO:root:[174, 174, 174, 174, 174, 174, 174]
INFO:root:added 174 reviews to the main data table
INFO:root:Number scraped: 174
INFO:root:Current restaurant count: 261
INFO:root:num_reviews=269
INFO:root:Completed clicking 45 times
INFO:root:[270, 270, 270, 270, 270, 270, 270]
INFO:root:added 270 reviews to the main data table
INFO:root:Number scraped: 270
INFO:root:Current restaurant count: 262
INFO:root:num_reviews=114
INFO:root:Completed clicking 20 times
INFO:root:[120, 120, 120, 120, 120, 120, 120]
INFO:root:added 120 reviews to the main data table
INFO:root:Number scraped: 120
INFO:root:Current restaurant count: 263
INFO:root:num_reviews=55
INFO:root:Completed clicking 10 times
INFO:root:[60, 60, 60, 60, 60, 60, 60]
INFO:root:added 60 reviews to the main data table
INFO:root:Number scraped: 60
INFO:ro

INFO:root:[132, 132, 132, 132, 132, 132, 132]
INFO:root:added 132 reviews to the main data table
INFO:root:Number scraped: 132
INFO:root:Current restaurant count: 295
INFO:root:num_reviews=47
INFO:root:Completed clicking 8 times
INFO:root:[48, 48, 48, 48, 48, 48, 48]
INFO:root:added 48 reviews to the main data table
INFO:root:Number scraped: 48
INFO:root:Current restaurant count: 296
INFO:root:num_reviews=58
INFO:root:Completed clicking 10 times
INFO:root:[60, 60, 60, 60, 60, 60, 60]
INFO:root:added 60 reviews to the main data table
INFO:root:Number scraped: 60
INFO:root:Current restaurant count: 297
INFO:root:num_reviews=2
INFO:root:Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@id="load-more-reviews"]"}
  (Session info: headless chrome=78.0.3904.87)

INFO:root:[2, 2, 2, 2, 2, 2, 2]
INFO:root:added 2 reviews to the main data table
INFO:root:Number scraped: 2
INFO:root:Current restaurant count: 298
INFO:root:num_reviews=125
INFO:root:Completed cl

INFO:root:Current restaurant count: 328
INFO:root:num_reviews=42
INFO:root:Completed clicking 8 times
INFO:root:[48, 48, 48, 48, 48, 48, 48]
INFO:root:added 48 reviews to the main data table
INFO:root:Number scraped: 48
INFO:root:Current restaurant count: 329
INFO:root:num_reviews=267
INFO:root:Completed clicking 45 times
INFO:root:[270, 270, 270, 270, 270, 270, 270]
INFO:root:added 270 reviews to the main data table
INFO:root:Number scraped: 270
INFO:root:Current restaurant count: 330
INFO:root:num_reviews=29
INFO:root:Completed clicking 5 times
INFO:root:[30, 30, 30, 30, 30, 30, 30]
INFO:root:added 30 reviews to the main data table
INFO:root:Number scraped: 30
INFO:root:Current restaurant count: 331
INFO:root:num_reviews=51
INFO:root:Completed clicking 9 times
INFO:root:[54, 54, 54, 54, 54, 54, 54]
INFO:root:added 54 reviews to the main data table
INFO:root:Number scraped: 54
INFO:root:Current restaurant count: 332
INFO:root:num_reviews=72
INFO:root:Completed clicking 13 times
INFO:r

INFO:root:Number scraped: 5
INFO:root:Current restaurant count: 362
INFO:root:num_reviews=75
INFO:root:Completed clicking 13 times
INFO:root:[84, 84, 84, 84, 84, 84, 84]
INFO:root:added 84 reviews to the main data table
INFO:root:Number scraped: 84
INFO:root:Current restaurant count: 363
INFO:root:num_reviews=152
INFO:root:Completed clicking 26 times
INFO:root:[156, 156, 156, 156, 156, 156, 156]
INFO:root:added 156 reviews to the main data table
INFO:root:Number scraped: 156
INFO:root:Current restaurant count: 364
INFO:root:num_reviews=18
INFO:root:Completed clicking 4 times
INFO:root:[18, 18, 18, 18, 18, 18, 18]
INFO:root:added 18 reviews to the main data table
INFO:root:Number scraped: 18
INFO:root:Current restaurant count: 365
INFO:root:num_reviews=143
INFO:root:Completed clicking 24 times
INFO:root:[150, 150, 150, 150, 150, 150, 150]
INFO:root:added 150 reviews to the main data table
INFO:root:Number scraped: 150
INFO:root:Current restaurant count: 366
INFO:root:num_reviews=2
INFO:

INFO:root:num_reviews=15
INFO:root:Completed clicking 3 times
INFO:root:[15, 15, 15, 15, 15, 15, 15]
INFO:root:added 15 reviews to the main data table
INFO:root:Number scraped: 15
INFO:root:Current restaurant count: 398
INFO:root:num_reviews=37
INFO:root:Completed clicking 7 times
INFO:root:[42, 42, 42, 42, 42, 42, 42]
INFO:root:added 42 reviews to the main data table
INFO:root:Number scraped: 42
INFO:root:Current restaurant count: 399
INFO:root:num_reviews=16
INFO:root:Completed clicking 3 times
INFO:root:[16, 16, 16, 16, 16, 16, 16]
INFO:root:added 16 reviews to the main data table
INFO:root:Number scraped: 16
INFO:root:Current restaurant count: 400
INFO:root:num_reviews=539
INFO:root:Completed clicking 90 times
INFO:root:[546, 546, 546, 546, 546, 546, 546]
INFO:root:added 546 reviews to the main data table
INFO:root:Number scraped: 546
INFO:root:Current restaurant count: 401
INFO:root:num_reviews=52
INFO:root:Completed clicking 9 times
INFO:root:[60, 60, 60, 60, 60, 60, 60]
INFO:roo

INFO:root:[4, 4, 4, 4, 4, 4, 4]
INFO:root:added 4 reviews to the main data table
INFO:root:Number scraped: 4
INFO:root:Current restaurant count: 430
INFO:root:num_reviews=3
INFO:root:Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@id="load-more-reviews"]"}
  (Session info: headless chrome=78.0.3904.87)

INFO:root:[4, 4, 4, 4, 4, 4, 4]
INFO:root:added 4 reviews to the main data table
INFO:root:Number scraped: 4
INFO:root:Current restaurant count: 431
INFO:root:num_reviews=70
INFO:root:Completed clicking 12 times
INFO:root:[72, 72, 72, 72, 72, 72, 72]
INFO:root:added 72 reviews to the main data table
INFO:root:Number scraped: 72
INFO:root:Current restaurant count: 432
INFO:root:num_reviews=825
INFO:root:Completed clicking 138 times
INFO:root:[828, 828, 834, 834, 834, 834, 834]
INFO:root:Error for restaurant /paddy-hills
INFO:root:Number scraped: 828
INFO:root:Current restaurant count: 432
INFO:root:num_reviews=34
INFO:root:Completed clicking 6 times

INFO:root:Number scraped: 10
INFO:root:Current restaurant count: 460
INFO:root:num_reviews=47
INFO:root:Completed clicking 8 times
INFO:root:[48, 48, 48, 48, 48, 48, 48]
INFO:root:added 48 reviews to the main data table
INFO:root:Number scraped: 48
INFO:root:Current restaurant count: 461
INFO:root:num_reviews=48
INFO:root:Completed clicking 9 times
INFO:root:[54, 54, 54, 54, 54, 54, 54]
INFO:root:added 54 reviews to the main data table
INFO:root:Number scraped: 54
INFO:root:Current restaurant count: 462
INFO:root:num_reviews=64
INFO:root:Completed clicking 11 times
INFO:root:[66, 66, 66, 66, 66, 66, 66]
INFO:root:added 66 reviews to the main data table
INFO:root:Number scraped: 66
INFO:root:Current restaurant count: 463
INFO:root:num_reviews=6
INFO:root:Completed clicking 2 times
INFO:root:[6, 6, 6, 6, 6, 6, 6]
INFO:root:added 6 reviews to the main data table
INFO:root:Number scraped: 6
INFO:root:Current restaurant count: 464
INFO:root:num_reviews=29
INFO:root:Completed clicking 5 time

INFO:root:num_reviews=45
INFO:root:Completed clicking 8 times
INFO:root:[42, 42, 42, 42, 42, 42, 42]
INFO:root:added 42 reviews to the main data table
INFO:root:Number scraped: 42
INFO:root:Current restaurant count: 495
INFO:root:num_reviews=15
INFO:root:Completed clicking 3 times
INFO:root:[15, 15, 15, 15, 15, 15, 15]
INFO:root:added 15 reviews to the main data table
INFO:root:Number scraped: 15
INFO:root:Current restaurant count: 496
INFO:root:num_reviews=17
INFO:root:Completed clicking 3 times
INFO:root:[22, 22, 22, 22, 22, 22, 22]
INFO:root:added 22 reviews to the main data table
INFO:root:Number scraped: 22
INFO:root:Current restaurant count: 497
INFO:root:num_reviews=89
INFO:root:Completed clicking 15 times
INFO:root:[90, 90, 90, 90, 90, 90, 90]
INFO:root:added 90 reviews to the main data table
INFO:root:Number scraped: 90
INFO:root:Current restaurant count: 498
INFO:root:num_reviews=47
INFO:root:Completed clicking 8 times
INFO:root:[54, 54, 54, 54, 54, 54, 54]
INFO:root:added 54

INFO:root:Completed clicking 3 times
INFO:root:[16, 16, 16, 16, 16, 16, 16]
INFO:root:added 16 reviews to the main data table
INFO:root:Number scraped: 16
INFO:root:Current restaurant count: 529
INFO:root:num_reviews=87
INFO:root:Completed clicking 15 times
INFO:root:[96, 96, 96, 96, 96, 96, 96]
INFO:root:added 96 reviews to the main data table
INFO:root:Number scraped: 96
INFO:root:Current restaurant count: 530
INFO:root:num_reviews=135
INFO:root:Completed clicking 23 times
INFO:root:[138, 138, 138, 138, 138, 138, 138]
INFO:root:added 138 reviews to the main data table
INFO:root:Number scraped: 138
INFO:root:Current restaurant count: 531
INFO:root:num_reviews=111
INFO:root:Completed clicking 19 times
INFO:root:[114, 114, 114, 114, 114, 114, 114]
INFO:root:added 114 reviews to the main data table
INFO:root:Number scraped: 114
INFO:root:Current restaurant count: 532
INFO:root:num_reviews=39
INFO:root:Completed clicking 7 times
INFO:root:[42, 42, 42, 42, 42, 42, 42]
INFO:root:added 42 re

INFO:root:Number scraped: 654
INFO:root:Current restaurant count: 561
INFO:root:num_reviews=7
INFO:root:Completed clicking 2 times
INFO:root:[7, 7, 7, 7, 7, 7, 7]
INFO:root:added 7 reviews to the main data table
INFO:root:Number scraped: 7
INFO:root:Current restaurant count: 562
INFO:root:num_reviews=24
INFO:root:Completed clicking 5 times
INFO:root:[30, 30, 30, 30, 30, 30, 30]
INFO:root:added 30 reviews to the main data table
INFO:root:Number scraped: 30
INFO:root:Current restaurant count: 563
INFO:root:num_reviews=38
INFO:root:Completed clicking 7 times
INFO:root:[42, 42, 42, 42, 42, 42, 42]
INFO:root:added 42 reviews to the main data table
INFO:root:Number scraped: 42
INFO:root:Current restaurant count: 564
INFO:root:num_reviews=5
INFO:root:Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@id="load-more-reviews"]"}
  (Session info: headless chrome=78.0.3904.87)

INFO:root:[5, 5, 5, 5, 5, 5, 5]
INFO:root:added 5 reviews to the main data table
INFO

INFO:root:Current restaurant count: 596
INFO:root:num_reviews=75
INFO:root:Completed clicking 13 times
INFO:root:[78, 78, 78, 78, 78, 78, 78]
INFO:root:added 78 reviews to the main data table
INFO:root:Number scraped: 78
INFO:root:Current restaurant count: 597
INFO:root:num_reviews=177
INFO:root:Completed clicking 30 times
INFO:root:[180, 180, 180, 180, 180, 180, 180]
INFO:root:added 180 reviews to the main data table
INFO:root:Number scraped: 180
INFO:root:Current restaurant count: 598
INFO:root:num_reviews=4
INFO:root:Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@id="load-more-reviews"]"}
  (Session info: headless chrome=78.0.3904.87)

INFO:root:[4, 4, 4, 4, 4, 4, 4]
INFO:root:added 4 reviews to the main data table
INFO:root:Number scraped: 4
INFO:root:Current restaurant count: 599
INFO:root:num_reviews=26
INFO:root:Completed clicking 5 times
INFO:root:[36, 36, 36, 36, 36, 36, 36]
INFO:root:added 36 reviews to the main data table
INFO:root:Num

INFO:root:added 378 reviews to the main data table
INFO:root:Number scraped: 378
INFO:root:Current restaurant count: 628
INFO:root:num_reviews=28
INFO:root:Completed clicking 5 times
INFO:root:[30, 30, 30, 30, 30, 30, 30]
INFO:root:added 30 reviews to the main data table
INFO:root:Number scraped: 30
INFO:root:Current restaurant count: 629
INFO:root:num_reviews=23
INFO:root:Completed clicking 4 times
INFO:root:[24, 24, 24, 24, 24, 24, 24]
INFO:root:added 24 reviews to the main data table
INFO:root:Number scraped: 24
INFO:root:Current restaurant count: 630
INFO:root:num_reviews=164
INFO:root:Completed clicking 28 times
INFO:root:[174, 174, 174, 174, 174, 174, 174]
INFO:root:added 174 reviews to the main data table
INFO:root:Number scraped: 174
INFO:root:Current restaurant count: 631
INFO:root:num_reviews=31
INFO:root:Completed clicking 6 times
INFO:root:[32, 32, 32, 32, 32, 32, 32]
INFO:root:added 32 reviews to the main data table
INFO:root:Number scraped: 32
INFO:root:Current restaurant

INFO:root:num_reviews=4
INFO:root:Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@id="load-more-reviews"]"}
  (Session info: headless chrome=78.0.3904.87)

INFO:root:[4, 4, 4, 4, 4, 4, 4]
INFO:root:added 4 reviews to the main data table
INFO:root:Number scraped: 4
INFO:root:Current restaurant count: 661
INFO:root:num_reviews=64
INFO:root:Completed clicking 11 times
INFO:root:[66, 66, 66, 66, 66, 66, 66]
INFO:root:added 66 reviews to the main data table
INFO:root:Number scraped: 66
INFO:root:Current restaurant count: 662
INFO:root:num_reviews=55
INFO:root:Completed clicking 10 times
INFO:root:[66, 66, 66, 66, 66, 66, 66]
INFO:root:added 66 reviews to the main data table
INFO:root:Number scraped: 66
INFO:root:Current restaurant count: 663
INFO:root:num_reviews=56
INFO:root:Completed clicking 10 times
INFO:root:[66, 66, 66, 66, 66, 66, 66]
INFO:root:added 66 reviews to the main data table
INFO:root:Number scraped: 66
INFO:root:Current restaurant coun

INFO:root:Current restaurant count: 695
INFO:root:num_reviews=2
INFO:root:Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@id="load-more-reviews"]"}
  (Session info: headless chrome=78.0.3904.87)

INFO:root:[1, 1, 1, 1, 1, 1, 1]
INFO:root:added 1 reviews to the main data table
INFO:root:Number scraped: 1
INFO:root:Current restaurant count: 696
INFO:root:num_reviews=18
INFO:root:Completed clicking 4 times
INFO:root:[18, 18, 18, 18, 18, 18, 18]
INFO:root:added 18 reviews to the main data table
INFO:root:Number scraped: 18
INFO:root:Current restaurant count: 697
INFO:root:num_reviews=62
INFO:root:Completed clicking 11 times
INFO:root:[66, 66, 66, 66, 66, 66, 66]
INFO:root:added 66 reviews to the main data table
INFO:root:Number scraped: 66
INFO:root:Current restaurant count: 698
INFO:root:num_reviews=85
INFO:root:Completed clicking 15 times
INFO:root:[90, 90, 90, 90, 90, 90, 90]
INFO:root:added 90 reviews to the main data table
INFO:root:Number scrape

INFO:root:num_reviews=24
INFO:root:Completed clicking 5 times
INFO:root:[24, 24, 24, 24, 24, 24, 24]
INFO:root:added 24 reviews to the main data table
INFO:root:Number scraped: 24
INFO:root:Current restaurant count: 731
INFO:root:num_reviews=10
INFO:root:Completed clicking 2 times
INFO:root:[10, 10, 10, 10, 10, 10, 10]
INFO:root:added 10 reviews to the main data table
INFO:root:Number scraped: 10
INFO:root:Current restaurant count: 732
INFO:root:num_reviews=28
INFO:root:Completed clicking 5 times
INFO:root:[30, 30, 30, 30, 30, 30, 30]
INFO:root:added 30 reviews to the main data table
INFO:root:Number scraped: 30
INFO:root:Current restaurant count: 733
INFO:root:num_reviews=501
INFO:root:Completed clicking 84 times
INFO:root:[510, 510, 510, 510, 510, 510, 510]
INFO:root:added 510 reviews to the main data table
INFO:root:Number scraped: 510
INFO:root:Current restaurant count: 734
INFO:root:num_reviews=3
INFO:root:Message: no such element: Unable to locate element: {"method":"xpath","sele

INFO:root:num_reviews=114
INFO:root:Completed clicking 20 times
INFO:root:[120, 120, 120, 120, 120, 120, 120]
INFO:root:added 120 reviews to the main data table
INFO:root:Number scraped: 120
INFO:root:Current restaurant count: 764
INFO:root:num_reviews=28
INFO:root:Completed clicking 5 times
INFO:root:[30, 30, 30, 30, 30, 30, 30]
INFO:root:added 30 reviews to the main data table
INFO:root:Number scraped: 30
INFO:root:Current restaurant count: 765
INFO:root:num_reviews=3
INFO:root:Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@id="load-more-reviews"]"}
  (Session info: headless chrome=78.0.3904.87)

INFO:root:[3, 3, 3, 3, 3, 3, 3]
INFO:root:added 3 reviews to the main data table
INFO:root:Number scraped: 3
INFO:root:Current restaurant count: 766
INFO:root:num_reviews=38
INFO:root:Completed clicking 7 times
INFO:root:[42, 42, 42, 42, 42, 42, 42]
INFO:root:added 42 reviews to the main data table
INFO:root:Number scraped: 42
INFO:root:Current restaur

INFO:root:[1, 1, 1, 1, 1, 1, 1]
INFO:root:added 1 reviews to the main data table
INFO:root:Number scraped: 1
INFO:root:Current restaurant count: 795
INFO:root:num_reviews=11
INFO:root:Completed clicking 2 times
INFO:root:[11, 11, 11, 11, 11, 11, 11]
INFO:root:added 11 reviews to the main data table
INFO:root:Number scraped: 11
INFO:root:Current restaurant count: 796
INFO:root:num_reviews=189
INFO:root:Completed clicking 32 times
INFO:root:[192, 192, 192, 192, 192, 192, 192]
INFO:root:added 192 reviews to the main data table
INFO:root:Number scraped: 192
INFO:root:Current restaurant count: 797
INFO:root:num_reviews=4
INFO:root:Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@id="load-more-reviews"]"}
  (Session info: headless chrome=78.0.3904.87)

INFO:root:[4, 4, 4, 4, 4, 4, 4]
INFO:root:added 4 reviews to the main data table
INFO:root:Number scraped: 4
INFO:root:Current restaurant count: 798
INFO:root:num_reviews=218
INFO:root:Completed clicking 3

INFO:root:added 144 reviews to the main data table
INFO:root:Number scraped: 144
INFO:root:Current restaurant count: 829
INFO:root:num_reviews=28
INFO:root:Completed clicking 5 times
INFO:root:[30, 30, 30, 30, 30, 30, 30]
INFO:root:added 30 reviews to the main data table
INFO:root:Number scraped: 30
INFO:root:Current restaurant count: 830
INFO:root:num_reviews=51
INFO:root:Completed clicking 9 times
INFO:root:[54, 54, 54, 54, 54, 54, 54]
INFO:root:added 54 reviews to the main data table
INFO:root:Number scraped: 54
INFO:root:Current restaurant count: 831
INFO:root:num_reviews=308
INFO:root:Completed clicking 52 times
INFO:root:[312, 312, 312, 312, 312, 312, 312]
INFO:root:added 312 reviews to the main data table
INFO:root:Number scraped: 312
INFO:root:Current restaurant count: 832
INFO:root:num_reviews=473
INFO:root:Completed clicking 79 times
INFO:root:[480, 480, 480, 480, 480, 480, 480]
INFO:root:added 480 reviews to the main data table
INFO:root:Number scraped: 480
INFO:root:Current

INFO:root:Number scraped: 6
INFO:root:Current restaurant count: 863
INFO:root:num_reviews=650
INFO:root:Completed clicking 109 times
INFO:root:[654, 654, 660, 660, 660, 660, 660]
INFO:root:Error for restaurant /curious-palette
INFO:root:Number scraped: 654
INFO:root:Current restaurant count: 863
INFO:root:num_reviews=43
INFO:root:Completed clicking 8 times
INFO:root:[48, 48, 48, 48, 48, 48, 48]
INFO:root:added 48 reviews to the main data table
INFO:root:Number scraped: 48
INFO:root:Current restaurant count: 864
INFO:root:num_reviews=12
INFO:root:Completed clicking 3 times
INFO:root:[12, 12, 12, 12, 12, 12, 12]
INFO:root:added 12 reviews to the main data table
INFO:root:Number scraped: 12
INFO:root:Current restaurant count: 865
INFO:root:num_reviews=55
INFO:root:Completed clicking 10 times
INFO:root:[60, 60, 60, 60, 60, 60, 60]
INFO:root:added 60 reviews to the main data table
INFO:root:Number scraped: 60
INFO:root:Current restaurant count: 866
INFO:root:num_reviews=54
INFO:root:Complet

INFO:root:Completed clicking 44 times
INFO:root:[264, 264, 264, 264, 264, 264, 264]
INFO:root:added 264 reviews to the main data table
INFO:root:Number scraped: 264
INFO:root:Current restaurant count: 898
INFO:root:num_reviews=10
INFO:root:Completed clicking 2 times
INFO:root:[14, 14, 14, 14, 14, 14, 14]
INFO:root:added 14 reviews to the main data table
INFO:root:Number scraped: 14
INFO:root:Current restaurant count: 899
INFO:root:num_reviews=80
INFO:root:Completed clicking 14 times
INFO:root:[84, 84, 84, 84, 84, 84, 84]
INFO:root:added 84 reviews to the main data table
INFO:root:Number scraped: 84
INFO:root:Current restaurant count: 900
INFO:root:num_reviews=19
INFO:root:Completed clicking 4 times
INFO:root:[24, 24, 24, 24, 24, 24, 24]
INFO:root:added 24 reviews to the main data table
INFO:root:Number scraped: 24
INFO:root:Current restaurant count: 901
INFO:root:num_reviews=2
INFO:root:Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@id="load-more

INFO:root:Current restaurant count: 930
INFO:root:num_reviews=21
INFO:root:Completed clicking 4 times
INFO:root:[24, 24, 24, 24, 24, 24, 24]
INFO:root:added 24 reviews to the main data table
INFO:root:Number scraped: 24
INFO:root:Current restaurant count: 931
INFO:root:num_reviews=8
INFO:root:Completed clicking 2 times
INFO:root:[8, 8, 8, 8, 8, 8, 8]
INFO:root:added 8 reviews to the main data table
INFO:root:Number scraped: 8
INFO:root:Current restaurant count: 932
INFO:root:num_reviews=67
INFO:root:Completed clicking 12 times
INFO:root:[72, 72, 72, 72, 72, 72, 72]
INFO:root:added 72 reviews to the main data table
INFO:root:Number scraped: 72
INFO:root:Current restaurant count: 933
INFO:root:num_reviews=63
INFO:root:Completed clicking 11 times
INFO:root:[66, 66, 66, 66, 66, 66, 66]
INFO:root:added 66 reviews to the main data table
INFO:root:Number scraped: 66
INFO:root:Current restaurant count: 934
INFO:root:num_reviews=6
INFO:root:Completed clicking 2 times
INFO:root:[6, 6, 6, 6, 6, 6

INFO:root:Number scraped: 366
INFO:root:Current restaurant count: 964
INFO:root:num_reviews=30
INFO:root:Completed clicking 6 times
INFO:root:[36, 36, 36, 36, 36, 36, 36]
INFO:root:added 36 reviews to the main data table
INFO:root:Number scraped: 36
INFO:root:Current restaurant count: 965
INFO:root:num_reviews=136
INFO:root:Completed clicking 23 times
INFO:root:[138, 138, 138, 138, 138, 138, 138]
INFO:root:added 138 reviews to the main data table
INFO:root:Number scraped: 138
INFO:root:Current restaurant count: 966
INFO:root:num_reviews=81
INFO:root:Completed clicking 14 times
INFO:root:[84, 84, 84, 84, 84, 84, 84]
INFO:root:added 84 reviews to the main data table
INFO:root:Number scraped: 84
INFO:root:Current restaurant count: 967
INFO:root:num_reviews=25
INFO:root:Completed clicking 5 times
INFO:root:[30, 30, 30, 30, 30, 30, 30]
INFO:root:added 30 reviews to the main data table
INFO:root:Number scraped: 30
INFO:root:Current restaurant count: 968
INFO:root:num_reviews=14
INFO:root:Com

INFO:root:[36, 36, 36, 36, 36, 36, 36]
INFO:root:added 36 reviews to the main data table
INFO:root:Number scraped: 36
INFO:root:Current restaurant count: 996
INFO:root:num_reviews=31
INFO:root:Completed clicking 6 times
INFO:root:[36, 36, 36, 36, 36, 36, 36]
INFO:root:added 36 reviews to the main data table
INFO:root:Number scraped: 36
INFO:root:Current restaurant count: 997
INFO:root:num_reviews=556
INFO:root:Completed clicking 93 times
INFO:root:[558, 558, 558, 558, 558, 558, 558]
INFO:root:added 558 reviews to the main data table
INFO:root:Number scraped: 558
INFO:root:Current restaurant count: 998
INFO:root:num_reviews=6
INFO:root:Completed clicking 2 times
INFO:root:[6, 6, 6, 6, 6, 6, 6]
INFO:root:added 6 reviews to the main data table
INFO:root:Number scraped: 6
INFO:root:Current restaurant count: 999
INFO:root:num_reviews=158
INFO:root:Completed clicking 27 times
INFO:root:[162, 162, 162, 162, 162, 162, 162]
INFO:root:added 162 reviews to the main data table
INFO:root:Number scr

INFO:root:Completed clicking 30 times
INFO:root:[180, 180, 180, 180, 180, 180, 180]
INFO:root:added 180 reviews to the main data table
INFO:root:Number scraped: 180
INFO:root:Current restaurant count: 1029
INFO:root:num_reviews=6
INFO:root:Completed clicking 2 times
INFO:root:[6, 6, 6, 6, 6, 6, 6]
INFO:root:added 6 reviews to the main data table
INFO:root:Number scraped: 6
INFO:root:Current restaurant count: 1030
INFO:root:num_reviews=262
INFO:root:Completed clicking 44 times
INFO:root:[264, 264, 270, 270, 270, 270, 270]
INFO:root:Error for restaurant /dulcet-studio
INFO:root:Number scraped: 264
INFO:root:Current restaurant count: 1030
INFO:root:num_reviews=5
INFO:root:Completed clicking 1 times
INFO:root:[6, 6, 6, 6, 6, 6, 6]
INFO:root:added 6 reviews to the main data table
INFO:root:Number scraped: 6
INFO:root:Current restaurant count: 1031
INFO:root:num_reviews=7
INFO:root:Completed clicking 2 times
INFO:root:[7, 7, 7, 7, 7, 7, 7]
INFO:root:added 7 reviews to the main data table
INF

INFO:root:[42, 42, 42, 42, 42, 42, 42]
INFO:root:added 42 reviews to the main data table
INFO:root:Number scraped: 42
INFO:root:Current restaurant count: 1063
INFO:root:num_reviews=12
INFO:root:Completed clicking 3 times
INFO:root:[18, 18, 18, 18, 18, 18, 18]
INFO:root:added 18 reviews to the main data table
INFO:root:Number scraped: 18
INFO:root:Current restaurant count: 1064
INFO:root:num_reviews=70
INFO:root:Completed clicking 12 times
INFO:root:[72, 72, 72, 72, 72, 72, 72]
INFO:root:added 72 reviews to the main data table
INFO:root:Number scraped: 72
INFO:root:Current restaurant count: 1065
INFO:root:num_reviews=61
INFO:root:Completed clicking 11 times
INFO:root:[66, 66, 66, 66, 66, 66, 66]
INFO:root:added 66 reviews to the main data table
INFO:root:Number scraped: 66
INFO:root:Current restaurant count: 1066
INFO:root:num_reviews=117
INFO:root:Completed clicking 20 times
INFO:root:[120, 120, 120, 120, 120, 120, 120]
INFO:root:added 120 reviews to the main data table
INFO:root:Numbe

# Saving Scraped Results

In [318]:
def saveFile(previousDF):
    updatedDF = pd.concat([previousDF, pd.concat(result_list)]).drop_duplicates()
    with open('scraped_{}restaurantreviews.pkl'.format(len(updatedDF.Restaurant.unique())),'wb') as f:
        pickle.dump(updatedDF, f)
    print('saved.')


saved.


In [335]:
first273 = pickle.load(open('scraped_273restaurantreviews.pkl','rb'))
saveFile(first273)
# with open('scraped_45restaurantreviews.pkl', 'wb') as f:
#     pickle.dump(first45, f)

saved.


In [339]:
updatedDF = pd.concat([first273, pd.concat(result_list)]).drop_duplicates()

len(updatedDF.Restaurant.unique())

1114

In [385]:
updatedDF.shape

(63921, 7)

# Converting restaurants data to pandas

In [342]:
results_dict

{'Babasan by Uncle Kiisu': {'Link': '/babasan-by-uncle-kiisu',
  'Location': 'Clarke Quay',
  'Latitude': '1.2860345',
  'Longitude': '103.8495524',
  'PriceRange': '~$30/pax',
  'NumReviews': '4',
  'Reviews': {},
  'DupeCount': 0,
  'Scraped': True,
  'Categories': ['Local Delights',
   ' Japanese',
   ' 1-for-1 Deals',
   ' Burpple Beyond'],
  'Tags': ['BEYOND'],
  'NumScraped': 4},
 'Enjoy Eating House and Bar': {'Link': '/enjoy-eating-house-and-bar',
  'Location': 'Farrer Park',
  'Latitude': '1.3131906',
  'Longitude': '103.8596966',
  'PriceRange': '~$15/pax',
  'NumReviews': '131',
  'Reviews': {},
  'DupeCount': 0,
  'Scraped': True,
  'Categories': ['Bars',
   ' Local Delights',
   ' Zi Char',
   ' Thai',
   ' Chinese',
   ' 1-for-1 Deals'],
  'Tags': ['BEYOND', 'Featured In 3 Guides'],
  'NumScraped': 138},
 'Wu Pao Chun': {'Link': '/wu-pao-chun-bakery',
  'Location': 'City Hall',
  'Latitude': '1.292939',
  'Longitude': '103.8514798',
  'PriceRange': '~$13/pax',
  'NumRevie

In [343]:
# json.load(open('scraped_1164_restaurants.json','r'))
restaurants = pd.read_json('scraped_1164_restaurants.json', orient='index')
restaurants.reset_index(inplace=True)
restaurants = restaurants.drop(columns=['Scraped','DupeCount'])  # remove some redundant columns
restaurants = restaurants.rename({'index':'Title'}, axis=1)[['Title','Link','Location','Latitude','Longitude','NumReviews','PriceRange','Tags','Categories']]

In [375]:
with open('restaurants.pkl','wb') as f:
    pickle.dump(restaurants,f)