In [1]:
#Importing packages
from selenium import webdriver 
from selenium.webdriver.common.by import By 
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC 
from selenium.common.exceptions import TimeoutException
import pandas as pd
import numpy as np
import time 

option = webdriver.ChromeOptions()
option.add_argument("--incognito")
option.add_argument("--disable-popup-blocking")

#=========================================================
# Get all a list of food categories
#=========================================================
browser = webdriver.Chrome(executable_path='c:/chrome/chromedriver.exe', options=option)
browser.get("https://www.burpple.com/categories/sg")

# Wait 10 seconds for page to load
timeout = 10
try:
    WebDriverWait(browser, 20).until(EC.visibility_of_element_located((By.CLASS_NAME, "logo")))
    print("")
except TimeoutException:
    print("Timed out waiting for page to load")
    browser.quit()

# find_elements_by_xpath returns an array of selenium objects.
all_categories_element = browser.find_elements_by_xpath("//a[@class='a--grey']")
# use list comprehension to get the actual repo titles and not the selenium objects.
all_categories = [x.text for x in all_categories_element]
# print out all the titles.
print('All Categories:')
print(all_categories, '\n')

#=========================================================
# Burpple search using food categories
#=========================================================
browser = webdriver.Chrome(executable_path='c:/chrome/chromedriver.exe', options=option)
browser.get("https://www.burpple.com/search/sg?q={}".format(all_categories[0].replace(" ","+")))
print(all_categories[0] + '=================', '\n')

# Expand "LOAD MORE":
count = 1
times = 10
while count < times: # click for 10 times, every click should add 12 places
    try: 
        WebDriverWait(browser, timeout).until(EC.element_to_be_clickable((By.ID, 'masonryViewMore-btn')))
        browser.find_element_by_xpath('//button[@id="masonryViewMore-btn"]').click()
        count+=1
        print(count)
    except:
        count = times
time.sleep(5) # force wait for 5 sec
print('Completed')

# Create list of places
places_element = browser.find_elements_by_xpath("//span[@class='searchVenue-header-name-name headingMedium']")
places = [x.text for x in places_element]
print("Places ({}):".format(len(places)))
print(places, '\n')

# Create list of reviews
num_reviews_element = browser.find_elements_by_xpath("//span[@class='searchVenue-header-reviews']")
num_reviews = [x.text for x in num_reviews_element]
print("No. of Reviews ({}):".format(len(num_reviews)))
print(num_reviews, '\n')

# Create list of locations
locations_element = browser.find_elements_by_xpath("//span[@class='searchVenue-header-locationDistancePrice-location']")
locations = [x.text for x in locations_element]
print("Locations ({}):".format(len(locations)))
print(locations, '\n')

# Create list of prices
#prices_element = browser.find_elements_by_xpath("//span[@class='searchVenue-header-locationDistancePrice-price']")
#prices = [x.text for x in prices_element]
#print("Prices ({}):".format(len(prices)))
#print(prices, '\n')

# Create list of category (repeated)
categories = [all_categories[0]] * len(places)

#=========================================================
# Convert in dataframe
#=========================================================
df=pd.DataFrame(np.column_stack([categories, places, locations, num_reviews]), 
                columns=['Categories', 'Places', 'Locations', 'No. of reviews'])
df.to_csv('burpple_df.csv', index=False)
df


All Categories:
['Burpple Guides', 'Newly Opened', 'Breakfast & Brunch', 'Cafes & Coffee', 'Halal', 'Japanese', '1-for-1 Deals', 'Bread', 'Bubble Tea', 'Buffets', 'Cakes', 'Char Kway Teow', 'Cheap & Good', 'Chinese', 'Chirashi', 'Cocktails', 'Craft Beer', 'Date Night', 'Desserts', 'Dim Sum', 'French', 'Fruit Tea', 'Hawker Food', 'Ice Cream & Yoghurt', 'Korean', 'Late Night', 'Malay', 'Michelin Guide Singapore 2019', 'Thai', 'Vegetarian', 'Waffles', 'Zi Char', 'Dinner with Drinks', 'Supper', 'Bak Kut Teh', 'Bars', 'Burgers', 'Chicken Rice', 'European', 'Fine Dining', 'Good For Groups', 'Healthy', 'Indian', 'Italian', 'Kid Friendly', 'Korean BBQ', 'Korean Desserts', 'Korean Fried Chicken', 'Mediterranean', 'Mexican', 'Mookata', 'Pasta', 'Pizza', 'Ramen', 'Salads', 'Sandwiches', 'Seafood', 'Spanish', 'Steak', 'Steamboat', 'Sushi', 'Turkish', 'Western'] 


2
3
4
5
6
7
8
9
10
Completed
Places (120):
['One Man Coffee (Upper Thomson)', 'Hatter Street (Kovan)', 'Mad For Garlic', "Strangers' R

Unnamed: 0,Categories,Places,Locations,No. of reviews
0,Burpple Guides,One Man Coffee (Upper Thomson),Thomson,678 Reviews
1,Burpple Guides,Hatter Street (Kovan),Kovan,290 Reviews
2,Burpple Guides,Mad For Garlic,Promenade,259 Reviews
3,Burpple Guides,Strangers' Reunion,Outram Park,1122 Reviews
4,Burpple Guides,Wheeler's Yard,Balestier,572 Reviews
5,Burpple Guides,Creamier (Toa Payoh),Toa Payoh,691 Reviews
6,Burpple Guides,Jekyll & Hyde,Tanjong Pagar,87 Reviews
7,Burpple Guides,Wild Honey (Mandarin Gallery),Somerset,558 Reviews
8,Burpple Guides,Koh Grill & Sushi Bar,Orchard,782 Reviews
9,Burpple Guides,Seng Kee Black Chicken Herbal Soup (Kembangan),Kembangan,277 Reviews
