### This is an example of how to get the review data for the Albeta_Mediterranean_Bakery business. In the final version the business will be dynamic and will be retrieved from Kiki's or Vasilis' code; a specific business is hard-coded here for testing purposes.

In [2]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

In [3]:
# URL of the TripAdvisor page with restaurant reviews
# (hard-coded for testing; see the note at the top of the notebook)
url = 'https://www.tripadvisor.com/Restaurant_Review-g189473-d11816638-Reviews-Albeta_Mediterranean_Bakery-Thessaloniki_Thessaloniki_Region_Central_Macedonia.html'

# Configure Selenium web driver
# `driver` is module-level state: the later cells (navigate, goToPage and the
# main scraping cell) all operate on this same browser session.
options = webdriver.ChromeOptions()
options.add_argument('--headless')  # Run Chrome in headless mode, remove this line to see the browser
driver = webdriver.Chrome(options=options)

# Open the URL
driver.get(url)

# Wait for the page to load
# Block for up to 10 seconds until at least one element with the
# 'review-container' class (the same class navigate() scrapes) is in the DOM.
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'review-container')))
time.sleep(4)  # Additional sleep to ensure all elements are loaded

In [4]:
def _find_or_none(parent, xpath):
    """Return the first descendant of *parent* matching *xpath*, or None if absent."""
    try:
        return parent.find_element(By.XPATH, xpath)
    except NoSuchElementException:
        return None


def _parse_bubble_rating(class_attr):
    """Convert a class string such as 'ui_bubble_rating bubble_45' into 4.5.

    Returns None when the string holds no ``bubble_<digits>`` token.
    """
    prefix = 'bubble_'
    # Inspect each class token on its own so that extra classes before or
    # after the rating token cannot break the integer conversion.
    for token in (class_attr or '').split():
        digits = token[len(prefix):]
        if token.startswith(prefix) and digits.isdecimal():
            return int(digits) / 10
    return None


def navigate(driver, review):
    """Scrape every review on the currently loaded page.

    Args:
        driver: Selenium WebDriver whose current page contains the
            'review-container' elements to scrape.
        review: List that accumulates the scraped reviews. It is extended in
            place; a new list is created when None is passed.

    Returns:
        The accumulator list, with one dict appended per review container.
        Each dict has the keys 'Username', 'Review Date', 'Visit Date',
        'Review Title', 'Review Text' and 'Rating'; a value is None when the
        corresponding element is missing from the review.
    """
    # Append to the list supplied by the caller rather than to a module-level
    # global, so the function works with any accumulator.
    if review is None:
        review = []

    for container in driver.find_elements(By.CLASS_NAME, 'review-container'):
        username_el = _find_or_none(container, ".//div[@class='info_text pointer_cursor']")
        date_el = _find_or_none(container, ".//span[@class='ratingDate']")
        visit_el = _find_or_none(container, ".//div[@class='prw_rup prw_reviews_stay_date_hsx']")
        title_el = _find_or_none(container, ".//span[@class='noQuotes']")
        content_el = _find_or_none(container, ".//p[@class='partial_entry']")
        rating_el = _find_or_none(container, ".//span[contains(@class, 'ui_bubble_rating')]")

        visit_date = None
        if visit_el is not None:
            # The element text carries a "Date of visit:" label; keep only the date.
            visit_date = visit_el.text.replace("Date of visit:", "").strip()

        rating = None
        if rating_el is not None:
            rating = _parse_bubble_rating(rating_el.get_attribute('class'))

        review.append({
            'Username': username_el.text if username_el is not None else None,
            # The review date is read from the element's 'title' attribute.
            'Review Date': date_el.get_attribute('title') if date_el is not None else None,
            'Visit Date': visit_date,
            'Review Title': title_el.text if title_el is not None else None,
            'Review Text': content_el.text if content_el is not None else None,
            'Rating': rating,
        })

    return review

In [5]:
def goToPage(driver, review, counter):
    """Advance to the next page of reviews and scrape it.

    Args:
        driver: Selenium WebDriver showing a review page.
        review: Accumulated review data; passed on to navigate() once the
            next page has been opened.
        counter: Accepted for the caller's bookkeeping; not read here.

    Returns:
        A ``(review, msg)`` tuple. ``msg`` is "No more pages." when the
        "Next" link is missing or marked disabled (``review`` is then
        returned untouched), and "" after the next page was scraped.
    """
    # Assume the last page was reached until a click-and-scrape succeeds.
    status = "No more pages."

    try:
        next_button = driver.find_element("xpath", "//a[@class='nav next ui_button primary']")

        if 'disabled' not in next_button.get_attribute('class'):
            # Click through JavaScript instead of a native WebElement click.
            driver.execute_script("arguments[0].click();", next_button)

            # Fixed pause to let the next page of reviews load.
            time.sleep(3)

            review = navigate(driver, review)
            status = ""
    except NoSuchElementException:
        # No "Next" link on the page.
        status = "No more pages."

    return review, status

In [6]:
# Create a list to store the review data
review_data = []

counter = 2
try:
    # Find the business name element
    business_name_element = driver.find_element(By.XPATH, "//h1[@data-test-target='top-info-header']")
    business_name = business_name_element.text

    print(f"Business Name: {business_name}")
    print()

    # Scrape the page that is already loaded exactly once. goToPage() calls
    # navigate() itself after every successful click on "Next", so calling
    # navigate() again inside the loop would record each later page twice.
    review_data = navigate(driver, review_data)

    while True:
        # Go to the next page; goToPage() scrapes it and returns the updated data
        review_data, msg = goToPage(driver, review_data, counter)
        counter += 1

        # Check if there are no more pages
        if msg == "No more pages.":
            break

    # Print the review data
    for review in review_data:
        print(f"Username: {review['Username']}")
        print(f"Review Date: {review['Review Date']}")
        print(f"Visit Date: {review['Visit Date']}")
        print(f"Review Title: {review['Review Title']}")
        print(f"Review Text: {review['Review Text']}")
        print(f"Rating: {review['Rating']}")
        print()
finally:
    # Close the driver even if scraping fails part-way, so the headless
    # browser process is not left running.
    driver.quit()

Business Name: Albeta Mediterranean Bakery

Username: Traveler32528783809
Review Date: June 2, 2023
Visit Date: June 2023
Review Title: Highly recommended
Review Text: Top quality products, exceptional customer service and tasty food. Coffee is also very good and one of my favorite choices.
Rating: 5.0

Username: Tourist30751701237
Review Date: May 31, 2023
Visit Date: May 2023
Review Title: Breakfast
Review Text: A great place for breakfast! I really enjoyed the tortilla chicken and the fresh pomegranate juice.
Rating: 5.0

Username: Maria D
Review Date: May 10, 2023
Visit Date: May 2023
Review Title: You have to try it
Review Text: The stuff was very helpful and kind! They also have a beg variety of pastries and the best coffee in town!
Rating: 5.0

Username: Stella K
Review Date: April 3, 2023
Visit Date: March 2023
Review Title: Best bakery/quick bites
Review Text: The best bakery in Thessaloniki. Highest quality of ingredients featuring baked goods to sandwiches & salads
Rating: 5