There is in fact a TripAdvisor API, but it only returns 10 entries per call and is far less powerful than scraping TripAdvisor.com directly, unfortunately. 
So scraping is what we're going to do!

In [3]:
# Install dependencies

!pip install --quiet requests beautifulsoup4 pandas

In [2]:
# Import and setup. Importing time to help pause between requests

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [7]:
# HTTP Requests: Use libraries like requests to fetch HTML content of the webpage.
# Importing time to pause between requests

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Step 1: Define the URL to fetch and the request headers
# base_url = "https://www.tripadvisor.com/Restaurants-g294265-Singapore.html"

target_url = "https://www.tripadvisor.com/FindRestaurants?geo=298184&offset=0&sort=POPULARITY&establishmentTypes=10591&broadened=false"

# Send a browser-like User-Agent instead of the default python-requests one.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
}

# Step 2: Send an HTTP GET request
# requests applies no timeout by default, so without one a stalled connection
# would block this cell indefinitely.
response = requests.get(target_url, headers=headers, timeout=30)

if response.status_code == 200:
    print("Successfully fetched TripAdvisor data!")
    html_content = response.text
else:
    print(f"Failed to fetch the webpage. Status code: {response.status_code}")
    # Reset the variable so a value left over from an earlier run of this cell
    # does not linger after a failed fetch.
    html_content = None

Successfully fetched TripAdvisor data!


In [8]:
# Step 3: Parse the HTML content
# Keep the raw markup in html_content and build the parse tree from it with
# Python's built-in 'html.parser' backend.
html_content = response.text
soup = BeautifulSoup(markup=html_content, features='html.parser')

# Bare last expression: the notebook renders the parsed document as cell output.
soup

<!DOCTYPE html>
<html lang="en-US"><head><link href="https://static.tacdn.com/favicon.ico?v2" id="favicon" rel="icon" type="image/x-icon"/><link color="#000000" href="https://static.tacdn.com/img2/brand_refresh/application_icons/mask-icon.svg" rel="mask-icon" sizes="any"/><meta content="#34e0a1" name="theme-color"/><meta content="telephone=no" name="format-detection"/><meta content="app-id=-1" name="apple-itunes-app"/><script type="application/ld+json">[{"@context":"https:\u002F\u002Fschema.org","@type":"Organization","name":"Tripadvisor","url":"https:\u002F\u002Fwww.tripadvisor.com\u002F","logo":"https:\u002F\u002Fstatic.tacdn.com\u002Fimg2\u002Fbrand_refresh\u002FTripadvisor_logoset_solid_green.svg","sameAs":["https:\u002F\u002Fwww.facebook.com\u002FTripadvisor","https:\u002F\u002Ftwitter.com\u002FTripadvisor","https:\u002F\u002Finstagram.com\u002Ftripadvisor\u002F","https:\u002F\u002Fwww.linkedin.com\u002Fcompany\u002Ftripadvisor"]},{"@context":"https:\u002F\u002Fschema.org","@type"

In [None]:
# Step 4: Find restaurant data
# The class names used here are placeholders and must be adjusted to match
# TripAdvisor's actual HTML structure before this cell will find anything.
restaurant_cards = soup.find_all('div', class_='restaurant-card-class')  # Adjust the class name as per TripAdvisor's HTML structure

# Output column -> (tag name, CSS class) of the element that holds its text.
CARD_FIELDS = {
    'Name': ('a', 'restaurant-name-class'),
    'Rating': ('span', 'rating-class'),
    'Cuisine': ('div', 'cuisine-class'),
}

# Create a list to store the data (one dictionary per restaurant card)
data = []

for card in restaurant_cards:
    row = {}
    for column, (tag_name, css_class) in CARD_FIELDS.items():
        element = card.find(tag_name, class_=css_class)
        # find() returns None when the card has no such element; record None
        # for that field instead of failing on `.text` and losing every row.
        row[column] = element.text.strip() if element is not None else None
    data.append(row)

In [None]:
# Step 5: Create a DataFrame
# One row per scraped dictionary; the dictionary keys become the columns.
csv_path = 'restaurants.csv'
df = pd.DataFrame(data)

# Step 6: Export to CSV
# index=False leaves the DataFrame's row index out of the file.
df.to_csv(csv_path, index=False)

print("Data exported to 'restaurants.csv' successfully!")

In [None]:
# Step 7: Handle pagination
#
# Request successive listing pages, collect Name / Rating / Cuisine from every
# restaurant card, and export the combined rows to 'restaurants.csv'.
# The loop stops when any of these happens:
#   * a request fails or returns a non-200 status,
#   * a page contains no restaurant cards,
#   * a page yields exactly the same rows as the previous page,
#   * MAX_PAGES pages have been scraped.
#
# The CSS class names are placeholders and must be adjusted to match
# TripAdvisor's actual HTML structure.
# NOTE(review): it has not been verified that the site honours `?page=`. If it
# ignores the parameter, every request returns the same listing; the
# repeated-page check and MAX_PAGES keep the loop from running forever.
MAX_PAGES = 100        # hard upper bound on pages requested in one run
REQUEST_TIMEOUT = 30   # seconds to wait on a request before giving up

page_number = 1
all_data = []
previous_page_rows = None

while page_number <= MAX_PAGES:
    url = f"https://www.tripadvisor.com/Restaurants-g294265-Singapore.html?page={page_number}"
    try:
        # Reuse the browser-like `headers` defined in the fetch cell so every
        # request is sent the same way as the first one.
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Stop cleanly so the rows collected so far are still exported below.
        print(f"Request for page {page_number} failed ({exc}). Stopping...")
        break

    if response.status_code != 200:
        print(f"Failed to fetch page {page_number}. Stopping...")
        break

    soup = BeautifulSoup(response.text, 'html.parser')
    restaurant_cards = soup.find_all('div', class_='restaurant-card-class')

    if not restaurant_cards:  # Stop if no more data is found
        print("No more restaurant data found. Stopping...")
        break

    page_rows = []
    for card in restaurant_cards:
        name_tag = card.find('a', class_='restaurant-name-class')
        rating_tag = card.find('span', class_='rating-class')
        cuisine_tag = card.find('div', class_='cuisine-class')

        # find() returns None for a missing element; store None for that field
        # rather than raising AttributeError and discarding the whole run.
        page_rows.append({
            'Name': name_tag.text.strip() if name_tag is not None else None,
            'Rating': rating_tag.text.strip() if rating_tag is not None else None,
            'Cuisine': cuisine_tag.text.strip() if cuisine_tag is not None else None
        })

    if page_rows == previous_page_rows:
        print(f"Page {page_number} repeats the previous page. Stopping...")
        break

    all_data.extend(page_rows)
    previous_page_rows = page_rows

    print(f"Scraped page {page_number}")
    page_number += 1
    time.sleep(2)  # Be polite and avoid overwhelming the server
else:
    # Reached only when the loop condition became false, i.e. no break fired.
    print(f"Reached the {MAX_PAGES}-page limit. Stopping...")

# Convert to DataFrame and export
df = pd.DataFrame(all_data)
df.to_csv('restaurants.csv', index=False)
print("Data exported to 'restaurants.csv' successfully!")