In [None]:
# Imports and setup
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [None]:
# Listing URL that serves the first page of results
BASE_URL = "https://tabelog.com/en/tokyo/rstLst/"

# HTTP headers sent with every request; the User-Agent mimics a desktop
# Chrome browser. The literal is split only for readability -- adjacent
# string literals are concatenated into the same single value.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
}

# Helper: pull one optional text field out of a restaurant card
def _extract_text(card, tag, css_class):
    """Return the stripped text of the first matching element, or None.

    Args:
        card: Parsed element to search inside.
        tag: Tag name of the element to look for (e.g. "a").
        css_class: CSS class the element must carry.

    Returns:
        The element's text with surrounding whitespace removed, or None
        when ``card`` contains no such element.
    """
    node = card.find(tag, class_=css_class)
    return node.text.strip() if node is not None else None


# Function to scrape a single page
def scrape_page(url, timeout=10):
    """Download one listing page and extract a record per restaurant card.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the scrape forever.

    Returns:
        A list of dicts with the keys "Name", "Rating", "Cuisine" and
        "Price Range" (a value is None when the card lacks that element),
        or None when the page could not be fetched.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, etc. follow the same "print and
        # return None" contract as a bad status code, so the caller can
        # keep whatever it has already collected.
        print(f"Failed to fetch: {url} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch: {url}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")

    # One record per <li class="list-rst"> card; absent fields become None
    return [
        {
            "Name": _extract_text(card, "a", "list-rst__rst-name-target"),
            "Rating": _extract_text(card, "b", "c-rating__val"),
            "Cuisine": _extract_text(card, "div", "list-rst__area-genre"),
            "Price Range": _extract_text(card, "span", "c-rating__price"),
        }
        for card in soup.find_all("li", class_="list-rst")
    ]

# Function to paginate and scrape multiple pages
def scrape_tabelog(num_pages, delay=2):
    """Scrape up to ``num_pages`` consecutive listing pages.

    Pages are requested in order starting from page 1, sorted via the
    ``SrtT=rt`` query parameter. Scraping stops early at the first page
    that fails to download or yields no restaurants.

    Args:
        num_pages: Maximum number of pages to fetch.
        delay: Seconds to pause between consecutive requests.

    Returns:
        A list with the restaurant records of every page scraped
        successfully (possibly empty).
    """
    all_data = []

    for page in range(1, num_pages + 1):
        print(f"Scraping page {page}...")
        # Page 1 lives at the base URL; later pages add "<page>/" to it
        url = f"{BASE_URL}{page}/?SrtT=rt" if page > 1 else BASE_URL + "?SrtT=rt"
        page_data = scrape_page(url)

        if page_data is None:
            # scrape_page signals a failed download with None
            print(f"Stopping at page {page} due to an error.")
            break
        if not page_data:
            # The page loaded but held no restaurant cards: nothing more
            # to collect, and this is not a fetch error.
            print(f"Stopping at page {page}: no restaurants found.")
            break

        all_data.extend(page_data)

        # Be polite by adding a delay between requests -- but only when
        # another request will actually follow.
        if page < num_pages:
            time.sleep(delay)

    return all_data

# Scrape the first 2 pages (adjust as needed)
data = scrape_tabelog(num_pages=2)

# Build the DataFrame unconditionally so `df` is always defined afterwards
df = pd.DataFrame(data)

if df.empty:
    # Nothing was collected: do not write (or overwrite) the CSV with an
    # empty file, and do not report success.
    print("No data scraped; 'tabelog_top_restaurants_tokyo.csv' was not written.")
else:
    # Export to CSV without the positional index column
    df.to_csv("tabelog_top_restaurants_tokyo.csv", index=False)
    print("Data scraped and saved to 'tabelog_top_restaurants_tokyo.csv'")