In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd

class NYTRestaurantScraper:
    """Scrape the restaurant review listings from the NYT dining reviews page.

    Constructing an instance starts a Chrome session. ``scrape()`` loads the
    page, expands it by clicking "SHOW MORE" until the button is gone, reads
    one row per listing and always quits the browser before returning, so an
    instance can only be used for a single ``scrape()`` call.
    """

    URL = "https://www.nytimes.com/reviews/dining"
    SHOW_MORE_CLASS = "css-1t62hi8"
    LISTING_SELECTOR = 'li.css-1hks1bt'
    CRITICS_PICK_SELECTOR = 'li.css-df1tmv span.css-9n3bbc'

    # Placeholder stored when a listing lacks one of the text fields.
    MISSING = 'NA'

    # Output column order of the DataFrame returned by scrape().
    COLUMNS = (
        'Restaurant_name',
        'Author',
        'Publishing_Date',
        'Rating',
        'Price',
        'Cuisine',
        'Neighborhood',
        'CriticsPick',
        'Description',
    )

    # (column, CSS selector relative to the listing, read via textContent?)
    # Only the date is read through the `textContent` attribute instead of
    # `.text` -- presumably because the <time> element is not visibly
    # rendered; verify against the live page.
    _TEXT_FIELDS = (
        ('Restaurant_name', 'h2.css-8aqwnr', False),
        ('Author', 'p.css-jmrzxh span.css-1lejymi', False),
        ('Publishing_Date', 'footer.css-14jc8zs time.css-laplyn', True),
        ('Rating', 'li.css-df1tmv span.css-1dv1kvn', False),
        ('Price', 'li.css-df1tmv[itemprop="priceRange"]', False),
        ('Cuisine', 'li.css-df1tmv[itemprop="servesCuisine"]', False),
        ('Neighborhood', 'li.css-df1tmv[itemprop="addressLocality"]', False),
        ('Description', 'p.css-95mumb', False),
    )

    def __init__(self):
        """Initializes the scraper with Selenium WebDriver options."""
        self.options = Options()
        # Specify the path for the ChromeDriver log file here
        # NOTE(review): newer Selenium 4 releases appear to favour
        # `log_output` over `log_path` -- confirm against the installed version.
        self.service = ChromeService(ChromeDriverManager().install(), log_path='chromedriver.log')
        self.driver = webdriver.Chrome(service=self.service, options=self.options)
        self.driver.maximize_window()
        self.driver.implicitly_wait(5)

    def scrape(self):
        """Scrapes restaurant reviews from the NYT website and returns a DataFrame.

        Returns:
            A pandas DataFrame with one row per listing and the columns
            listed in ``COLUMNS``. Missing text fields hold ``'NA'``;
            ``CriticsPick`` is 1 or 0.

        Raises:
            TimeoutException: if no listing appears within 10 seconds.

        The browser is quit in every case, including when an error
        propagates, so no Chrome/chromedriver process is left behind.
        """
        try:
            self._load_all_listings()
            listings = WebDriverWait(self.driver, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, self.LISTING_SELECTOR))
            )
            # The listings are in the DOM at this point. Turning the implicit
            # wait off keeps each lookup of an absent optional element (e.g.
            # the Critic's Pick badge) from blocking for the full timeout.
            self.driver.implicitly_wait(0)
            rows = [self._extract_listing(listing) for listing in listings]
        finally:
            self.driver.quit()

        df = pd.DataFrame(rows, columns=list(self.COLUMNS))
        print('Data extraction complete.')
        return df

    def _load_all_listings(self):
        """Open the reviews page and click "SHOW MORE" until it stops appearing."""
        try:
            self.driver.get(self.URL)
            while True:
                # Find and click the "SHOW MORE" button
                show_more_button = WebDriverWait(self.driver, 5).until(
                    EC.presence_of_element_located((By.CLASS_NAME, self.SHOW_MORE_CLASS))
                )
                # Move to the button first so it is in view before the click.
                ActionChains(self.driver).move_to_element(show_more_button).perform()
                show_more_button.click()
        except (TimeoutException, NoSuchElementException, StaleElementReferenceException):
            # The wait timing out is the normal way this loop ends.
            print("No more 'SHOW MORE' button available. Exiting...")

    def _extract_listing(self, listing):
        """Return a dict mapping column name to value for one listing element."""
        row = {
            column: self._read_text(listing, selector, use_text_content)
            for column, selector, use_text_content in self._TEXT_FIELDS
        }
        row['CriticsPick'] = self._has_element(listing, self.CRITICS_PICK_SELECTOR)
        return row

    def _read_text(self, listing, selector, use_text_content=False):
        """Return the text of the first match under ``listing``, or ``MISSING``."""
        try:
            element = listing.find_element(By.CSS_SELECTOR, selector)
            if use_text_content:
                return element.get_attribute('textContent')
            return element.text
        except (NoSuchElementException, StaleElementReferenceException):
            return self.MISSING

    def _has_element(self, listing, selector):
        """Return 1 if ``selector`` matches inside ``listing``, otherwise 0."""
        try:
            listing.find_element(By.CSS_SELECTOR, selector)
        except (NoSuchElementException, StaleElementReferenceException):
            return 0
        return 1

# Example usage: run the scraper once, save the result and preview it.
scraper = NYTRestaurantScraper()  # constructing the scraper starts a Chrome session
df = scraper.scrape()  # returns a DataFrame; the driver is quit before scrape() returns
df.to_csv('Data.csv')  # the DataFrame index is written as an unnamed first column
print(df.shape)  # (number of rows, number of columns)
df.head()  # last expression of the notebook cell, so the first rows are displayed


No more 'SHOW MORE' button available. Exiting...
Data extraction complete.
(1630, 9)


Unnamed: 0,Restaurant_name,Author,Publishing_Date,Rating,Price,Cuisine,Neighborhood,CriticsPick,Description
0,Levant,PRIYA KRISHNA,"Dec. 17, 2024",2 star,$$,Middle Eastern,,1,"The feteer’s the thing at Levant, a tiny store..."
1,Le Veau d’Or,PRIYA KRISHNA,"Nov. 12, 2024",2 star,$$$$,French,Upper East Side,1,This second coming of the famed French bistro ...
2,Din Tai Fung,MELISSA CLARK,"Nov. 5, 2024",1 star,$$,Taiwanese,Midtown Manhattan,0,East Coast dumpling lovers rejoiced at the ope...
3,Sawa,MELISSA CLARK,"Oct. 22, 2024",2 star,$$,Middle Eastern,Park Slope,1,Sawa’s menu is a mix of beloved Lebanese class...
4,Carnitas Ramírez,PRIYA KRISHNA,"Sept. 17, 2024",2 star,$,Mexican,East Village,1,"Carnitas Ramírez, the sequel to the wildly pop..."


In [4]:
# Display the last rows of the scraped DataFrame.
df.tail()

Unnamed: 0,Restaurant_name,Author,Publishing_Date,Rating,Price,Cuisine,Neighborhood,CriticsPick,Description
1625,Victor's Cafe,BRYAN MILLER,"Sept. 18, 1987",1 star,$$$,Latin American,Midtown,0,This old theater district standby offers Cuban...
1626,Ambassador Grill & Lounge,BRYAN MILLER,"Oct. 3, 1986",1 star,$$$,New American,Midtown East,0,Near the United Nations and popular with diplo...
1627,La Boite en Bois,BRYAN MILLER,"Sept. 27, 1985",1 star,$$$,French,Upper West Side,0,"Cute and tiny, with forgettable but acceptable..."
1628,Gargiulo's,MARIAN BURROS,"Aug. 24, 1984",0.5 star,$$,Italian,Coney Island,0,This large Coney Island Italian restaurant is ...
1629,Kodama Japanese Restaurant,MIMI SHERATON,"July 29, 1983",1 star,$$,"Japanese, Sushi",Clinton,0,Although the decor is drab and the menu almost...
