In [197]:
from selenium import webdriver
import re
import time
import csv
import pandas as pd
import numpy as np
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import datetime

In [72]:
def get_earliest_review(url, delay=5):
    """
    get_earliest_review: visits a restaurant page on OpenTable, navigates to the last page of reviews, and
    extracts the date text of the last review listed there (expected to be the restaurant's earliest review).

    args:
        url: string, the url of the restaurant page from which to extract the earliest review
        delay: number of seconds to wait for each page element before giving up (default 5)

    output:
        first review: string, the innerHTML of the review date element (e.g. 'Dined on <date>'),
        'No reviews' if the reviews section never appears on the page, or None if the reviews section
        exists but the review date element could not be located

    NOTE: after clicking the last-page button, the first page's reviews are still in the DOM, so simply
    waiting for a review element to be present succeeds immediately and can return the last review of the
    FIRST page. To avoid that, this function snapshots the last review before clicking and then waits until
    that node is replaced or its text changes. If no change is observed within `delay` seconds a message is
    printed and whatever is currently on the page is returned, so such rows should be treated as suspect.
    This heuristic depends on the live page markup and should be spot-checked.

    Best used on urls taken from OpenTable search results page sorted by Newest, so that earliest reviews
    should be in chronological order and you can identify ones where it has not navigated the reviews correctly
    """
    # Imported locally so the function is self-contained; selenium is already a dependency of this notebook.
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
        WebDriverException,
    )

    reviews_xpath = '//*[@id="reviews-results"]'
    pagination_xpath = '//*[@id="review-feed-pagination"]'
    last_page_button_xpath = pagination_xpath + '/div/button[last()]'
    last_review_xpath = reviews_xpath + '/div[last()]/div/div[2]/div[1]/div[1]/div[2]/span'

    def read_last_review():
        """Return the date text of the last review currently rendered, or None if it is missing."""
        try:
            return driver.find_element(By.XPATH, last_review_xpath).get_attribute('innerHTML')
        except WebDriverException:
            print("couldnt find final review on page")
            return None

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()  # maximize to make sure page sidebar is loaded

        # try to find the reviews element on the restaurant page for `delay` seconds
        try:
            WebDriverWait(driver, delay).until(
                EC.presence_of_element_located((By.XPATH, reviews_xpath)))
        except WebDriverException:
            print("couldn't find reviews on page")
            return 'No reviews'  # entire reviews element missing

        # look for the review pagination control; without it all reviews are on the current page
        try:
            WebDriverWait(driver, delay).until(
                EC.presence_of_element_located((By.XPATH, pagination_xpath)))
        except WebDriverException:
            print("couldnt find button on page")
            return read_last_review()

        # snapshot the last review of the current (first) page so a page change can be detected later
        try:
            marker = WebDriverWait(driver, delay).until(
                EC.presence_of_element_located((By.XPATH, last_review_xpath)))
            marker_text = marker.get_attribute('innerHTML')
        except WebDriverException:
            marker, marker_text = None, None

        # click the button corresponding to the final review page
        try:
            driver.find_element(By.XPATH, last_page_button_xpath).click()
        except WebDriverException:
            print("couldnt find button on page")
            return read_last_review()

        def last_page_rendered(drv):
            """True once a review is present that is not the one snapshotted before the click."""
            current = drv.find_element(By.XPATH, last_review_xpath)
            if marker is None:
                return True  # nothing to compare against; presence is the best available signal
            if current.get_attribute('innerHTML') != marker_text:
                return True
            try:
                marker.is_enabled()  # raises once the old node has been detached from the DOM
            except StaleElementReferenceException:
                return True
            return False

        try:
            WebDriverWait(
                driver, delay,
                ignored_exceptions=(NoSuchElementException, StaleElementReferenceException),
            ).until(last_page_rendered)
        except WebDriverException:
            # best effort: fall through and read what is there, but flag the row as suspect
            print("reviews did not change after clicking last page")

        return read_last_review()
    finally:
        # quit() (rather than close()) also shuts down the chromedriver process, even on errors
        driver.quit()

In [3]:
# Load the cleaned restaurant table; its 'url' column drives the scraping loop below.
df = pd.read_csv(filepath_or_buffer='nyc_restaurants_clean.csv')

In [12]:
# Plain Python list of the restaurant page URLs, in the same order as the rows of df.
urllist = df.loc[:, 'url'].to_list()

In [73]:
# Scrape the earliest review for every URL, logging "<url> <result>" as we go.
# The export lives in `finally` so that an exception or a manual interrupt part-way
# through this long-running loop still persists everything collected so far.
cleanlist = []
try:
    for url in urllist:
        print(url, end = ' ')
        rev = get_earliest_review(url)
        print(rev)
        cleanlist.append(rev)
finally:
    # zip() stops at the shorter input, so only URLs that finished scraping are included.
    # Going through a dict keeps one row per distinct URL (the last result wins for duplicates).
    earliest = dict(zip(urllist, cleanlist))
    earliest_df = pd.DataFrame(list(earliest.items()), columns=['url', 'earliest_rev'])
    # Skip the write only when the run failed before producing any result,
    # so an existing file is not replaced by an empty one.
    if cleanlist or not urllist:
        earliest_df.to_csv('earliest_revs.csv', index = False)

https://www.opentable.com/r/seaport-house-new-york couldnt find button on page
Dined on March 10, 2021
https://www.opentable.com/r/empire-burger-house-new-york couldnt find button on page
Dined on March 1, 2021
https://www.opentable.com/r/el-paso-mexican-restaurants-east-harlem-new-york couldnt find button on page
Dined on February 14, 2021
https://www.opentable.com/r/milas-fish-and-hook-new-york couldnt find button on page
Dined on May 16, 2021
https://www.opentable.com/r/quality-bistro-new-york Dined on July 9, 2021
https://www.opentable.com/r/lena-west-village-new-york couldnt find button on page
Dined on March 2, 2021
https://www.opentable.com/r/hachi-maki-new-york couldnt find button on page
Dined on March 13, 2021
https://www.opentable.com/r/tudor-city-steakhouse-new-york couldnt find button on page
Dined on March 26, 2021
https://www.opentable.com/r/tito-murphys-new-york Dined on January 25, 2020
https://www.opentable.com/r/pasta-by-hudson-new-york couldnt find button on page
Di

https://www.opentable.com/r/westville-new-york Dined on September 12, 2019
https://www.opentable.com/r/petite-boucherie-new-york Dined on February 20, 2015
https://www.opentable.com/the-smith-lincoln-square Dined on June 30, 2021
https://www.opentable.com/r/del-friscos-grille-nyc-new-york Dined on August 24, 2011
https://www.opentable.com/nomo-kitchen Dined on April 10, 2011
https://www.opentable.com/r/del-friscos-double-eagle-steak-house-new-york-city-new-york Dined on July 6, 2021
https://www.opentable.com/max-brenner-union-square Dined on January 26, 2010
https://www.opentable.com/r/sojourn-restaurant-new-york Dined on June 13, 2009
https://www.opentable.com/r/charlie-palmer-steak-nyc-new-york-3 Dined on June 30, 2009
https://www.opentable.com/locanda-verde Dined on June 25, 2008
https://www.opentable.com/r/brick-lane-curry-house-new-york Dined on March 22, 2008
https://www.opentable.com/ruths-chris-steak-house-new-york Dined on May 10, 2005
https://www.opentable.com/carmines-91st-s

https://www.opentable.com/limani-nyc Dined on May 9, 2021
https://www.opentable.com/wild-west-village Dined on January 6, 2015
https://www.opentable.com/old-tbilisi Dined on November 8, 2014
https://www.opentable.com/r/finestra-new-york Dined on October 8, 2014
https://www.opentable.com/r/gran-morsi-new-york Dined on October 10, 2014
https://www.opentable.com/bistro-les-amis Dined on October 11, 2014
https://www.opentable.com/r/anejo-tribeca-new-york Dined on May 16, 2021
https://www.opentable.com/r/212-steakhouse-new-york Dined on August 15, 2014
https://www.opentable.com/district-tap-house Dined on August 19, 2014
https://www.opentable.com/r/obica-mozzarella-bar-pizza-e-cucina-new-york Dined on March 19, 2021
https://www.opentable.com/haru-sushi-w-43rd Dined on June 8, 2021
https://www.opentable.com/r/il-mulino-prime-soho Dined on June 26, 2014
https://www.opentable.com/jones-wood-foundry Dined on June 20, 2014
https://www.opentable.com/r/quality-italian-new-york Dined on August 3, 2

https://www.opentable.com/natsumi-restaurant Dined on January 25, 2020
https://www.opentable.com/faces-and-names Dined on March 24, 2018
https://www.opentable.com/r/benoit-restaurant-and-wine-bar-new-york Dined on April 22, 2008
https://www.opentable.com/r/il-melograno-new-york Dined on March 14, 2008
https://www.opentable.com/central-park-boathouse Dined on April 10, 2008
https://www.opentable.com/r/amali-new-york Dined on March 12, 2008
https://www.opentable.com/r/shun-lee-west-new-york Dined on February 29, 2008
https://www.opentable.com/the-smith-east-village Dined on June 20, 2021
https://www.opentable.com/sushi-of-gari-46 Dined on March 11, 2008
https://www.opentable.com/r/ilili-new-york Dined on February 23, 2008
https://www.opentable.com/serafina-at-the-time-hotel Dined on October 12, 2013
https://www.opentable.com/r/la-sirene-new-york Dined on November 30, 2007
https://www.opentable.com/the-mermaid-inn-uptown Dined on December 5, 2007
https://www.opentable.com/r/bocca-di-bacco

https://www.opentable.com/r/luna-asian-bistro-and-rooftop-lounge-astoria Dined on December 3, 2017
https://www.opentable.com/r/pronto-pizza-astoria-2 couldn't find reviews on page
No reviews
https://www.opentable.com/r/blend-astoria Dined on January 27, 2017
https://www.opentable.com/r/basil-brick-oven-pizza-astoria couldnt find button on page
Dined on August 26, 2016
https://www.opentable.com/r/cream-ridgewood-glendale couldnt find button on page
Dined on August 24, 2018
https://www.opentable.com/sangaritas Dined on December 31, 2015
https://www.opentable.com/papazzio-italian-restaurant Dined on November 17, 2015
https://www.opentable.com/dionysos-restaurant Dined on January 7, 2016
https://www.opentable.com/vista-sky-lounge Dined on June 20, 2015
https://www.opentable.com/r/via-vai-astoria Dined on August 17, 2014
https://www.opentable.com/victory-garden-cafe Dined on April 13, 2014
https://www.opentable.com/r/blend-on-the-water-long-island-city Dined on January 1, 2014
https://www.o

https://www.opentable.com/r/pastavino-staten-island Dined on March 7, 2020
https://www.opentable.com/r/grand-sahara-mediterranean-grill-staten-island couldnt find button on page
Dined on February 1, 2020
https://www.opentable.com/r/the-richmond-staten-island Dined on July 26, 2019
https://www.opentable.com/r/sofias-taqueria-staten-island Dined on January 7, 2019
https://www.opentable.com/r/taverna-on-the-bay-staten-island Dined on December 31, 2019
https://www.opentable.com/r/richmond-republic-staten-island Dined on September 15, 2018
https://www.opentable.com/r/corner-house-bbq-staten-island Dined on April 20, 2018
https://www.opentable.com/r/violettes-cellar-staten-island Dined on November 6, 2017
https://www.opentable.com/r/lilyas-restaurant-and-grill-cafe-gourmand-staten-island couldnt find button on page
Dined on February 14, 2021
https://www.opentable.com/r/bayou-staten-island couldnt find button on page
Dined on May 28, 2021
https://www.opentable.com/r/incas-grill-peruvian-cuisi