In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager #Automatically manages ChromeDriver
from bs4 import BeautifulSoup
import pandas as pd
import time

# Launch Chrome through a Service object; ChromeDriverManager().install()
# supplies the path to the ChromeDriver executable.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

url = 'https://www.metacritic.com/game/animal-crossing-new-leaf/user-reviews/'

# Seconds to pause after every scroll so newly requested content can load
SCROLL_PAUSE_TIME = 2

try:
    driver.get(url)

    # Page height before any scrolling; used to detect when scrolling stops
    # producing new content.
    last_height = driver.execute_script("return document.body.scrollHeight")

    while True:
        # Jump to the current bottom of the page to trigger loading of more content
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Give the page time to append the next batch
        time.sleep(SCROLL_PAUSE_TIME)

        # An unchanged height after the pause means nothing new was added: end of page
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
except Exception:
    # Navigation or scrolling failed: shut the browser down so no orphaned
    # Chrome/chromedriver process is left behind, then surface the original error.
    # On success the driver is intentionally left open because the parsing cell
    # reads driver.page_source.
    driver.quit()
    raise

In [3]:
# Parse the HTML currently rendered in the browser
soup = BeautifulSoup(driver.page_source, 'html.parser')


def _text_or_none(node, strip=False):
    """Return ``node.text`` (stripped when ``strip`` is true), or None if ``node`` is None."""
    if node is None:
        return None
    return node.text.strip() if strip else node.text


# Column-oriented store: one list per output column, appended in lock-step so
# every review contributes exactly one entry (possibly None) to each list.
review_dict = {'name': [], 'date': [], 'rating': [], 'review': []}

for review in soup.find_all('div', class_='c-siteReview_main'):
    name_tag = review.find('a', class_='c-siteReviewHeader_username')
    date_tag = review.find('div', class_='c-siteReviewHeader_reviewDate')
    score_box = review.find('div', class_='c-siteReviewHeader_reviewScore')
    quote_box = review.find('div', class_='c-siteReview_quote')

    # find('div') yields the same first nested <div> that find_all('div')[0] would,
    # but returns None instead of raising IndexError when the container is empty,
    # so a malformed review records None rather than aborting the whole parse.
    score_tag = score_box.find('div') if score_box is not None else None
    quote_tag = quote_box.find('span') if quote_box is not None else None

    review_dict['name'].append(_text_or_none(name_tag, strip=True))
    review_dict['date'].append(_text_or_none(date_tag, strip=True))
    # rating and review text are stored exactly as found (no stripping)
    review_dict['rating'].append(_text_or_none(score_tag))
    review_dict['review'].append(_text_or_none(quote_tag))

NLReviews = pd.DataFrame(review_dict)

# Display (rows, columns) as the cell output
NLReviews.shape

(213, 4)

In [4]:
# Write the scraped reviews to a CSV file in the working directory;
# index=False leaves the DataFrame's row index out of the file.
NLReviews.to_csv(
    path_or_buf='new_leaf_reviews.csv',
    index=False,
)