In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# Scrape pages 1-4 of Metacritic's adventure-game listing (release years
# 1991-2011) with a headless Chrome browser, parse each rendered page with
# BeautifulSoup, and combine the results into a single DataFrame with the
# columns Title, Date, Rating, Description and Score.


def _text_or_none(tag):
    """Return the stripped text of *tag*, or None when *tag* is missing."""
    return tag.text.strip() if tag is not None else None


# Set up headless Chrome browser
options = Options()
options.add_argument("--headless")  # Remove this argument to see the browser in action
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# List to store one DataFrame per scraped page
df_list = []

try:
    # Loop through pages 1 to 4
    for page in range(1, 5):
        url = f"https://www.metacritic.com/browse/game/all/adventure/all-time/metascore/?releaseYearMin=1991&releaseYearMax=2011&genre=adventure&page={page}"
        print(f"Scraping page {page}...")

        driver.get(url)
        time.sleep(3)  # Wait for JavaScript to load

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Extract titles. The title text lives in the second <span>; keep a
        # None placeholder when it is absent so later rows do not shift and
        # end up paired with another card's date/rating/score.
        titles_a = []
        for t in soup.find_all('div', class_='c-finderProductCard_title'):
            spans = t.find_all("span")
            titles_a.append(spans[1].text.strip() if len(spans) > 1 else None)

        # Extract dates and ratings
        dates_a = []
        rates_a = []
        for d in soup.find_all('div', class_='c-finderProductCard_meta'):
            dates_a.append(_text_or_none(d.find('span', class_='u-text-uppercase')))

            # The rating is the text node that follows the label span. Only
            # text nodes are str instances; if the sibling is a tag, calling
            # .strip() on it would fail, so treat that case as "no rating".
            span_rating = d.find('span', class_='u-text-capitalize')
            sibling = span_rating.next_sibling if span_rating is not None else None
            rates_a.append(sibling.strip() if isinstance(sibling, str) and sibling else None)

        # Extract descriptions
        descriptions_a = [
            _text_or_none(desc.find('span'))
            for desc in soup.find_all('div', class_='c-finderProductCard_description')
        ]

        # Extract scores (read from each score element's aria-label attribute)
        scores_a = [
            score.get("aria-label")
            for score in soup.find_all('div', class_='c-siteReviewScore')
        ]

        # Debugging: check list lengths
        print(f"Titles: {len(titles_a)}, Dates: {len(dates_a)}, Ratings: {len(rates_a)}, Descriptions: {len(descriptions_a)}, Scores: {len(scores_a)}")

        columns = (titles_a, dates_a, rates_a, descriptions_a, scores_a)
        lengths = [len(col) for col in columns]

        # Trim to shortest length, but say so: unequal lengths mean some
        # element was missing on the page and rows may not line up.
        min_len = min(lengths)
        if max(lengths) != min_len:
            print(f"Warning: column lengths differ on page {page}; trimming to {min_len} rows")

        data = zip(*(col[:min_len] for col in columns))

        # Create DataFrame
        df = pd.DataFrame(data, columns=['Title', 'Date', 'Rating', 'Description', 'Score'])
        df_list.append(df)
finally:
    # Close the browser even if a page fails to load or parse, so no
    # headless Chrome process is left running.
    driver.quit()

# Combine all data into one DataFrame
final_df = pd.concat(df_list, ignore_index=True)

# Display the first few rows
print(final_df.head())
