In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# Set up headless Chrome browser
options = Options()
options.add_argument("--headless")  # Set to False to see browser in action
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# List to store all DataFrames
df_list = []

# Loop through pages 1 to 4
for page in range(1, 104):
    url = f"https://www.metacritic.com/browse/game/?releaseYearMin=1995&releaseYearMax=2025&genre=action&genre=action-adventure&genre=action-puzzle&genre=action-rpg&genre=adventure&genre=application&genre=arcade&genre=first---person-shooter&genre=rpg&genre=racing&genre=sports&genre=third---person-shooter&page={page}"
    print(f"Scraping page {page}...")
    
    driver.get(url)
    time.sleep(3)  # Wait for JavaScript to load

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Extract titles
    titles = soup.find_all('div', class_='c-finderProductCard_title')
    titles_a = [t.find_all("span")[1].text.strip() for t in titles if len(t.find_all("span")) > 1]

    # Extract dates and ratings
    dates = soup.find_all('div', class_='c-finderProductCard_meta')
    dates_a = []
    rates_a = []

    for d in dates:
        span_date = d.find('span', class_='u-text-uppercase')
        dates_a.append(span_date.text.strip() if span_date else None)

        span_rating = d.find('span', class_='u-text-capitalize')
        rates_a.append(span_rating.next_sibling.strip() if span_rating and span_rating.next_sibling else None)

    # Extract descriptions
    descriptions = soup.find_all('div', class_='c-finderProductCard_description')
    descriptions_a = [desc.find('span').text.strip() if desc.find('span') else None for desc in descriptions]

    # Extract scores
    scores = soup.find_all('div', class_='c-siteReviewScore')
    scores_a = [score.get("aria-label") for score in scores]

    # Debugging: check list lengths
    print(f"Titles: {len(titles_a)}, Dates: {len(dates_a)}, Ratings: {len(rates_a)}, Descriptions: {len(descriptions_a)}, Scores: {len(scores_a)}")

    # Trim to shortest length
    min_len = min(len(titles_a), len(dates_a), len(rates_a), len(descriptions_a), len(scores_a))

    data = zip(
        titles_a[:min_len],
        dates_a[:min_len],
        rates_a[:min_len],
        descriptions_a[:min_len],
        scores_a[:min_len]
    )

    # Create DataFrame
    df = pd.DataFrame(data, columns=['Title', 'Date', 'Rating', 'Description', 'Score'])
    df_list.append(df)

# Close the browser
driver.quit()

# Combine all data into one DataFrame
final_df = pd.concat(df_list, ignore_index=True)

# Display the first few rows
print(final_df.head())


Scraping page 1...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 2...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 3...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 4...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 5...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 6...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 7...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 8...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 9...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 10...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 11...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 12...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, 

In [2]:
final_df.to_csv('output.csv', index = False)

In [8]:
final_df.dropna(inplace=True)

In [10]:
len(final_df)

1009

In [11]:
df_sales = pd.read_csv('Data/vgsales.csv')

In [12]:
merged_df = pd.merge(final_df, df_sales, left_on='Title', right_on='Name', how='inner')

In [20]:
merged_df

Unnamed: 0,Title,Date,Rating,Description,Score,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
360,Pac-Man,"Aug 9, 2006",E,[Xbox Live Arcade] Play one of the all-time c...,Metascore 62 out of 100,90,Pac-Man,2600,1982.0,Puzzle,Atari,7.28,0.45,0.00,0.08,7.81
195,Kinect Sports,"Jun 6, 2017",E10+,This action sports compilation for the Xbox 36...,Metascore 73 out of 100,140,Kinect Sports,X360,2010.0,Sports,Microsoft Game Studios,3.92,1.78,0.03,0.51,6.24
0,Metal Gear Solid,"Jun 21, 2024",M,"You are Snake, a government agent on a mission...",Metascore 94 out of 100,146,Metal Gear Solid,PS,1998.0,Action,Konami Digital Entertainment,3.18,1.83,0.78,0.24,6.03
86,Monster Hunter Freedom Unite,"Sep 12, 2006",E10+,The Monster Hunter world is always changing an...,Metascore 81 out of 100,162,Monster Hunter Freedom Unite,PSP,2008.0,Role-Playing,Capcom,0.47,0.57,4.13,0.34,5.50
134,The Simpsons: Hit & Run,"Jul 5, 2010",T,The Simpsons Hit & Run is a mission-based driv...,Metascore 78 out of 100,225,The Simpsons: Hit & Run,PS2,2003.0,Racing,Vivendi Games,1.73,2.19,0.00,0.79,4.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80,Marvel: Ultimate Alliance,"Nov 4, 2008",E,Marvel: Ultimate Alliance is an action/RPG tha...,Metascore 82 out of 100,16236,Marvel: Ultimate Alliance,PC,2006.0,Role-Playing,Activision,0.01,0.00,0.00,0.00,0.01
376,Darkened Skye,"Oct 29, 2003",M,The dread lord Necroth rules the world armed w...,Metascore 61 out of 100,16390,Darkened Skye,GC,2002.0,Adventure,TDK Mediactive,0.01,0.00,0.00,0.00,0.01
249,Harry Potter and the Chamber of Secrets,"Feb 22, 2018",E,Players enter fantastical free-roaming 3D envi...,Metascore 71 out of 100,16146,Harry Potter and the Chamber of Secrets,PC,2002.0,Action,Electronic Arts,0.00,0.01,0.00,0.00,0.01
397,Spider-Man: Friend or Foe,"Aug 12, 2008",T,Spider-Man: Friend or Foe challenges players t...,Metascore 60 out of 100,16224,Spider-Man: Friend or Foe,PC,2007.0,Action,Activision,0.01,0.00,0.00,0.00,0.01


In [21]:
len(merged_df)

536

In [23]:
merged_df.sort_values(by='Global_Sales', ascending=False, inplace=True)
merged_df

Unnamed: 0,Title,Date,Rating,Description,Score,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
360,Pac-Man,"Aug 9, 2006",E,[Xbox Live Arcade] Play one of the all-time c...,Metascore 62 out of 100,90,Pac-Man,2600,1982.0,Puzzle,Atari,7.28,0.45,0.00,0.08,7.81
195,Kinect Sports,"Jun 6, 2017",E10+,This action sports compilation for the Xbox 36...,Metascore 73 out of 100,140,Kinect Sports,X360,2010.0,Sports,Microsoft Game Studios,3.92,1.78,0.03,0.51,6.24
0,Metal Gear Solid,"Jun 21, 2024",M,"You are Snake, a government agent on a mission...",Metascore 94 out of 100,146,Metal Gear Solid,PS,1998.0,Action,Konami Digital Entertainment,3.18,1.83,0.78,0.24,6.03
86,Monster Hunter Freedom Unite,"Sep 12, 2006",E10+,The Monster Hunter world is always changing an...,Metascore 81 out of 100,162,Monster Hunter Freedom Unite,PSP,2008.0,Role-Playing,Capcom,0.47,0.57,4.13,0.34,5.50
134,The Simpsons: Hit & Run,"Jul 5, 2010",T,The Simpsons Hit & Run is a mission-based driv...,Metascore 78 out of 100,225,The Simpsons: Hit & Run,PS2,2003.0,Racing,Vivendi Games,1.73,2.19,0.00,0.79,4.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80,Marvel: Ultimate Alliance,"Nov 4, 2008",E,Marvel: Ultimate Alliance is an action/RPG tha...,Metascore 82 out of 100,16236,Marvel: Ultimate Alliance,PC,2006.0,Role-Playing,Activision,0.01,0.00,0.00,0.00,0.01
376,Darkened Skye,"Oct 29, 2003",M,The dread lord Necroth rules the world armed w...,Metascore 61 out of 100,16390,Darkened Skye,GC,2002.0,Adventure,TDK Mediactive,0.01,0.00,0.00,0.00,0.01
249,Harry Potter and the Chamber of Secrets,"Feb 22, 2018",E,Players enter fantastical free-roaming 3D envi...,Metascore 71 out of 100,16146,Harry Potter and the Chamber of Secrets,PC,2002.0,Action,Electronic Arts,0.00,0.01,0.00,0.00,0.01
397,Spider-Man: Friend or Foe,"Aug 12, 2008",T,Spider-Man: Friend or Foe challenges players t...,Metascore 60 out of 100,16224,Spider-Man: Friend or Foe,PC,2007.0,Action,Activision,0.01,0.00,0.00,0.00,0.01


In [25]:
merged_df.reset_index(drop=True, inplace=True)
merged_df

Unnamed: 0,Title,Date,Rating,Description,Score,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,Pac-Man,"Aug 9, 2006",E,[Xbox Live Arcade] Play one of the all-time c...,Metascore 62 out of 100,90,Pac-Man,2600,1982.0,Puzzle,Atari,7.28,0.45,0.00,0.08,7.81
1,Kinect Sports,"Jun 6, 2017",E10+,This action sports compilation for the Xbox 36...,Metascore 73 out of 100,140,Kinect Sports,X360,2010.0,Sports,Microsoft Game Studios,3.92,1.78,0.03,0.51,6.24
2,Metal Gear Solid,"Jun 21, 2024",M,"You are Snake, a government agent on a mission...",Metascore 94 out of 100,146,Metal Gear Solid,PS,1998.0,Action,Konami Digital Entertainment,3.18,1.83,0.78,0.24,6.03
3,Monster Hunter Freedom Unite,"Sep 12, 2006",E10+,The Monster Hunter world is always changing an...,Metascore 81 out of 100,162,Monster Hunter Freedom Unite,PSP,2008.0,Role-Playing,Capcom,0.47,0.57,4.13,0.34,5.50
4,The Simpsons: Hit & Run,"Jul 5, 2010",T,The Simpsons Hit & Run is a mission-based driv...,Metascore 78 out of 100,225,The Simpsons: Hit & Run,PS2,2003.0,Racing,Vivendi Games,1.73,2.19,0.00,0.79,4.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
531,Marvel: Ultimate Alliance,"Nov 4, 2008",E,Marvel: Ultimate Alliance is an action/RPG tha...,Metascore 82 out of 100,16236,Marvel: Ultimate Alliance,PC,2006.0,Role-Playing,Activision,0.01,0.00,0.00,0.00,0.01
532,Darkened Skye,"Oct 29, 2003",M,The dread lord Necroth rules the world armed w...,Metascore 61 out of 100,16390,Darkened Skye,GC,2002.0,Adventure,TDK Mediactive,0.01,0.00,0.00,0.00,0.01
533,Harry Potter and the Chamber of Secrets,"Feb 22, 2018",E,Players enter fantastical free-roaming 3D envi...,Metascore 71 out of 100,16146,Harry Potter and the Chamber of Secrets,PC,2002.0,Action,Electronic Arts,0.00,0.01,0.00,0.00,0.01
534,Spider-Man: Friend or Foe,"Aug 12, 2008",T,Spider-Man: Friend or Foe challenges players t...,Metascore 60 out of 100,16224,Spider-Man: Friend or Foe,PC,2007.0,Action,Activision,0.01,0.00,0.00,0.00,0.01


In [26]:
merged_df.sort_values(by='Global_Sales', ascending=False, inplace=True)
merged_df

Unnamed: 0,Title,Date,Rating,Description,Score,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,Pac-Man,"Aug 9, 2006",E,[Xbox Live Arcade] Play one of the all-time c...,Metascore 62 out of 100,90,Pac-Man,2600,1982.0,Puzzle,Atari,7.28,0.45,0.00,0.08,7.81
1,Kinect Sports,"Jun 6, 2017",E10+,This action sports compilation for the Xbox 36...,Metascore 73 out of 100,140,Kinect Sports,X360,2010.0,Sports,Microsoft Game Studios,3.92,1.78,0.03,0.51,6.24
2,Metal Gear Solid,"Jun 21, 2024",M,"You are Snake, a government agent on a mission...",Metascore 94 out of 100,146,Metal Gear Solid,PS,1998.0,Action,Konami Digital Entertainment,3.18,1.83,0.78,0.24,6.03
3,Monster Hunter Freedom Unite,"Sep 12, 2006",E10+,The Monster Hunter world is always changing an...,Metascore 81 out of 100,162,Monster Hunter Freedom Unite,PSP,2008.0,Role-Playing,Capcom,0.47,0.57,4.13,0.34,5.50
4,The Simpsons: Hit & Run,"Jul 5, 2010",T,The Simpsons Hit & Run is a mission-based driv...,Metascore 78 out of 100,225,The Simpsons: Hit & Run,PS2,2003.0,Racing,Vivendi Games,1.73,2.19,0.00,0.79,4.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
531,Marvel: Ultimate Alliance,"Nov 4, 2008",E,Marvel: Ultimate Alliance is an action/RPG tha...,Metascore 82 out of 100,16236,Marvel: Ultimate Alliance,PC,2006.0,Role-Playing,Activision,0.01,0.00,0.00,0.00,0.01
532,Darkened Skye,"Oct 29, 2003",M,The dread lord Necroth rules the world armed w...,Metascore 61 out of 100,16390,Darkened Skye,GC,2002.0,Adventure,TDK Mediactive,0.01,0.00,0.00,0.00,0.01
533,Harry Potter and the Chamber of Secrets,"Feb 22, 2018",E,Players enter fantastical free-roaming 3D envi...,Metascore 71 out of 100,16146,Harry Potter and the Chamber of Secrets,PC,2002.0,Action,Electronic Arts,0.00,0.01,0.00,0.00,0.01
534,Spider-Man: Friend or Foe,"Aug 12, 2008",T,Spider-Man: Friend or Foe challenges players t...,Metascore 60 out of 100,16224,Spider-Man: Friend or Foe,PC,2007.0,Action,Activision,0.01,0.00,0.00,0.00,0.01


In [27]:
merged_df.to_csv('output_B.csv', index = False)