In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# Set up headless Chrome browser
options = Options()
options.add_argument("--headless")  # Set to False to see browser in action
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# List to store all DataFrames
df_list = []

# Loop through pages 1 to 4
for page in range(1, 5):
    url = f"https://www.metacritic.com/browse/game/all/adventure/all-time/metascore/?releaseYearMin=1991&releaseYearMax=2011&genre=adventure&page={page}"
    print(f"Scraping page {page}...")
    
    driver.get(url)
    time.sleep(3)  # Wait for JavaScript to load

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Extract titles
    titles = soup.find_all('div', class_='c-finderProductCard_title')
    titles_a = [t.find_all("span")[1].text.strip() for t in titles if len(t.find_all("span")) > 1]

    # Extract dates and ratings
    dates = soup.find_all('div', class_='c-finderProductCard_meta')
    dates_a = []
    rates_a = []

    for d in dates:
        span_date = d.find('span', class_='u-text-uppercase')
        dates_a.append(span_date.text.strip() if span_date else None)

        span_rating = d.find('span', class_='u-text-capitalize')
        rates_a.append(span_rating.next_sibling.strip() if span_rating and span_rating.next_sibling else None)

    # Extract descriptions
    descriptions = soup.find_all('div', class_='c-finderProductCard_description')
    descriptions_a = [desc.find('span').text.strip() if desc.find('span') else None for desc in descriptions]

    # Extract scores
    scores = soup.find_all('div', class_='c-siteReviewScore')
    scores_a = [score.get("aria-label") for score in scores]

    # Debugging: check list lengths
    print(f"Titles: {len(titles_a)}, Dates: {len(dates_a)}, Ratings: {len(rates_a)}, Descriptions: {len(descriptions_a)}, Scores: {len(scores_a)}")

    # Trim to shortest length
    min_len = min(len(titles_a), len(dates_a), len(rates_a), len(descriptions_a), len(scores_a))

    data = zip(
        titles_a[:min_len],
        dates_a[:min_len],
        rates_a[:min_len],
        descriptions_a[:min_len],
        scores_a[:min_len]
    )

    # Create DataFrame
    df = pd.DataFrame(data, columns=['Title', 'Date', 'Rating', 'Description', 'Score'])
    df_list.append(df)

# Close the browser
driver.quit()

# Combine all data into one DataFrame
final_df = pd.concat(df_list, ignore_index=True)

# Display the first few rows
print(final_df.head())


Scraping page 1...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 2...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 3...
Titles: 24, Dates: 48, Ratings: 48, Descriptions: 24, Scores: 24
Scraping page 4...
Titles: 11, Dates: 22, Ratings: 22, Descriptions: 11, Scores: 11
                                       Title          Date Rating  \
0  Zack & Wiki: Quest for Barbaros' Treasure  Oct 23, 2007      E   
1                                     flower          None   None   
2             Ghost Trick: Phantom Detective  Feb 12, 2009      E   
3                The Book of Unwritten Tales          None   None   
4                   Prince of Persia Classic  Jan 11, 2011      T   

                                         Description                    Score  
0  Legend has it that a revered pirate named Barb...  Metascore 87 out of 100  
1  [Playstation Network]  The pioneers that broug...  Metascore 87 out of 100  
2  Ghost 

In [4]:
final_df

Unnamed: 0,Title,Date,Rating,Description,Score
0,Zack & Wiki: Quest for Barbaros' Treasure,"Oct 23, 2007",E,Legend has it that a revered pirate named Barb...,Metascore 87 out of 100
1,flower,,,[Playstation Network] The pioneers that broug...,Metascore 87 out of 100
2,Ghost Trick: Phantom Detective,"Feb 12, 2009",E,Ghost Trick is a story of mystery and intrigue...,Metascore 83 out of 100
3,The Book of Unwritten Tales,,,"In a world torn by war, the aged gremlin archa...",Metascore 82 out of 100
4,Prince of Persia Classic,"Jan 11, 2011",T,[Xbox Live Arcade] While the Sultan of Persia...,Metascore 82 out of 100
...,...,...,...,...,...
78,CSI: Crime Scene Investigation: Fatal Conspiracy,"Dec 31, 2001",M,CSI: Fatal Conspiracy features five new connec...,Metascore 42 out of 100
79,Prison Break: The Conspiracy,,,Prison Break - The Conspiracy takes you inside...,Metascore 40 out of 100
80,Inkheart,"Jun 11, 2008",T,Based on the feature film adaptation of the be...,Metascore 39 out of 100
81,NCIS,,,NCIS is now available for the first time ever ...,Metascore 35 out of 100


In [5]:
final_df_2 = final_df.dropna()

In [6]:
final_df_2

Unnamed: 0,Title,Date,Rating,Description,Score
0,Zack & Wiki: Quest for Barbaros' Treasure,"Oct 23, 2007",E,Legend has it that a revered pirate named Barb...,Metascore 87 out of 100
2,Ghost Trick: Phantom Detective,"Feb 12, 2009",E,Ghost Trick is a story of mystery and intrigue...,Metascore 83 out of 100
4,Prince of Persia Classic,"Jan 11, 2011",T,[Xbox Live Arcade] While the Sultan of Persia...,Metascore 82 out of 100
6,Hotel Dusk: Room 215,"Oct 28, 2011",T,"Los Angeles, 1979. You are Kyle Hyde, an ex-co...",Metascore 78 out of 100
8,The Devil Inside,"Jun 13, 2007",T,The game features you as the star of a live Ho...,Metascore 76 out of 100
12,Beyond Atlantis II,"Jan 22, 2007",T,A young archeologist searches for the existenc...,Metascore 75 out of 100
14,Missing: Since January,"Jul 5, 2010",T,"Journalist, Jack Lorski, and his companion dis...",Metascore 75 out of 100
16,Space Pirates and Zombies,"Nov 9, 2000",M,Space Pirates and Zombies takes place far enou...,Metascore 74 out of 100
18,Ankh: Curse of the Scarab King,"May 21, 2008",M,"After a wild party at Pharaoh‘s pyramid, young...",Metascore 74 out of 100
20,Dive: The Medes Islands Secret,"Oct 29, 2008",M,Dive: The Medes Islands Secret recreates under...,Metascore 74 out of 100


In [7]:
final_df_3 = final_df_2.reset_index(drop=True)

In [8]:
final_df_3

Unnamed: 0,Title,Date,Rating,Description,Score
0,Zack & Wiki: Quest for Barbaros' Treasure,"Oct 23, 2007",E,Legend has it that a revered pirate named Barb...,Metascore 87 out of 100
1,Ghost Trick: Phantom Detective,"Feb 12, 2009",E,Ghost Trick is a story of mystery and intrigue...,Metascore 83 out of 100
2,Prince of Persia Classic,"Jan 11, 2011",T,[Xbox Live Arcade] While the Sultan of Persia...,Metascore 82 out of 100
3,Hotel Dusk: Room 215,"Oct 28, 2011",T,"Los Angeles, 1979. You are Kyle Hyde, an ex-co...",Metascore 78 out of 100
4,The Devil Inside,"Jun 13, 2007",T,The game features you as the star of a live Ho...,Metascore 76 out of 100
5,Beyond Atlantis II,"Jan 22, 2007",T,A young archeologist searches for the existenc...,Metascore 75 out of 100
6,Missing: Since January,"Jul 5, 2010",T,"Journalist, Jack Lorski, and his companion dis...",Metascore 75 out of 100
7,Space Pirates and Zombies,"Nov 9, 2000",M,Space Pirates and Zombies takes place far enou...,Metascore 74 out of 100
8,Ankh: Curse of the Scarab King,"May 21, 2008",M,"After a wild party at Pharaoh‘s pyramid, young...",Metascore 74 out of 100
9,Dive: The Medes Islands Secret,"Oct 29, 2008",M,Dive: The Medes Islands Secret recreates under...,Metascore 74 out of 100


In [9]:
df_sales = pd.read_csv('Data/vgsales.csv')

In [10]:
df_sales

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


In [11]:
merged_df = pd.merge(final_df_3, df_sales, left_on='Title', right_on='Name', how='inner')

In [14]:
merged_df

Unnamed: 0,Title,Date,Rating,Description,Score,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,Zack & Wiki: Quest for Barbaros' Treasure,"Oct 23, 2007",E,Legend has it that a revered pirate named Barb...,Metascore 87 out of 100,4655,Zack & Wiki: Quest for Barbaros' Treasure,Wii,2007.0,Adventure,Nintendo,0.18,0.17,0.03,0.04,0.42
1,Ghost Trick: Phantom Detective,"Feb 12, 2009",E,Ghost Trick is a story of mystery and intrigue...,Metascore 83 out of 100,4621,Ghost Trick: Phantom Detective,DS,2010.0,Adventure,Capcom,0.2,0.08,0.12,0.03,0.42
2,Hotel Dusk: Room 215,"Oct 28, 2011",T,"Los Angeles, 1979. You are Kyle Hyde, an ex-co...",Metascore 78 out of 100,3578,Hotel Dusk: Room 215,DS,2007.0,Adventure,Nintendo,0.27,0.05,0.22,0.03,0.56
3,Teenage Zombies: Invasion of the Alien Brain T...,"Sep 30, 2007",E10+,The Earth is under attack by a horde of Alien ...,Metascore 67 out of 100,13953,Teenage Zombies: Invasion of the Alien Brain T...,DS,2008.0,Adventure,Ignition Entertainment,0.03,0.0,0.0,0.0,0.04
4,In Cold Blood,"Apr 15, 2008",T,Travel the world controlling M16 Secret Britis...,Metascore 67 out of 100,14659,In Cold Blood,PS,2000.0,Adventure,Sony Computer Entertainment,0.02,0.01,0.0,0.0,0.03
5,Disney's A Christmas Carol,"Feb 5, 2008",T,This is a puzzle-adventure game of the the fea...,Metascore 66 out of 100,6758,Disney's A Christmas Carol,DS,2009.0,Adventure,Disney Interactive Studios,0.22,0.01,0.0,0.02,0.25
6,The Adventures of Jimmy Neutron Boy Genius: At...,"Apr 15, 2008",E10+,Jimmy Neutron: Attack of the Twonkies is jam-p...,Metascore 65 out of 100,3949,The Adventures of Jimmy Neutron Boy Genius: At...,GBA,2004.0,Action,THQ,0.36,0.13,0.0,0.01,0.5
7,The Adventures of Jimmy Neutron Boy Genius: At...,"Apr 15, 2008",E10+,Jimmy Neutron: Attack of the Twonkies is jam-p...,Metascore 65 out of 100,3993,The Adventures of Jimmy Neutron Boy Genius: At...,PS2,2004.0,Action,THQ,0.24,0.19,0.0,0.06,0.5
8,The Adventures of Jimmy Neutron Boy Genius: At...,"Apr 15, 2008",E10+,Jimmy Neutron: Attack of the Twonkies is jam-p...,Metascore 65 out of 100,6202,The Adventures of Jimmy Neutron Boy Genius: At...,GC,2004.0,Action,THQ,0.22,0.06,0.0,0.01,0.28
9,The Adventures of Tintin: The Game,"Oct 9, 2007",E,The Adventures of Tintin is a game based off i...,Metascore 63 out of 100,5392,The Adventures of Tintin: The Game,Wii,2011.0,Action,Ubisoft,0.13,0.17,0.0,0.04,0.34


In [13]:
len(merged_df)

18