In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import sqlite3
import time

# Headers to avoid 403 error
headers = {
    "User-Agent": "Mozilla/5.0"
}

base_url = "https://www.metacritic.com/browse/games/release-date/available/all/date"
game_data = []

for page in range(5):  # Scrape first 5 pages
    url = f"{base_url}?page={page}"
    # A timeout keeps a stalled connection from hanging the kernel, and
    # raise_for_status() turns an HTTP error (e.g. 403/404) into an exception
    # instead of letting the error page be parsed into zero rows.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # One <td class="clamp-summary-wrap"> is expected per listed game.
    games = soup.find_all("td", class_="clamp-summary-wrap")

    for game in games:
        # Look the details block up once; platform and release date are read
        # from its second and third <span> respectively.
        detail_spans = game.find("div", class_="clamp-details").find_all("span")

        game_data.append({
            "title": game.find("a", class_="title").get_text(strip=True),
            "platform": detail_spans[1].get_text(strip=True),
            "release_date": detail_spans[2].get_text(strip=True),
            "summary": game.find("div", class_="summary").get_text(strip=True),
        })

    time.sleep(1)  # Be polite to the server

# An empty frame here means no element matched the selectors on any page.
df = pd.DataFrame(game_data)
print(df.head())


Empty DataFrame
Columns: []
Index: []


In [2]:
import csv
from datetime import datetime
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup

In [3]:
def get_url(min, max, genre):
    """Build the Metacritic browse URL for a release-year range and a genre.

    Args:
        min: Earliest release year, sent as the ``releaseYearMin`` parameter.
        max: Latest release year, sent as the ``releaseYearMax`` parameter.
        genre: Genre filter, sent as the ``genre`` parameter.

    Returns:
        str: The browse URL with the three values as an encoded query string.
    """
    # NOTE: `min` and `max` shadow the builtins inside this function; the names
    # are kept because callers may pass them by keyword.
    base = 'https://www.metacritic.com/browse/game/all/action-adventure/all-time/metascore/'
    # urlencode escapes every value, so a genre containing spaces, '&' or '='
    # cannot corrupt the query string the way raw str.format() interpolation can.
    query = urlencode({
        'releaseYearMin': min,
        'releaseYearMax': max,
        'genre': genre,
    })
    return f'{base}?{query}'

In [4]:
# Query: release years 1900 through 2000, genre "action".
url = get_url(min=1900, max=2000, genre='action')

In [5]:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

In [6]:
# Bound the wait so an unresponsive server cannot hang the kernel, and fail
# loudly on an HTTP error instead of parsing an error page in the cells below.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

In [7]:
response

<Response [200]>

In [8]:
response.reason

'OK'

In [9]:
soup = BeautifulSoup(response.text, 'html.parser')

In [10]:
cards = soup.find_all('div','c-finderProductCard_title')

In [11]:
len(cards)

7

In [12]:
card = cards[0]

In [13]:
card

<div class="c-finderProductCard_title" data-title="Blast Corps"><h3 class="c-finderProductCard_titleHeading"><span>1.</span> <span>Blast Corps</span></h3></div>

In [14]:
atag = card.h3

In [15]:
atag

<h3 class="c-finderProductCard_titleHeading"><span>1.</span> <span>Blast Corps</span></h3>

In [16]:
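# Abandoned attempt: Tag.get() looks up an HTML attribute by name, not a CSS
# class, so this call does not return the heading text.
#atag.get('c-finderProductCard_titleHeading')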

In [17]:
title = atag.find_all("span")[1].text.strip()
title

'Blast Corps'

In [18]:
# `card` is only the title <div>, whose sole child is the <h3> heading, so the
# meta block is not among its descendants and this lookup yields None.
a = card.find('div', 'c-finderProductCard_meta')

In [19]:
a

In [20]:
dates = soup.find_all('div','c-finderProductCard_meta')

In [21]:
len(dates)

14

In [22]:
date = dates[0]

In [23]:
date

<div class="c-finderProductCard_meta"><span class="u-text-uppercase">
          Feb 28, 1997
        </span> <span>
           • 
        </span> <span><span class="u-text-capitalize">Rated</span> K-A
        </span></div>

In [24]:
date.find('span','u-text-uppercase')

<span class="u-text-uppercase">
          Feb 28, 1997
        </span>

In [25]:
# The release date is the text of the upper-cased span; strip() removes the
# newlines and indentation that the markup puts around it.
release_date = date.find('span', class_='u-text-uppercase').text.strip()
print(release_date)

Feb 28, 1997


In [26]:
# Same selector as `dates`: the rating sits in the same meta block as the
# release date, so this returns the same set of elements.
rates = soup.find_all('div', class_='c-finderProductCard_meta')

In [27]:
len(rates)

14

In [28]:
rate = rates[0]

In [29]:
rate

<div class="c-finderProductCard_meta"><span class="u-text-uppercase">
          Feb 28, 1997
        </span> <span>
           • 
        </span> <span><span class="u-text-capitalize">Rated</span> K-A
        </span></div>

In [30]:
rate.find('span','u-text-capitalize')

<span class="u-text-capitalize">Rated</span>

In [31]:
# The rating code (e.g. "K-A") is the bare text node that follows the
# <span>Rated</span> label, so read the label's next sibling and trim it.
rating_span = rate.find('span', class_='u-text-capitalize')
rating_value = rating_span.next_sibling.strip()
print(rating_value)

K-A


In [32]:
descriptions = soup.find_all('div','c-finderProductCard_description')

In [33]:
len(descriptions)

7

In [34]:
description = descriptions[0]

In [35]:
description

<div class="c-finderProductCard_description"><span>A pair of defective nuclear missiles, en route to a safe detonation site, has begun to leak. Badly damaged, the carrier automatically locks onto the most direct route. Clear a path to help the carrier arrive safely. Tons of vehicles are at your disposal. Leave nothing standing or the adventure will end in an earth-shattering explosion! Find the hidden technicians to ensure a safe detonation. Strap on your seat belt, it's going to be a bumpy ride.</span></div>

In [36]:
# The description text lives in the single <span> inside the description div;
# fall back to None when that span is absent.
description_span = description.find('span')
if description_span:
    description_value = description_span.text.strip()
else:
    description_value = None
print(description_value)

A pair of defective nuclear missiles, en route to a safe detonation site, has begun to leak. Badly damaged, the carrier automatically locks onto the most direct route. Clear a path to help the carrier arrive safely. Tons of vehicles are at your disposal. Leave nothing standing or the adventure will end in an earth-shattering explosion! Find the hidden technicians to ensure a safe detonation. Strap on your seat belt, it's going to be a bumpy ride.


In [37]:
scores = soup.find_all('div','c-siteReviewScore')

In [38]:
len(scores)

7

In [39]:
score = scores[0]

In [40]:
score

<div aria-label="Metascore 90 out of 100" class="c-siteReviewScore u-flexbox-column u-flexbox-alignCenter u-flexbox-justifyCenter g-text-bold c-siteReviewScore_green g-color-gray90 c-siteReviewScore_xsmall" data-v-e408cafe="" title="Metascore 90 out of 100"><span data-v-e408cafe="">90</span></div>

In [41]:
# The score element exposes the full "Metascore N out of 100" phrase through
# its aria-label attribute.
metascore_text = score.attrs.get("aria-label")
print(metascore_text)

Metascore 90 out of 100


In [47]:
def get_record(card, date, description, score):
    """Extract one game's fields from its four card elements.

    Args:
        card: Title element; its ``<h3>`` holds a rank span followed by the
            title span.
        date: Meta element holding the release-date span
            (``u-text-uppercase``) and the "Rated" label span
            (``u-text-capitalize``) whose following text node is the rating.
        description: Description element wrapping a ``<span>`` with the text.
        score: Score element whose ``aria-label`` attribute carries the
            Metascore phrase.

    Returns:
        tuple: ``(title, release_date, rating_value, description_value,
        metascore_text)``. A field whose markup is missing is ``None`` rather
        than raising, matching how the description was already handled.
    """
    # Title: the second <span> of the heading (the first one is the rank, "1.").
    heading = card.h3
    heading_spans = heading.find_all("span") if heading is not None else []
    title = heading_spans[1].text.strip() if len(heading_spans) > 1 else None

    # Release date: text of the upper-cased span, trimmed of layout whitespace.
    date_span = date.find('span', 'u-text-uppercase')
    release_date = date_span.text.strip() if date_span is not None else None

    # Rating: the text node right after <span>Rated</span>. Only a string
    # sibling is usable; a missing label or a non-text sibling gives None.
    rating_span = date.find('span', 'u-text-capitalize')
    rating_node = rating_span.next_sibling if rating_span is not None else None
    rating_value = rating_node.strip() if isinstance(rating_node, str) else None

    description_span = description.find('span')
    description_value = description_span.text.strip() if description_span is not None else None

    # .get() already returns None when the attribute is absent.
    metascore_text = score.get("aria-label")

    return (title, release_date, rating_value, description_value, metascore_text)

In [48]:
records = []

# Pair every card with its own meta, description and score elements. Passing
# the single `card`/`date`/`description`/`score` samples from the exploration
# cells would just append the first game once per card.
if not (len(cards) == len(descriptions) == len(scores)):
    raise ValueError(
        f"element counts differ: {len(cards)} cards, "
        f"{len(descriptions)} descriptions, {len(scores)} scores"
    )
if cards and (not dates or len(dates) % len(cards) != 0):
    raise ValueError(
        f"{len(dates)} meta blocks cannot be split evenly over {len(cards)} cards"
    )

# The page yielded more meta blocks than cards (14 vs 7 in the cells above), so
# take one meta block per card at a fixed stride.
# NOTE(review): assumes each card's meta blocks are adjacent in document order
# and the first of each group is the date/rating one, as dates[0] is - confirm
# by inspecting dates[1].
meta_stride = len(dates) // len(cards) if cards else 1
card_metas = dates[::meta_stride]

# Distinct loop names keep the exploratory globals (`card`, `date`, ...) intact.
for card_el, meta_el, description_el, score_el in zip(cards, card_metas, descriptions, scores):
    records.append(get_record(card_el, meta_el, description_el, score_el))

In [56]:
records[0]

('Blast Corps',
 'Feb 28, 1997',
 'K-A',
 "A pair of defective nuclear missiles, en route to a safe detonation site, has begun to leak. Badly damaged, the carrier automatically locks onto the most direct route. Clear a path to help the carrier arrive safely. Tons of vehicles are at your disposal. Leave nothing standing or the adventure will end in an earth-shattering explosion! Find the hidden technicians to ensure a safe detonation. Strap on your seat belt, it's going to be a bumpy ride.",
 'Metascore 90 out of 100')

In [55]:
# Tuples compare element by element starting with the title, so this is the
# position of the first record that sorts highest as a whole tuple - not
# necessarily the record with the best Metascore.
max_index = max(range(len(records)), key=lambda position: records[position])
max_index

0