In [1]:
# Import dependencies
from bs4 import BeautifulSoup as bs
import requests as re
import pandas as pd
import numpy as np

In [2]:
# This dictionary was created from the list on ftw.usatoday.com/2022/12/the-game-awards-2022-winners
# and gives us all of the candidates' pages that need scraping as well as the consoles they use
dict_of_candidates = {'a-plague-tale-requiem': 'playstation-5', 
                      'elden-ring': 'playstation-5', 
                      'god-of-war-ragnarok': 'playstation-5', 
                      'horizon-forbidden-west': 'playstation-5', 
                      'stray': 'playstation-5', 
                      'xenoblade-chronicles-3': 'switch'}

In [3]:
# Create lists that will hold the data for the DataFrame
game_title = []
username = []
user_score = []
user_review_date = []
user_comments = []
total_helpful_thumbs = []
total_thumbs = []


# This is to mimic headers that the server is expecting and prevent redirects
headers = {
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
}

#This begins the loop that will scrape the first page
for candidate, console in dict_of_candidates.items():
    base_url = 'https://www.metacritic.com/game/'+str(console)+'/'+str(candidate)+'/user-reviews?page=0' 
    first_page = re.get(base_url, headers=headers)
    metacritic_soup = bs(first_page.text, 'html.parser')
    
    # This returns the total number of pages that will need to be scraped
    total_page_text = metacritic_soup.find('li', class_='page last_page')
    total_page_number = total_page_text.find('a', class_='page_num').text
    total_pages = int(total_page_number)
    
    #This gets the name of the game that will be attached to each new row of data
    game_name_body = metacritic_soup.find('div', class_='product_title')
    game_name = game_name_body.find('h1').text
    print(f'{game_name} scraping initiated...')
    
    #This is the overall score, I have it here in case it is needed for future purposes
    metacritic_score = metacritic_soup.find('div', class_='metascore_w user large game positive').text
    
    #This begins to scrape each page within the dictionary above based on how many pages were found within each
    #games webpage
    for page in range(0, 2):
        mutable_url = 'https://www.metacritic.com/game/'+str(console)+'/'+str(candidate)+'/user-reviews?page='+str(page) 
        response = re.get(mutable_url, headers=headers)
        metacritic_soup = bs(response.text, 'html.parser')

        review_body = metacritic_soup.find_all('ol', class_='reviews user_reviews')

        for i, review in enumerate(review_body):
            individual_review = review.find_all('li')
            
            for user_review in individual_review:
                
                game_title.append(game_name)
                
                user_profile_name = user_review.find_all('div', class_='name')
                for profile_name in user_profile_name:
                    username.append(profile_name.text)
                
                user_rating = user_review.find_all('div', class_='review_grade')
                for rating in user_rating:
                    user_score.append(rating.text)
                
                user_date = user_review.find_all('div', class_='date')
                for user in user_date:
                    user_review_date.append(user.text)
                
                user_review_body = review.find_all('div', class_='review_body')
                for body in user_review_body:
                    user_comments.append(body.text)
                
                rating_thumbs = review.find_all('span', class_='total_ups')
                for helpful_thumbs in rating_thumbs:
                    total_helpful_thumbs.append(helpful_thumbs.text)
                    
                total_rated_thumbs = review.find_all('span', class_='total_thumbs')
                for thumbs in total_rated_thumbs:
                    total_thumbs.append(thumbs.text)
            
        print(f'Page {page} completed')
                
print('Scraping Complete!')

A Plague Tale: Requiem scraping initiated...
Page 0 completed
Page 1 completed
Elden Ring scraping initiated...
Page 0 completed
Page 1 completed
God of War: Ragnarok scraping initiated...
Page 0 completed
Page 1 completed
Horizon Forbidden West scraping initiated...
Page 0 completed
Page 1 completed
Stray scraping initiated...
Page 0 completed
Page 1 completed
Xenoblade Chronicles 3 scraping initiated...
Page 0 completed
Page 1 completed
Scraping Complete!


In [5]:
game_data = list(zip(game_title, username, user_score, user_review_date, user_comments, total_helpful_thumbs, total_thumbs))
game_df = pd.DataFrame(game_data, columns=['Game', 'Userame', 'User Score', 'Review Date', 'Comments', "Thumbs Up", "Total Thumb Ratings"])
game_df = game_df.replace('\n', '', regex=True)
game_df.index += 1
game_df

Unnamed: 0,Game,Userame,User Score,Review Date,Comments,Thumbs Up,Total Thumb Ratings
1,A Plague Tale: Requiem,Morbo,10,"Oct 25, 2022",Beautiful game. Played on a PS5 with little is...,4,4
2,A Plague Tale: Requiem,GamergodsFail,9,"Oct 27, 2022",Requiem is an even more ambitious game than it...,3,3
3,A Plague Tale: Requiem,InTheSpotlight,10,"Oct 28, 2022",As someone who was quite dissapointed with A P...,3,3
4,A Plague Tale: Requiem,george_bondo,9,"Oct 27, 2022","this is a bold, made with soul, one of the mos...",3,3
5,A Plague Tale: Requiem,OneFadedGunner,10,"Nov 5, 2022","A great story, fun gameplay. One of the best g...",2,2
...,...,...,...,...,...,...,...
1148,God of War: Ragnarok,Mdavidcool,10,"Jul 31, 2022",Poor graphics and poor optimization for next-g...,8,17
1149,God of War: Ragnarok,HcGamer64,10,"Jul 31, 2022","It was an amazing experience! Amazing game, wi...",5,11
1150,God of War: Ragnarok,Kevinlin,10,"Jul 31, 2022",It should be a crime to play 30 fps in 2022it ...,9,20
1151,God of War: Ragnarok,Garc79,10,"Aug 1, 2022","This review contains spoilers, cli...",4,9
