## Gathering Nintendo Switch game information from Metacritic

First we scrape the Metacritic site to build **a database of most Nintendo Switch games**. We chose this site because it provides both user and critic scores. For each game we collect the title, release date, user score, critic score (metascore) and genre. Most fields can be extracted from the index pages, but for the genre we have to navigate to each game's own page.

In [63]:
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup

def scrape_switch_games(*, max_pages=199, page_delay=5, request_timeout=30,
                        output_path='nintendo_switch_games.csv'):
    """
    Scrape Nintendo Switch game data from Metacritic and save it as a CSV.

    For every game listed on the Metacritic Switch index pages this collects
    the name, release date, metascore and user score, then visits the game's
    detail page to read its genre. Fields whose HTML element cannot be found
    are recorded as 'N/A'.

    Args:
        max_pages: Upper bound on the number of index pages to request
            (pages are numbered from 0).
        page_delay: Seconds to sleep after each index page has been processed.
        request_timeout: Timeout in seconds applied to every HTTP request.
        output_path: Path of the CSV file to write.

    Returns:
        A pandas DataFrame with the columns name, release_date, metascore,
        user_score and genre. The same data is written to ``output_path``.

    Raises:
        requests.RequestException: If an HTTP request fails or times out.
            Any rows collected before the failure (or before a
            KeyboardInterrupt) are written to ``output_path`` first.
    """
    base_url = 'https://www.metacritic.com'
    index_url = base_url + '/browse/games/score/metascore/all/switch/filtered?page='
    columns = ['name', 'release_date', 'metascore', 'user_score', 'genre']
    missing = 'N/A'

    # One dict per game. Building complete rows (instead of appending to five
    # parallel lists) guarantees every column always has the same length.
    records = []

    def fetch_soup(session, url):
        """Download ``url`` and return its parsed HTML."""
        response = session.get(url, timeout=request_timeout)
        return BeautifulSoup(response.text, 'html.parser')

    def text_of(tag):
        """Return the stripped text of ``tag``, or 'N/A' if the tag is absent."""
        return tag.text.strip() if tag is not None else missing

    def fetch_genre(session, href):
        """Read the genre from the game's detail page."""
        detail = fetch_soup(session, base_url + href)
        genre_item = detail.find('li', class_='summary_detail product_genre')
        if genre_item is None:
            return missing
        return text_of(genre_item.find('span', class_='data'))

    def parse_row(session, row):
        """Turn one index-table row into a record, or None if it is not a game row."""
        cells = row.find_all('td')
        if len(cells) < 2:
            # Spacer/empty rows carry no game data.
            return None
        summary = cells[1]
        link = summary.find('a', {"class": "title"})
        heading = link.find('h3') if link is not None else None
        if heading is None:
            return None

        # The user score lives in its own wrapper; the first 'metascore_w'
        # element of the cell is used for the critic score.
        user_box = summary.find('div', {"class": "clamp-userscore"})
        user_tag = None
        if user_box is not None:
            user_tag = user_box.find('div', {"class": "metascore_w"})

        href = link.get('href')
        return {
            'name': heading.text.strip(),
            # An empty class filter selects the first <span> without a class.
            'release_date': text_of(summary.find('span', {"class": ""})),
            'metascore': text_of(summary.find('div', {"class": "metascore_w"})),
            'user_score': text_of(user_tag),
            'genre': fetch_genre(session, href) if href else missing,
        }

    def scrape_pages(session):
        """Walk the index pages until one has no active pagination entry."""
        for page in range(max_pages):
            # Each index page is fetched once and reused for both the
            # pagination check and the data extraction.
            soup = fetch_soup(session, index_url + str(page))
            if soup.find('li', {"class": "active_page"}) is None:
                break
            for table in soup.find_all('table'):
                for row in table.find_all('tr'):
                    record = parse_row(session, row)
                    if record is not None:
                        records.append(record)
            time.sleep(page_delay)

    def save(partial=False):
        """Write the collected records to CSV and return them as a DataFrame."""
        frame = pd.DataFrame(records, columns=columns)
        frame.to_csv(output_path, index=False)
        prefix = "partial data" if partial else "data"
        print(f"{prefix} saved to {output_path}")
        return frame

    with requests.Session() as session:
        session.headers.update({'User-agent': 'Mozilla/5.0'})
        try:
            scrape_pages(session)
        except BaseException:
            # A full run takes a long time; keep whatever was gathered if the
            # run is interrupted or a request fails, then re-raise unchanged.
            if records:
                save(partial=True)
            raise

    return save()


# Run the scraper. This performs live HTTP requests against metacritic.com and
# writes nintendo_switch_games.csv to the current working directory.
scrape_switch_games()


KeyboardInterrupt: 