## Gathering Nintendo Switch game information

First, we scrape the Metacritic site to find the best-ranked games for the Nintendo Switch. For each game we collect the title, the user score, the critic score (metascore), the release date, and the genre description, from which we will later extract the genre.

In [61]:
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup


_METACRITIC_ROOT = 'https://www.metacritic.com'
_SWITCH_LISTING_URL = (
    _METACRITIC_ROOT
    + '/browse/games/score/metascore/all/switch/filtered'
    + '?sort=desc&view=detailed&page='
)
_REQUEST_TIMEOUT = 30  # seconds; requests waits forever when no timeout is given
_MISSING = 'N/A'  # placeholder stored when a field cannot be found
_COLUMNS = ['name', 'release_date', 'metascore', 'user_score', 'genre']


def _fetch_soup(session, url):
    """Download *url* with *session* and return the parsed HTML.

    Raises ``requests.RequestException`` on connection problems, timeouts and
    HTTP error statuses, so an error page is never parsed as if it were data.
    """
    response = session.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _count_pages(soup):
    """Return how many listing pages the pagination bar advertises (at least 1).

    NOTE(review): assumes the final pagination entry is an ``li`` carrying the
    ``last_page`` class and that its ``page_num`` child holds the 1-based page
    count -- confirm against the live markup. The ``active_page`` entry is the
    page currently displayed, so it cannot be used to find the last page.
    """
    last_item = soup.find('li', class_='last_page')
    if last_item is None:
        return 1
    # The item may also contain a non-numeric "page_num" element (an ellipsis),
    # so keep only the purely numeric labels.
    labels = [tag.text.strip() for tag in last_item.find_all(class_='page_num')]
    numbers = [label for label in labels if label.isdecimal()]
    return max(int(numbers[-1]), 1) if numbers else 1


def _text_or_missing(tag):
    """Return the stripped text of *tag*, or the placeholder when it is None."""
    return tag.text.strip() if tag is not None else _MISSING


def _parse_row(row):
    """Extract one game's listing fields from a table row.

    Returns a dict with the keys ``name``, ``release_date``, ``metascore``,
    ``user_score`` and ``href``, or ``None`` when the row does not describe a
    game (fewer than two cells, or no title link). Fields other than the title
    fall back to the placeholder so every record has the same keys.
    """
    cells = row.find_all('td')
    if len(cells) < 2:
        return None
    summary = cells[1]

    title_link = summary.find('a', {"class": "title"})
    heading = title_link.find('h3') if title_link is not None else None
    if heading is None or not title_link.get('href'):
        return None

    user_box = summary.find('div', {"class": "clamp-userscore"})
    user_score = (
        user_box.find('div', {"class": "metascore_w"})
        if user_box is not None
        else None
    )
    return {
        'name': heading.text.strip(),
        # The release date is read from the first <span> without a class.
        'release_date': _text_or_missing(summary.find('span', {"class": ""})),
        # The first "metascore_w" box in the cell is taken as the critic score.
        'metascore': _text_or_missing(summary.find('div', {"class": "metascore_w"})),
        'user_score': _text_or_missing(user_score),
        'href': title_link['href'],
    }


def _fetch_genre(session, url):
    """Return the comma-joined ``span.data`` texts of a game page.

    The result is the raw description from which the genre is extracted later.
    A failed request is reported and yields the placeholder instead of
    aborting the whole scrape.
    """
    try:
        soup = _fetch_soup(session, url)
    except requests.RequestException as exc:
        print(f"could not fetch {url}: {exc}")
        return _MISSING
    labels = [tag.text.strip() for tag in soup.find_all('span', {"class": "data"})]
    return ', '.join(labels) if labels else _MISSING


def scrape_switch_games(output_path: str = 'switch_games.csv',
                        page_delay: float = 5.0) -> None:
    """
    Scrape the Nintendo Switch game listing from Metacritic and save it as CSV.

    Every listing page is visited; for each game the name, release date,
    metascore and user score are read from the listing, and the genre
    description is fetched from the game's own page. The combined data is
    written to *output_path* with the columns ``name``, ``release_date``,
    ``metascore``, ``user_score`` and ``genre``.

    Args:
        output_path: Destination CSV file.
        page_delay: Seconds to wait before requesting each further listing
            page, to avoid hammering the site.

    Raises:
        requests.RequestException: If a listing page cannot be downloaded.
    """
    records = []

    with requests.Session() as session:
        session.headers.update({'User-Agent': 'Mozilla/5.0'})

        # The first page doubles as the source of the page count, so it is
        # downloaded only once.
        soup = _fetch_soup(session, _SWITCH_LISTING_URL + '0')
        total_pages = _count_pages(soup)

        # The "page" query value is 0-based while the pagination bar is
        # 1-based, hence range(total_pages) rather than an inclusive bound.
        for page_index in range(total_pages):
            if page_index:
                time.sleep(page_delay)
                soup = _fetch_soup(session, _SWITCH_LISTING_URL + str(page_index))

            for table in soup.find_all('table'):
                for row in table.find_all('tr'):
                    record = _parse_row(row)
                    if record is None:
                        continue
                    # Build the whole record before storing it so the columns
                    # can never end up with different lengths.
                    record['genre'] = _fetch_genre(
                        session, _METACRITIC_ROOT + record.pop('href')
                    )
                    records.append(record)

    df = pd.DataFrame(records, columns=_COLUMNS)
    df.to_csv(output_path, index=False)
    print(f"data saved to {output_path}")


# Run the scraper: it writes switch_games.csv and prints a confirmation when done.
scrape_switch_games()


data saved to switch_games.csv
