## В этом файле написан код, который собирает информацию с сайта **metacritic.com** и сохраняет ее в базу данных SQL.

In [None]:
import psycopg2
import time
from typing import Union, Tuple
from requests import Response
from requests_html import HTMLSession

In [2]:
class Database:
    """Wrapper around a psycopg2 connection that stores scraped rows in one table.

    Call ``connect_to_db`` before any other method; ``connect`` and ``cursor``
    stay ``None`` until then.
    """

    # Columns filled by insert_data, in the order values must appear in a row.
    _COLUMNS = ('name', 'platform', 'date', 'summary',
                'metascore', 'userscore', 'href')

    def __init__(self, dbname, table_name, user, password, host, port):
        """Store the connection settings; no connection is opened here."""
        self.dbname = dbname
        self.table_name = table_name
        self.user = user
        self.password = password
        self.host = host
        self.port = port
        self.connect = None
        self.cursor = None

    def connect_to_db(self):
        """Open the psycopg2 connection and create a cursor on it."""
        self.connect = psycopg2.connect(dbname=self.dbname, user=self.user,
                                        password=self.password, host=self.host,
                                        port=self.port)
        self.cursor = self.connect.cursor()

    def create_table(self):
        """Create the target table if it does not exist yet and commit."""
        expression = f'CREATE TABLE IF NOT EXISTS {self.table_name} (' \
                     'id SERIAL PRIMARY KEY, ' \
                     'name varchar(200) NOT NULL, ' \
                     'platform varchar(20) NOT NULL, ' \
                     'date varchar(20) NOT NULL, ' \
                     'summary varchar(5000) NOT NULL, ' \
                     'metascore varchar(4) NOT NULL, ' \
                     'userscore varchar(5) NOT NULL, ' \
                     'href varchar(200) NOT NULL)'
        self.cursor.execute(expression)
        self.connect.commit()

    def insert_data(self, data):
        """Insert every row of ``data`` and commit once at the end.

        Args:
            data: iterable of 7-item sequences ordered as
                (name, platform, date, summary, metascore, userscore, href).
        """
        columns = ', '.join(self._COLUMNS)
        placeholders = ', '.join(['%s'] * len(self._COLUMNS))
        # NOTE: the table name is interpolated as-is (identifiers cannot be
        # bound as query parameters), so it must come from trusted config.
        query = f'INSERT INTO {self.table_name} ({columns}) VALUES ({placeholders})'
        for row in data:
            # Values are passed separately so the driver does the quoting;
            # scraped text containing quotes can no longer break the statement.
            self.cursor.execute(query, tuple(row))
        self.connect.commit()

    def close(self):
        """Close the cursor and the connection; safe to call if never connected."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.connect is not None:
            self.connect.close()
            self.connect = None

In [None]:
class Parser:
    """Fetches metacritic.com game listing pages for one year and extracts rows."""

    # Seconds to wait for the site before giving up; without a timeout a
    # stalled connection would block the scrape indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, year):
        """Remember the year to query and open an HTML session."""
        self.year = year
        self.session = HTMLSession()

    # Return annotations are quoted so they are not evaluated at definition time.
    def get_data_from_page(self, pagination_num) -> 'Union[Tuple[Response, list[tuple]], Tuple[None, None]]':
        """Download one listing page and collect the game rows found on it.

        Args:
            pagination_num: value placed in the ``page`` query parameter.

        Returns:
            ``(response, rows)`` on HTTP 200, where ``rows`` is a list of tuples
            produced by ``collect_game_data``; ``(None, None)`` on any other
            status code.
        """
        url = 'https://www.metacritic.com/browse/games/score/metascore/year/all/filtered?year_selected=' \
              f'{self.year}&distribution=&sort=desc&view=detailed&page={pagination_num}'

        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            print(f'get_data_from_page error, code {response.status_code}, '
                  f'year {self.year}, page {pagination_num}')
            return None, None

        games_data = []
        for wrapper in response.html.find('.browse_list_wrapper'):
            for row in wrapper.find('tr'):
                # Rows with no text carry no game card; skip them.
                if not row.text:
                    continue
                games_data.append(self.collect_game_data(row))
        return response, games_data

    @staticmethod
    def collect_game_data(game_card) -> tuple:
        """Extract the fields of a single game from its table row.

        Args:
            game_card: parsed row element exposing ``find`` (presumably a
                requests_html element - not a ``Response``).

        Returns:
            ``(title, platform, date, summary, metascore, userscore, href)``,
            all strings. Apostrophes in title and summary are replaced by
            spaces.
        """
        title = game_card.find('h3', first=True).text.replace("'", " ")
        platform = game_card.find('.platform', first=True).find('.data', first=True).text
        # The date is taken from the last <span> inside the details block.
        date = game_card.find('.clamp-details', first=True).find('span')[-1].text
        summary = game_card.find('.summary', first=True).text.replace("'", " ")
        metascore = game_card.find('.clamp-metascore', first=True).find('.metascore_w', first=True).text
        userscore = game_card.find('.clamp-userscore', first=True).find('.metascore_w', first=True).text
        href = game_card.find('a', first=True).attrs['href']

        return title, platform, date, summary, metascore, userscore, href

    @staticmethod
    def get_pagination_count_pages(request: 'Response') -> int:
        """Return the number shown in the ``.last_page`` element, or 0 if absent."""
        last_page = request.html.find('.last_page', first=True)
        if not last_page:
            return 0
        return int(last_page.find('.page_num', first=True).text)

In [None]:
def main() -> None:
    """Scrape the listing pages for years 1996-2022 and store rows in the database.

    For each year, page 0 is fetched first and its pagination block determines
    how many pages are requested. Pages that fail to download are logged and
    skipped. The database connection is closed even if scraping raises.
    """
    db = Database(dbname='YOUR_DATABASE', table_name='YOUR_TABLE_NAME',
                  user='YOUR_USERNAME', password='YOUR_PASSWORD',
                  host='YOUR_HOST', port='YOUR_PORT')
    db.connect_to_db()
    try:
        db.create_table()

        for year in range(1996, 2023):
            parser = Parser(year)

            pagination_num = 0
            last_page = 1
            while pagination_num < last_page:
                print(f'year {year}, pagination_num {pagination_num}, last_page {last_page}')

                request, data = parser.get_data_from_page(pagination_num)
                if request is None:
                    # Log before advancing so the message names the page that failed.
                    print(f'request ERROR year {year}, pagination_num {pagination_num}, last_page {last_page}')
                    pagination_num += 1
                    continue

                # Only the first page is used to learn the total page count.
                if pagination_num == 0:
                    last_page = parser.get_pagination_count_pages(request)
                pagination_num += 1

                db.insert_data(data)
                # Pause between successful requests to avoid hammering the site.
                time.sleep(10)
    finally:
        db.close()