# Add data from MTG Decks

In [71]:
import requests

from bs4 import BeautifulSoup
from datetime import datetime
from services.db_service import DbService
from services.utils import rename_name_sticker_goblin, remove_snow_covered_lands

In [72]:
# Shared database handles used by the find/create helpers below.
# DbService comes from services.db_service; its connection details are not
# visible in this notebook.
db_service = DbService()
# Collection queried/inserted by find_or_create_player.
pilots_collection = db_service.pilots_collection()
# Collection queried/inserted by find_or_create_decklist.
# NOTE(review): this accessor is spelled deck_collection (singular), unlike
# the other two - confirm that is the intended DbService method name.
decks_collection = db_service.deck_collection()
# Collection queried/updated/inserted by create_or_update_tournament.
events_collection = db_service.events_collection()

In [73]:
# Site root; the scraping functions prepend it to the hrefs found in the
# listing tables to build absolute URLs.
base_url = "https://mtgdecks.net"
# HTTP headers sent with every request made by the scraping functions.
headers = {"User-Agent": "MTG Scraper"}

In [74]:
def get_tournaments_url(url):
    """Collect the tournament URLs listed in the first table of a page.

    Args:
        url: Address of a tournament listing page.

    Returns:
        On success, a list of absolute URLs built as ``base_url`` + the href
        of the first link in each table row (the first row is skipped).
        On failure, a human-readable error string is returned instead of a
        list, so callers must check the type of the result.
    """
    # requests applies no timeout by default; without one a stalled server
    # would block the scrape indefinitely.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return f"Falha ao obter a página: {response.status_code} ({response.reason})"

    bs = BeautifulSoup(response.text, "html.parser")

    tables = bs.find_all("table")
    # Test the result list itself: indexing an empty result would raise
    # IndexError before the "not found" message could ever be returned.
    if not tables:
        return "Tabela não encontrada"

    tournaments = []
    for row in tables[0].find_all("tr")[1:]:
        link = row.find("a")
        # Rows without an anchor, or with an anchor lacking href, are skipped
        # instead of aborting the whole page.
        href = link.get("href") if link else None
        if href:
            tournaments.append(base_url + href)

    return tournaments

In [75]:
def text_to_date(date_str):
    """Parse a ``YYYY-MM-DD`` string into a naive ``datetime`` at midnight.

    Args:
        date_str: Date text in year-month-day form, e.g. ``"2023-01-05"``.

    Returns:
        The corresponding ``datetime`` object.

    Raises:
        ValueError: If ``date_str`` does not match the expected format.
    """
    day_format = "%Y-%m-%d"
    parsed = datetime.strptime(date_str, day_format)
    return parsed

In [76]:
def get_tournament_name_and_location(tournament):
    """Split a "<name> @ <location>" title into its two parts.

    Args:
        tournament: Title text, optionally containing ``@`` separators.

    Returns:
        A ``(name, location)`` tuple with surrounding whitespace removed.
        ``location`` is ``None`` when the title contains no ``@``. When
        several ``@`` are present only the first two segments are used.
    """
    name, *remainder = tournament.split("@")
    location = remainder[0].strip() if remainder else None
    return name.strip(), location

In [77]:
def get_tournament_data(url):
    """Scrape a tournament page for its metadata and decklist URLs.

    Args:
        url: Address of a single tournament page.

    Returns:
        On success, a dict with the keys ``name`` and ``location`` (split
        from the page's ``<h1>`` text), ``format`` (text of the first link
        inside the ``card-item`` div), ``date`` (a ``datetime`` parsed from
        the details line) and ``decklists`` (absolute URLs taken from the
        first table, skipping its first row).
        On failure to fetch the page or to find a table, a human-readable
        error string is returned instead, so callers must check the type.

    Raises:
        AttributeError, IndexError, ValueError: If the page lacks the
            expected heading/details markup or the date text is malformed.
    """
    # requests applies no timeout by default; without one a stalled server
    # would block the scrape indefinitely.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return f"Falha ao obter a página: {response.status_code} ({response.reason})"

    bs = BeautifulSoup(response.text, "html.parser")

    name, location = get_tournament_name_and_location(bs.find("h1").text)

    details = bs.find("div", class_="card-item")
    # Named format_name so the builtin format() is not shadowed.
    format_name = details.find("a").text.strip()
    # The date is the text after the first "|\n" separator in the details line.
    date_str = details.find(
        "div", class_=["small", "text-uppercase"]).text.split("|\n")[1].strip()
    date = text_to_date(date_str)

    # TODO: refactor the table parsing below into its own function.
    tables = bs.find_all("table")
    # Test the result list itself: indexing an empty result would raise
    # IndexError before the "not found" message could ever be returned.
    if not tables:
        return "Tabela não encontrada"

    decklists = []
    for row in tables[0].find_all("tr")[1:]:
        link = row.find("a")
        # Rows without an anchor, or with an anchor lacking href, are skipped
        # instead of aborting the whole tournament.
        href = link.get("href") if link else None
        if href:
            decklists.append(base_url + href)

    return {"name": name, "location": location, "format": format_name, "date": date, "decklists": decklists}

In [78]:
def get_decklist_as_list(decklist):
    """Expand "<quantity> <card name>" lines into a sorted flat list of names.

    Each card name is repeated ``quantity`` times. The expanded list is then
    passed through ``rename_name_sticker_goblin`` and
    ``remove_snow_covered_lands`` (project helpers from ``services.utils``;
    their exact behaviour is not visible here) and sorted.

    Args:
        decklist: Iterable of text lines, each of the form
            ``"<quantity> <card name>"``. Blank lines are ignored.

    Returns:
        The sorted list produced after both helpers have been applied.

    Raises:
        ValueError: If a non-blank line has no card name after the quantity
            or the quantity is not an integer.
    """
    cards = []
    for card in decklist:
        # A blank line (e.g. from a trailing newline in the export) carries
        # no card; skipping it avoids failing the whole deck on unpacking.
        if not card.strip():
            continue
        quantity, name = card.split(maxsplit=1)
        cards.extend([name.strip()] * int(quantity))

    cards = rename_name_sticker_goblin(cards)
    cards = remove_snow_covered_lands(cards)
    cards.sort()

    return cards

In [79]:
def get_player_data(url):
    """Scrape a decklist page for the pilot, archetype and card lists.

    Args:
        url: Address of a single decklist page.

    Returns:
        On success, a dict with the keys ``player`` (lower-cased text of the
        third link in the ``deck-tags-area`` div), ``archetype`` (text of the
        second link), ``maindeck`` and ``sideboard`` (lists built by
        ``get_decklist_as_list`` from the ``arena_deck`` textarea, whose
        sections are separated by a blank line; ``sideboard`` is empty when
        there is only one section).
        On failure, a human-readable error string is returned instead, so
        callers must check the type of the result.
    """
    # requests applies no timeout by default; without one a stalled server
    # would block the scrape indefinitely.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return f"Falha ao obter a página: {response.status_code} ({response.reason})"

    bs = BeautifulSoup(response.text, "html.parser")

    # Look the tag area up once and reuse it.
    tags_area = bs.find("div", id="deck-tags-area")
    if tags_area is None:
        return "Deck inválido"

    details = tags_area.find_all("a")
    export = bs.find("textarea", id="arena_deck")
    # Strip surrounding whitespace so leading/trailing blank lines do not
    # produce empty sections or empty card lines.
    deck_text = export.text.strip() if export is not None else ""
    # A page missing the expected links or the export text is reported the
    # same way as a page missing the tag area, instead of raising.
    if len(details) < 3 or not deck_text:
        return "Deck inválido"

    player = details[2].text.lower().strip()
    archetype = details[1].text.strip()

    sections = deck_text.split("\n\n")

    maindeck = get_decklist_as_list(sections[0].split("\n"))
    if len(sections) > 1:
        sideboard = get_decklist_as_list(sections[1].split("\n"))
    else:
        sideboard = []

    return {"player": player, "archetype": archetype, "maindeck": maindeck, "sideboard": sideboard}

In [80]:
def find_or_create_player(player_name):
    """Return the ``_id`` of the pilot named ``player_name``, creating it if absent.

    Args:
        player_name: Name to look up in ``pilots_collection``.

    Returns:
        The ``_id`` of the existing document, or the ``inserted_id`` of a
        newly inserted one carrying ``created_at``/``updated_at`` timestamps.
    """
    existing = pilots_collection.find_one({"name": player_name})
    if not existing:
        document = {
            "name": player_name,
            "created_at": datetime.now(),
            "updated_at": datetime.now(),
        }
        return pilots_collection.insert_one(document).inserted_id
    return existing["_id"]

In [81]:
def find_or_create_decklist(archetype, maindeck, sideboard, format):
    """Return the ``_id`` of a matching decklist, creating it if absent.

    A decklist matches when its ``maindeck``, ``sideboard`` and ``format``
    all equal the given values; ``archetype`` is not part of the lookup and
    is only stored (as ``name``) on insertion.

    Args:
        archetype: Archetype name stored as the new document's ``name``.
        maindeck: Main deck card list.
        sideboard: Sideboard card list.
        format: Format name.

    Returns:
        The ``_id`` of the existing document, or the ``inserted_id`` of a
        newly inserted one carrying ``created_at``/``updated_at`` timestamps.
    """
    lookup = {"maindeck": maindeck, "sideboard": sideboard, "format": format}
    existing = decks_collection.find_one(lookup)
    if not existing:
        document = {
            "name": archetype,
            "format": format,
            "maindeck": maindeck,
            "sideboard": sideboard,
            "created_at": datetime.now(),
            "updated_at": datetime.now(),
        }
        return decks_collection.insert_one(document).inserted_id

    return existing["_id"]

In [82]:
def create_or_update_tournament(name, location, format, date, players):
    """Insert a tournament, or refresh it if one with the same key exists.

    A tournament is identified by its ``name``, ``date`` and ``format``.
    When a match is found only ``location``, ``players`` and ``updated_at``
    are overwritten; otherwise a new document is inserted with both
    ``created_at`` and ``updated_at`` set.

    Args:
        name: Tournament name.
        location: Tournament location, possibly ``None``.
        format: Format name.
        date: Tournament date.
        players: List of player/decklist reference entries.

    Returns:
        None.
    """
    existing = events_collection.find_one(
        {"name": name, "date": date, "format": format})

    if not existing:
        events_collection.insert_one({
            "name": name,
            "location": location,
            "format": format,
            "date": date,
            "players": players,
            "created_at": datetime.now(),
            "updated_at": datetime.now(),
        })
        return

    changes = {
        "location": location,
        "players": players,
        "updated_at": datetime.now(),
    }
    events_collection.update_one({"_id": existing["_id"]}, {"$set": changes})

In [83]:
# Gather the tournament URLs from the first max_pages listing pages.
tournaments = []

max_pages = 1
for page in range(1, max_pages + 1):
    urls = get_tournaments_url(f"{base_url}/Pauper/tournaments/page:{page}")
    # get_tournaments_url reports failures as a plain string; extending the
    # list with a string would add one bogus entry per character.
    if isinstance(urls, str):
        print(f"Página {page} ignorada: {urls}")
        continue
    tournaments.extend(urls)

In [84]:
# Scrape each tournament, store its pilots and decklists, then upsert the
# tournament itself. `added` counts only the tournaments actually saved.
added = 0
for tournament in tournaments:
    td = get_tournament_data(tournament)

    # get_tournament_data reports failures as a plain string; indexing it
    # like a dict would crash the whole run.
    if isinstance(td, str):
        print(f"Torneio ignorado ({tournament}): {td}")
        continue

    # NOTE(review): tournaments dated in December are skipped; the reason is
    # not documented in this notebook - confirm this filter is still wanted.
    if td["date"].month == 12:
        continue

    players = []
    for decklist_url in td["decklists"]:
        try:
            pd = get_player_data(decklist_url)

            # get_player_data also reports failures as a plain string.
            if isinstance(pd, str):
                print(f"Decklist ignorada ({decklist_url}): {pd}")
                continue

            player_id = find_or_create_player(pd["player"])
            decklist_id = find_or_create_decklist(
                pd["archetype"], pd["maindeck"], pd["sideboard"], td["format"])

            players.append({"player": player_id, "decklist": decklist_id})
        except Exception as exc:
            # Still best-effort per decklist, but the failure is reported and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"Decklist ignorada ({decklist_url}): {exc!r}")
            continue

    create_or_update_tournament(
        td["name"], td["location"], td["format"], td["date"], players)
    added += 1

print(f"{added} torneios adicionados")

20 torneios adicionados
