In [1]:
import os
from kedro.framework.startup import bootstrap_project
from kedro.framework.session import KedroSession
import warnings

# Silence all warnings so the bootstrap output below stays readable.
# NOTE(review): this also hides deprecation warnings; narrow the filter if they matter.
warnings.filterwarnings("ignore")

# Directory the notebook kernel is currently running from (printed for troubleshooting)
notebook_cwd = os.getcwd()

# Root of the Kedro project. The MTG_PROJECT_PATH environment variable, when set,
# overrides the author's local default so the notebook is not tied to one machine.
project_path = os.environ.get(
    "MTG_PROJECT_PATH",
    r"c:\Users\gufer\OneDrive\Documentos\FIAP\Fase_03\mtg-project",
)

# Show both locations before changing directory
print(f"Notebook current working directory: {notebook_cwd}")
print(f"Project path: {project_path}")

# Switch to the Kedro project root; KedroSession.create() below is called without
# an explicit project path, so it relies on the working directory set here.
os.chdir(project_path)

# Bootstrap the Kedro project
bootstrap_project(project_path)

# Open a Kedro session just long enough to load the project context
with KedroSession.create() as session:
    context = session.load_context()

# Data catalog exposed by the loaded context
catalog = context.catalog

# Project parameters exposed by the loaded context
params = context.params

# List the catalog entries (last expression, so it is rendered as the cell output)
catalog.list()

Notebook current working directory: c:\Users\gufer\OneDrive\Documentos\FIAP\Fase_03\notebooks\gmferratti\pipeline
Project path: c:\Users\gufer\OneDrive\Documentos\FIAP\Fase_03\mtg-project



[1m[[0m
    [32m'decks_json_partitioned'[0m,
    [32m'decks_txt_partitioned'[0m,
    [32m'sampled_decks'[0m,
    [32m'players'[0m,
    [32m'players_with_decks'[0m,
    [32m'matches_dataframe'[0m,
    [32m'parameters'[0m,
    [32m'params:global'[0m,
    [32m'params:global.run_date'[0m,
    [32m'params:global.run_date.day'[0m,
    [32m'params:global.run_date.month'[0m,
    [32m'params:global.run_date.year'[0m,
    [32m'params:global.user'[0m,
    [32m'params:global.user.project_path'[0m,
    [32m'params:preprocessing'[0m,
    [32m'params:preprocessing.webscraper'[0m,
    [32m'params:preprocessing.webscraper.zip_url'[0m,
    [32m'params:preprocessing.webscraper.zip_folder'[0m,
    [32m'params:preprocessing.webscraper.log_folder'[0m,
    [32m'params:preprocessing.webscraper.deck_cards'[0m,
    [32m'params:preprocessing.webscraper.sample_size_ratio'[0m,
    [32m'params:simulation'[0m,
    [32m'params:simulation.n_players'[0m,
    [32m'params

In [2]:
from typing import List, Dict
from classes.deck import Deck
from classes.player import Player
from classes.player_tracker import PlayerTracker
from src.mtg_project.pipelines.utils import setup_logger
import pandas as pd
import numpy as np
from faker import Faker
import random

In [3]:
def create_players(n_players: int, seed=None):
    """
    Create a list of Player objects with randomly generated names.

    Args:
        n_players (int): Number of players to create.
        seed (int, optional): Seed applied to the Faker instance used for name
            generation. When provided, repeated calls with the same seed produce
            the same names, which makes the simulation reproducible. When None
            (the default) the generator is left unseeded, as before.

    Returns:
        List[Player]: One Player per generated name, in generation order.
    """
    # Faker instance used to produce the fake names
    fake = Faker()
    if seed is not None:
        # Seed only this instance so other Faker users are not affected
        fake.seed_instance(seed)

    # Each name is "<first name> <last name>"
    player_names = [fake.first_name() + " " + fake.last_name() for _ in range(n_players)]

    # Wrap every generated name in a Player object
    return [Player(name) for name in player_names]

# Read the number of players to create from the Kedro parameters
n_players = catalog.load("params:simulation.n_players")
# Build the players with random names
players = create_players(n_players)
# Persist them under the "players" catalog entry for the following cells
catalog.save("players", players)

In [4]:
import random
import os
from typing import List, Dict

def assign_decks_to_players(
        players: List[Player],
        sampled_decks: Dict[str, str],
        log_folder: str) -> List[Player]:
    """
    Assign a distinct, randomly chosen deck to every player.

    For each player a deck name is drawn at random from the decks that are still
    unassigned, loaded from its .txt file and handed to ``player.assign_deck()``.
    If loading or assigning raises, the error is logged and that deck is not
    tried again for the same player, so the retry loop always terminates. The
    failing deck remains available to later players. A successfully assigned
    deck is removed from the pool and is never reused.

    Args:
        players (list): Player objects that need a deck.
        sampled_decks (dict): Mapping of deck name to the path of its .txt file.
        log_folder (str): Folder where the log file 'decks_assignment.txt' is
            written; it is created when missing.

    Returns:
        List[Player]: The same ``players`` list, each player with a deck assigned.

    Raises:
        ValueError: If a player cannot be given any of the remaining decks
            (none left, or every remaining deck failed for that player).
    """
    # Log file location
    log_filepath = os.path.join(log_folder, 'decks_assignment.txt')

    # Create the log folder when it does not exist yet
    os.makedirs(log_folder, exist_ok=True)

    # Logger for the whole assignment run
    logger = setup_logger("validate_decks", log_filepath)

    logger.info("Validating decks...")

    # Names of the decks that have not been assigned to anyone yet
    available_decks = list(sampled_decks.keys())

    for player in players:
        # Decks this player may still try. Failed decks are dropped from this
        # per-player copy only, which prevents re-drawing the same broken deck
        # forever while keeping it available for other players.
        candidates = list(available_decks)
        assigned = False
        while not assigned and candidates:
            # Draw a random deck among the ones not yet tried by this player
            deck_name = random.choice(candidates)
            try:
                # Full path of the deck file
                deck_path = sampled_decks[deck_name]

                # Build the deck from its .txt file
                deck = Deck()
                deck.load_deck_from_txt(deck_path)

                # Hand the deck to the player
                player.assign_deck(deck)
                logger.info(f"Deck '{deck_name}' assigned to player '{player.name}'")

                # A deck can only be used once
                available_decks.remove(deck_name)

                assigned = True
            except Exception as e:
                # Best effort: log the failure and move on to a different deck
                logger.error(f"Failed to assign deck '{deck_name}' to player '{player.name}': {e}")
                candidates.remove(deck_name)

        # Nothing usable was left for this player
        if not assigned:
            raise ValueError(f"No available decks left to assign to player '{player.name}'.")

    logger.info("Deck assignment process completed.")

    return players

# Reload the players saved by the previous cell
players = catalog.load("players")
# Deck name -> deck file path mapping (see assign_decks_to_players)
sampled_decks = catalog.load("sampled_decks")
# Folder that receives the assignment log
log_folder = catalog.load("params:simulation.log_folder")
players_with_decks = assign_decks_to_players(players, sampled_decks, log_folder)
# Persist the players, now carrying decks, for the simulation cell
catalog.save("players_with_decks", players_with_decks)

In [5]:
def simulate_player_matches(params: dict, players_with_decks: list) -> pd.DataFrame:
    """
    Run the configured number of matches for every player and collect the tracked data.

    Parameters:
    -----------
    params : dict
        Simulation settings. The following keys are required:
        'max_mulligans', 'mulligan_prob', 'hand_size_stop', 'max_turns',
        'extra_land_prob' (all forwarded to ``Player.play_a_match``),
        'matches_per_player' (how many times each player plays) and
        'log_folder' (folder that receives 'player_matches.txt').

    players_with_decks : list
        Player objects that already have a deck assigned.

    Returns:
    --------
    pd.DataFrame
        Whatever ``PlayerTracker.get_data()`` returns after all matches were
        played with a single shared tracker.
    """
    # Read the settings in a fixed order so a missing key fails on the first one
    setting_names = (
        "max_mulligans",
        "mulligan_prob",
        "hand_size_stop",
        "max_turns",
        "extra_land_prob",
        "matches_per_player",
        "log_folder",
    )
    (mulligan_limit,
     mulligan_chance,
     stop_hand_size,
     turn_limit,
     extra_land_chance,
     n_matches,
     log_dir) = [params[key] for key in setting_names]

    # Prepare the log file and make sure its folder exists
    log_file = os.path.join(log_dir, 'player_matches.txt')
    os.makedirs(log_dir, exist_ok=True)
    sim_logger = setup_logger("player_matches", log_file)
    sim_logger.info("Initiating simulations...")

    # One tracker accumulates the data of every player and match
    match_tracker = PlayerTracker()

    # Positional arguments expected by play_a_match, after the tracker
    match_args = (
        mulligan_limit,
        mulligan_chance,
        turn_limit,
        stop_hand_size,
        extra_land_chance,
    )

    for current_player in players_with_decks:
        for _ in range(n_matches):
            current_player.play_a_match(match_tracker, *match_args)

    # Data gathered across all matches and turns
    return match_tracker.get_data()

# Run the simulation: load its parameters and the players that already hold decks,
# simulate the matches and persist the resulting DataFrame in the catalog.
# Note: `params` is rebound here from the full project parameters (first cell)
# to the "simulation" section only.
params = catalog.load("params:simulation")
players_with_decks = catalog.load("players_with_decks")
matches_df = simulate_player_matches(params, players_with_decks)
catalog.save("matches_dataframe",matches_df)

In [6]:
from mtgsdk import Card
# Ad-hoc lookup of a single card through mtgsdk to inspect its mana cost string.
# NOTE(review): 386618 is presumably the card's multiverse id, and the call likely
# needs network access — confirm, and consider removing this scratch cell.
card = Card.find(386618)
card.mana_cost

[32m'[0m[32m{[0m[32m7[0m[32m}[0m[32m{[0m[32mB[0m[32m}[0m[32m{[0m[32mB[0m[32m}[0m[32m'[0m

In [7]:
# Do not truncate columns when rendering DataFrames (global pandas option)
pd.set_option('display.max_columns', None)
# Reload the simulated matches from the catalog and render them as the cell output
matches_df = catalog.load("matches_dataframe")
matches_df

Unnamed: 0,name,deck_name,deck_colors,match,turn,mulligan_count,lands_played,spells_played,mana_pool,spent_mana,hand_size,library_size,graveyard_size,full_hand,full_graveyard,hand_W,hand_U,hand_B,hand_R,hand_G,hand_C,graveyard_W,graveyard_U,graveyard_B,graveyard_R,graveyard_G,graveyard_C,mana_pool_W,mana_pool_U,mana_pool_B,mana_pool_R,mana_pool_G,mana_pool_C,battlefield_W,battlefield_U,battlefield_B,battlefield_R,battlefield_G,battlefield_C
0,Gina Bell,mark justice quarterfinalist,"[R, G, W]",1,0,0,0,0,0,0.0,7,53,0,"Hand(7 cards: Plains, Howling Mine, Howling Mi...",Graveyard(0 cards: ),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Gina Bell,mark justice quarterfinalist,"[R, G, W]",1,1,0,1,1,1,1.0,6,52,1,"Hand(6 cards: Mountain, Howling Mine, Howling ...",Graveyard(1 cards: Swords to Plowshares),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Gina Bell,mark justice quarterfinalist,"[R, G, W]",1,2,0,1,3,2,2.0,4,51,3,"Hand(4 cards: Mishra's Factory, Howling Mine, ...","Graveyard(3 cards: Swords to Plowshares, Sword...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Gina Bell,mark justice quarterfinalist,"[R, G, W]",1,3,0,1,3,3,0.0,4,50,3,"Hand(4 cards: Howling Mine, Howling Mine, Wint...","Graveyard(3 cards: Swords to Plowshares, Sword...",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0
4,Gina Bell,mark justice quarterfinalist,"[R, G, W]",1,4,0,0,3,3,0.0,5,49,3,"Hand(5 cards: Howling Mine, Howling Mine, Wint...","Graveyard(3 cards: Swords to Plowshares, Sword...",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
527,Jamie Rodriguez,transference,[U],10,8,0,1,0,4,0.0,7,45,4,"Hand(7 cards: Dragon Blood, Echoing Truth, Arc...","Graveyard(4 cards: Spire Golem, Arcbound Bruis...",0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0
528,Jamie Rodriguez,transference,[U],10,9,0,0,0,4,0.0,7,44,5,"Hand(7 cards: Dragon Blood, Echoing Truth, Arc...","Graveyard(5 cards: Spire Golem, Arcbound Bruis...",0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0
529,Jamie Rodriguez,transference,[U],10,10,0,0,0,4,0.0,7,43,6,"Hand(7 cards: Dragon Blood, Echoing Truth, Arc...","Graveyard(6 cards: Spire Golem, Arcbound Bruis...",0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0
530,Jamie Rodriguez,transference,[U],10,11,0,0,0,4,0.0,7,42,7,"Hand(7 cards: Dragon Blood, Echoing Truth, Arc...","Graveyard(7 cards: Spire Golem, Arcbound Bruis...",0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0


In [8]:
# Load the simulated matches (one row per player / match / turn)
matches_df = catalog.load("matches_dataframe")
matches_df["spent_mana"] = matches_df["spent_mana"].astype(int)

# Cumulative variables restart for every match of every player. A plain
# DataFrame-wide cumsum would keep accumulating across matches and players,
# contaminating every row after the first match. deck_name is part of the key
# because decks are assigned to a single player, which guards against two
# players sharing the same generated name.
match_groups = matches_df.groupby(["name", "deck_name", "match"], sort=False)
matches_df["cum_mana_pool"] = match_groups["mana_pool"].cumsum()
matches_df["cum_spent_mana"] = match_groups["spent_mana"].cumsum()
matches_df["cum_spells_played"] = match_groups["spells_played"].cumsum()

# Ratio variables (turn is 0-based, hence the +1)
matches_df["spell_ratio"] = (matches_df["spells_played"] / (matches_df["turn"] + 1)).round(2)
matches_df["land_ratio"] = (matches_df["lands_played"] / (matches_df["turn"] + 1)).round(2)

# Target variable: share of the cumulative mana pool that was actually spent.
# Division by an empty pool yields inf/NaN; both are mapped to 0. The result is
# assigned back instead of using inplace=True on a column selection, which does
# not update the DataFrame when pandas Copy-on-Write is active.
efficiency = matches_df["cum_spent_mana"] / matches_df["cum_mana_pool"]
matches_df["mana_curve_efficiency"] = (
    efficiency
    .replace([float("inf"), -float("inf")], 0)
    .fillna(0)
    .round(2)
)

# Expand deck_colors into one 0/1 indicator column per color
all_colors = ['W', 'U', 'B', 'R', 'G']

for color in all_colors:
    matches_df[f'{color}'] = (
        matches_df['deck_colors']
        .apply(lambda deck_colors, c=color: 1 if c in deck_colors else 0)
        .astype("category")
    )

# The original deck_colors column is no longer needed
matches_df = matches_df.drop(columns=['deck_colors'])

# Show the first rows to confirm the changes
matches_df.head()

Unnamed: 0,name,deck_name,match,turn,mulligan_count,lands_played,spells_played,mana_pool,spent_mana,hand_size,library_size,graveyard_size,full_hand,full_graveyard,hand_W,hand_U,hand_B,hand_R,hand_G,hand_C,graveyard_W,graveyard_U,graveyard_B,graveyard_R,graveyard_G,graveyard_C,mana_pool_W,mana_pool_U,mana_pool_B,mana_pool_R,mana_pool_G,mana_pool_C,battlefield_W,battlefield_U,battlefield_B,battlefield_R,battlefield_G,battlefield_C,cum_mana_pool,cum_spent_mana,cum_spells_played,spell_ratio,land_ratio,mana_curve_efficiency,W,U,B,R,G
0,Gina Bell,mark justice quarterfinalist,1,0,0,0,0,0,0,7,53,0,"Hand(7 cards: Plains, Howling Mine, Howling Mi...",Graveyard(0 cards: ),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1,0,0,1,1
1,Gina Bell,mark justice quarterfinalist,1,1,0,1,1,1,1,6,52,1,"Hand(6 cards: Mountain, Howling Mine, Howling ...",Graveyard(1 cards: Swords to Plowshares),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0.5,0.5,1.0,1,0,0,1,1
2,Gina Bell,mark justice quarterfinalist,1,2,0,1,3,2,2,4,51,3,"Hand(4 cards: Mishra's Factory, Howling Mine, ...","Graveyard(3 cards: Swords to Plowshares, Sword...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,4,1.0,0.33,1.0,1,0,0,1,1
3,Gina Bell,mark justice quarterfinalist,1,3,0,1,3,3,0,4,50,3,"Hand(4 cards: Howling Mine, Howling Mine, Wint...","Graveyard(3 cards: Swords to Plowshares, Sword...",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,6,3,7,0.75,0.25,0.5,1,0,0,1,1
4,Gina Bell,mark justice quarterfinalist,1,4,0,0,3,3,0,5,49,3,"Hand(5 cards: Howling Mine, Howling Mine, Wint...","Graveyard(3 cards: Swords to Plowshares, Sword...",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,9,3,10,0.6,0.0,0.33,1,0,0,1,1


In [9]:
# Absolute correlation above which a column is considered redundant
threshold = 0.85

# Only numeric columns take part in the correlation analysis
# ('object' and 'category' columns are left out)
matches_df_numeric = matches_df.select_dtypes(include="number")

# Absolute pairwise correlations
corr_matrix = matches_df_numeric.corr().abs()

# Keep only the strict upper triangle so each pair is looked at once and the
# diagonal is ignored; everything else becomes NaN
upper_mask = np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1)
upper = corr_matrix.where(upper_mask)

# A column is dropped when it correlates above the threshold with any column
# that comes before it
to_drop = [
    column
    for column in upper.columns
    if upper[column].gt(threshold).any()
]

# Remove the redundant columns from the full (not only numeric) frame
matches_df_cleaned = matches_df.drop(columns=to_drop)

# Report what was removed and preview the result
print(f"Colunas removidas por alta correlação: {to_drop}")
matches_df_cleaned.head()

Colunas removidas por alta correlação: ['mana_pool', 'library_size', 'mana_pool_G', 'cum_spent_mana', 'cum_spells_played']


Unnamed: 0,name,deck_name,match,turn,mulligan_count,lands_played,spells_played,spent_mana,hand_size,graveyard_size,full_hand,full_graveyard,hand_W,hand_U,hand_B,hand_R,hand_G,hand_C,graveyard_W,graveyard_U,graveyard_B,graveyard_R,graveyard_G,graveyard_C,mana_pool_W,mana_pool_U,mana_pool_B,mana_pool_R,mana_pool_C,battlefield_W,battlefield_U,battlefield_B,battlefield_R,battlefield_G,battlefield_C,cum_mana_pool,spell_ratio,land_ratio,mana_curve_efficiency,W,U,B,R,G
0,Gina Bell,mark justice quarterfinalist,1,0,0,0,0,0,7,0,"Hand(7 cards: Plains, Howling Mine, Howling Mi...",Graveyard(0 cards: ),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1,0,0,1,1
1,Gina Bell,mark justice quarterfinalist,1,1,0,1,1,1,6,1,"Hand(6 cards: Mountain, Howling Mine, Howling ...",Graveyard(1 cards: Swords to Plowshares),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0.5,0.5,1.0,1,0,0,1,1
2,Gina Bell,mark justice quarterfinalist,1,2,0,1,3,2,4,3,"Hand(4 cards: Mishra's Factory, Howling Mine, ...","Graveyard(3 cards: Swords to Plowshares, Sword...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1.0,0.33,1.0,1,0,0,1,1
3,Gina Bell,mark justice quarterfinalist,1,3,0,1,3,0,4,3,"Hand(4 cards: Howling Mine, Howling Mine, Wint...","Graveyard(3 cards: Swords to Plowshares, Sword...",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,6,0.75,0.25,0.5,1,0,0,1,1
4,Gina Bell,mark justice quarterfinalist,1,4,0,0,3,0,5,3,"Hand(5 cards: Howling Mine, Howling Mine, Wint...","Graveyard(3 cards: Swords to Plowshares, Sword...",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,9,0.6,0.0,0.33,1,0,0,1,1
