In [1]:
import os
from kedro.framework.startup import bootstrap_project
from kedro.framework.session import KedroSession
import warnings

warnings.filterwarnings("ignore")

# Encontrar o caminho absoluto do diretório atual
notebook_cwd = os.getcwd()

# Definir o caminho correto para a raiz do projeto Kedro
project_path = r"c:\Users\gufer\OneDrive\Documentos\FIAP\Fase_03\mtg-project"

# Verificar o diretório atual e o caminho do projeto
print(f"Notebook current working directory: {notebook_cwd}")
print(f"Project path: {project_path}")

# Alterar para o diretório raiz do projeto Kedro
os.chdir(project_path)

# Bootstrap o projeto Kedro
bootstrap_project(project_path)

# Inicialize o contexto do Kedro
with KedroSession.create() as session:
    context = session.load_context()

# Recarregar o catálogo
catalog = context.catalog

# Acessar os parâmetros
params = context.params

# Listar o catálogo
catalog.list()

Notebook current working directory: c:\Users\gufer\OneDrive\Documentos\FIAP\Fase_03\notebooks\gmferratti\pipeline
Project path: c:\Users\gufer\OneDrive\Documentos\FIAP\Fase_03\mtg-project



[1m[[0m
    [32m'decks_json_partitioned'[0m,
    [32m'decks_txt_partitioned'[0m,
    [32m'sampled_decks'[0m,
    [32m'players'[0m,
    [32m'players_with_decks'[0m,
    [32m'matches_df'[0m,
    [32m'features_df'[0m,
    [32m'selected_features_df'[0m,
    [32m'train_features'[0m,
    [32m'test_features'[0m,
    [32m'train_target'[0m,
    [32m'test_target'[0m,
    [32m'model_pkl'[0m,
    [32m'parameters'[0m,
    [32m'params:global'[0m,
    [32m'params:global.run_date'[0m,
    [32m'params:global.run_date.day'[0m,
    [32m'params:global.run_date.month'[0m,
    [32m'params:global.run_date.year'[0m,
    [32m'params:global.user'[0m,
    [32m'params:global.user.project_path'[0m,
    [32m'params:preprocessing'[0m,
    [32m'params:preprocessing.webscraper'[0m,
    [32m'params:preprocessing.webscraper.zip_url'[0m,
    [32m'params:preprocessing.webscraper.zip_folder'[0m,
    [32m'params:preprocessing.webscraper.log_folder'[0m,
    [32m'params:

In [2]:
from typing import List, Dict
from classes.deck import Deck
from classes.player import Player
from classes.player_tracker import PlayerTracker
from src.mtg_project.pipelines.utils import setup_logger
import pandas as pd
import numpy as np
from faker import Faker
import random

In [3]:
# with KedroSession.create(env="local", project_path=project_path) as session:
#     session.run(pipeline_name="modeling")

In [4]:
def create_players(n_players: int):
    """
    Cria uma lista de objetos Player com nomes aleatórios.

    Args:
        n_players (int): Número de jogadores a serem criados.

    Returns:
        List[Player]: Lista de objetos Player com nomes gerados aleatoriamente.
    """
    # Inicializando o gerador de dados falsos Faker
    fake = Faker()
    
    # Gerando uma lista de nomes aleatórios usando o Faker
    player_names = [fake.first_name() + " " + fake.last_name() for _ in range(n_players)]
    
    # Criando uma lista de objetos Player a partir dos nomes gerados
    players = [Player(name) for name in player_names]

    # Retornando a lista de objetos Player
    return players

n_players = catalog.load("params:simulation.n_players")
players = create_players(n_players)
catalog.save("players", players)

In [5]:
import random
import os
from typing import List, Dict

def assign_decks_to_players(
        players: List[Player], 
        sampled_decks: Dict[str, str],
        log_folder: str) -> List[Player]:
    """
    Função para atribuir decks aleatórios a cada player na lista de players.

    A função tentará atribuir um deck a cada player chamando o método assign_deck().
    Caso ocorra algum erro na atribuição, tentará com outro deck disponível.

    Args:
        players (list): Lista de objetos Player.
        sampled_decks (dict): Dicionário com os nomes e caminhos dos decks.
        log_folder (str): Caminho da pasta para salvar o log.

    Returns:
        List[Player]: Lista de objetos Player com decks atribuídos.
    """
    # Caminho do arquivo de log
    log_filepath = os.path.join(log_folder, 'decks_assignment.txt')

    # Cria a pasta de log se ela não existir
    os.makedirs(log_folder, exist_ok=True)

    # Configura o logger geral
    logger = setup_logger("validate_decks", log_filepath)
    
    # Log de início da validação
    logger.info("Validating decks...")

    # Convertemos as chaves do dicionário para uma lista de nomes de decks disponíveis
    available_decks = list(sampled_decks.keys())
    
    for player in players:
        assigned = False
        while not assigned and available_decks:
            try:
                # Seleciona um deck aleatório da lista de decks disponíveis
                deck_name = random.choice(available_decks)

                # Obter o caminho completo do deck a partir do dicionário sampled_decks
                deck_path = sampled_decks[deck_name]

                # Cria um novo objeto Deck
                deck = Deck()

                # Carrega o deck a partir do arquivo .txt no caminho obtido
                deck.load_deck_from_txt(deck_path)

                # Atribui o deck ao player
                player.assign_deck(deck)
                logger.info(f"Deck '{deck_name}' assigned to player '{player.name}'")
                
                # Remove o deck da lista de decks disponíveis para evitar reutilização
                available_decks.remove(deck_name)

                assigned = True  # Deck atribuído com sucesso
            except Exception as e:
                # Em caso de erro, tenta outro deck
                logger.error(f"Failed to assign deck '{deck_name}' to player '{player.name}': {e}")
                continue
        
        
        # Se não houver mais decks disponíveis e não conseguir atribuir, lança um erro
        if not assigned:
            raise ValueError(f"No available decks left to assign to player '{player.name}'.")

    logger.info("Deck assignment process completed.")

    return players

players = catalog.load("players")
sampled_decks = catalog.load("sampled_decks")
log_folder = catalog.load("params:simulation.log_folder")
# players_with_decks = assign_decks_to_players(players, sampled_decks, log_folder)
# catalog.save("players_with_decks", players_with_decks)

In [6]:
def simulate_player_matches(params: dict, players_with_decks: list) -> pd.DataFrame:
    """
    Simulates Magic: The Gathering matches for a list of players based on the provided simulation parameters.

    Parameters:
    -----------
    params : dict
        A dictionary containing the simulation parameters, including:
        - 'max_mulligans': Maximum number of mulligans allowed per player.
        - 'mulligan_prob': Probability of a player choosing to mulligan.
        - 'hand_size_stop': Minimum hand size at which the simulation will stop.
        - 'max_turns': Maximum number of turns per match.
        - 'extra_land_prob': Probability of playing an extra land during a turn.
        - 'matches_per_player': Number of matches to simulate per player.
        - 'log_folder': Folder path for logging the simulation process.
    
    players_with_decks : list
        A list of Player objects, each with an assigned deck to be used in the simulation.

    Returns:
    --------
    pd.DataFrame
        A DataFrame containing the match data for all players across all matches and turns, including:
        - Player attributes at each turn.
        - Match number for each simulation.
    """
    
    # Atribuir os parâmetros
    max_mulligans = params["max_mulligans"]
    mulligan_prob = params["mulligan_prob"]
    hand_size_stop = params["hand_size_stop"]
    max_turns = params["max_turns"]
    extra_land_prob = params["extra_land_prob"]
    matches_per_player = params["matches_per_player"]
    log_folder = params["log_folder"]

    # Caminho do arquivo de log
    log_filepath = os.path.join(log_folder, 'player_matches.txt')

    # Cria a pasta de log se ela não existir
    os.makedirs(log_folder, exist_ok=True)

    # Configura o logger geral
    logger = setup_logger("player_matches", log_filepath)

    # Log de início da validação
    logger.info("Initiating simulations...")

    # Inicializa o tracker para armazenar os dados
    tracker = PlayerTracker()

    # Loop através dos jogadores e realizar as simulações de partidas
    for player in players_with_decks:
        for match in range(matches_per_player):
            # Simular várias partidas para o jogador
            player.play_a_match(tracker, 
                                max_mulligans, 
                                mulligan_prob, 
                                max_turns, 
                                hand_size_stop, 
                                extra_land_prob)

    # Obter os dados de todas as partidas e turnos
    matches_df = tracker.get_data()
    
    return matches_df

# Chamada da função
params = catalog.load("params:simulation")
players_with_decks = catalog.load("players_with_decks")
features_df = simulate_player_matches(params, players_with_decks)
catalog.save("matches_df",features_df)

In [15]:
import pandas as pd

def feature_engineering(matches_df: pd.DataFrame) -> pd.DataFrame:
    """
    Realiza engenharia de features nos dados das partidas de Magic: The Gathering, com lag features 
    e rolling features aplicadas separadamente por jogador e partida, iniciando a contagem a partir
    do último turno 0 em caso de mulligan.

    Args:
        matches_df (pd.DataFrame): DataFrame contendo os dados das partidas.

    Retorna:
        pd.DataFrame: DataFrame com novas features calculadas.
    """

    # Configura o logger geral
    logger = setup_logger("feature_engineering")

    logger.info("Criando variáveis cumulativas por jogador e partida...")

    # Garantir que 'spent_mana' esteja no formato correto
    matches_df["spent_mana"] = matches_df["spent_mana"].astype(int)

    # Criação de variáveis cumulativas por 'name' e 'match'
    matches_df['cum_mana_pool'] = matches_df.groupby(['name', 'match'])['mana_pool'].cumsum()
    matches_df["cum_spent_mana"] = matches_df.groupby(['name', 'match'])["spent_mana"].cumsum()

    logger.info("Criando variáveis de razão...")

    # Criação de variáveis baseadas em razões: feitiços por turno e terrenos por turno
    matches_df['spell_ratio'] = (matches_df['spells_played'] / (matches_df['turn'] + 1)).round(2)
    matches_df['land_ratio'] = (matches_df['lands_played'] / (matches_df['turn'] + 1)).round(2)

    logger.info("Criando variável de eficiência da curva de mana...")

    # Criação da variável de eficiência da curva de mana (razão entre mana gasto e mana acumulado)
    matches_df['mana_curve_efficiency'] = matches_df['cum_spent_mana'] / matches_df['cum_mana_pool']

    # Tratamento de valores infinitos e valores ausentes
    matches_df['mana_curve_efficiency'].replace([float('inf'), -float('inf')], 0, inplace=True)
    matches_df['mana_curve_efficiency'].fillna(0, inplace=True)
    matches_df['mana_curve_efficiency'] = matches_df['mana_curve_efficiency'].round(2)

    logger.info("Identificando o último turno 0 por jogador e partida...")

    # Encontrar o último turno 0 em cada partida
    matches_df['is_last_turn_0'] = matches_df.groupby(['name', 'match'])['turn'].transform(lambda x: (x == 0).cumsum())

    # Criar uma máscara para selecionar apenas os turnos após o último turno 0
    valid_turns_mask = matches_df.groupby(['name', 'match'])['is_last_turn_0'].transform(max) == matches_df['is_last_turn_0']

    logger.info("Criando lag features a partir do último turno 0...")

    # Aplicar as lag features apenas nos turnos válidos (após o último turno 0)
    matches_df.loc[valid_turns_mask, 'mana_curve_efficiency_lag_1'] = matches_df.groupby(['name', 'match'])['mana_curve_efficiency'].shift(1)
    matches_df.loc[valid_turns_mask, 'mana_curve_efficiency_lag_2'] = matches_df.groupby(['name', 'match'])['mana_curve_efficiency'].shift(2)
    matches_df.loc[valid_turns_mask, 'spell_ratio_lag_1'] = matches_df.groupby(['name', 'match'])['spell_ratio'].shift(1)
    matches_df.loc[valid_turns_mask, 'land_ratio_lag_1'] = matches_df.groupby(['name', 'match'])['land_ratio'].shift(1)

    logger.info("Criando rolling features a partir do último turno 0...")

    # Aplicar rolling features a partir do último turno 0
    matches_df.loc[valid_turns_mask, 'rolling_mean_mana_curve_efficiency_3'] = matches_df.groupby(['name', 'match'])['mana_curve_efficiency'].rolling(window=3).mean().reset_index(level=[0,1], drop=True)
    matches_df.loc[valid_turns_mask, 'rolling_mean_spell_ratio_3'] = matches_df.groupby(['name', 'match'])['spell_ratio'].rolling(window=3).mean().reset_index(level=[0,1], drop=True)
    matches_df.loc[valid_turns_mask, 'rolling_mean_land_ratio_3'] = matches_df.groupby(['name', 'match'])['land_ratio'].rolling(window=3).mean().reset_index(level=[0,1], drop=True)

    # Tratamento de valores nulos gerados pelos shifts e rolling
    matches_df.fillna(0, inplace=True)

    # Remover a coluna auxiliar 'is_last_turn_0'
    matches_df.drop(columns=['is_last_turn_0'], inplace=True)

    logger.info("Engenharia de features concluída.")

    return matches_df


pd.set_option('display.max_columns', None)
    
# Chamada da função
features_df = catalog.load("matches_df")
features_df = feature_engineering(features_df)
features_df.tail(10)

Unnamed: 0,name,deck_name,deck_colors,match,turn,mulligan_count,lands_played,spells_played,mana_pool,spent_mana,hand_size,library_size,graveyard_size,full_hand,full_graveyard,cum_mana_pool,cum_spent_mana,spell_ratio,land_ratio,mana_curve_efficiency,mana_curve_efficiency_lag_1,mana_curve_efficiency_lag_2,spell_ratio_lag_1,land_ratio_lag_1,rolling_mean_mana_curve_efficiency_3,rolling_mean_spell_ratio_3,rolling_mean_land_ratio_3
336,Kathryn Henry,azorius spirits,"[W, U]",9,0,0,0,0,0,0,7,53,0,"Hand(7 cards: Island, Plains, Island, Plains, ...",Graveyard(0 cards: ),0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
337,Kathryn Henry,azorius spirits,"[W, U]",9,0,1,0,0,0,0,6,54,0,"Hand(6 cards: Temple of Enlightenment, Glacial...",Graveyard(0 cards: ),0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
338,Kathryn Henry,azorius spirits,"[W, U]",9,1,1,1,1,1,1,5,53,1,"Hand(5 cards: Glacial Fortress, Island, Watche...",Graveyard(1 cards: Spell Pierce),1,1,0.5,0.5,1.0,0.0,0.0,0.0,0.0,0.333333,0.166667,0.166667
339,Kathryn Henry,azorius spirits,"[W, U]",9,2,1,2,2,3,2,3,52,2,"Hand(3 cards: Mausoleum Wanderer, Lofty Denial...","Graveyard(2 cards: Spell Pierce, Watcher of th...",4,3,0.67,0.67,0.75,1.0,0.0,0.5,0.5,0.583333,0.39,0.39
340,Kathryn Henry,azorius spirits,"[W, U]",9,3,1,1,4,4,4,1,51,4,Hand(1 cards: Mausoleum Wanderer),"Graveyard(4 cards: Spell Pierce, Watcher of th...",8,7,1.0,0.25,0.88,0.75,1.0,0.67,0.67,0.876667,0.723333,0.473333
341,Kathryn Henry,azorius spirits,"[W, U]",10,0,0,0,0,0,0,7,53,0,"Hand(7 cards: Plains, Temple of Enlightenment,...",Graveyard(0 cards: ),0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
342,Kathryn Henry,azorius spirits,"[W, U]",10,1,0,1,1,1,1,6,52,1,"Hand(6 cards: Temple of Enlightenment, Plains,...",Graveyard(1 cards: Spell Pierce),1,1,0.5,0.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
343,Kathryn Henry,azorius spirits,"[W, U]",10,2,0,1,2,2,2,5,51,2,"Hand(5 cards: Plains, Empyrean Eagle, Rattlech...","Graveyard(2 cards: Spell Pierce, Rattlechains)",3,3,0.67,0.33,1.0,1.0,0.0,0.5,0.5,0.666667,0.39,0.276667
344,Kathryn Henry,azorius spirits,"[W, U]",10,3,0,1,3,3,3,4,50,3,"Hand(4 cards: Island, Rattlechains, Selfless S...","Graveyard(3 cards: Spell Pierce, Rattlechains,...",6,6,0.75,0.25,1.0,1.0,1.0,0.67,0.33,1.0,0.64,0.36
345,Kathryn Henry,azorius spirits,"[W, U]",10,4,0,1,5,4,4,2,49,5,"Hand(2 cards: Island, Spectral Sailor)","Graveyard(5 cards: Spell Pierce, Rattlechains,...",10,10,1.0,0.2,1.0,1.0,1.0,0.75,0.25,1.0,0.806667,0.26


In [19]:
def feature_selection(
        features_df: pd.DataFrame,
        target: str,
        threshold_features: float,
        derived_features: list = None,
        key_columns: list = None,
        cols_to_keep: list= None) -> pd.DataFrame:
    """
    Auxilia na seleção de features, removendo aquelas diretamente derivadas do target, 
    colunas-chave, e features altamente correlacionadas entre si.

    Args:
        features_df (pd.DataFrame): DataFrame contendo as features.
        target (str): Nome da variável alvo.
        threshold_features (float): Limiar para remover features com alta correlação.
        derived_features (list, optional): Lista de features derivadas do target para serem removidas.
        key_columns (list, optional): Lista de colunas-chave para serem removidas (ex: 'match', 'turn').

    Returns:
        pd.DataFrame: DataFrame com as features selecionadas.
    """
    # Configura o logger geral
    logger = setup_logger("feature_selection")
    logger.info("Iniciando o processo de seleção de features...")

    # Separar apenas colunas numéricas para o cálculo da correlação
    numeric_features_df = features_df.select_dtypes(include=[np.number])

    logger.info(f"Número inicial de features numéricas: {numeric_features_df.shape[1]}")

    # Remover features derivadas do target, se fornecidas
    if derived_features:
        numeric_features_df = numeric_features_df.drop(columns=derived_features, errors='ignore')
        logger.info(f"Features derivadas do target removidas: {derived_features}")

    # Remover colunas-chave nao categoricas ou string
    if key_columns:
        numeric_features_df = numeric_features_df.drop(columns=key_columns, errors='ignore')
        logger.info(f"Colunas-chave removidas: {key_columns}")

    # Remover a variável alvo do conjunto de features
    features_without_target = numeric_features_df.drop(columns=[target], errors='ignore')

    # Calcular a matriz de correlação entre as features (excluindo o target)
    corr_matrix = features_without_target.corr().abs()

    # Criar uma máscara para identificar as correlações acima do limiar entre as features, excluindo a diagonal
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

    # Identificar colunas com alta correlação entre si, usando o limiar definido
    to_drop_features = [column for column in upper.columns if any(upper[column] > threshold_features)]

    # Manter compulsoriamente algumas colunas
    if cols_to_keep:
        to_drop_features = [col for col in to_drop_features if col not in cols_to_keep]

    # Remover as colunas altamente correlacionadas entre si
    features_cleaned = features_without_target.drop(columns=to_drop_features)

    logger.info(f"Número de features após a remoção de correlação maior que {threshold_features}: {features_cleaned.shape[1]}")    

    # Reconstruir o DataFrame final, reinserindo key_columns e target
    features_cleaned = pd.concat([features_cleaned, features_df[key_columns], features_df[[target]]], axis=1)

    logger.info("Processo de seleção de features concluído.")

    return features_cleaned


derived_features = [
    "cum_spent_mana", 
    "cum_mana_pool", 
    "spent_mana", 
    "mana_pool"
]

key_cols = [
    "name",
    "deck_name",
    "match",
    "turn"
]

features_df = catalog.load("features_df")
threshold_features = catalog.load("params:modeling.feature_engineering.feat_corr_threshold")

selected_features_df = feature_selection(
    features_df=features_df,         
    target="mana_curve_efficiency", 
    threshold_features=threshold_features,        
    derived_features=derived_features, 
    key_columns=key_cols,
    cols_to_keep=["W","U","B","R","G"]     
)

# Salvando o DataFrame de features selecionadas
catalog.save("selected_features_df", selected_features_df)

In [20]:
selected_features_df

Unnamed: 0,mulligan_count,lands_played,spells_played,hand_size,spell_ratio,land_ratio,W,U,B,R,G,mana_curve_efficiency_lag_1,mana_curve_efficiency_lag_2,spell_ratio_lag_1,land_ratio_lag_1,rolling_mean_mana_curve_efficiency_3,rolling_mean_spell_ratio_3,rolling_mean_land_ratio_3,name,deck_name,match,turn,mana_curve_efficiency
0,0,0,0,7,0.00,0.00,1,1,0,0,0,0.00,0.00,0.00,0.00,0.000000,0.000000,0.000000,Shari Grant,azorius advance,1,0,0.00
1,0,1,0,7,0.00,0.50,1,1,0,0,0,0.00,0.00,0.00,0.00,0.000000,0.000000,0.000000,Shari Grant,azorius advance,1,1,0.00
2,0,1,1,6,0.33,0.33,1,1,0,0,0,0.00,0.00,0.00,0.50,0.223333,0.110000,0.276667,Shari Grant,azorius advance,1,2,0.67
3,0,0,2,6,0.50,0.00,1,1,0,0,0,0.67,0.00,0.33,0.33,0.490000,0.276667,0.276667,Shari Grant,azorius advance,1,3,0.80
4,0,1,3,5,0.60,0.20,1,1,0,0,0,0.80,0.67,0.50,0.00,0.783333,0.476667,0.176667,Shari Grant,azorius advance,1,4,0.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,1,0,0,6,0.00,0.00,1,1,0,0,0,0.00,1.18,0.00,0.00,0.393333,0.333333,0.133333,Kathryn Henry,azorius spirits,10,0,0.00
332,1,1,1,5,0.50,0.50,1,1,0,0,0,0.00,0.00,0.00,0.00,3.666667,0.166667,0.166667,Kathryn Henry,azorius spirits,10,1,11.00
333,1,1,2,4,0.67,0.33,1,1,0,0,0,11.00,0.00,0.50,0.50,5.110000,0.390000,0.276667,Kathryn Henry,azorius spirits,10,2,4.33
334,1,1,4,2,1.00,0.25,1,1,0,0,0,4.33,11.00,0.67,0.33,6.000000,0.723333,0.360000,Kathryn Henry,azorius spirits,10,3,2.67


In [11]:
import random
from typing import List
import pandas as pd

import random
import pandas as pd
from typing import List

def train_test_split(
        features_df: pd.DataFrame,
        final_features_list: List[str], 
        target_column: str, 
        n_test_players: int = None, 
        hide_advanced_turns: bool = False,
        turn_threshold: int = None) -> None:
    """
    Segrega as partidas em treino e teste, permitindo que o modelo nunca veja um determinado grupo de jogadores ou
    escondendo os turnos mais avançados de cada jogador durante o treino.
    
    Filtra o DataFrame pelas features selecionadas e produz os DataFrames de treino e teste para features e targets.

    Args:
        features_df (pd.DataFrame): O DataFrame contendo os dados das partidas.
        final_features_list (List[str]): Lista das features selecionadas para o modelo.
        target_column (str): Nome da coluna target.
        n_test_players (int, opcional): Número de jogadores a serem amostrados aleatoriamente para o conjunto de teste.
        hide_advanced_turns (bool, opcional): Se True, usa a estratégia de esconder os turnos mais avançados no conjunto de teste.
        turn_threshold (int, opcional): Limite de turnos para segregar treino e teste. Os turnos maiores que esse valor serão usados como teste.

    Retorna:
        Tuple: DataFrames de treino e teste para features e targets.
    """
    
    if hide_advanced_turns and turn_threshold is None:
        raise ValueError("Se `hide_advanced_turns` for True, `turn_threshold` deve ser fornecido.")
    
    if hide_advanced_turns:
        # Estratégia de esconder turnos mais avançados
        print(f"Usando a estratégia de esconder turnos mais avançados (turnos > {turn_threshold}).")
        
        # Dividir os dados entre treino e teste com base no turn_threshold
        train_df = features_df[features_df['turn'] <= turn_threshold]
        test_df = features_df[features_df['turn'] > turn_threshold]
    
    else:
        # Estratégia de esconder jogadores
        print(f"Usando a estratégia de esconder {n_test_players} jogadores.")
        
        # Verifica se a quantidade de jogadores para o teste é válida
        unique_players = features_df['name'].unique()
        if n_test_players > len(unique_players):
            raise ValueError(f"O número de jogadores de teste ({n_test_players}) excede o número de jogadores únicos ({len(unique_players)}).")
        
        # Amostrando jogadores aleatoriamente
        test_players = random.sample(list(unique_players), n_test_players)
        print(f"Jogadores selecionados para o conjunto de teste: {test_players}")
        
        # Segregar os dados entre treino e teste com base nos jogadores amostrados
        test_df = features_df[features_df['name'].isin(test_players)]
        train_df = features_df[~features_df['name'].isin(test_players)]
    
    # Filtrando apenas as features selecionadas
    train_features = train_df[final_features_list]
    test_features = test_df[final_features_list]

    # Extraindo os targets
    train_target = train_df[[target_column]]
    test_target = test_df[[target_column]]

    return train_features, test_features, train_target, test_target


(train_features, test_features, train_target, test_target) = train_test_split(
    features_df=selected_features_df,
    final_features_list=selected_features_df.columns.tolist(),
    target_column="mana_curve_efficiency",
    n_test_players=1,
)

# Salvando as features e targets no catálogo
catalog.save("train_features", train_features)
catalog.save("test_features", test_features)
catalog.save("train_target", train_target)
catalog.save("test_target", test_target)

Jogadores selecionados para o conjunto de teste: ['David Walsh']


In [12]:
train_features = catalog.load("train_features")
train_target = catalog.load("train_target")

key_cols = [
    "name",
    "deck_name",
    "match",
    "turn"
]

def fit_model(
    train_features: pd.DataFrame,
    train_target: pd.DataFrame,
    params: dict,
    key_cols = key_cols
):
    
    return None
    

#train_features.drop(columns=key_cols, inplace=True)
#train_features
train_target

Unnamed: 0,mana_curve_efficiency
0,0.00
1,0.00
2,0.67
3,0.80
4,0.88
...,...
331,0.00
332,11.00
333,4.33
334,2.67
