## MTG Transformer Decoder




#### Importing data:


In [3]:
import pandas as pd
import numpy as np
import gzip as gz

file_path = 'C:/Users/arpit/Downloads/draft_data_public.FIN.TradDraft.csv.gz'


try:
    # Open the .gz file in read binary mode
    with gz.open(file_path, 'rt', encoding='utf-8') as gz_file:
        # Now you can read from gz_file just like a regular file.
        # For example, if it's a CSV, you can read it with pandas:
        df = pd.read_csv(gz_file)

        print("DataFrame loaded from .gz:")
        display(df.head())

except FileNotFoundError:
    print(f"Error: .gz file not found at {file_path}")
except Exception as e:
    print(f"An error occurred: {e}")

DataFrame loaded from .gz:


Unnamed: 0,expansion,event_type,draft_id,draft_time,rank,event_match_wins,event_match_losses,pack_number,pick_number,pick,...,"pool_Yuna, Hope of Spira","pool_Yuriko, the Tiger's Shadow",pool_Zack Fair,"pool_Zanarkand, Ancient Metropolis",pool_Zell Dincht,pool_Zenos yae Galvus,"pool_Zidane, Tantalus Thief","pool_Zodiark, Umbral God",user_n_games_bucket,user_game_win_rate_bucket
0,FIN,TradDraft,d5b5f363128e4ca79031384c95e11f01,2025-06-10 23:54:18,,1,2,0,0,Dragoon's Lance,...,0,0,0,0,0,0,0,0,100,0.58
1,FIN,TradDraft,d5b5f363128e4ca79031384c95e11f01,2025-06-10 23:54:18,,1,2,0,1,Freya Crescent,...,0,0,0,0,0,0,0,0,100,0.58
2,FIN,TradDraft,d5b5f363128e4ca79031384c95e11f01,2025-06-10 23:54:18,,1,2,0,2,Dragoon's Lance,...,0,0,0,0,0,0,0,0,100,0.58
3,FIN,TradDraft,d5b5f363128e4ca79031384c95e11f01,2025-06-10 23:54:18,,1,2,0,3,G'raha Tia,...,0,0,0,0,0,0,0,0,100,0.58
4,FIN,TradDraft,d5b5f363128e4ca79031384c95e11f01,2025-06-10 23:54:18,,1,2,0,4,Delivery Moogle,...,0,0,0,0,0,0,0,0,100,0.58


#### Getting card names

In [94]:
all_columns = df.columns.tolist()


pack_columns = [col for col in df.columns if col.startswith('pack_card_')]
pick_columns = [col for col in df.columns if col.startswith('pool_')]

cards = [col.replace("pack_card_", "") for col in pack_columns]




In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import math
import random


#### Card to number mapping:

We tokenize cards by assigning a number to each of them

In [96]:
def create_card_to_id_mapping(card_names):
  """
  Creates a dictionary mapping card names to unique integer IDs.

  Args:
    card_names: A list of unique card names (strings).

  Returns:
    A dictionary where keys are card names and values are unique integer IDs.
  """
  card_to_id = {name: i+1 for i, name in enumerate(card_names)}
  return card_to_id



In [176]:
# Data prep: 

# We need to convert the data in the following way. After each card has a token, we need to make 13 x 13 x 1 tensor. 
# Where first 13 are the cards available to pick from, next are cards in hand and last is the card chosen in that round. 

# Helper functions: From a row, take the one hot encoded pack data, pick data and choice and convert to a pytorch tensor

tokens_pack = create_card_to_id_mapping(pack_columns)
tokens_pool = create_card_to_id_mapping(pick_columns)
tokens = create_card_to_id_mapping(cards)




#### Tokenizing rows

For every row, we construct a tensor of tokens of pack and pick data which is then padded to maintain uniform sequence lengths

In [146]:


from torch.nn.utils.rnn import pad_sequence


def tokenizer(row, pad_len=40, pad_val=0):
    # Collect tokens for pack and pick from the row
    pack_token = []
    pick_token = []

    for col in pack_columns:
        if row[col] == 1:
            pack_token.append(torch.tensor([tokens_pack[col]], dtype=torch.long))

    for col in pick_columns:
        if row[col] == 1:
            pick_token.append(torch.tensor([tokens_pool[col]], dtype=torch.long))

    # Helper to pad a list of 1D tensors to fixed length pad_len
    def pad_to_fixed(tensor_list, length, pad_value):
        if len(tensor_list) == 0:
            # if empty, return all padding
            return torch.full((length,), pad_value, dtype=torch.long)
        # concatenate tokens into one 1D tensor
        combined = torch.cat(tensor_list)
        if combined.size(0) < length:
            pad_amount = length - combined.size(0)
            combined = F.pad(combined, (0, pad_amount), value=pad_value)
        else:
            combined = combined[:length]
        return combined

    padded_picks = pad_to_fixed(pick_token, pad_len, pad_val)
    padded_packs = pad_to_fixed(pack_token, pad_len, pad_val)

     
    final_tensor = torch.stack([padded_packs, padded_picks], dim=0)

    

    picked_card = row['pick']
    
    picked_card_token = torch.tensor([tokens[picked_card]], dtype=torch.long)
    

    return final_tensor, picked_card_token


In [147]:
data = df.apply(lambda row: tokenizer(row), axis=1)

In [None]:
# Train-test split 


from sklearn.model_selection import train_test_split

inputs = []
labels = []



for input_tensor, label_tensor in data:
    inputs.append(input_tensor)  # shape (2, pad_len)
    labels.append(label_tensor)  # shape (1,)
    
X_train, X_test, y_train, y_test = train_test_split(
inputs, labels, test_size=0.1, random_state=42)

pack_train = torch.stack([x[0] for x in X_train])   # x[0] is the "pack" part (shape: pad_len)
pool_train = torch.stack([x[1] for x in X_train])
pack_test = torch.stack([x[0] for x in X_test])   # x[0] is the "pack" part (shape: pad_len)
pool_test = torch.stack([x[1] for x in X_test]) 
labels_train = torch.stack(y_train).squeeze()
labels_test = torch.stack(y_test).squeeze()



In [None]:
# Some Hyperparameters


VOCAB_SIZE = len(pack_columns)+1  # Number of unique cards
SEQ_LEN = 40
BATCH_SIZE = 32
EMBED_DIM = 128
NUM_LAYERS = 2
NHEAD = 4
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

#### Decoder sequence

In [173]:
class DraftDecoder(nn.Module):
    def __init__(self, vocab_size, seq_len, nhead, pad_idx = 0 , embed_dim = 128):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)
        decoder_layer = nn.TransformerDecoderLayer(embed_dim, nhead)
        self.decoder = nn.TransformerDecoder(decoder_layer, num_layers=2)
        self.output = nn.Linear(embed_dim, vocab_size)

    def forward(self, pack_tensor, pool_tensor):
        
        tgt = self.embedding(pack_tensor).permute(1, 0, 2)  # (seq_len, batch, d_model) 
        memory = self.embedding(pool_tensor).permute(1,0,2)
        
        sz = tgt.size(0)
        tgt_mask = nn.Transformer.generate_square_subsequent_mask(sz).to(pack_tensor.device)

        
        out = self.decoder(tgt, memory, tgt_mask=tgt_mask)
        out = self.output(out[-1])  # predict last token only
        return out

#### Training loop

In [174]:
from torch.utils.data import TensorDataset

dataset = TensorDataset(pack_train, pool_train, labels_train)
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)


def train():
    model = DraftDecoder(VOCAB_SIZE, SEQ_LEN, NHEAD).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    

    for epoch in range(5):
        total_loss = 0
        for pack_token, pool_token,target in loader:
            pack_token = pack_token.to(DEVICE)
            pool_token = pool_token.to(DEVICE)
            target = target.to(DEVICE)

            logits = model(pack_token, pool_token)
            loss = criterion(logits, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}: Loss = {total_loss / len(loader):.4f}")

train()


Epoch 1: Loss = 1.9181
Epoch 2: Loss = 1.5117
Epoch 3: Loss = 1.4267
Epoch 4: Loss = 1.3787
Epoch 5: Loss = 1.3485
