<a href="https://colab.research.google.com/github/doctorsmylie/mtg-draft-agent/blob/main/model-trainer-with-deck-eval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Configure Drive or Jupyter notebook -- only runs when first loaded
# The guard below skips the whole setup when a global named CONFIG_DONE already
# exists. NOTE(review): CONFIG_DONE is not assigned in this cell; presumably
# "project_path.ipynb" sets it -- confirm.
if "CONFIG_DONE" not in globals():
    # Need to mount drive and clone repo to access data and functions
    try:
        # Importing google.colab doubles as the "are we in Colab?" probe:
        # outside Colab it raises ModuleNotFoundError, handled below.
        from google.colab import drive  # type: ignore

        IN_COLAB = True

        # clone repo, then make the clone the working directory so that the
        # relative paths used by the rest of the notebook resolve inside it
        !git clone https://github.com/doctorsmylie/mtg-draft-agent
        %cd mtg-draft-agent

    except ModuleNotFoundError:
        # Not running in Colab: no clone and no directory change are performed
        IN_COLAB = False

    # Finish configuration -- also configures notebook outside of Colab
    %run "project_path.ipynb"
else:
    # Configuration already happened in this kernel session; do nothing
    print("Config done")

Cloning into 'mtg-draft-agent'...
remote: Enumerating objects: 257, done.[K
remote: Counting objects: 100% (39/39), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 257 (delta 16), reused 13 (delta 4), pack-reused 218 (from 1)[K
Receiving objects: 100% (257/257), 13.82 MiB | 15.31 MiB/s, done.
Resolving deltas: 100% (136/136), done.
/content/mtg-draft-agent
Starting config...
Running in Colab? Yes

Configuring Google Colab...
Mounting Drive...
Mounted at /content/mtg-draft-agent/drive
BASE_PATH =  /content/mtg-draft-agent
DATA_FOLDER = /content/mtg-draft-agent/drive/MyDrive/Erdos25/MTGdraft
BASE_PATH == os.getcwd(): True

Configuration done


In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F

# from datasets import Dataset
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

import pathlib
from itertools import product
from sklearn.model_selection import train_test_split

from time import time
from tqdm.auto import tqdm

import functions.card_io as card_io
import functions.utils as utils

# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [3]:
# Read the cleaned draft table (path is relative to the current working directory)
drafts_all = pd.read_parquet(path="clean_data/DSK_drafts.parquet")

In [4]:
# Work on the first 42 * 100 rows only.
# NOTE(review): presumably 42 rows per player (3 rounds of 14 turns) for 100
# players -- confirm against the parquet layout.
drafts = drafts_all.head(42 * 100)

In [5]:
from bots.lstm_bot import *

# --- Model hyperparameters ---------------------------------------------------
vocab_size = 286
embed_dim, hidden_dim = 128, 256  # alternative values noted by the author: 64 / 128
num_layers = 2
p_LSTM, p_out = 0.3, 0.5

# --- Optimisation hyperparameters --------------------------------------------
batch_size = 64
lr, weight_decay = 1e-2, 1e-5

# --- Model, loss and optimizer -----------------------------------------------
model = DraftBotLSTM(
    vocab_size,
    embed_dim,
    hidden_dim,
    num_layers=num_layers,
    p_LSTM=p_LSTM,
    p_out=p_out,
)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

In [6]:
# Split by draft_id so that all rows sharing a draft_id end up in the same split
draft_ids = drafts["draft_id"].unique()

# 80% of the ids go to train; the remaining 20% is halved into val and test
train_ids, temp_ids = train_test_split(draft_ids, random_state=304, test_size=0.2)
val_ids, test_ids = train_test_split(temp_ids, random_state=304, test_size=0.5)

# Select the rows of each split
drafts_train, drafts_val, drafts_test = (
    drafts.loc[drafts["draft_id"].isin(split_ids)]
    for split_ids in (train_ids, val_ids, test_ids)
)

# Wrap each split in the project's custom Dataset
dataset_train, dataset_val, dataset_test = map(
    PlayerDataset, (drafts_train, drafts_val, drafts_test)
)

In [7]:
# One DataLoader per split, all sharing batch size and collate function;
# order is preserved (shuffle=False)
dlss = [
    DataLoader(
        split,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=collate_player_turns,
    )
    for split in (dataset_train, dataset_val, dataset_test)
]

dls_train, dls_val, dls_test = dlss

In [10]:
# Load deck evaluators
# NOTE(review): later cells call calculate_adjusted_win_rate_inverse_squared_weights,
# which is not defined anywhere in this notebook -- presumably this run puts it
# into the namespace; confirm. The recorded output also shows it pip-installing
# packages (wget, spells-mtg) as a side effect.
%run "deck_classification/deck_eval.ipynb"

Config done before loading deck_eval.ipynb
Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9655 sha256=e95e74291405ff53a7e0224091d48fe803c93078bb5dc44a8bef8f616dfb4ab6
  Stored in directory: /root/.cache/pip/wheels/40/b3/0f/a40dbd1c6861731779f62cc4babcb234387e11d697df70ee97
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2
Collecting spells-mtg
  Downloading spells_mtg-0.11.10-py3-none-any.whl.metadata (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.4/47.4 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading spells_mtg-0.11.10-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.9/43.9 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling co

In [11]:
def deck_list_to_vector(deck_list, num_cards=None):
    """
    Turn a list of card indices into a per-card count vector.

    Args:
        deck_list: Sequence, NumPy array or torch tensor of card indices.
            Multi-dimensional input is flattened.
        num_cards: Length of the returned vector. Defaults to the notebook-level
            `vocab_size` when omitted.

    Returns:
        A float NumPy array of shape (num_cards,), where entry i is the number
        of times card i appears in `deck_list`. Entries that are negative,
        non-integer, NaN, or >= num_cards are ignored.
    """
    if num_cards is None:
        num_cards = vocab_size

    # Accept torch tensors explicitly (also covers tensors that live on a GPU
    # or track gradients, which NumPy cannot convert by itself)
    if hasattr(deck_list, "detach"):
        deck_list = deck_list.detach().cpu().numpy()

    cards = np.asarray(deck_list, dtype=float).ravel()

    # Keep only values that are valid integer card indices
    is_valid = (cards >= 0) & (cards < num_cards) & (cards == np.floor(cards))
    counts = np.bincount(cards[is_valid].astype(np.int64), minlength=num_cards)

    return counts.astype(float)

In [24]:
def train_epoch(
    model, dataloader, optimizer, loss_fn, chunk_size=max_pack_size, device=None
):
    """
    Train `model` for one pass over `dataloader` and score the drafted decks.

    Args:
        model: Called as `model(pack_batch, hidden_state=...)`; must return
            `(logits, hidden_state)`.
        dataloader: Yields `(pack_batches, pick_batches)`, both indexed by turn.
            Its dataset must provide `num_turns()`.
        optimizer: Optimizer stepped once per chunk of turns.
        loss_fn: Called as `loss_fn(logits, pick_batch)` on every turn.
        chunk_size: Back propagate over a smaller number of turns.
            The default is the size of a pack (i.e. the length of one "round"
            of drafting). If None, we backpropagate over all turns.
        device: If given, the model and every turn's tensors are moved to it.

    Returns:
        mean_loss: Sum of the per-chunk losses divided by the number of players.
        accuracy_per_turn: Tensor of shape (num_turns,) with the fraction of
            players whose argmax prediction equals the recorded pick.
        deck_scores: Tensor of shape (num_players,) with one deck score per
            player, in dataloader order.

    Raises:
        ValueError: If `chunk_size` is smaller than 1.
    """
    # Move model to new device
    if device is not None:
        model = model.to(device)

    # Initialize training mode
    model.train()

    # Remember: In PlayerDataset, each entry has the game information of a player,
    # which consists of two lists of length equal to the number of turns
    num_batches = len(dataloader)
    num_players = len(dataloader.dataset)
    num_turns = dataloader.dataset.num_turns()

    # In case we want to backpropagate the whole game
    if chunk_size is None:
        chunk_size = num_turns
    elif chunk_size < 1:
        raise ValueError(f"chunk_size must be at least 1, got {chunk_size}")

    # Accumulate the correct picks made by all players of each batch and on each turn
    all_correct = torch.zeros(num_batches, num_turns)

    # Deck evaluation store
    deck_scores = torch.zeros(num_players)

    # Accumulate loss over all players and all turns
    total_loss = 0

    # batch_count indexes rows of all_correct; batch_idx is the position in
    # deck_scores of the first player of the current batch
    batch_count = 0
    batch_idx = 0

    # Each (pack_batches, pick_batches) is a list of turn states for a player batch
    for pack_batches, pick_batches in tqdm(dataloader):
        # Each batch is a group of players, but the last batch may be smaller
        batch_size = len(pack_batches[0])

        # Store the predicted pick of every player on every turn
        final_deck = torch.zeros((batch_size, num_turns))

        # Initialize variables at the start of the game
        hidden_state = None
        optimizer.zero_grad()

        # Play game and backpropagate every chunk_size turns
        for t0 in range(0, num_turns, chunk_size):
            # End the chunk at the game's end, not later
            chunk_end = min(t0 + chunk_size, num_turns)

            # Start every chunk with a fresh loss. Carrying the previous chunk's
            # loss over would make backward() walk through a graph that was
            # already freed and would count earlier turns twice in total_loss.
            batch_loss = 0

            # Play chunk_size turns
            for t in range(t0, chunk_end):
                pack_batch = pack_batches[t]
                pick_batch = pick_batches[t]

                if device is not None:
                    pack_batch = pack_batch.to(device)
                    pick_batch = pick_batch.to(device)

                # Forward pass -- remember hidden state from previous turn
                logits, hidden_state = model(pack_batch, hidden_state=hidden_state)

                # Note: logits is shaped (batch_size, seq_len, vocab_size) with seq_len=1
                # but loss functions such as cross entropy expect shape
                # (batch_size, vocab_size). That's why I slice here
                logits = logits[:, -1, :]

                # Accumulate the losses of this chunk's turns
                batch_loss += loss_fn(logits, pick_batch)

                # Count the number of players that picked the correct card
                predictions = torch.argmax(logits, dim=-1)  # (batch,)
                all_correct[batch_count, t] = (predictions == pick_batch).sum()

                # Store decks
                final_deck[:, t] = predictions

            # Scale the accumulated chunk loss by the number of players.
            # NOTE(review): if loss_fn already averages over the batch (the
            # default for nn.CrossEntropyLoss), this divides by batch_size a
            # second time -- confirm that this scaling is intended.
            mean_batch_loss = batch_loss / (batch_size)

            # Backpropagate
            mean_batch_loss.backward()
            optimizer.step()

            # Reset optimizer
            optimizer.zero_grad()

            # Detach hidden state to truncate gradients every chunk_size turns
            # (assumes the model returns its hidden state as a tuple of tensors)
            hidden_state = tuple(h.detach() for h in hidden_state)

            # Accumulate the chunk losses of all batches
            total_loss += batch_loss.item()

        # Evaluate decks
        # NOTE(review): the recorded run of this notebook failed inside the
        # evaluator with a matmul size mismatch (281 vs 286). The evaluator
        # seems to expect 281 features while deck_list_to_vector produces
        # vocab_size entries; reconcile the card indexing before trusting
        # these scores.
        for idx in range(batch_size):
            # final_deck holds a list of cards per player. Turn it into a
            # count vector with a leading axis of size 1.
            deck_freq = deck_list_to_vector(final_deck[idx, :])
            deck_freq = deck_freq[None, :]

            deck_scores[batch_idx + idx] = calculate_adjusted_win_rate_inverse_squared_weights(deck_freq)

        # Advance batch counter and the write offset into deck_scores, so each
        # batch fills its own slots instead of overwriting the first ones
        batch_count += 1
        batch_idx += batch_size

    # Add correct choices over all batches (i.e. over all players)
    # then average over the number of players
    accuracy_per_turn = all_correct.sum(dim=0) / num_players  # (num_turns,)

    # Normalize the accumulated loss by the number of players
    mean_loss = total_loss / (num_players)

    return mean_loss, accuracy_per_turn, deck_scores


@torch.no_grad()
def evaluate(model, dataloader, loss_fn, device=None):
    """
    Evaluate `model` on `dataloader` without gradient tracking.

    Args:
        model: Called as `model(pack_batch, hidden_state=...)`; must return
            `(logits, hidden_state)`.
        dataloader: Yields `(pack_batches, pick_batches)`, both indexed by turn.
            Its dataset must provide `num_turns()`.
        loss_fn: Called as `loss_fn(logits, pick_batch)` on every turn.
        device: If given, the model and every turn's tensors are moved to it.

    Returns:
        mean_loss: Sum of the per-batch losses divided by the number of players.
        accuracy_per_turn: Tensor of shape (num_turns,) with the fraction of
            players whose argmax prediction equals the recorded pick.
        deck_scores: Tensor of shape (num_players,) with one deck score per
            player, in dataloader order.
    """
    # Move model to new device
    if device is not None:
        model = model.to(device)

    # Initialize evaluation mode
    model.eval()

    # Remember: In PlayerDataset, each entry has the game information of a player,
    # which consists of two lists of length equal to the number of turns
    num_batches = len(dataloader)
    num_players = len(dataloader.dataset)
    num_turns = dataloader.dataset.num_turns()

    # Accumulate the correct picks made by all players of each batch and on each turn
    all_correct = torch.zeros(num_batches, num_turns)

    # Accumulate loss over all players and all turns
    total_loss = 0

    # Deck evaluation store
    deck_scores = torch.zeros(num_players)

    # batch_count indexes rows of all_correct; batch_idx is the position in
    # deck_scores of the first player of the current batch
    batch_count = 0
    batch_idx = 0

    # Each (pack_batches, pick_batches) is a list of turn states for a player batch
    for pack_batches, pick_batches in dataloader:
        # Each batch is a group of players, but the last batch may be smaller
        batch_size = len(pack_batches[0])

        # Store the predicted pick of every player on every turn
        final_deck = torch.zeros((batch_size, num_turns))

        # Initialize variables at the start of the game
        batch_loss = 0
        hidden_state = None

        for t in range(num_turns):
            # Extract turn info and move it to new device (if required)
            pack_batch = pack_batches[t]
            pick_batch = pick_batches[t]

            if device is not None:
                pack_batch = pack_batch.to(device)
                pick_batch = pick_batch.to(device)

            # Forward pass -- remember hidden state from previous turn
            logits, hidden_state = model(pack_batch, hidden_state=hidden_state)

            # Note: logits is shaped (batch_size, seq_len, vocab_size) with seq_len=1
            # but loss functions such as cross entropy expect shape
            # (batch_size, vocab_size). That's why I slice here
            logits = logits[:, -1, :]

            # Accumulate the losses of all turns
            batch_loss += loss_fn(logits, pick_batch)

            # Count the number of players that picked the correct card
            predictions = torch.argmax(logits, dim=-1)  # (batch,)
            all_correct[batch_count, t] = (predictions == pick_batch).sum()

            # Store decks
            final_deck[:, t] = predictions

        # Accumulate batch losses. In total, this accumulates losses of all players
        total_loss += batch_loss.item()

        # Evaluate decks
        for idx in range(batch_size):
            # final_deck holds a list of cards per player. Turn it into a
            # count vector with a leading axis of size 1, the same layout
            # train_epoch hands to the evaluator.
            # NOTE(review): confirm the evaluator expects this 2-D layout.
            deck_freq = deck_list_to_vector(final_deck[idx, :])
            deck_freq = deck_freq[None, :]

            deck_scores[batch_idx + idx] = calculate_adjusted_win_rate_inverse_squared_weights(deck_freq)

        # Advance batch counter and the write offset into deck_scores, so each
        # batch fills its own slots instead of overwriting the first ones
        batch_count += 1
        batch_idx += batch_size

    # Add correct choices over all batches (i.e. over all players)
    # then average over the number of players
    accuracy_per_turn = all_correct.sum(dim=0) / num_players  # (num_turns,)

    # Normalize the accumulated loss by the number of players
    mean_loss = total_loss / (num_players)

    return mean_loss, accuracy_per_turn, deck_scores


In [25]:
# Training loop parameters
num_epochs = 25
num_turns = dataset_train.num_turns()

# Per-epoch history: losses, per-turn accuracies and mean deck scores
train_losses = torch.zeros(num_epochs)
val_losses = torch.zeros(num_epochs)

train_accuracies = torch.zeros((num_epochs, num_turns))
val_accuracies = torch.zeros((num_epochs, num_turns))

train_rating = torch.zeros(num_epochs)
val_rating = torch.zeros(num_epochs)

# Back propagate over the whole game
# If we omit this line, we back propagate over a single round of the draft
# (i.e. over 14 turns)
chunk_size = None

for epoch in range(num_epochs):
    # Train -- pass the chunk_size configured above rather than a literal, so
    # that changing the setting actually takes effect
    train_loss, train_accuracy_epoch, train_deck_scores = train_epoch(
        model, dls_train, optimizer, loss_fn, chunk_size=chunk_size, device=device
    )

    # Evaluate on validation set
    val_loss, val_accuracy_epoch, val_deck_scores = evaluate(model, dls_val, loss_fn, device=device)

    # Print results
    print(
        f"Epoch {epoch+1}/{num_epochs}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}\n"
    )

    train_losses[epoch] = train_loss
    val_losses[epoch] = val_loss

    train_accuracies[epoch, :] = train_accuracy_epoch.cpu()
    val_accuracies[epoch, :] = val_accuracy_epoch.cpu()

    # Record the average deck score of the epoch (these tensors used to be
    # allocated but never filled)
    train_rating[epoch] = train_deck_scores.mean()
    val_rating[epoch] = val_deck_scores.mean()

torch.Size([80])


  0%|          | 0/2 [00:00<?, ?it/s]

torch.Size([80])
0


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 281 is different from 286)

In [None]:
# Chance level per turn of a 14-card round: with k cards left the probability
# of a uniformly random pick being right is 1/k, for k = 14, 13, ..., 1
tt = np.arange(14, dtype=float)
prob_random = 1 / (tt[::-1] + 1)

In [None]:
# Loss curves of both splits on a single, explicitly created axes
fig, ax = plt.subplots()
for curve, name in ((train_losses, "Train"), (val_losses, "Val")):
    ax.plot(curve, label=name, marker=".")
ax.legend()

ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training and Validation Losses")

# Start the y axis at zero and round its automatic upper limit up with the
# project helper
y_max = utils.ceil_digit(ax.get_ylim()[1], digits=2)
ax.set_ylim(0, y_max)

In [None]:
fig, ax = plt.subplots()

# Red vertical lines mark the last turn of the first and of the second pack
for pack_number in (1, 2):
    ax.axvline(pack_number * max_pack_size - 1, color="red")

# Chance level, repeated for each of the three rounds; only the first curve
# gets a legend entry
for round_idx in range(3):
    legend_kwargs = {"label": "Chance"} if round_idx == 0 else {}
    ax.plot(14 * round_idx + tt, prob_random, "--", color="blue", **legend_kwargs)

# Per-turn accuracies of the last recorded epoch
epoch = -1
ax.plot(
    train_accuracies[epoch, :], label="Train", marker=".", markersize=15, color="orange"
)
ax.plot(
    val_accuracies[epoch, :], label="Val", marker=".", markersize=10, color="green"
)
ax.legend()

ax.set_xlabel("Turn")
ax.set_ylabel("Accuracy")
ax.set_title("Training and Validation Accuracies per Turn")

ax.set_ylim([0, 1.05])

plt.show()

In [None]:
# Persist the trained weights. Create the target folder first so that saving
# does not fail when "trained_models" is missing from the working directory.
pathlib.Path("trained_models").mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), "trained_models/lstm_params.pth")