In [1]:
import pandas as pd
import numpy as np
import torch
import math
import inspect
from dataclasses import dataclass
import torch.nn as nn
from torch.nn import functional as F

from spl_model import *

In [2]:
# Read the serialized training array from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# only use this with a trusted test.npy; if the array turns out to be plain
# numeric, the flag can probably be dropped — confirm.
with open('test.npy', mode='rb') as npy_file:
    data = np.load(npy_file, allow_pickle=True)

In [3]:
# Smallest and largest id in the data; the largest one fixes the vocab size we need.
print(np.min(data), np.max(data))

0 647


In [4]:
# The CUDA device caused problems, so everything is pinned to the CPU.
# The disabled auto-detecting versions were:
#   device      = 'cuda' if torch.cuda.is_available() else 'cpu'
#   device_type = 'cuda' if 'cuda' in device else 'cpu'  # for later use in torch.autocast
device = device_type = 'cpu'

In [5]:
@dataclass
class GPTConfig:
    """Hyperparameters handed to ``GPT`` when the model is built.

    Declared as a dataclass so individual fields can be overridden at
    construction time, e.g. ``GPTConfig(n_layer=4)``; ``GPTConfig()`` still
    yields the defaults below.
    """
    block_size: int = 64   # size of the position-embedding table (wpe) in the built model
    lineup_size: int = 7   # forwarded to get_batch by the training/generation cells
    vocab_size: int = 648  # ids in the data span 0..647, so 648 distinct tokens
    n_layer: int = 3       # number of transformer Blocks
    n_head: int = 1        # NOTE(review): presumably attention heads per block — confirm in spl_model
    n_rules: int = 57      # NOTE(review): meaning not visible in this notebook — see spl_model
    n_embd: int = 64       # embedding width
    dropout: float = 0.0
    bias: bool = True # True: bias in Linears and LayerNorms, like GPT-2. False: a bit better and faster

# Instantiate the default configuration, build the network from it and move it
# to the selected device; the bare `model` on the last line displays its layout.
gptconf = GPTConfig()
model = GPT(gptconf).to(device)
model

number of parameters: 191.55K


GPT(
  (transformer): ModuleDict(
    (wte): Embedding(648, 64)
    (wpe): Embedding(64, 64)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-2): 3 x Block(
        (ln_1): LayerNorm()
        (attn): CausalSelfAttention(
          (c_attn): Linear(in_features=64, out_features=192, bias=True)
          (c_proj): Linear(in_features=64, out_features=64, bias=True)
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
        )
        (ln_2): LayerNorm()
        (mlp): MLP(
          (c_fc): Linear(in_features=64, out_features=256, bias=True)
          (gelu): GELU(approximate='none')
          (c_proj): Linear(in_features=256, out_features=64, bias=True)
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm()
  )
  (lm_head): Linear(in_features=64, out_features=648, bias=False)
)

In [7]:
# Restore the model object written by the torch.save cell at the end of this
# notebook, replacing the freshly initialised one.
# - map_location=device: the notebook is pinned to the CPU, so tensors that
#   were saved from another device are remapped instead of failing to load.
# - weights_only=False: the file holds a whole pickled module, not a
#   state_dict. Unpickling can execute arbitrary code, so only load a
#   checkpoint you created yourself.
model = torch.load('spl_bot_model.pt', map_location=device, weights_only=False)

## Training the model

In [9]:
# Training hyperparameters. Improvement was observed to become slow after
# about 20k steps, so max_iters leaves plenty of headroom.
max_iters = 50000
learning_rate = 1e-3
eval_interval = 2000
batch_size = 64

# create a PyTorch optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# The loop variable is `step`, not `iter`, so the builtin iter() is not
# shadowed in the notebook's global namespace for later cells.
for step in range(max_iters):

    # sample a batch of data
    # NOTE(review): the third argument is n_embd; it equals block_size (both 64)
    # in this config — confirm which one get_batch actually expects.
    x0, x1, y = get_batch(data, batch_size, gptconf.n_embd, gptconf.lineup_size, device)

    # forward pass: the model returns the logits and the loss for this batch
    logits, loss = model(x0, x1, y)

    # every eval_interval steps (and on the last step) report the training loss
    # of the current batch; no validation set is evaluated here
    if step % eval_interval == 0 or step == max_iters - 1:
        print(f"step {step}: train loss {loss:.4f}")

    # backward pass and parameter update
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

## Evaluating performance

In [10]:
# Draw a single sample with get_batch in generate mode; y is returned but not
# needed for generation.
x0, x1, y = get_batch(data, 1, gptconf.n_embd, gptconf.lineup_size, device, generate=True)

# generated outputs: ask for one new token per lineup slot, taking the count
# from the config instead of repeating the literal 7.
# NOTE(review): top_k=3 presumably limits sampling to the 3 most likely
# tokens — confirm in spl_model.
idx = model.generate(x0, x1, max_new_tokens=gptconf.lineup_size, top_k=3)
idx

tensor([[437, 388, 384, 385, 337, 520, 391],
        [442, 436, 542, 433, 514, 336, 380],
        [442, 343, 455, 414, 457, 415, 346],
        [437, 388, 384, 339, 390, 385, 516],
        [442, 348, 450, 433, 371, 457, 512],
        [442, 343, 455, 411, 414, 555, 346],
        [437, 388, 384, 390, 519, 385, 520]])

In [11]:
# Card names keyed by card id. Take a copy so that adding the placeholder row
# below cannot write back into whatever frame get_all_cards() returned.
cards_df = get_all_cards()['name'].copy()

# adding extra name for empty slot in lineups
cards_df.loc[0] = 'none'

# Print the card names of every generated lineup, one Series per row of idx.
# .cpu() is a no-op on the CPU and keeps this working if idx lives elsewhere.
for lineup_ids in idx.cpu().numpy():
    print(cards_df.loc[lineup_ids])

id
437     Kelya Frendul
388       Diemonshark
384      Flying Squid
385        Deeplurker
337    Pelacor Bandit
520     Swamp Spitter
391        Wave Brood
Name: name, dtype: object
id
442    Quix the Devious
436         Void Dragon
542     Venari Marksrat
433        Chaos Dragon
514      Fungus Flinger
336        Djinn Biljka
380        Fungus Fiend
Name: name, dtype: object
id
442       Quix the Devious
343       Pelacor Conjurer
455               Vulguine
414        Celestial Harpy
457    Dhampir Infiltrator
415              Time Mage
346          Naga Assassin
Name: name, dtype: object
id
437      Kelya Frendul
388        Diemonshark
384       Flying Squid
339     Djinn Oshannus
390    Nerissa Tridawn
385         Deeplurker
516    Kulu Mastermind
Name: name, dtype: object
id
442       Quix the Devious
348           Djinn Chwala
450          Carnage Titan
433           Chaos Dragon
371         Goblin Psychic
457    Dhampir Infiltrator
512          Thane Newsong
Name: name, dtype: o

In [12]:
# Frequency of each summoner id in the training data: 437 and 442 dominate,
# which is why the model returns those two so often.
summoner_ids, summoner_counts = np.unique(data[:, 0, -7], return_counts=True)
{summoner: count for summoner, count in zip(summoner_ids, summoner_counts)}

{240: 308,
 437: 33173,
 438: 12322,
 439: 9310,
 440: 7723,
 441: 7077,
 442: 24705,
 463: 5539,
 464: 3583,
 502: 1383,
 506: 4553,
 507: 8123,
 509: 6489,
 547: 866,
 548: 309,
 549: 3435,
 550: 1312,
 551: 2339,
 552: 3873,
 553: 2436,
 554: 2178,
 637: 167,
 638: 148,
 639: 215,
 640: 203,
 641: 116,
 642: 119,
 644: 279,
 645: 209,
 647: 278}

## Save the model

In [None]:
# Persist the whole model object (not just a state_dict) to spl_bot_model.pt;
# this is the same file that the torch.load cell earlier in the notebook reads back.
torch.save(model, 'spl_bot_model.pt')