In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Fix the random seed (44) before anything else runs so repeated runs are comparable.
# NOTE(review): set_seed comes from mingpt.utils and is not shown here — confirm which RNGs it seeds.
from mingpt.utils import set_seed
set_seed(44)

In [3]:
import os
import math
import time
from tqdm import tqdm
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from data.othello import Othello, OthelloBoardState, permit_reverse
from mingpt.dataset import CharDataset
from mingpt.utils import sample
from mingpt.model import GPT, GPTConfig
from mingpt.trainer import Trainer, TrainerConfig

## Training

In [None]:
# Full-data setup: load the games, wrap them in a dataset, and build a freshly initialised model.
# n_games=-1 means use as many simulated games as possible (from "data/othello_synthetic/")
othello = Othello(n_games=-1, data_root="othello_synthetic")
train_dataset = CharDataset(othello)
# original OthelloGPT params: n_layer=8, n_head=8, n_embd=512
# The first two GPTConfig arguments (vocabulary size, block size) are read from the dataset, not hard-coded.
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size, n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

In [4]:
# Load the games only; the dataset and model are built from `othello` in a separate cell.
# n_games=-1 puts no cap on how many games are read from "othello_synthetic". With the
# default arguments the loader also deduplicates and holds out a validation share
# (see the log printed under this cell).
othello = Othello(n_games=-1, data_root="othello_synthetic")

  0%|          | 0/50 [00:00<?, ?it/s]

Mem Used: 4.171 GB: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s]


Loaded 5000000 from 50 files
Deduplicating finished with 5000000 games left
Using 4000000 for training, 1000000 for validation


In [5]:
# for fine-tuning on fewer games: only the first n games of `othello` go into the dataset
n = 100
train_dataset = CharDataset(othello[:n])
# original OthelloGPT params: n_layer=8, n_head=8, n_embd=512
# NOTE: this builds a freshly initialised model; for fine-tuning, the pretrained weights
# are loaded into it by the checkpoint-loading cell that follows.
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size, n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

Dataset created has 100 sequences, 61 unique words.


In [4]:
# for player type training run
# deduplicate=False: the loader's deduplication step is turned off for this dataset.
othello = Othello(n_games=-1, data_root="othello_1player", deduplicate=False)

# Keep only element 1 of every record as the training sequence.
# NOTE(review): presumably each record is a (player type, move list) pair — confirm
# against the othello_1player data format.
seq = [p[1] for p in othello]

train_dataset = CharDataset(seq)
# original OthelloGPT params: n_layer=8, n_head=8, n_embd=512
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size, n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

Mem Used: 4.449 GB: 100%|██████████| 50/50 [00:20<00:00,  2.39it/s]


Loaded 5000000 from 50 files
Using 4000000 for training, 1000000 for validation
Dataset created has 4000000 sequences, 61 unique words.


In [6]:
# Warm-start `model` from a saved checkpoint.
# map_location="cpu" deserialises the tensors onto the CPU, so the file can be read even
# if it was written from a GPU that is not available now; load_state_dict then copies
# the values into the model's existing parameters on whatever device they live.
# The load report is bound to load_res so the cell does not echo it.
load_res = model.load_state_dict(torch.load("./ckpts/bias/TLbias80.ckpt", map_location="cpu"))

In [5]:
# Put the model on the current CUDA device when one exists; otherwise only warn
# and leave the model where it is.
if not torch.cuda.is_available():
    print("NO GPU FOUND")
else:
    device = torch.cuda.current_device()
    model = model.to(device)

In [6]:
# setting up training
max_epochs = 100
experiment_name = "playertype_"
# Timestamp (with a leading underscore) that makes each run's checkpoint name unique.
t_start = time.strftime("_%Y%m%d_%H%M%S")
# NOTE(review): experiment_name ends with "_" and t_start starts with "_", so together with
# the "_" in the template the resulting file name contains three consecutive underscores.
ckpt_path = f"./ckpts/{experiment_name}_{t_start}.ckpt"
tconf = TrainerConfig(
    max_epochs=max_epochs, 
    batch_size=512*4, # using 4 gpus
    # learning_rate=1e-4,
    # lr_decay=False,
    learning_rate=5e-4,
    lr_decay=True, 
    # Warmup and decay horizons are 5x and max_epochs x (len(train_dataset) * block_size);
    # the training log shows the learning rate reaching 5e-4 at the end of epoch 5.
    warmup_tokens=len(train_dataset)*train_dataset.block_size*5, 
    final_tokens=len(train_dataset)*train_dataset.block_size*max_epochs,
    num_workers=0, 
    ckpt_path=ckpt_path, 
    saved_epochs=[10, 20, 40, 80],
)
# The third positional argument is None — presumably the test-dataset slot, i.e. no
# evaluation set is used during training (confirm against mingpt.trainer.Trainer).
trainer = Trainer(model, train_dataset, None, tconf)
# From here on `device` is whatever device the trainer selected.
device = trainer.device
# Echo the timestamp so the checkpoint written by this run can be identified later.
print(t_start)

_20230720_185718


In [7]:
# Run training with the configuration above; one progress line is logged per epoch.
trainer.train()

epoch 1 iter 1953: train loss 1.27189. lr 1.000000e-04: 100%|██████████| 1954/1954 [11:03<00:00,  2.94it/s]
epoch 2 iter 1953: train loss 1.05247. lr 2.000000e-04: 100%|██████████| 1954/1954 [10:51<00:00,  3.00it/s]
epoch 3 iter 1953: train loss 0.93706. lr 3.000000e-04: 100%|██████████| 1954/1954 [10:50<00:00,  3.00it/s]
epoch 4 iter 1953: train loss 0.90153. lr 4.000000e-04: 100%|██████████| 1954/1954 [10:50<00:00,  3.00it/s]
epoch 5 iter 1953: train loss 0.86758. lr 5.000000e-04: 100%|██████████| 1954/1954 [10:51<00:00,  3.00it/s]
epoch 6 iter 1953: train loss 0.80442. lr 4.998633e-04: 100%|██████████| 1954/1954 [10:51<00:00,  3.00it/s]
epoch 7 iter 1953: train loss 0.77039. lr 4.994534e-04: 100%|██████████| 1954/1954 [10:53<00:00,  2.99it/s]
epoch 8 iter 1953: train loss 0.76682. lr 4.987707e-04: 100%|██████████| 1954/1954 [10:51<00:00,  3.00it/s]
epoch 9 iter 1953: train loss 0.76409. lr 4.978160e-04: 100%|██████████| 1954/1954 [10:51<00:00,  3.00it/s]
epoch 10 iter 1953: train lo

## Validation

In [4]:
def load_othello_model(ckpt):
    """Build an OthelloGPT-sized model and load its weights from ``./ckpts/{ckpt}.ckpt``.

    Args:
        ckpt: checkpoint path relative to ``./ckpts``, without the ``.ckpt``
            suffix (e.g. ``"bias/TLbias80"``).

    Returns:
        ``(model, device)``. ``device`` is the current CUDA device index when a
        GPU is available, otherwise ``torch.device("cpu")``. A pair is returned
        in both cases so callers can always unpack the result.
    """
    # original OthelloGPT params: n_layer=8, n_head=8, n_embd=512
    # vocab_size = 61, block_size = 59 for othello
    mconf = GPTConfig(61, 59, n_layer=8, n_head=8, n_embd=512)
    model = GPT(mconf)
    # Deserialise onto the CPU first so a checkpoint written from a GPU can still be
    # read on a machine without one; the model is moved to its target device below.
    state_dict = torch.load(f"./ckpts/{ckpt}.ckpt", map_location="cpu")
    model.load_state_dict(state_dict)
    if torch.cuda.is_available():
        device = torch.cuda.current_device()
    else:
        print("NO GPU FOUND")
        device = torch.device("cpu")
    model = model.to(device)
    return model, device

In [5]:
def check_legal(model, device, train_dataset, game):
    """Count how many of the model's next-move predictions along ``game`` are legal.

    For every proper, non-empty prefix of ``game`` the prefix is encoded with
    ``train_dataset.stoi``, ``sample`` is asked for one additional token, and the
    decoded sequence (prefix plus prediction) is replayed on a fresh
    ``OthelloBoardState``. A replay that raises counts as an illegal prediction.

    Expects the dataset to be loaded already and ``model`` to live on ``device``.

    Returns:
        ``(total_nodes, success_nodes)``: prefixes tried and prefixes whose
        predicted continuation replayed without an exception.
    """
    attempted = 0
    legal = 0

    for cutoff in range(1, len(game)):
        attempted += 1
        prefix_ids = [train_dataset.stoi[move] for move in game[:cutoff]]
        # [None, ...] adds a leading batch dimension of size one.
        x = torch.tensor(prefix_ids, dtype=torch.long)[None, ...].to(device)
        y = sample(model, x, 1, temperature=1.0)
        # Decode the returned ids back to moves, skipping any -1 entries.
        completion = [train_dataset.itos[int(tok)] for tok in y[0] if tok != -1]
        try:
            OthelloBoardState().update(completion)
        except Exception:
            # The board rejected the sequence: the prediction was not legal.
            continue
        legal += 1

    return attempted, legal

In [6]:
# data_root=None defers to Othello's own default (othello_synthetic, per the original note)
def validate_with_dataset(model, device, data_root=None, n_games=1000):
    """Print the share of legal next-move predictions over a set of validation games.

    Loads ``n_games`` games from ``data_root`` with everything assigned to the
    validation split, runs ``check_legal`` on each game, and shows the running
    pass rate on the progress bar. The final figure is printed once more at the end.

    Args:
        model: model to evaluate; must already be on ``device``.
        device: device the input tensors are moved to.
        data_root: dataset directory name passed through to ``Othello``.
        n_games: number of games to load and evaluate.

    Returns:
        None. The result is reported via the progress bar and ``print``.
    """
    # fine to load in first n games, because the first ~1 million othello_synthetic games are test set for unbiased model
    val_games = Othello(data_root=data_root, n_games=n_games, test_split=1, deduplicate=False)
    char = CharDataset(val_games.val)

    total_nodes = 0
    success_nodes = 0

    def progress_report():
        # With nothing searched yet (no games, or only games shorter than two moves)
        # report NaN instead of raising ZeroDivisionError.
        pass_rate = success_nodes / total_nodes * 100 if total_nodes else float("nan")
        return f"{pass_rate:.4f}% pass rate: {success_nodes}/{total_nodes} among all searched nodes"

    bar = tqdm(val_games.val[:n_games])
    for game in bar:
        tn, sn = check_legal(model, device, char, game)
        total_nodes += tn
        success_nodes += sn
        bar.set_description(progress_report())
    print(progress_report())

In [7]:
def validate_from_checkpoint(ckpt, data_root=None, n_games=1000):
    """Load checkpoint ``ckpt`` and report its legal-move pass rate.

    Thin wrapper: ``load_othello_model`` supplies the model and its device, and
    ``validate_with_dataset`` does the evaluation on ``n_games`` games from
    ``data_root``.
    """
    loaded_model, loaded_device = load_othello_model(ckpt)
    validate_with_dataset(
        loaded_model,
        loaded_device,
        data_root=data_root,
        n_games=n_games,
    )

In [11]:
# Evaluate the "bias/finetune_bias80_e5" checkpoint on othello_synthetic games
# (n_games is left at its default of 1000).
validate_from_checkpoint("bias/finetune_bias80_e5", data_root="othello_synthetic")

Mem Used: 4.062 GB:   0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]

Loaded 100000 from 1 files
Using 0 for training, 1000 for validation
Dataset created has 1000 sequences, 61 unique words.


99.9474% pass rate: 58909/58940 among all searched nodes: 100%|██████████| 1000/1000 [06:48<00:00,  2.45it/s]

99.9474% pass rate: 58909/58940 among all searched nodes





In [None]:
# Cross-evaluation: every checkpoint (outer loop) against every dataset (inner loop),
# 1000 games per pair, with a banner printed before each result.
for c in ["bias/TLcontrol", "bias/TLbias50", "bias/TLbias80", "bias/TLbias95"]:
    for dr in ["synthetic", "TLbias50", "TLbias80", "TLbias95"]:
        print(f"======== ckpt: {c} | data: {dr} =========")
        validate_from_checkpoint(c, data_root=f"othello_{dr}", n_games=1000)

## Baselines for legal move accuracy

In [18]:
# (row delta, column delta) for each of the eight neighbouring squares
eights = [[-1, 0], [-1, 1], [0, 1], [1, 1], [1, 0], [1, -1], [0, -1], [-1, -1]]


def check_adj(ob):
    """Count the empty squares that touch at least one occupied square.

    ``ob.get_occupied()`` is indexed as a flat, row-major 8x8 board
    (index = row * 8 + column) whose entries are truthy for occupied squares.
    """
    occupied = ob.get_occupied()

    def touches_piece(row, col):
        # True as soon as any on-board neighbour of (row, col) is occupied.
        for d_row, d_col in eights:
            n_row, n_col = row + d_row, col + d_col
            if 0 <= n_row < 8 and 0 <= n_col < 8 and occupied[n_row * 8 + n_col]:
                return True
        return False

    return sum(
        1
        for square in range(64)
        if not occupied[square] and touches_piece(*divmod(square, 8))
    )

In [None]:
# Chance-level baselines: over every position of 1000 games, compare the number of legal
# moves with the size of three candidate pools a move could be drawn from.
# NOTE: this rebinds the notebook-level names `othello` and `seq` used by the training cells.
othello = Othello(data_root="othello_synthetic", n_games=1000, test_split=0, deduplicate=False)
# Running totals of candidate-pool sizes, summed over all visited positions.
baselines = [
    0, # full random
    0, # no-repeats random
    0  # only adjacent
]
# Running total of legal moves over the same positions.
legal = 0
bar = tqdm(othello)
for seq in bar:
    ob = OthelloBoardState()
    for i, move in enumerate(seq):
        # 60 candidates per position (presumably the 64 squares minus the 4 pre-filled centre squares).
        baselines[0] += 60
        # Same pool minus the i moves already played in this game.
        baselines[1] += 60 - i
        # Empty squares next to at least one occupied square.
        baselines[2] += check_adj(ob)        
        # Count legal moves BEFORE applying the move, i.e. for the position the player actually faced.
        legal += len(ob.get_valid_moves())
        ob.update([move])
    bar.set_description(desc=f"{legal}/{baselines[0]}, {legal/baselines[0]:.4f}")

# One ratio per baseline, in the order listed above: legal moves / candidate moves.
for b in baselines:
    print(legal/b)