# NIM BERT Full Experiment

Michael DeLeo

Dr. Erhan Guven

1. Imports
2. Config
3. NIM Game Players
    - GURU
    - QLearner
    - RAndom
4. Generate NIM Games
5. Train the Tokenizer
6. Build the Datasets
7. Build the BERT Model
8. Train the BERT Models
9. Evaluation

## 1. Imports

In [1]:
import torch
import os
import itertools

import numpy as np

from transformers import (
    BertConfig, BertForMaskedLM, TrainingArguments, Trainer, 
    DataCollatorForLanguageModeling, BertTokenizer
)
from datasets import load_dataset
from tokenizers import BertWordPieceTokenizer
from pathlib import Path
from random import randint, choice, random
from re import search
from tqdm.notebook import tqdm
from BertHarmon import BertHarmon

## 2. Config Variables

In [2]:
# General
NUM_PROC = 20 # Change to 1 if on windows

if NUM_PROC >  1:
    os.environ["TOKENIZERS_PARALLELISM"] = "true"
    BATCHED = True
else:
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    BATCHED = False

# Tokenizer
MAX_VOCAB = 1000
TOKENIZER_DIR = os.path.abspath("./nim-tokenizer")
TOKENIZER_PATH = os.path.join(TOKENIZER_DIR, "vocab.txt")
MAX_STR_LEN = 12

# NIM
# max number of items per pile
ITEMS_MX = 10

# Model
NUM_HIDDEN_LAYERS = 2

In [3]:
os.makedirs(TOKENIZER_DIR, exist_ok=True)

## 3. NIM Game Players

### Game Code

In [4]:
# Initialize starting position
def init_game():
    return [randint(1,ITEMS_MX), randint(1,ITEMS_MX), randint(1,ITEMS_MX)]

### Init of player functions

In [5]:
# Random Nim player
def nim_random(_st, random_chance=None):
    """
    random_chance is a dummy param here. does not do anything
    """
    pile = choice([i for i in range(3) if _st[i]>0])  # find the non-empty piles
    return randint(1, _st[pile]), pile  # random move

In [6]:
# Based on X-oring the item counts in piles - mathematical solution
def nim_guru(st, random_chance=None):
    """
    st: game state [a, b, c]
    random_chance: if none, then just play as player. If some number [0, 1], then
        this is the probability of making a random move instead
    """
    if random_chance is not None:
        if random() <= random_chance:
            return nim_random(st)
    
    xored = st[0] ^ st[1] ^ st[2]
    
    if xored == 0:
        return nim_random(st)
    
    for pile in range(3):
        s = st[pile] ^ xored
        
        if s <= st[pile]:
            return st[pile]-s, pile

In [7]:
def nim_qlearner(_st, random_chance=None):
    """
    _st: game state [a, b, c]
    random_chance: if none, then just play as player. If some number [0, 1], then
        this is the probability of making a random move instead
    """
    if random_chance is not None:
        if random() <= random_chance:
            return nim_random(_st)
    global qtable
    # pick the best rewarding move, equation 1
    a = np.argmax(qtable[_st[0], _st[1], _st[2]])  # exploitation
    # index is based on move, pile
    move, pile = a%ITEMS_MX+1, a//ITEMS_MX
    # check if qtable has generated a random but game illegal move - we have not explored there yet
    if move <= 0 or _st[pile] < move:
        move, pile = nim_random(_st)  # exploration
    return move, pile  # action

### Train QLearner

In [8]:
qtable, Alpha, Gamma, Reward = None, 1.0, 0.8, 100.0

# learn from _n games, randomly played to explore the possible states
def nim_qlearn(_n):
    global qtable
    # based on max items per pile
    qtable = np.zeros((ITEMS_MX+1, ITEMS_MX+1, ITEMS_MX+1, ITEMS_MX*3), dtype=float)
    # play _n games
    for i in tqdm(range(_n)):
        # first state is starting position
        st1 = init_game()
        while True:  # while game not finished
            # make a random move - exploration
            move, pile = nim_random(st1)
            st2 = list(st1)
            # make the move
            st2[pile] -= move  # --> last move I made
            if st2 == [0, 0, 0]:  # game ends
                qtable_update(Reward, st1, move, pile, 0)  # I won
                break  # new game

            elif np.max(qtable[st2[0], st2[1], st2[2]]) >= Reward:
                # immediate loss - penalize it
                qtable_update(-Reward, st1, move, pile, np.min(qtable[st2[0], st2[1], st2[2]]))

            else:
                # not immediate loss - reward it
                qtable_update(Reward, st1, move, pile, np.max(qtable[st2[0], st2[1], st2[2]]))
            # Switch sides for play and learning
            st1 = st2

# Equation 3 - update the qtable
def qtable_update(r, _st1, move, pile, q_future_best):
    a = pile*ITEMS_MX+move-1
    qtable[_st1[0], _st1[1], _st1[2], a] = Alpha * (r + Gamma * q_future_best)

In [9]:
nim_qlearn(300000)

  0%|          | 0/300000 [00:00<?, ?it/s]

## 4. Generate NIM Games

In [10]:
PILE = {0:'a', 1:'b', 2:'c'}

def save_move(fn, state, move, pile, side):
    dir_path = os.path.dirname(os.path.abspath(fn))
    os.makedirs(dir_path, exist_ok=True)
    with open(fn, 'a') as fout:
        fout.write(f'a{state[0]}/b{state[1]}/c{state[2]} {side} - {PILE[pile]}{move}\n')

In [11]:
Engines = {'Random':nim_random, 'Guru':nim_guru, 'Qlearner':nim_qlearner}

def game(a, b, fn=None, random_chance=None):
    state, side = init_game(), a[0]
    while True:
        engine = Engines[a] if side == a[0] else Engines[b]
        move, pile = engine(state, random_chance=random_chance)
        # print(state, move, pile)  # debug purposes
        if fn is not None:
            save_move(fn, state, move, pile, side)
        state[pile] -= move
        if state == [0, 0, 0]:  # game ends
            return side  # winning side
        side = b[0] if side == a[0] else a[0]  # switch sides

def play_games(_n, a, b, fn=None, random_chance=None):
    """
    _n: number of games
    a: player A function
    b: player B function
    fn: filename to save moves
    random_chance: random chance to choose random move (none is no random)
    """
    from collections import defaultdict
    wins = defaultdict(int)
    for i in range(_n):
        wins[game(a, b, fn=fn, random_chance=random_chance)] += 1
    # info
    print(f"{_n} games, {a:>8s}{wins[a[0]]:5d}  {b:>8s}{wins[b[0]]:5d}")
    
    return wins[a[0]], wins[b[0]]

In [12]:
play_games(1000, 'Guru', 'Random', fn="test.txt", random_chance=0.5)
play_games(1000, 'Guru', 'Qlearner')
play_games(1000, 'Qlearner', 'Guru')

1000 games,     Guru  763    Random  237
1000 games,     Guru  928  Qlearner   72
1000 games, Qlearner  942      Guru   58


(942, 58)

In [13]:
def generate_roster(player_names):
    res = []

    for comb in itertools.permutations(player_names, 2):
        res.append(comb)
    return res

roster = generate_roster(list(Engines.keys()))
print(roster)

chances = [num for num in range(0, 11, 1)]

root_path = os.path.abspath("./nim_datasets")

chance_paths = [os.path.join(root_path, str(chance * 10) + "_prcnt_random")
                             for chance in chances]
print(chance_paths)

[('Random', 'Guru'), ('Random', 'Qlearner'), ('Guru', 'Random'), ('Guru', 'Qlearner'), ('Qlearner', 'Random'), ('Qlearner', 'Guru')]
['/home/michael/Workspace/nlp-chess/src/bert/nim_datasets/0_prcnt_random', '/home/michael/Workspace/nlp-chess/src/bert/nim_datasets/10_prcnt_random', '/home/michael/Workspace/nlp-chess/src/bert/nim_datasets/20_prcnt_random', '/home/michael/Workspace/nlp-chess/src/bert/nim_datasets/30_prcnt_random', '/home/michael/Workspace/nlp-chess/src/bert/nim_datasets/40_prcnt_random', '/home/michael/Workspace/nlp-chess/src/bert/nim_datasets/50_prcnt_random', '/home/michael/Workspace/nlp-chess/src/bert/nim_datasets/60_prcnt_random', '/home/michael/Workspace/nlp-chess/src/bert/nim_datasets/70_prcnt_random', '/home/michael/Workspace/nlp-chess/src/bert/nim_datasets/80_prcnt_random', '/home/michael/Workspace/nlp-chess/src/bert/nim_datasets/90_prcnt_random', '/home/michael/Workspace/nlp-chess/src/bert/nim_datasets/100_prcnt_random']


In [14]:
flat_datasets = []
dict_datasets = {}

for chance in chances:
    
    chance = chance / 10.0
    str_chance = str(int(chance * 100))
    print(chance)
    
    dict_datasets[str_chance] = []
    
    # Upper directory for datasets of this random chance
    chance_dir = os.path.join(root_path, str_chance + "_prcnt_random")
    
    for matchup in roster:
        dataset_path = os.path.join(chance_dir, 
                                    f"{matchup[0]}_{matchup[1]}.txt")
        
        flat_datasets.append(dataset_path)
        dict_datasets[str_chance].append(dataset_path)
        
        play_games(200, *matchup, random_chance=chance, fn=dataset_path)
    

0.0
200 games,   Random    0      Guru  200
200 games,   Random    1  Qlearner  199
200 games,     Guru  200    Random    0
200 games,     Guru  188  Qlearner   12
200 games, Qlearner  200    Random    0
200 games, Qlearner  186      Guru   14
0.1
200 games,   Random   10      Guru  190
200 games,   Random   16  Qlearner  184
200 games,     Guru  192    Random    8
200 games,     Guru  153  Qlearner   47
200 games, Qlearner  190    Random   10
200 games, Qlearner  151      Guru   49
0.2
200 games,   Random   24      Guru  176
200 games,   Random   22  Qlearner  178
200 games,     Guru  186    Random   14
200 games,     Guru  135  Qlearner   65
200 games, Qlearner  181    Random   19
200 games, Qlearner  138      Guru   62
0.3
200 games,   Random   34      Guru  166
200 games,   Random   34  Qlearner  166
200 games,     Guru  176    Random   24
200 games,     Guru  120  Qlearner   80
200 games, Qlearner  175    Random   25
200 games, Qlearner  103      Guru   97
0.4
200 games,   Random 

In [15]:
nim_datasets = flat_datasets

## 5. Train the Tokenizer

In [16]:
# Initialize a tokenizer
tokenizer = BertWordPieceTokenizer(lowercase=False, clean_text=True)

# Customize training
tokenizer.train(files=nim_datasets, vocab_size=MAX_VOCAB, min_frequency=1,
                show_progress=True)

print(tokenizer.get_vocab())

save_path = tokenizer.save_model(TOKENIZER_DIR)
print(f"\nSaved tokenizer to {save_path[0]}")


{'c2': 40, '##4': 31, 'b2': 41, '1': 8, 'c0': 37, '-': 5, '7': 14, 'c': 22, 'a8': 57, 'b3': 44, 'a1': 33, '3': 10, '[SEP]': 3, 'a9': 62, 'G': 17, 'Q': 18, 'b4': 45, '##0': 23, '##3': 28, '/': 6, 'b7': 55, 'c8': 59, 'c10': 65, 'b8': 58, 'b9': 61, '2': 9, 'R': 19, '[PAD]': 0, 'a5': 50, 'a0': 36, 'a': 20, '[MASK]': 4, '5': 12, '6': 13, '0': 7, '##1': 24, '##2': 29, '9': 16, '[UNK]': 1, 'a2': 39, 'b5': 48, 'a6': 53, 'c7': 54, '##8': 32, '##9': 26, 'c3': 43, '4': 11, 'c9': 60, 'b0': 38, 'a10': 63, 'a7': 56, 'c1': 34, '##6': 25, 'b1': 35, 'c4': 46, 'b10': 64, 'b6': 52, 'c6': 51, 'a3': 42, 'c5': 49, '[CLS]': 2, '##5': 30, 'a4': 47, '##7': 27, 'b': 21, '8': 15}



Saved tokenizer to /home/michael/Workspace/nlp-chess/src/bert/nim-tokenizer/vocab.txt


In [17]:
# Reimport the tokenizer to test it was successful
tokenizer = BertTokenizer.from_pretrained(TOKENIZER_PATH, max_len=MAX_STR_LEN)



## 6. Build the Datasets

In [18]:
def tokenize_dataset(path):

    raw_datasets = load_dataset('text', data_files=path,
                                split='train')

    #cut size in half
    raw_datasets = raw_datasets.shuffle(seed=42)

    #raw_datasets = raw_datasets.select(range(10000))
    raw_datasets = raw_datasets.train_test_split()


    def tokenize_function(examples):
        return tokenizer(examples["text"], padding='max_length', truncation=True)

    tokenized_datasets = raw_datasets.map(tokenize_function, 
                                          batched=BATCHED, 
                                          keep_in_memory=True, 
                                          num_proc=NUM_PROC, 
                                          remove_columns=["text"])
    
    return tokenized_datasets

tokenized_datasets = {key: tokenize_dataset(dict_datasets[key]) for key in dict_datasets.keys()}

Using custom data configuration default-09d29cfe395c19c2


Downloading and preparing dataset text/default to /home/michael/.cache/huggingface/datasets/text/default-09d29cfe395c19c2/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Setting TOKENIZERS_PARALLELISM=false for forked processes.


Dataset text downloaded and prepared to /home/michael/.cache/huggingface/datasets/text/default-09d29cfe395c19c2/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4. Subsequent calls will reuse this data.


Setting TOKENIZERS_PARALLELISM=false for forked processes.
Using custom data configuration default-bf4d44678a418644


Downloading and preparing dataset text/default to /home/michael/.cache/huggingface/datasets/text/default-bf4d44678a418644/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Setting TOKENIZERS_PARALLELISM=false for forked processes.


Dataset text downloaded and prepared to /home/michael/.cache/huggingface/datasets/text/default-bf4d44678a418644/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4. Subsequent calls will reuse this data.


Setting TOKENIZERS_PARALLELISM=false for forked processes.
Using custom data configuration default-03a10f9abab416ff


Downloading and preparing dataset text/default to /home/michael/.cache/huggingface/datasets/text/default-03a10f9abab416ff/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Setting TOKENIZERS_PARALLELISM=false for forked processes.


Dataset text downloaded and prepared to /home/michael/.cache/huggingface/datasets/text/default-03a10f9abab416ff/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4. Subsequent calls will reuse this data.


Setting TOKENIZERS_PARALLELISM=false for forked processes.
Using custom data configuration default-5dce8196b210d771


Downloading and preparing dataset text/default to /home/michael/.cache/huggingface/datasets/text/default-5dce8196b210d771/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Setting TOKENIZERS_PARALLELISM=false for forked processes.


Dataset text downloaded and prepared to /home/michael/.cache/huggingface/datasets/text/default-5dce8196b210d771/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4. Subsequent calls will reuse this data.


Setting TOKENIZERS_PARALLELISM=false for forked processes.
Using custom data configuration default-d7e829a21d8a0b2e


Downloading and preparing dataset text/default to /home/michael/.cache/huggingface/datasets/text/default-d7e829a21d8a0b2e/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Setting TOKENIZERS_PARALLELISM=false for forked processes.


Dataset text downloaded and prepared to /home/michael/.cache/huggingface/datasets/text/default-d7e829a21d8a0b2e/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4. Subsequent calls will reuse this data.


Setting TOKENIZERS_PARALLELISM=false for forked processes.
Using custom data configuration default-598359f1e7706b2e


Downloading and preparing dataset text/default to /home/michael/.cache/huggingface/datasets/text/default-598359f1e7706b2e/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Setting TOKENIZERS_PARALLELISM=false for forked processes.


Dataset text downloaded and prepared to /home/michael/.cache/huggingface/datasets/text/default-598359f1e7706b2e/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4. Subsequent calls will reuse this data.


Setting TOKENIZERS_PARALLELISM=false for forked processes.
Using custom data configuration default-83adbbc97844cf75


Downloading and preparing dataset text/default to /home/michael/.cache/huggingface/datasets/text/default-83adbbc97844cf75/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Setting TOKENIZERS_PARALLELISM=false for forked processes.


Dataset text downloaded and prepared to /home/michael/.cache/huggingface/datasets/text/default-83adbbc97844cf75/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4. Subsequent calls will reuse this data.


Setting TOKENIZERS_PARALLELISM=false for forked processes.
Using custom data configuration default-106389a32593c1f3


Downloading and preparing dataset text/default to /home/michael/.cache/huggingface/datasets/text/default-106389a32593c1f3/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Setting TOKENIZERS_PARALLELISM=false for forked processes.


Dataset text downloaded and prepared to /home/michael/.cache/huggingface/datasets/text/default-106389a32593c1f3/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4. Subsequent calls will reuse this data.


Setting TOKENIZERS_PARALLELISM=false for forked processes.
Using custom data configuration default-3c9861c4bdbcee61


Downloading and preparing dataset text/default to /home/michael/.cache/huggingface/datasets/text/default-3c9861c4bdbcee61/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Setting TOKENIZERS_PARALLELISM=false for forked processes.


Dataset text downloaded and prepared to /home/michael/.cache/huggingface/datasets/text/default-3c9861c4bdbcee61/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4. Subsequent calls will reuse this data.


Setting TOKENIZERS_PARALLELISM=false for forked processes.
Using custom data configuration default-3977c1bf37cb70c8


Downloading and preparing dataset text/default to /home/michael/.cache/huggingface/datasets/text/default-3977c1bf37cb70c8/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Setting TOKENIZERS_PARALLELISM=false for forked processes.


Dataset text downloaded and prepared to /home/michael/.cache/huggingface/datasets/text/default-3977c1bf37cb70c8/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4. Subsequent calls will reuse this data.


Setting TOKENIZERS_PARALLELISM=false for forked processes.
Using custom data configuration default-590f9839f48166db


Downloading and preparing dataset text/default to /home/michael/.cache/huggingface/datasets/text/default-590f9839f48166db/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Setting TOKENIZERS_PARALLELISM=false for forked processes.


Dataset text downloaded and prepared to /home/michael/.cache/huggingface/datasets/text/default-590f9839f48166db/0.0.0/d86c40dad297bdddf277b406c6a59f0250b5318c400bf23d420a31aff88c84c4. Subsequent calls will reuse this data.


Setting TOKENIZERS_PARALLELISM=false for forked processes.


In [19]:
tokenized_datasets["0"]["train"][1]

{'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
 'input_ids': [2, 39, 6, 38, 6, 37, 1, 5, 39, 3, 0, 0],
 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}

In [20]:
block_size = MAX_STR_LEN

def group_texts(examples):
    # Concatenate all texts.
    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
    total_length = len(concatenated_examples[list(examples.keys())[0]])
    # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
        # customize this part to your needs.
    total_length = (total_length // block_size) * block_size
    # Split by chunks of max_len.
    result = {
        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
        for k, t in concatenated_examples.items()
    }
    #result["labels"] = result["input_ids"].copy()
    return result

datasets = {key: tokenized_datasets[key].map(group_texts,
                batched=BATCHED,
                batch_size=1000,
                num_proc=NUM_PROC,
                keep_in_memory=True)
            for key in tokenized_datasets.keys()}

Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processes.
Setting TOKENIZERS_PARALLELISM=false for forked processe

## 7. Build the BERT Model

In [21]:
# Set a configuration for our model
config = BertConfig(
    vocab_size=len(tokenizer),
    max_position_embeddings=MAX_STR_LEN,
    num_attention_heads=12,
    num_hidden_layers=NUM_HIDDEN_LAYERS
)
# Initialize the model from a configuration without pretrained weights
model = BertForMaskedLM(config=config)
print('Num parameters: ',model.num_parameters())

Num parameters:  14830914


## 8. Train the BERT Models

In [22]:
training_args = TrainingArguments(
    per_device_train_batch_size=3200,
    output_dir='./output-nim', 
    num_train_epochs=1600,
    eval_steps=400,
    evaluation_strategy="steps")

def do_training(save_name, train_dataset, eval_dataset):

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm_probability=0.15,
    )
    
    model = BertForMaskedLM(config=config)

    trainer = Trainer(
        model=model, 
        args=training_args, 
        train_dataset=full_train_dataset, 
        eval_dataset=full_eval_dataset, 
        data_collator=data_collator
    )

    trainer.train()
    
    trainer.save_model(save_name)

In [23]:
for key in tokenized_datasets.keys():
    

    full_train_dataset = tokenized_datasets[key]["train"]
    full_eval_dataset = tokenized_datasets[key]["test"]

    do_training(f"./{key}-nim", full_train_dataset, full_eval_dataset)

***** Running training *****
  Num examples = 6041
  Num Epochs = 1600
  Instantaneous batch size per device = 3200
  Total train batch size (w. parallel, distributed & accumulation) = 3200
  Gradient Accumulation steps = 1
  Total optimization steps = 3200


Step,Training Loss,Validation Loss
400,No log,0.894379
800,1.004600,0.825257
1200,0.788200,0.751511
1600,0.728200,0.721265
2000,0.693200,0.744999
2400,0.693200,0.716694
2800,0.667900,0.774669
3200,0.657400,0.702898


***** Running Evaluation *****
  Num examples = 2014
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-500
Configuration saved in ./output-nim/checkpoint-500/config.json
Model weights saved in ./output-nim/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 2014
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1000
Configuration saved in ./output-nim/checkpoint-1000/config.json
Model weights saved in ./output-nim/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 2014
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1500
Configuration saved in ./output-nim/checkpoint-1500/config.json
Model weights saved in ./output-nim/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 2014
  Batch size = 8
***** Running Evaluation *****
  Num examples = 2014
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-2000
Configuration saved in ./ou

Step,Training Loss,Validation Loss
400,No log,0.919479
800,1.008900,0.832182
1200,0.817500,0.850327
1600,0.766200,0.800778
2000,0.736000,0.813228
2400,0.736000,0.855394
2800,0.718400,0.827184
3200,0.705200,0.756895


***** Running Evaluation *****
  Num examples = 1978
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-500
Configuration saved in ./output-nim/checkpoint-500/config.json
Model weights saved in ./output-nim/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1978
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1000
Configuration saved in ./output-nim/checkpoint-1000/config.json
Model weights saved in ./output-nim/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1978
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1500
Configuration saved in ./output-nim/checkpoint-1500/config.json
Model weights saved in ./output-nim/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1978
  Batch size = 8
***** Running Evaluation *****
  Num examples = 1978
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-2000
Configuration saved in ./ou

Step,Training Loss,Validation Loss
400,No log,0.935952
800,1.032800,0.8926
1200,0.864800,0.848879
1600,0.815800,0.903867
2000,0.787100,0.834511
2400,0.787100,0.870851
2800,0.769200,0.888708
3200,0.753500,0.843373


***** Running Evaluation *****
  Num examples = 1983
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-500
Configuration saved in ./output-nim/checkpoint-500/config.json
Model weights saved in ./output-nim/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1983
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1000
Configuration saved in ./output-nim/checkpoint-1000/config.json
Model weights saved in ./output-nim/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1983
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1500
Configuration saved in ./output-nim/checkpoint-1500/config.json
Model weights saved in ./output-nim/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1983
  Batch size = 8
***** Running Evaluation *****
  Num examples = 1983
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-2000
Configuration saved in ./ou

Step,Training Loss,Validation Loss
400,No log,0.957375
800,1.038600,0.909758
1200,0.889400,0.911581
1600,0.850500,0.952642
2000,0.822500,0.888505
2400,0.822500,0.888008
2800,0.802900,0.887126
3200,0.789200,0.87734


***** Running Evaluation *****
  Num examples = 1950
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-500
Configuration saved in ./output-nim/checkpoint-500/config.json
Model weights saved in ./output-nim/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1950
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1000
Configuration saved in ./output-nim/checkpoint-1000/config.json
Model weights saved in ./output-nim/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1950
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1500
Configuration saved in ./output-nim/checkpoint-1500/config.json
Model weights saved in ./output-nim/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1950
  Batch size = 8
***** Running Evaluation *****
  Num examples = 1950
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-2000
Configuration saved in ./ou

Step,Training Loss,Validation Loss
400,No log,0.972604
800,1.051300,0.905123
1200,0.915600,0.944507
1600,0.878800,0.955856
2000,0.853300,0.981495
2400,0.853300,1.003121
2800,0.835900,0.971576
3200,0.823700,0.914322


***** Running Evaluation *****
  Num examples = 1960
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-500
Configuration saved in ./output-nim/checkpoint-500/config.json
Model weights saved in ./output-nim/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1960
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1000
Configuration saved in ./output-nim/checkpoint-1000/config.json
Model weights saved in ./output-nim/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1960
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1500
Configuration saved in ./output-nim/checkpoint-1500/config.json
Model weights saved in ./output-nim/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1960
  Batch size = 8
***** Running Evaluation *****
  Num examples = 1960
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-2000
Configuration saved in ./ou

Step,Training Loss,Validation Loss
400,No log,0.989297
800,1.073100,1.010626
1200,0.938800,0.997403
1600,0.908100,0.980984
2000,0.885700,0.919443
2400,0.885700,0.971414
2800,0.870300,1.006132
3200,0.860300,0.975694


***** Running Evaluation *****
  Num examples = 1962
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-500
Configuration saved in ./output-nim/checkpoint-500/config.json
Model weights saved in ./output-nim/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1962
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1000
Configuration saved in ./output-nim/checkpoint-1000/config.json
Model weights saved in ./output-nim/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1962
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1500
Configuration saved in ./output-nim/checkpoint-1500/config.json
Model weights saved in ./output-nim/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1962
  Batch size = 8
***** Running Evaluation *****
  Num examples = 1962
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-2000
Configuration saved in ./ou

Step,Training Loss,Validation Loss
400,No log,0.990865
800,1.071200,0.953962
1200,0.952000,0.958829
1600,0.927200,1.030709
2000,0.908400,1.02631
2400,0.908400,0.974243
2800,0.894400,1.017209
3200,0.884500,1.043193


***** Running Evaluation *****
  Num examples = 1948
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-500
Configuration saved in ./output-nim/checkpoint-500/config.json
Model weights saved in ./output-nim/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1948
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1000
Configuration saved in ./output-nim/checkpoint-1000/config.json
Model weights saved in ./output-nim/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1948
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1500
Configuration saved in ./output-nim/checkpoint-1500/config.json
Model weights saved in ./output-nim/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1948
  Batch size = 8
***** Running Evaluation *****
  Num examples = 1948
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-2000
Configuration saved in ./ou

Step,Training Loss,Validation Loss
400,No log,0.959303
800,1.081900,1.006147
1200,0.964700,0.997497
1600,0.943200,0.989638
2000,0.925600,0.990977
2400,0.925600,1.039234
2800,0.915100,1.014785
3200,0.905500,1.060272


***** Running Evaluation *****
  Num examples = 1959
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-500
Configuration saved in ./output-nim/checkpoint-500/config.json
Model weights saved in ./output-nim/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1959
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1000
Configuration saved in ./output-nim/checkpoint-1000/config.json
Model weights saved in ./output-nim/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1959
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1500
Configuration saved in ./output-nim/checkpoint-1500/config.json
Model weights saved in ./output-nim/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1959
  Batch size = 8
***** Running Evaluation *****
  Num examples = 1959
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-2000
Configuration saved in ./ou

Step,Training Loss,Validation Loss
400,No log,1.013289
800,1.072400,1.003014
1200,0.964400,1.038501
1600,0.946100,1.01122
2000,0.931000,0.983576
2400,0.931000,0.969384
2800,0.918600,1.045261
3200,0.910300,1.009902


***** Running Evaluation *****
  Num examples = 1962
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-500
Configuration saved in ./output-nim/checkpoint-500/config.json
Model weights saved in ./output-nim/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1962
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1000
Configuration saved in ./output-nim/checkpoint-1000/config.json
Model weights saved in ./output-nim/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1962
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1500
Configuration saved in ./output-nim/checkpoint-1500/config.json
Model weights saved in ./output-nim/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1962
  Batch size = 8
***** Running Evaluation *****
  Num examples = 1962
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-2000
Configuration saved in ./ou

Step,Training Loss,Validation Loss
400,No log,0.987094
800,1.073600,1.053536
1200,0.962000,0.997236
1600,0.945600,0.986522
2000,0.930600,1.015485
2400,0.930600,1.017046
2800,0.922000,1.031293
3200,0.912800,0.969582


***** Running Evaluation *****
  Num examples = 1970
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-500
Configuration saved in ./output-nim/checkpoint-500/config.json
Model weights saved in ./output-nim/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1970
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1000
Configuration saved in ./output-nim/checkpoint-1000/config.json
Model weights saved in ./output-nim/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1970
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1500
Configuration saved in ./output-nim/checkpoint-1500/config.json
Model weights saved in ./output-nim/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1970
  Batch size = 8
***** Running Evaluation *****
  Num examples = 1970
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-2000
Configuration saved in ./ou

Step,Training Loss,Validation Loss
400,No log,1.000779
800,1.072100,0.988808
1200,0.961700,0.984789
1600,0.945900,1.000607
2000,0.930800,0.988148
2400,0.930800,1.021623
2800,0.922100,0.990636
3200,0.912800,1.00584


***** Running Evaluation *****
  Num examples = 1977
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-500
Configuration saved in ./output-nim/checkpoint-500/config.json
Model weights saved in ./output-nim/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1977
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1000
Configuration saved in ./output-nim/checkpoint-1000/config.json
Model weights saved in ./output-nim/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1977
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-1500
Configuration saved in ./output-nim/checkpoint-1500/config.json
Model weights saved in ./output-nim/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1977
  Batch size = 8
***** Running Evaluation *****
  Num examples = 1977
  Batch size = 8
Saving model checkpoint to ./output-nim/checkpoint-2000
Configuration saved in ./ou

## 9. Evaluation

In [24]:
models = {key: BertHarmon(f"./{key}-nim", "./nim-tokenizer")
          for key in tokenized_datasets.keys()}

loading configuration file ./0-nim/config.json
Model config BertConfig {
  "_name_or_path": "./0-nim",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 12,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.16.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 66
}

loading weights file ./0-nim/pytorch_model.bin
All model checkpoint weights were used when initializing BertForMaskedLM.

All the weights of BertForMaskedLM were initialized from the model checkpoint at ./0-nim.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Be

In [31]:
# BERT Nim player
def nim_bert(_st, random_chance=None):
    lk = {'a':0, 'b':1, 'c':2}
    move = model.pipeline(f"a{_st[0]}/b{_st[1]}/c{_st[2]}" + " Q - [MASK]")[0]
    try:
        r = move["token_str"]
        r = r.replace(" ","")
        
        return int(r[1:]), lk[r[0]]
        
    except:
        print(f'impos bert move {move}, returning random move')
        return nim_random(_st)

In [32]:
Engines["BERT"] = nim_bert

In [None]:
for key in tokenized_datasets.keys():
    model = models[key]
    
    roster = generate_roster(list(Engines.keys()))
    print(key)
    for matchup in roster:
        
        play_games(200, *matchup, random_chance=int(key) / 100)

0
200 games,   Random    0      Guru  200
200 games,   Random    0  Qlearner  200
200 games,   Random    6      BERT  194
200 games,     Guru  200    Random    0
200 games,     Guru  190  Qlearner   10
200 games,     Guru  194      BERT    6
200 games, Qlearner  197    Random    3
200 games, Qlearner  186      Guru   14
200 games, Qlearner  187      BERT   13
200 games,     BERT  195    Random    5
200 games,     BERT   75      Guru  125
200 games,     BERT   76  Qlearner  124
10
200 games,   Random   13      Guru  187
200 games,   Random    8  Qlearner  192
200 games,   Random   26      BERT  174
200 games,     Guru  189    Random   11
200 games,     Guru  154  Qlearner   46
200 games,     Guru  147      BERT   53
200 games, Qlearner  191    Random    9
200 games, Qlearner  148      Guru   52
200 games, Qlearner  147      BERT   53
200 games,     BERT  185    Random   15
200 games,     BERT   89      Guru  111
200 games,     BERT   79  Qlearner  121
20
200 games,   Random   25      Gu