In [1]:
import torch
import torch.nn as nn
import torch.utils.data as tud
from tqdm.notebook import tqdm
from pprint import pprint
import matplotlib.pyplot as plt
import numpy as np
import importlib
import sys
sys.path.append("../../")
import autoregressive

In [2]:
# chess

with open("data/chess_book.txt", encoding = "utf-8") as file:
    text = file.read()
print(len(text))
print(text[:1000])

556949
﻿The Project Gutenberg EBook of Chess Strategy, by Edward Lasker

This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever.  You may copy it, give it away or
re-use it under the terms of the Project Gutenberg License included
with this eBook or online at www.gutenberg.org/license


Title: Chess Strategy

Author: Edward Lasker

Translator: J. Du Mont

Release Date: November 11, 2012 [EBook #5614]

Language: English


*** START OF THIS PROJECT GUTENBERG EBOOK CHESS STRATEGY ***




Produced by John Mamoun <mamounjo@umdnj.edu>, Charles
Franks, and the Online Distributed Proofreaders website.








INFORMATION ABOUT THIS E-TEXT EDITION



The following is an e-text of "Chess Strategy," second edition, (1915)
by Edward Lasker, translated by J. Du Mont.

This e-text contains the 167 chess and checkers board game
diagrams appearing in the original book, all in the form of
ASCII line drawings. The following is a key to the diagrams:

For chess

In [3]:
len(text)

556949

In [4]:
vocabulary = set(text)
len(vocabulary)

92

In [5]:
char2i = {c:i for i, c in enumerate(sorted(vocabulary), 3)}
char2i["<PAD>"] = 0
char2i["<START>"] = 1
char2i["<END>"] = 2
i2char = {i:c for i, c in enumerate(sorted(vocabulary), 3)}
i2char[0] = "<PAD>"
i2char[1] = "<START>"
i2char[2] = "<END>"

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device

device(type='cuda')

In [7]:
length = 33
lines = []
for i in range(len(text))[:-length]:
    lines.append(text[i:length + i])
print(len(text))
print(len(lines))
print(lines[:5])
encoded = torch.tensor([[char2i[c] for c in l] for l in tqdm(lines)]).to(device).long()
print(encoded.shape)

556949
556916
['\ufeffThe Project Gutenberg EBook of C', 'The Project Gutenberg EBook of Ch', 'he Project Gutenberg EBook of Che', 'e Project Gutenberg EBook of Ches', ' Project Gutenberg EBook of Chess']


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=556916.0), HTML(value='')))


torch.Size([556916, 33])


In [8]:
importlib.reload(autoregressive)    
net = autoregressive.TransformerEncoder(char2i, i2char)
net.to(device)
# using a subsample as "dev" set
performance = net.fit(encoded, encoded[:1000], save_path = "checkpoints/autoregressive_transformer.pt")

Model: Autoregressive Transformer Encoder
Tokens in the in vocabulary: 95
Tokens in the out vocabulary: 95
Max sequence length: 32
Embedding dimension: 32
Feedforward dimension: 128
Layers: 2
Attention heads: 2
Activation: relu
Dropout: 0.0
Trainable parameters: 45,311

Epoch | Train                 | Development           | Training time
      | Loss     | Error Rate | Loss     | Error Rate |
---------------------------------------------------------------------
    1 |   2.1008 |     53.381 |   3.1091 |     76.778 |         78.23
    2 |   1.5755 |     44.286 |   2.9905 |     74.178 |        153.83
    3 |   1.4174 |     40.473 |   2.8962 |     72.456 |        228.42
    4 |   1.3301 |     38.326 |   2.8123 |     70.347 |        301.76
    5 |   1.2726 |     36.896 |   2.7413 |     69.388 |        378.12


In [9]:
importlib.reload(autoregressive)    
net = autoregressive.TransformerEncoder(char2i, i2char)
net.load_state_dict(torch.load("checkpoints/autoregressive_transformer.pt"))
net.to(device)
indexes, log_probabilities = net.greedy_search(encoded[:5, :12])

print(log_probabilities)
net.tensor2text(indexes)

Model: Autoregressive Transformer Encoder
Tokens in the in vocabulary: 95
Tokens in the out vocabulary: 95
Max sequence length: 32
Embedding dimension: 32
Feedforward dimension: 128
Layers: 2
Attention heads: 2
Activation: relu
Dropout: 0.0
Trainable parameters: 45,311

tensor([-17.3235, -17.3223, -17.8486, -15.1379, -17.0083], device='cuda:0')


['\ufeffThe Project of the play the paw',
 'The Project the pawn an the pawn',
 'he Project Gut the play the pawn',
 'e Project Gut the pawn the pawn ',
 ' Project Gut the pawn the play t']

In [10]:
importlib.reload(autoregressive)    
net = autoregressive.TransformerEncoder(char2i, i2char)
net.load_state_dict(torch.load("checkpoints/autoregressive_transformer.pt"))
net.to(device)
indexes, log_probabilities = net.sample(encoded[:5, :12])

print(log_probabilities)
net.tensor2text(indexes)

Model: Autoregressive Transformer Encoder
Tokens in the in vocabulary: 95
Tokens in the out vocabulary: 95
Max sequence length: 32
Embedding dimension: 32
Feedforward dimension: 128
Layers: 2
Attention heads: 2
Activation: relu
Dropout: 0.0
Trainable parameters: 45,311

tensor([-35.4042, -37.3997, -38.7031, -39.0357, -51.5738], device='cuda:0')


['\ufeffThe Project\nof removerate a pos',
 'The Project coning the or thotk.',
 'he Project Guagre al ady would Q',
 'e Project Gurse me precculy be t',
 ' Project Gut sijelence iticaugti']

In [11]:
importlib.reload(autoregressive)    
net = autoregressive.TransformerEncoder(char2i, i2char)
net.load_state_dict(torch.load("checkpoints/autoregressive_transformer.pt"))
net.to(device)
indexes, log_probabilities = net.beam_search(encoded[:5, :12])

print(log_probabilities)
pprint([net.tensor2text(t) for t in indexes])

Model: Autoregressive Transformer Encoder
Tokens in the in vocabulary: 95
Tokens in the out vocabulary: 95
Max sequence length: 32
Embedding dimension: 32
Feedforward dimension: 128
Layers: 2
Attention heads: 2
Activation: relu
Dropout: 0.0
Trainable parameters: 45,311

tensor([[-12.3343, -14.5509, -15.1686, -15.4284, -15.7189],
        [-14.2857, -14.3190, -14.7305, -15.1044, -15.2126],
        [-14.9410, -15.0016, -16.4287, -16.5968, -16.8477],
        [-15.1379, -15.1908, -15.6329, -16.5546, -16.5848],
        [-12.7563, -15.0439, -15.6290, -15.8614, -16.1553]], device='cuda:0')
[['\ufeffThe Project of the King.\n\n     ',
  "\ufeffThe Project of the King's of th",
  "\ufeffThe Project of the King's the p",
  "\ufeffThe Project of the King's the K",
  "\ufeffThe Project of the King's the t"],
 ['The Project of the the pawn the ',
  "The Project of the King's of the",
  "The Project of the King's the mo",
  "The Project of the King's the pa",
  "The Project of the King's the pr"],
 [

In [12]:
idx, log_probabilities = net.predict(encoded[:5, :12])

net.tensor2text(idx)

['\ufeffThe Project of the King.\n\n     ',
 'The Project of the the pawn the ',
 "he Project Gut of the the King's",
 'e Project Gut the pawn the pawn ',
 ' Project Gut of the King.\n\n     ']