In [1]:
import torch
import torch.nn as nn
import torch.utils.data as tud
from tqdm.notebook import tqdm
from pprint import pprint
import matplotlib.pyplot as plt
import numpy as np
import importlib
import sys
sys.path.append("../../")
import seq2seq

In [2]:
# chess

with open("data/chess_book.txt", encoding = "utf-8") as file:
    text = file.read()
print(len(text))
print(text[:1000])

556949
﻿The Project Gutenberg EBook of Chess Strategy, by Edward Lasker

This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever.  You may copy it, give it away or
re-use it under the terms of the Project Gutenberg License included
with this eBook or online at www.gutenberg.org/license


Title: Chess Strategy

Author: Edward Lasker

Translator: J. Du Mont

Release Date: November 11, 2012 [EBook #5614]

Language: English


*** START OF THIS PROJECT GUTENBERG EBOOK CHESS STRATEGY ***




Produced by John Mamoun <mamounjo@umdnj.edu>, Charles
Franks, and the Online Distributed Proofreaders website.








INFORMATION ABOUT THIS E-TEXT EDITION



The following is an e-text of "Chess Strategy," second edition, (1915)
by Edward Lasker, translated by J. Du Mont.

This e-text contains the 167 chess and checkers board game
diagrams appearing in the original book, all in the form of
ASCII line drawings. The following is a key to the diagrams:

For chess

In [3]:
len(text)

556949

In [4]:
vocabulary = set(text)
len(vocabulary)

92

In [5]:
char2i = {c:i for i, c in enumerate(sorted(vocabulary), 3)}
char2i["<PAD>"] = 0
char2i["<START>"] = 1
char2i["<END>"] = 2
print(len(char2i))
i2char = {i:c for i, c in enumerate(sorted(vocabulary), 3)}
i2char[0] = "<PAD>"
i2char[1] = "<START>"
i2char[2] = "<END>"
print(len(i2char))

95
95


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device

device(type='cuda')

In [7]:
length = 30
lines = []
for i in range(len(text))[:-length]:
    lines.append(text[i:length + i])
print(len(text))
print(len(lines))
print(lines[:5])
encoded = torch.tensor([[char2i[c] for c in l] for l in tqdm(lines)]).to(device).long()
print(encoded.shape)
source = encoded[:, :length // 2]
print(source.shape)
target = torch.cat((torch.ones(encoded.shape[0], 1).to(device).long(), encoded[:, length // 2:]), axis = 1)
print(target.shape)

556949
556919
['\ufeffThe Project Gutenberg EBook o', 'The Project Gutenberg EBook of', 'he Project Gutenberg EBook of ', 'e Project Gutenberg EBook of C', ' Project Gutenberg EBook of Ch']


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=556919.0), HTML(value='')))


torch.Size([556919, 30])
torch.Size([556919, 15])
torch.Size([556919, 16])


# fit

In [8]:
importlib.reload(seq2seq)  

net = seq2seq.Transformer(char2i, i2char)
net.to(device)
performance = net.fit(source, target, save_path = "checkpoints/seq2seq_transformer.pt", progress_bar = 0)
net.save_architecture("architectures/seq2seq_transformer.architecture")

Model: Transformer
Tokens in the input vocabulary: 95
Tokens in the output vocabulary: 95
Max sequence length: 32
Embedding dimension: 16
Feedforward dimension: 64
Encoder layers: 2
Decoder layers: 2
Attention heads: 2
Activation: relu
Dropout: 0.0
Trainable parameters: 20,591

Epoch | Train                 | Minutes
      | Loss     | Error Rate |
---------------------------------------
    1 |   1.6274 |     44.788 |      1.5
    2 |   1.3131 |     37.988 |      3.0
    3 |   1.2407 |     36.074 |      4.7
    4 |   1.2031 |     35.121 |      6.2
    5 |   1.1806 |     34.564 |      7.6


In [9]:
# includes all the information about the epoch and the model, useful for reproducibility

performance

Unnamed: 0,epoch,train_loss,train_error_rate,minutes,learning_rate,weight_decay,model,max_sequence_length,embedding_dimension,feedforward_dimension,encoder_layers,decoder_layers,attention_heads,activation,dropout,parameters
0,1,1.627416,44.787806,1.514237,0.001,0,Transformer,32,16,64,2,2,2,relu,0.0,20591
1,2,1.313123,37.988421,3.04305,0.001,0,Transformer,32,16,64,2,2,2,relu,0.0,20591
2,3,1.240702,36.074342,4.679946,0.001,0,Transformer,32,16,64,2,2,2,relu,0.0,20591
3,4,1.203115,35.121373,6.160605,0.001,0,Transformer,32,16,64,2,2,2,relu,0.0,20591
4,5,1.18064,34.563638,7.625306,0.001,0,Transformer,32,16,64,2,2,2,relu,0.0,20591


In [10]:
# input for testing

net.tensor2text(source[:5])

['\ufeffThe Project Gu',
 'The Project Gut',
 'he Project Gute',
 'e Project Guten',
 ' Project Gutenb']

# predict

In [11]:
importlib.reload(seq2seq)    
net = seq2seq.load_architecture("architectures/seq2seq_transformer.architecture")
net.load_state_dict(torch.load("checkpoints/seq2seq_transformer.pt"))
net.to(device)

idx, log_probabilities = net.predict(source[:5], progress_bar = 0)

net.tensor2text(idx)

Model: Transformer
Tokens in the input vocabulary: 95
Tokens in the output vocabulary: 95
Max sequence length: 32
Embedding dimension: 16
Feedforward dimension: 64
Encoder layers: 2
Decoder layers: 2
Attention heads: 2
Activation: relu
Dropout: 0.0
Trainable parameters: 20,591



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))




["<START>nd the Black's ",
 '<START> the Black the ',
 '<START>r the Black to ',
 "<START> the King's to ",
 "<START>ing the King's "]

# greedy_search

In [12]:
importlib.reload(seq2seq)    
net = seq2seq.load_architecture("architectures/seq2seq_transformer.architecture")
net.load_state_dict(torch.load("checkpoints/seq2seq_transformer.pt"))
net.to(device)
indexes, log_probabilities = net.greedy_search(source[:5], progress_bar = False)

print(log_probabilities)
net.tensor2text(indexes)

Model: Transformer
Tokens in the input vocabulary: 95
Tokens in the output vocabulary: 95
Max sequence length: 32
Embedding dimension: 16
Feedforward dimension: 64
Encoder layers: 2
Decoder layers: 2
Attention heads: 2
Activation: relu
Dropout: 0.0
Trainable parameters: 20,591

tensor([-16.3043, -16.0721, -17.6378, -16.8939, -14.0852], device='cuda:0')


['<START>res the pawn thiis t',
 '<START>er the pawn the   su',
 '<START>r the pawn the t m\ns',
 '<START> the pawn the p\n teA',
 '<START>ing the pawn thiis t']

# sample

In [13]:
importlib.reload(seq2seq)    
net = seq2seq.load_architecture("architectures/seq2seq_transformer.architecture")
net.load_state_dict(torch.load("checkpoints/seq2seq_transformer.pt"))
net.to(device)
indexes, log_probabilities = net.sample(source[:5], progress_bar = False)

print(log_probabilities)
net.tensor2text(indexes)

Model: Transformer
Tokens in the input vocabulary: 95
Tokens in the output vocabulary: 95
Max sequence length: 32
Embedding dimension: 16
Feedforward dimension: 64
Encoder layers: 2
Decoder layers: 2
Attention heads: 2
Activation: relu
Dropout: 0.0
Trainable parameters: 20,591

tensor([-33.5113, -24.8168, -29.2963, -34.1867, -28.5067], device='cuda:0')


['<START>t pawns.\n\nIP Bl 7 t.',
 '<START>ed the canificich t ',
 '<START>le the\ncastle t\n\nB-.',
 '<START>  pasten.\n\nThe t\nW\nt',
 '<START> King, the\nmoved t\nt']

# beam_search

In [14]:
importlib.reload(seq2seq)    
net = seq2seq.load_architecture("architectures/seq2seq_transformer.architecture")
net.load_state_dict(torch.load("checkpoints/seq2seq_transformer.pt"))
net.to(device)
indexes, log_probabilities = net.beam_search(source[:5], progress_bar = 0)

print(log_probabilities)
[net.tensor2text(t) for t in indexes]

Model: Transformer
Tokens in the input vocabulary: 95
Tokens in the output vocabulary: 95
Max sequence length: 32
Embedding dimension: 16
Feedforward dimension: 64
Encoder layers: 2
Decoder layers: 2
Attention heads: 2
Activation: relu
Dropout: 0.0
Trainable parameters: 20,591

tensor([[-15.0610, -15.7869, -16.5086, -16.5354, -16.6885],
        [-17.0994, -17.3677, -17.8005, -17.8175, -17.8557],
        [-15.5115, -15.5431, -15.5876, -15.8779, -16.3111],
        [-15.1215, -15.3521, -15.6821, -16.3405, -16.3609],
        [-13.0319, -13.7725, -13.9181, -14.0929, -14.1328]], device='cuda:0')


[["<START>nd the Black's ct t ",
  "<START>nd the Black's ct s ",
  "<START>nd the Black's t tac",
  "<START>nd the King's sforii",
  "<START>nd the Black's ct t\n"],
 ["<START> the King's the K t ",
  "<START> the King's the B t ",
  "<START> the King's the K t\n",
  "<START> the King's the K t-",
  "<START> the King's the Bl t"],
 ['<START>r the Black the   su',
  '<START>r the Black the  d  ',
  '<START>r the Black to t t t',
  '<START>r the Black the   ch',
  '<START>r the Black the  m\n '],
 ["<START> the King's Blachi\nt",
  "<START> the King's to t t t",
  "<START> the King's Blact\nt\n",
  "<START> the King's Blact\nt ",
  "<START> the King's Blact-W\n"],
 ["<START>ing the King's ct t ",
  '<START>ing the Black tcoaco',
  "<START>ing the King's ct ct",
  "<START>ing the King's ct s ",
  "<START>ing the King's t tt "]]