In [1]:
import torch
import torch.nn as nn
import torch.utils.data as tud
from tqdm.notebook import tqdm
from pprint import pprint
import matplotlib.pyplot as plt
import numpy as np
import importlib
import architectures

In [2]:
# chess

with open("data/pg5614.txt", encoding = "utf-8") as file:
    text = file.read()
print(len(text))
print(text[:1000])

556949
﻿The Project Gutenberg EBook of Chess Strategy, by Edward Lasker

This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever.  You may copy it, give it away or
re-use it under the terms of the Project Gutenberg License included
with this eBook or online at www.gutenberg.org/license


Title: Chess Strategy

Author: Edward Lasker

Translator: J. Du Mont

Release Date: November 11, 2012 [EBook #5614]

Language: English


*** START OF THIS PROJECT GUTENBERG EBOOK CHESS STRATEGY ***




Produced by John Mamoun <mamounjo@umdnj.edu>, Charles
Franks, and the Online Distributed Proofreaders website.








INFORMATION ABOUT THIS E-TEXT EDITION



The following is an e-text of "Chess Strategy," second edition, (1915)
by Edward Lasker, translated by J. Du Mont.

This e-text contains the 167 chess and checkers board game
diagrams appearing in the original book, all in the form of
ASCII line drawings. The following is a key to the diagrams:

For chess

In [3]:
len(text)

556949

In [4]:
vocabulary = set(text)
len(vocabulary)

92

In [5]:
char2i = {c:i for i, c in enumerate(sorted(vocabulary), 3)}
char2i["<PAD>"] = 0
char2i["<START>"] = 1
char2i["<END>"] = 2
print(len(char2i))
i2char = {i:c for i, c in enumerate(sorted(vocabulary), 3)}
i2char[0] = "<PAD>"
i2char[1] = "<START>"
i2char[2] = "<END>"
print(len(i2char))

95
95


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device

device(type='cuda')

In [7]:
length = 20
lines = []
for i in range(len(text))[:-length]:
    lines.append(text[i:length + i])
print(len(text))
print(len(lines))
print(lines[:5])
encoded = torch.tensor([[char2i[c] for c in l] for l in tqdm(lines)]).to(device).long()
print(encoded.shape)
source_1 = encoded[:, :length // 2]
print(source_1.shape)
target_1 = torch.cat((torch.ones(encoded.shape[0], 1).to(device).long(), encoded[:, length // 2:]), axis = 1)
print(target_1.shape)

556949
556929
['\ufeffThe Project Gutenbe', 'The Project Gutenber', 'he Project Gutenberg', 'e Project Gutenberg ', ' Project Gutenberg E']


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=556929.0), HTML(value='')))


torch.Size([556929, 20])
torch.Size([556929, 10])
torch.Size([556929, 11])


In [8]:
importlib.reload(architectures)    
net = architectures.Transformer(char2i, i2char, 
                                             embedding_dimension = 128, encoder_layers = 1,
                                             feedforward_dimension = 128, decoder_layers = 1)
net.to(device)
net.fit(source_1, target_1, epochs = 5, batch_size = 150, lr = 0.0001, verbose = 3, 
        save_path = "checkpoints/seq2seq_transformer.pt")

Net parameters: 347,359


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3713.0), HTML(value='')))


Epoch:    1, Loss: 1.7833
Y
['ct Gutenbe', 't Gutenber', ' Gutenberg', 'Gutenberg ', 'utenberg E']

forward
[' e mus  te', 'ahous   e ', ' aus  te  ', 'tas  te  h', 'at  te  at']

greedy_search
[' the pawn ', 'and the of', ' and the o', 'the the of', 'and the of']
tensor([ -9.9700, -10.2300, -10.6000,  -9.2800,  -9.7700], device='cuda:0')

sample
['rack; Q-K5', 'r me for t', ' B. R-R4.\n', 'the forawn', 'antuld pro']

beam_search
[[' the of th', 'he of the ', 'he the the', 'the of the', 'the of the'],
 [' the pawn ', 'he the the', ' of the th', 'the the th', 'and the th'],
 [' the the p', 'and the th', ' of the of', 'the the of', 'and the of'],
 [' the the o', 'and the of', ' the the o', 'the the on', 'the the of'],
 [' the the a', 'and the on', ' the the t', 'the the an', 'and the on']]
tensor([[ -9.5300,  -8.6800,  -9.0900,  -8.0300,  -8.8300],
        [ -9.9700,  -9.1600,  -9.2500,  -8.9600,  -9.4100],
        [-10.1300,  -9.7300,  -9.8000,  -9.2800,  -9.7700],
        [-10.1600, -

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3713.0), HTML(value='')))


Epoch:    2, Loss: 1.4209
Y
['ct Gutenbe', 't Gutenber', ' Gutenberg', 'Gutenberg ', 'utenberg E']

forward
[' t aut nte', 't out  te ', ' aut rte  ', 'tut nte  -', 'ot rtec -t']

greedy_search
[' the pawn ', 't of the p', ' and the p', 'the pawn t', 'on the paw']
tensor([ -8.5600,  -9.0600,  -9.6800,  -8.2300, -10.0200], device='cuda:0')

sample
['d warivom.', 'tiony\nthok', ' Whited an', 'of wendubt', 'in own 41.']

beam_search
[[' pawn the ', 'tion of th', ' of the th', 'the of the', 'ing the th'],
 [' the pawn ', 'tion the p', ' of the of', 'the pawn t', 'ing the of'],
 [' the of th', 'tion the o', ' of the pa', 'the the th', 'ing the pa'],
 [' the pawns', 'tion the t', ' of the pr', 'the the of', 'ould the p'],
 [' the the p', 't of the p', 'ould the p', 'the pawn a', 'ould the o']]
tensor([[-7.9000, -8.7600, -8.5900, -7.1800, -8.5400],
        [-8.5600, -8.8100, -8.7900, -8.2300, -8.8600],
        [-8.5700, -8.8700, -9.1600, -8.4800, -9.2700],
        [-9.2000, -9.0500, -9.5800,

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3713.0), HTML(value='')))


Epoch:    3, Loss: 1.3403
Y
['ct Gutenbe', 't Gutenber', ' Gutenberg', 'Gutenberg ', 'utenberg E']

forward
[' t Gut nte', 't out nter', ' aut nter ', 'tut nte  -', 'otenter -t']

greedy_search
[' the pawn ', 't of the p', ' and the p', 'the pawn t', 'one of the']
tensor([-8.5600, -8.5600, -9.2800, -7.9000, -9.3000], device='cuda:0')

sample
[' wous KP\nw', 'te, anme.\n', '. Aftence ', 'to an ing ', 'bilest of\n']

beam_search
[[' the pawn ', 'tions the ', ' the pawn ', 'the of the', 'ing the th'],
 [' the pawns', 'tion the p', ' of the of', 'the pawn t', 'ing the of'],
 [' the of th', 't of the p', ' of the pa', 'of the of ', 'ing the pa'],
 ['d the pawn', 'tion the o', ' the pawns', 'the pawns ', 'ing the Kt'],
 ['s the pawn', 'ting the p', ' of the pl', 'of the paw', 'ing the pl']]
tensor([[-8.5600, -8.3400, -8.4100, -6.9100, -9.0700],
        [-8.8100, -8.3600, -8.5600, -7.9000, -9.1700],
        [-8.8200, -8.5600, -8.6800, -8.0500, -9.3900],
        [-8.8400, -8.7300, -8.9100, -8

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3713.0), HTML(value='')))


Epoch:    4, Loss: 1.2926
Y
['ct Gutenbe', 't Gutenber', ' Gutenberg', 'Gutenberg ', 'utenberg E']

forward
['dt Gut n e', 'tiout nter', ' out nter ', 'tut nter -', 'utenter -t']

greedy_search
['d be the p', 'tion the p', ' of the pa', 'the pawn t', 'utent of t']
tensor([-10.7300,  -8.1300,  -8.1200,  -8.0700,  -9.1100], device='cuda:0')

sample
[' and se va', 't Gut toen', 're in be Q', 'drasidsjec', 'ounterango']

beam_search
[['d, and the', 'ting of th', ' the pawn ', 'the of the', 'utent the '],
 [' the pawn ', 'ting the p', ' of the pa', 'the pawns ', 'uteng the '],
 [' the pawns', 'ting the o', ' of the of', 'of the of ', 'utent of t'],
 [' the of th', 'tion the p', ' the pawns', 'the pawn t', 'utention t'],
 ['ct Gut and', 'ting the t', ' of the pl', 'of the paw', 'utentions ']]
tensor([[ -8.7000,  -7.6100,  -8.0900,  -7.2800,  -8.4600],
        [ -8.7700,  -7.8400,  -8.1200,  -7.8800,  -8.7500],
        [ -8.9200,  -8.0000,  -8.2000,  -8.0400,  -9.1100],
        [ -9.3800,  -

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3713.0), HTML(value='')))


Epoch:    5, Loss: 1.2582
Y
['ct Gutenbe', 't Gutenber', ' Gutenberg', 'Gutenberg ', 'utenberg E']

forward
['ct Gut n e', 't out nter', ' out nter ', 'tut nter -', 'utenter -a']

greedy_search
['ct Gut and', 't of the p', ' of the pa', 'the pawn t', 'utent the ']
tensor([-8.2600, -7.9400, -8.2000, -8.3800, -8.2400], device='cuda:0')

sample
['n obmight ', 'ct If ins ', ' dratim in', 'move is a ', 'orks.\n\nIn ']

beam_search
[['ct of the ', 't of the p', ' the King ', 'of the Kin', 'utent the '],
 ['ction the ', 'ting the p', ' the pawn ', 'the pawns ', 'utent of t'],
 ['ct Gut the', 't of the K', " the King'", 'of the paw', 'utention t'],
 ['ct Gut and', 'tion the p', ' of the of', 'of the pla', 'utentions '],
 ['ction ther', 't of the o', ' of the Ki', 'the pawn t', 'utent the\n']]
tensor([[ -7.0400,  -7.9400,  -7.7700,  -8.0400,  -8.2400],
        [ -7.0500,  -8.3100,  -7.8500,  -8.1000,  -9.3800],
        [ -7.7900,  -8.3900,  -8.0800,  -8.1600,  -9.6700],
        [ -8.2600,  -8.

In [9]:
importlib.reload(architectures)    
net = architectures.Transformer(char2i, i2char, 
                                            embedding_dimension = 128, encoder_layers = 1,
                                            feedforward_dimension = 128, decoder_layers = 1)
net.to(device)
net.load_state_dict(torch.load("checkpoints/seq2seq_transformer.pt"))
test = net.beam_search(source_1[:10000], verbose = 1, batch_size = 50)

test[1][:20]

Net parameters: 347,359


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19.0), HTML(value='')))




tensor([[-17.0280, -18.0812, -18.2160, -18.2463, -18.5167],
        [-12.7134, -13.1868, -13.1969, -13.4203, -13.4516],
        [-17.5389, -18.4222, -18.6140, -18.6276, -18.6539],
        [-15.4512, -16.2743, -16.7598, -16.7968, -16.9277],
        [-17.8137, -18.8173, -19.1984, -19.2659, -19.8412],
        [-16.8233, -17.0049, -17.8791, -18.0772, -18.1335],
        [-16.1091, -16.6924, -17.2834, -17.2901, -17.3117],
        [-18.0249, -19.2058, -19.2838, -19.5513, -19.6117],
        [-16.8597, -16.8829, -16.9050, -17.0564, -17.2969],
        [-17.6703, -18.1156, -18.2650, -18.3249, -18.4903],
        [-19.0326, -19.0954, -19.5943, -19.6219, -19.7543],
        [-19.0347, -19.3764, -19.4285, -19.6189, -19.6807],
        [ -8.6593,  -9.3941,  -9.5499, -10.5886, -10.6136],
        [-17.7744, -17.9958, -18.1085, -18.1423, -18.1725],
        [ -6.8340,  -7.6262,  -7.7715,  -7.8911,  -8.0361],
        [-17.8210, -18.3382, -18.4014, -18.4168, -18.4532],
        [-18.1568, -18.6705, -18.9180, -