In [1]:
import torch
import torch.nn as nn
import torch.utils.data as tud
from tqdm.notebook import tqdm
from pprint import pprint
import matplotlib.pyplot as plt
import numpy as np
import importlib
import language_modelling_seq2seq

In [2]:
# chess

with open("data/pg5614.txt", encoding = "utf-8") as file:
    text = file.read()
print(len(text))
print(text[:1000])

556949
﻿The Project Gutenberg EBook of Chess Strategy, by Edward Lasker

This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever.  You may copy it, give it away or
re-use it under the terms of the Project Gutenberg License included
with this eBook or online at www.gutenberg.org/license


Title: Chess Strategy

Author: Edward Lasker

Translator: J. Du Mont

Release Date: November 11, 2012 [EBook #5614]

Language: English


*** START OF THIS PROJECT GUTENBERG EBOOK CHESS STRATEGY ***




Produced by John Mamoun <mamounjo@umdnj.edu>, Charles
Franks, and the Online Distributed Proofreaders website.








INFORMATION ABOUT THIS E-TEXT EDITION



The following is an e-text of "Chess Strategy," second edition, (1915)
by Edward Lasker, translated by J. Du Mont.

This e-text contains the 167 chess and checkers board game
diagrams appearing in the original book, all in the form of
ASCII line drawings. The following is a key to the diagrams:

For chess

In [3]:
len(text)

556949

In [4]:
vocabulary = set(text)
len(vocabulary)

92

In [5]:
char2i = {c:i for i, c in enumerate(sorted(vocabulary), 3)}
char2i["<PAD>"] = 0
char2i["<START>"] = 1
char2i["<END>"] = 2
print(len(char2i))
i2char = {i:c for i, c in enumerate(sorted(vocabulary), 3)}
i2char[0] = "<PAD>"
i2char[1] = "<START>"
i2char[2] = "<END>"
print(len(i2char))

95
95


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device

device(type='cuda')

In [7]:
length = 20
lines = []
for i in range(len(text))[:-length]:
    lines.append(text[i:length + i])
print(len(text))
print(len(lines))
print(lines[:5])
encoded = torch.tensor([[char2i[c] for c in l] for l in tqdm(lines)]).to(device).long()
print(encoded.shape)
source_1 = encoded[:, :length // 2]
print(source_1.shape)
target_1 = torch.cat((torch.ones(encoded.shape[0], 1).to(device).long(), encoded[:, length // 2:]), axis = 1)
print(target_1.shape)

556949
556929
['\ufeffThe Project Gutenbe', 'The Project Gutenber', 'he Project Gutenberg', 'e Project Gutenberg ', ' Project Gutenberg E']


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=556929.0), HTML(value='')))


torch.Size([556929, 20])
torch.Size([556929, 10])
torch.Size([556929, 11])


In [8]:
importlib.reload(language_modelling_seq2seq)    
net = language_modelling_seq2seq.Transformer(char2i, i2char, 
                                             embedding_dimension = 128, encoder_layers = 1,
                                             feedforward_dimension = 128, decoder_layers = 1)
net.to(device)
net.fit(source_1, target_1, epochs = 5, batch_size = 150, lr = 0.0001, verbose = 3, 
        save_path = "checkpoints/seq2seq_transformer.pt")

Net parameters: 347,359


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3713.0), HTML(value='')))


Epoch:    1, Loss: 1.8000
Y
['ct Gutenbe', 't Gutenber', ' Gutenberg', 'Gutenberg ', 'utenberg E']

forward
[' k oos  te', 'ehoot  te ', ' tos  te  ', 'tot  te  a', 'ts  te  et']

greedy_search
[' the the t', 'e the the ', ' the the t', 'the the th', 't of the t']
tensor([-10.0200,  -8.0500,  -8.1900,  -7.9200, -10.5000], device='cuda:0')

sample
[' the weran', 'aus R-Kt5 ', '. P-B3, an', 'hhe\nthat p', 'wIusthe or']

beam_search
[['n the the ', 'e the the ', ' the of th', 'the the th', 'n the the '],
 ['s the the ', 'on the the', ' the the t', 'the of the', 'the the th'],
 ['r the the ', 'he the the', ' the pawn ', 'of the the', 'on the the'],
 ['n the pawn', 'ing the th', ' the the p', 'the the of', 'the the an'],
 [' the the t', 'ing the an', ' of the th', 'the the an', 'n the the\n']]
tensor([[ -8.6600,  -8.0500,  -7.8400,  -7.9200,  -8.6100],
        [ -8.8500,  -8.6900,  -8.1900,  -7.9600,  -9.3500],
        [ -9.0400,  -8.7900,  -8.3600,  -8.0500,  -9.4600],
        [-10.0200, 

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3713.0), HTML(value='')))


Epoch:    2, Loss: 1.4276
Y
['ct Gutenbe', 't Gutenber', ' Gutenberg', 'Gutenberg ', 'utenberg E']

forward
['rt out r e', 't out nte ', ' tut nte  ', 'aut nte  e', 'nserte  et']

greedy_search
['r the pawn', 't of the p', ' the the p', 'and the th', 'nter the t']
tensor([ -9.3700,  -9.1400,  -9.7300,  -9.4500, -11.8700], device='cuda:0')

sample
['Becasces, ', 'opposiond ', "'s any ont", 'oulow Whit', '. I s undi']

beam_search
[['nt of the ', 't and the ', ' the pawn ', 'the of the', 'moves the '],
 ['r the the ', 't of the p', ' the of th', 'of the the', 'mover the '],
 ['r the pawn', 't of the t', ' of the th', 'the the th', 'movent the'],
 ['nt the paw', 't of the a', ' the the p', 'the pawn t', 'moves the\n'],
 ['r the of t', 't of the o', ' the the t', 'the pawn a', 'mover the\n']]
tensor([[ -8.8500,  -8.6800,  -8.6000,  -7.9000,  -9.9500],
        [ -8.9900,  -9.1400,  -8.7500,  -8.0500,  -9.9700],
        [ -9.3700,  -9.1800,  -8.9200,  -8.9800, -10.5800],
        [-10.3600, 

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3713.0), HTML(value='')))


Epoch:    3, Loss: 1.3404
Y
['ct Gutenbe', 't Gutenber', ' Gutenberg', 'Gutenberg ', 'utenberg E']

forward
['rt outer e', 't oute te ', ' tuterte  ', 'oute te  -', 'nserte e-t']

greedy_search
['r the pawn', 't of the p', ' the pawn ', 'of the paw', 'ntage the ']
tensor([ -8.8400,  -8.2900,  -7.9400,  -8.3100, -10.7500], device='cuda:0')

sample
['rdis no Bl', 'an the ly ', 'st\nredvram', "Black's at", 'onterechut']

beam_search
[['r the the ', 't of the p', ' the pawn ', 'of the the', 'ing of the'],
 ['r the pawn', 't of the t', ' of the th', 'of the pla', 'ing the th'],
 ['r the play', 't of the o', ' the the p', 'of the paw', 'ing the mo'],
 ['nd the paw', 't of the a', ' the pawns', 'of the and', 'ing the of'],
 ['r the open', 't of the w', ' the the t', 'of the ope', 'ing the pa']]
tensor([[ -8.5800,  -8.2900,  -7.9400,  -7.3100,  -9.1100],
        [ -8.8400,  -8.4300,  -8.1200,  -8.2200,  -9.7800],
        [ -8.9600,  -8.4800,  -8.9000,  -8.3100, -10.3800],
        [-10.2300,  

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3713.0), HTML(value='')))


Epoch:    4, Loss: 1.2927
Y
['ct Gutenbe', 't Gutenber', ' Gutenberg', 'Gutenberg ', 'utenberg E']

forward
['rt Gutente', 't iutente ', ' tutente g', 'outente  -', 'osenterg-t']

greedy_search
['r the pawn', 't in the o', ' the the p', 'of the the', 'ongentre o']
tensor([ -8.7100,  -9.0800,  -9.5800,  -7.4800, -13.7700], device='cuda:0')

sample
['rgway, the', 't ther P-Q', '.\n\nWhite, ', 'taing he r', 'ure on for']

beam_search
[['ct of the ', 't of the o', ' the pawn ', 'of the the', '.\n\n       '],
 ['r the open', 't of the p', 'ion the th', 'of the to ', '.\n\n\n      '],
 ['ct of the\n', 't of the t', 'ion the op', 'of the paw', '.\n\n      1'],
 ['ct Gutent ', 't of the a', 'ion the pa', 'of the ope', 'ing of the'],
 ['ct of ther', 't of the w', ' the pawns', 'and the th', '.\n\n      2']]
tensor([[ -8.1400,  -8.3800,  -8.2100,  -7.4800,  -4.1800],
        [ -9.6800,  -8.3800,  -8.2400,  -8.4900,  -5.6000],
        [ -9.9700,  -8.3900,  -8.9900,  -8.5700,  -9.0700],
        [

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3713.0), HTML(value='')))


Epoch:    5, Loss: 1.2615
Y
['ct Gutenbe', 't Gutenber', ' Gutenberg', 'Gutenberg ', 'utenberg E']

forward
['ct Gutente', 't outenter', ' tutenterg', 'outenterge', 'otenterg-t']

greedy_search
['ct Gutent ', 't of the p', ' the pawn ', 'of the paw', 'ompless th']
tensor([ -7.6700,  -7.3300,  -7.7800,  -8.4800, -11.1900], device='cuda:0')

sample
['ct the DLa', 'inneith by', ' it is Bis', '5. The pri', 'e.\n\n8\n\n   ']

beam_search
[['ct of the ', 't of the p', " the King'", "the King's", 'entrationg'],
 ['ction the ', 't of the K', ' the King ', 'of the the', 'entrations'],
 ['ct Gutent ', 't of the t', ' the pawn ', 'of the Kin', 'entration '],
 ['ct Gutend ', 'tion the p', ' the pawns', 'of the paw', 'ing the th'],
 ['ct of the\n', 't of the o', 'ion the pa', 'the King t', 'ing the pr']]
tensor([[ -6.7500,  -7.3300,  -7.3600,  -7.1300, -10.0000],
        [ -7.1300,  -7.6200,  -7.6500,  -7.6400, -10.1600],
        [ -7.6700,  -7.8000,  -7.7800,  -7.9400, -10.5200],
        [ -8.220

In [9]:
importlib.reload(language_modelling_seq2seq)    
net = language_modelling_seq2seq.Transformer(char2i, i2char, 
                                            embedding_dimension = 128, encoder_layers = 1,
                                            feedforward_dimension = 128, decoder_layers = 1)
net.to(device)
net.load_state_dict(torch.load("checkpoints/seq2seq_transformer.pt"))
test = net.beam_search(source_1[:10000], verbose = 1, batch_size = 50)

test[1][:20]

Net parameters: 347,359


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19.0), HTML(value='')))




tensor([[-17.7795, -18.8638, -19.3273, -19.3379, -19.3922],
        [-17.3115, -17.5989, -17.9131, -18.1782, -18.6752],
        [-18.3636, -18.5197, -18.9098, -19.1159, -19.4686],
        [-18.1090, -18.3892, -19.4127, -19.4142, -19.4159],
        [-18.7290, -19.0163, -19.5060, -19.6826, -20.2288],
        [-18.2854, -19.0116, -19.1073, -19.7666, -19.7727],
        [-17.5524, -18.1625, -18.2581, -18.3883, -19.1537],
        [-19.2922, -19.3426, -19.4420, -19.4885, -19.7252],
        [-17.2028, -18.9227, -18.9658, -18.9668, -19.1886],
        [-17.8737, -18.6877, -19.3677, -19.5201, -19.6070],
        [-18.2874, -19.0708, -19.4179, -19.4984, -19.5076],
        [-20.5747, -20.6049, -21.1766, -21.2433, -21.5175],
        [ -4.1325,  -6.9096,  -7.5791,  -8.0838,  -8.3037],
        [-18.5519, -18.8185, -18.8756, -19.0184, -19.5266],
        [-16.8947, -17.0268, -17.2045, -17.6133, -17.6189],
        [-17.8384, -17.8389, -17.8906, -19.0541, -19.1177],
        [-18.5707, -18.7196, -18.7317, -