In [1]:
import torch
import torch.nn as nn
import torch.utils.data as tud
from tqdm.notebook import tqdm
from pprint import pprint
import matplotlib.pyplot as plt
import numpy as np
import importlib
import sys
sys.path.append("../../")
import autoregressive

In [2]:
# chess

# Read the whole book into memory as one UTF-8 string, then print its size in
# characters followed by the first 1000 characters as a quick sanity check.
with open("chess_book.txt", mode = "r", encoding = "utf-8") as book_file:
    text = book_file.read()
print(len(text), text[:1000], sep = "\n")

556948
The Project Gutenberg EBook of Chess Strategy, by Edward Lasker

This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever.  You may copy it, give it away or
re-use it under the terms of the Project Gutenberg License included
with this eBook or online at www.gutenberg.org/license


Title: Chess Strategy

Author: Edward Lasker

Translator: J. Du Mont

Release Date: November 11, 2012 [EBook #5614]

Language: English


*** START OF THIS PROJECT GUTENBERG EBOOK CHESS STRATEGY ***




Produced by John Mamoun <mamounjo@umdnj.edu>, Charles
Franks, and the Online Distributed Proofreaders website.








INFORMATION ABOUT THIS E-TEXT EDITION



The following is an e-text of "Chess Strategy," second edition, (1915)
by Edward Lasker, translated by J. Du Mont.

This e-text contains the 167 chess and checkers board game
diagrams appearing in the original book, all in the form of
ASCII line drawings. The following is a key to the diagrams:

For chess 

In [3]:
# Total number of characters in the loaded book.
len(text)

556948

In [4]:
# Character-level vocabulary: every distinct character that occurs in the book.
# NOTE(review): a set of str has no stable iteration order across interpreter
# runs (string hash randomization) — confirm that autoregressive does not
# derive token ids from this order.
vocabulary = {character for character in text}

# Alternative vocabulary builders, kept for reference (disabled):
# from collections import Counter
# vocabulary = Counter(text)

# from nltk.lm import Vocabulary
# vocabulary = Vocabulary(text)

len(vocabulary)

91

In [5]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

device

device(type='cuda')

In [6]:
# Cut the book into overlapping character windows, one per start offset.
length = 17  # characters per window
# Start offsets run from 0 up to len(text) - length - 1, so the window that
# would end exactly on the last character of the book is not produced.
lines = [text[start:start + length] for start in range(len(text) - length)]
# Report the text size, the number of windows, and the first few windows.
print(len(text), len(lines), lines[:5], sep = "\n")

556948
556931
['The Project Guten', 'he Project Gutenb', 'e Project Gutenbe', ' Project Gutenber', 'Project Gutenberg']


# fit

In [7]:
# Re-import the local module so that edits made to it on disk take effect
# without restarting the kernel.
importlib.reload(autoregressive)    
# Build the LSTM model from the character vocabulary and move it to `device`.
net = autoregressive.LSTM(vocabulary)
# Alternative architecture, currently disabled:
# net = autoregressive.TransformerEncoder(vocabulary)
net.to(device)

# Convert the text windows into a tensor and show its shape.
encoded = net.text2tensor(lines)
print("encoded", encoded.shape)

# Train on the encoded windows; the returned `performance` is displayed in the
# next cell. "model.pt" is read back later with torch.load + load_state_dict,
# and "model.arch" is read back later with autoregressive.load_architecture.
performance = net.fit(encoded, save_path = "model.pt")
net.save_architecture("model.arch")

Model: Autoregressive LSTM
Tokens in the vocabulary: 91
Embedding dimension: 32
Hidden units: 128
Layers: 2
Dropout: 0.0
Trainable parameters: 229,852

encoded torch.Size([556931, 17])


  0%|          | 0/5 [00:00<?, ?it/s]

Training started
Epochs: 5
Learning rate: 0.0001
Weight decay: 0
Epoch | Train                 | Minutes
      | Loss     | Error Rate |
---------------------------------------


  0%|          | 0/5570 [00:00<?, ?it/s]

    1 |   1.9663 |     49.577 |     0.6


  0%|          | 0/5570 [00:00<?, ?it/s]

    2 |   1.3551 |     37.738 |     1.3


  0%|          | 0/5570 [00:00<?, ?it/s]

    3 |   1.2150 |     34.651 |     2.0


  0%|          | 0/5570 [00:00<?, ?it/s]

    4 |   1.1435 |     32.968 |     2.6


  0%|          | 0/5570 [00:00<?, ?it/s]

    5 |   1.0969 |     31.873 |     3.2


In [8]:
# Per-epoch training log returned by net.fit: one row per epoch with the train
# loss, error rate and elapsed minutes, plus the learning rate, weight decay
# and the model's hyperparameters repeated on every row — useful for
# reproducibility.

performance

Unnamed: 0,epoch,train_loss,train_error_rate,training_minutes,learning_rate,weight_decay,model,embedding_dimension,hidden_units,layers,dropout,parameters
0,1,1.966324,49.577461,0.649485,0.0001,0,Autoregressive LSTM,32,128,2,0.0,229852
1,2,1.355083,37.738236,1.301947,0.0001,0,Autoregressive LSTM,32,128,2,0.0,229852
2,3,1.215004,34.650601,1.950478,0.0001,0,Autoregressive LSTM,32,128,2,0.0,229852
3,4,1.143546,32.967594,2.59787,0.0001,0,Autoregressive LSTM,32,128,2,0.0,229852
4,5,1.096932,31.873248,3.245812,0.0001,0,Autoregressive LSTM,32,128,2,0.0,229852


In [9]:
# the input for testing: decode the first five encoded windows back to text;
# these same five windows are passed to the generation methods below

net.tensor2text(encoded[:5])

['The Project Guten',
 'he Project Gutenb',
 'e Project Gutenbe',
 ' Project Gutenber',
 'Project Gutenberg']

# predict

In [10]:
# Rebuild the model from the two files written by the fit cell.
importlib.reload(autoregressive)
net = autoregressive.load_architecture("model.arch")
# map_location remaps the stored tensors onto the current device, so a
# checkpoint holding CUDA tensors can still be loaded when `device` is the CPU.
net.load_state_dict(torch.load("model.pt", map_location = device))
net.to(device)
# Generate continuations for the first five windows with both progress bars
# turned off; returns the generated token indexes and their log-probabilities.
# NOTE(review): the scores shown for this call are identical to those printed
# by the beam_search cell — presumably predict wraps beam search; confirm in
# the autoregressive module.
idx, log_probabilities = net.predict(encoded[:5], main_progress_bar = False, progress_bar = 0)

# Decode the generated indexes back to text for display.
net.tensor2text(idx)

Model: Autoregressive LSTM
Tokens in the vocabulary: 91
Embedding dimension: 32
Hidden units: 128
Layers: 2
Dropout: 0.0
Trainable parameters: 229,852



["The Project Gutenberg the King's side",
 "he Project Gutenberg the King's side ",
 "e Project Gutenberg the King's side, ",
 " Project Gutenberg-tm with the King's",
 "Project Gutenberg-tm with the King's "]

In [11]:
# Scores returned by net.predict for the five prompts (a 5 x 5 tensor here).
log_probabilities

tensor([[ -9.4010, -11.3459, -11.4911, -11.7941, -12.9359],
        [ -9.5127, -10.6926, -10.7004, -10.8122, -10.9443],
        [-10.7119, -11.1233, -11.4759, -11.5428, -11.6310],
        [-11.4333, -11.6359, -11.6593, -12.5754, -12.7295],
        [-11.3023, -11.7619, -11.9283, -12.4930, -12.9769]], device='cuda:0')

# greedy_search

In [12]:
# Rebuild the model from the two files written by the fit cell.
importlib.reload(autoregressive)
net = autoregressive.load_architecture("model.arch")
# map_location remaps the stored tensors onto the current device, so a
# checkpoint holding CUDA tensors can still be loaded when `device` is the CPU.
net.load_state_dict(torch.load("model.pt", map_location = device))
net.to(device)
# Greedy decoding of the first five windows; returns one continuation and one
# log-probability per prompt.
indexes, log_probabilities = net.greedy_search(encoded[:5], progress_bar = False)

# Show the scores, then the decoded continuations.
print(log_probabilities)
net.tensor2text(indexes)

Model: Autoregressive LSTM
Tokens in the vocabulary: 91
Embedding dimension: 32
Hidden units: 128
Layers: 2
Dropout: 0.0
Trainable parameters: 229,852

tensor([-11.8130, -12.6617, -13.5937, -15.0422, -15.8397], device='cuda:0')


['The Project Gutenberg-tm the pawn at ',
 'he Project Gutenberg-tm the pawn at K',
 'e Project Gutenberg-tm the pawn at Kt',
 ' Project Gutenberg-tm the pawn at Kt5',
 'Project Gutenberg-tm the pawn at Kt5.']

# sample

In [13]:
# Rebuild the model from the two files written by the fit cell.
importlib.reload(autoregressive)
net = autoregressive.load_architecture("model.arch")
# map_location remaps the stored tensors onto the current device, so a
# checkpoint holding CUDA tensors can still be loaded when `device` is the CPU.
net.load_state_dict(torch.load("model.pt", map_location = device))
net.to(device)
# Draw one continuation per prompt for the first five windows.
# NOTE(review): sampling is presumably random and no seed is set in the cells
# shown, so this output is not expected to repeat between runs — confirm, and
# seed the generator if reproducibility matters.
indexes, log_probabilities = net.sample(encoded[:5], progress_bar = False)

# Show the scores, then the decoded continuations.
print(log_probabilities)
net.tensor2text(indexes)

Model: Autoregressive LSTM
Tokens in the vocabulary: 91
Embedding dimension: 32
Hidden units: 128
Layers: 2
Dropout: 0.0
Trainable parameters: 229,852

tensor([-16.9294, -23.2442, -12.3178, -25.7675, -31.6309], device='cuda:0')


['The Project Gutenberg-tm acteration.\n',
 'he Project Gutenberg-tm chances weens',
 'e Project Gutenberg-tm exchanges on t',
 ' Project Gutenberg, and the Knagy the',
 'Project Gutenberg Kt-B3 and rong remo']

# beam_search

In [14]:
# Rebuild the model from the two files written by the fit cell.
importlib.reload(autoregressive)
net = autoregressive.load_architecture("model.arch")
# map_location remaps the stored tensors onto the current device, so a
# checkpoint holding CUDA tensors can still be loaded when `device` is the CPU.
net.load_state_dict(torch.load("model.pt", map_location = device))
net.to(device)
# Beam search over the first five windows; returns several candidate
# continuations per prompt together with their log-probabilities.
indexes, log_probabilities = net.beam_search(encoded[:5], progress_bar = False)

print(log_probabilities)
# Each element of `indexes` holds the candidates for one prompt, so decode
# them group by group to keep the per-prompt nesting in the printout.
pprint([net.tensor2text(candidates) for candidates in indexes])

Model: Autoregressive LSTM
Tokens in the vocabulary: 91
Embedding dimension: 32
Hidden units: 128
Layers: 2
Dropout: 0.0
Trainable parameters: 229,852

tensor([[ -9.4010, -11.3459, -11.4911, -11.7941, -12.9359],
        [ -9.5127, -10.6926, -10.7004, -10.8122, -10.9443],
        [-10.7119, -11.1233, -11.4759, -11.5428, -11.6310],
        [-11.4333, -11.6359, -11.6593, -12.5754, -12.7295],
        [-11.3023, -11.7619, -11.9283, -12.4930, -12.9769]], device='cuda:0')
[["The Project Gutenberg the King's side",
  'The Project Gutenberg-tm with the Kin',
  'The Project Gutenberg-tm with the paw',
  "The Project Gutenberg the King's simp",
  'The Project Gutenberg-tm the pawns an'],
 ["he Project Gutenberg the King's side ",
  "he Project Gutenberg the King's side,",
  "he Project Gutenberg the King's side.",
  'he Project Gutenberg-tm with the King',
  'he Project Gutenberg-tm with the pawn'],
 ["e Project Gutenberg the King's side, ",
  "e Project Gutenberg the King's side. ",
  'e Project