In [1]:
%reload_ext autoreload
%autoreload 2
import chess.pgn
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

from chess_dataset import ChessDataset, ChessPairDataset
import helper

import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torchvision

In [2]:
train_chess_pair_dataset = ChessPairDataset("dataset/npy/")
test_chess_pair_dataset = ChessPairDataset("dataset/npy/", train=False)
train_chess_dataset = ChessDataset("dataset/npy/")
test_chess_dataset = ChessDataset("dataset/npy/", train=False)

In [3]:
len(train_chess_pair_dataset), len(test_chess_pair_dataset)

(32000, 8000)

In [4]:
len(train_chess_dataset), len(test_chess_dataset)

(2739, 686)

In [5]:
chess_trainloader = DataLoader(train_chess_dataset, batch_size=64, shuffle=True)
chess_testloader = DataLoader(test_chess_dataset, batch_size=64, shuffle=False)
chess_pair_trainloader = DataLoader(train_chess_pair_dataset, batch_size=64, shuffle=True)
chess_pair_testloader = DataLoader(test_chess_pair_dataset, batch_size=64, shuffle=True)

In [6]:
def freeze(layer):
  layer.weight.requires_grad = False
  layer.bias.requires_grad = False

def unfreeze(layer):
  layer.weight.requires_grad = True
  layer.bias.requires_grad = True
  
# Dense encoding layers
en1 = nn.Linear(773, 600)
en2 = nn.Linear(600, 400)
en3 = nn.Linear(400, 200)
en4 = nn.Linear(200, 100)

# Dense decoding layers
de1 = nn.Linear(600, 773)
de2 = nn.Linear(400, 600)
de3 = nn.Linear(200, 400)
de4 = nn.Linear(100, 200)

# Autoencoder train
# 773 - 600 - 773
ae1 = nn.Sequential(en1, nn.ReLU(), 
                    de1, nn.Sigmoid())
helper.train_autoencoder(ae1, chess_trainloader, chess_testloader, 10, 2)

# * means freeze
# 773 *- 600 - 400 - 600 *- 773
freeze(en1)
freeze(de1)
ae2 = nn.Sequential(en1, nn.ReLU(), en2, nn.ReLU(), 
                    de2, nn.ReLU(), de1, nn.Sigmoid())
helper.train_autoencoder(ae2, chess_trainloader, chess_testloader, 10, 2)

# 773 *- 600 *- 400 - 200 - 400 *- 600 *- 773
freeze(en2)
freeze(de2)
ae3 = nn.Sequential(en1, nn.ReLU(), en2, nn.ReLU(), en3, nn.ReLU(), 
                    de3, nn.ReLU(), de2, nn.ReLU(), de1, nn.Sigmoid())
helper.train_autoencoder(ae3, chess_trainloader, chess_testloader, 10, 2)

# 773 *- 600 *- 400 *- 200 - 100 - 200 *- 400 *- 600 *- 773
freeze(en3)
freeze(de3)
ae4 = nn.Sequential(en1, nn.ReLU(), en2, nn.ReLU(), en3, nn.ReLU(), en4, nn.ReLU(), 
                    de4, nn.ReLU(), de3, nn.ReLU(), de2, nn.ReLU(), de1, nn.Sigmoid())
helper.train_autoencoder(ae4, chess_trainloader, chess_testloader, 10, 2)

unfreeze(en1)
unfreeze(en1)
unfreeze(en3)
pos2vec = nn.Sequential(en1, nn.ReLU(), en2, nn.ReLU(), en3, nn.ReLU(), en4, nn.ReLU())

Epoch: 0 | Loss: 0.19825418293476105: 100%|█████████████████████████████████████| 43/43 [00:01<00:00, 29.68it/s]
Val loss: 0.19519387185573578: 100%|████████████████████████████████████████████| 11/11 [00:00<00:00, 37.84it/s]
Epoch: 1 | Loss: 0.08829965442419052: 100%|█████████████████████████████████████| 43/43 [00:01<00:00, 29.75it/s]
Val loss: 0.08668049424886703: 100%|████████████████████████████████████████████| 11/11 [00:00<00:00, 38.35it/s]
Epoch: 2 | Loss: 0.04529280960559845: 100%|█████████████████████████████████████| 43/43 [00:01<00:00, 28.97it/s]
Val loss: 0.04661478474736214: 100%|████████████████████████████████████████████| 11/11 [00:00<00:00, 41.13it/s]
Epoch: 3 | Loss: 0.04260724410414696: 100%|█████████████████████████████████████| 43/43 [00:01<00:00, 30.62it/s]
Val loss: 0.035814229398965836: 100%|███████████████████████████████████████████| 11/11 [00:00<00:00, 32.31it/s]
Epoch: 4 | Loss: 0.03489038348197937: 100%|█████████████████████████████████████| 43/43 [00:01<0

In [7]:
class DeepChess(nn.Module):
  def __init__(self, pos2vec):
    super(DeepChess, self).__init__()
    self.pos2vec = pos2vec
    self.base = nn.Sequential(nn.Linear(200, 400), nn.ReLU(), nn.BatchNorm1d(400), nn.Dropout(0.3),
                              nn.Linear(400, 200), nn.ReLU(), nn.BatchNorm1d(200), nn.Dropout(0.3),
                              nn.Linear(200, 100), nn.ReLU(), nn.BatchNorm1d(100), nn.Dropout(0.3),
                              nn.Linear(100, 2), nn.Softmax(dim=1))
  
  def forward(self, input1, input2):
    first_board = self.pos2vec(input1)
    second_board = self.pos2vec(input2)
    concat = torch.cat((first_board, second_board), 1)
    return self.base(concat)

In [8]:
dp = DeepChess(pos2vec)
dp

DeepChess(
  (pos2vec): Sequential(
    (0): Linear(in_features=773, out_features=600, bias=True)
    (1): ReLU()
    (2): Linear(in_features=600, out_features=400, bias=True)
    (3): ReLU()
    (4): Linear(in_features=400, out_features=200, bias=True)
    (5): ReLU()
    (6): Linear(in_features=200, out_features=100, bias=True)
    (7): ReLU()
  )
  (base): Sequential(
    (0): Linear(in_features=200, out_features=400, bias=True)
    (1): ReLU()
    (2): BatchNorm1d(400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=400, out_features=200, bias=True)
    (5): ReLU()
    (6): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Dropout(p=0.5, inplace=False)
    (8): Linear(in_features=200, out_features=100, bias=True)
    (9): ReLU()
    (10): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Dropout(p=0.5, inplace=False)
   

In [9]:
helper.train_supervise(model=dp, train_data=chess_pair_trainloader, val_data=chess_pair_testloader, epochs=10, patient=2)

Epoch: 0 | Loss: 0.00999060645699501 | Accuracy: 0.8926875: 100%|█████████████| 500/500 [00:32<00:00, 15.36it/s]
Loss: 0.3021301329135895 | Accuracy: 0.840875: 100%|██████████████████████████| 125/125 [00:06<00:00, 20.45it/s]
Epoch: 1 | Loss: 0.004585872869938612 | Accuracy: 0.999625: 100%|█████████████| 500/500 [00:31<00:00, 16.03it/s]
Loss: 0.39275050163269043 | Accuracy: 0.87525: 100%|██████████████████████████| 125/125 [00:07<00:00, 17.83it/s]
Epoch: 2 | Loss: 0.001127562951296568 | Accuracy: 0.99996875: 100%|███████████| 500/500 [00:32<00:00, 15.35it/s]
Loss: 0.2191186398267746 | Accuracy: 0.888625: 100%|██████████████████████████| 125/125 [00:06<00:00, 18.85it/s]
Epoch: 3 | Loss: 0.003983388189226389 | Accuracy: 1.0: 100%|██████████████████| 500/500 [00:31<00:00, 15.63it/s]
Loss: 0.15971989929676056 | Accuracy: 0.89825: 100%|██████████████████████████| 125/125 [00:06<00:00, 20.62it/s]
Epoch: 4 | Loss: 0.0018299478106200695 | Accuracy: 1.0: 100%|█████████████████| 500/500 [00:31<0

[tensor(0.3021, grad_fn=<BinaryCrossEntropyBackward0>),
 tensor(0.3928, grad_fn=<BinaryCrossEntropyBackward0>),
 tensor(0.2191, grad_fn=<BinaryCrossEntropyBackward0>),
 tensor(0.1597, grad_fn=<BinaryCrossEntropyBackward0>),
 tensor(0.4302, grad_fn=<BinaryCrossEntropyBackward0>),
 tensor(0.2004, grad_fn=<BinaryCrossEntropyBackward0>),
 tensor(0.3894, grad_fn=<BinaryCrossEntropyBackward0>),
 tensor(0.2667, grad_fn=<BinaryCrossEntropyBackward0>),
 tensor(0.1928, grad_fn=<BinaryCrossEntropyBackward0>),
 tensor(0.0855, grad_fn=<BinaryCrossEntropyBackward0>)]

In [12]:
i = next(iter(chess_pair_testloader))
z = i[2]
zz = dp(i[0], i[1])
print(torch.argmax(z, 1))
#print(zz)
print(torch.argmax(zz, 1))
# for a in zip(z, zz):
#   print(a)      
print(sum(1 for x in (torch.argmax(z, 1) == torch.argmax(zz, 1)) if x), "/", i[0].shape[0])

tensor([1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1,
        1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0])
tensor([1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0,
        1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0,
        1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0])
58 / 64


In [13]:
torch.save(dp.state_dict(), "model/save/deepchess.pth")
torch.save(pos2vec.state_dict(), "model/save/pos2vec.pth")