In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import pandas as pd
import numpy as np

In [2]:
# data = pd.read_csv('data/kaggle_data/chessData.csv')
data = pd.read_csv('data/chessdata_500k.csv')

In [3]:
data.head()

Unnamed: 0,FEN,Evaluation
0,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,-10
1,rnbqkbnr/pppp1ppp/4p3/8/4P3/8/PPPP1PPP/RNBQKBN...,56
2,rnbqkbnr/pppp1ppp/4p3/8/3PP3/8/PPP2PPP/RNBQKBN...,-9
3,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPP2PPP/RNBQKB...,52
4,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPPN1PPP/R1BQK...,-26


In [4]:
def fen_to_bit_vector(fen):
    parts = fen.split(' ')
    piece_placement = parts[0].split('/')
    active_color = parts[1]
    castling_rights = parts[2]
    en_passant = parts[3]
    halfmove_clock = int(parts[4])
    fullmove_clock = int(parts[5])

    bit_vector = np.zeros((13, 8, 8), dtype=np.uint8)
    
    # piece to layer structure taken from reference [1]
    piece_to_layer = {
        'R': 1,'N': 2,'B': 3,'Q': 4,'K': 5,'P': 6,'p': 7,'k': 8,
        'q': 9,'b': 10,'n': 11,'r': 12
    }
    
    castling = {'K': (7,7),'Q': (7,0),'k': (0,7),'q': (0,0),}

    for r, row in enumerate(piece_placement):
        c = 0
        for piece in row:
            if piece in piece_to_layer:
                bit_vector[piece_to_layer[piece], r, c] = 1
                c += 1
            else:
                c += int(piece)
    
    if en_passant != '-':
        bit_vector[0, ord(en_passant[0]) - ord('a'), int(en_passant[1]) - 1] = 1
    
    if castling_rights != '-':
        for char in castling_rights:
            bit_vector[0, castling[char][0], castling[char][1]] = 1
    
    if active_color == 'w':
        bit_vector[0, 7, 4] = 1
    else:
        bit_vector[0, 0, 4] = 1

    if halfmove_clock > 0:
        c = 7
        while halfmove_clock > 0:
            bit_vector[0, 3, c] = halfmove_clock%2
            halfmove_clock = halfmove_clock // 2
            c -= 1
            if c < 0:
                break

    if fullmove_clock > 0:
        c = 7
        while fullmove_clock > 0:
            bit_vector[0, 4, c] = fullmove_clock%2
            fullmove_clock = fullmove_clock // 2
            c -= 1
            if c < 0:
                break

    return bit_vector

In [6]:
class ChessDataset(torch.utils.data.Dataset):
    def __init__(self, fen_board, score):
        self.fen_board = fen_board
        self.score = score

    def __len__(self):
        return len(self.score)

    def __getitem__(self, index):
        # print(self.data[index])
        fen_board, score = self.fen_board[index], self.score[index]
        
        piece_boards = fen_to_bit_vector(fen_board)

        
        score= score.replace('\ufeff', '')
        try:
            score = int(score)
        except ValueError:
            score = 10000 if score[1] == '+' else -10000
        score = score / 100
            
        
        data = torch.tensor(piece_boards, dtype=torch.float32)
        label = torch.tensor(int(score), dtype=torch.float32)

        return data, label

In [7]:
dataset = ChessDataset(data['FEN'], data['Evaluation'])

In [8]:
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32,
                                         shuffle=True, pin_memory=False)

In [9]:
for data in dataloader:
    print(data[0][0])
    break

tensor([[[0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 1.],
         [0., 0., 0., 1., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 1., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 1., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
       

In [16]:
class EvalNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_layer = nn.Linear(832, 832)
        self.hidden_layer1 = nn.Linear(832, 416)
        self.hidden_layer2 = nn.Linear(416, 218)
        self.hidden_layer3 = nn.Linear(218, 104)
        self.output_layer = nn.Linear(104, 1)
    
    def forward(self, x):
        x = torch.flatten(x, 1)
        x = self.input_layer(x)
        x = nn.functional.relu(x)
        x = self.hidden_layer1(x)
        x = nn.functional.relu(x)
        x = self.hidden_layer2(x)
        x = nn.functional.relu(x)
        x = self.hidden_layer3(x)
        x = nn.functional.relu(x)
        x = self.output_layer(x)
        return x

In [17]:
def train(model, dataloader, optimizer, criterion, epochs, device):
    for epoch in range(epochs):
        running_loss = 0.0
        for i, (data, target) in enumerate(dataloader):
            data = data.to(device)
            target = target.to(device)

            # Forward pass
            output = model(data)

            # Loss
            loss = criterion(output, target)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()

            # Gradient descent
            optimizer.step()
        
            running_loss += loss.item()
            if i%5000 == 4999:
                print('Epoch %d, %5d : loss %.4f'%(epoch+1, i+1, running_loss/(5000*len(data))))
                running_loss = 0.0

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [19]:
model = EvalNetwork()
model.to(device)

EvalNetwork(
  (input_layer): Linear(in_features=832, out_features=832, bias=True)
  (hidden_layer1): Linear(in_features=832, out_features=416, bias=True)
  (hidden_layer2): Linear(in_features=416, out_features=218, bias=True)
  (hidden_layer3): Linear(in_features=218, out_features=104, bias=True)
  (output_layer): Linear(in_features=104, out_features=1, bias=True)
)

In [20]:
# create the loss function
criterion = nn.MSELoss()

# create the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# num of epochs
epochs = 10

# train the model
loss_list = train(model, dataloader,  optimizer, criterion,epochs, device)


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1,  5000 : loss 4.6519
Epoch 1, 10000 : loss 4.5714
Epoch 1, 15000 : loss 4.6082
Epoch 2,  5000 : loss 4.6517
Epoch 2, 10000 : loss 4.6372
Epoch 2, 15000 : loss 4.5205
Epoch 3,  5000 : loss 4.6376
Epoch 3, 10000 : loss 4.6218
Epoch 3, 15000 : loss 4.6052
Epoch 4,  5000 : loss 4.5799
Epoch 4, 10000 : loss 4.6800
Epoch 4, 15000 : loss 4.5701
Epoch 5,  5000 : loss 4.6423
Epoch 5, 10000 : loss 4.5161
Epoch 5, 15000 : loss 4.6379
Epoch 6,  5000 : loss 4.6451
Epoch 6, 10000 : loss 4.6306
Epoch 6, 15000 : loss 4.5658
Epoch 7,  5000 : loss 4.4794
Epoch 7, 10000 : loss 4.7601
Epoch 7, 15000 : loss 4.6408
Epoch 8,  5000 : loss 4.6938
Epoch 8, 10000 : loss 4.5801
Epoch 8, 15000 : loss 4.5553
Epoch 9,  5000 : loss 4.6234
Epoch 9, 10000 : loss 4.5695
Epoch 9, 15000 : loss 4.6085
Epoch 10,  5000 : loss 4.6096
Epoch 10, 10000 : loss 4.5311
Epoch 10, 15000 : loss 4.6965


In [15]:
torch.save(model.state_dict(), "model/cnn_1.pt")

In [None]:
model

In [None]:
model.output_layer.weight.shape

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
plt.plot(loss_list)