**IMPORTS**

In [1]:
import aux_functions
import importlib

importlib.reload(aux_functions)
from aux_functions import *

# Pytorch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.masked import masked_tensor

import numpy as np
import chess
from datetime import datetime
import sklearn
from sklearn.model_selection import KFold

**DATA PROCESSING**

- Importing the pgn data
- Transforming the data to sparce tensors 
- Splitting the data into training and testing

In [2]:
TEST_PERCENT = 0.25

# Load pgn paths
pgns = import_data(5)

# Convert pgns to tensors
board_tensors, next_moves = parse_pgn_to_tensors(pgns)

# Converting the dataset into a custom pytorch one
dataset = ChessDataset(board_tensors, next_moves)

# Setting manual seed so that the split always has the same indexes 
torch.manual_seed(0)
# Splitting the data into train and test
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [1-TEST_PERCENT, TEST_PERCENT])

print(len(train_dataset))  
print(train_dataset.indices[:10])

2537
[414, 235, 1355, 1009, 437, 1064, 2105, 260, 2175, 81]


**NEURAL NETWORK DESIGN**
- 2 Convolutional layers
- 2 Fully connected hidden layers

In [3]:
# Whether to do the operations on the cpu or gpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Neural network to predict which piece to move
class PieceToMoveNet(nn.Module):
    def __init__(self):
        super().__init__()

        # Takes as input a tensor of 14 channels (8x8 board)
        self.conv1 = nn.Conv2d(14, 6, 3)  # 6 filters, 3x3 kernel
        self.pool = nn.MaxPool2d(2, 2)    # Max pooling with 2x2 window
        self.conv2 = nn.Conv2d(6, 16, 3)  # 16 filters, 3x3 kernel

        # Using droput to reduce overfitting
        self.dropout = nn.Dropout(p=0.3)

        # Using batch normalization to make training faster and more stable
        self.bn1 = nn.BatchNorm1d(120)  # For the 1st layer
        self.bn2 = nn.BatchNorm1d(84)   # For the 2nd layer
        
        # Output from conv2 will be (16 channels, 1x1 feature maps)
        self.fc1 = nn.Linear(16 * 1 * 1, 120)
        # First hidden layer with 120 inputs and 84 outputs
        self.fc2 = nn.Linear(120, 84)
        # Second hidden layer with 84 inputs and 64 outputs (board tiles)
        self.fc3 = nn.Linear(84, 64)


    def forward(self, x):
        # First convolutional layer and pooling
        x = self.pool(F.relu(self.conv1(x))) 
        # Second convolutional layer (no pooling needed)
        x = F.relu(self.conv2(x)) 
        # Flatten all dimensions except batch size            
        x = torch.flatten(x, 1)       

        # Fully connected layer 1 and batch normalization
        x = F.relu(self.bn1(self.fc1(x)))    
        # Dropout neurons from the first layer to reduce overfitting
        x = self.dropout(x)    
        # Fully connected layer 2 and batch normalization               
        x = F.relu(self.bn2(self.fc2(x)))  
        # Output layer (no activation, logits for classification)   
        x = self.fc3(x)         

        return x


**TRAINING LOOP**

Generating a mask on which pieces the NN can move based on the current board.

In [4]:
def generate_mask(tensor) -> list:
    """Generates a mask which contains the position of the pieces that can be moved"""

    # If layer 12 has any 1 it will be whites turn
    if  torch.any(tensor[12] == 1):
        # White pieces are in layers 0 to 5, summing across those layers
        mask = torch.sum(tensor[0:6], dim = 0).int()


    # If layer 13 has any 1 it will be blacks turn
    elif torch.any(tensor[13] == 1):
        # Black pieces are in layers 6 to 11, summing across those layers
        mask = torch.sum(tensor[6:12], dim = 0).int()

    # Returning the masking in a list format
    return mask.flatten().tolist()


- Training the model on one epoch
- Validating the model on one epoch

In [5]:
# Get current time
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

def train_epoch(model, optimizer, train_loader, loss_fn, train_sampler_size): 
    """Trains the model for one epoch and returns the average training loss and accuracy"""

    # Initializing the avg. loss and correct guesses
    running_loss = 0.  
    running_correct = 0.

    # Looping through all samples in a batch
    for i, data in enumerate(train_loader):

        # Extracting the board tensor
        inputs = data[0]
        # Extracting the tile of the piece to move
        labels = data[1]

        # Resetting the gradients
        optimizer.zero_grad()

        # Calculating the mask for the current position
        mask = [generate_mask(pos) for pos in inputs]
        mask = torch.tensor(mask)
        
        # Moving inputs, labels and mask to the gpu/cpu
        inputs = inputs.to(device)
        labels = labels.to(device)
        mask = mask.to(device)

        # Calculating the masked output
        logits = model(inputs)
        outputs = logits * mask.float()

        # Calculating the sample loss
        loss = loss_fn(outputs, labels)
        # Calculating the gradient with respect to the loss
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        # Updating model parameters
        optimizer.step()

        # Adding the last loss to the running loss
        running_loss += loss.item()

        # Calculate number of correct predictions
        _, predictions = torch.max(outputs.data, 1)
        running_correct += (predictions == labels).sum().item()

    # Averaging the loss for all samples in the batch
    running_loss /= (i + 1)

    # Calculate accuracy based on the total samples in the fold (train_sampler_size)
    train_accuracy = running_correct / train_sampler_size

    return running_loss, train_accuracy


def validation_epoch(model, validation_loader, loss_fn, val_sampler_size):
    """Validates the model for one epoch and returns the average validation loss and accuracy"""

    # Initializes the validation loss and correct guesses
    running_vloss = 0.
    running_vcorrect = 0.

    # Set model to evaluation mode
    model.eval()

    # Disable gradient calculations for validation set
    with torch.no_grad():

        # Looping through all batches in the validation set
        for i, v_data in enumerate(validation_loader):

            # Getting the tensors of the validation data
            vinputs = v_data[0]
            vlabels = v_data[1] 

            # Calculating the mask for the current position
            mask = [generate_mask(pos) for pos in vinputs]
            mask = torch.tensor(mask)

            # Moving inputs, labels and mask to the gpu/cpu
            vinputs = vinputs.to(device)
            vlabels = vlabels.to(device)
            mask = mask.to(device)

            # Calculating the masked output
            logits = model(vinputs)
            voutputs = logits * mask.float()

            # Calculating the loss of the model in the validation sample
            vloss = loss_fn(voutputs, vlabels)

            # Adding this sample's loss to the total loss
            running_vloss += vloss.item()

            # Calculate number of correct predictions
            _, predictions = torch.max(voutputs.data, 1)
            running_vcorrect += (predictions == vlabels).sum().item()

    # Averaging the loss for all samples in the validation set
    running_vloss /= (i + 1)

    # Calculate accuracy based on the total samples in the fold (val_sampler_size)
    validation_accuracy = running_vcorrect / val_sampler_size

    return running_vloss, validation_accuracy


Performing cross validation while training the model on multiple epochs.

In [6]:
def train_multiple_folds(n_epochs, n_folds, batch_size, splits, writer, optimizer_class, optimizer_params, loss_fn):
    """Trains and validates the model on multiple folds"""

    # Initializing best validation loss of the model
    best_vloss = 1_000

    # Initializing losses and accuracies for training and validation to save them on TensorBoard
    epochs_tloss = [0 for _ in range(n_epochs)]
    epochs_tacc = [0 for _ in range(n_epochs)]
    epochs_vloss = [0 for _ in range(n_epochs)]
    epochs_vacc = [0 for _ in range(n_epochs)]
    
    # Looping through all folds for cross validation 
    for fold, (train_idx, val_idx) in enumerate(splits.split(np.arange(len(train_dataset)))):

        print(f"FOLD {fold+1}")

        # Getting the sampler for training and validation
        train_sampler = SubsetRandomSampler(train_idx)
        val_sampler = SubsetRandomSampler(val_idx)
        # Loaders for training and validation
        train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
        val_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=val_sampler)

        # Resetting the model each fold
        model = PieceToMoveNet()
        model.to(device)

        # Resetting the optimizer and its parameters each fold
        optimizer = optimizer_class(model.parameters(), **optimizer_params)
        
        # Getting the sample sizes of training and validation on this fold
        train_sampler_size = len(train_sampler)
        val_sampler_size = len(val_sampler)

        # Looping through all epochs
        for epoch in range(n_epochs): 
            # Training the model one epoch
            train_loss, train_acc = train_epoch(model, optimizer, train_loader, loss_fn, train_sampler_size)
            # Validating the model on one epoch
            val_loss, val_acc = validation_epoch(model, val_loader, loss_fn, val_sampler_size)

            # Adding losses and accuracies for insights 
            epochs_tloss[epoch] += train_loss
            epochs_tacc[epoch] += train_acc
            epochs_vloss[epoch] += val_loss
            epochs_vacc[epoch] += val_acc

            # Printing insights
            print(f"Epoch: {epoch + 1} Train Loss: {train_loss}, Valid Loss: {val_loss} |\
                   Train Acc: {train_acc}, Valid Acc: {val_acc}")

            # Saving the model if the loss on the validation is lower than the best one
            if val_loss < best_vloss:
                best_vloss = val_loss
                model_path = f"models/piece_to_move_net_{timestamp}_{fold+1}_{epoch+1}"
                torch.save(model.state_dict(), model_path)
    
    # Averaging losses and accuracies on each fold
    for i in range(n_epochs):
        epochs_tloss[i] /= (n_folds)
        epochs_tacc[i] /= (n_folds)
        epochs_vloss[i] /= (n_folds)
        epochs_vacc[i] /= (n_folds)

    # Saving losses and accuracies on TensorBoard
    for i in range(n_epochs):
        # Adding insights
        writer.add_scalars("Loss", {"Training": epochs_tloss[i], "Validation": epochs_vloss[i]}, i + 1)
        writer.add_scalars("Accuracy", {"Training": epochs_tacc[i], "Validation": epochs_vacc[i]}, i + 1)
        writer.flush()

Finally training and validating the model.

In [7]:
EPOCHS = 25  # Number of epochs
BATCH_SIZE = 32  # Number of batches
K = 3  # Number of folds

# Logs training statistics for TensorBoard visualization
writer = SummaryWriter(f"runs/piece_to_move_{timestamp}")  

# Obtaining folds for cross validaiton
splits = KFold(n_splits=K, shuffle=True, random_state=42)

# Initializing optimizer parameters to pass during training
optimizer_class = optim.Adam
optimizer_params = {
    "lr": 1e-4,
    "weight_decay": 1e-5
}

# Initializing the loss function
# Cross entropy loss used since it is a classification
loss_fn = torch.nn.CrossEntropyLoss()  

# Training the model with cross validation on multiple epochs
train_multiple_folds(EPOCHS, K, BATCH_SIZE, splits, writer, optimizer_class, optimizer_params, loss_fn)

FOLD 1
Epoch: 1 Train Loss: 4.138028810609062, Valid Loss: 4.122337129380968 |                   Train Acc: 0.10585452395032525, Valid Acc: 0.11465721040189125
Epoch: 2 Train Loss: 4.068300278681629, Valid Loss: 4.010005385787399 |                   Train Acc: 0.1283264340626848, Valid Acc: 0.1347517730496454
Epoch: 3 Train Loss: 3.8835944004778593, Valid Loss: 3.7629760371314154 |                   Train Acc: 0.15079834417504434, Valid Acc: 0.13238770685579196
Epoch: 4 Train Loss: 3.5303423899524615, Valid Loss: 3.3750836319393582 |                   Train Acc: 0.15789473684210525, Valid Acc: 0.14420803782505912
Epoch: 5 Train Loss: 3.1052874169259703, Valid Loss: 2.9821939821596497 |                   Train Acc: 0.1785925487876996, Valid Acc: 0.16784869976359337
Epoch: 6 Train Loss: 2.747179643163141, Valid Loss: 2.69347106968915 |                   Train Acc: 0.1856889414547605, Valid Acc: 0.17494089834515367
Epoch: 7 Train Loss: 2.521464064436139, Valid Loss: 2.51214771800571 |    

KeyboardInterrupt: 