**IMPORTS**

In [25]:
import aux_functions
import importlib

importlib.reload(aux_functions)
from aux_functions import *

# Pytorch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.masked import masked_tensor

import optuna
import numpy as np
import chess
from datetime import datetime
import sklearn
from sklearn.model_selection import KFold

**DATA PROCESSING**

- Importing the pgn data
- Transforming the data to sparce tensors 
- Splitting the data into training and testing

In [26]:
TEST_PERCENT = 0.25

# Load pgn paths
pgns = import_data(3)

# Convert pgns to tensors
board_tensors, next_moves = parse_pgn_to_tensors(pgns)

# Converting the dataset into a custom pytorch one
dataset = ChessDataset(board_tensors, next_moves)

# Setting manual seed so that the split always has the same indexes 
torch.manual_seed(0)
# Splitting the data into train and test
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [1-TEST_PERCENT, TEST_PERCENT])

print(len(train_dataset))  
print(train_dataset.indices[:10])

975
[521, 580, 4, 1107, 222, 930, 427, 510, 338, 1212]


**NEURAL NETWORK DESIGN**
- 2 Convolutional layers
- 2 Fully connected hidden layers

In [27]:
# Whether to do the operations on the cpu or gpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Neural network to predict which piece to move
class PieceToMoveNet(nn.Module):
    def __init__(self, fc2_neurons , dropout_p, random_state=42):
        super().__init__()

        # Takes as input a tensor of 14 channels (8x8 board)
        self.conv1 = nn.Conv2d(14, 6, 3)  # 6 filters, 3x3 
        self.pool = nn.MaxPool2d(2, 2)    # Max pooling with 2x2 window
        self.conv2 = nn.Conv2d(6, 16, 3)  # 16 filters, 3x3 kernel

        # Using droput to reduce overfitting
        self.dropout = nn.Dropout(dropout_p)

        # Using batch normalization to make training faster and more stable
        self.bn1 = nn.BatchNorm1d(120)  # For the 1st layer
        self.bn2 = nn.BatchNorm1d(84)   # For the 2nd layer
        
        # Output from conv2 will be (16 channels, 1x1 feature maps)
        self.fc1 = nn.Linear(16 * 1 * 1, 120)
        # 2nd hidden layer with 120 inputs and 84 outputs
        self.fc2 = nn.Linear(120, fc2_neurons)
        # Due to the random assignation of neurons, need to match size of neurons
        self.bn2 = nn.BatchNorm1d(fc2_neurons)
        # Ouput layer (64 squares)
        self.fc3 = nn.Linear(fc2_neurons, 64)


    def forward(self, x):
        # First convolutional layer and pooling
        x = self.pool(F.relu(self.conv1(x))) 
        # Second convolutional layer (no pooling needed)
        x = F.relu(self.conv2(x)) 
        # Flatten all dimensions except batch size            
        x = torch.flatten(x, 1)       

        # Fully connected layer 1 and batch normalization
        x = F.relu(self.bn1(self.fc1(x)))    
        # Dropout neurons from the first layer to reduce overfitting
        x = self.dropout(x)    
        # Fully connected layer 2 and batch normalization               
        x = F.relu(self.bn2(self.fc2(x)))  
        # Output layer (no activation, logits for classification)   
        x = self.fc3(x)         

        return x


In [28]:
# Neural network to predict where to move the piece to
class SquareToMoveToNet(nn.Module):
    def __init__(self):
        super().__init__()

        # Takes as input a tensor of 14 channels (8x8 board)
        self.conv1 = nn.Conv2d(14, 6, 3)  # 6 filters, 3x3 kernel
        self.pool = nn.MaxPool2d(2, 2)    # Max pooling with 2x2 window
        self.conv2 = nn.Conv2d(6, 16, 3)  # 16 filters, 3x3 kernel

        # Using droput to reduce overfitting
        self.dropout = nn.Dropout(p=0.3)

        # Using batch normalization to make training faster and more stable
        self.bn1 = nn.BatchNorm1d(120)  # For the 1st layer
        self.bn2 = nn.BatchNorm1d(84)   # For the 2nd layer

        # Output from conv2 will be (16 channels, 1x1 feature maps)
        self.fc1 = nn.Linear(16 * 1 * 1, 120)
        # 2nd hidden layer with 120 inputs and 84 outputs
        self.fc2 = nn.Linear(120, 84)
        # Output layer (64 squares)
        self.fc3 = nn.Linear(84, 64)

    def forward(self, x):

        # First convolutional layer and pooling
        x = self.pool(F.relu(self.conv1(x)))
        # Second convolutional layer (no pooling needed)
        x = F.relu(self.conv2(x))
        # Flatten all dimensions except batch size
        x = torch.flatten(x, 1)

        # Fully connected layer 1 and batch normalization
        x = F.relu(self.bn1(self.fc1(x)))
        # Dropout neurons from the first layer to reduce overfitting
        x = self.dropout(x)
        # Fully connected layer 2 and batch normalization 
        x = F.relu(self.bn2(self.fc2(x)))
        # Output layer (no activation, logits for classification)  
        x = self.fc3(x)

        return x


**TRAINING LOOP**

Generating a mask on which pieces the NN can move based on the current board.

In [29]:
def generate_mask(tensor, model) -> list:
    """Generates a mask with only legal moves and pieces 
    to move for the current position"""

    # Generates a mask with ones on the pieces than can be moved
    if isinstance(model, PieceToMoveNet):
        # If layer 12 has any 1 it will be whites turn
        if  torch.any(tensor[12] == 1):
            # White pieces are in layers 0 to 5, summing across those layers
            mask = torch.sum(tensor[0:6], dim = 0).int()

        # If layer 13 has any 1 it will be blacks turn
        else:
            # Black pieces are in layers 6 to 11, summing across those layers
            mask = torch.sum(tensor[6:12], dim = 0).int()

    # Generates a mask with ones on squares where a piece can be moved to
    elif isinstance(model, SquareToMoveToNet):
        # If layer 12 has any 1 it will be whites turn
        if  torch.any(tensor[12] == 1):
            # Possible white movements are on layer 12
            mask = tensor[12]

        # If layer 13 has any 1 it will be blacks turn
        else:
            # Possible black movements are on layer 13
            mask = tensor[13]

    # Returning the masking in a list format
    return mask.flatten().tolist()


- Training the model on one epoch
- Validating the model on one epoch

In [30]:
# Get current time
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

def train_epoch(model, optimizer, train_loader, loss_fn, train_sampler_size): 
    """Trains the model for one epoch and returns the average training loss and accuracy"""

    # Initializing the avg. loss and correct guesses
    running_loss = 0.  
    running_correct = 0.

    # Looping through all samples in a batch
    for i, data in enumerate(train_loader):

        # Getting the board tensor
        inputs = data[0]
        # Getting the square of the piece to move
        if isinstance(model, PieceToMoveNet):
            labels = data[1]
        # Getting the square to move the piece to
        elif isinstance(model, SquareToMoveToNet):
            labels = data[2]

        # Resetting the gradients
        optimizer.zero_grad()

        # Calculating the mask for the current position
        mask = [generate_mask(pos, model) for pos in inputs]
        mask = torch.tensor(mask)
        
        # Moving inputs, labels and mask to the gpu/cpu
        inputs = inputs.to(device)
        labels = labels.to(device)
        mask = mask.to(device)

        # Calculating the masked output
        logits = model(inputs)
        outputs = logits * mask.float()

        # Calculating the sample loss
        loss = loss_fn(outputs, labels)
        # Calculating the gradient with respect to the loss
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        # Updating model parameters
        optimizer.step()

        # Adding the last loss to the running loss
        running_loss += loss.item()

        # Calculate number of correct predictions
        _, predictions = torch.max(outputs.data, 1)
        running_correct += (predictions == labels).sum().item()

    # Averaging the loss for all samples in the batch
    running_loss /= (i + 1)

    # Calculate accuracy based on the total samples in the fold (train_sampler_size)
    train_accuracy = running_correct / train_sampler_size

    return running_loss, train_accuracy


def validation_epoch(model, validation_loader, loss_fn, val_sampler_size):
    """Validates the model for one epoch and returns the average validation loss and accuracy"""

    # Initializes the validation loss and correct guesses
    running_vloss = 0.
    running_vcorrect = 0.

    # Set model to evaluation mode
    model.eval()

    # Disable gradient calculations for validation set
    with torch.no_grad():

        # Looping through all batches in the validation set
        for i, v_data in enumerate(validation_loader):

            # Getting the board tensors 
            vinputs = v_data[0]
            # Getting the square of the piece to move
            if isinstance(model, PieceToMoveNet):
                vlabels = v_data[1]
            # Getting the square to move the piece to
            elif isinstance(model, SquareToMoveToNet):
                vlabels = v_data[2]

            # Calculating the mask for the current position
            mask = [generate_mask(pos, model) for pos in vinputs]
            mask = torch.tensor(mask)

            # Moving inputs, labels and mask to the gpu/cpu
            vinputs = vinputs.to(device)
            vlabels = vlabels.to(device)
            mask = mask.to(device)

            # Calculating the masked output
            logits = model(vinputs)
            voutputs = logits * mask.float()

            # Calculating the loss of the model in the validation sample
            vloss = loss_fn(voutputs, vlabels)

            # Adding this sample's loss to the total loss
            running_vloss += vloss.item()

            # Calculate number of correct predictions
            _, predictions = torch.max(voutputs.data, 1)
            running_vcorrect += (predictions == vlabels).sum().item()

    # Averaging the loss for all samples in the validation set
    running_vloss /= (i + 1)

    # Calculate accuracy based on the total samples in the fold (val_sampler_size)
    validation_accuracy = running_vcorrect / val_sampler_size

    return running_vloss, validation_accuracy


Performing cross validation while training the model on multiple epochs. This is done inside the hyperparameter function

In [31]:
EPOCHS = 25      # Number of epochs
BATCH_SIZE = 32  # Number of batches
K = 3            # Number of folds

def hyperparameter_tuning(trial):
    """Obtaining the best hyperparameters to increase the accuracy of the model,performs cross validation 
    with random parameters and returns the best values"""

    # Random values for the hyperparameters
    fc2_neurons = trial.suggest_int("fc2_neurons", 64, 128)
    dropout_p = trial.suggest_float("dropout_p", 0.2, 0.5)
    pooling = trial.suggest_categorical("use_pooling", [True, False])
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-3) # Loguniform will be better in this case 
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-4)

    # Adjust the parameters for Adam Optimizer
    optimizer_params = {
        "lr" : lr,
        "weight_decay": weight_decay
    }
    # WHEN RUNNING THE CODE NEEX TO CHECK PARAMTERS FOR PIECE TO MOVE NET ARE ADDED AND POOLING IS ALSO CHECKED AND RANDOM STATE
    # Run the model with the parameters
    model = PieceToMoveNet(fc2_neurons, dropout_p,random_state = 42) # Same as using get seed, so the values returned are the same
    model.to(device)

    optimizer = optim.Adam(model.parameters(), **optimizer_params) # 
    # Initializing the loss function
    # Cross entropy loss used since it is a classification
    loss_fn = torch.nn.CrossEntropyLoss()  

    # Cross Validation
    splits = KFold(n_splits= K, shuffle = True, random_state= 42)

    # Create folder to save models if it doesnt exist, save after it is correctly tested
    #model_save_dir = "models"
    #os.makedirs(model_save_dir, exist_ok= True)

    best_vloss = float("inf") # Initializing a really big loss to obtain better values
    best_model_path = None # No best model path yet, initialized to None


    # Looping through all folds for cross validation 
    for fold, (train_idx, val_idx) in enumerate(splits.split(np.arange(len(train_dataset)))):

            print(f"FOLD {fold+1}")

            # Getting the sampler for training and validation
            train_sampler = SubsetRandomSampler(train_idx)
            val_sampler = SubsetRandomSampler(val_idx)
            # Loaders for training and validation
            train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler)
            val_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=val_sampler)

            # Getting the sample sizes of training and validation on this fold
            train_sampler_size = len(train_sampler)
            val_sampler_size = len(val_sampler)

            # Looping through all epochs
            for epoch in range(EPOCHS): 
                # Training the model one epoch
                train_loss, train_acc = train_epoch(model, optimizer, train_loader, loss_fn, train_sampler_size)
                # Validating the model on one epoch
                val_loss, val_acc = validation_epoch(model, val_loader, loss_fn, val_sampler_size)

                # Adding losses and accuracies for insights 
                #epochs_tloss[epoch] += train_loss
                #epochs_tacc[epoch] += train_acc
                #epochs_vloss[epoch] += val_loss
                #epochs_vacc[epoch] += val_acc

                #Printing insights
                #print(f"Epoch: {epoch + 1} Train Loss: {train_loss}, Valid Loss: {val_loss} |\
                #Train Acc: {train_acc}, Valid Acc: {val_acc}")

                # Saving the model if the loss on the validation is lower than the best one
                if val_loss < best_vloss:
                    best_vloss = val_loss
                    model_path = f"models/piece_to_move_net_{timestamp}_{fold+1}_{epoch+1}"
                    torch.save(model.state_dict(), model_path)

    trial.set_user_attr("best_model_path", best_model_path)
    return best_vloss



Returns the best hyperparameters

In [32]:
study = optuna.create_study(direction = "minimize") # Will focus on minimizing validation loss
study.optimize(hyperparameter_tuning, n_trials = 50 ) # Random value that will do the trials, can be set ot anything


# After the study is completed, print the best hyperparameters
best_trial = study.best_trial
# Print the best parameters
print("Best hyperparameters", study.best_trial.params)

[I 2024-10-15 22:28:47,014] A new study created in memory with name: no-name-9abdb39d-dfbe-4b2c-9568-6e1a2a479166
  lr = trial.suggest_loguniform("lr", 1e-5, 1e-3) # Loguniform will be better in this case
  weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-4)


FOLD 1
FOLD 2
FOLD 3


[I 2024-10-15 22:29:07,474] Trial 0 finished with value: 2.2061439860950816 and parameters: {'fc2_neurons': 107, 'dropout_p': 0.45989622121223933, 'use_pooling': False, 'lr': 0.00017145991910703473, 'weight_decay': 1.9857612609641977e-06}. Best is trial 0 with value: 2.2061439860950816.


FOLD 1


[W 2024-10-15 22:29:13,228] Trial 1 failed with parameters: {'fc2_neurons': 113, 'dropout_p': 0.24509703780250142, 'use_pooling': False, 'lr': 0.000964409749008135, 'weight_decay': 2.2696176804168675e-05} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\1\AppData\Local\Programs\Python\Python312\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\1\AppData\Local\Temp\ipykernel_9448\1635072522.py", line 61, in hyperparameter_tuning
    train_loss, train_acc = train_epoch(model, optimizer, train_loader, loss_fn, train_sampler_size)
                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\1\AppData\Local\Temp\ipykernel_9448\1928562526.py", line 36, in train_epoch
    logits = model(inputs)
             ^^^^^^^^^^^^^
  File "c:\Users\1\AppData\Local\Programs\Python\Python312

KeyboardInterrupt: 