**IMPORTS**

In [16]:
import aux_functions
import importlib

importlib.reload(aux_functions)
from aux_functions import *

# Pytorch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.masked import masked_tensor

import optuna
import numpy as np
import chess
from datetime import datetime
import sklearn
from sklearn.model_selection import KFold

**DATA PROCESSING**

- Importing the pgn data
- Transforming the data to sparce tensors 
- Splitting the data into training and testing

In [19]:
TEST_PERCENT = 0.25

# Load pgn paths
pgns = import_data(6)

# Convert pgns to tensors
board_tensors, next_moves = parse_pgn_to_tensors(pgns)

# Converting the dataset into a custom pytorch one
dataset = ChessDataset(board_tensors, next_moves)

# Setting manual seed so that the split always has the same indexes 
torch.manual_seed(0)
# Splitting the data into train and test
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [1-TEST_PERCENT, TEST_PERCENT])

print(len(train_dataset))  
print(train_dataset.indices[:10])

c:\Users\javie\chess-data\pgn
2976
[182, 886, 1680, 979, 3365, 2170, 1569, 3230, 3351, 1034]


**NEURAL NETWORK DESIGN**
- 2 Convolutional layers
- 2 Fully connected hidden layers

In [71]:
# Whether to do the operations on the cpu or gpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Neural network to predict which piece to move
class PieceToMoveNet(nn.Module):
    def __init__(self, fc1_neurons, fc2_neurons, dropout_p, random_state=42):
        super().__init__()

        # Takes as input a tensor of 14 channels (8x8 board)
        self.conv1 = nn.Conv2d(14, 6, 3)  # 6 filters, 3x3 
        self.pool = nn.MaxPool2d(2, 2)    # Max pooling with 2x2 window
        self.conv2 = nn.Conv2d(6, 16, 3)  # 16 filters, 3x3 kernel

        # Using droput to reduce overfitting
        self.dropout = nn.Dropout(dropout_p)

        # Using batch normalization to make training faster and more stable
        self.bn1 = nn.BatchNorm1d(fc1_neurons)   # For the 1st layer
        self.bn2 = nn.BatchNorm1d(fc2_neurons)   # For the 2nd layer
        
        # Output from conv2 will be (16 channels, 4x4 feature maps)
        self.fc1 = nn.Linear(16 * 4 * 4, fc1_neurons)
            
        # 2nd hidden layer with 120 inputs and 84 outputs
        self.fc2 = nn.Linear(fc1_neurons, fc2_neurons)
        # Ouput layer (64 squares)
        self.fc3 = nn.Linear(fc2_neurons, 64)


    def forward(self, x):
        # First convolutional layer
        x = F.relu(self.conv1(x))
        # Second convolutional layer 
        x = F.relu(self.conv2(x)) 
        # Flatten all dimensions except batch size            
        x = torch.flatten(x, 1)       

        # Fully connected layer 1 and batch normalization
        x = F.relu(self.bn1(self.fc1(x)))    
        # Dropout neurons from the first layer to reduce overfitting
        x = self.dropout(x)    
        # Fully connected layer 2 and batch normalization               
        x = F.relu(self.bn2(self.fc2(x)))  
        # Output layer (no activation, logits for classification)   
        x = self.fc3(x)         

        return x


In [72]:
# Neural network to predict where to move the piece to
class SquareToMoveToNet(nn.Module):
    def __init__(self):
        super().__init__()

        # Takes as input a tensor of 14 channels (8x8 board)
        self.conv1 = nn.Conv2d(14, 6, 3)  # 6 filters, 3x3 kernel
        self.pool = nn.MaxPool2d(2, 2)    # Max pooling with 2x2 window
        self.conv2 = nn.Conv2d(6, 16, 3)  # 16 filters, 3x3 kernel

        # Using droput to reduce overfitting
        self.dropout = nn.Dropout(p=0.3)

        # Using batch normalization to make training faster and more stable
        self.bn1 = nn.BatchNorm1d(120)  # For the 1st layer
        self.bn2 = nn.BatchNorm1d(84)   # For the 2nd layer

        # Output from conv2 will be (16 channels, 1x1 feature maps)
        self.fc1 = nn.Linear(16 * 1 * 1, 120)
        # 2nd hidden layer with 120 inputs and 84 outputs
        self.fc2 = nn.Linear(120, 84)
        # Output layer (64 squares)
        self.fc3 = nn.Linear(84, 64)

    def forward(self, x):

        # First convolutional layer and pooling
        x = self.pool(F.relu(self.conv1(x)))
        # Second convolutional layer (no pooling needed)
        x = F.relu(self.conv2(x))
        # Flatten all dimensions except batch size
        x = torch.flatten(x, 1)

        # Fully connected layer 1 and batch normalization
        x = F.relu(self.bn1(self.fc1(x)))
        # Dropout neurons from the first layer to reduce overfitting
        x = self.dropout(x)
        # Fully connected layer 2 and batch normalization 
        x = F.relu(self.bn2(self.fc2(x)))
        # Output layer (no activation, logits for classification)  
        x = self.fc3(x)

        return x


**TRAINING LOOP**

Generating a mask on which pieces the NN can move based on the current board.

In [73]:
def generate_mask(tensor, model) -> list:
    """Generates a mask with only legal moves and pieces 
    to move for the current position"""

    # Generates a mask with ones on the pieces than can be moved
    if isinstance(model, PieceToMoveNet):
        # If layer 12 has any 1 it will be whites turn
        if  torch.any(tensor[12] == 1):
            # White pieces are in layers 0 to 5, summing across those layers
            mask = torch.sum(tensor[0:6], dim = 0).int()

        # If layer 13 has any 1 it will be blacks turn
        else:
            # Black pieces are in layers 6 to 11, summing across those layers
            mask = torch.sum(tensor[6:12], dim = 0).int()

    # Generates a mask with ones on squares where a piece can be moved to
    elif isinstance(model, SquareToMoveToNet):
        # If layer 12 has any 1 it will be whites turn
        if  torch.any(tensor[12] == 1):
            # Possible white movements are on layer 12
            mask = tensor[12]

        # If layer 13 has any 1 it will be blacks turn
        else:
            # Possible black movements are on layer 13
            mask = tensor[13]

    # Returning the masking in a list format
    return mask.flatten().tolist()


- Training the model on one epoch
- Validating the model on one epoch

In [74]:
# Get current time
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

def train_epoch(model, optimizer, train_loader, loss_fn, train_sampler_size): 
    """Trains the model for one epoch and returns the average training loss and accuracy"""

    # Initializing the avg. loss and correct guesses
    running_loss = 0.  
    running_correct = 0.

    # Looping through all samples in a batch
    for i, data in enumerate(train_loader):

        # Getting the board tensor
        inputs = data[0]
        # Getting the square of the piece to move
        if isinstance(model, PieceToMoveNet):
            labels = data[1]
        # Getting the square to move the piece to
        elif isinstance(model, SquareToMoveToNet):
            labels = data[2]

        # Resetting the gradients
        optimizer.zero_grad()

        # Calculating the mask for the current position
        mask = [generate_mask(pos, model) for pos in inputs]
        mask = torch.tensor(mask)
        
        # Moving inputs, labels and mask to the gpu/cpu
        inputs = inputs.to(device)
        labels = labels.to(device)
        mask = mask.to(device)

        # Calculating the masked output
        logits = model(inputs)
        outputs = logits * mask.float()

        # Calculating the sample loss
        loss = loss_fn(outputs, labels)
        # Calculating the gradient with respect to the loss
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        # Updating model parameters
        optimizer.step()

        # Adding the last loss to the running loss
        running_loss += loss.item()

        # Calculate number of correct predictions
        _, predictions = torch.max(outputs.data, 1)
        running_correct += (predictions == labels).sum().item()

    # Averaging the loss for all samples in the batch
    running_loss /= (i + 1)

    # Calculate accuracy based on the total samples in the fold (train_sampler_size)
    train_accuracy = running_correct / train_sampler_size

    return running_loss, train_accuracy


def validation_epoch(model, validation_loader, loss_fn, val_sampler_size):
    """Validates the model for one epoch and returns the average validation loss and accuracy"""

    # Initializes the validation loss and correct guesses
    running_vloss = 0.
    running_vcorrect = 0.

    # Set model to evaluation mode
    model.eval()

    # Disable gradient calculations for validation set
    with torch.no_grad():

        # Looping through all batches in the validation set
        for i, v_data in enumerate(validation_loader):

            # Getting the board tensors 
            vinputs = v_data[0]
            # Getting the square of the piece to move
            if isinstance(model, PieceToMoveNet):
                vlabels = v_data[1]
            # Getting the square to move the piece to
            elif isinstance(model, SquareToMoveToNet):
                vlabels = v_data[2]

            # Calculating the mask for the current position
            mask = [generate_mask(pos, model) for pos in vinputs]
            mask = torch.tensor(mask)

            # Moving inputs, labels and mask to the gpu/cpu
            vinputs = vinputs.to(device)
            vlabels = vlabels.to(device)
            mask = mask.to(device)

            # Calculating the masked output
            logits = model(vinputs)
            voutputs = logits * mask.float()

            # Calculating the loss of the model in the validation sample
            vloss = loss_fn(voutputs, vlabels)

            # Adding this sample's loss to the total loss
            running_vloss += vloss.item()

            # Calculate number of correct predictions
            _, predictions = torch.max(voutputs.data, 1)
            running_vcorrect += (predictions == vlabels).sum().item()

    # Averaging the loss for all samples in the validation set
    running_vloss /= (i + 1)

    # Calculate accuracy based on the total samples in the fold (val_sampler_size)
    validation_accuracy = running_vcorrect / val_sampler_size

    return running_vloss, validation_accuracy


Performing cross validation while training the model on multiple epochs. This is done inside the hyperparameter function

In [75]:
EPOCHS = 15      # Number of epochs
BATCH_SIZE = 32  # Number of batches
K = 3            # Number of folds

def hyperparameter_tuning(trial):
    """Obtaining the best hyperparameters to increase the accuracy of the model,performs cross validation 
    with random parameters and returns the best values"""

    # Random values for the hyperparameters
    fc1_neurons = trial.suggest_int("fc1_neurons", 128, 256)
    fc2_neurons = trial.suggest_int("fc2_neurons", 64, 150)
    dropout_p = trial.suggest_float("dropout_p", 0.2, 0.5)
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-3) # Loguniform will be better in this case 
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-4)

    # Adjust the parameters for Adam Optimizer
    optimizer_params = {
        "lr" : lr,
        "weight_decay": weight_decay
    }

    # Print the hyperparameters for each trial
    print(f"\nTRIAL {trial.number + 1}:")
    print(f"Hyperparameters: fc1_neurons={fc1_neurons}, fc2_neurons={fc2_neurons}, dropout_p={dropout_p}, lr={lr}, weight_decay={weight_decay}")

    # WHEN RUNNING THE CODE NEEX TO CHECK PARAMTERS FOR PIECE TO MOVE NET ARE ADDED AND POOLING IS ALSO CHECKED AND RANDOM STATE
    # Run the model with the parameters

    # Initializing the loss function
    # Cross entropy loss used since it is a classification
    loss_fn = torch.nn.CrossEntropyLoss()  

    # Cross Validation
    splits = KFold(n_splits= K, shuffle = True, random_state= 42)

    # Create folder to save models if it doesnt exist, save after it is correctly tested
    #model_save_dir = "models"
    #os.makedirs(model_save_dir, exist_ok= True)

    best_vloss = float("inf") # Initializing a really big loss to obtain better values
    best_model_path = None # No best model path yet, initialized to None


    # Looping through all folds for cross validation 
    for fold, (train_idx, val_idx) in enumerate(splits.split(np.arange(len(train_dataset)))):

            print(f"FOLD {fold+1}")

            model = PieceToMoveNet(fc1_neurons, fc2_neurons, dropout_p) 
            model.to(device)

            optimizer = optim.Adam(model.parameters(), **optimizer_params)

            # Getting the sampler for training and validation
            train_sampler = SubsetRandomSampler(train_idx)
            val_sampler = SubsetRandomSampler(val_idx)
            # Loaders for training and validation
            train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler)
            val_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=val_sampler)

            # Getting the sample sizes of training and validation on this fold
            train_sampler_size = len(train_sampler)
            val_sampler_size = len(val_sampler)

            # Looping through all epochs
            for epoch in range(EPOCHS): 
                # Training the model one epoch
                train_loss, train_acc = train_epoch(model, optimizer, train_loader, loss_fn, train_sampler_size)
                # Validating the model on one epoch
                val_loss, val_acc = validation_epoch(model, val_loader, loss_fn, val_sampler_size)

                # Adding losses and accuracies for insights 
                """epochs_tloss[epoch] += train_loss
                epochs_tacc[epoch] += train_acc
                epochs_vloss[epoch] += val_loss
                eepochs_vacc[epoch] += val_acc"""

                #Printing insights
                if (epoch + 1) % 5 == 0: 
                    print(f"Epoch: {epoch + 1} Train Loss: {train_loss}, Valid Loss: {val_loss} |Train Acc: {train_acc}, Valid Acc: {val_acc}")

                # Saving the model if the loss on the validation is lower than the best one
                if val_loss < best_vloss:
                    best_vloss = val_loss
                    model_path = f"models/piece_to_move_net_{timestamp}_{fold+1}_{epoch+1}"
                    torch.save(model.state_dict(), model_path)

    trial.set_user_attr("best_model_path", best_model_path)
    return best_vloss



Returns the best hyperparameters

In [76]:
study = optuna.create_study(direction = "minimize") # Will focus on minimizing validation loss
study.optimize(hyperparameter_tuning, n_trials = 20) # Random value that will do the trials, can be set ot anything


# After the study is completed, print the best hyperparameters
best_trial = study.best_trial
# Print the best parameters
print("Best hyperparameters", study.best_trial.params)

[I 2024-10-16 23:17:50,484] A new study created in memory with name: no-name-225da62a-5b70-47d2-9677-073dc72a6c11



TRIAL 1:
Hyperparameters: fc1_neurons=230, fc2_neurons=107, dropout_p=0.4569885345866877, lr=0.00013355145602529149, weight_decay=7.392828448747519e-05
FOLD 1


  lr = trial.suggest_loguniform("lr", 1e-5, 1e-3) # Loguniform will be better in this case
  weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-4)


Epoch: 5 Train Loss: 2.2666635513305664, Valid Loss: 2.3086259595809446 |Train Acc: 0.21370967741935484, Valid Acc: 0.20766129032258066
Epoch: 10 Train Loss: 2.10558823808547, Valid Loss: 2.285001539414929 |Train Acc: 0.25756048387096775, Valid Acc: 0.17842741935483872
Epoch: 15 Train Loss: 1.938422441482544, Valid Loss: 2.2745994214088685 |Train Acc: 0.3084677419354839, Valid Acc: 0.1905241935483871
FOLD 2
Epoch: 5 Train Loss: 2.2601035794904156, Valid Loss: 2.326939990443568 |Train Acc: 0.20967741935483872, Valid Acc: 0.19153225806451613
Epoch: 10 Train Loss: 2.1023235936318674, Valid Loss: 2.2637697265994166 |Train Acc: 0.26663306451612906, Valid Acc: 0.1935483870967742
Epoch: 15 Train Loss: 1.9344479787734248, Valid Loss: 2.232185744470166 |Train Acc: 0.3392137096774194, Valid Acc: 0.1935483870967742
FOLD 3
Epoch: 5 Train Loss: 2.2847737669944763, Valid Loss: 2.302284102286062 |Train Acc: 0.2212701612903226, Valid Acc: 0.2127016129032258


[W 2024-10-16 23:18:15,256] Trial 0 failed with parameters: {'fc1_neurons': 230, 'fc2_neurons': 107, 'dropout_p': 0.4569885345866877, 'lr': 0.00013355145602529149, 'weight_decay': 7.392828448747519e-05} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\javie\AppData\Local\Programs\Python\Python311\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\javie\AppData\Local\Temp\ipykernel_6788\2672535786.py", line 70, in hyperparameter_tuning
    val_loss, val_acc = validation_epoch(model, val_loader, loss_fn, val_sampler_size)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\javie\AppData\Local\Temp\ipykernel_6788\1928562526.py", line 91, in validation_epoch
    mask = [generate_mask(pos, model) for pos in vinputs]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C

KeyboardInterrupt: 

In [67]:
TEST_PERCENT = 0.25

# Load pgn paths
pgns = import_data(15)
print("Finished importing")

# Convert pgns to tensors
board_tensors, next_moves = parse_pgn_to_tensors(pgns)


# Converting the dataset into a custom pytorch one
dataset = ChessDataset(board_tensors, next_moves)

# Setting manual seed so that the split always has the same indexes 
torch.manual_seed(0)
# Splitting the data into train and test
final_train_dataset, test_dataset = torch.utils.data.random_split(dataset, [1-TEST_PERCENT, TEST_PERCENT])

print(len(final_train_dataset))


In [68]:
EPOCHS = 25

fc1_neurons = 160
fc2_neurons = 120
dropout_p = 0.33
lr = 0.00015
weight_decay = 1.9e-06

optimizer_params = {
        "lr" : lr,
        "weight_decay": weight_decay
    }

writer = SummaryWriter(f"runs/piece_to_move_{timestamp}") 

loss_fn = torch.nn.CrossEntropyLoss()  
# Cross Validation
splits = KFold(n_splits= K, shuffle = True, random_state= 42)
best_vloss = 1_000

epochs_tloss = [0 for _ in range(EPOCHS)]
epochs_tacc = [0 for _ in range(EPOCHS)]
epochs_vloss = [0 for _ in range(EPOCHS)]
epochs_vacc = [0 for _ in range(EPOCHS)]



# Looping through all folds for cross validation 
for fold, (train_idx, val_idx) in enumerate(splits.split(np.arange(len(train_dataset)))):

        print(f"FOLD {fold+1}")

        model = PieceToMoveNet(fc1_neurons, fc2_neurons, dropout_p) 
        model.to(device)

        optimizer = optim.Adam(model.parameters(), **optimizer_params)

        # Getting the sampler for training and validation
        train_sampler = SubsetRandomSampler(train_idx)
        val_sampler = SubsetRandomSampler(val_idx)
        # Loaders for training and validation
        train_loader = DataLoader(final_train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler)
        val_loader = DataLoader(final_train_dataset, batch_size=BATCH_SIZE, sampler=val_sampler)

        # Getting the sample sizes of training and validation on this fold
        train_sampler_size = len(train_sampler)
        val_sampler_size = len(val_sampler)

        # Looping through all epochs
        for epoch in range(EPOCHS): 
            # Training the model one epoch
            train_loss, train_acc = train_epoch(model, optimizer, train_loader, loss_fn, train_sampler_size)
            # Validating the model on one epoch
            val_loss, val_acc = validation_epoch(model, val_loader, loss_fn, val_sampler_size)

            # Adding losses and accuracies for insights 
            epochs_tloss[epoch] += train_loss
            epochs_tacc[epoch] += train_acc
            epochs_vloss[epoch] += val_loss
            epochs_vacc[epoch] += val_acc

            print(f"Epoch: {epoch + 1} Train Loss: {train_loss}, Valid Loss: {val_loss} |Train Acc: {train_acc}, Valid Acc: {val_acc}")

            # Saving the model if the loss on the validation is lower than the best one
            if val_loss < best_vloss:
                best_vloss = val_loss
                model_path = f"models/piece_to_move_net_{timestamp}_{fold+1}_{epoch+1}"
                torch.save(model.state_dict(), model_path)

for i in range(EPOCHS):
        epochs_tloss[i] /= K
        epochs_tacc[i] /= K
        epochs_vloss[i] /= K
        epochs_vacc[i] /= K

# Saving losses and accuracies on TensorBoard
for i in range(EPOCHS):
    # Adding insights
    writer.add_scalars("Loss", {"Training": epochs_tloss[i], "Validation": epochs_vloss[i]}, i + 1)
    writer.add_scalars("Accuracy", {"Training": epochs_tacc[i], "Validation": epochs_vacc[i]}, i + 1)
    writer.flush()
