<a href="https://colab.research.google.com/github/enrischia/Connect-Four/blob/main/Project_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Project**

<br>

---

<br>

**Track 1: Supervised Learning** \\
Given a supervised dataset of game positions and moves selected by an AI agent, we aim to develop and train a machine learning model that imitates the moves performed by our agent, using four different approaches. In particular, we decided to consider two different types of neural networks: a Multilayer Perceptron (MLP) and a Convolutional Neural Network (CNN). We also tried to train the models starting from two slightly different inputs. First, we used just two matrices (i.e. a tensor of size (2, 6, 7)) containing the positions of the two players on the board. Then, we added more features to include information on the location of empty cells and on some of the empty slots where either player could complete a line of four consecutive discs by placing a token.

<br>

**Settings**


*   Import packages



In [1]:
import copy
import torch
import gdown
import matplotlib.pyplot as plt
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

from torch.optim.lr_scheduler import StepLR
from sklearn import metrics
from torchsummary import summary

# Pick the compute device once: use the GPU when PyTorch can see one, otherwise stay on the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'



*   Function to process the imported files into list of pairs (position, move)



In [2]:
def process_dataset(filename):
    """Parse a text file of board positions into a list of (position, move) pairs.

    The file is consumed as consecutive records of seven lines: six lines holding
    the board rows (top row first) followed by one line holding the selected move
    as an integer. Each position is returned as a list of six rows, bottom row
    first, where every row is the list of characters of its line. A trailing
    record that has no move line is discarded.

    Prints a one-line summary and returns the list of pairs.
    """
    records = []
    rows = []  # rows of the record being read, in file order (top row first)

    with open(filename, 'r') as handle:
        for raw_line in handle:
            if len(rows) == 6:
                # All six rows are in: this line carries the move
                column = int(raw_line.strip())

                # Store the rows bottom-up, so that index 0 is the bottom row of the board
                records.append((rows[::-1], column))

                # Start collecting the next record
                rows = []
                continue

            # Still collecting board rows: keep every character of the line except the newline
            rows.append(list(raw_line.strip('\n')))

    print(f'Processed {filename} into a dataset with {len(records)} positions')
    return records



*   Functions to train and evaluate the model



In [3]:
def _plot_training_history(train_losses, train_accuracies, val_losses, val_accuracies, eval_name = None):
    """Draw the per-epoch loss and accuracy curves side by side.

    `eval_name` is None when there is no evaluation series to draw; otherwise it is the
    word ('Validation' or 'Test') used in the legend and in the titles.
    """
    train_color = (97/255, 165/255, 194/255)
    eval_color = (1/255, 58/255, 99/255)
    epochs_axis = range(len(train_losses))

    fig = plt.figure(figsize = (10, 6))
    fig.subplots_adjust(wspace = 0.6)

    panels = [
        ('loss', 'Cross-entropy loss', train_losses, val_losses),
        ('accuracy', 'Accuracy', train_accuracies, val_accuracies),
    ]
    for index, (quantity, ylabel, train_values, eval_values) in enumerate(panels, start = 1):
        ax = fig.add_subplot(1, 2, index)
        ax.plot(epochs_axis, train_values, label = f'Training {quantity}', color = train_color)
        title = f'Training {quantity}'
        if eval_name is not None:
            ax.plot(epochs_axis, eval_values, label = f'{eval_name} {quantity}', color = eval_color)
            title += f' vs. {eval_name} {quantity}'
        ax.set_xlabel('Number of epochs')
        ax.set_ylabel(ylabel)
        if quantity == 'accuracy':
            # Pin the tick positions before relabelling them, so labels and ticks cannot drift apart
            ticks = ax.get_yticks()
            ax.set_yticks(ticks)
            ax.set_yticklabels([f'{x:.0f}%' for x in ticks])
        ax.set_title(title)
        ax.legend()
        ax.grid()

    fig.show()


def training_procedure(model, optimizer, loader, epochs, loss_fn, validation_loader = None, cm = False, device = 'cpu', mu = 1/2, gamma = 0.1, test = False):
    """Train `model` and plot the training (and optional evaluation) loss and accuracy per epoch.

    Args:
        model: network to optimize; it is moved to `device` in place.
        optimizer: optimizer already bound to the parameters of `model`.
        loader: DataLoader yielding (input, target) training batches.
        epochs: number of passes over `loader`.
        loss_fn: callable mapping (output, target) to a scalar loss tensor.
        validation_loader: optional DataLoader evaluated after every epoch with `evaluating_procedure`.
        cm: forwarded to `evaluating_procedure` (confusion matrices at every evaluation).
        device: device on which the model and the batches are placed.
        mu: the learning rate is decayed every round(epochs * mu) epochs (never less than one epoch).
        gamma: multiplicative factor applied to the learning rate at each decay.
        test: if True the evaluation curves are labelled 'Test' instead of 'Validation'.

    Returns:
        None. Progress is printed and the curves are drawn with matplotlib.
    """
    model.to(device) # Move the model to the specified device (CPU or GPU)

    # StepLR needs a strictly positive step size; round(epochs * mu) is 0 for very short runs
    # (e.g. epochs = 1, mu = 1/2), which would raise a ZeroDivisionError inside the scheduler.
    step_size = max(1, round(epochs * mu))
    # The `verbose` flag of the schedulers is deprecated in recent PyTorch releases;
    # the current learning rate is printed explicitly after every scheduler step instead.
    scheduler = StepLR(optimizer, step_size = step_size, gamma = gamma)

    has_validation = bool(validation_loader)

    # Per-epoch history
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    for epoch in range(epochs):

        model.train() # Training mode (evaluation at the end of the previous epoch switched to eval mode)

        batch_losses = []
        correct = 0

        for data, target in loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, target)
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

        # Training metrics of this epoch: mean of the batch losses and accuracy in percent
        avg_loss = np.mean(batch_losses)
        accuracy = 100. * correct / len(loader.dataset)
        print(f'Train Epoch: {epoch + 1}  Loss: {avg_loss:.4f}')
        train_losses.append(avg_loss)
        train_accuracies.append(accuracy)

        # Evaluation step
        if has_validation:
            val_loss, val_accuracy = evaluating_procedure(model, validation_loader, loss_fn, cm, device)
            val_losses.append(val_loss)
            val_accuracies.append(val_accuracy)

        scheduler.step() # Update learning rate
        print(scheduler.get_last_lr())

    eval_name = None
    if has_validation:
        eval_name = 'Test' if test else 'Validation'
    _plot_training_history(train_losses, train_accuracies, val_losses, val_accuracies, eval_name)
    return


def evaluating_procedure(model, loader, loss_fn, cm = False, device = 'cpu'):
    """Evaluate `model` on `loader` and optionally plot confusion matrices.

    Args:
        model: network to evaluate; it is switched to eval mode and moved to `device`.
        loader: DataLoader yielding (input, target) batches.
        loss_fn: callable mapping (output, target) to a scalar loss tensor.
        cm: if True, plot the raw confusion matrix plus its row-normalized (recall)
            and column-normalized (precision) versions.
        device: device on which the model and the batches are placed.

    Returns:
        (loss, accuracy): the mean of the per-batch losses and the accuracy in percent.
    """
    model.eval()  # Set model to evaluation mode
    model.to(device)  # Move model to the specified device

    losses = 0
    correct = 0
    targets = []
    preds = []

    with torch.no_grad(): # Disable gradient computation
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            losses += loss_fn(output, target).item()
            pred = output.argmax(dim=1)
            true = target.view_as(pred)
            correct += pred.eq(true).sum().item()
            # Keep flat lists of labels for the confusion matrices
            preds.extend(pred.tolist())
            targets.extend(true.tolist())

    # Average of the batch losses and accuracy over the whole dataset
    losses /= len(loader)
    accuracy = 100. * correct / len(loader.dataset)
    print('Evaluation set: Average loss: %.4f, Accuracy: %d/%d (%.4f)' % (losses, correct, len(loader.dataset), accuracy))

    # Plot confusion matrices
    if cm:
      class_labels = [0, 1, 2, 3, 4, 5, 6]
      views = [
          (None, 'Confusion matrix', None),
          ('true', '     normalization over rows', 'Recall (on the main diagonal)'),
          ('pred', '     normalization over columns', 'Precision (on the main diagonal)'),
      ]
      for normalize, title, suptitle in views:
        # Passing `labels` keeps the matrix 7x7 even when a class never occurs in the
        # evaluated data; otherwise its size would not match the seven display labels.
        matrix = metrics.confusion_matrix(targets, preds, labels = class_labels, normalize = normalize)
        display = metrics.ConfusionMatrixDisplay(confusion_matrix = matrix, display_labels = class_labels)
        display.plot(cmap = 'PuBu')
        if suptitle is None:
          plt.title(title, fontsize = 17)
        else:
          plt.title(title)
          plt.suptitle(suptitle, fontsize = 17, y = 1)
        plt.show()

    return losses, accuracy



*   Function that creates an iterable dataset object in batches of a given input batch size over the training and validation sets



In [4]:
def create_data(train_set, val_set, batch_size):
  """Wrap a training and an evaluation dataset in mini-batch DataLoaders.

  Args:
      train_set: dataset used for training; its loader reshuffles the samples at every epoch.
      val_set: dataset used for validation or testing; its loader keeps the dataset order.
      batch_size: number of samples per batch for both loaders.

  Returns:
      (train_loader, val_loader)
  """
  train_loader = torch.utils.data.DataLoader(train_set, batch_size = batch_size, shuffle = True)

  # Shuffling brings nothing to evaluation. A fixed order keeps the batch composition, and hence
  # any batch-averaged metric, identical from one evaluation to the next.
  val_loader = torch.utils.data.DataLoader(val_set, batch_size = batch_size, shuffle = False)

  return train_loader, val_loader

*   Function that identifies vertical, horizontal or diagonal lines of three consecutive discs in a given input grid (of either yellow or red tokens) and returns a 6x7 matrix marking all the corresponding empty slots in the grid that would allow a player to form a winning sequence of four discs

In [5]:
def _shift_board(planes, row_shift, col_shift):
    """Translate the last two axes of `planes`, filling the vacated cells with zeros.

    result[..., r, c] equals planes[..., r - row_shift, c - col_shift] wherever that
    source cell exists, and 0 elsewhere.
    """
    height, width = planes.shape[-2:]
    pad_rows, pad_cols = abs(row_shift), abs(col_shift)
    # Pad symmetrically, then cut the window that realizes the requested translation
    padded = F.pad(planes, (pad_cols, pad_cols, pad_rows, pad_rows))
    row_start = pad_rows - row_shift
    col_start = pad_cols - col_shift
    return padded[..., row_start:row_start + height, col_start:col_start + width]


def three_consecuitive_matrix(positions, positions_empty):
    """Mark the empty cells that would extend a line of three consecutive tokens to four.

    Args:
        positions: tensor of shape (l, rows, cols) with 1 where the player has a token and
            0 elsewhere. Any numeric or boolean dtype is accepted; it is converted to float32.
        positions_empty: tensor of shape (l, rows, cols) with 1 on the empty cells.

    Returns:
        float32 tensor of shape (l, rows, cols) with 1 on every empty cell that is adjacent,
        along the line direction, to three consecutive tokens (horizontal, vertical or either
        diagonal), and 0 elsewhere. Only cells at the two ends of a run of three are marked;
        a gap inside a line (two tokens, a hole, one token) is not detected.
    """
    # Add the channel dimension expected by conv2d: (l, 1, rows, cols)
    board = positions.to(torch.float32).unsqueeze(1)
    device = board.device

    # One entry per line direction: the 0/1 kernel that sums three aligned cells, the padding
    # that keeps the output the size of the board, and the offset from the centre of a
    # detected triple to the cell just beyond one of its ends (the other end is the opposite offset).
    line_patterns = (
        ([[1, 1, 1]], (0, 1), (0, 2)),                              # horizontal
        ([[1], [1], [1]], (1, 0), (2, 0)),                          # vertical
        ([[1, 0, 0], [0, 1, 0], [0, 0, 1]], (1, 1), (2, 2)),        # diagonal (\)
        ([[0, 0, 1], [0, 1, 0], [1, 0, 0]], (1, 1), (2, -2)),       # diagonal (/)
    )

    candidates = torch.zeros_like(board, dtype = torch.bool)
    for kernel, padding, (d_row, d_col) in line_patterns:
        weight = torch.tensor([[kernel]], dtype = torch.float32, device = device)
        # A sum of exactly 3 means the cell is the centre of three aligned tokens
        centres = (F.conv2d(board, weight, padding = padding) == 3).float()
        both_ends = _shift_board(centres, d_row, d_col) + _shift_board(centres, -d_row, -d_col)
        candidates = candidates | (both_ends > 0)

    # Keep only the candidates that are still empty on the current board
    empty_mask = (positions_empty == 1).to(device).float()
    return candidates.squeeze(1).float() * empty_mask

*   Function that removes duplicates from the training dataset and enlarges it including also the symmetric version of the board, if not already present

In [6]:
def remove_duplicates(train_set):
    """Drop repeated (position, move) pairs and add the mirrored version of each kept pair.

    Args:
        train_set: iterable of (position, move) pairs, where a position is a sequence of
            rows and a row is a sequence of hashable cells (e.g. characters).

    Returns:
        A new list in which every pair appears once, in order of first occurrence. Each
        kept pair is followed by its left-right mirror (rows reversed, move replaced by
        6 - move) unless an equal pair is already in the result.
    """
    def as_key(board, move):
        # Lists are not hashable; an equivalent nested tuple is, and compares the same way
        return (tuple(tuple(row) for row in board), move)

    # Set of keys of everything already in `unique_elements`: membership tests are O(1)
    # instead of a scan of the whole list for every element.
    seen = set()
    unique_elements = []

    for element in train_set:
        board, move = element[0], element[1]
        key = as_key(board, move)
        if key in seen:
            continue
        seen.add(key)
        unique_elements.append(element)

        # Mirror the board left-right; the move is mirrored accordingly (column c -> 6 - c)
        symmetric_board = [row[::-1] for row in board]
        symmetric_move = 6 - move
        symmetric_key = as_key(symmetric_board, symmetric_move)
        if symmetric_key not in seen:
            seen.add(symmetric_key)
            unique_elements.append((symmetric_board, symmetric_move))

    return unique_elements

**Import and process the data**
*   Import train and test datasets as text files

In [7]:
# Google Drive share links of the two datasets and the local file names they are saved under
train_set_url = 'https://drive.google.com/file/d/1zwshKNoeCh_JHYbaDS0e5Mu_kTPXfL_y/view?usp=drive_link'
test_set_url = 'https://drive.google.com/file/d/1Y9HM-56TTDhIq9B1w7H_WwN1Htg87LyY/view?usp=drive_link'
train_set_filename = 'train_set.txt'
test_set_filename = 'test_set.txt'

# Download both files; fuzzy=True is passed because the URLs are full share links rather than direct download links
gdown.download(train_set_url, train_set_filename, fuzzy=True)
gdown.download(test_set_url, test_set_filename, fuzzy=True)

Downloading...
From: https://drive.google.com/uc?id=1zwshKNoeCh_JHYbaDS0e5Mu_kTPXfL_y
To: /content/train_set.txt
100%|██████████| 350k/350k [00:00<00:00, 39.3MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Y9HM-56TTDhIq9B1w7H_WwN1Htg87LyY
To: /content/test_set.txt
100%|██████████| 50.0k/50.0k [00:00<00:00, 59.5MB/s]


'test_set.txt'

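*   Convert the datasets into PyTorch datasets: the training set is deduplicated and extended with the mirrored (left-right symmetric) version of each grid, while the test set is kept as it is; the different input features are then extracted from both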

In [None]:
# 1. Parse the two text files with "process_dataset": each becomes a list of tuples
#    (grid, move), where the grid is a list of lists of characters

train_set = process_dataset(train_set_filename)
test_set = process_dataset(test_set_filename)


# 2. Drop the repeated training pairs and add the mirrored version of each remaining one

unique = remove_duplicates(train_set)


# 3. Unzip the lists of pairs with the built-in zip and the * operator:
#    one tuple with all the grids and one tuple with all the moves

positions_train, moves_train = zip(*unique)
positions_test, moves_test = zip(*test_set)


# 4. Turn every grid into three 6x7 binary matrices: red tokens ('X'),
#    yellow tokens ('O') and empty slots (' ')

def _symbol_plane(grids, symbol):
    """Return a float tensor that is 1 where a cell of `grids` holds `symbol` and 0 elsewhere."""
    return torch.tensor([[[1 if cell == symbol else 0 for cell in row] for row in grid] for grid in grids]).float()

positions_red_train = _symbol_plane(positions_train, 'X')
positions_yel_train = _symbol_plane(positions_train, 'O')
positions_emp_train = _symbol_plane(positions_train, ' ')

positions_red_test = _symbol_plane(positions_test, 'X')
positions_yel_test = _symbol_plane(positions_test, 'O')
positions_emp_test = _symbol_plane(positions_test, ' ')

#    Two more matrices per grid come from "three_consecuitive_matrix" (defined above),
#    applied to the red and to the yellow tokens
three_red_train = three_consecuitive_matrix(positions_red_train, positions_emp_train)
three_yel_train = three_consecuitive_matrix(positions_yel_train, positions_emp_train)

three_red_test = three_consecuitive_matrix(positions_red_test, positions_emp_test)
three_yel_test = three_consecuitive_matrix(positions_yel_test, positions_emp_test)


# 5. Stack the matrices along a new channel dimension
#    "ry":    (2, 6, 7) per sample -> red and yellow tokens only
#    "ryery": (5, 6, 7) per sample -> red, yellow, empty, red candidates, yellow candidates

stacked_inputs_ry = torch.stack((positions_red_train, positions_yel_train), dim = 1)
stacked_inputs_ryery = torch.stack(
    (positions_red_train, positions_yel_train, positions_emp_train, three_red_train, three_yel_train),
    dim = 1,
)

stacked_test_ry = torch.stack((positions_red_test, positions_yel_test), dim = 1)
stacked_test_ryery = torch.stack(
    (positions_red_test, positions_yel_test, positions_emp_test, three_red_test, three_yel_test),
    dim = 1,
)


# 6. Pair every input tensor with its moves in a PyTorch dataset

pytorch_train_ry = torch.utils.data.TensorDataset(stacked_inputs_ry, torch.tensor(moves_train))
pytorch_train_ryery = torch.utils.data.TensorDataset(stacked_inputs_ryery, torch.tensor(moves_train))

pytorch_test_ry = torch.utils.data.TensorDataset(stacked_test_ry, torch.tensor(moves_test))
pytorch_test_ryery = torch.utils.data.TensorDataset(stacked_test_ryery, torch.tensor(moves_test))

Processed train_set.txt into a dataset with 7000 positions
Processed test_set.txt into a dataset with 1000 positions


*   Split the dataset into training and validation sets


In [None]:
# Hold out a fixed number of samples for validation. The training size is derived from the
# dataset length, so the split stays valid if the deduplicated/augmented training set changes size.
val_size = 1000
train_size = len(pytorch_train_ry) - val_size

# Both datasets are built from the same grids in the same order, so seeding each split with the
# same value gives the two feature sets the same train/validation partition and makes it reproducible.
split_seed = 0
train_ry, val_ry = torch.utils.data.random_split(
    pytorch_train_ry, [train_size, val_size], generator = torch.Generator().manual_seed(split_seed))
train_ryery, val_ryery = torch.utils.data.random_split(
    pytorch_train_ryery, [train_size, val_size], generator = torch.Generator().manual_seed(split_seed))

<br><br>

---
<br>

**Fully connected neural networks**

**1.**   Considering the dataset with no repetition and two input matrices corresponding to the red and yellow tokens in the match grid

In [None]:
# MLP on the flattened (2, 6, 7) input: two hidden stages followed by a 7-class output layer.
# Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout(0.5); the stages are generated
# in order from their (input size, output size) pairs.
fullyconnected_model_ry = nn.Sequential(
    nn.Flatten(),                                   # (2, 6, 7) -> 84 features
    *(layer
      for n_in, n_out in ((42*2, 350), (350, 150))
      for layer in (nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU(), nn.Dropout(0.5))),
    nn.Linear(150, 7),                              # 7-class classification
)

# Layer-by-layer summary, with the model placed on the selected device
summary(fullyconnected_model_ry.to(device), (2,6,7))

<br>

\- Model training



In [None]:
# 1. Wrap the training and validation subsets in mini-batch loaders

batch_size_ry_mlp = 20
train_loader_ry_mlp, val_loader_ry_mlp = create_data(train_ry, val_ry, batch_size_ry_mlp)


# 2. Hyperparameters, loss function and optimizer of the training procedure

epochs_ry_mlp = 25              # number of epochs
learning_rate_ry_mlp = 0.0005   # initial learning rate
weight_decay_ry_mlp = 5e-3      # weight decay (regularization parameter)
mu_ry_mlp = 1/2                 # the learning rate is decayed every round(epochs * mu) epochs
gamma_ry_mlp = 0.2              # multiplicative learning rate decay factor

loss_fn_ry_mlp = nn.CrossEntropyLoss()
optimizer_ry_mlp = torch.optim.Adam(
    fullyconnected_model_ry.parameters(),
    lr = learning_rate_ry_mlp,
    weight_decay = weight_decay_ry_mlp,
)


# # 3. Training of the model

# training_procedure(fullyconnected_model_ry, optimizer_ry_mlp, train_loader_ry_mlp, epochs_ry_mlp, loss_fn_ry_mlp,
#                    validation_loader = val_loader_ry_mlp, cm = False, device = device, mu = mu_ry_mlp, gamma = gamma_ry_mlp)


# # 4. Evaluation of the model on the validation set

# __ = evaluating_procedure(fullyconnected_model_ry, val_loader_ry_mlp, loss_fn_ry_mlp, cm = True, device = device)

<br>

\- Retrain the model on the whole training dataset and evaluate it on the test set

In [None]:
# Fresh copy of the two-input MLP architecture, to be trained on the whole training dataset.
# Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout(0.5).
fullyconnected_test_ry = nn.Sequential(
    nn.Flatten(),                                   # (2, 6, 7) -> 84 features
    *(layer
      for n_in, n_out in ((42*2, 350), (350, 150))
      for layer in (nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU(), nn.Dropout(0.5))),
    nn.Linear(150, 7),                              # 7-class classification
)

# Longer schedule for the final run; the loaders now cover the full training set and the test set
epochs_ry_mlp = 45
train_loader_ry_mlp, test_loader_ry_mlp = create_data(pytorch_train_ry, pytorch_test_ry, batch_size_ry_mlp)

# New optimizer bound to the parameters of the fresh model (same learning rate and weight decay as before)
optimizer_ry_mlp = torch.optim.Adam(
    fullyconnected_test_ry.parameters(),
    lr = learning_rate_ry_mlp,
    weight_decay = weight_decay_ry_mlp,
)
loss_fn_ry_mlp = nn.CrossEntropyLoss()

# Train while tracking the test metrics after every epoch (test = True only changes the plot labels)
training_procedure(
    fullyconnected_test_ry, optimizer_ry_mlp, train_loader_ry_mlp, epochs_ry_mlp, loss_fn_ry_mlp,
    validation_loader = test_loader_ry_mlp, cm = False, device = device,
    mu = mu_ry_mlp, gamma = gamma_ry_mlp, test = True,
)

# Final evaluation on the test set, with confusion matrices
__ = evaluating_procedure(fullyconnected_test_ry, test_loader_ry_mlp, loss_fn_ry_mlp, cm = True, device = device)

<br> <br>

**2.** Considering the dataset with no repetition and five input matrices: the Red and Yellow positions in the match grid, the matrix of Empty cells, and the two matrices created via the `three_consecuitive_matrix` function applied to the Red token grid and to the Yellow one respectively

In [None]:
# MLP on the flattened (5, 6, 7) input: three hidden stages followed by a 7-class output layer.
# Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout(0.5); the stages are generated
# in order from their (input size, output size) pairs.
fullyconnected_model_ryery = nn.Sequential(
    nn.Flatten(),                                   # (5, 6, 7) -> 210 features
    *(layer
      for n_in, n_out in ((42*5, 350), (350, 128), (128, 64))
      for layer in (nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU(), nn.Dropout(0.5))),
    nn.Linear(64, 7),                               # 7-class classification
)


# Module listing followed by the layer-by-layer summary, with the model placed on the selected device
print(fullyconnected_model_ryery)
summary(fullyconnected_model_ryery.to(device), (5,6,7))

<br>

\- Model training

In [None]:
# 1. Wrap the training and validation subsets in mini-batch loaders

batch_size_ryery_mlp = 20
train_loader_ryery_mlp, val_loader_ryery_mlp = create_data(train_ryery, val_ryery, batch_size_ryery_mlp)


# 2. Hyperparameters, loss function and optimizer of the training procedure

epochs_ryery_mlp = 60              # number of epochs
learning_rate_ryery_mlp = 0.0001   # initial learning rate
weight_decay_ryery_mlp = 5e-3      # weight decay (regularization parameter)
mu_ryery_mlp = 5/8                 # the learning rate is decayed every round(epochs * mu) epochs
gamma_ryery_mlp = 0.3              # multiplicative learning rate decay factor

loss_fn_ryery_mlp = nn.CrossEntropyLoss()
optimizer_ryery_mlp = torch.optim.Adam(
    fullyconnected_model_ryery.parameters(),
    lr = learning_rate_ryery_mlp,
    weight_decay = weight_decay_ryery_mlp,
)


# # 3. Training of the model

# training_procedure(fullyconnected_model_ryery, optimizer_ryery_mlp, train_loader_ryery_mlp, epochs_ryery_mlp,
#                    loss_fn_ryery_mlp, validation_loader = val_loader_ryery_mlp, cm = False, device = device, mu = mu_ryery_mlp, gamma = gamma_ryery_mlp)


# # 4. Evaluation of the model on the validation set

# __ = evaluating_procedure(fullyconnected_model_ryery, val_loader_ryery_mlp, loss_fn_ryery_mlp, cm = True, device = device)

<br>

\- Retrain the model on the whole training dataset and evaluate it on the test set

In [None]:
# Fresh copy of the five-input MLP architecture, to be trained on the whole training dataset.
# Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout(0.5).
fullyconnected_test_ryery = nn.Sequential(
    nn.Flatten(),                                   # (5, 6, 7) -> 210 features
    *(layer
      for n_in, n_out in ((42*5, 350), (350, 128), (128, 64))
      for layer in (nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU(), nn.Dropout(0.5))),
    nn.Linear(64, 7),                               # 7-class classification
)

# Longer schedule for the final run; the loaders now cover the full training set and the test set
epochs_ryery_mlp = 80
train_loader_ryery_mlp, test_loader_ryery_mlp = create_data(pytorch_train_ryery, pytorch_test_ryery, batch_size_ryery_mlp)

# New optimizer bound to the parameters of the fresh model (same learning rate and weight decay as before)
optimizer_ryery_mlp = torch.optim.Adam(
    fullyconnected_test_ryery.parameters(),
    lr = learning_rate_ryery_mlp,
    weight_decay = weight_decay_ryery_mlp,
)
loss_fn_ryery_mlp = nn.CrossEntropyLoss()

# Train while tracking the test metrics after every epoch (test = True only changes the plot labels)
training_procedure(
    fullyconnected_test_ryery, optimizer_ryery_mlp, train_loader_ryery_mlp, epochs_ryery_mlp, loss_fn_ryery_mlp,
    validation_loader = test_loader_ryery_mlp, cm = False, device = device,
    mu = mu_ryery_mlp, gamma = gamma_ryery_mlp, test = True,
)

# Final evaluation on the test set, with confusion matrices
__ = evaluating_procedure(fullyconnected_test_ryery, test_loader_ryery_mlp, loss_fn_ryery_mlp, cm = True, device = device)

<br><br>

---

<br>

**Convolutional Neural Networks**

**1.**   Considering the dataset with no repetition and two input matrices corresponding to Red and Yellow positions in the match grid

In [None]:
# cnn_model_ry = nn.Sequential(

#     #Kernel: a small matrix used in convolution to capture certain patterns
#     #Padding to add extra pixels around the edges of the input image

#     nn.Conv2d(2, 64, kernel_size =4, padding=2),     # Convolutional layer with 2 input matrices
#     nn.BatchNorm2d(64),                              # Normalize the input values ​​in each mini-batch, so that they have a mean of zero and a standard deviation of one
#     nn.ReLU(),                                       # ReLU activation function
#     nn.Dropout(0.15),

#     nn.Conv2d(64, 32, kernel_size=4, padding=1),     # Convolutional layer
#     nn.BatchNorm2d(32),
#     nn.ReLU(),
#     nn.Dropout(0.3),

#     nn.Conv2d(32, 32, kernel_size=4, padding=1),     # Convolutional layer
#     nn.BatchNorm2d(32),
#     nn.ReLU(),
#     nn.Dropout(0.4),

#     nn.Conv2d(32, 16, kernel_size=3, padding=1),     # Convolutional layer
#     nn.BatchNorm2d(16),
#     nn.ReLU(),
#     nn.Dropout(0.4),

#     nn.Flatten(),                                    # Flatten the input

#     nn.Linear(480, 64),                              # Linear layer
#     nn.BatchNorm1d(64),
#     nn.ReLU(),
#     nn.Dropout(0.5),

#     nn.Linear(64, 7),                                # Linear layer

# )

# if device == 'cuda':
#   summary(cnn_model_ry.cuda(), (2,6,7))
# else:
#   summary(cnn_model_ry, (2,6,7))

<br>

\- Model training

In [None]:
# # 1. Compute the batch of both train and validation sets

# batch_size_ry_cnn=30
# train_loader_ry_cnn, val_loader_ry_cnn= create_data(train_ry, val_ry, batch_size_ry_cnn)


# # 2. Set parameters and functions for the training procedure such as: learning rate, multiplicative learning rate decay factor gamma,
# #    percentage of learning rate decay mu, number of epochs, weight decay (regularization parameter), optimizer and loss function

# learning_rate_ry_cnn = 0.005
# epochs_ry_cnn = 120
# weight_decay_ry_cnn = 1e-3
# optimizer_ry_cnn = torch.optim.Adam(cnn_model_ry.parameters(), lr = learning_rate_ry_cnn, weight_decay = weight_decay_ry_cnn)
# loss_fn_ry_cnn = nn.CrossEntropyLoss()
# mu_ry_cnn = 2/3


# # 3. Training of the model

# train_losses = training_procedure(cnn_model_ry, optimizer_ry_cnn, train_loader_ry_cnn, epochs_ry_cnn,
#                                   loss_fn_ry_cnn, validation_loader = val_loader_ry_cnn, device = device, mu = mu_ry_cnn)


# # 4. Evaluation of the model in the validation set

# __ = evaluating_procedure(cnn_model_ry, val_loader_ry_cnn, loss_fn_ry_cnn, cm = True, device = device)

<br>

\- Retrain the model on the whole training dataset and evaluate it on the test set

In [None]:
# cnn_test_ry = nn.Sequential(

#     #Kernel a small matrix used in convolution to capture certain patterns
#     #Padding to add extra pixels around the edges of the input image

#     nn.Conv2d(2, 64, kernel_size =4, padding=2),     # Convolutional layer with 2 input matrices
#     nn.BatchNorm2d(64),                              # Normalizes the input values ​​in each mini-batch, so that they have a mean of zero and a standard deviation of one
#     nn.ReLU(),                                       # ReLU activation function
#     nn.Dropout(0.15),

#     nn.Conv2d(64, 32, kernel_size=4, padding=1),     # Convolutional layer
#     nn.BatchNorm2d(32),
#     nn.ReLU(),
#     nn.Dropout(0.3),

#     nn.Conv2d(32, 32, kernel_size=4, padding=1),     # Convolutional layer
#     nn.BatchNorm2d(32),
#     nn.ReLU(),
#     nn.Dropout(0.4),

#     nn.Conv2d(32, 16, kernel_size=3, padding=1),     # Convolutional layer
#     nn.BatchNorm2d(16),
#     nn.ReLU(),
#     nn.Dropout(0.4),

#     nn.Flatten(),                                    # Flattens the input

#     nn.Linear(480, 64),                              # Linear layer
#     nn.BatchNorm1d(64),
#     nn.ReLU(),
#     nn.Dropout(0.5),

#     nn.Linear(64, 7),                                # Linear layer

# )

# epochs_ry_cnn = 160
# train_loader_ry_cnn, test_loader_ry_cnn= create_data(pytorch_train_ry, pytorch_test_ry, batch_size_ry_cnn)
# optimizer_ry_cnn = torch.optim.Adam(cnn_test_ry.parameters(), lr = learning_rate_ry_cnn, weight_decay = weight_decay_ry_cnn)
# loss_fn_ry_cnn = nn.CrossEntropyLoss()



# training_procedure(cnn_test_ry, optimizer_ry_cnn, train_loader_ry_cnn, epochs_ry_cnn,
#                    loss_fn_ry_cnn, validation_loader = test_loader_ry_cnn, device = device, mu = mu_ry_cnn, test = True)

# __ = evaluating_procedure(cnn_test_ry, test_loader_ry_cnn, loss_fn_ry_cnn, cm = True, device = device)

<br>

**2.**   Considering the dataset with no repetition and five input matrices: the Red and Yellow positions in the match grid, the matrix of Empty cells, and the two matrices created via the `three_consecuitive_matrix` function applied to the Red token grid and to the Yellow one respectively

In [None]:
# CNN on the (5, 6, 7) input: four convolutional stages, then a dense head.
# Each convolutional stage is Conv2d -> BatchNorm2d -> ReLU -> Dropout and is generated in order from
# (input channels, output channels, kernel size, padding, dropout probability).
cnn_model_ryery = nn.Sequential(
    *(layer
      for c_in, c_out, kernel, pad, p_drop in (
          (5, 64, 4, 2, 0.45),      # 5 input matrices; 6x7 board -> 7x8 feature maps
          (64, 64, 4, 1, 0.5),      # 7x8 -> 6x7
          (64, 64, 4, 1, 0.5),      # 6x7 -> 5x6
          (64, 32, 3, 1, 0.5),      # 5x6 -> 5x6
      )
      for layer in (nn.Conv2d(c_in, c_out, kernel_size=kernel, padding=pad),
                    nn.BatchNorm2d(c_out), nn.ReLU(), nn.Dropout(p_drop))),

    nn.Flatten(),                   # 32 channels x 5 x 6 -> 960 features

    nn.Linear(960, 64),
    nn.BatchNorm1d(64),
    nn.ReLU(),
    nn.Dropout(0.5),

    nn.Linear(64, 7),               # 7-class classification
)


# Layer-by-layer summary, with the model placed on the selected device
summary(cnn_model_ryery.to(device), (5,6,7))

<br>

\- Model training

In [None]:
# 1. Wrap the training and validation subsets in mini-batch loaders

batch_size_ryery_cnn = 30
train_loader_ryery_cnn, val_loader_ryery_cnn = create_data(train_ryery, val_ryery, batch_size_ryery_cnn)


# 2. Hyperparameters, loss function and optimizer of the training procedure

epochs_ryery_cnn = 120            # number of epochs
learning_rate_ryery_cnn = 0.001   # initial learning rate
weight_decay_ryery_cnn = 1e-3     # weight decay (regularization parameter)
mu_ryery_cnn = 5/8                # the learning rate is decayed every round(epochs * mu) epochs
gamma_ryery_cnn = 0.4             # multiplicative learning rate decay factor

loss_fn_ryery_cnn = nn.CrossEntropyLoss()
optimizer_ryery_cnn = torch.optim.Adam(
    cnn_model_ryery.parameters(),
    lr = learning_rate_ryery_cnn,
    weight_decay = weight_decay_ryery_cnn,
)


# # 3.  Training of the model

# training_procedure(cnn_model_ryery, optimizer_ryery_cnn, train_loader_ryery_cnn, epochs_ryery_cnn,
#                    loss_fn_ryery_cnn, validation_loader = val_loader_ryery_cnn, device = device, mu = mu_ryery_cnn, gamma = gamma_ryery_cnn)


# # 4. Evaluation of the model in the validation set

# x = evaluating_procedure(cnn_model_ryery, val_loader_ryery_cnn, loss_fn_ryery_cnn, cm = True, device = device)

<br>

\- Retrain the model on the whole training dataset and evaluate it on the test set

In [None]:
# Fresh copy of the five-input CNN architecture, to be trained on the whole training dataset.
# Each convolutional stage is Conv2d -> BatchNorm2d -> ReLU -> Dropout and is generated in order from
# (input channels, output channels, kernel size, padding, dropout probability).
cnn_test_ryery = nn.Sequential(
    *(layer
      for c_in, c_out, kernel, pad, p_drop in (
          (5, 64, 4, 2, 0.45),      # 5 input matrices; 6x7 board -> 7x8 feature maps
          (64, 64, 4, 1, 0.5),      # 7x8 -> 6x7
          (64, 64, 4, 1, 0.5),      # 6x7 -> 5x6
          (64, 32, 3, 1, 0.5),      # 5x6 -> 5x6
      )
      for layer in (nn.Conv2d(c_in, c_out, kernel_size=kernel, padding=pad),
                    nn.BatchNorm2d(c_out), nn.ReLU(), nn.Dropout(p_drop))),

    nn.Flatten(),                   # 32 channels x 5 x 6 -> 960 features

    nn.Linear(960, 64),
    nn.BatchNorm1d(64),
    nn.ReLU(),
    nn.Dropout(0.5),

    nn.Linear(64, 7),               # 7-class classification
)

# Longer schedule for the final run; the loaders now cover the full training set and the test set
epochs_ryery_cnn = 160
train_loader_ryery_cnn, test_loader_ryery_cnn = create_data(pytorch_train_ryery, pytorch_test_ryery, batch_size_ryery_cnn)

# New optimizer bound to the parameters of the fresh model (same learning rate and weight decay as before)
optimizer_ryery_cnn = torch.optim.Adam(
    cnn_test_ryery.parameters(),
    lr = learning_rate_ryery_cnn,
    weight_decay = weight_decay_ryery_cnn,
)
loss_fn_ryery_cnn = nn.CrossEntropyLoss()

# Train while tracking the test metrics after every epoch (test = True only changes the plot labels)
training_procedure(
    cnn_test_ryery, optimizer_ryery_cnn, train_loader_ryery_cnn, epochs_ryery_cnn, loss_fn_ryery_cnn,
    validation_loader = test_loader_ryery_cnn, device = device,
    mu = mu_ryery_cnn, gamma = gamma_ryery_cnn, test = True,
)

# Final evaluation on the test set, with confusion matrices
__ = evaluating_procedure(cnn_test_ryery, test_loader_ryery_cnn, loss_fn_ryery_cnn, cm = True, device = device)