# **Pattern Recognition - Machine Learning** | Assignment 3





# **1) Music Track classification**





In [None]:
from google.colab import drive

# Mount Google Drive at /content/gdrive; the dataset paths used by the
# data-loading cells below all live under this mount point.
drive.mount('/content/gdrive')

### Question 1: Feedforward Neural Network

1.   Loading Data

In [None]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

# Folders (inside the mounted Drive) holding the MFCC features of each split
train_data_path = "/content/gdrive/MyDrive/data/music_genre_data_di/train/mfccs/"
valid_data_path = "/content/gdrive/MyDrive/data/music_genre_data_di/val/mfccs/"
test_data_path = "/content/gdrive/MyDrive/data/music_genre_data_di/test/mfccs/"

# Load MFCC data: every split folder provides X.npy (features) and labels.npy
# train data
train_data = np.load(train_data_path + 'X.npy')
train_labels = np.load(train_data_path + 'labels.npy')

# validation data
valid_data = np.load(valid_data_path + 'X.npy')
valid_labels = np.load(valid_data_path + 'labels.npy')

# test data
test_data = np.load(test_data_path + 'X.npy')
test_labels = np.load(test_data_path + 'labels.npy')

# Create label mapping from the sorted unique *training* labels, so the same
# label -> index encoding is applied to every split
label_mapping = {label: idx for idx, label in enumerate(np.unique(train_labels))}
num_classes = len(label_mapping)

# Map label strings to integer numbers
# (a label that never occurs in the training split raises KeyError here)
train_labels = np.array([label_mapping[label] for label in train_labels])   # train
valid_labels = np.array([label_mapping[label] for label in valid_labels])   # validation
test_labels = np.array([label_mapping[label] for label in test_labels])     # test

# Convert data and labels to PyTorch tensors:
# float32 features for the network, int64 class indices for CrossEntropyLoss
train_data = torch.from_numpy(train_data).float()     # train
train_labels = torch.from_numpy(train_labels).long()
valid_data = torch.from_numpy(valid_data).float()     # validation
valid_labels = torch.from_numpy(valid_labels).long()
test_data = torch.from_numpy(test_data).float()       # test
test_labels = torch.from_numpy(test_labels).long()

# Create TensorDataset
train_dataset = TensorDataset(train_data, train_labels) # train
valid_dataset = TensorDataset(valid_data, valid_labels) # validation
test_dataset = TensorDataset(test_data, test_labels)    # test

# Create train, validation, and testing dataloaders.
# Only the training loader shuffles: shuffling is an SGD aid, whereas the
# validation/test loaders are only used for evaluation, where a fixed order
# keeps runs comparable and does not consume random numbers.
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)  # train DataLoader
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False) # validation DataLoader
test_loader = DataLoader(test_dataset, batch_size=batch_size)                  # test DataLoader


2.   FCNN (Fully Connected Neural Network) initialization

In [None]:
import torch.nn as nn

class FullyConnectedNet(nn.Module):
    """Fully connected classifier: two ReLU hidden layers and a linear output layer.

    Args:
        in_features: size of each input feature vector (default 26).
        hidden1: number of neurons in the first hidden layer (default 128).
        hidden2: number of neurons in the second hidden layer (default 32).
        num_classes: number of output logits, one per class (default 4).

    The defaults reproduce the original fixed 26 -> 128 -> 32 -> 4 architecture,
    so `FullyConnectedNet()` behaves exactly as before.
    """

    def __init__(self, in_features=26, hidden1=128, hidden2=32, num_classes=4):
        super().__init__()

        # Definition of the layers and the neurons
        self.fc1 = nn.Linear(in_features, hidden1)  # input -> first hidden layer
        self.fc2 = nn.Linear(hidden1, hidden2)      # first hidden -> second hidden layer
        self.fc3 = nn.Linear(hidden2, num_classes)  # second hidden -> output layer (logits)
        self.relu = nn.ReLU()                       # ReLU activation function

    def forward(self, x):
        """Return the raw class logits for a batch `x` of shape (batch, in_features)."""
        x = self.relu(self.fc1(x))  # first hidden layer + ReLU
        x = self.relu(self.fc2(x))  # second hidden layer + ReLU
        # No activation on the output: nn.CrossEntropyLoss expects raw logits
        x = self.fc3(x)
        return x



3.   Function to train the FCNN

In [None]:
# Function to train the NN
def train(epochs, optimizer, dataloader, cost_function, model):
    """Train `model` for `epochs` passes over `dataloader` and return it.

    Args:
        epochs: number of full passes over `dataloader`.
        optimizer: optimizer built over `model.parameters()`.
        dataloader: iterable yielding `(inputs, labels)` batches.
        cost_function: callable `(outputs, labels) -> loss tensor`.
        model: the network to train (updated in place).

    Returns:
        The same `model` object, after training.

    Raises:
        ValueError: if `dataloader` yields no batches.
    """
    # Batches are moved to wherever the model's parameters live, so the same
    # function works on CPU and on GPU (consistent with evaluate()).
    device = next(model.parameters()).device

    # evaluate() leaves the model in eval mode; make sure training always runs
    # in training mode.
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0

        # Iterate over the batches in the dataloader
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            # Zero the gradients accumulated by the previous step
            optimizer.zero_grad()

            # Forward pass
            batch_outputs = model(batch_inputs)

            # Calculate the loss
            loss = cost_function(batch_outputs, batch_labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Update the running loss
            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("dataloader yielded no batches; cannot train on empty data")

        # Print the average (per-batch) loss for the epoch
        epoch_loss = running_loss / num_batches
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")

    # Return the trained model
    return model


4.   Function to evaluate the trained FCNN model

In [None]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
import time

# Evaluate a trained model on a dataloader and report its metrics
def evaluate(model, dataloader, loss_function):
    """Run `model` over `dataloader` without gradients and print/return metrics.

    Args:
        model: trained network; it is switched to evaluation mode.
        dataloader: iterable yielding `(inputs, labels)` batches.
        loss_function: callable `(outputs, labels) -> loss tensor`.

    Returns:
        Tuple `(loss, f1, accuracy, confusion_matrix)` where `loss` is the mean
        of the per-batch losses and `f1` is macro-averaged.
    """
    model.eval()
    # Batches are moved to the device that holds the model's parameters
    target_device = next(model.parameters()).device

    loss_sum = 0.0
    predicted_classes, true_classes = [], []

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(target_device), targets.to(target_device)

            logits = model(inputs)
            loss_sum += loss_function(logits, targets).item()

            # The index of the largest logit is the predicted class
            batch_prediction = torch.max(logits, dim=1).indices

            predicted_classes.extend(batch_prediction.cpu().numpy())
            true_classes.extend(targets.cpu().numpy())

    # Aggregate metrics over the whole dataloader
    epoch_loss = loss_sum / len(dataloader)
    f1 = f1_score(true_classes, predicted_classes, average='macro')
    accuracy = accuracy_score(true_classes, predicted_classes)
    confusion_mat = confusion_matrix(true_classes, predicted_classes)

    # Report
    print(f"Loss: {epoch_loss:.4f}")
    print(f"F1 Score (macro-averaged): {f1:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print("Confusion Matrix:")
    print(confusion_mat)

    return epoch_loss, f1, accuracy, confusion_mat


5.   Call train() to train the model. Then, call the evaluate() to evaluate the efficiency of the trained model.

In [None]:
# Build every argument that the train() function needs

# initialize a FCNN
# NOTE(review): the model is created with default placement and is never moved
# with `.to(device)` in this cell, so as written training runs on the CPU
# whichever Colab hardware accelerator is selected.
model = FullyConnectedNet()

# initialize the optimizer (plain SGD) with learning rate 'lr'=0.002
optimizer = torch.optim.SGD(model.parameters(), lr=0.002)

# cost function: cross-entropy computed on the raw logits of the network
cost_function = nn.CrossEntropyLoss()

# epochs: number of full passes over the training loader
epochs = 30

print("\n-------------------------Model training results-------------------------\n")

# Measure elapsed wall-clock time (time.time() does not measure CPU time)
start_time = time.time()

# acquire the trained model
trained_model = train(epochs, optimizer, train_loader, cost_function, model)

training_time = time.time() - start_time
print("\n> Training time: ", training_time, "sec")

In [None]:
# Initialize the input parameters and call the evaluate() function
# on the test loader with the model returned by train()
model = trained_model
loss_function = nn.CrossEntropyLoss()
print("\n-------------------------Model evaluation results-------------------------\n")

# Measure elapsed wall-clock time of the evaluation
start_time = time.time()

loss, f1_macro_avg, accuracy, confusion_m = evaluate(model, test_loader, loss_function)

evaluation_time = time.time() - start_time
print("\n> Evaluation time: ", evaluation_time, "sec")

6.   Train the FCNN using GPU

For finding the results in this question, we first run the entire code using CPU runtime (selecting "None" as the hardware accelerator in Colab settings). The results we obtained for CPU runtime are as follows:

In [None]:
# -------------------------Model training results-------------------------

# Epoch [1/30], Loss: 1.3829
# Epoch [2/30], Loss: 1.3602
# Epoch [3/30], Loss: 1.3519
# Epoch [4/30], Loss: 1.3405
# Epoch [5/30], Loss: 1.3272
# Epoch [6/30], Loss: 1.3162
# Epoch [7/30], Loss: 1.3012
# Epoch [8/30], Loss: 1.2848
# Epoch [9/30], Loss: 1.2668
# Epoch [10/30], Loss: 1.2470
# Epoch [11/30], Loss: 1.2262
# Epoch [12/30], Loss: 1.2080
# Epoch [13/30], Loss: 1.1876
# Epoch [14/30], Loss: 1.1696
# Epoch [15/30], Loss: 1.1493
# Epoch [16/30], Loss: 1.1335
# Epoch [17/30], Loss: 1.1194
# Epoch [18/30], Loss: 1.1036
# Epoch [19/30], Loss: 1.0866
# Epoch [20/30], Loss: 1.0762
# Epoch [21/30], Loss: 1.0637
# Epoch [22/30], Loss: 1.0532
# Epoch [23/30], Loss: 1.0403
# Epoch [24/30], Loss: 1.0315
# Epoch [25/30], Loss: 1.0216
# Epoch [26/30], Loss: 1.0106
# Epoch [27/30], Loss: 1.0005
# Epoch [28/30], Loss: 0.9882
# Epoch [29/30], Loss: 0.9900
# Epoch [30/30], Loss: 0.9794

# > Training time:  7.532282829284668 sec


# -------------------------Model evaluation results-------------------------

# Loss: 1.0728
# F1 Score (macro-averaged): 0.5322
# Accuracy: 0.5392
# Confusion Matrix:
# [[112  91  52  69]
#  [ 19 271   5   2]
#  [ 93  54 194  15]
#  [112  84  38 165]]

# > Evaluation time:  0.06703901290893555 sec

Next, we execute the code using a GPU runtime, selecting "GPU" as the hardware accelerator in the Colab settings with the "A100" GPU type. This requires the paid Google Colab Pro tier, which is available to us because it is also used for professional purposes. The results for the A100 GPU runtime are:

In [None]:
# -------------------------Model training results-------------------------

# Epoch [1/30], Loss: 1.3786
# Epoch [2/30], Loss: 1.3666
# Epoch [3/30], Loss: 1.3587
# Epoch [4/30], Loss: 1.3506
# Epoch [5/30], Loss: 1.3410
# Epoch [6/30], Loss: 1.3303
# Epoch [7/30], Loss: 1.3173
# Epoch [8/30], Loss: 1.3028
# Epoch [9/30], Loss: 1.2871
# Epoch [10/30], Loss: 1.2701
# Epoch [11/30], Loss: 1.2517
# Epoch [12/30], Loss: 1.2317
# Epoch [13/30], Loss: 1.2114
# Epoch [14/30], Loss: 1.1908
# Epoch [15/30], Loss: 1.1711
# Epoch [16/30], Loss: 1.1550
# Epoch [17/30], Loss: 1.1364
# Epoch [18/30], Loss: 1.1215
# Epoch [19/30], Loss: 1.1103
# Epoch [20/30], Loss: 1.0960
# Epoch [21/30], Loss: 1.0869
# Epoch [22/30], Loss: 1.0765
# Epoch [23/30], Loss: 1.0651
# Epoch [24/30], Loss: 1.0560
# Epoch [25/30], Loss: 1.0455
# Epoch [26/30], Loss: 1.0377
# Epoch [27/30], Loss: 1.0326
# Epoch [28/30], Loss: 1.0232
# Epoch [29/30], Loss: 1.0089
# Epoch [30/30], Loss: 1.0032

# > Training time:  5.595116376876831 sec


# -------------------------Model evaluation results-------------------------

# Loss: 1.0313
# F1 Score (macro-averaged): 0.5502
# Accuracy: 0.6025
# Confusion Matrix:
# [[ 23  21 168 112]
#  [ 29 210  33  25]
#  [ 10  12 312  22]
#  [ 21  18  76 284]]

# > Evaluation time:  0.05098462104797363 sec

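Indeed, we can observe that the times measured on the GPU runtime are better than those on the CPU runtime. In general this is theoretically expected, since GPUs are more efficient at highly parallel workloads such as neural network training and evaluation, due to their different hardware architecture. Note, however, that the code above never moves the model or the batches to the GPU (there is no `model.to(device)` call), so both runs actually execute on the CPU; the measured difference therefore most likely reflects the different host machine of the two runtimes rather than GPU acceleration.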

7.   Find the best instance of the FCNN through the epochs

In [None]:
# Function to train the NN and find the best model instance based on F1 metric
def train_and_find_best_instance(epochs, optimizer, train_loader, valid_loader, cost_function, model):
    """Train `model` and restore the weights of the epoch with the best validation F1.

    After every training epoch the model is evaluated on `valid_loader`; the
    weights of the epoch with the highest macro-averaged F1 score are
    snapshotted and loaded back into `model` before returning.

    Args:
        epochs: number of full passes over `train_loader`.
        optimizer: optimizer built over `model.parameters()`.
        train_loader: iterable yielding `(inputs, labels)` training batches.
        valid_loader: iterable yielding `(inputs, labels)` validation batches.
        cost_function: callable `(outputs, labels) -> loss tensor`.
        model: the network to train (updated in place).

    Returns:
        Tuple `(model, best_epoch)`; `best_epoch` is 1-based, or 0 when no epoch ran.
    """
    import copy  # local import: only needed to snapshot the best weights

    # Batches are moved to wherever the model's parameters live (as in evaluate())
    device = next(model.parameters()).device

    # Start below any achievable F1 so the first epoch is always recorded,
    # even if its F1 is exactly 0.
    best_f1 = -1.0
    best_epoch = 0
    f1_best_instance = None

    for epoch in range(epochs):
        running_loss = 0.0

        # Training phase
        model.train()
        for batch_inputs, batch_labels in train_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            optimizer.zero_grad()
            batch_outputs = model(batch_inputs)
            loss = cost_function(batch_outputs, batch_labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Calculate average training loss
        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch+1}/{epochs}], Training Loss: {epoch_loss:.4f}")

        # Evaluation phase
        model.eval()
        with torch.no_grad():
            all_predictions = []
            all_labels = []

            for batch_inputs, batch_labels in valid_loader:
                batch_outputs = model(batch_inputs.to(device))
                _, predicted = torch.max(batch_outputs, dim=1)

                all_predictions.extend(predicted.tolist())
                all_labels.extend(batch_labels.tolist())

            # Calculate evaluation metrics
            f1 = f1_score(all_labels, all_predictions, average='macro')

            # Print F1 score
            print(f"F1 Score (macro-averaged): {f1:.4f}\n")

            # Check if current model instance has the best F1 score
            if f1 > best_f1:
                best_f1 = f1
                best_epoch = epoch + 1
                # state_dict() returns references to the live parameter tensors and
                # dict.copy() is shallow, so a deep copy is required; otherwise
                # later epochs overwrite the "best" snapshot in place.
                f1_best_instance = copy.deepcopy(model.state_dict())

    # Load the best model instance (nothing to restore when no epoch ran)
    if f1_best_instance is not None:
        model.load_state_dict(f1_best_instance)

    # Print the epoch with the best model instance
    print(f"> The best model instance is from epoch {best_epoch}\n")

    # Return the best model instance
    return model, best_epoch


In [None]:
# Build every argument that train_and_find_best_instance() needs

# initialize a fresh FCNN (replaces the model trained in the previous cells)
model = FullyConnectedNet()

# initialize the optimizer (plain SGD) with learning rate 'lr'=0.002
optimizer = torch.optim.SGD(model.parameters(), lr=0.002)

# cost function: cross-entropy computed on the raw logits of the network
cost_function = nn.CrossEntropyLoss()

# epochs: number of full passes over the training loader
epochs = 30

print("\n-------------------------Model training results and best model instance computation-------------------------\n")

# Measure elapsed wall-clock time (time.time() does not measure CPU time)
start_time = time.time()

# acquire the BEST trained model based on the 'f1' metric on the validation loader
best_trained_model, best_epoch = train_and_find_best_instance(epochs, optimizer, train_loader, valid_loader, cost_function, model)

training_time = time.time() - start_time
print("> Training and best instance search time: ", training_time, "sec")

* Evaluation of the above model on the test dataset and extraction of conclusions/metrics

In [None]:
# Initialization of parameters and evaluation of the best FCNN instance on the test data

loss_function = nn.CrossEntropyLoss()
print("\n-------------------------Model evaluation results-------------------------\n")

# Measure elapsed wall-clock time of the evaluation
start_time = time.time()

loss, f1_macro_avg, accuracy, confusion_m = evaluate(best_trained_model, test_loader, loss_function)

evaluation_time = time.time() - start_time
print(f"\n> Evaluation time of the best model instance (training epoch {best_epoch}): ", evaluation_time, "sec")

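The model instance with the highest macro-averaged 'f1' score on the validation set during training is the one we expect to generalize best, so it is the instance we evaluate on the test dataset. This is an expectation rather than a guarantee, since the validation and test sets contain different samples.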

### Question 2: Convolutional Neural Network

1.   Data loading for spectrograms

In [None]:
# Repeated mount so that the melgram (Question 2) cells can be executed
# separately from the MFCC (Question 1) cells

from google.colab import drive

# Mount Google Drive at /content/gdrive; the melgram paths below live under it
drive.mount('/content/gdrive')

In [None]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

# Folders (inside the mounted Drive) holding the mel-spectrograms of each split
mel_train_data_path = "/content/gdrive/MyDrive/data/music_genre_data_di/train/melgrams/"
mel_valid_data_path = "/content/gdrive/MyDrive/data/music_genre_data_di/val/melgrams/"
mel_test_data_path = "/content/gdrive/MyDrive/data/music_genre_data_di/test/melgrams/"

# Load melgrams data: every split folder provides X.npy (features) and labels.npy
# train data
mel_train_data = np.load(mel_train_data_path + 'X.npy')
mel_train_labels = np.load(mel_train_data_path + 'labels.npy')

# validation data
mel_valid_data = np.load(mel_valid_data_path + 'X.npy')
mel_valid_labels = np.load(mel_valid_data_path + 'labels.npy')

# test data
mel_test_data = np.load(mel_test_data_path + 'X.npy')
mel_test_labels = np.load(mel_test_data_path + 'labels.npy')

# Create label mapping from the sorted unique *training* labels, so the same
# label -> index encoding is applied to every split
mel_label_mapping = {label: idx for idx, label in enumerate(np.unique(mel_train_labels))}
mel_num_classes = len(mel_label_mapping)

# Map label strings to integer numbers
# (a label that never occurs in the training split raises KeyError here)
mel_train_labels = np.array([mel_label_mapping[label] for label in mel_train_labels])   # train
mel_valid_labels = np.array([mel_label_mapping[label] for label in mel_valid_labels])   # validation
mel_test_labels = np.array([mel_label_mapping[label] for label in mel_test_labels])     # test

# Convert data and labels to PyTorch tensors:
# float32 features for the network, int64 class indices for CrossEntropyLoss
mel_train_data = torch.from_numpy(mel_train_data).float()     # train
mel_train_labels = torch.from_numpy(mel_train_labels).long()
mel_valid_data = torch.from_numpy(mel_valid_data).float()     # validation
mel_valid_labels = torch.from_numpy(mel_valid_labels).long()
mel_test_data = torch.from_numpy(mel_test_data).float()       # test
mel_test_labels = torch.from_numpy(mel_test_labels).long()

# Create TensorDataset
mel_train_dataset = TensorDataset(mel_train_data, mel_train_labels) # train
mel_valid_dataset = TensorDataset(mel_valid_data, mel_valid_labels) # validation
mel_test_dataset = TensorDataset(mel_test_data, mel_test_labels)    # test

# Create train, validation, and testing dataloaders.
# Only the training loader shuffles: shuffling is an SGD aid, whereas the
# validation/test loaders are only used for evaluation, where a fixed order
# keeps runs comparable and does not consume random numbers.
batch_size = 16
mel_train_loader = DataLoader(mel_train_dataset, batch_size=batch_size, shuffle=True)  # train DataLoader
mel_valid_loader = DataLoader(mel_valid_dataset, batch_size=batch_size, shuffle=False) # validation DataLoader
mel_test_loader = DataLoader(mel_test_dataset, batch_size=batch_size)                  # test DataLoader

# melgram visualization
import random
import matplotlib.pyplot as plt

# Function to visualize one melgram per class
def visualize_random_melgrams(dataloader, label_mapping):
    """Plot the first melgram met in `dataloader` for every class, one subplot per class.

    With a shuffling dataloader the plotted sample of each class is effectively random.

    Args:
        dataloader: iterable yielding `(melgrams, labels)` batches, where each
            label is a tensor holding an integer class index.
        label_mapping: dict mapping class name -> class index; the index selects
            the subplot row, so indices are expected to be 0..len(label_mapping)-1.
    """
    # squeeze=False keeps `axs` a 2-D array even for a single class, so it can
    # always be indexed by class index.
    fig, axs = plt.subplots(nrows=len(label_mapping), ncols=1, figsize=(8, 8), squeeze=False)
    axs = axs[:, 0]

    # Reverse lookup (class index -> class name), built once instead of per sample
    index_to_name = {idx: name for name, idx in label_mapping.items()}

    # Class indices that already have a plotted example
    visited_labels = set()

    # Iterate over the dataloader until every class has been drawn once
    for batch_inputs, batch_labels in dataloader:
        for melgram, label in zip(batch_inputs, batch_labels):
            class_idx = label.item()
            if class_idx in visited_labels:
                continue

            ax = axs[class_idx]
            ax.imshow(melgram.squeeze(), cmap='jet')
            ax.set_title(index_to_name[class_idx])
            ax.set_xlabel('Time')
            ax.set_ylabel('Frequency')
            # NOTE: axis('off') hides ticks and the axis labels set above; the
            # labels only become visible if this call is removed.
            ax.axis('off')
            visited_labels.add(class_idx)

            if len(visited_labels) == len(label_mapping):
                break
        if len(visited_labels) == len(label_mapping):
            break

    # Lay out after drawing so that the subplot titles are taken into account
    fig.tight_layout(pad=2.0)
    plt.show()

# Visualize one melgram per class from the (shuffled) training loader
print("\n---------------------------------Random class track visualization---------------------------------\n")
visualize_random_melgrams(mel_train_loader, mel_label_mapping)

2.   CNN (Convolutional Neural Network) initialization



In [None]:
import torch.nn as nn

class CNN_FCNN(nn.Module):
    """CNN classifier: four unpadded 5x5 convolutions followed by four linear layers.

    Args:
        in_channels: number of channels of the input images (1 for melgrams).
        out_dim: number of output logits; for classification this should be the
            number of classes.
        input_size: `(height, width)` of the input images. The default
            `(21, 128)` reproduces the original hard-coded 71680 inputs of `fc1`
            (128 channels * 5 * 112) — TODO confirm against the melgram shape.
    """

    _NUM_CONVS = 4          # conv1..conv4
    _KERNEL_SIZE = 5        # every convolution uses a 5x5 kernel, no padding
    _LAST_CHANNELS = 128    # output channels of conv4

    def __init__(self, in_channels, out_dim, input_size=(21, 128)):
        super().__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=5)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=5)

        # Fully connected layers; fc1 consumes the flattened conv4 output
        self.fc1 = nn.Linear(self._flat_features(input_size), 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, 32)
        self.fc4 = nn.Linear(32, out_dim)

        self.relu = nn.ReLU()

    @classmethod
    def _flat_features(cls, input_size):
        """Return the number of values left after the conv stack for an (H, W) input."""
        # An unpadded k x k convolution trims k - 1 pixels from each spatial dimension
        shrink = cls._NUM_CONVS * (cls._KERNEL_SIZE - 1)
        height, width = (int(side) - shrink for side in input_size)
        if height <= 0 or width <= 0:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small: each side must exceed {shrink}"
            )
        return cls._LAST_CHANNELS * height * width

    def forward(self, x):
        """Return the raw logits for a batch `x` of shape (batch, in_channels, H, W)."""
        # Convolutional layers
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))

        # Flatten the tensor for the fully connected layers
        x = x.view(x.size(0), -1)

        # Fully connected layers (no activation on the output logits)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)

        return x


3.   Training and evaluation procedure of the CNN

In [None]:
from sklearn.metrics import f1_score

# Function to train the CNN and find the best model instance based on F1 metric
def train_and_find_best_CNN(epochs, optimizer, train_loader, valid_loader, cost_function, model):
    """Train a CNN and restore the weights of the epoch with the best validation F1.

    After every training epoch the model is evaluated on `valid_loader`; the
    weights of the epoch with the highest macro-averaged F1 score are
    snapshotted and loaded back into `model` before returning.

    Args:
        epochs: number of full passes over `train_loader`.
        optimizer: optimizer built over `model.parameters()`.
        train_loader: iterable yielding `(inputs, labels)` training batches.
        valid_loader: iterable yielding `(inputs, labels)` validation batches.
        cost_function: callable `(outputs, labels) -> loss tensor`.
        model: the network to train (updated in place).

    Returns:
        Tuple `(model, best_epoch)`; `best_epoch` is 1-based, or 0 when no epoch ran.
    """
    import copy  # local import: only needed to snapshot the best weights

    # Start below any achievable F1 so the first epoch is always recorded,
    # even if its F1 is exactly 0.
    best_f1 = -1.0
    best_epoch = 0
    device = next(model.parameters()).device  # get the device of the model's parameters
    f1_best_instance = None

    for epoch in range(epochs):
        running_loss = 0.0

        # Training phase
        model.train()
        for batch_inputs, batch_labels in train_loader:
            optimizer.zero_grad()
            # Insert a channel axis at position 1 for the Conv2d layers
            # (assumes batches arrive without a channel axis, e.g. (batch, H, W))
            batch_inputs = batch_inputs.unsqueeze(1).to(device)
            batch_labels = batch_labels.to(device)
            batch_outputs = model(batch_inputs)
            loss = cost_function(batch_outputs, batch_labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Calculate average training loss
        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch+1}/{epochs}], Training Loss: {epoch_loss:.4f}")

        # Evaluation phase
        model.eval()
        with torch.no_grad():
            all_predictions = []
            all_labels = []

            for batch_inputs, batch_labels in valid_loader:
                batch_inputs = batch_inputs.unsqueeze(1).to(device)  # add the channel axis
                batch_labels = batch_labels.to(device)
                batch_outputs = model(batch_inputs)
                _, predicted = torch.max(batch_outputs, dim=1)

                all_predictions.extend(predicted.tolist())
                all_labels.extend(batch_labels.tolist())

            # Calculate evaluation metrics
            f1 = f1_score(all_labels, all_predictions, average='macro')

            # Print F1 score
            print(f"F1 Score (macro-averaged): {f1:.4f}\n")

            # Check if current model instance has the best F1 score
            if f1 > best_f1:
                best_f1 = f1
                best_epoch = epoch + 1
                # state_dict() returns references to the live parameter tensors and
                # dict.copy() is shallow, so a deep copy is required; otherwise
                # later epochs overwrite the "best" snapshot in place.
                f1_best_instance = copy.deepcopy(model.state_dict())

    # Load the best model instance (nothing to restore when no epoch ran)
    if f1_best_instance is not None:
        model.load_state_dict(f1_best_instance)

    # Print the epoch with the best model instance
    print(f"> The best CNN model instance is from epoch {best_epoch}\n")

    # Return the best model instance and the best epoch
    return model, best_epoch


In [None]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix

# Evaluate a trained CNN on a dataloader and report its metrics
def evaluate_CNN(model, dataloader, loss_function):
    """Run the CNN over `dataloader` without gradients and print/return metrics.

    Every input batch gets a channel axis inserted at position 1 before it is
    fed to the model.

    Args:
        model: trained network; it is switched to evaluation mode.
        dataloader: iterable yielding `(inputs, labels)` batches.
        loss_function: callable `(outputs, labels) -> loss tensor`.

    Returns:
        Tuple `(loss, f1, accuracy, confusion_matrix)` where `loss` is the mean
        of the per-batch losses and `f1` is macro-averaged.
    """
    model.eval()
    # Batches are moved to the device that holds the model's parameters
    target_device = next(model.parameters()).device

    loss_sum = 0.0
    predicted_classes, true_classes = [], []

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.unsqueeze(1).to(target_device)
            targets = targets.to(target_device)

            logits = model(inputs)
            loss_sum += loss_function(logits, targets).item()

            # The index of the largest logit is the predicted class
            batch_prediction = torch.max(logits, dim=1).indices

            predicted_classes.extend(batch_prediction.cpu().numpy())
            true_classes.extend(targets.cpu().numpy())

    # Aggregate metrics over the whole dataloader
    epoch_loss = loss_sum / len(dataloader)
    f1 = f1_score(true_classes, predicted_classes, average='macro')
    accuracy = accuracy_score(true_classes, predicted_classes)
    confusion_mat = confusion_matrix(true_classes, predicted_classes)

    # Report
    print(f"Loss: {epoch_loss:.4f}")
    print(f"F1 Score (macro-averaged): {f1:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print("Confusion Matrix:")
    print(confusion_mat)

    return epoch_loss, f1, accuracy, confusion_mat


Code to test run the above functions:

In [None]:
# Import the required modules
import torch.nn as nn
import time

# Use the GPU when the runtime provides one, otherwise fall back to the CPU.
# train_and_find_best_CNN() / evaluate_CNN() move every batch to the model's device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize a CNN model with one output logit per genre class
# (previously 16 logits were produced although only mel_num_classes classes exist)
model = CNN_FCNN(in_channels=1, out_dim=mel_num_classes).to(device)

# Initialize the optimizer (plain SGD) with learning rate 'lr'=0.002
optimizer = torch.optim.SGD(model.parameters(), lr=0.002)

# Cost function: cross-entropy computed on the raw logits of the network
cost_function = nn.CrossEntropyLoss()

# Epochs: number of full passes over the training loader
epochs = 30

print("\n-------------------------CNN training results and best model instance computation-------------------------\n")

# Measure elapsed wall-clock time (time.time() does not measure CPU time)
start_time = time.time()

# Acquire the BEST trained model based on the 'f1' metric on the validation loader
best_CNN_trained_model, CNN_best_epoch = train_and_find_best_CNN(epochs, optimizer, mel_train_loader, mel_valid_loader, cost_function, model)

training_time = time.time() - start_time
print("> CNN training and best instance search time: ", training_time, "sec")


We observe variation and unpredictable fluctuations in the values of the training loss and 'f1' metric, despite the fact that as the epochs progress, the parameters should improve. This inconsistency can be attributed to various factors, such as the initialization method of the model, the lack of data normalization, or the small quantity of input data, among others.

In [None]:
# Initialization of parameters and evaluation of the best trained CNN instance on the test data

loss_function = nn.CrossEntropyLoss()
print("\n-------------------------Best CNN model instance evaluation results on test data-------------------------\n")

# Measure elapsed wall-clock time of the evaluation
start_time = time.time()

loss, f1_macro_avg, accuracy, confusion_m = evaluate_CNN(best_CNN_trained_model, mel_test_loader, loss_function)

evaluation_time = time.time() - start_time
print(f"\n> Evaluation time of the best CNN model instance (training epoch {CNN_best_epoch}): ", evaluation_time, "sec")



4.   Add pooling and padding in the data





We create a new initialization method for our CNN to compare the previous execution logic with the current one.

In [None]:
import torch
import torch.nn as nn

class new_CNN_FCNN(nn.Module):
    """CNN classifier with padded convolutions and max pooling.

    Four stages of (5x5 convolution with padding=2 -> ReLU -> 2x2 max pooling
    with padding=1) are followed by four linear layers.

    Args:
        in_channels: number of channels of the input images (1 for melgrams).
        out_dim: number of output logits; for classification this should be the
            number of classes.
        input_size: `(height, width)` of the input images. The default
            `(21, 128)` reproduces the original hard-coded 3456 inputs of `fc1`
            (128 channels * 3 * 9) — TODO confirm against the melgram shape.
    """

    def __init__(self, in_channels, out_dim, input_size=(21, 128)):
        super().__init__()

        # Convolutional layers
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, padding=1)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, padding=1)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, padding=1)
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, padding=1)
        )

        # Fully connected layers; fc1 consumes the flattened conv4 output
        self.fc1 = nn.Linear(self._flat_features(input_size), 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, 32)
        self.fc4 = nn.Linear(32, out_dim)

        self.relu = nn.ReLU()

    @staticmethod
    def _flat_features(input_size):
        """Return the number of values left after the four conv stages for an (H, W) input."""
        height, width = (int(side) for side in input_size)
        if height < 1 or width < 1:
            raise ValueError(f"input_size {tuple(input_size)} must have positive sides")
        for _ in range(4):
            # The padded 5x5 convolution keeps the size; MaxPool2d(kernel_size=2,
            # padding=1) with its default stride of 2 maps n -> n // 2 + 1.
            height, width = height // 2 + 1, width // 2 + 1
        return 128 * height * width  # 128 = output channels of conv4

    def forward(self, x):
        """Return the raw logits for a batch `x` of shape (batch, in_channels, H, W)."""
        # Convolutional layers (each stage already contains ReLU and pooling)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)

        # Flatten the tensor for the fully connected layers
        x = x.view(x.size(0), -1)

        # Fully connected layers (no activation on the output logits)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)

        return x



In [None]:
# Import the required modules
import torch.nn as nn
import time

# Use the GPU when the runtime provides one, otherwise fall back to the CPU.
# train_and_find_best_CNN() / evaluate_CNN() move every batch to the model's device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize a CNN model with one output logit per genre class
# (previously 16 logits were produced although only mel_num_classes classes exist)
model = new_CNN_FCNN(in_channels=1, out_dim=mel_num_classes).to(device)

# Initialize the optimizer (plain SGD) with learning rate 'lr'=0.002
optimizer = torch.optim.SGD(model.parameters(), lr=0.002)

# Cost function: cross-entropy computed on the raw logits of the network
cost_function = nn.CrossEntropyLoss()

# Epochs: number of full passes over the training loader
epochs = 30

print("\n-------------------------New CNN (+ pooling,padding) training results and best model instance computation-------------------------\n")

# Measure elapsed wall-clock time (time.time() does not measure CPU time)
start_time = time.time()

# Acquire the BEST trained model based on the 'f1' metric on the validation loader
best_new_CNN_trained_model, new_CNN_best_epoch = train_and_find_best_CNN(epochs, optimizer, mel_train_loader, mel_valid_loader, cost_function, model)

training_time = time.time() - start_time
print("> CNN training and best instance search time: ", training_time, "sec")


* The new model (after adding padding and max pooling) is trained more efficiently than the previous "simpler" version. When comparing the two best-trained models from both cases, we observe better performance in the new CNN model.
* Additionally, if we observe the training epochs, we notice that the simple CNN model trains relatively quickly in the first 4-5 epochs, as expected. However, in the subsequent epochs, it exhibits a behavior of stagnation/recycling within a range of values (as if it reaches a "dead end"). In contrast, the new CNN, after adding padding and max pooling, gradually trains with increasing performance, which is more expected, without displaying a similar "stagnation."
* Padding adds a border of zero-valued pixels around the input of each convolution. With `padding=2`, a 5x5 convolution keeps the spatial size of its input instead of shrinking it by 4 pixels per dimension, so the information near the edges of the melgram is not progressively discarded and the feature maps do not shrink with every convolutional layer.
* Max pooling slides a window over each feature map (here 2x2 windows; since the stride defaults to the kernel size, the windows do not overlap) and keeps only the largest value of each window. The purpose of pooling is to reduce the spatial dimensions of the feature maps while retaining their strongest activations. This greatly reduces the input size of the first fully connected layer (3456 instead of 71680 values) and therefore achieves better computational and time complexity.

In [None]:
# Initialization of parameters and evaluation of the best trained CNN instance
# (the variant with pooling and padding) on the test data

loss_function = nn.CrossEntropyLoss()
print("\n-------------------------New CNN (+ pooling,padding) Model evaluation results on test data-------------------------\n")

# Measure elapsed wall-clock time of the evaluation
start_time = time.time()

loss, f1_macro_avg, accuracy, confusion_m = evaluate_CNN(best_new_CNN_trained_model, mel_test_loader, loss_function)

evaluation_time = time.time() - start_time
print(f"\n> Evaluation time of the best model instance (+ pooling,padding) (training epoch {new_CNN_best_epoch}): ", evaluation_time, "sec")

5.   Optimization algorithms

The final results table includes the performances of the best trained instance for each optimizer category in terms of 'f1' score on the test data, following the same reasoning as the previous sub-questions.

In [None]:
import torch.optim as optim

# Optimizers to compare; each one is instantiated with the same learning rate
optimizers = [
    optim.SGD,
    optim.Adam,
    optim.RMSprop,
    optim.Adadelta,
    optim.Adagrad,
    optim.Adamax,
    optim.ASGD,
    optim.Rprop,
]

# Settings shared by every run (hoisted out of the loop: they never change)
learning_rate = 0.002
# fewer epochs than the single-optimizer runs, for faster computation
epochs = 20
cost_function = nn.CrossEntropyLoss()
loss_function = nn.CrossEntropyLoss()

# Use the GPU when the runtime provides one, otherwise fall back to the CPU.
# train_and_find_best_CNN() / evaluate_CNN() move every batch to the model's device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Results matrix: row 0 = test accuracy, row 1 = test macro-F1, one column per optimizer
results_matrix = torch.zeros((2, len(optimizers)))

# Iterate over the optimizers
for i, optimizer_class in enumerate(optimizers):

    # Fresh CNN for every optimizer, with one output logit per genre class
    # (previously 16 logits were produced although only mel_num_classes classes exist)
    model = new_CNN_FCNN(in_channels=1, out_dim=mel_num_classes).to(device)

    # Define the optimizer based on the optimizer class
    optimizer = optimizer_class(model.parameters(), lr=learning_rate)

    print(f"\n> Optimizer {optimizer_class} ------------New CNN (+ pooling,padding) training results and best model instance computation---------------\n")

    # Measure elapsed wall-clock time of the training
    start_time = time.time()

    # Acquire the BEST trained model based on the 'f1' metric on the validation loader
    best_new_CNN_trained_model, new_CNN_best_epoch = train_and_find_best_CNN(epochs, optimizer, mel_train_loader, mel_valid_loader, cost_function, model)

    training_time = time.time() - start_time
    print("> CNN training and best instance search time: ", training_time, "sec")

    # Evaluation of the best trained CNN instance on the test data
    print(f"\n> Optimizer {optimizer_class} ------------New CNN (+ pooling,padding) Model evaluation results on test data----------------\n")

    # Measure elapsed wall-clock time of the evaluation
    start_time = time.time()

    loss, f1_macro_avg, accuracy, confusion_m = evaluate_CNN(best_new_CNN_trained_model, mel_test_loader, loss_function)

    evaluation_time = time.time() - start_time
    print(f"\n> Evaluation time of the best model instance (+ pooling,padding) (training epoch {new_CNN_best_epoch}): ", evaluation_time, "sec")
    # printing to separate sections
    print("##################################################################################################################################\n")
    # Store the metrics in the results matrix
    results_matrix[0, i] = accuracy
    results_matrix[1, i] = f1_macro_avg

# Print the results matrix as an aligned table: short optimizer names as column
# headers (instead of the class reprs) and fixed-width right-aligned values
optimizer_names = [optimizer_class.__name__ for optimizer_class in optimizers]
column_width = max(len(name) for name in optimizer_names) + 2
row_label_width = len("accuracy | ")

print("Results Matrix:")
print(" " * row_label_width + "".join(f"{name:>{column_width}}" for name in optimizer_names))
print("accuracy | " + "".join(f"{value:>{column_width}.4f}" for value in results_matrix[0].tolist()))
print("f1       | " + "".join(f"{value:>{column_width}.4f}" for value in results_matrix[1].tolist()))
