In [None]:
# Import necessary libraries
import os  # For interacting with the operating system (e.g., reading directories)
import scipy.io  # For working with MATLAB files (.mat format)
import numpy as np  # For handling numerical operations and arrays
import torch  # For working with tensors, used in PyTorch

# Sampling frequency of the recordings.
# It is also the default trial length (in samples) used by trial_sepration below.
fr_samp = 200

# Folder that holds the .mat recordings
path = r'./data/'  # Ensure this path points to the folder where your .mat files are stored

# os.listdir returns entries in arbitrary order; sorting makes the order in which
# recordings are concatenated (and therefore every sample index) reproducible.
file_list = sorted(os.listdir(path))

# Empty float32 seeds for the final tensors. A 1-D empty tensor is accepted by
# torch.cat next to tensors of any shape, and its dtype takes part in type promotion,
# so integer recordings still end up as floating-point tensors.
data = torch.tensor([])  # Will hold all concatenated data from .mat files
mark = torch.tensor([])  # Will hold all concatenated markers from .mat files

# Per-file tensors are collected here and concatenated once after the loop,
# instead of re-copying the growing result for every file.
data_list = []
marker_list = []

for file_name in file_list:
    # Only MATLAB files are processed; anything else in the folder is ignored
    if not file_name.endswith('.mat'):
        continue

    file_path = os.path.join(path, file_name)

    # loadmat returns a dictionary keyed by MATLAB variable name
    mat_data = scipy.io.loadmat(file_path)

    # The recording is expected under the struct 'o' with fields 'data' and 'marker'
    # (adjust the keys if your files are organised differently).
    data_array = np.array(mat_data['o']['data'][0, 0])
    marker_array = np.array(mat_data['o']['marker'][0, 0])

    torch_data = torch.from_numpy(data_array)
    torch_marker = torch.from_numpy(marker_array)

    data_list.append(torch_data)
    marker_list.append(torch_marker)

# Fail early with a clear message instead of an IndexError on mark[:, 0] below
if not data_list:
    raise FileNotFoundError(f"no .mat files found in {path!r}")

# Single concatenation along dim=0 (rows) for all files
data = torch.cat((data, *data_list), dim=0)
mark = torch.cat((mark, *marker_list), dim=0)

# Keep only the first column of the marker array so that 'mark' becomes 1-D
mark = mark[:, 0]

# Print the shape of the final marker and data tensors
print(mark.shape)  # Output the dimensions of the marker tensor for debugging
print(data.shape)  # Output the dimensions of the data tensor for debugging



'''
File Reading:

The os.listdir(path) fetches all filenames in the specified directory, which are then iterated over.
The if file_name.endswith('.mat') ensures that only MATLAB files (.mat) are processed.
Loading .mat Files:

scipy.io.loadmat() reads .mat files and loads their content into a Python dictionary. The exact structure depends on how the .mat file was created.
Extracting Data:

MATLAB objects (e.g., o.data and o.marker) are extracted using their expected paths (['o']['data'] and ['o']['marker']). Adjust this if your .mat files have a different structure.
Tensor Concatenation:

PyTorch tensors (data and mark) are initialized as empty tensors and then appended using torch.cat() during each iteration. The concatenation occurs along the first dimension (dim=0).
Marker Column Selection:

After concatenation, only the first column of the marker tensor is selected with mark[:, 0], assuming the marker array is multi-dimensional.
Debugging Information:

Printing the shapes of mark and data helps verify that the concatenation and processing are working as expected.
'''

In [None]:
# Locate the sample indices at which the marker switches to each class code.
# The comparison is done on whole tensors instead of one element at a time,
# which gives the same indices but avoids a Python-level loop over every sample.

# 'pr' holds, for every sample, the marker value of the previous sample.
# The very first sample is compared against 0, i.e. "no marker".
pr = torch.zeros_like(mark)
pr[1:] = mark[:-1]

# True wherever the marker differs from the one just before it
changed = mark != pr


def _onset_indices(markers, is_change, code):
    """Return, as a list of ints, the indices where the marker changes to `code`."""
    return torch.nonzero(is_change & (markers == code)).flatten().tolist()


# Marker codes 1, 2, 4, 5 and 6 are mapped to classes 1..5 (code 3 is not used here)
cl_1_ind = _onset_indices(mark, changed, 1)  # indices where the marker changes to 1
cl_2_ind = _onset_indices(mark, changed, 2)  # indices where the marker changes to 2
cl_3_ind = _onset_indices(mark, changed, 4)  # indices where the marker changes to 4
cl_4_ind = _onset_indices(mark, changed, 5)  # indices where the marker changes to 5
cl_5_ind = _onset_indices(mark, changed, 6)  # indices where the marker changes to 6

# Print the lengths of each class index list
# This indicates the number of times the marker changed to each class
print(len(cl_1_ind), len(cl_2_ind), len(cl_3_ind), len(cl_4_ind), len(cl_5_ind))


'''
Tracking Changes in mark:

The variable pr holds the previous marker value to detect when the marker value changes during iteration.
The condition if pr != nx ensures indices are only appended when a marker change occurs.
Classifying Indices:

Depending on the value of nx, the current index i is appended to the appropriate class list (cl_1_ind, cl_2_ind, etc.).
This assumes that markers 1, 2, 4, 5, 6 represent distinct classes.
Updating pr:

After processing each marker, pr is updated to the current marker value (nx) to continue tracking changes.
Counting Marker Changes:

At the end of the loop, len(cl_x_ind) provides the count of indices stored for each class (e.g., how many times the marker switched to class 1, 2, etc.).
'''

In [None]:
# Function to extract and separate trials based on marker indices
def trial_sepration(data, mark_indx, length=fr_samp):
    """
    Cut fixed-length trials out of a continuous recording.

    Parameters:
    - data: torch.Tensor
        2-D tensor to slice; rows are indexed as time samples and all columns are kept.
    - mark_indx: list or torch.Tensor
        Row indices at which each trial begins.
    - length: int (default=fr_samp)
        Number of rows taken per trial. The default is the value `fr_samp` had
        when this function was defined.

    Returns:
    - output: torch.Tensor
        Tensor of shape [number_of_trials, length, number_of_columns].
        If `mark_indx` is empty, an empty 1-D tensor is returned instead.

    Note: a trial that starts fewer than `length` rows before the end of `data`
    is shorter than the others, and torch.cat rejects the mismatched shapes.
    """
    # Empty float32 seed: keeps the result for "no trials" and the dtype promotion
    # identical to concatenating onto an empty tensor one trial at a time.
    output = torch.tensor([])

    # One [1, length, columns] slice per onset; unsqueeze(0) adds the trial axis
    trials = [data[index: index + length, :].unsqueeze(0) for index in mark_indx]

    # Concatenate once instead of re-copying the growing result for every trial
    return torch.cat((output, *trials), dim=0)

'''
Input Parameters:

data: This is the main tensor containing the dataset. It is expected to be a 2D tensor where:
Rows represent time samples.
Columns represent features (e.g., channels, sensors, etc.).
mark_indx: A list or tensor of starting indices for each trial. Each index specifies the beginning of a trial in the data.
length: The number of samples to extract for each trial. Defaults to fr_samp, which represents the sampling frequency.
Initialization:

output: An empty tensor is initialized to store the extracted trials. This will grow dynamically as trials are appended.
Trial Extraction:

For each index in mark_indx, a slice of the data tensor is taken using data[index: index + length, :]. This extracts a trial of the specified length.
.unsqueeze(0) adds an additional dimension at the front, changing the shape from [length, number_of_features] to [1, length, number_of_features]. This is required to stack multiple trials along the first dimension.
Concatenation:

The extracted trial is concatenated to the output tensor along the first dimension (dim=0), which represents the number of trials.
Output:

The final output tensor has the shape [number_of_trials, length, number_of_features], where:
number_of_trials is the number of indices in mark_indx.
length is the number of time samples per trial.
number_of_features is the number of columns in the input data.
'''

In [None]:
# Function to create a subset of data and corresponding markers for a specific class
def data_mark_maker(cl_ind, new_mark, dataset=data):
    """
    Build the trial tensor and the matching label vector for one class.

    Parameters:
    - cl_ind: list or torch.Tensor
        Starting row indices of the trials that belong to this class.
    - new_mark: int
        Label assigned to every trial of this class.
    - dataset: torch.Tensor (default=data)
        Recording to cut the trials from. The default is bound once, when the
        function is defined, so it keeps pointing at the tensor that `data` referred
        to at that moment even if the global name `data` is reassigned later.

    Returns:
    - cls_data: torch.Tensor
        Trials of this class, shape [number_of_trials, trial_length, number_of_features].
    - cls_mark: torch.Tensor
        Long tensor of shape [number_of_trials] filled with `new_mark`.
    """
    # Extract the trials corresponding to the given class indices
    cls_data = trial_sepration(dataset, cl_ind)

    # One label per trial. Only the trial count is needed, so the label vector is
    # built directly instead of filling a tensor as large as cls_data and slicing it;
    # this also works when the class has no trials at all.
    cls_mark = torch.full((cls_data.shape[0],), new_mark, dtype=torch.long)

    return cls_data, cls_mark


'''
Input Parameters:

cl_ind: A list or tensor of indices indicating the starting points of trials for a specific class. These indices are typically obtained from a marker segmentation process.
new_mark: An integer representing the marker value to assign to all trials in this subset.
dataset: The main dataset (data) from which trials will be extracted. Defaults to a global variable data.
Extracting Class Data:

trial_sepration(dataset, cl_ind) extracts the trials from dataset based on the indices in cl_ind. The result (cls_data) is a 3D tensor with dimensions:
[number_of_trials, trial_length, number_of_features].
Creating Class Markers:

torch.full(cls_data.shape, new_mark, dtype=torch.long) creates a tensor with the same shape as cls_data, filled entirely with the value new_mark.
[:, 0, 0]: This selects the first element of each trial, reducing the tensor to a 1D tensor (cls_mark) of shape [number_of_trials]. It assumes that all markers are identical for each trial, so taking the first value is sufficient.
Output:

cls_data: Contains the extracted trials for the specified class. Each trial is represented as a slice of the dataset based on the indices in cl_ind.
cls_mark: Contains the marker values (new_mark) assigned to each trial, with one marker per trial.
'''

In [None]:
# Build (trials, labels) for the five classes in one pass.
# The onset lists are visited in order, so enumerate assigns the labels
#   cl_1_ind -> 0, cl_2_ind -> 1, cl_3_ind -> 2, cl_4_ind -> 3, cl_5_ind -> 4
# and each result is unpacked into its own pair of names.
(
    (cls_1_data, cls_1_mark),
    (cls_2_data, cls_2_mark),
    (cls_3_data, cls_3_mark),
    (cls_4_data, cls_4_mark),
    (cls_5_data, cls_5_mark),
) = (
    data_mark_maker(onsets, label)
    for label, onsets in enumerate((cl_1_ind, cl_2_ind, cl_3_ind, cl_4_ind, cl_5_ind))
)


In [None]:
# Stack the per-class trial tensors along the trial axis (dim 0).
# 'data' becomes [total_number_of_trials, trial_length, number_of_features].
data = torch.cat([cls_1_data, cls_2_data, cls_3_data, cls_4_data, cls_5_data], 0)

# Stack the per-class label vectors in the same class order so that
# row k of 'data' and entry k of 'mark' describe the same trial.
# 'mark' becomes [total_number_of_trials].
mark = torch.cat([cls_1_mark, cls_2_mark, cls_3_mark, cls_4_mark, cls_5_mark], 0)


In [None]:
# One label per trial, so the length of 'mark' is the number of trials
num_sample = len(mark)

# Random ordering of the trial indices 0 .. num_sample-1
rnd_perm = torch.randperm(num_sample)

# Apply the same permutation to trials and labels so they stay aligned,
# and fix the dtypes used downstream: float32 inputs, int64 class labels.
data, mark = data[rnd_perm].to(torch.float32), mark[rnd_perm].to(torch.long)


In [None]:
# Reshape for the 2-D convolutions used by the model:
#   [num_samples, trial_length, num_features]
#     -> unsqueeze(1)     -> [num_samples, 1, trial_length, num_features]   (singleton channel axis)
#     -> transpose(2, 3)  -> [num_samples, 1, num_features, trial_length]   (features before time)
data = data.unsqueeze(1).transpose(2, 3)

# Persist the processed trials and their labels for later use
torch.save(data, 'data.pth')
torch.save(mark, 'mark.pth')

# Sanity check of the final shapes
print(data.shape)  # [num_samples, 1, num_features, trial_length]
print(mark.shape)  # [num_samples]


In [None]:
from sklearn.model_selection import train_test_split
# Fraction kept for training at each of the two splits below
train_ratio = 0.8

# First split: train_ratio of all trials for train+validation, the rest for test
x_train, x_test, y_train, y_test = train_test_split(
    data, mark, train_size=train_ratio
)

# Second split: train_ratio of the remaining trials for training, the rest for validation
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train, y_train, train_size=train_ratio
)

# Report the size of every subset
for split_name, features, labels in (
    ('train: ', x_train, y_train),
    ('valid: ', x_valid, y_valid),
    ('test: ', x_test, y_test),
):
    print(split_name, features.shape, labels.shape)

In [None]:
# TensorDataset and DataLoader are first used in this cell, so they are imported here;
# without this import the cell fails with a NameError.
from torch.utils.data import DataLoader, TensorDataset

# Set batch sizes for training and validation/test datasets
train_batch_size = 330  # Batch size for the training dataset
valid_batch_size = 330  # Batch size for the validation and test datasets

# TensorDataset pairs each input tensor with its label tensor, so indexing a dataset
# yields one (features, label) tuple.
train_dataset = TensorDataset(x_train, y_train)
valid_dataset = TensorDataset(x_valid, y_valid)
test_dataset = TensorDataset(x_test, y_test)

# DataLoader handles batching (and optional shuffling) of a dataset.

# Training data is reshuffled at the start of every epoch
train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)

# Validation and test data are read in a fixed order; shuffling is not needed for evaluation
valid_loader = DataLoader(valid_dataset, batch_size=valid_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=valid_batch_size, shuffle=False)

# Print the batch size and the number of batches in each DataLoader
# This helps confirm that the datasets are properly batched and loaded
print("train batch size:", train_loader.batch_size, ", num of batch:", len(train_loader))
print("valid batch size:", valid_loader.batch_size, ", num of batch:", len(valid_loader))
print("test batch size:", test_loader.batch_size, ", num of batch:", len(test_loader))


In [None]:
# Pull a single batch from a fresh iterator over the training loader
first_batch = iter(train_loader)
x, y = next(first_batch)

# Shapes of the batch inputs (x) and of the batch labels (y)
print(x.shape, y.shape)


# Train - model

In [None]:
# ---- Data / task dimensions ----
fs = 200             # sampling frequency used to size the temporal kernels
signal_length = 200  # number of time samples per trial
channel = 22         # number of rows (features) per trial; height of the spatial kernel
num_input = 1        # input channels of the first convolution
num_class = 5        # number of output classes

# ---- Filter counts ----
F1 = 8       # filters of the first convolution
D = 3        # multiplier applied to F1 by the depthwise convolution
F2 = F1 * D  # filters of the pointwise convolution

# ---- Convolution kernels (height, width) ----
kernel_size_1 = (1, round(fs / 2))  # first convolution, width fs/2
kernel_size_2 = (channel, 1)        # depthwise convolution spanning all rows
kernel_size_3 = (1, round(fs / 8))  # depthwise part of the separable convolution, width fs/8
kernel_size_4 = (1, 1)              # pointwise convolution

# ---- Pooling and regularisation ----
kernel_avgpool_1 = (1, 4)
kernel_avgpool_2 = (1, 8)
dropout_rate = 0.2

# ---- Paddings: round((k - 1) / 2) per kernel dimension ----
ks0, ks1 = (int(round((k - 1) / 2)) for k in kernel_size_1)
kernel_padding_1 = (ks0, ks1 - 1)  # width padding is reduced by one for the first convolution

ks0, ks1 = (int(round((k - 1) / 2)) for k in kernel_size_3)
kernel_padding_3 = (ks0, ks1)

## defining the base class

In [None]:
import torch.nn as nn

class EEGNet(nn.Module):
    """EEGNet-style convolutional classifier.

    All sizes are read from the module-level hyperparameters (num_input, F1, D, F2,
    kernel_size_*, kernel_padding_*, kernel_avgpool_*, dropout_rate, signal_length,
    num_class) at construction time.

    `forward` returns raw, unnormalised class scores (logits) of shape
    [batch, num_class]. nn.CrossEntropyLoss applies log-softmax itself, so feeding
    it softmax probabilities would apply softmax twice and flatten the gradients.
    Use `predict_proba` when class probabilities are needed; the arg-max class is
    the same for logits and probabilities.
    """

    def __init__(self):
        super().__init__()

        # layer 1: convolution + batch normalization
        self.conv2d = nn.Conv2d(num_input, F1, kernel_size_1, padding=kernel_padding_1)
        self.Batch_normalization_1 = nn.BatchNorm2d(F1)

        # layer 2: depthwise convolution (groups=F1, D filters per input channel),
        # batch normalization, ELU, average pooling, dropout
        self.Depthwise_conv2D = nn.Conv2d(F1, D * F1, kernel_size_2, groups=F1)
        self.Batch_normalization_2 = nn.BatchNorm2d(D * F1)
        self.Elu = nn.ELU()  # stateless, shared by layers 2 and 3
        self.Average_pooling2D_1 = nn.AvgPool2d(kernel_avgpool_1)
        self.Dropout = nn.Dropout2d(dropout_rate)  # stateless, shared by layers 2 and 3

        # layer 3: separable convolution = depthwise (one filter per channel) followed by
        # pointwise (1x1) convolution, then batch normalization and average pooling
        self.Separable_conv2D_depth = nn.Conv2d(D * F1, D * F1, kernel_size_3,
                                               padding=kernel_padding_3, groups=D * F1)
        self.Separable_conv2D_point = nn.Conv2d(D * F1, F2, kernel_size_4)
        self.Batch_normalization_3 = nn.BatchNorm2d(F2)
        self.Average_pooling2D_2 = nn.AvgPool2d(kernel_avgpool_2)

        # layer 4: classifier head
        self.Flatten = nn.Flatten()
        # The input size must match the flattened feature map left after both poolings
        self.Dense = nn.Linear(F2 * round(signal_length / 32), num_class)
        # Not applied in forward(); used by predict_proba() only
        self.Softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class logits of shape [batch, num_class] for the input batch `x`."""
        # layer 1: convolution + batch normalization (no activation here)
        y = self.Batch_normalization_1(self.conv2d(x))

        # layer 2: depthwise convolution + batch normalization + ELU + pooling + dropout
        y = self.Batch_normalization_2(self.Depthwise_conv2D(y))
        y = self.Elu(y)
        y = self.Dropout(self.Average_pooling2D_1(y))

        # layer 3: separable convolution + batch normalization + ELU + pooling + dropout
        y = self.Separable_conv2D_depth(y)
        y = self.Batch_normalization_3(self.Separable_conv2D_point(y))
        y = self.Elu(y)
        y = self.Dropout(self.Average_pooling2D_2(y))

        # layer 4: flatten + dense; the scores are left unnormalised for CrossEntropyLoss
        y = self.Flatten(y)
        y = self.Dense(y)

        return y

    def predict_proba(self, x):
        """Return class probabilities (each row sums to 1) for the input batch `x`."""
        return self.Softmax(self(x))


'''
Detailed Explanation of the Model Layers:
Layer 1: Convolution + Batch Normalization

self.conv2d: A standard 2D convolutional layer with num_input input channels and F1 output channels. kernel_size_1 and padding=kernel_padding_1 define the size of the convolution filter and padding, respectively.
self.Batch_normalization_1: Batch normalization applied after the convolution to normalize activations, helping speed up training and providing regularization.
Layer 2: Depthwise Separable Convolution + ELU + Pooling + Dropout

self.Depthwise_conv2D: A depthwise convolution with F1 input channels and D * F1 output channels. groups=F1 makes each input channel be convolved separately (with D filters per channel), which significantly reduces the number of parameters compared with a standard convolution.
self.Batch_normalization_2: Batch normalization after the depthwise convolution to stabilize learning.
self.Elu: Exponential Linear Unit (ELU) activation function applied element-wise. ELU is often used because it helps avoid dead neurons and can lead to faster convergence.
self.Average_pooling2D_1: A 2D average pooling layer with kernel size kernel_avgpool_1 to reduce the spatial dimensions after the convolutional operations.
self.Dropout: Dropout applied to the feature map after pooling, which helps to prevent overfitting by randomly zeroing out some of the activations.
Layer 3: Separable Convolution + Pointwise Convolution + ELU + Pooling + Dropout

self.Separable_conv2D_depth: The depthwise half of the separable convolution. groups=D * F1 applies a separate filter to each channel (as opposed to applying a convolution across all channels simultaneously).
self.Separable_conv2D_point: A pointwise convolution that is used to combine the outputs of the depthwise convolution.
This operation is typically applied after depthwise convolutions to mix the features across channels.
self.Batch_normalization_3: Batch normalization applied after the separable convolution to normalize activations.
self.Average_pooling2D_2: Another 2D average pooling layer, with a kernel size defined by kernel_avgpool_2.
self.Dropout: Dropout applied after the pooling operation to further regularize the network.
Layer 4: Fully Connected (Dense) Layer + Softmax

self.Flatten: A Flatten layer is applied to convert the output from a multi-dimensional tensor (after convolution and pooling) to a 1D vector suitable for fully connected layers.
self.Dense: A fully connected (dense) layer with F2 * round(signal_length / 32) input features (this size depends on the specific input signal length and pooling operations) and num_class output features,
where num_class is the number of classes for classification.
self.Softmax: The final Softmax activation layer computes the class probabilities, which will sum to 1.
Hyperparameters:
num_input: The number of input channels (typically 1 for EEG data).
F1, F2: Number of filters in each layer (these are hyperparameters that you will define).
D: A scaling factor for the number of filters in the depthwise convolution.
kernel_size_1, kernel_size_2, kernel_size_3, kernel_size_4: Sizes of the convolution kernels for each respective convolutional layer.
kernel_padding_1, kernel_padding_3: Padding values for each respective convolutional layer.
kernel_avgpool_1, kernel_avgpool_2: The kernel sizes for the average pooling layers.
dropout_rate: The dropout rate to be used in the dropout layers.
signal_length: Length of the input signal (needed to calculate the output size after convolution and pooling).
num_class: The number of output classes for classification.
Why This Architecture?
Depthwise Separable Convolutions: These convolutions (used in layer 2 and 3) are computationally efficient and reduce the number of parameters compared to standard convolutions.
This is useful for working with small datasets like EEG signals.
Batch Normalization: Helps stabilize training by normalizing activations, leading to faster convergence and better performance.
Dropout: Reduces overfitting by randomly dropping neurons during training.
Softmax: Used at the end to output probabilities for classification tasks.
This model architecture is inspired by EEGNet, which is designed specifically for EEG signal classification.
'''

In [None]:
import torch.optim as optim

# Learning rate (step size) handed to the optimizer
eta = 1e-3

# Fresh EEGNet instance with newly initialised parameters
model = EEGNet()

# Cross-entropy loss for multi-class classification with integer class labels
loss_fn = nn.CrossEntropyLoss()

# NAdam optimizer over all model parameters
optimizer = optim.NAdam(params=model.parameters(), lr=eta)

'''
Model Initialization (model = EEGNet()):

This line creates an instance of the EEGNet class. This will initialize all the layers and parameters defined in your EEGNet class.
The model is now ready to be trained.
Learning Rate (eta = 0.001):

eta is the learning rate, which controls the step size taken by the optimizer during gradient updates.
A small learning rate like 0.001 is often a good starting point for many neural networks, though it can be adjusted based on the training results.
Loss Function (loss_fn = nn.CrossEntropyLoss()):

nn.CrossEntropyLoss() is a commonly used loss function for multi-class classification problems.
It computes the softmax of the model’s output and then calculates the negative log-likelihood loss.
This loss function is suitable when the model’s output consists of raw logits (not probabilities) for each class, and the targets are integer labels.
Optimizer (optimizer = optim.NAdam(model.parameters(), lr=eta)):

optim.NAdam is a variant of the Adam optimizer that uses Nesterov momentum.
NAdam combines the benefits of Adam and Nesterov Accelerated Gradient (NAG). It has been shown to improve performance in many tasks.
model.parameters() passes the parameters (weights and biases) of the model to the optimizer. This allows the optimizer to update these parameters during training.
lr=eta sets the learning rate for the optimizer. This determines how large the step will be during the update of each parameter.
'''

### one epoch

In [None]:
from torchmetrics import Accuracy

def train_one_epoch(model, train_loader, loss_fn, optimizer):
    """Run one optimisation pass over `train_loader`.

    Returns a tuple (model, mean of the per-batch losses, accuracy over the epoch).
    """
    # Training mode: dropout is active and batch normalization uses batch statistics
    model.train()

    epoch_loss = AverageMeter()  # running mean of the batch losses
    epoch_acc = Accuracy(task="multiclass", num_classes=num_class)  # accumulates over batches

    for batch in train_loader:
        inputs, targets = batch

        # Forward pass and loss
        predictions = model(inputs)
        batch_loss = loss_fn(predictions, targets)

        # Backward pass, with the global gradient norm clipped to 1
        batch_loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Parameter update, then clear the gradients so the next batch starts from zero
        optimizer.step()
        optimizer.zero_grad()

        # Bookkeeping for the epoch statistics
        epoch_loss.update(batch_loss.item())
        epoch_acc(predictions, targets.int())

    return model, epoch_loss.avg, epoch_acc.compute().item()


'''
model.train():

This sets the model to "training mode." In this mode, certain layers (like dropout and batch normalization) behave differently than in evaluation mode. It ensures the model is in the correct state for training.
loss_train = AverageMeter():

AverageMeter is typically a custom class that computes the average of the loss values over the course of the epoch. It helps track the average loss without having to manually compute the mean.
This helps you monitor the training process.
acc_train = Accuracy(task="multiclass", num_classes=num_class):

Accuracy from torchmetrics is used to compute the classification accuracy. Here, it’s set for a multiclass classification task.
num_classes=num_class defines how many classes your model is predicting.
for i, (inputs, targets) in enumerate(train_loader)::

This loop iterates through the training data (train_loader) in batches. Each batch consists of inputs (the features) and targets (the ground truth labels).
outputs = model(inputs):

This is the forward pass. The model processes the inputs and produces predictions (outputs).
loss = loss_fn(outputs, targets):

This computes the loss (error) between the model's predictions (outputs) and the ground truth labels (targets).
loss_fn is the loss function (CrossEntropyLoss in your case), which measures how well the model's predictions align with the true labels.
loss.backward():

This computes the gradients of the loss with respect to all the model parameters (weights and biases) using backpropagation.
nn.utils.clip_grad_norm_(model.parameters(), 1):

This clips the gradients to prevent the exploding gradients problem. It ensures that the gradients do not become too large, which can destabilize the training process.
The 1 here refers to the maximum allowable norm of the gradients (you can adjust it based on experimentation).
optimizer.step():

This updates the model’s parameters (weights and biases) using the gradients calculated in the backward pass.
The optimizer applies an optimization algorithm (like NAdam, Adam, or SGD) to minimize the loss.
optimizer.zero_grad():

After each parameter update, this clears the gradients. In PyTorch, gradients are accumulated by default,
so you need to manually zero them to prevent them from accumulating over multiple iterations.
loss_train.update(loss.item()):
This updates the running average of the loss for this epoch. loss.item() extracts the scalar value of the loss from the tensor.
acc_train(outputs, targets.int()):
This updates the running accuracy for the epoch. It computes the accuracy by comparing the predicted classes (outputs) with the true labels (targets).
return model, loss_train.avg, acc_train.compute().item():
After completing the epoch, the function returns:
model: The updated model.
loss_train.avg: The average loss computed over the entire epoch.
acc_train.compute().item(): The computed accuracy for the epoch. The .item() method converts the result to a Python number (scalar).
'''

In [None]:
from torchmetrics import F1Score

def test_one_epoch(model, test_loader, loss_fn, optimizer):
    """Evaluate ``model`` for one full pass over ``test_loader``.

    No parameters are updated here: the model is switched to evaluation mode
    and the whole pass runs with gradient tracking disabled.

    Args:
        model: Network to evaluate; invoked as ``model(inputs)``.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        loss_fn: Callable invoked as ``loss_fn(outputs, targets)``.
        optimizer: Unused. Kept so the signature stays compatible with the
            existing call sites, which pass it positionally.

    Returns:
        tuple: ``(model, avg_loss, accuracy, f1)``. ``avg_loss`` is the mean of
        the per-batch losses (every batch counts once); ``accuracy`` and ``f1``
        are the torchmetrics results over all batches, converted to Python
        numbers with ``.item()``.
    """
    # Evaluation mode: layers such as dropout and batch normalization switch
    # to their inference behaviour.
    model.eval()

    # Trackers for the loss and the two classification metrics
    loss_test = AverageMeter()
    acc_test = Accuracy(task="multiclass", num_classes=num_class)
    f1_test = F1Score(task="multiclass", num_classes=num_class)

    # Gradients are never needed during evaluation; disabling autograd avoids
    # building a computation graph for every forward pass.
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            loss_test.update(loss.item())

            # The metric objects accumulate their state across batches
            labels = targets.int()
            acc_test(outputs, labels)
            f1_test(outputs, labels)

    return model, loss_test.avg, acc_test.compute().item(), f1_test.compute().item()


In [None]:
class AverageMeter:
    """Track the latest value and the weighted running mean of a series.

    Public fields (all reset to 0 by ``reset``):
        val: Most recent value passed to ``update``.
        avg: Weighted mean of every value seen since the last ``reset``.
        sum: Accumulated ``val * n`` over all updates.
        count: Accumulated ``n`` over all updates.
    """

    def __init__(self):
        # Start from a clean state
        self.reset()

    def reset(self):
        """Set ``val``, ``avg``, ``sum`` and ``count`` back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record a new value and refresh the running mean.

        Args:
            val (float): The latest value (e.g. the loss of the current batch).
            n (int): Weight of ``val``, i.e. how many samples it represents
                (typically the batch size). Defaults to 1.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard the division: if only zero-weight updates have been seen,
        # count is still 0 and the mean stays at 0 instead of raising
        # ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0


'''
__init__(self):
The constructor method initializes an instance of the AverageMeter class. When an object of this class is created,
it immediately calls the reset() method, which sets the initial state of the attributes.
reset(self):
This method is used to reset all the values (val, avg, sum, and count) back to their initial states:
val: Stores the most recent value added.
avg: Stores the running average.
sum: Stores the sum of all values added (helps in calculating the average).
count: Keeps track of how many values have been added so far.
update(self, val, n=1):
This method is called to update the AverageMeter with new values:
val: The latest value that needs to be added (e.g., the current loss or accuracy for a batch).
n: The number of occurrences, usually the size of the current batch (default is 1).
This method updates:
val: Sets it to the latest value.
sum: Adds the current value (val) multiplied by the batch size (n) to the running total.
count: Increments by n to track how many values have been added.
avg: Recalculates the running average by dividing the sum by the count.
'''

# Usage example for AverageMeter: create a meter, feed it one loss value, read the mean.
# NOTE(review): `loss` is not defined in this snippet; it presumably has to exist at module
# scope already (e.g. left over from an earlier cell), otherwise the update line raises
# NameError. Confirm before running this cell.
# NOTE(review): the training loop later rebinds `loss_train` to the value returned by
# train_one_epoch, so this meter object does not survive past that point.
loss_train = AverageMeter()  # Initialize the average meter for tracking loss
loss_train.update(loss.item())  # Update the meter with the current batch's loss
print("Average loss for the epoch:", loss_train.avg)


### training

In [None]:
# How many times the training loop below runs train_one_epoch
num_epochs = 100

# Per-epoch histories: one loss list and one accuracy list for each split
loss_train_hist = []
acc_train_hist = []
loss_valid_hist = []
acc_valid_hist = []
loss_test_hist = []
acc_test_hist = []

for epoch in range(num_epochs):
    # Fit on the training split, then score the validation and test splits
    model, loss_train, acc_train = train_one_epoch(model, train_loader, loss_fn, optimizer)
    model, loss_valid, acc_valid, _ = test_one_epoch(model, valid_loader, loss_fn, optimizer)
    model, loss_test, acc_test, f1score = test_one_epoch(model, test_loader, loss_fn, optimizer)

    # Record this epoch's losses ...
    loss_train_hist.append(loss_train)
    loss_valid_hist.append(loss_valid)
    loss_test_hist.append(loss_test)
    # ... and accuracies
    acc_train_hist.append(acc_train)
    acc_valid_hist.append(acc_valid)
    acc_test_hist.append(acc_test)

    # Only report on every 5th epoch (0, 5, 10, ...)
    if epoch % 5:
        continue

    # Lines are printed in the order: train, validation, test, test F1 score
    print(
        f'epoch {epoch}:',
        f' Loss= {loss_train:.4}, Accuracy= {int(acc_train * 100)}%',
        f' Loss= {loss_valid:.4}, Accuracy= {int(acc_valid * 100)}%',
        f' Loss= {loss_test:.4}, Accuracy= {int(acc_test * 100)}%',
        f' f1 score= {(f1score * 100):.4}\n',
        sep='\n',
    )


'''
1. num_epochs:
This variable specifies the total number of epochs (full passes over the training set) for training the model. In your case, the model will train for 100 epochs.
2. History Lists:
loss_train_hist, acc_train_hist: These lists store the training loss and accuracy for each epoch.
loss_valid_hist, acc_valid_hist: These store the validation loss and accuracy for each epoch.
loss_test_hist, acc_test_hist: These store the test loss and accuracy for each epoch.
3. Training Loop:
The for loop runs for num_epochs (100 times). For each epoch:
Training Phase (train_one_epoch): You train the model on the training dataset, iterating over all of its batches, and compute the training loss and accuracy for the epoch.
Validation Phase (test_one_epoch): After training, you evaluate the model on the validation dataset to monitor the model's performance and prevent overfitting.
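Test Phase (test_one_epoch): After validation, you evaluate the model on the test set. This shows how well the current model performs on unseen data; because it is measured every epoch, it should not be used to pick the epoch or tune hyperparameters (use the validation set for that).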
4. Appending Values to History:
After each epoch, the loss and accuracy for train, validation, and test phases are appended to their respective lists to track the progress over time.
5. Periodic Print:
Every 5 epochs (epoch % 5 == 0), you print the loss and accuracy for each phase:
Training Loss and Accuracy: loss_train and acc_train represent how well the model is fitting the training data.
Validation Loss and Accuracy: loss_valid and acc_valid help track how the model generalizes to unseen data (validation set).
Test Loss and Accuracy: loss_test and acc_test show the performance on the test set after that epoch.
F1 Score: f1score gives an additional metric to assess the model’s performance, especially in the case of imbalanced classes.
Things to Keep in Mind:
Model Evaluation: Every epoch, after training, you're testing the model on the validation and test sets.
It's a good practice to monitor not just accuracy, but also other metrics like F1-score, especially if your dataset has imbalanced classes.
Epochs: 100 epochs might be a bit high depending on the problem; you may want to tune this based on the model’s convergence behavior.
Overfitting: If you notice the validation accuracy stagnating or decreasing while the training accuracy increases, it could be a sign of overfitting.
Regularization techniques like dropout, early stopping, or reducing the complexity of the model could help.
'''

In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch loss curves for the train, validation, and test sets.
# The x values are left to matplotlib's default (0 .. len(history) - 1) rather
# than range(num_epochs), so the curves still plot when fewer than num_epochs
# epochs were recorded (e.g. training was interrupted part-way).
plt.plot(loss_train_hist, 'b-', label='Train')  # Blue line for training loss
plt.plot(loss_valid_hist, 'k-', label='Valid')  # Black line for validation loss
plt.plot(loss_test_hist, 'g-', label='Test')  # Green line for test loss

# Axis labels
plt.xlabel('Epoch')
plt.ylabel('Loss')

# Grid for easier reading of the values
plt.grid(True)

# Legend to tell the three curves apart
plt.legend()

# Render the figure
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch accuracy curves for the train, validation, and test sets.
# The x values are left to matplotlib's default (0 .. len(history) - 1) rather
# than range(num_epochs), so the curves still plot when fewer than num_epochs
# epochs were recorded (e.g. training was interrupted part-way).
plt.plot(acc_train_hist, 'b-', label='Train')  # Blue line for training accuracy
plt.plot(acc_valid_hist, 'k-', label='Valid')  # Black line for validation accuracy
plt.plot(acc_test_hist, 'g-', label='Test')  # Green line for test accuracy

# Axis labels
plt.xlabel('Epoch')
plt.ylabel('Acc')

# Grid for easier reading of the values
plt.grid(True)

# Legend to tell the three curves apart
plt.legend()

# Render the figure
plt.show()
