In [1]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F

This imports PyTorch, along with `datasets`, `transforms`, and `DataLoader`, which load the images and feed them to the CNN.
`torch.nn` provides the neural network layers, and `torch.nn.functional` provides the functional forms of operations such as activations and pooling.

In [2]:
# Preprocessing applied to every image: resize to a fixed 128x128 so all
# inputs share one spatial size, then convert the image to a tensor.
transform = transforms.Compose(
    [transforms.Resize((128, 128)), transforms.ToTensor()]
)

This resizes every image to 128×128 and converts it to a tensor so that it can be fed to our CNN.

In [3]:
# Training split: labels come from the sub-folder each image sits in.
train_dataset = datasets.ImageFolder(root="MLDL_Data_Face-1/train", transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Validation split: same preprocessing, iterated in a fixed order.
val_dataset = datasets.ImageFolder(root="MLDL_Data_Face-1/val", transform=transform)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Each ImageFolder builds its own class-name -> index mapping from the folders
# under its root. If the two roots do not contain identically named class
# folders, a validation label would not mean the same class as the training
# label with the same index, and any accuracy computed on it is meaningless.
assert train_dataset.class_to_idx == val_dataset.class_to_idx, (
    "train/val class folders differ: "
    f"{train_dataset.class_to_idx} vs {val_dataset.class_to_idx}"
)


Here, we create our datasets and loaders using the imports. Our training set is composed of subjects 1 and 2, for all three classes. Each image is transformed as defined above and then grouped into batches (the batch size of 32 is adjustable). Shuffling the training set varies the composition of the batches from epoch to epoch, while the validation set is left unshuffled because evaluation does not depend on sample order.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """Three-stage convolutional classifier for RGB images.

    Each stage is a 3x3 convolution (padding 1, so height and width are kept),
    a ReLU, and a 2x2 max-pool that halves height and width. The pooled
    feature map is flattened and passed through two fully connected layers.

    Args:
        num_classes: Number of output units, one raw score per class.
        input_size: Side length in pixels of the (square) input images. It is
            only used to size the first fully connected layer.

    Raises:
        ValueError: If ``input_size`` is too small to survive three poolings.

    The forward pass returns raw, unnormalised class scores (logits); no
    softmax is applied inside the model.
    """

    def __init__(self, num_classes: int = 3, input_size: int = 128):
        super().__init__()

        # Each 2x2 max-pool floors the side length when halving it, so three
        # of them leave input_size // 8 pixels per side (16 for 128x128 input).
        pooled_side = input_size // 8
        if pooled_side < 1:
            raise ValueError(
                f"input_size must be at least 8 to survive three 2x2 poolings, got {input_size}"
            )

        # Convolution stack: 3 (RGB) -> 32 -> 64 -> 128 channels, 3x3 kernels.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)

        # Fully connected head on the flattened 128-channel feature map.
        self.fc1 = nn.Linear(128 * pooled_side * pooled_side, 128)

        # Output layer: one unit per class.
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch_size, num_classes).

        Shape comments below are for the default 128x128 input.
        """
        # Stage 1: convolution -> ReLU -> max-pool
        x = F.relu(self.conv1(x))  # (batch_size, 32, 128, 128)
        x = F.max_pool2d(x, 2)  # (batch_size, 32, 64, 64)

        # Stage 2: convolution -> ReLU -> max-pool
        x = F.relu(self.conv2(x))  # (batch_size, 64, 64, 64)
        x = F.max_pool2d(x, 2)  # (batch_size, 64, 32, 32)

        # Stage 3: convolution -> ReLU -> max-pool
        x = F.relu(self.conv3(x))  # (batch_size, 128, 32, 32)
        x = F.max_pool2d(x, 2)  # (batch_size, 128, 16, 16)

        # Collapse everything except the batch dimension for the linear layers.
        x = torch.flatten(x, start_dim=1)  # (batch_size, 128*16*16)

        x = F.relu(self.fc1(x))  # (batch_size, 128)

        # Raw scores only: the softmax is left to the loss function.
        x = self.fc2(x)  # (batch_size, num_classes)

        return x

# Fix the RNG seed so the random weight initialisation below is repeatable
# when the notebook is re-run from a fresh kernel.
torch.manual_seed(42)

# Create the model and print its layer structure.
model = SimpleCNN()
print(model)



In [5]:
import torch.optim as optim

# Multi-class cross-entropy on the model's raw scores; the softmax is computed
# inside the loss.
criterion = nn.CrossEntropyLoss()

# Adam over all of the model's parameters with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)


In [None]:
num_epochs = 5  # Number of full passes over the training set

# Run every batch on whatever device the model's parameters live on.
device = next(model.parameters()).device

for epoch in range(num_epochs):
    model.train()  # Training mode
    running_loss = 0.0  # Sum of per-sample losses seen so far this epoch
    samples_seen = 0  # Number of samples those losses cover

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # Clear gradients left over from the previous step

        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, labels)

        loss.backward()  # Backward pass: compute gradients
        optimizer.step()  # Update the parameters

        # Assuming criterion keeps its default mean reduction, loss is the
        # batch mean. Weight it by the batch size so a smaller final batch
        # does not count as much as a full one in the epoch average.
        n_batch = labels.size(0)
        running_loss += loss.item() * n_batch
        samples_seen += n_batch

    # Per-sample average loss for the epoch; NaN if the loader yielded nothing.
    avg_loss = running_loss / samples_seen if samples_seen else float("nan")
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")


Epoch [1/10], Loss: 0.5788
Epoch [2/10], Loss: 0.1444
Epoch [3/10], Loss: 0.0704
Epoch [4/10], Loss: 0.0413
Epoch [5/10], Loss: 0.0252
Epoch [6/10], Loss: 0.0191
Epoch [7/10], Loss: 0.0108
Epoch [8/10], Loss: 0.0136
Epoch [9/10], Loss: 0.0075
Epoch [10/10], Loss: 0.0032


In [11]:
# NOTE(review): the recorded run reports 7.62% on a 3-class problem, well below
# the ~33% expected from guessing. That pattern usually points to validation
# labels not lining up with training labels rather than to a weak model --
# TODO compare class_to_idx on the train and val datasets.
model.eval()  # Evaluation mode
correct = 0
total = 0

# Run every batch on whatever device the model's parameters live on.
device = next(model.parameters()).device

with torch.no_grad():  # No gradients are needed for validation
    for inputs, labels in val_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest score. argmax avoids binding
        # `_`, which IPython uses for the last cell output.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# NaN instead of a ZeroDivisionError if the validation loader yielded nothing.
accuracy = correct / total if total else float("nan")
print(f"Validation Accuracy: {accuracy * 100:.2f}%")
print(correct)
print(total)


Validation Accuracy: 7.62%
133
1745


https://arxiv.org/pdf/1902.03524


https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=ffe8cc49b14ede3807b91b4fa5217daf8515fdb2


https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8053088
