# FER-2013 Dataset


In [None]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader, Subset
import torch.nn as nn
import matplotlib.pyplot as plt
import copy

# Describe the compute hardware that PyTorch can see, then select the device
# used by the rest of the notebook: the first CUDA GPU when one is available,
# otherwise the CPU.
if not torch.cuda.is_available():
    print("No CUDA devices available, using CPU only.")
else:
    cuda_count = torch.cuda.device_count()
    print(f"Found {cuda_count} CUDA device(s):")

    for i in range(cuda_count):
        device_props = torch.cuda.get_device_properties(i)
        # Same conversion as the printed value: total_memory / 1024**3.
        total_memory_gb = device_props.total_memory / 1024**3
        print(f"  Device {i}: {torch.cuda.get_device_name(i)}")
        print(f"    - Compute capability: {device_props.major}.{device_props.minor}")
        print(f"    - Total memory: {total_memory_gb:.2f} GB")

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Root folder of the FER-2013 images on disk. Nothing is downloaded here: the
# "train" and "test" sub-folders are read with ImageFolder.
data_dir = "../data/FER-2013"

# Preprocessing applied to every image, in order (no normalization yet: the
# statistics needed for it are computed further down in the notebook).
preprocessing_steps = [
    # Output 3 channels, as expected by ImageNet-pretrained models.
    torchvision.transforms.Grayscale(num_output_channels=3),
    torchvision.transforms.Resize((224, 224)),
    torchvision.transforms.ToTensor(),
]
data_transforms = torchvision.transforms.Compose(preprocessing_steps)

# Build the training and test datasets with the same preprocessing.
train_data = torchvision.datasets.ImageFolder(
    f"{data_dir}/train", transform=data_transforms
)
test_data = torchvision.datasets.ImageFolder(
    f"{data_dir}/test", transform=data_transforms
)

print("Classes of the dataset:", train_data.classes)
print("Number of training samples:", len(train_data))
print("Number of test samples:", len(test_data))

# Display the complete label-index -> emotion mapping.
for idx, emotion in enumerate(train_data.classes):
    print(f"Label {idx} → {emotion}")

In [None]:
# Fetch one data pair (one image and the corresponding label) to sanity-check
# what the preprocessing pipeline produces. The tensor shape is printed once.
image, label = train_data[0]  # type: ignore
print(image.shape)
print("Image label:", label)

In [None]:
# Mini-batch size shared by both loaders.
batch_size = 16

# Both loaders reshuffle their samples and discard the last incomplete batch.
loader_options = {"batch_size": batch_size, "shuffle": True, "drop_last": True}
train_dataloader = DataLoader(train_data, **loader_options)
test_dataloader = DataLoader(test_data, **loader_options)

# Report how many batches each split yields.
num_batches = len(train_dataloader)
print("Number of batches in the training subset:", num_batches)

num_batches = len(test_dataloader)
print("Number of batches in the testing subset:", num_batches)

In [None]:
# 3. Compute the per-channel mean and standard deviation used for normalization.
#
# The statistics are taken from the TRAINING split only, so that nothing about
# the test split leaks into the preprocessing.
# The sums are accumulated in float64: with this many pixels, float32
# accumulation loses precision and E[x^2] - E[x]^2 can even turn slightly
# negative.
sum_ = 0.0
sum_sq_ = 0.0
nb_pixels = 0

for imgs, _ in train_dataloader:
    # With the transforms defined above (3-channel grayscale, 224x224 resize),
    # imgs has shape (B, 3, 224, 224).
    B, C, H, W = imgs.shape
    imgs = imgs.to(torch.float64).reshape(B, C, -1)  # (B, C, H*W)
    sum_ += imgs.sum(dim=[0, 2])  # per-channel sum of the pixels
    sum_sq_ += (imgs**2).sum(dim=[0, 2])  # per-channel sum of the squares
    nb_pixels += B * H * W  # number of pixels seen per channel

mean = sum_ / nb_pixels
# Var[x] = E[x^2] - E[x]^2, clamped at 0 to guard against rounding error.
variance = (sum_sq_ / nb_pixels - mean**2).clamp(min=0.0)
std = torch.sqrt(variance)

# Back to float32, the dtype of the image tensors these values will normalize.
mean = mean.to(torch.float32)
std = std.to(torch.float32)

print("Dataset mean:", mean)  # one value per channel
print("Dataset std: ", std)  # one value per channel

In [None]:
# Same preprocessing as before, followed by a per-channel normalization with
# the mean/std computed in the previous cell.
normalized_steps = [
    # Output 3 channels, as expected by ImageNet-pretrained models.
    torchvision.transforms.Grayscale(num_output_channels=3),
    torchvision.transforms.Resize((224, 224)),  # or another resolution
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=mean, std=std),
]
data_transforms = torchvision.transforms.Compose(normalized_steps)

# Rebuild both datasets so that they use the normalized pipeline.
train_data = torchvision.datasets.ImageFolder(
    f"{data_dir}/train", transform=data_transforms
)
test_data = torchvision.datasets.ImageFolder(
    f"{data_dir}/test", transform=data_transforms
)

print("Classes of the dataset:", train_data.classes)
print("Number of training samples:", len(train_data))
print("Number of test samples:", len(test_data))

# Display the complete label-index -> emotion mapping.
for idx, emotion in enumerate(train_data.classes):
    print(f"Label {idx} → {emotion}")

In [None]:
# Fetch one (image, label) pair from the normalized training set and report
# its tensor shape and label index.
image, label = train_data[0]  # type: ignore
print(image.shape)
print("Image label:", label)

# Show the first channel only, without axis ticks.
# NOTE(review): "gray_r" is the reversed grey colormap, so the face is drawn
# as a negative; confirm this is intended.
fig, ax = plt.subplots()
ax.imshow(image[0], cmap="gray_r")
ax.set_xticks([])
ax.set_yticks([])
plt.show()

In [None]:
# Mini-batch size used for both training and evaluation.
batch_size = 16

# Training loader: reshuffled every epoch; the last incomplete batch is dropped
# so that every training batch holds exactly `batch_size` samples.
train_dataloader = DataLoader(
    train_data, batch_size=batch_size, shuffle=True, drop_last=True
)

# Evaluation loader: every test sample must be scored exactly once, so the
# final (possibly smaller) batch is kept and no shuffling is applied. With
# drop_last=True up to batch_size - 1 randomly chosen test images would be
# left out of the reported accuracy.
test_dataloader = DataLoader(
    test_data, batch_size=batch_size, shuffle=False, drop_last=False
)

# - print the number of batches in the training subset
num_batches = len(train_dataloader)
print("Number of batches in the training subset:", num_batches)

# - print the number of batches in the testing subset
num_batches = len(test_dataloader)
print("Number of batches in the testing subset:", num_batches)

In [None]:
# Build a VGG-11 network initialised with the pretrained ImageNet weights
# (the IMAGENET1K_V1 checkpoint).
weights = torchvision.models.VGG11_Weights.IMAGENET1K_V1
model = torchvision.models.vgg11(weights=weights)

In [None]:
# Freeze all layers: with requires_grad disabled on every parameter, the
# pretrained weights receive no gradients and are therefore not re-trained.
model.requires_grad_(False)

In [None]:
def training_mlp_classifier(
    model: nn.Module,
    train_dataloader: DataLoader,
    num_epochs: int,
    loss_fn,
    learning_rate: float,
    verbose=True,
    device=None,
):
    """Train a copy of ``model`` with Adam and return it with its loss history.

    Only parameters whose ``requires_grad`` flag is set are handed to the
    optimizer, so frozen layers keep their weights. The model passed in is
    never modified: training happens on a deep copy.

    Args:
        model: Network to train.
        train_dataloader: Iterable yielding ``(images, labels)`` batches.
        num_epochs: Number of passes over ``train_dataloader``.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar
            tensor. The epoch loss assumes it is the mean over the batch (the
            default of ``nn.CrossEntropyLoss``).
        learning_rate: Learning rate given to Adam.
        verbose: When true, print the average loss after every epoch.
        device: Device to train on. Defaults to the first CUDA GPU when one is
            available, otherwise the CPU.

    Returns:
        A ``(trained_model, train_losses)`` tuple, where ``train_losses`` holds
        the per-sample average training loss of each epoch.
    """
    if device is None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Work on a copy so the caller's model is left untouched.
    model_tr = copy.deepcopy(model).to(device)

    # Training mode switches layers such as dropout / batch norm to their
    # training behaviour. It does NOT change any `requires_grad` flag.
    model_tr.train()

    # Optimize only the parameters that are still trainable.
    trainable_params = [p for p in model_tr.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable_params, lr=learning_rate)

    train_losses = []

    for epoch in range(num_epochs):
        running_loss = 0.0
        num_samples = 0

        for images, labels in train_dataloader:
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass and loss.
            labels_pred = model_tr(images)
            loss = loss_fn(labels_pred, labels)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # `loss.item()` is averaged over the batch: weight it by the ACTUAL
            # batch size so that a smaller final batch is accounted correctly.
            current_batch_size = labels.size(0)
            running_loss += loss.item() * current_batch_size
            num_samples += current_batch_size

        # Per-sample average over the epoch (NaN when the loader was empty).
        tr_loss = running_loss / num_samples if num_samples else float("nan")
        train_losses.append(tr_loss)

        if verbose:
            print(
                "Epoch [{}/{}], Training loss: {:.4f}".format(
                    epoch + 1, num_epochs, tr_loss
                )
            )

    return model_tr, train_losses

In [None]:
# In this VGG classifier, FC1 is model.classifier[0], FC2 is
# model.classifier[3] and the original output layer is model.classifier[6].
# Only the output layer is replaced and trained here; enabling requires_grad
# on indices 0 and 3 as well would also fine-tune FC1 and FC2.

# Replace the output layer (the last module of the classifier) with a new one
# producing one score per emotion class.
num_classes = 7
new_head = nn.Linear(4096, num_classes)

# A freshly created layer is trainable by default; state it explicitly anyway.
new_head.requires_grad_(True)
model.classifier[6] = new_head

In [None]:
import os

# Training hyper-parameters.
num_epochs = 30
learning_rate = 0.01
loss_fn = nn.CrossEntropyLoss()

# Create the checkpoint directory BEFORE training: otherwise a missing
# "training/" folder would only surface in torch.save, after all the epochs
# have run, and the trained weights would not be written to disk.
checkpoint_path = "training/vgg-11_trained.pt"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

model_trained, train_losses = training_mlp_classifier(
    model, train_dataloader, num_epochs, loss_fn, learning_rate, verbose=True
)

# Persist only the weights (state dict), not the whole module object.
torch.save(model_trained.state_dict(), checkpoint_path)

In [None]:
# Evaluation function: similar to the training loop, except we don't need to compute any gradient / backprop
def eval_mlp_classifier(model: nn.Module, eval_dataloader: DataLoader):
    """Return the classification accuracy (in percent) of ``model``.

    The model is switched to evaluation mode and each batch is moved to the
    device the model's parameters live on, so the inputs and the weights can
    never end up on different devices.

    Args:
        model: Classifier returning one score per class for every sample.
        eval_dataloader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Percentage (0-100) of samples whose highest-scoring class equals the
        true label.

    Raises:
        ValueError: If ``eval_dataloader`` yields no samples.
    """
    # Evaluation mode: layers such as dropout / batch norm use their
    # inference behaviour.
    model.eval()

    # Follow the model's own device (None for a parameter-free model, in which
    # case the batches are left where they are).
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    correct_labels = 0
    total_labels = 0

    # No gradients are needed for evaluation (saves memory and time).
    with torch.no_grad():
        for images, labels in eval_dataloader:
            if model_device is not None:
                images = images.to(model_device)
                labels = labels.to(model_device)

            # The predicted label is the class with the highest score.
            labels_predicted = model(images).argmax(dim=1)

            # Count with the real batch size so partial batches are handled.
            total_labels += labels.size(0)
            correct_labels += (labels_predicted == labels).sum().item()

    if total_labels == 0:
        raise ValueError("eval_dataloader produced no samples; accuracy is undefined")

    accuracy = 100 * correct_labels / total_labels

    return accuracy

In [None]:
# Rebuild the network from the saved checkpoint. `map_location` lets a
# checkpoint written on a GPU be loaded on a CPU-only machine, and the model is
# moved to `device` explicitly: the copy of `model` lives on the CPU, while the
# evaluation batches are sent to `device`.
model_test = copy.deepcopy(model)
model_test.load_state_dict(
    torch.load("training/vgg-11_trained.pt", map_location=device)
)
model_test = model_test.to(device)

# - Apply the evaluation function using the test dataloader
test_accuracy = eval_mlp_classifier(model_test, test_dataloader)

# - Print the test accuracy
print("Test accuracy: {:.2f}%".format(test_accuracy))

# - Plot the training loss over epochs
plt.figure()
plt.plot(train_losses)
plt.title("Training loss over epochs")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid()
plt.show()