In [12]:
import pandas as pd
import os
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.optim import lr_scheduler
from tqdm import tqdm

# Load the annotations for training and validation from separate CSV files
# Load the annotations for training and validation from separate CSV files
IMAGE_FOLDER = r"D:\\DebosmitaPhD\\VALENCE-AROUSAL\\AffectNet\\train_set_resized\\images"
IMAGE_FOLDER_TEST = r"D:\\DebosmitaPhD\\VALENCE-AROUSAL\\AffectNet\\val_set_resized\\images"
train_annotations_path = (
    r"D:\\DebosmitaPhD\\VALENCE-AROUSAL\\CAGE-Affectnet-CVPR\\affectnet_annotations\\train_set_annotation_without_lnd.csv"
)
valid_annotations_path = (
    r"D:\\DebosmitaPhD\\VALENCE-AROUSAL\\CAGE-Affectnet-CVPR\\affectnet_annotations\\val_set_annotation_without_lnd.csv"
)
train_annotations_df = pd.read_csv(train_annotations_path)
valid_annotations_df = pd.read_csv(valid_annotations_path)


In [13]:
# Set parameters
BATCHSIZE = 64
NUM_EPOCHS = 20
LR = 4e-5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# **** Create dataset and data loaders ****
class CustomDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None, balance=False):
        self.dataframe = dataframe
        self.transform = transform
        self.root_dir = root_dir
        self.balance = balance

        if self.balance:
            self.dataframe = self.balance_dataset()

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        image_path = os.path.join(
            self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
        )
        if os.path.exists(image_path):
            image = Image.open(image_path)
        else:
            image = Image.new(
                "RGB", (224, 224), color="white"
            )  # Handle missing image file

        classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
        labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, classes, labels

    def balance_dataset(self):
        balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
            lambda x: x.sample(self.dataframe["exp"].value_counts().min())
        )
        return balanced_df


transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(0.5),
        transforms.RandomGrayscale(0.01),
        transforms.RandomRotation(10),
        transforms.ColorJitter(
            brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1
        ),  # model more robust to changes in lighting conditions.
        transforms.RandomPerspective(
            distortion_scale=0.2, p=0.5
        ),  # can be helpful if your images might have varying perspectives.
        transforms.ToTensor(),  # saves image as tensor (automatically divides by 255)
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.RandomErasing(
            p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random"
        ),  # TEST: Should help overfitting
    ]
)

transform_valid = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

train_dataset = CustomDataset(
    dataframe=train_annotations_df,
    root_dir=IMAGE_FOLDER,
    transform=transform,
    balance=False,
)
valid_dataset = CustomDataset(
    dataframe=valid_annotations_df,
    root_dir=IMAGE_FOLDER_TEST,
    transform=transform_valid,
    balance=False,
)
train_loader = DataLoader(
    train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=0
)
valid_loader = DataLoader(
    valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=0
)

# ***** Define the model *****

# Initialize the model
MODEL = models.efficientnet_v2_s(weights="DEFAULT")
num_features = MODEL.classifier[1].in_features
MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=10)
MODEL.to(DEVICE)
# Define (weighted) loss function
weights = torch.tensor(
    [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821]
)

In [14]:
# Display a single batch from the train_loader
for images, classes, labels in train_loader:  # Unpack into three variables
    valence, arousal = labels[:, 0], labels[:, 1]  # Split the labels tensor into valence and arousal

    print("Batch of Images:")
    print(images.shape)  # Shape of the images tensor
    print("Batch of Classes (Labels):")
    print(classes)  # Tensor of class labels
    print("Batch of Valence Values:")
    print(valence)  # Tensor of valence values
    print("Batch of Arousal Values:")
    print(arousal)  # Tensor of arousal values
    break  # Break after the first batch to avoid printing all batches


Batch of Images:
torch.Size([64, 3, 96, 96])
Batch of Classes (Labels):
tensor([0, 5, 1, 2, 1, 0, 1, 1, 1, 0, 3, 5, 0, 1, 2, 0, 0, 0, 1, 1, 0, 1, 6, 0,
        1, 2, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 2, 0, 0, 6, 6, 1, 2, 2, 2, 6, 2,
        0, 6, 1, 1, 6, 0, 1, 0, 1, 1, 7, 1, 0, 1, 6, 0])
Batch of Valence Values:
tensor([ 0.0000, -0.7619,  0.7480, -0.8230,  0.6928,  0.0000,  0.7044,  0.5847,
         0.5238, -0.0556,  0.3254, -0.3175, -0.1210,  0.7533, -0.5178,  0.0173,
         0.0000, -0.0436,  0.7924,  0.8003, -0.3793,  0.6556, -0.7063, -0.3388,
         0.8016, -0.9593,  0.0129,  0.0230,  0.0000,  0.6111, -0.4003,  0.9206,
         0.7653,  0.7843, -0.0097,  0.0159,  0.0159, -0.7114, -0.0339, -0.0048,
        -0.7057, -0.3333,  0.9206, -0.3373, -0.8750, -0.1190, -0.5794, -0.7970,
         0.0000, -0.4679,  0.6967,  0.6581, -0.5000,  0.2160,  0.7196, -0.1111,
         0.8584,  0.7082, -0.6436,  0.8175,  0.0000,  0.8130, -0.1599,  0.0690])
Batch of Arousal Values:
tensor([ 0.0317,  0.

In [15]:
criterion_cls = nn.CrossEntropyLoss(weights.to(DEVICE))
criterion_cls_val = (
    nn.CrossEntropyLoss()
)  # Use two loss functions, as the validation dataset is balanced
criterion_reg = nn.MSELoss()

optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)

# ***** Train the model *****
print("--- Start training ---")
scaler = torch.cuda.amp.GradScaler()
best_valid_loss = 100

for epoch in range(NUM_EPOCHS):
    MODEL.train()
    total_train_correct = 0
    total_train_samples = 0
    for images, classes, labels in tqdm(
        train_loader, desc="Epoch train_loader progress"
    ):
        images, classes, labels = (
            images.to(DEVICE),
            classes.to(DEVICE),
            labels.to(DEVICE),
        )
        optimizer.zero_grad()
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            outputs = MODEL(images)
            outputs_cls = outputs[:, :8]
            outputs_reg = outputs[:, 8:]
            loss = criterion_cls(
                outputs_cls.cuda(), classes.cuda()
            ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda())
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            lr_scheduler.step()
            current_lr = optimizer.param_groups[0]["lr"]

        _, train_predicted = torch.max(outputs_cls, 1)
        total_train_samples += classes.size(0)
        total_train_correct += (train_predicted == classes).sum().item()

    train_accuracy = (total_train_correct / total_train_samples) * 100

    MODEL.eval()
    valid_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, classes, labels in valid_loader:
            images, classes, labels = (
                images.to(DEVICE),
                classes.to(DEVICE),
                labels.to(DEVICE),
            )
            outputs = MODEL(images)
            outputs_cls = outputs[:, :8]
            outputs_reg = outputs[:, 8:]
            loss = criterion_cls_val(
                outputs_cls.cuda(), classes.cuda()
            ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda())
            valid_loss += loss.item()
            _, predicted = torch.max(outputs_cls, 1)
            total += classes.size(0)
            correct += (predicted == classes).sum().item()

    print(
        f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
        f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
        f"Validation Accuracy: {(correct/total)*100:.2f}%"
        f", Training Accuracy: {train_accuracy:.2f}%, "
    )

    if abs(valid_loss/len(valid_loader)) < best_valid_loss:
        best_valid_loss = abs(valid_loss/len(valid_loader))
        print(f"Saving model at epoch {epoch+1}")
        torch.save(MODEL.state_dict(), "model.pt")  # Save the best model


  scaler = torch.cuda.amp.GradScaler()


--- Start training ---


Epoch train_loader progress: 100%|██████████| 4495/4495 [32:04<00:00,  2.34it/s]


Epoch [1/20] - Validation Loss: 2.2581, Validation Accuracy: 47.86%, Training Accuracy: 48.21%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:40<00:00,  4.49it/s]


Epoch [2/20] - Validation Loss: 2.0138, Validation Accuracy: 54.14%, Training Accuracy: 56.03%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:18<00:00,  4.59it/s]


Epoch [3/20] - Validation Loss: 2.0599, Validation Accuracy: 55.56%, Training Accuracy: 58.40%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:45<00:00,  4.47it/s]


Epoch [4/20] - Validation Loss: 1.9861, Validation Accuracy: 56.26%, Training Accuracy: 59.45%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [17:28<00:00,  4.29it/s]


Epoch [5/20] - Validation Loss: 1.9259, Validation Accuracy: 58.06%, Training Accuracy: 60.59%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [17:31<00:00,  4.27it/s]


Epoch [6/20] - Validation Loss: 1.8898, Validation Accuracy: 58.81%, Training Accuracy: 61.31%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:24<00:00,  4.57it/s]


Epoch [7/20] - Validation Loss: 1.8620, Validation Accuracy: 58.86%, Training Accuracy: 61.84%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:16<00:00,  4.60it/s]


Epoch [8/20] - Validation Loss: 1.8573, Validation Accuracy: 58.34%, Training Accuracy: 62.75%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:29<00:00,  4.54it/s]


Epoch [9/20] - Validation Loss: 1.9499, Validation Accuracy: 59.04%, Training Accuracy: 63.05%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:40<00:00,  4.49it/s]


Epoch [10/20] - Validation Loss: 1.8652, Validation Accuracy: 59.29%, Training Accuracy: 63.32%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:44<00:00,  4.47it/s]


Epoch [11/20] - Validation Loss: 1.9204, Validation Accuracy: 58.84%, Training Accuracy: 63.77%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:23<00:00,  4.57it/s]


Epoch [12/20] - Validation Loss: 1.9256, Validation Accuracy: 58.09%, Training Accuracy: 64.27%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:27<00:00,  4.55it/s]


Epoch [13/20] - Validation Loss: 1.8994, Validation Accuracy: 58.74%, Training Accuracy: 64.37%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [17:09<00:00,  4.37it/s]


Epoch [14/20] - Validation Loss: 1.9168, Validation Accuracy: 59.54%, Training Accuracy: 64.53%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:50<00:00,  4.45it/s]


Epoch [15/20] - Validation Loss: 1.9466, Validation Accuracy: 58.84%, Training Accuracy: 65.07%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:28<00:00,  4.55it/s]


Epoch [16/20] - Validation Loss: 1.9358, Validation Accuracy: 58.91%, Training Accuracy: 65.37%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:28<00:00,  4.55it/s]


Epoch [17/20] - Validation Loss: 1.9995, Validation Accuracy: 58.56%, Training Accuracy: 65.93%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:31<00:00,  4.53it/s]


Epoch [18/20] - Validation Loss: 1.9891, Validation Accuracy: 58.64%, Training Accuracy: 66.14%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:55<00:00,  4.43it/s]


Epoch [19/20] - Validation Loss: 2.0240, Validation Accuracy: 58.91%, Training Accuracy: 66.41%, 


Epoch train_loader progress: 100%|██████████| 4495/4495 [16:28<00:00,  4.55it/s]


Epoch [20/20] - Validation Loss: 2.0690, Validation Accuracy: 58.39%, Training Accuracy: 66.88%, 


In [18]:
print(best_valid_loss,valid_loss)
MODEL

100 130.34760284423828


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): FusedMBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
      )
      (1): FusedMBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  

In [19]:
torch.save(MODEL.state_dict(), "model.pt")  # Save the best model