In [27]:
import pandas as pd
import os
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset, Subset
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.optim import lr_scheduler
from tqdm import tqdm

# Load the annotations for training and validation from separate CSV files
IMAGE_FOLDER = r"D:\\DebosmitaPhD\\VALENCE-AROUSAL\\AffectNet\\train_set\\images"
IMAGE_FOLDER_TEST = r"D:\\DebosmitaPhD\\VALENCE-AROUSAL\\AffectNet\\val_set\\images"
train_annotations_path = (
    r"D:\\DebosmitaPhD\\VALENCE-AROUSAL\\CAGE-Affectnet-CVPR\\affectnet_annotations\\train_set_annotation_without_lnd.csv"
)
valid_annotations_path = (
    r"D:\\DebosmitaPhD\\VALENCE-AROUSAL\\CAGE-Affectnet-CVPR\\affectnet_annotations\\val_set_annotation_without_lnd.csv"
)
train_annotations_df = pd.read_csv(train_annotations_path)
valid_annotations_df = pd.read_csv(valid_annotations_path)

In [28]:
# Check if the folder exists
print("Image folder exists:", os.path.exists(IMAGE_FOLDER))

# Check a few files in the directory
sample_files = os.listdir(IMAGE_FOLDER)[:5]
print("Sample files in the directory:", sample_files)

# Check if the first image exists
image_path = os.path.join(IMAGE_FOLDER, f"{train_annotations_df['number'].iloc[0]}.jpg")
print("First image exists:", os.path.exists(image_path))

Image folder exists: True
Sample files in the directory: ['0.jpg', '1.jpg', '10.jpg', '100.jpg', '100000.jpg']
First image exists: True


In [29]:


# Set parameters
BATCHSIZE = 32
NUM_EPOCHS = 10
LR = 0.0001#4e-5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# **** Create dataset and data loaders ****
class CustomDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None, balance=False):
        self.dataframe = dataframe
        self.transform = transform
        self.root_dir = root_dir
        self.balance = balance

        if self.balance:
            self.dataframe = self.balance_dataset()

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        image_path = os.path.join(
            self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
        )
        image = Image.open(image_path)

        classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8)
        valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16)
        arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16)

        if self.transform:
            image = self.transform(image)

        return image, classes, valence, arousal

    def balance_dataset(self):
        balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
            lambda x: x.sample(self.dataframe["exp"].value_counts().min())
        )
        return balanced_df


transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(0.5),
        transforms.RandomGrayscale(0.01),
        transforms.RandomRotation(10),
        transforms.ColorJitter(
            brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1
        ),  # model more robust to changes in lighting conditions.
        transforms.RandomPerspective(
            distortion_scale=0.2, p=0.5
        ),  # can be helpful if your images might have varying perspectives.
        transforms.ToTensor(),  # saves image as tensor (automatically divides by 255)
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.RandomErasing(
            p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random"
        ),  # Should help overfitting
    ]
)

transform_valid = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

# Select the first 1000 indices for faster experimentation
subset_indices = list(range(100000))

# Create a subset of the training dataset
train_subset = Subset(
    CustomDataset(
        dataframe=train_annotations_df,
        root_dir=IMAGE_FOLDER,
        transform=transform,
        balance=True,
    ),
    subset_indices,
)

# val_subset = Subset(
#     CustomDataset(
#         dataframe=train_annotations_df,
#         root_dir=IMAGE_FOLDER_TEST,
#         transform=transform,
#         balance=False,
#     ),
#     subset_indices,
# )

train_dataset = CustomDataset(
    dataframe=train_annotations_df,
    root_dir=IMAGE_FOLDER,
    transform=transform,
    balance=True,
)


valid_dataset = CustomDataset(
    dataframe=valid_annotations_df,
    root_dir=IMAGE_FOLDER_TEST,
    transform=transform_valid,
    balance=False,
)
# train_loader = DataLoader(
#     train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=0
# )
# Use the subset in the DataLoader
train_loader = DataLoader(
    train_subset,
    batch_size=BATCHSIZE,
    shuffle=True,
    num_workers=0
)
valid_loader = DataLoader(
    valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=0
)


sample = train_subset[0]  # Access the first sample
image, label, valence, arousal = sample

print("Sample Data:")
print("Image shape:", image.shape if isinstance(image, torch.Tensor) else "Not Tensor")
print("Label (Class):", label)
print("Valence:", valence)
print("Arousal:", arousal)




Sample Data:
Image shape: torch.Size([3, 224, 224])
Label (Class): tensor(0, dtype=torch.int8)
Valence: tensor(0.0306, dtype=torch.float16)
Arousal: tensor(-0.0077, dtype=torch.float16)


In [30]:
# Display a single batch from the train_loader
for images, classes, valence, arousal in train_loader:
    print("Batch of Images:")
    print(images.shape)  # Shape of the images tensor
    print("Batch of Classes (Labels):")
    print(classes)  # Tensor of class labels
    print("Batch of Valence Values:")
    print(valence)  # Tensor of valence values
    print("Batch of Arousal Values:")
    print(arousal)  # Tensor of arousal values
    break  # Break after the first batch to avoid printing all batches

Batch of Images:
torch.Size([32, 3, 224, 224])
Batch of Classes (Labels):
tensor([1, 1, 0, 1, 0, 0, 0, 0, 2, 1, 0, 0, 2, 1, 1, 0, 2, 0, 0, 1, 1, 1, 0, 0,
        0, 1, 1, 1, 0, 1, 1, 2], dtype=torch.int8)
Batch of Valence Values:
tensor([ 0.7769,  0.8228, -0.0079,  0.8711, -0.2266,  0.0000,  0.2408,  0.0079,
        -0.3730,  0.8018, -0.0079, -0.0635, -0.9126,  0.8857,  0.3174, -0.2581,
        -0.8237, -0.4226, -0.3579,  0.5078,  0.5317,  0.6982,  0.0159, -0.4456,
        -0.1404,  0.8569,  0.5952,  0.7461, -0.4062,  0.5181,  0.4048, -0.1984],
       dtype=torch.float16)
Batch of Arousal Values:
tensor([ 0.3884,  0.2412,  0.0000, -0.0048,  0.0843,  0.0000, -0.4060,  0.0159,
        -0.1031,  0.3491,  0.0079, -0.0793, -0.3254, -0.0775,  0.1349,  0.1159,
        -0.4097,  0.2255,  0.2064,  0.0555,  0.0079,  0.6348, -0.0159,  0.2019,
        -0.3533,  0.2460,  0.0238, -0.1270,  0.1711,  0.1749,  0.0317, -0.3889],
       dtype=torch.float16)


In [31]:
print(train_annotations_df['exp'].value_counts())  # Check class distribution


1    134415
0     74874
2     25459
6     24882
3     14090
4      6378
5      3803
7      3750
Name: exp, dtype: int64


In [32]:
import torch  # type: ignore
import torch.nn as nn  # type: ignore
import torch.nn.functional as F  # type: ignore
import torch.optim as optim  # type: ignore
from tqdm import tqdm

# Define the Residual Block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += identity
        out = F.relu(out)

        return out


# Define the Residual Network
class ResidualNetwork(nn.Module):
    def __init__(self, num_outputs=2):  # 2 outputs for valence and arousal
        super(ResidualNetwork, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(64, 2, stride=1)
        self.layer2 = self._make_layer(128, 2, stride=2)
        self.layer3 = self._make_layer(256, 2, stride=2)
        self.layer4 = self._make_layer(512, 2, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_outputs)  # 2 outputs for valence and arousal

    def _make_layer(self, out_channels, num_blocks, stride):
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        layers = [ResidualBlock(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels
        for _ in range(1, num_blocks):
            layers.append(ResidualBlock(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x


# Initialize the model, loss function, and optimizer
model = ResidualNetwork(num_outputs=2).to(DEVICE)
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=LR)

# Initialize gradient scaler for mixed precision
scaler = torch.amp.GradScaler()

# Training Loop
for epoch in range(NUM_EPOCHS):
    model.train()
    train_loss = 0.0

    for images, _, valence, arousal in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
        # Move data to the device
        images = images.to(DEVICE)
        valence = valence.to(DEVICE, dtype=torch.float32)
        arousal = arousal.to(DEVICE, dtype=torch.float32)

        optimizer.zero_grad()

        # Mixed precision forward pass
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            outputs = model(images)
            val_pred, aro_pred = outputs[:, 0], outputs[:, 1]

            # Compute loss
            loss = criterion(val_pred, valence) + criterion(aro_pred, arousal)

        # Backward pass with scaled gradients
        scaler.scale(loss).backward()

        # Optimizer step
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()

    print(f"Epoch {epoch+1}, Training Loss: {train_loss/len(train_loader):.4f}")


Epoch 1/10: 100%|██████████| 313/313 [03:10<00:00,  1.64it/s]


Epoch 1, Training Loss: 0.3333


Epoch 2/10: 100%|██████████| 313/313 [01:17<00:00,  4.05it/s]


Epoch 2, Training Loss: 0.2497


Epoch 3/10: 100%|██████████| 313/313 [01:44<00:00,  3.00it/s]


Epoch 3, Training Loss: 0.2266


Epoch 4/10: 100%|██████████| 313/313 [01:27<00:00,  3.59it/s]


Epoch 4, Training Loss: 0.2137


Epoch 5/10: 100%|██████████| 313/313 [01:17<00:00,  4.03it/s]


Epoch 5, Training Loss: 0.2031


Epoch 6/10: 100%|██████████| 313/313 [01:17<00:00,  4.03it/s]


Epoch 6, Training Loss: 0.1963


Epoch 7/10: 100%|██████████| 313/313 [01:17<00:00,  4.03it/s]


Epoch 7, Training Loss: 0.1827


Epoch 8/10: 100%|██████████| 313/313 [01:17<00:00,  4.03it/s]


Epoch 8, Training Loss: 0.1796


Epoch 9/10: 100%|██████████| 313/313 [01:17<00:00,  4.03it/s]


Epoch 9, Training Loss: 0.1742


Epoch 10/10: 100%|██████████| 313/313 [01:17<00:00,  4.03it/s]

Epoch 10, Training Loss: 0.1688





In [34]:
# Validation Phase
model.eval()
valid_loss = 0.0

with torch.no_grad():
    for images, _, valence, arousal in tqdm(valid_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} - Validation"):
        # Move data to the device
        images = images.to(DEVICE)
        valence = valence.to(DEVICE, dtype=torch.float32)
        arousal = arousal.to(DEVICE, dtype=torch.float32)

        # Mixed precision forward pass
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            outputs = model(images)
            val_pred, aro_pred = outputs[:, 0], outputs[:, 1]

            # Compute loss
            loss = criterion(val_pred, valence) + criterion(aro_pred, arousal)
            valid_loss += loss.item()

valid_loss /= len(valid_loader)
print(f"Epoch {epoch+1}, Validation Loss: {valid_loss:.4f}")


Epoch 10/10 - Validation: 100%|██████████| 125/125 [00:05<00:00, 22.49it/s]


Epoch 10, Validation Loss: 0.5205
