In [25]:
from random import randint
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader,Dataset,random_split
import torch.nn.functional as F
import os
from PIL import Image

## Load the data from faces folder

In [2]:

class CustomDataset(Dataset):
    def __init__(self, root_directory, transform=None):
        self.root_directory = root_directory
        self.transform = transform
        self.images = []
        self.emotions = []
        self.orientations = []

        # Define mappings
        emotion_mapping = {'neutral': 0, 'sad': 1, 'happy': 2, 'angry': 3}
        orientation_mapping = {'up': 0, 'down': 1, 'left': 2, 'right': 3, 'straight': 4}

        # Recursively walk through all subdirectories to find images
        for subdir, dirs, files in os.walk(root_directory):
            for file in files:
                if file.endswith("_2.pgm"):
                    filepath = os.path.join(subdir, file)
                    # Extract label from the filename
                    parts = file.split('_')
                    orientation = parts[1]  # Get orientation from filename
                    emotion = parts[2].split('.')[0]  # Extract the emotion from the filename

                    self.images.append(filepath)
                    self.emotions.append(emotion_mapping[emotion])
                    self.orientations.append(orientation_mapping[orientation])

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = self.images[idx]
        emotion = self.emotions[idx]
        orientation = self.orientations[idx]

        image = Image.open(image_path)
        if self.transform:
            image = self.transform(image)

        emotion_tensor = torch.tensor(emotion, dtype=torch.long)
        orientation_tensor = torch.tensor(orientation, dtype=torch.long)

        return image, emotion_tensor, orientation_tensor

# Example usage of the dataset
dataset_directory = './faces'
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485], std=[0.229])
])

dataset = CustomDataset(dataset_directory, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# To test the dataloader
for images, emotions, orientations in dataloader:
    print("Image Batch Shape:", images.shape)
    print("Emotions:", emotions)
    print("Orientations:", orientations)
    break  # Just to see the first batch


total_size = len(dataset)
train_size = int(total_size * 0.8)  # 80% of the dataset for training
validation_size = total_size - train_size  # The rest for validation

train_dataset, validation_dataset = random_split(dataset, [train_size, validation_size])
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=32, shuffle=False)


Image Batch Shape: torch.Size([32, 1, 224, 224])
Emotions: tensor([3, 2, 2, 3, 0, 3, 1, 3, 2, 0, 2, 0, 0, 1, 2, 0, 0, 3, 1, 0, 3, 3, 0, 1,
        1, 1, 2, 1, 2, 3, 0, 0])
Orientations: tensor([0, 0, 2, 2, 2, 0, 3, 2, 3, 2, 3, 4, 4, 2, 4, 2, 0, 0, 2, 4, 4, 4, 3, 4,
        0, 3, 0, 3, 4, 4, 3, 4])


In [3]:
len(train_dataset)

499

## Set up the nn

In [4]:
def compute_output_dim(self, input_size, kernel_size, kernel_channels, stride, padding, pool_kernel_size):
    output_size = (input_size - kernel_size + 2 * padding) // stride + 1
    output_size = (output_size - pool_kernel_size + 1) // pool_kernel_size + 1  # Assuming stride of pool_kernel_size
    return output_size * output_size * kernel_channels

In [5]:
class MultiTaskCNN(nn.Module):
    def __init__(self, num_emotions, num_orientations):
        super(MultiTaskCNN, self).__init__()
        # Shared layers
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        self.flatten_size = 32 * 56 * 56
        
        # Task-specific layers
        self.fc1_emotion = nn.Linear(self.flatten_size, 128)
        self.fc2_emotion = nn.Linear(128, num_emotions)
        
        self.fc1_orientation = nn.Linear(self.flatten_size, 128)
        self.fc2_orientation = nn.Linear(128, num_orientations)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, self.flatten_size)  # Flatten the features
        
        # Emotion branch
        x_emotion = F.relu(self.fc1_emotion(x))
        x_emotion = self.fc2_emotion(x_emotion)
        
        # Orientation branch
        x_orientation = F.relu(self.fc1_orientation(x))
        x_orientation = self.fc2_orientation(x_orientation)
        
        return x_emotion, x_orientation


## Training loop

### Model setup

In [6]:
# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MultiTaskCNN(num_emotions=4, num_orientations=5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Path to the model file
model_path = 'ckpt.pth'

# Check if a trained model .pt file exists
if os.path.isfile(model_path):
    model.load_state_dict(torch.load(model_path))
    print("Loaded model weights from:", model_path)
else:
    print("No model found, starting training from scratch.")

Loaded model weights from: ckpt.pth


### Train Model

In [24]:

# Training parameters
num_epochs = 10
best_val_loss = float('inf')
patience = 2  # Number of epochs to wait after last improvement before stopping the training
patience_counter = 0

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, emotion_labels, orientation_labels in dataloader:
        images = images.to(device)
        emotion_labels = emotion_labels.to(device)
        orientation_labels = orientation_labels.to(device)

        optimizer.zero_grad()
        emotion_preds, orientation_preds = model(images)
        loss_emotion = criterion(emotion_preds, emotion_labels)
        loss_orientation = criterion(orientation_preds, orientation_labels)
        loss = loss_emotion + loss_orientation
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Validation phase
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for images, emotion_labels, orientation_labels in validation_dataloader:
            images = images.to(device)
            emotion_labels = emotion_labels.to(device)
            orientation_labels = orientation_labels.to(device)
            emotion_preds, orientation_preds = model(images)
            val_loss_emotion = criterion(emotion_preds, emotion_labels)
            val_loss_orientation = criterion(orientation_preds, orientation_labels)
            val_loss = val_loss_emotion + val_loss_orientation
            total_val_loss += val_loss.item()

    avg_train_loss = total_loss / len(dataloader)
    avg_val_loss = total_val_loss / len(validation_dataloader)
    print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}')

    # Checkpointing based on improvement in validation loss
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), 'ckpt.pth')  # Save the current best model state
        print(f"Model saved to ckpt.pth after epoch {epoch+1} with Validation Loss: {avg_val_loss:.4f}")
        patience_counter = 0  # Reset the patience counter
    else:
        patience_counter += 1
        if patience_counter > patience:
            print("Early stopping due to no improvement in validation loss.")
            break


Epoch 1, Train Loss: 0.0642, Validation Loss: 0.0600
Model saved to ckpt.pth after epoch 1 with Validation Loss: 0.0600
Epoch 2, Train Loss: 0.0638, Validation Loss: 0.0600
Epoch 3, Train Loss: 0.0652, Validation Loss: 0.0600
Epoch 4, Train Loss: 0.0665, Validation Loss: 0.0600
Early stopping due to no improvement in validation loss.


### Test the model from sample

In [8]:
model_path = 'ckpt.pth'
model = MultiTaskCNN(num_emotions=4, num_orientations=5).to(device)
model.load_state_dict(torch.load(model_path))
model.eval()

MultiTaskCNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1_emotion): Linear(in_features=100352, out_features=128, bias=True)
  (fc2_emotion): Linear(in_features=128, out_features=4, bias=True)
  (fc1_orientation): Linear(in_features=100352, out_features=128, bias=True)
  (fc2_orientation): Linear(in_features=128, out_features=5, bias=True)
)

In [28]:
images, emotion_labels, orientation_labels = next(iter(validation_dataloader))

# Randomly select one image and label from the batch
index = torch.randint(0, images.size(0), (1,)).item()
sample_image = images[index].unsqueeze(0).to(device)  # Add batch dimension and send to device
true_emotion_label = emotion_labels[index].item()
true_orientation_label = orientation_labels[index].item()

# Define mappings for printing
emotion_mapping = {0: 'neutral', 1: 'sad', 2: 'happy', 3: 'angry'}
orientation_mapping = {0: 'up', 1: 'down', 2: 'left', 3: 'right', 4: 'straight'}

for _ in range(5):
    # Randomly select one image and label from the entire validation set
    images, emotion_labels, orientation_labels = next(iter(validation_dataloader))
    index = randint(0, images.size(0) - 1)
    sample_image = images[index].unsqueeze(0).to(device)  # Add batch dimension and send to device
    true_emotion_label = emotion_labels[index].item()
    true_orientation_label = orientation_labels[index].item()

    # Make a prediction
    with torch.no_grad():
        emotion_pred, orientation_pred = model(sample_image)
        predicted_emotion = torch.argmax(emotion_pred, 1).item()
        predicted_orientation = torch.argmax(orientation_pred, 1).item()

    # Print results
    print("Sampled Image Results:")
    print(f"True Emotion: {emotion_mapping[true_emotion_label]}, Predicted Emotion: {emotion_mapping[predicted_emotion]}")
    print(f"True Orientation: {orientation_mapping[true_orientation_label]}, Predicted Orientation: {orientation_mapping[predicted_orientation]}")
    print("--------------------------------------------------------------")

Sampled Image Results:
True Emotion: angry, Predicted Emotion: angry
True Orientation: up, Predicted Orientation: up
--------------------------------------------------------------
Sampled Image Results:
True Emotion: neutral, Predicted Emotion: neutral
True Orientation: straight, Predicted Orientation: straight
--------------------------------------------------------------
Sampled Image Results:
True Emotion: happy, Predicted Emotion: happy
True Orientation: left, Predicted Orientation: left
--------------------------------------------------------------
Sampled Image Results:
True Emotion: angry, Predicted Emotion: angry
True Orientation: up, Predicted Orientation: up
--------------------------------------------------------------
Sampled Image Results:
True Emotion: neutral, Predicted Emotion: neutral
True Orientation: left, Predicted Orientation: left
--------------------------------------------------------------
