In [None]:
# !pip install numpy pandas matplotlib seaborn scikit-learn
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 

In [2]:
import os
import torch
from torchvision import datasets,models, transforms
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from PIL import Image
import torch.nn as nn
from torch.optim import Adam

In [3]:
# read CSV
csv_path = 'image_labels_merged.csv'
df = pd.read_csv(csv_path, header=None, names=['image_name', 'label'])

# check data
print(df.head())

# Check the distribution of labels
print("distribution of labels:")
print(df['label'].value_counts())

# are there missing labels
missing_labels = df[df['label'].isnull()]
if not missing_labels.empty:
    print(f"null:\n{missing_labels}")
else:
    print("no missing label")


   image_name  label
0  Image Name  Label
1  asd_01.jpg      0
2  asd_02.jpg      0
3  asd_03.jpg      4
4  asd_04.jpg      2
distribution of labels:
label
0        2067
4        1101
3         375
1         366
2         222
Label       1
Name: count, dtype: int64
no missing label


In [8]:
class CustomImageDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.annotations = pd.read_csv(csv_file, header=None, names=['image_name', 'label'])
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.annotations.iloc[idx, 0])
        image = Image.open(img_path).convert("RGB")
        label = int(self.annotations.iloc[idx, 1])
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

#Data Augmentation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# DataLoader 
dataset = CustomImageDataset(csv_file='image_labels_asd.csv', img_dir='FADC-Dataset/ASD/', transform=transform)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# DataLoader test 
images, labels = next(iter(train_loader))
print(f"Batch image size: {images.shape}")
print(f"Batch label: {labels}")


Batch image size: torch.Size([32, 3, 224, 224])
Batch label: tensor([0, 3, 2, 0, 4, 3, 2, 0, 0, 3, 2, 0, 1, 0, 0, 2, 3, 0, 4, 0, 0, 0, 0, 4,
        4, 0, 2, 0, 0, 0, 2, 1])


In [9]:
# Create model
class EmotionDetectionModel(nn.Module):
    def __init__(self, num_classes=5):
        super(EmotionDetectionModel, self).__init__()
        self.resnet = models.resnet50(pretrained=True)
        in_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.resnet(x)

model = EmotionDetectionModel(num_classes=5)
model.to('cuda')




EmotionDetectionModel(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequentia

In [6]:
# loss funciton and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001)

# Train the model
for epoch in range(10):  # 10 epoch 
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    for images, labels in train_loader:
        images, labels = images.to('cuda'), labels.to('cuda')
        
        # Forward Pass
        outputs = model(images)
        
        # loss 
        loss = criterion(outputs, labels)
        
        # Backward Pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    print(f"Epoch [{epoch+1}/10], Loss: {total_loss/len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")


Epoch [1/10], Loss: 1.4544, Accuracy: 44.76%
Epoch [2/10], Loss: 1.2831, Accuracy: 51.30%
Epoch [3/10], Loss: 1.1709, Accuracy: 54.14%
Epoch [4/10], Loss: 1.1114, Accuracy: 56.19%
Epoch [5/10], Loss: 0.9842, Accuracy: 62.96%
Epoch [6/10], Loss: 0.9718, Accuracy: 63.04%
Epoch [7/10], Loss: 0.8636, Accuracy: 67.38%
Epoch [8/10], Loss: 0.8353, Accuracy: 67.38%
Epoch [9/10], Loss: 0.7055, Accuracy: 73.76%
Epoch [10/10], Loss: 0.6091, Accuracy: 76.44%


In [7]:
torch.save(model.state_dict(), 'emotion_detection_model.pth')