### Mount Google Drive to find the data file

In [None]:
# Colab-only helper used to expose Google Drive inside the runtime filesystem
from google.colab import drive
# Mount point; the dataset paths used in this notebook live under /content/drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Data Preprocessing

In [None]:
import os
import pandas as pd
import zipfile
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from collections import Counter
from PIL import Image
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

# Location of the cropped DiffusionEmotion_S images on the mounted Drive
image_folder = "/content/drive/MyDrive/iis_data/project/DiffusionFER/DiffusionEmotion_S/cropped"  # Replace with your actual path

# Fail fast when the folder cannot be reached
if os.path.exists(image_folder) is False:
    raise ValueError(f"Image folder not found at {image_folder}. Please check the path.")

# Print one line per entry, tagged as a file or a directory
print("Contents of image folder:")
for item in os.listdir(image_folder):
    item_path = os.path.join(image_folder, item)
    if os.path.isfile(item_path):
        print(f"- {item} (File)")
    else:
        print(f"- {item} (Directory)")

Contents of image folder:
- neutral (Directory)
- sad (Directory)
- angry (Directory)
- surprise (Directory)
- disgust (Directory)
- fear (Directory)
- happy (Directory)
- .ipynb_checkpoints (Directory)


In [None]:
# Dataset locations on the mounted Drive
image_folder = "/content/drive/MyDrive/iis_data/project/DiffusionFER/DiffusionEmotion_S/cropped"  # Replace with your actual path
csv_file_path = "/content/drive/MyDrive/iis_data/project/DiffusionFER/DiffusionEmotion_S/dataset_sheet.csv"

# Validate both inputs up front. isdir (rather than exists) also rejects a
# regular file sitting at the folder path, which would otherwise only fail
# later inside os.listdir.
if not os.path.isdir(image_folder):
    raise ValueError(f"Image folder not found at {image_folder}. Please check the path.")
# The annotation sheet is read by the following cells, so check it here too.
if not os.path.isfile(csv_file_path):
    raise ValueError(f"CSV file not found at {csv_file_path}. Please check the path.")

# List the contents of the image folder
print("Contents of image folder:")
for item in os.listdir(image_folder):
    item_path = os.path.join(image_folder, item)
    print(f"- {item} (File)" if os.path.isfile(item_path) else f"- {item} (Directory)")

Contents of image folder:
- neutral (Directory)
- sad (Directory)
- angry (Directory)
- surprise (Directory)
- disgust (Directory)
- fear (Directory)
- happy (Directory)
- .ipynb_checkpoints (Directory)


In [None]:
# Load the annotation sheet into a DataFrame.
# NOTE(review): the name `csv` would shadow the standard-library `csv` module if
# that module were ever imported in this notebook.
csv = pd.read_csv(csv_file_path)
# Preview the first rows
csv.head()

Unnamed: 0,subDirectory_filePath,valence,arousal,expression
0,DiffusionEmotion_S_cropped/neutral/aksjlkjl_0.png,-0.1,0.1,0
1,DiffusionEmotion_S_cropped/neutral/aksndlkn_0.png,0.0,0.0,0
2,DiffusionEmotion_S_cropped/neutral/anavqmjd_0.png,-0.1,-0.1,0
3,DiffusionEmotion_S_cropped/neutral/aovjrrax_0.png,-0.2,-0.1,0
4,DiffusionEmotion_S_cropped/neutral/aptzlpuo_0.png,-0.1,-0.1,0


In [None]:
# The sheet stores paths under 'DiffusionEmotion_S_cropped/...'; rewrite that
# prefix to 'DiffusionEmotion_S/cropped', the folder layout used by image_folder.
# regex=False forces a literal substring replacement, independent of the
# default that the installed pandas version uses for `regex`.
csv['subDirectory_filePath'] = csv['subDirectory_filePath'].str.replace(
    'DiffusionEmotion_S_cropped', 'DiffusionEmotion_S/cropped', regex=False
)

# NOTE: this overwrites the sheet on Drive in place. It is not optional for this
# notebook: a later cell re-reads csv_file_path and builds the image paths from
# this column. Re-running the cell is harmless because the old prefix no longer
# occurs after the first rewrite.
csv.to_csv(csv_file_path, index=False)

In [None]:
# Column whose distinct values are inspected
column_name = 'expression'

# Guard first: report a missing column, otherwise print its unique values
if column_name not in csv.columns:
    print(f"Column '{column_name}' not found in the DataFrame.")
else:
    distinct_values = csv[column_name].unique()
    print(distinct_values)

[0 6 1 2 3 4 5]


In [None]:
# Column whose per-value row counts are reported
column_name = 'expression'

# Guard first: report a missing column, otherwise print a (value, counts) table
if column_name not in csv.columns:
    print(f"Column '{column_name}' not found in the DataFrame.")
else:
    rows_per_value = csv.groupby(column_name).size()
    expression_counts = rows_per_value.reset_index(name='counts')
    print(expression_counts)

   expression  counts
0           0     413
1           1     338
2           2      89
3           3     166
4           4      73
5           5      53
6           6     160


In [None]:
# Root folder of the DiffusionFER data on the mounted Drive
base_dir  = "/content/drive/MyDrive/iis_data/project/DiffusionFER/"

In [None]:
# Root folder that the sheet's relative paths are joined onto, and the sheet itself
base_dir = "/content/drive/MyDrive/iis_data/project/DiffusionFER/"
csv_file_path = "/content/drive/MyDrive/iis_data/project/DiffusionFER/DiffusionEmotion_S/dataset_sheet.csv"

# Read the annotation sheet from Drive
csv = pd.read_csv(csv_file_path)

# Join every relative path onto base_dir to obtain the full image path
csv['image_path'] = [
    os.path.join(base_dir, rel_path) for rel_path in csv['subDirectory_filePath']
]

# Custom Dataset class
class ImageDataset(Dataset):
    """Dataset that loads RGB images listed in a DataFrame.

    Args:
        dataframe: Frame with an 'image_path' column (file to open) and an
            'expression' column (returned as the label).
        transform: Optional callable applied to the loaded image before it
            is returned.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the backing frame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        Any exception raised while opening the image (for example a missing
        file) propagates to the caller.
        """
        # Look the row up once instead of building it separately per column.
        row = self.dataframe.iloc[idx]

        # Close the underlying file as soon as the RGB copy exists; leaving it
        # to garbage collection keeps one handle open per image read.
        with Image.open(row['image_path']) as source:
            image = source.convert('RGB')
        label = row['expression']

        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Preprocessing applied to every image: resize to 128x128, then convert to a tensor
resize_step = transforms.Resize((128, 128))
to_tensor_step = transforms.ToTensor()
transform = transforms.Compose([resize_step, to_tensor_step])

In [None]:
# Display the annotation frame, which now includes the image_path column
csv

Unnamed: 0,subDirectory_filePath,valence,arousal,expression,image_path
0,DiffusionEmotion_S_cropped/neutral/aksjlkjl_0.png,-0.1,0.1,0,/content/drive/MyDrive/iis_data/project/Diffus...
1,DiffusionEmotion_S_cropped/neutral/aksndlkn_0.png,0.0,0.0,0,/content/drive/MyDrive/iis_data/project/Diffus...
2,DiffusionEmotion_S_cropped/neutral/anavqmjd_0.png,-0.1,-0.1,0,/content/drive/MyDrive/iis_data/project/Diffus...
3,DiffusionEmotion_S_cropped/neutral/aovjrrax_0.png,-0.2,-0.1,0,/content/drive/MyDrive/iis_data/project/Diffus...
4,DiffusionEmotion_S_cropped/neutral/aptzlpuo_0.png,-0.1,-0.1,0,/content/drive/MyDrive/iis_data/project/Diffus...
...,...,...,...,...,...
1287,DiffusionEmotion_S_cropped/angry/zdzxvhjb_6.png,-0.7,0.6,6,/content/drive/MyDrive/iis_data/project/Diffus...
1288,DiffusionEmotion_S_cropped/angry/znxzqwee_6.png,-0.7,0.2,6,/content/drive/MyDrive/iis_data/project/Diffus...
1289,DiffusionEmotion_S_cropped/angry/zoghefwq_6.png,-0.7,0.8,6,/content/drive/MyDrive/iis_data/project/Diffus...
1290,DiffusionEmotion_S_cropped/angry/zsjxfhcl_6.png,0.0,0.6,3,/content/drive/MyDrive/iis_data/project/Diffus...


### Creation of the Dataset

In [None]:
# Wrap the annotation frame so that each row yields (transformed image, label)
dataset = ImageDataset(csv, transform=transform)

# Load every sample into memory; rows whose image file is missing are reported
# and left out of both lists
images, labels = [], []
for i in range(len(dataset)):
    try:
        image, label = dataset[i]
    except FileNotFoundError:
        print(f"Warning: Skipping missing file at index {i}")
        continue
    images.append(image.numpy())  # image tensor -> NumPy array
    labels.append(label)





In [None]:
# Turn the collected Python lists into NumPy arrays
images, labels = np.asarray(images), np.asarray(labels)


In [None]:
# Hold out 20% of the samples for testing; stratify keeps the label proportions
split_parts = train_test_split(
    images, labels, test_size=0.2, random_state=42, stratify=labels
)
train_images, test_images, train_labels, test_labels = split_parts

# Images become float32 tensors, labels int64 tensors
train_x = torch.tensor(train_images, dtype=torch.float32)
train_y = torch.tensor(train_labels, dtype=torch.long)
test_x = torch.tensor(test_images, dtype=torch.float32)
test_y = torch.tensor(test_labels, dtype=torch.long)

# Pair images with labels
train_dataset = torch.utils.data.TensorDataset(train_x, train_y)
test_dataset = torch.utils.data.TensorDataset(test_x, test_y)

# Batches of 32; only the training loader is shuffled
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

In [None]:
def count_labels(dataset_loader):
    """Tally how often each label occurs across all batches of a loader.

    Args:
        dataset_loader: Iterable of 2-element batches; the second element is
            an iterable of labels, each exposing ``.item()``.

    Returns:
        A plain dict mapping label value to its count, with keys in order of
        first appearance.
    """
    tally = Counter(
        label.item()
        for _, batch_labels in dataset_loader
        for label in batch_labels
    )
    return dict(tally)

# Tally the labels of each split
train_label_counts = count_labels(train_loader)
test_label_counts = count_labels(test_loader)

# Report both tallies, training first, one line per label
for heading, tallies in (
    ("Training set label counts:", train_label_counts),
    ("\nTesting set label counts:", test_label_counts),
):
    print(heading)
    for label, count in tallies.items():
        print(f"Label {label}: {count}")

Training set label counts:
Label 0: 329
Label 1: 270
Label 6: 128
Label 4: 58
Label 3: 133
Label 2: 71
Label 5: 42

Testing set label counts:
Label 2: 18
Label 1: 67
Label 0: 82
Label 4: 15
Label 6: 32
Label 3: 33
Label 5: 11


In [None]:
# Number of distinct label values among the loaded samples
distinct_labels = set(labels)
label_counts = len(distinct_labels)
print(f"The number of different labels in the dataset is: {label_counts}")

The number of different labels in the dataset is: 7


### Neural Network

In [None]:
class EmotionClassifier(torch.nn.Module):
    """ResNet-18 backbone whose final fully connected layer is replaced.

    Args:
        num_classes: Number of output units of the replacement ``fc`` layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        # `pretrained=True` is deprecated in newer torchvision releases in
        # favour of the `weights` enum. Use the enum when it exists and keep
        # the legacy flag as a fallback for older installations.
        weights_enum = getattr(models, "ResNet18_Weights", None)
        if weights_enum is not None:
            self.model = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
        else:
            self.model = models.resnet18(pretrained=True)
        # Swap the classification head for one with `num_classes` outputs.
        num_ftrs = self.model.fc.in_features
        self.model.fc = torch.nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Run ``x`` through the wrapped backbone and return its output."""
        return self.model(x)

# Training hyper-parameters
learning_rate = 0.001
num_epochs = 20

# Prefer the GPU when PyTorch can see one
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# One output unit per distinct label counted earlier
num_classes = label_counts
print(f"Number of classes {num_classes}")

# Build the network on the chosen device, together with its loss and optimizer
model = EmotionClassifier(num_classes)
model = model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one optimisation pass over train_loader per epoch, followed by
# an accuracy check on test_loader.
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    # Batch variables carry their own names so they do not overwrite the
    # notebook-level `images` / `labels` arrays built during preprocessing
    # (re-running earlier cells would otherwise see batch tensors instead).
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        outputs = model(batch_images)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader)}")

    # Validation (measured on test_loader)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            outputs = model(batch_images)
            # Index of the largest logit per sample; `.data` is not needed
            # inside no_grad.
            _, predicted = torch.max(outputs, 1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()
    print(f"Validation Accuracy: {100 * correct / total:.2f}%")

Number of classes 7
Epoch [1/20], Loss: 1.1447433522253325
Validation Accuracy: 61.24%
Epoch [2/20], Loss: 0.6928567389647166
Validation Accuracy: 60.85%
Epoch [3/20], Loss: 0.5160968005657196
Validation Accuracy: 67.05%
Epoch [4/20], Loss: 0.406250794728597
Validation Accuracy: 67.83%
Epoch [5/20], Loss: 0.3241411205945593
Validation Accuracy: 60.08%
Epoch [6/20], Loss: 0.44297191895770305
Validation Accuracy: 64.73%
Epoch [7/20], Loss: 0.23857918392979738
Validation Accuracy: 66.28%
Epoch [8/20], Loss: 0.182266909770216
Validation Accuracy: 74.42%
Epoch [9/20], Loss: 0.1363897719561602
Validation Accuracy: 74.03%
Epoch [10/20], Loss: 0.1045261865030184
Validation Accuracy: 68.22%
Epoch [11/20], Loss: 0.12937381270934234
Validation Accuracy: 74.03%
Epoch [12/20], Loss: 0.20620483453526642
Validation Accuracy: 78.29%
Epoch [13/20], Loss: 0.2175884194333445
Validation Accuracy: 72.48%
Epoch [14/20], Loss: 0.07314834012791063
Validation Accuracy: 77.13%
Epoch [15/20], Loss: 0.15933434429

In [None]:
# Evaluation on the training set
model.eval()
train_correct = 0
train_total = 0
with torch.no_grad():
    # Iterate train_loader: the previous version looped over test_loader, so
    # the number printed as "Training Accuracy" was really the test accuracy.
    # Batch variables use their own names to leave the notebook-level
    # `images` / `labels` arrays untouched.
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        outputs = model(batch_images)
        # Index of the largest logit per sample
        _, predicted = torch.max(outputs, 1)
        train_total += batch_labels.size(0)
        train_correct += (predicted == batch_labels).sum().item()

print(f"Training Accuracy: {100 * train_correct / train_total:.2f}%")

Training Accuracy: 77.52%


In [None]:
# Persist the learned parameters (the state dict) to Drive
checkpoint_path = '/content/drive/MyDrive/iis_data/project/resnet_emotion_classifier.pth'
torch.save(model.state_dict(), checkpoint_path)