In [1]:
import time
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [2]:
# load data
df_orig = pd.read_csv('data/fer2013.csv')
df_orig

ERROR! Session/line number was not unique in database. History logging moved to new session 13


Unnamed: 0,emotion,pixels,Usage
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training
1,0,151 150 147 155 148 133 111 140 170 174 182 15...,Training
2,2,231 212 156 164 174 138 161 173 182 200 106 38...,Training
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training
...,...,...,...
35882,6,50 36 17 22 23 29 33 39 34 37 37 37 39 43 48 5...,PrivateTest
35883,3,178 174 172 173 181 188 191 194 196 199 200 20...,PrivateTest
35884,0,17 17 16 23 28 22 19 17 25 26 20 24 31 19 27 9...,PrivateTest
35885,3,30 28 28 29 31 30 42 68 79 81 77 67 67 71 63 6...,PrivateTest


In [3]:
# original emotion_mapping = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Sad', 5: 'Surprise', 6: 'Neutral'}
emotion_idx_map = {0: 0, 2: 1, 3: 2, 4: 3, 6: 4}
emotion_map = {0: 'Angry', 1: 'Fear', 2: 'Happy', 3: 'Sad', 4: 'Neutral'}

# remove disgust and surprise emotions
df = df_orig[(df_orig['emotion'] != 1) & (df_orig['emotion'] != 5)].copy()

# change emotion indices
df['emotion'] = df['emotion'].apply(lambda x: emotion_idx_map[x])

# change pixels into 48x48 arrays
df['pixels'] = df['pixels'].apply(lambda x: np.array(x.split(), dtype='float32'))

# split train and test data
x_train = np.array(df[df['Usage'] == 'Training']['pixels'].tolist()).reshape(-1, 48, 48)
y_train = np.array(df[df['Usage'] == 'Training']['emotion'].tolist())
x_test = np.array(df[df['Usage'] == 'PublicTest']['pixels'].tolist()).reshape(-1, 48, 48)
y_test = np.array(df[df['Usage'] == 'PublicTest']['emotion'].tolist())

# normalize pixel values

print(len(x_train))
print(x_train.shape)
print(y_train.shape)
assert len(x_train) == len(y_train)

25102
(25102, 48, 48)
(25102,)


In [5]:
# create datasets and dataloaders
class FER2013Dataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Grayscale(num_output_channels=3),  # Convert grayscale image to RGB by duplicating channels
    transforms.Resize((224, 224)),  # Resize to VGG11 expected input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize to ImageNet standards
])

out = transform(x_train[0])

train_dataset = FER2013Dataset(x_train, y_train, transform=transform)
test_dataset = FER2013Dataset(x_test, y_test, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [12]:
# build model
from torchvision.models import mobilenet_v2

model = mobilenet_v2(weights='DEFAULT')
# Modify the final layer to match the number of classes (7 in FER2013 but we only use 5 emotions)
num_classes = 5
model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth" to /home/can/.cache/torch/hub/checkpoints/mobilenet_v2-7ebf99e0.pth
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 13.6M/13.6M [00:00<00:00, 29.0MB/s]


IndexError: index 6 is out of range

In [13]:
# train model
learning_rate = 0.001
num_epochs = 50
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

best_accuracy = 0
estimate_time = True
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        start_time = time.time()
        
        images, labels = images.to(device), labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        batch_size = labels.size(0)
        correct = (predicted == labels).sum().item()
        accuracy = correct / batch_size * 100
        print(f'Batch Accuracy: {accuracy:.4f}')

        if estimate_time:
            end_time = time.time()
            time_per_batch = end_time - start_time
            num_batches = len(train_loader)
            time_per_epoch = time_per_batch * num_batches
            total_training_time = time_per_epoch * num_epochs
            print(f"Time per batch: {time_per_batch:.4f} seconds")
            print(f"Estimated time per epoch: {time_per_epoch / 60:.2f} minutes")
            print(f"Estimated total training time: {total_training_time / 3600:.2f} hours")
            estimate_time = False
        
    avg_loss = total_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')
    torch.save(model.state_dict(), 'model.pth')

    # model.eval()
    # with torch.no_grad():
    #     correct = 0
    #     total = 0
    #     for images, labels in test_loader:        
    #         outputs = model(images)
    #         _, predicted = torch.max(outputs.data, 1)
    #         total += labels.size(0)
    #         correct += (predicted == labels).sum().item()
    #     accuracy = correct / total * 100
    #     print(f'Accuracy on the test set: {accuracy:.2f}%')
    #     if accuracy > best_accuracy:
    #         best_accuracy = accuracy
    #         torch.save(model.state_dict(), 'model.pth')

Batch Accuracy: 0.0000
Time per batch: 3.9134 seconds
Estimated time per epoch: 25.63 minutes
Estimated total training time: 21.36 hours
Batch Accuracy: 0.0000
Batch Accuracy: 7.8125
Batch Accuracy: 14.0625
Batch Accuracy: 26.5625
Batch Accuracy: 31.2500
Batch Accuracy: 31.2500
Batch Accuracy: 31.2500
Batch Accuracy: 28.1250
Batch Accuracy: 28.1250
Batch Accuracy: 34.3750
Batch Accuracy: 31.2500
Batch Accuracy: 26.5625
Batch Accuracy: 26.5625
Batch Accuracy: 39.0625
Batch Accuracy: 40.6250
Batch Accuracy: 32.8125
Batch Accuracy: 40.6250
Batch Accuracy: 45.3125
Batch Accuracy: 45.3125
Batch Accuracy: 40.6250
Batch Accuracy: 37.5000
Batch Accuracy: 37.5000
Batch Accuracy: 43.7500
Batch Accuracy: 48.4375
Batch Accuracy: 46.8750
Batch Accuracy: 43.7500
Batch Accuracy: 43.7500
Batch Accuracy: 45.3125
Batch Accuracy: 42.1875
Batch Accuracy: 37.5000
Batch Accuracy: 32.8125
Batch Accuracy: 35.9375
Batch Accuracy: 45.3125
Batch Accuracy: 53.1250
Batch Accuracy: 42.1875
Batch Accuracy: 53.1250
B

KeyboardInterrupt: 