In [None]:
! pip install kaggle



In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
! mkdir ~/.kaggle
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json

In [3]:
! kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge
! unzip challenges-in-representation-learning-facial-expression-recognition-challenge

Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 94% 269M/285M [00:02<00:00, 75.4MB/s]
100% 285M/285M [00:02<00:00, 109MB/s] 
Archive:  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
  inflating: example_submission.csv  
  inflating: fer2013.tar.gz          
  inflating: icml_face_data.csv      
  inflating: test.csv                
  inflating: train.csv               


In [4]:
import torch # Main PyTorch Library
from torch import nn # Used for creating the layers and loss function
from torch.optim import Adam # Adam Optimizer
import torchvision.transforms as transforms # Transform function used to modify and preprocess all the images
from torch.utils.data import Dataset, DataLoader # Dataset class and DataLoader for creating the objects
from sklearn.preprocessing import LabelEncoder # Label Encoder to encode the classes from strings to numbers
import matplotlib.pyplot as plt # Used for visualizing the images and plotting the training progress
from PIL import Image # Used to read the images from the directory
import pandas as pd # Used to read/create dataframes (csv) and process tabular data
import numpy as np # preprocessing and numerical/mathematical operations
import os # Used to read the images path from the directory
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from sklearn.metrics import accuracy_score, classification_report


device = "cuda" if torch.cuda.is_available() else "cpu" # detect the GPU if any, if not use CPU, change cuda to mps if you have a mac
print("Device available: ", device)

Device available:  cuda


In [5]:
data_df = pd.read_csv('icml_face_data.csv')

# # Split data according to original splits
# train_df = data_df[data_df[' Usage'] == 'Training']
# val_df = data_df[data_df[' Usage'] == 'PublicTest']
# test_df = data_df[data_df[' Usage'] == 'PrivateTest']

if ' Usage' not in data_df.columns:
      print("Warning: 'Usage' column not found in dataset")

    # Split data and remove 'Usage' column
train_df = data_df[data_df[' Usage'] == 'Training'].drop(columns=[' Usage'])
val_df = data_df[data_df[' Usage'] == 'PublicTest'].drop(columns=[' Usage'])
test_df = data_df[data_df[' Usage'] == 'PrivateTest'].drop(columns=[' Usage'])

In [6]:
class FERDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.data = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pixels = self.data.iloc[idx][' pixels']
        pixels = np.array([int(pixel) for pixel in pixels.split()], dtype=np.float32)
        image = pixels.reshape(48, 48, 1)  # HWC format for transforms
        label = self.data.iloc[idx]['emotion']

        if self.transform:
            image = self.transform(image)
        else:
            # Convert to tensor and normalize if no augmentation
            image = torch.FloatTensor(image.transpose(2, 0, 1)) / 255.0

        return image, torch.LongTensor([label])

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

#Hybrid pooling: combines Avg and Max pooling
class HybridPool(nn.Module):
    def __init__(self, kernel_size=2):
        super(HybridPool, self).__init__()
        self.avgpool = nn.AvgPool2d(kernel_size)
        self.maxpool = nn.MaxPool2d(kernel_size)

    def forward(self, x):
        return 0.5 * (self.avgpool(x) + self.maxpool(x))

#Improved model with deeper layers, hybrid pooling, dropout, batchnorm
class BetterFERModel(nn.Module):
    def __init__(self, num_classes=7):
        super(BetterFERModel, self).__init__()

        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            HybridPool(),

            # Block 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            HybridPool(),

            # Block 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            HybridPool(),

            # Block 4
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            HybridPool(),

            nn.AdaptiveAvgPool2d((1, 1))  # Global average pooling
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x


In [8]:
train_dataset = FERDataset(train_df)
val_dataset = FERDataset(val_df)
test_dataset = FERDataset(test_df)


# Create data loaders
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [9]:
import numpy as np
from collections import defaultdict
from torch.optim.lr_scheduler import ReduceLROnPlateau


def train_with_history(model, train_loader, val_loader, num_epochs=15, lr=0.001):
    history = defaultdict(list)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)

    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5, verbose=False)

    best_val_acc = 0.0
    best_model_weights = None

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device).squeeze()

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Store batch metrics every 50 batches
            if batch_idx % 50 == 0:
                history['batch'].append(epoch * len(train_loader) + batch_idx)
                history['train_batch_loss'].append(loss.item())
                history['train_batch_acc'].append((predicted == labels).sum().item() / labels.size(0))

        # Calculate epoch metrics
        avg_train_loss = train_loss / len(train_loader)
        train_acc = correct / total
        history['epoch'].append(epoch)
        history['train_loss'].append(avg_train_loss)
        history['train_acc'].append(train_acc)

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device).squeeze()
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        avg_val_loss = val_loss / len(val_loader)
        val_acc = correct / total
        history['val_loss'].append(avg_val_loss)
        history['val_acc'].append(val_acc)

        # Update learning rate
        scheduler.step(avg_val_loss)

        # Track best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_weights = model.state_dict().copy()

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {avg_train_loss:.4f} | Train Acc: {train_acc:.4f}')
        print(f'Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}')
        print(f'LR: {optimizer.param_groups[0]["lr"]:.6f}')

    # Store confusion matrix data
    model.eval()
    true_labels = []
    pred_labels = []

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device).squeeze()
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)

            true_labels.extend(labels.cpu().numpy())
            pred_labels.extend(predicted.cpu().numpy())

    history['confusion_matrix'] = {
        'true': true_labels,
        'pred': pred_labels,
        'class_names': ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]
    }

    # Restore best model weights
    if best_model_weights:
        model.load_state_dict(best_model_weights)

    return model, history

In [10]:
model = BetterFERModel(num_classes=7).to(device)

# Create augmented dataset
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
])


# Train with history tracking
trained_model, training_history = train_with_history(
    model,
    train_loader,
    val_loader,
    num_epochs=20,
    lr=0.001
)





Epoch 1/20:
Train Loss: 1.6526 | Train Acc: 0.3406
Val Loss: 2.1592 | Val Acc: 0.3174
LR: 0.001000
Epoch 2/20:
Train Loss: 1.3461 | Train Acc: 0.4857
Val Loss: 1.4947 | Val Acc: 0.4433
LR: 0.001000
Epoch 3/20:
Train Loss: 1.2265 | Train Acc: 0.5344
Val Loss: 1.2825 | Val Acc: 0.4951
LR: 0.001000
Epoch 4/20:
Train Loss: 1.1462 | Train Acc: 0.5689
Val Loss: 1.2889 | Val Acc: 0.4996
LR: 0.001000
Epoch 5/20:
Train Loss: 1.0790 | Train Acc: 0.5969
Val Loss: 1.0876 | Val Acc: 0.5807
LR: 0.001000
Epoch 6/20:
Train Loss: 1.0069 | Train Acc: 0.6263
Val Loss: 1.0827 | Val Acc: 0.5876
LR: 0.001000
Epoch 7/20:
Train Loss: 0.9451 | Train Acc: 0.6488
Val Loss: 1.1962 | Val Acc: 0.5405
LR: 0.001000
Epoch 8/20:
Train Loss: 0.8710 | Train Acc: 0.6780
Val Loss: 1.2500 | Val Acc: 0.5854
LR: 0.001000
Epoch 9/20:
Train Loss: 0.7953 | Train Acc: 0.7085
Val Loss: 1.2197 | Val Acc: 0.5765
LR: 0.001000
Epoch 10/20:
Train Loss: 0.7092 | Train Acc: 0.7406
Val Loss: 1.2512 | Val Acc: 0.5751
LR: 0.000500
Epoch 11/

In [11]:
# Test evaluation
def evaluate(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device).squeeze()
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total
test_acc = evaluate(trained_model, test_loader)
print(f'Test Accuracy: {test_acc:.4f}')
training_history['test_acc'] = test_acc

Test Accuracy: 0.6531


In [13]:
!pip install wandb -q
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mabarb2022[0m ([33mabarb2022-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [14]:
"""Initialize wandb with custom config"""
wandb.init(
    project="fer-improved-model",
    tags=["CNN", "improved", "FER2013"],
    notes="Improved FER model with better regularization"
)
print(f"View run at: {wandb.run.url}")


"""Log all training artifacts to wandb"""


# 2. Log training history (epoch-level)
for epoch in range(len(training_history['epoch'])):
    wandb.log({
        'epoch': training_history['epoch'][epoch],
        'train_loss': training_history['train_loss'][epoch],
        'train_acc': training_history['train_acc'][epoch],
        'val_loss': training_history['val_loss'][epoch],
        'val_acc': training_history['val_acc'][epoch]
    }, commit=True)

# 3. Log batch metrics (sampled)
if 'batch' in training_history:
    batch_indices = np.linspace(0, len(training_history['batch'])-1, 1000, dtype=int)
    for idx in batch_indices:
        wandb.log({
            'batch': training_history['batch'][idx],
            'train_batch_loss': training_history['train_batch_loss'][idx],
            'train_batch_acc': training_history['train_batch_acc'][idx]
        }, commit=False)

# 4. Log confusion matrix
if 'confusion_matrix' in training_history:
    cm = training_history['confusion_matrix']
    wandb.log({
        "conf_mat": wandb.plot.confusion_matrix(
            y_true=cm['true'],
            preds=cm['pred'],
            class_names=cm['class_names']
        )
    })

# 5. Log model architecture
wandb.watch(model, log='all', log_freq=100, log_graph=True)

# 6. Save and log model
torch.save(model.state_dict(), 'model_weights.pth')
wandb.save('model_weights.pth')

# 7. Log final metrics
wandb.summary.update({
    'best_val_acc': max(training_history['val_acc']),
    'final_train_acc': training_history['train_acc'][-1],
    'test_acc': training_history.get('test_acc', 0)
})

wandb.finish()

View run at: https://wandb.ai/abarb2022-free-university-of-tbilisi-/fer-improved-model/runs/kpw3r3cs


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


0,1
batch,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▃▃▄▄▄▅▅▅▆▇▇▇██████
train_batch_acc,▁
train_batch_loss,▁
train_loss,█▇▆▆▆▅▅▅▄▄▃▃▂▂▁▁▁▁▁▁
val_acc,▁▄▅▅▇▇▆▇▇▇▇▆█▇██████
val_loss,▆▃▂▂▁▁▂▂▂▂▃▄▃▄▅▆▆▇▇█

0,1
batch,8931.0
best_val_acc,0.64335
epoch,19.0
final_train_acc,0.99063
test_acc,0.65311
train_acc,0.99063
train_batch_acc,0.98438
train_batch_loss,0.06372
train_loss,0.03348
val_acc,0.64224
