In [1]:
import kagglehub

path = kagglehub.dataset_download("msambare/fer2013")
print("Path to dataset files:", path)

Using Colab cache for faster access to the 'fer2013' dataset.
Path to dataset files: /kaggle/input/fer2013


In [2]:
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

train_tfms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_tfms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [3]:
train_ds = ImageFolder('/kaggle/input/fer2013/train', transform=train_tfms)
val_ds   = ImageFolder('/kaggle/input/fer2013/test',  transform=val_tfms)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_ds, batch_size=64, shuffle=False, num_workers=2)

print("Classes:", train_ds.classes)
print("Train size:", len(train_ds))
print("Val size:", len(val_ds))

Classes: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
Train size: 28709
Val size: 7178


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

# Load pretrained ResNet18
model = models.resnet18(pretrained=True)

# Freeze backbone
for param in model.parameters():
    param.requires_grad = False

# Replace classifier head
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 7)  # 7 emotions

model = model.to(device)

# Loss and optimizer (only train head)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-3)

print(model.fc)

Device: cuda




Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 180MB/s]


Linear(in_features=512, out_features=7, bias=True)


In [None]:
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    running_loss = 0
    correct = 0
    total = 0

    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    train_acc = correct / total

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Loss: {running_loss:.2f} "
          f"Train Acc: {train_acc:.4f}")

Epoch [1/5] Loss: 732.35 Train Acc: 0.3583
Epoch [2/5] Loss: 688.16 Train Acc: 0.4071
Epoch [3/5] Loss: 675.92 Train Acc: 0.4205
Epoch [4/5] Loss: 673.90 Train Acc: 0.4203


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for imgs, labels in val_loader:
        imgs = imgs.to(device)
        outputs = model(imgs)
        preds = outputs.argmax(1).cpu().numpy()

        all_preds.extend(preds)
        all_labels.extend(labels.numpy())

cm = confusion_matrix(all_labels, all_preds)

print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n")
print(classification_report(all_labels, all_preds, target_names=train_ds.classes))

Confusion Matrix:
 [[ 102    0  106  379  185  138   48]
 [   4    0   15   61   13   16    2]
 [  31    0  194  360  188  156   95]
 [  27    0   68 1359  182  107   31]
 [  17    0   85  377  570  144   40]
 [  34    0  135  440  241  373   24]
 [  16    0   97  152  115   23  428]]

Classification Report:

              precision    recall  f1-score   support

       angry       0.44      0.11      0.17       958
     disgust       0.00      0.00      0.00       111
        fear       0.28      0.19      0.23      1024
       happy       0.43      0.77      0.55      1774
     neutral       0.38      0.46      0.42      1233
         sad       0.39      0.30      0.34      1247
    surprise       0.64      0.52      0.57       831

    accuracy                           0.42      7178
   macro avg       0.37      0.33      0.33      7178
weighted avg       0.41      0.42      0.39      7178



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Unfreeze layer4 (last ResNet block)
for name, param in model.named_parameters():
    if "layer4" in name:
        param.requires_grad = True

In [None]:
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),lr=1e-4)

In [None]:
num_epochs = 3

for epoch in range(num_epochs):
    model.train()
    running_loss = 0
    correct = 0
    total = 0

    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    train_acc = correct / total
    print(f"[Fine-tune] Epoch {epoch+1} Train Acc: {train_acc:.4f}")

[Fine-tune] Epoch 1 Train Acc: 0.5360
[Fine-tune] Epoch 2 Train Acc: 0.6134
[Fine-tune] Epoch 3 Train Acc: 0.6517


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for imgs, labels in val_loader:
        imgs = imgs.to(device)
        outputs = model(imgs)
        preds = outputs.argmax(1).cpu().numpy()

        all_preds.extend(preds)
        all_labels.extend(labels.numpy())

cm = confusion_matrix(all_labels, all_preds)

print("Confusion Matrix (After Fine-Tuning):\n", cm)
print("\nClassification Report (After Fine-Tuning):\n")
print(classification_report(all_labels, all_preds, target_names=train_ds.classes))

Confusion Matrix (After Fine-Tuning):
 [[ 459   14   69   75  200  115   26]
 [  33   42    5   12    8    9    2]
 [ 104    3  294   65  266  172  120]
 [  24    0    8 1523  167   23   29]
 [  34    1   23   79  968  110   18]
 [ 101    2   74   91  432  526   21]
 [  20    0   25   61   99    9  617]]

Classification Report (After Fine-Tuning):

              precision    recall  f1-score   support

       angry       0.59      0.48      0.53       958
     disgust       0.68      0.38      0.49       111
        fear       0.59      0.29      0.39      1024
       happy       0.80      0.86      0.83      1774
     neutral       0.45      0.79      0.57      1233
         sad       0.55      0.42      0.48      1247
    surprise       0.74      0.74      0.74       831

    accuracy                           0.62      7178
   macro avg       0.63      0.56      0.57      7178
weighted avg       0.63      0.62      0.60      7178

