In [None]:

# 0. Clean start + basic setup

import gc, torch, os
gc.collect()
torch.cuda.empty_cache()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

!nvidia-smi

Device: cuda
Fri Dec  5 15:31:24 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L4                      Off |   00000000:00:03.0 Off |                    0 |
| N/A   38C    P8             12W /   72W |       3MiB /  23034MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                   

In [None]:

# 1. Install dependencies

!pip install -q timm albumentations==1.4.3 grad-cam

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/7.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/7.8 MB[0m [31m57.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m7.8/7.8 MB[0m [31m131.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m94.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.0/137.0 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for grad-cam (pyproject.toml) ... [?25l[?25hdone


In [None]:

# 2. Imports

import os
import cv2
import numpy as np
from collections import Counter

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:

# 3. Download AffectNet YOLO dataset via Kaggle


from google.colab import files

print("Upload kaggle.json")
uploaded = files.upload()   # select your kaggle.json

os.makedirs("/root/.kaggle", exist_ok=True)
!cp kaggle.json /root/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

# Download dataset
!kaggle datasets download -d fatihkgg/affectnet-yolo-format -p ./data

# Unzip (answer 'n' if it asks to replace existing files)
!unzip -qo ./data/affectnet-yolo-format.zip -d ./data/affectnet

!ls -R ./data/affectnet/YOLO_format

Upload kaggle.json


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
ffhq_2967.txt  image0012219.txt  image0027684.txt  image0037426.txt
ffhq_2968.txt  image0012224.txt  image0027691.txt  image0037428.txt
ffhq_2969.txt  image0012225.txt  image0027692.txt  image0037429.txt
ffhq_296.txt   image0012230.txt  image0027693.txt  image0037430.txt
ffhq_2970.txt  image0012251.txt  image0027696.txt  image0037431.txt
ffhq_2971.txt  image0012254.txt  image0027697.txt  image0037433.txt
ffhq_2972.txt  image0012255.txt  image0027699.txt  image0037434.txt
ffhq_2973.txt  image0012265.txt  image0027700.txt  image0037439.txt
ffhq_2975.txt  image0012268.txt  image0027704.txt  image0037440.txt
ffhq_2977.txt  image0012272.txt  image0027707.txt  image0037441.txt
ffhq_2979.txt  image0012273.txt  image0027713.txt  image0037442.txt
ffhq_297.txt   image0012274.txt  image0027714.txt  image0037444.txt
ffhq_2980.txt  image0012281.txt  image0027715.txt  image0037445.txt
ffhq_2981.txt  image0012282.txt  image0027716.txt  

In [None]:

# 4. Dataset + transforms

# Root of the extracted dataset; each split keeps images and labels in
# sibling "images" / "labels" sub-directories.
data_root = "./data/affectnet/YOLO_format"

train_img_dir, train_lbl_dir = (
    f"{data_root}/train/{kind}" for kind in ("images", "labels")
)
val_img_dir, val_lbl_dir = (
    f"{data_root}/valid/{kind}" for kind in ("images", "labels")
)

print("Train images:", train_img_dir)
print("Val images:",   val_img_dir)

class AffectNetDataset(Dataset):
    """Image-classification view over a directory of images and label files.

    Every ``*.jpg`` file (extension matched case-insensitively) in ``img_dir``
    is one sample. Its label is read from the text file with the same base
    name in ``lbl_dir``: the first whitespace-separated token on the first
    line is parsed as the integer class id; anything after it is ignored.

    Args:
        img_dir: Directory containing the image files.
        lbl_dir: Directory containing one ``.txt`` label file per image.
        transform: Optional callable invoked as ``transform(image=rgb_array)``
            that returns a mapping with an ``"image"`` entry.
    """

    def __init__(self, img_dir, lbl_dir, transform=None):
        self.img_dir = img_dir
        self.lbl_dir = lbl_dir
        self.transform = transform

        # Sorted so that sample indices are stable across runs.
        self.images = sorted(f for f in os.listdir(img_dir)
                             if f.lower().endswith(".jpg"))

    def __len__(self):
        return len(self.images)

    @staticmethod
    def _label_name(img_name):
        """Map an image file name to its label file name.

        Only the final extension is swapped, whatever its case, so names such
        as ``face.JPG`` or ``a.jpg.crop.jpg`` resolve to the right label file.
        """
        return os.path.splitext(img_name)[0] + ".txt"

    @staticmethod
    def _read_label(lbl_path):
        """Return the integer class id stored at the start of a label file.

        Raises:
            ValueError: If the first line of the file is empty.
        """
        with open(lbl_path, "r") as f:
            fields = f.readline().split()
        if not fields:
            raise ValueError(f"Empty label file: {lbl_path}")
        return int(fields[0])

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is loaded with OpenCV and converted from BGR to RGB before
        the optional transform is applied.

        Raises:
            OSError: If the image cannot be read or decoded.
            ValueError: If the label file is empty.
        """
        img_name = self.images[idx]

        img_path = os.path.join(self.img_dir, img_name)
        lbl_path = os.path.join(self.lbl_dir, self._label_name(img_name))

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        label = self._read_label(lbl_path)

        if self.transform:
            image = self.transform(image=image)["image"]

        return image, label

# Slightly smaller than 300x300 to save memory
IMG_SIZE = 224

# Per-channel value used for both mean and std in A.Normalize below.
_CHANNEL_STATS = (0.5, 0.5, 0.5)

# Random augmentations applied to training images only, in this order.
_train_only_augs = [
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.4),
    A.ColorJitter(p=0.3),
    A.MotionBlur(blur_limit=3, p=0.2),
    A.GaussNoise(p=0.2),
    A.RandomShadow(p=0.2),
]

# Training pipeline: resize -> augment -> normalize -> CHW tensor.
train_tfms = A.Compose(
    [A.Resize(IMG_SIZE, IMG_SIZE)]
    + _train_only_augs
    + [A.Normalize(mean=_CHANNEL_STATS, std=_CHANNEL_STATS), ToTensorV2()]
)

# Validation pipeline: deterministic, same resize/normalize as training.
val_tfms = A.Compose(
    [
        A.Resize(IMG_SIZE, IMG_SIZE),
        A.Normalize(mean=_CHANNEL_STATS, std=_CHANNEL_STATS),
        ToTensorV2(),
    ]
)

# Datasets: augmented pipeline for training, deterministic one for validation.
train_ds = AffectNetDataset(train_img_dir, train_lbl_dir, transform=train_tfms)
val_ds = AffectNetDataset(val_img_dir, val_lbl_dir, transform=val_tfms)

BATCH_SIZE = 16   # small enough for stability on Colab Pro

# Settings shared by both loaders; only shuffling differs between them.
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=2, pin_memory=True)

train_loader = DataLoader(train_ds, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_ds, shuffle=False, **_loader_kwargs)

len(train_ds), len(val_ds)

Train images: ./data/affectnet/YOLO_format/train/images
Val images: ./data/affectnet/YOLO_format/valid/images


(12167, 3546)

In [None]:

# 5. Compute class weights safely

# Collect the class id of every training sample straight from the label files
# (first token of the first line), without loading any image.
all_labels = []

for img_name in train_ds.images:
    # Swap only the final extension, whatever its case; a plain
    # str.replace(".jpg", ".txt") misses ".JPG" and rewrites every occurrence.
    label_file = os.path.splitext(img_name)[0] + ".txt"
    label_path = os.path.join(train_lbl_dir, label_file)
    with open(label_path, "r") as f:
        fields = f.readline().split()
    if not fields:
        raise ValueError(f"Empty label file: {label_path}")
    all_labels.append(int(fields[0]))

if not all_labels:
    raise ValueError(f"No training labels found under {train_lbl_dir}")

unique_labels = sorted(set(all_labels))
print("Unique labels present in train set:", unique_labels)

# Class ids are assumed to be 0-based, so the largest id fixes the class count.
num_classes = max(unique_labels) + 1
print("Inferred num_classes:", num_classes)

counts = Counter(all_labels)
total = float(sum(counts.values()))

# Inverse frequency weights (higher weight for rare classes).
# A class id below the maximum that never occurs in the training set has a
# count of 0; give it weight 0.0 instead of dividing by zero. It never appears
# as a target, so its weight cannot influence the loss.
missing_classes = [i for i in range(num_classes) if counts[i] == 0]
if missing_classes:
    print("Warning: no training samples for classes:", missing_classes)

weights = [total / counts[i] if counts[i] else 0.0 for i in range(num_classes)]
print("Raw class counts:", counts)
print("Class weights:", weights)

class_weights = torch.tensor(weights, dtype=torch.float32, device=device)

Unique labels present in train set: [0, 1, 2, 3, 4, 5, 6, 7]
Inferred num_classes: 8
Raw class counts: Counter({7: 2248, 0: 2026, 3: 1922, 2: 1865, 6: 1582, 1: 1453, 5: 744, 4: 327})
Class weights: [6.00542941757157, 8.373709566414314, 6.523860589812332, 6.330385015608741, 37.20795107033639, 16.353494623655912, 7.690897597977244, 5.412366548042705]


In [None]:

# 6. NovelEmotionModel (micro-expression enhancer + routing)

# Human-readable class names, indexed by integer class id.
# NOTE(review): nothing in this notebook checks this ordering against the
# dataset's own class list — confirm it matches the ids used in the label
# files before using it to decode predictions.
EMOTIONS = [
    "neutral", "happy", "sad", "angry",
    "surprise", "fear", "disgust", "contempt"
]
# Only the number of classes is validated here, not the order of the names.
assert num_classes == len(EMOTIONS), "Dataset classes != 8, something is off."

class MicroExpEnhancer(nn.Module):
    """Residual pre-filter applied to the input image.

    A two-layer 3x3 convolution stack (``in_ch -> 16 -> in_ch`` with a ReLU in
    between) produces a correction that is scaled by ``factor`` and added back
    onto the input, so the output has the same shape as the input:

        out = x + factor * conv(x)

    Args:
        in_ch: Number of input (and output) channels.
        factor: Scale applied to the convolutional branch before the addition.
    """

    def __init__(self, in_ch=3, factor=0.2):
        super().__init__()
        self.factor = factor

        hidden_ch = 16
        layers = [
            nn.Conv2d(in_ch, hidden_ch, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(hidden_ch, in_ch, kernel_size=3, padding=1),
        ]
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Return ``x`` plus the scaled output of the convolutional branch."""
        residual = self.conv(x)
        return x + self.factor * residual


class DynamicEmotionRouting(nn.Module):
    """Soft mixture of two expert projections, gated by the input features.

    Both experts share the same architecture (``Linear(feat_dim, hidden_dim)``
    followed by ReLU). They are named "upper" and "lower" for the intended
    high-arousal / low-arousal split, but nothing in this module enforces that
    specialisation. A linear gate followed by a softmax yields two mixing
    weights per sample.

    Args:
        feat_dim: Size of the incoming feature vector.
        hidden_dim: Size of each expert's output.
    """

    def __init__(self, feat_dim=1536, hidden_dim=512):
        super().__init__()

        def make_expert():
            return nn.Sequential(
                nn.Linear(feat_dim, hidden_dim),
                nn.ReLU(inplace=True),
            )

        self.expert_upper = make_expert()
        self.expert_lower = make_expert()
        self.gate = nn.Linear(feat_dim, 2)

    def forward(self, feat):
        """Mix the two experts' outputs.

        Returns:
            ``(mixed, weights)`` where ``mixed`` is the gate-weighted sum of
            the expert outputs ([B, hidden_dim]) and ``weights`` is the
            softmax gate ([B, 2]).
        """
        weights = torch.softmax(self.gate(feat), dim=-1)        # [B, 2]
        w_upper, w_lower = weights[:, 0:1], weights[:, 1:2]     # [B, 1] each
        mixed = (w_upper * self.expert_upper(feat)
                 + w_lower * self.expert_lower(feat))           # [B, H]
        return mixed, weights


class NovelEmotionModel(nn.Module):
    """
    EfficientNet-B3 backbone + micro enhancer + routing head.

    Pipeline: input -> MicroExpEnhancer -> backbone features -> optional
    subtraction of a stored "neutral" feature vector -> DynamicEmotionRouting
    -> three linear heads.

    Outputs (dict returned by ``forward``):
        - logits: emotion logits [B, num_classes]
        - valence: unbounded scalar per sample [B, 1]
        - arousal: unbounded scalar per sample [B, 1]
        - gate: routing weights [B, 2]
        - feat: the features fed to the routing head [B, 1536]; when
          ``use_calibration`` is True these are the backbone features minus
          the neutral baseline, not the raw backbone output.

    The neutral baseline is a non-persistent buffer: it starts at zero (so
    calibration is a no-op until ``set_user_neutral`` is called) and it is not
    written to ``state_dict()``, i.e. it is not saved with checkpoints.
    """
    def __init__(self, num_emotions=8):
        super().__init__()
        self.micro = MicroExpEnhancer(in_ch=3, factor=0.2)

        # num_classes=0 asks timm for the model without its classifier layer,
        # so the backbone returns feature vectors instead of class logits.
        self.backbone = timm.create_model(
            "efficientnet_b3",
            pretrained=True,
            num_classes=0
        )
        feat_dim = 1536
        hidden_dim = 512

        self.routing = DynamicEmotionRouting(feat_dim, hidden_dim)
        self.head_emotion = nn.Linear(hidden_dim, num_emotions)
        self.head_valence = nn.Linear(hidden_dim, 1)
        self.head_arousal = nn.Linear(hidden_dim, 1)

        # Personalized neutral embedding (can set later)
        self.register_buffer("user_neutral_vec", torch.zeros(feat_dim),
                             persistent=False)

    @torch.no_grad()
    def set_user_neutral(self, feat_vec_batch):
        """
        Store the mean of a batch of neutral-frame features as the baseline.

        Args:
            feat_vec_batch: [N, feat_dim] features for neutral frames, N >= 1.
                NOTE(review): these should be uncalibrated features (e.g. the
                ``"feat"`` output of ``forward(..., use_calibration=False)``);
                features that already had a baseline subtracted would be
                offset by it.

        Raises:
            ValueError: If the input is not a non-empty 2-D batch whose second
                dimension matches the stored baseline. Without this check a
                1-D input would be reduced to a scalar and silently broadcast
                over the whole baseline, and an empty batch would store NaNs.
        """
        expected_dim = self.user_neutral_vec.shape[0]
        if (feat_vec_batch.dim() != 2
                or feat_vec_batch.shape[0] == 0
                or feat_vec_batch.shape[1] != expected_dim):
            raise ValueError(
                f"Expected a non-empty [N, {expected_dim}] feature batch, "
                f"got shape {tuple(feat_vec_batch.shape)}"
            )
        self.user_neutral_vec.copy_(feat_vec_batch.mean(dim=0))

    def forward(self, x, use_calibration=True):
        """
        Run the full model on an image batch.

        Args:
            x: Input image batch.
            use_calibration: When True, subtract the stored neutral baseline
                from the backbone features before routing.

        Returns:
            Dict with keys "logits", "valence", "arousal", "gate" and "feat"
            (see the class docstring).
        """
        x = self.micro(x)
        feat = self.backbone(x)  # [B, 1536]

        if use_calibration:
            feat = feat - self.user_neutral_vec

        routed, gate = self.routing(feat)
        logits = self.head_emotion(routed)
        valence = self.head_valence(routed)
        arousal = self.head_arousal(routed)

        return {
            "logits": logits,
            "valence": valence,
            "arousal": arousal,
            "gate": gate,
            "feat": feat
        }

# Build the network with one output per dataset class, then move it to the
# selected device.
model = NovelEmotionModel(num_emotions=num_classes)
model = model.to(device)
print("Model loaded on:", device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

Model loaded on: cuda


In [None]:
# 7. Loss, optimizer, scheduler, AMP scaler

# Class-weighted cross-entropy to counter the class imbalance in the train set.
criterion = nn.CrossEntropyLoss(weight=class_weights)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-5
)

# Halve the learning rate once the monitored value (the validation loss passed
# to scheduler.step) has not improved for more than `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=2
)

# Gradient scaler for mixed-precision training; disabled (pass-through) when
# not running on CUDA. torch.amp.GradScaler("cuda", ...) replaces the
# deprecated torch.cuda.amp.GradScaler(...) spelling.
scaler = torch.amp.GradScaler("cuda", enabled=(device.type == "cuda"))

  scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))


In [None]:

# 8. Train / validate loops (using outputs["logits"])

def train_one_epoch(model, loader, optimizer, criterion, scaler, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Module whose forward returns a dict containing ``"logits"``.
        loader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss called as ``criterion(logits, labels)``.
        scaler: Gradient scaler providing ``scale``/``step``/``update``.
        device: ``torch.device`` batches are moved to; mixed precision is
            enabled only when its type is ``"cuda"``.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen. The mean weights each
        batch's loss by its batch size, which assumes ``criterion`` returns a
        per-batch average.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    total_loss, total_correct, total_samples = 0.0, 0, 0
    use_amp = device.type == "cuda"

    for images, labels in loader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        # torch.amp.autocast("cuda", ...) replaces the deprecated
        # torch.cuda.amp.autocast(...); with enabled=False it is a no-op.
        with torch.amp.autocast("cuda", enabled=use_amp):
            out_dict = model(images)
            logits = out_dict["logits"]
            loss = criterion(logits, labels)

        # Scale the loss before backward, then let the scaler drive the
        # optimizer step and refresh its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_size = images.size(0)
        total_loss += loss.item() * batch_size
        preds = logits.argmax(dim=1)
        total_correct += (preds == labels).sum().item()
        total_samples += batch_size

    if total_samples == 0:
        raise ValueError("train_one_epoch received an empty loader")

    return total_loss / total_samples, total_correct / total_samples


def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any parameters.

    Args:
        model: Module whose forward returns a dict containing ``"logits"``.
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(logits, labels)``.
        device: ``torch.device`` batches are moved to; mixed precision is
            enabled only when its type is ``"cuda"``.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen. The mean weights each
        batch's loss by its batch size, which assumes ``criterion`` returns a
        per-batch average.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    total_loss, total_correct, total_samples = 0.0, 0, 0
    use_amp = device.type == "cuda"

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            # torch.amp.autocast("cuda", ...) replaces the deprecated
            # torch.cuda.amp.autocast(...); with enabled=False it is a no-op.
            with torch.amp.autocast("cuda", enabled=use_amp):
                out_dict = model(images)
                logits = out_dict["logits"]
                loss = criterion(logits, labels)

            batch_size = images.size(0)
            total_loss += loss.item() * batch_size
            preds = logits.argmax(dim=1)
            total_correct += (preds == labels).sum().item()
            total_samples += batch_size

    if total_samples == 0:
        raise ValueError("validate received an empty loader")

    return total_loss / total_samples, total_correct / total_samples

In [None]:

# 9. Main training loop

best_val_loss = float("inf")
NUM_EPOCHS = 15  # you can increase if it is stable

for epoch in range(1, NUM_EPOCHS + 1):
    train_loss, train_acc = train_one_epoch(
        model, train_loader, optimizer, criterion, scaler, device
    )
    val_loss, val_acc = validate(
        model, val_loader, criterion, device
    )

    scheduler.step(val_loss)

    print(f"Epoch {epoch:02d}/{NUM_EPOCHS}")
    print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}%")
    print(f"  Val   Loss: {val_loss:.4f} | Val   Acc: {val_acc*100:.2f}%")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_emotion_model_b3_novel.pth")
        print("  >>> Saved new best model")

print("Training finished. Best val loss:", best_val_loss)
!ls -lh

  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):


Epoch 01/15
  Train Loss: 1.5555 | Train Acc: 41.41%
  Val   Loss: 1.0740 | Val   Acc: 59.17%
  >>> Saved new best model
Epoch 02/15
  Train Loss: 1.0170 | Train Acc: 61.44%
  Val   Loss: 0.9738 | Val   Acc: 63.28%
  >>> Saved new best model
Epoch 03/15
  Train Loss: 0.7653 | Train Acc: 70.97%
  Val   Loss: 0.9127 | Val   Acc: 68.44%
  >>> Saved new best model
Epoch 04/15
  Train Loss: 0.5850 | Train Acc: 77.43%
  Val   Loss: 0.9243 | Val   Acc: 67.94%
Epoch 05/15
  Train Loss: 0.4375 | Train Acc: 83.10%
  Val   Loss: 1.0033 | Val   Acc: 66.95%
Epoch 06/15
  Train Loss: 0.3566 | Train Acc: 86.08%
  Val   Loss: 1.1213 | Val   Acc: 67.74%
Epoch 07/15
  Train Loss: 0.1985 | Train Acc: 92.62%
  Val   Loss: 1.2181 | Val   Acc: 69.46%
Epoch 08/15
  Train Loss: 0.1547 | Train Acc: 94.12%
  Val   Loss: 1.2721 | Val   Acc: 69.54%
Epoch 09/15
  Train Loss: 0.1256 | Train Acc: 95.32%
  Val   Loss: 1.3841 | Val   Acc: 67.91%
Epoch 10/15
  Train Loss: 0.0978 | Train Acc: 96.38%
  Val   Loss: 1.4735

In [None]:

# 10. Quick sanity check on one batch

# Restore the best checkpoint written by the training loop.
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict holds, so no arbitrary code runs on load.
checkpoint = torch.load("best_emotion_model_b3_novel.pth",
                        map_location=device,
                        weights_only=True)
model.load_state_dict(checkpoint)
model.eval()

# Take the first validation batch only; this is a smoke test, not an evaluation.
images, labels = next(iter(val_loader))
images = images.to(device)
labels = labels.to(device)

# torch.amp.autocast("cuda", ...) replaces the deprecated
# torch.cuda.amp.autocast(...); it is a no-op when not running on CUDA.
with torch.no_grad(), torch.amp.autocast("cuda", enabled=(device.type=="cuda")):
    out = model(images)
    logits = out["logits"]
    probs = torch.softmax(logits, dim=1)
    preds = probs.argmax(dim=1)

print("True labels: ", labels[:10].tolist())
print("Pred labels:", preds[:10].tolist())
print("First sample probs:", probs[0].cpu().numpy())

True labels:  [7, 2, 0, 6, 2, 2, 0, 0, 3, 3]
Pred labels: [7, 2, 0, 6, 0, 2, 0, 1, 3, 3]
First sample probs: [0.02882624 0.00683681 0.09027745 0.05560474 0.0008709  0.14447445
 0.00896928 0.66414016]


  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(device.type=="cuda")):


In [None]:
# Download the trained base model (AffectNet-trained)
# Re-imported here so this cell also works when the upload cell was skipped.
from google.colab import files

# Send the best checkpoint saved during training to the browser as a download.
best_checkpoint_path = "best_emotion_model_b3_novel.pth"
files.download(best_checkpoint_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>