In [1]:
import os
import glob
import cv2
from PIL import Image
from google.colab import files
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as T
from sklearn.model_selection import train_test_split

import timm

In [2]:
print("Upload your personal emotion videos (.mp4 / .mov)")
uploaded = files.upload()

video_files = list(uploaded.keys())
print("Uploaded:", video_files)

Upload your personal emotion videos (.mp4 / .mov)


Saving First_video.mov to First_video.mov
Saving second_video.mov to second_video.mov
Saving video3.mov to video3.mov
Saving video4.mov to video4.mov
Uploaded: ['First_video.mov', 'second_video.mov', 'video3.mov', 'video4.mov']


In [7]:
EMOTIONS = [
    "neutral", "happy", "sad", "angry",
    "surprise", "disgust", "fear", "tired"
]

SEGMENTS = [
    ("neutral", 0, 40),
    ("happy", 40, 80),
    ("sad", 80, 120),
    ("angry", 120, 160),
    ("surprise", 160, 200),
    ("disgust", 200, 240),
    ("fear", 240, 280),
    ("tired", 280, 320),
]

ROOT = "personal_dataset"
os.makedirs(ROOT, exist_ok=True)

for emo in EMOTIONS:
    os.makedirs(os.path.join(ROOT, emo), exist_ok=True)

print("Folders created:", os.listdir(ROOT))

Folders created: ['neutral', 'happy', 'disgust', 'sad', 'fear', 'surprise', 'angry', 'tired']


In [8]:
def emotion_from_time(t_sec):
    for emo, start, end in SEGMENTS:
        if start <= t_sec < end:
            return emo
    return None


def extract_from_video(video_path, tag="v1", fps_sample=5):
    print(f"\nProcessing {video_path}")
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print("Error opening", video_path)
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_step = max(1, int(fps // fps_sample))

    frame_idx = 0
    saved = {e: 0 for e in EMOTIONS}

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        if frame_idx % frame_step == 0:
            t_sec = frame_idx / fps
            emo = emotion_from_time(t_sec)

            if emo is not None:
                save_path = os.path.join(ROOT, emo, f"{emo}_{tag}_{saved[emo]:05d}.jpg")
                cv2.imwrite(save_path, frame)
                saved[emo] += 1

        frame_idx += 1

    cap.release()
    print("Saved:", saved)


for i, video in enumerate(video_files):
    extract_from_video(video, tag=f"v{i+1}")


Processing First_video.mov
Saved: {'neutral': 240, 'happy': 240, 'sad': 240, 'angry': 240, 'surprise': 239, 'disgust': 240, 'fear': 240, 'tired': 240}

Processing second_video.mov
Saved: {'neutral': 201, 'happy': 200, 'sad': 200, 'angry': 200, 'surprise': 200, 'disgust': 200, 'fear': 200, 'tired': 200}

Processing video3.mov
Saved: {'neutral': 201, 'happy': 200, 'sad': 200, 'angry': 200, 'surprise': 200, 'disgust': 200, 'fear': 200, 'tired': 200}

Processing video4.mov
Saved: {'neutral': 201, 'happy': 200, 'sad': 200, 'angry': 200, 'surprise': 201, 'disgust': 200, 'fear': 200, 'tired': 200}


In [10]:
root = ROOT
EMOTIONS = sorted(os.listdir(root))
label_to_idx = {emo: i for i, emo in enumerate(EMOTIONS)}

print("Label map:", label_to_idx)

Label map: {'angry': 0, 'disgust': 1, 'fear': 2, 'happy': 3, 'neutral': 4, 'sad': 5, 'surprise': 6, 'tired': 7}


In [11]:
all_images = []
all_labels = []

for emo in EMOTIONS:
    paths = glob.glob(os.path.join(root, emo, "*.jpg"))
    all_images.extend(paths)
    all_labels.extend([emo] * len(paths))

print("Total images:", len(all_images))

Total images: 6723


In [12]:
train_imgs, val_imgs, train_lbls, val_lbls = train_test_split(
    all_images,
    all_labels,
    test_size=0.1,
    stratify=all_labels,
    random_state=42
)

print("Train:", len(train_imgs))
print("Val:", len(val_imgs))

Train: 6050
Val: 673


In [13]:
class PersonalDataset(Dataset):
    def __init__(self, paths, labels, transform=None):
        self.paths = paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        img_path = self.paths[idx]
        emo_name = self.labels[idx]           # always string
        label = label_to_idx[emo_name]        # convert to class index

        img = Image.open(img_path).convert("RGB")

        if self.transform:
            img = self.transform(img)

        return img, label

In [14]:
train_tfms = T.Compose([
    T.Resize((300, 300)),
    T.RandomHorizontalFlip(),
    T.ColorJitter(0.2, 0.2, 0.2),
    T.ToTensor(),
])

val_tfms = T.Compose([
    T.Resize((300, 300)),
    T.ToTensor(),
])

train_ds = PersonalDataset(train_imgs, train_lbls, train_tfms)
val_ds   = PersonalDataset(val_imgs,  val_lbls,  val_tfms)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=32, shuffle=False)

In [15]:
class MicroExpEnhancer(nn.Module):
    def __init__(self, in_ch=3, factor=0.2):
        super().__init__()
        self.factor = factor
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, in_ch, kernel_size=3, padding=1)
        )
    def forward(self, x):
        return x + self.factor * self.conv(x)


class DynamicEmotionRouting(nn.Module):
    def __init__(self, feat_dim=1536, hidden_dim=512):
        super().__init__()
        self.expert_upper = nn.Sequential(nn.Linear(feat_dim, hidden_dim), nn.ReLU())
        self.expert_lower = nn.Sequential(nn.Linear(feat_dim, hidden_dim), nn.ReLU())
        self.gate = nn.Linear(feat_dim, 2)

    def forward(self, feat):
        gate = torch.softmax(self.gate(feat), dim=-1)
        up = self.expert_upper(feat)
        low = self.expert_lower(feat)
        out = gate[:,0:1] * up + gate[:,1:2] * low
        return out, gate


class NovelEmotionModel(nn.Module):
    def __init__(self, num_emotions=8):
        super().__init__()
        self.micro = MicroExpEnhancer()
        self.backbone = timm.create_model("efficientnet_b3", pretrained=True, num_classes=0)

        self.routing = DynamicEmotionRouting(1536, 512)
        self.head_emotion = nn.Linear(512, num_emotions)
        self.head_valence = nn.Linear(512, 1)
        self.head_arousal = nn.Linear(512, 1)

    def forward(self, x):
        x = self.micro(x)
        feat = self.backbone(x)
        routed, gate = self.routing(feat)

        return {
            "logits": self.head_emotion(routed),
            "valence": self.head_valence(routed),
            "arousal": self.head_arousal(routed),
            "gate": gate
        }

In [16]:
print("Upload your pretrained model file (best_emotion_model_b3_novel.pth)")
uploaded_model = files.upload()

Upload your pretrained model file (best_emotion_model_b3_novel.pth)


Saving best_emotion_model_b3_novel.pth to best_emotion_model_b3_novel.pth


In [17]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = NovelEmotionModel(num_emotions=8).to(device)
state = torch.load("best_emotion_model_b3_novel.pth", map_location=device)
model.load_state_dict(state)

print("Loaded pretrained base model!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

Loaded pretrained base model!


In [18]:
for p in model.backbone.parameters():
    p.requires_grad = False

In [19]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler()

  scaler = torch.cuda.amp.GradScaler()


In [20]:
def train_one_epoch(model, loader):
    model.train()
    total, correct, total_loss = 0, 0, 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()

        with torch.cuda.amp.autocast():
            out = model(x)
            logits = out["logits"]
            loss = criterion(logits, y)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        preds = logits.argmax(1)
        correct += (preds == y).sum().item()
        total += y.size(0)
        total_loss += loss.item() * y.size(0)

    return total_loss/total, correct/total * 100

In [21]:
def validate(model, loader):
    model.eval()
    total, correct, total_loss = 0, 0, 0

    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            out = model(x)
            logits = out["logits"]
            loss = criterion(logits, y)

            preds = logits.argmax(1)
            correct += (preds == y).sum().item()
            total += y.size(0)
            total_loss += loss.item() * y.size(0)

    return total_loss/total, correct/total * 100

In [22]:
BEST = 999

for epoch in range(12):
    tr_loss, tr_acc = train_one_epoch(model, train_loader)
    val_loss, val_acc = validate(model, val_loader)

    print(f"\nEpoch {epoch+1}/12")
    print(f"Train Loss: {tr_loss:.4f} | Acc: {tr_acc:.2f}%")
    print(f"Val   Loss: {val_loss:.4f} | Acc: {val_acc:.2f}%")

    if val_loss < BEST:
        BEST = val_loss
        torch.save(model.state_dict(), "emotion_model_novel_finetuned.pth")
        print(">>> Saved best model!")

  with torch.cuda.amp.autocast():



Epoch 1/12
Train Loss: 1.7725 | Acc: 39.09%
Val   Loss: 1.1884 | Acc: 67.31%
>>> Saved best model!

Epoch 2/12
Train Loss: 1.0344 | Acc: 73.40%
Val   Loss: 0.7439 | Acc: 82.62%
>>> Saved best model!

Epoch 3/12
Train Loss: 0.7401 | Acc: 82.20%
Val   Loss: 0.5474 | Acc: 84.84%
>>> Saved best model!

Epoch 4/12
Train Loss: 0.5754 | Acc: 86.20%
Val   Loss: 0.4293 | Acc: 88.86%
>>> Saved best model!

Epoch 5/12
Train Loss: 0.4703 | Acc: 88.58%
Val   Loss: 0.3644 | Acc: 90.49%
>>> Saved best model!

Epoch 6/12
Train Loss: 0.4066 | Acc: 89.82%
Val   Loss: 0.3240 | Acc: 91.08%
>>> Saved best model!

Epoch 7/12
Train Loss: 0.3521 | Acc: 91.04%
Val   Loss: 0.2792 | Acc: 93.02%
>>> Saved best model!

Epoch 8/12
Train Loss: 0.3135 | Acc: 92.41%
Val   Loss: 0.2420 | Acc: 93.31%
>>> Saved best model!

Epoch 9/12
Train Loss: 0.2833 | Acc: 93.36%
Val   Loss: 0.2249 | Acc: 94.35%
>>> Saved best model!

Epoch 10/12
Train Loss: 0.2595 | Acc: 93.55%
Val   Loss: 0.2093 | Acc: 94.21%
>>> Saved best model!

In [23]:
torch.save(model.state_dict(), "emotion_model_novel_finetuned.pth")

In [24]:
from google.colab import files
files.download("emotion_model_novel_finetuned.pth")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>