In [1]:
# ============================================
# 1️⃣ Install dependencies
# ============================================
!pip install -q insightface onnxruntime-gpu scikit-learn tqdm opencv-python-headless

import os, glob, cv2, numpy as np
from tqdm import tqdm
from google.colab import drive
from insightface.app import FaceAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/439.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m430.1/439.5 kB[0m [31m18.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m439.5/439.5 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m300.5/300.5 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m127.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m9.2 MB/s[0m et

In [18]:
# ============================================
# 2️⃣ Mount Google Drive
# ============================================
drive.mount('/content/drive', force_remount=True)
drive_base = "/content/drive/MyDrive/FER2013_images"
os.makedirs(drive_base, exist_ok=True)

# Embeddings file paths
train_embeddings_file = os.path.join(drive_base, "arcface_train_embeddings.npz")
test_embeddings_file  = os.path.join(drive_base, "arcface_test_embeddings.npz")

Mounted at /content/drive


In [20]:
!ls "{drive_base}/train"

angry  disgust	fear  happy  neutral  sad  surprise


In [21]:
# ============================================
# 3️⃣ Collect image paths and labels from train/test
# ============================================
emotion_map = {
    'angry': 0,
    'disgust': 1,
    'fear': 2,
    'happy': 3,
    'sad': 4,
    'surprise': 5,
    'neutral': 6
}

train_paths = glob.glob(os.path.join(drive_base, "train", "*", "*.jpg"), recursive=True)
test_paths  = glob.glob(os.path.join(drive_base, "test", "*", "*.jpg"), recursive=True)

train_labels = [emotion_map[os.path.basename(os.path.dirname(p))] for p in train_paths]
test_labels  = [emotion_map[os.path.basename(os.path.dirname(p))] for p in test_paths]

print(f"✅ Train images: {len(train_paths)}, Test images: {len(test_paths)}")

✅ Train images: 28709, Test images: 7178


1) Automatically loads final embeddings → skips extraction if already done.
2) Resumes from partial chunks → no wasted computation if interrupted.
3) Saves in chunks → safe for overnight Colab runs.
4) Optional CLAHE toggle → faster extraction if disabled.

In [45]:
# ============================================
# 4️⃣ Embedding extraction (resumable)
# ============================================
def extract_embeddings(image_paths, labels, embeddings_file, chunk_size=5000, use_CLAHE=False):
    """
    Smart embedding extraction for Buffalo-S:
    - Loads final embeddings if they exist
    - Resumes from partial embeddings if available
    - Saves in chunks to prevent data loss
    """
    # 1️⃣ Load final embeddings if already present
    if os.path.exists(embeddings_file):
        print(f"📥 Final embeddings found. Loading from {embeddings_file}...")
        data = np.load(embeddings_file)
        return data['X'], data['y']

    embeddings, y_list = [], []
    start_idx = 0

    # 2️⃣ Check for partial embeddings
    dir_path = os.path.dirname(embeddings_file)
    if os.path.exists(dir_path):
        partial_files = sorted([f for f in os.listdir(dir_path) if f.startswith("partial_")])
        if partial_files:
            last_file = partial_files[-1]
            print(f"📥 Resuming from partial embeddings: {last_file}")
            data = np.load(os.path.join(dir_path, last_file))
            embeddings.extend(data['X'])
            y_list.extend(data['y'])
            start_idx = int(last_file.split("_")[-1].split(".")[0])  # last index

    # 3️⃣ Initialize Buffalo-S
    app = FaceAnalysis(name="buffalo_s", root="./", providers=['CUDAExecutionProvider'])
    app.prepare(ctx_id=0, det_size=(224, 224))
    print("✅ Buffalo-S FaceAnalysis loaded.")

    def apply_CLAHE(img):
        lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
        cl = clahe.apply(l)
        merged = cv2.merge((cl,a,b))
        return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)

    # 4️⃣ Process images
    for i in tqdm(range(start_idx, len(image_paths)), desc="Extracting embeddings"):
        img = cv2.imread(image_paths[i])
        if img is None:
            continue
        if use_CLAHE:
            img = apply_CLAHE(img)
        faces = app.get(img)
        if len(faces) == 0:
            continue
        emb = faces[0]['embedding']
        emb /= np.linalg.norm(emb)
        embeddings.append(emb)
        y_list.append(labels[i])

        # Save partial chunk
        if (i+1) % chunk_size == 0:
            partial_file = os.path.join(dir_path, f"partial_{i+1}.npz")
            np.savez(partial_file, X=np.array(embeddings), y=np.array(y_list))
            print(f"💾 Saved partial embeddings up to image {i+1} → {partial_file}")

    # 5️⃣ Save final embeddings
    X = np.array(embeddings)
    y = np.array(y_list)
    np.savez(embeddings_file, X=X, y=y)
    print(f"✅ Saved final embeddings to {embeddings_file} ({len(X)} samples)")

    # Optional: remove old partials
    if 'partial_files' in locals():
        for f in partial_files:
            os.remove(os.path.join(dir_path, f))

    return X, y

In [46]:
# ============================================
# 5️⃣ Extract or load embeddings
# ============================================
X_train, y_train = extract_embeddings(train_paths, train_labels, train_embeddings_file)
X_test,  y_test  = extract_embeddings(test_paths, test_labels,  test_embeddings_file)

Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'sdpa_kernel': '0', 'use_tf32': '1', 'fuse_conv_bias': '0', 'prefer_nhwc': '0', 'tunable_op_max_tuning_duration_ms': '0', 'enable_skip_layer_norm_strict_mode': '0', 'tunable_op_tuning_enable': '0', 'tunable_op_enable': '0', 'use_ep_level_unified_stream': '0', 'device_id': '0', 'has_user_compute_stream': '0', 'gpu_external_empty_cache': '0', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'cudnn_conv1d_pad_to_nc1d': '0', 'gpu_mem_limit': '18446744073709551615', 'gpu_external_alloc': '0', 'gpu_external_free': '0', 'arena_extend_strategy': 'kNextPowerOfTwo', 'do_copy_in_default_stream': '1', 'enable_cuda_graph': '0', 'user_compute_stream': '0', 'cudnn_conv_use_max_workspace': '1'}}
find model: ./models/buffalo_s/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExec

Extracting embeddings:  17%|█▋        | 5000/28709 [29:19<3:16:08,  2.01it/s]

💾 Saved partial embeddings up to image 5000 → /content/drive/MyDrive/FER2013_images/partial_5000.npz


Extracting embeddings:  35%|███▍      | 10000/28709 [1:08:28<2:04:48,  2.50it/s]

💾 Saved partial embeddings up to image 10000 → /content/drive/MyDrive/FER2013_images/partial_10000.npz


Extracting embeddings:  52%|█████▏    | 15000/28709 [1:46:48<1:35:26,  2.39it/s]

💾 Saved partial embeddings up to image 15000 → /content/drive/MyDrive/FER2013_images/partial_15000.npz


Extracting embeddings:  70%|██████▉   | 20000/28709 [2:24:33<1:04:22,  2.25it/s]

💾 Saved partial embeddings up to image 20000 → /content/drive/MyDrive/FER2013_images/partial_20000.npz


Extracting embeddings:  87%|████████▋ | 25000/28709 [2:59:24<22:58,  2.69it/s]

💾 Saved partial embeddings up to image 25000 → /content/drive/MyDrive/FER2013_images/partial_25000.npz


Extracting embeddings: 100%|██████████| 28709/28709 [3:21:54<00:00,  2.37it/s]


✅ Saved final embeddings to /content/drive/MyDrive/FER2013_images/arcface_train_embeddings.npz (28477 samples)
📥 Resuming from partial embeddings: partial_5000.npz
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'sdpa_kernel': '0', 'use_tf32': '1', 'fuse_conv_bias': '0', 'prefer_nhwc': '0', 'tunable_op_max_tuning_duration_ms': '0', 'enable_skip_layer_norm_strict_mode': '0', 'tunable_op_tuning_enable': '0', 'tunable_op_enable': '0', 'use_ep_level_unified_stream': '0', 'device_id': '0', 'has_user_compute_stream': '0', 'gpu_external_empty_cache': '0', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'cudnn_conv1d_pad_to_nc1d': '0', 'gpu_mem_limit': '18446744073709551615', 'gpu_external_alloc': '0', 'gpu_external_free': '0', 'arena_extend_strategy': 'kNextPowerOfTwo', 'do_copy_in_default_stream': '1', 'enable_cuda_graph': '0', 'user_compute_stream': '0', 'cudnn_conv_use_max_workspace': '1'}}
find model: ./mod

Extracting embeddings: 100%|██████████| 2178/2178 [13:18<00:00,  2.73it/s]

✅ Saved final embeddings to /content/drive/MyDrive/FER2013_images/arcface_test_embeddings.npz (7133 samples)





In [47]:
# ============================================
# 6️⃣ Train classifier and evaluate on test set
# ============================================
clf = LogisticRegression(max_iter=2000, class_weight='balanced', solver='saga')
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"📊 Test set Accuracy: {acc*100:.2f}%")
print("✅ Classifier trained and evaluated.")

📊 Test set Accuracy: 68.13%
✅ Classifier trained and evaluated.


