In [None]:
import os
import cv2
import json
import torch
import numpy as np
import pandas as pd
from PIL import Image, ImageOps
from torchvision import transforms
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix
import mediapipe as mp

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# --- Pipeline configuration -------------------------------------------------
# NOTE(review): TARGET_SIZE, MAX_DIM and MIN_DIM are not referenced by the
# cells visible in this notebook — confirm whether they are still needed.
TARGET_SIZE = (384, 384)
# Fraction of the detected face box added as padding on every side of the crop.
MARGIN_RATIO = 0.15
MAX_DIM = 1600
MIN_DIM = 256
# Switch for eye-based face alignment.
USE_FACE_ALIGNMENT = True
# Root folder of the test set: one sub-folder per class.
TEST_DIR = "./Test"


In [None]:
def scan_test_dataset(root_dir):
    """Collect image paths and class labels from a one-folder-per-class tree.

    Every sub-directory of ``root_dir`` is treated as a class; its name is the
    label of each image file found directly inside it. Classes and the files
    within each class are visited in sorted order so the result (and anything
    derived from it, such as the output CSV) is identical across runs and
    platforms — ``os.listdir`` alone returns entries in arbitrary order.

    Args:
        root_dir: Directory that contains one sub-directory per class.

    Returns:
        ``(image_paths, true_labels)``: two parallel lists holding the path of
        each image and the name of the class folder it was found in.
    """
    image_exts = (".jpg", ".jpeg", ".png", ".webp")
    image_paths = []
    true_labels = []

    classes = sorted(
        entry for entry in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, entry))
    )

    print("Found classes:", classes)

    for cls in classes:
        cls_dir = os.path.join(root_dir, cls)
        for fname in sorted(os.listdir(cls_dir)):
            fpath = os.path.join(cls_dir, fname)
            # Match on extension (case-insensitive) and skip directories that
            # merely happen to be named like an image.
            if fname.lower().endswith(image_exts) and os.path.isfile(fpath):
                image_paths.append(fpath)
                true_labels.append(cls)

    return image_paths, true_labels

In [None]:
# Short aliases for the two MediaPipe solution modules used below.
mp_face_detection = mp.solutions.face_detection
mp_face_mesh = mp.solutions.face_mesh

# Bounding-box face detector; detections under 0.5 confidence are discarded.
face_detection = mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5)

# Landmark model configured for independent still images and a single face;
# its landmarks are used to locate the eyes for alignment.
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1,
                                  refine_landmarks=True, min_detection_confidence=0.5)

def CenterEye(landmarks, image_shape):
    """Return the pixel-space centre of each eye from face landmarks.

    Each centre is the mean of a fixed group of landmark indices; the
    landmark ``x``/``y`` values are scaled by the image width/height, so they
    are expected to be normalised to the image.

    Args:
        landmarks: Object exposing ``landmark[i].x`` and ``landmark[i].y``.
        image_shape: Either a ``(height, width, ...)`` shape tuple or the image
            array itself (anything with a ``.shape`` attribute). Accepting the
            array matters: slicing an array with ``[:2]`` would yield its first
            two pixel rows rather than its dimensions.

    Returns:
        ``(left, right)``: two arrays ``[x, y]`` in pixels, one per index group.
    """
    # Use the array's shape when an image was passed instead of a shape tuple.
    shape = getattr(image_shape, "shape", image_shape)
    h, w = shape[:2]

    EYE_LEFT_IDX = [33, 133, 160, 159, 158, 157, 173, 144, 145, 153]
    EYE_RIGHT_IDX = [362, 263, 387, 386, 385, 384, 398, 373, 374, 380]

    def _centre(indices):
        # Mean pixel position of the selected landmarks.
        points = [[landmarks.landmark[i].x * w, landmarks.landmark[i].y * h] for i in indices]
        return np.mean(points, axis=0)

    return _centre(EYE_LEFT_IDX), _centre(EYE_RIGHT_IDX)

def FaceAlign(image, left_eye, right_eye):
    """Rotate ``image`` so that the line joining the two eyes becomes horizontal.

    Args:
        image: Image array; its first two dimensions are taken as height, width.
        left_eye: ``[x, y]`` position of the first eye, in pixels.
        right_eye: ``[x, y]`` position of the second eye, in pixels.

    Returns:
        ``(rotated, angle)``: the image rotated about the (floored) midpoint
        between the eyes with the output size unchanged, and the rotation
        angle in degrees.
    """
    lx, ly = left_eye[0], left_eye[1]
    rx, ry = right_eye[0], right_eye[1]

    # Angle of the inter-eye line relative to the horizontal axis.
    angle = np.degrees(np.arctan2(ry - ly, rx - lx))

    # Pivot of the rotation: floored midpoint between both eyes.
    pivot = ((lx + rx) // 2, (ly + ry) // 2)

    height, width = image.shape[:2]
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    rotated = cv2.warpAffine(image, rotation, (width, height), flags=cv2.INTER_CUBIC)
    return rotated, angle

def DetectAlignCrop(image_path):
    """Load an image, optionally level the face, and crop to the detected face.

    Steps:
      1. Open the file, apply its EXIF orientation and convert it to RGB.
      2. When ``USE_FACE_ALIGNMENT`` is set and the face-mesh model returns
         landmarks, rotate the image so that the eyes are level.
      3. Run the face detector on the (possibly rotated) image and crop the
         first detection's box, padded by ``MARGIN_RATIO`` of the box size on
         every side and clamped to the image bounds.

    A one-line progress message is printed for every image.

    Args:
        image_path: Path of the image file to process.

    Returns:
        ``None`` when the image cannot be loaded; the original (unrotated) RGB
        array when no face is detected or the crop would be empty; otherwise
        the cropped RGB array.
    """
    print(f"→ {os.path.basename(image_path)}", end=" | ")

    try:
        # The context manager releases the file handle even if decoding fails.
        with Image.open(image_path) as raw:
            img = np.array(ImageOps.exif_transpose(raw).convert("RGB"))
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C still interrupts the run.
        print("Load ERROR")
        return None

    orig_img = img.copy()

    if USE_FACE_ALIGNMENT:
        results_mesh = face_mesh.process(img)
        if results_mesh.multi_face_landmarks:
            # Pass the image *shape*: CenterEye needs (height, width), and
            # slicing the pixel array itself would hand it pixel rows.
            left, right = CenterEye(results_mesh.multi_face_landmarks[0], img.shape)
            img, _ = FaceAlign(img, left, right)
            print("Align", end=" | ")

    # Face detection
    results = face_detection.process(img)
    if not results.detections:
        print("No-face")
        return orig_img

    det = results.detections[0]
    bbox = det.location_data.relative_bounding_box

    # Convert the relative box to pixel coordinates.
    h, w = img.shape[:2]
    x = int(bbox.xmin * w)
    y = int(bbox.ymin * h)
    bw = int(bbox.width * w)
    bh = int(bbox.height * h)

    # Padding around the face, proportional to the box size.
    mx = int(bw * MARGIN_RATIO)
    my = int(bh * MARGIN_RATIO)

    x1 = max(0, x - mx)
    y1 = max(0, y - my)
    x2 = min(w, x + bw + mx)
    y2 = min(h, y + bh + my)

    # A box lying outside the image (or with zero size) would give an empty
    # array that cannot be turned into an image; fall back to the full frame.
    if x2 <= x1 or y2 <= y1:
        print("Empty-crop")
        return orig_img

    cropped = img[y1:y2, x1:x2]
    print("OK")
    return cropped

In [None]:
# class_labels.json maps each class name to its integer index. JSON text is
# UTF-8, so read it as such instead of relying on the platform's default
# encoding (which would mangle non-ASCII class names on some systems).
with open("DeepLearningTubes/Results/class_labels.json", encoding="utf-8") as f:
    label_map = json.load(f)

# Inverse lookup (index -> class name) used to turn predictions into labels.
idx_to_label = {v: k for k, v in label_map.items()}
num_classes = len(label_map)

In [None]:
# Evaluation-time preprocessing: resize to a fixed 224x224 input, convert to
# a tensor, then normalise each channel with the given mean / std.
# NOTE(review): TARGET_SIZE is (384, 384) but the resize here is 224x224 —
# confirm this matches the resolution the checkpoint was trained with.
val_tf = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

class ArcMarginProduct(nn.Module):
    """
    ArcFace head.

    Produces ``s * cos(theta + m)`` for the labelled class and
    ``s * cos(theta)`` for every other class, where ``theta`` is the angle
    between the L2-normalised embedding and the L2-normalised class weight.

    Args:
        in_features: Embedding dimension.
        out_features: Number of classes.
        s: Scale applied to the logits.
        m: Additive angular margin, in radians.
        easy_margin: If True, apply the margin only where ``cos(theta) > 0``;
            otherwise apply it where ``cos(theta) > cos(pi - m)`` and use the
            linear fallback ``cos(theta) - sin(pi - m) * m`` elsewhere.
    """
    def __init__(self, in_features, out_features, s=25.0, m=0.10, easy_margin=False):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m

        # `weight` is the module's only state_dict entry; keep it that way so
        # existing checkpoints continue to load.
        self.weight = nn.Parameter(torch.empty(out_features, in_features, dtype=torch.float32))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        # Margin constants are plain attributes (deliberately not buffers, so
        # they never appear in the state_dict).
        self.cos_m = torch.cos(torch.tensor(m))
        self.sin_m = torch.sin(torch.tensor(m))
        self.th = torch.cos(torch.tensor(torch.pi - m))
        self.mm = torch.sin(torch.tensor(torch.pi - m)) * m

    def forward(self, embeddings, labels):
        """Return margin-adjusted, scaled logits.

        Args:
            embeddings: Tensor of shape ``(batch, in_features)``.
            labels: Integer class indices, one per row of ``embeddings``.

        Returns:
            Tensor of shape ``(batch, out_features)``.
        """
        embeddings = F.normalize(embeddings)
        W = F.normalize(self.weight)

        cosine = F.linear(embeddings, W)
        # Rounding can push cosine**2 marginally above 1, which would make
        # the square root NaN; clamp the radicand to its valid range first.
        sine = torch.sqrt((1.0 - cosine ** 2).clamp(0.0, 1.0))

        # cos(theta + m) = cos(theta) cos(m) - sin(theta) sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m

        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        # Select the margin-adjusted value only for the labelled class.
        # scatter_ needs int64 indices, so cast in case labels are int32.
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, labels.view(-1, 1).long(), 1)

        logits = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        logits = logits * self.s
        return logits


In [None]:
class ResNet100ArcFace(nn.Module):
    """
    Backbone: ResNet101 -> 512-dimensional embedding.
    Head: ArcFace (ArcMarginProduct).

    Args:
        num_classes: Number of identity classes for the ArcFace head.
        embedding_dim: Size of the embedding produced by the model.
        s: Logit scale passed to the ArcFace head.
        m: Angular margin passed to the ArcFace head.
        pretrained: If True (default), initialise the backbone with the
            ImageNet weights. Pass False when a full checkpoint is loaded
            right after construction, which avoids the weight download.
    """
    def __init__(self, num_classes: int,
                 embedding_dim: int = 512,
                 s: float = 25.0,
                 m: float = 0.10,
                 pretrained: bool = True):
        super().__init__()

        # ResNet101 backbone, optionally with ImageNet-pretrained weights.
        weights = models.ResNet101_Weights.IMAGENET1K_V1 if pretrained else None
        backbone = models.resnet101(weights=weights)
        embedding_in_features = backbone.fc.in_features

        # Drop the classification layer so the backbone outputs raw features.
        backbone.fc = nn.Identity()
        self.backbone = backbone

        # Project backbone features to the embedding dimension.
        self.embedding_head = nn.Sequential(
            nn.Linear(embedding_in_features, embedding_dim),
            nn.BatchNorm1d(embedding_dim)
        )

        # BatchNorm starts as identity scale / zero shift; the shift is frozen.
        nn.init.constant_(self.embedding_head[1].weight, 1.0)
        nn.init.constant_(self.embedding_head[1].bias, 0.0)
        self.embedding_head[1].bias.requires_grad = False

        # ArcFace head
        self.arc_margin = ArcMarginProduct(
            in_features=embedding_dim,
            out_features=num_classes,
            s=s,
            m=m
        )

    def forward(self, x, labels=None):
        """Return ``(logits, emb)`` for a batch of images.

        Args:
            x: Input batch fed to the backbone.
            labels: Class indices. When given, the ArcFace margin is applied;
                when None, logits are plain scaled cosine similarities.

        Returns:
            ``logits`` with one column per class and the L2-normalised
            embedding ``emb``.
        """
        x = self.backbone(x)
        emb = self.embedding_head(x)
        emb = F.normalize(emb, dim=1)

        if labels is None:
            # Inference: no margin, just cosine similarity * s.
            logits = F.linear(
                emb,
                F.normalize(self.arc_margin.weight)
            ) * self.arc_margin.s
        else:
            logits = self.arc_margin(emb, labels)

        return logits, emb


In [None]:
# Rebuild the architecture, then restore the trained parameters. The margin
# is set to 0.0: with labels=None the model's forward pass computes plain
# scaled cosine logits and never applies the margin.
model = ResNet100ArcFace(num_classes=num_classes, embedding_dim=512, s=25.0, m=0.0)
model = model.to(device)

# NOTE(review): torch.load unpickles the file — only load checkpoints from a
# trusted source (recent PyTorch versions offer weights_only=True).
model.load_state_dict(
    torch.load("DeepLearningTubes/Models/Resnet100ArcFace.pth", map_location=device)
)

# Evaluation mode: fixes BatchNorm statistics for inference.
model.eval()

print("Model loaded!")

In [None]:
test_paths, test_true_labels = scan_test_dataset(TEST_DIR)

# Predicted / ground-truth class indices, and the rows for the output CSV.
all_preds, all_trues, results = [], [], []

for path, true_name in zip(test_paths, test_true_labels):

    # Use the aligned face crop when available; if preprocessing could not
    # load the image, fall back to opening the file directly.
    cropped = DetectAlignCrop(path)
    if cropped is not None:
        img = Image.fromarray(cropped)
    else:
        img = Image.open(path).convert("RGB")

    # Preprocess and add the batch dimension.
    img = val_tf(img)[None].to(device)

    with torch.no_grad():
        logits, _ = model(img, labels=None)
        pred_idx = logits.argmax(dim=1).item()

    all_preds.append(pred_idx)
    all_trues.append(label_map[true_name])

    # One CSV row per image: file name and predicted class name.
    results.append(dict(filename=os.path.basename(path), label=idx_to_label[pred_idx]))


In [None]:
# Persist the per-image predictions.
pd.DataFrame(results).to_csv("output.csv", index=False)
print("output.csv saved!")

# Accuracy plus macro-averaged precision / recall / F1 over the class indices.
acc = accuracy_score(all_trues, all_preds)
prec, rec, f1 = (
    scorer(all_trues, all_preds, average="macro")
    for scorer in (precision_score, recall_score, f1_score)
)

print("\nRESULTS:")
for caption, value in (
    ("Accuracy :", acc),
    ("Precision:", prec),
    ("Recall   :", rec),
    ("F1 Score :", f1),
):
    print(caption, value)

print("\nConfusion Matrix:\n", confusion_matrix(all_trues, all_preds))