In [None]:
import os
import cv2
import pickle
import random
import numpy as np
from deepface import DeepFace

def train_model(
    dataset_path="dataset",                 # local dataset folder
    encodings_path="encodings_arcface.pkl", # local pickle file
    model_name="ArcFace",
    test_split=0.2,
    seed=None
):
    """Embed the training images of every student and pickle the result.

    Every sub-directory of ``dataset_path`` whose name looks like
    ``<name>_<id>`` (split on the last underscore) is treated as one student;
    the id is left-padded with zeros to three characters. Sub-directories
    without an underscore, or with fewer than two ``.jpg``/``.jpeg``/``.png``
    files, are ignored.

    The images of each student are shuffled and split: the first
    ``int((1 - test_split) * n)`` are embedded with ``DeepFace.represent`` and
    the rest are only recorded by path as held-out test records.

    The pickle written to ``encodings_path`` is a dict with the keys
    ``"embeddings"`` (float32 array), ``"names"``, ``"ids"`` (parallel lists)
    and ``"test_records"`` (list of ``(image_path, name, id)`` tuples).

    Args:
        dataset_path: Directory containing one sub-directory per student.
        encodings_path: Destination of the pickle file.
        model_name: Model name forwarded to ``DeepFace.represent``.
        test_split: Fraction of each student's images to hold out.
        seed: Optional seed for the shuffle. With a seed the split is
            reproducible across runs; with ``None`` (the default) the
            module-level ``random`` generator is used, as before.

    Returns:
        None. The result is only written to ``encodings_path``.
    """
    image_suffixes = (".jpg", ".jpeg", ".png")

    # Keep using the global generator when no seed is given so that callers
    # who seed ``random`` themselves still get what they expect.
    rng = random if seed is None else random.Random(seed)

    known_embeddings, known_names, known_ids = [], [], []
    test_records = []

    # os.listdir order is arbitrary; sorting makes a seeded split reproducible.
    for folder_name in sorted(os.listdir(dataset_path)):
        student_path = os.path.join(dataset_path, folder_name)
        if not os.path.isdir(student_path):
            continue

        name, separator, raw_id = folder_name.rpartition("_")
        if not separator:
            # Folder name does not follow the <name>_<id> convention.
            continue
        student_id = raw_id.zfill(3)

        images = sorted(
            os.path.join(student_path, f)
            for f in os.listdir(student_path)
            if f.lower().endswith(image_suffixes)
        )

        # At least one image is needed for each side of the split.
        if len(images) < 2:
            continue

        rng.shuffle(images)
        split_idx = int((1 - test_split) * len(images))
        train_imgs, test_imgs = images[:split_idx], images[split_idx:]

        test_records.extend((img, name, student_id) for img in test_imgs)

        for img_path in train_imgs:
            # Best effort: one bad image must not abort the whole run, but the
            # failure is reported instead of being dropped silently.
            try:
                # NOTE(review): enforce_detection=False presumably lets
                # DeepFace return an embedding even when no face is found;
                # confirm against the DeepFace documentation.
                rep = DeepFace.represent(
                    img_path=img_path,
                    model_name=model_name,
                    detector_backend="opencv",
                    enforce_detection=False
                )
                if not rep or "embedding" not in rep[0]:
                    print(f"[train_model] skipped {img_path}: no embedding returned")
                    continue
                emb = np.array(rep[0]["embedding"], dtype=np.float32)
            except Exception as exc:
                print(f"[train_model] skipped {img_path}: {exc}")
                continue

            known_embeddings.append(emb)
            known_names.append(name)
            known_ids.append(student_id)

    if not known_embeddings:
        print("[train_model] warning: no embeddings were produced; "
              "the saved gallery is empty")

    data = {
        "embeddings": np.array(known_embeddings, dtype=np.float32),
        "names": known_names,
        "ids": known_ids,
        "test_records": test_records
    }
    with open(encodings_path, "wb") as f:
        pickle.dump(data, f)

# Entry point: build the gallery with the default arguments when this code is
# executed directly rather than imported.
if __name__ == "__main__":
    train_model()


In [None]:
import pickle
import numpy as np
from deepface import DeepFace
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt

def evaluate(
    encodings_path="encodings_arcface.pkl",  # local file
    model_name="ArcFace",
    threshold=0.35
):
    """Score the held-out test images against the stored embeddings.

    Loads the pickle at ``encodings_path`` and, for every
    ``(image_path, name, id)`` entry under ``"test_records"``, embeds the image
    with ``DeepFace.represent`` and picks the stored embedding with the
    highest cosine similarity. If that similarity is below ``threshold`` the
    prediction is "unknown" (id ``None``); otherwise it is the id stored for
    the best match. A prediction is correct when its id equals the record's id.

    Prints overall and per-student accuracy and, if at least one image was
    scored, shows a histogram of the best similarities for correct and
    incorrect predictions. Images that cannot be embedded or compared are
    reported and left out of the accuracy figures.

    Args:
        encodings_path: Pickle file with ``"embeddings"``, ``"ids"`` and
            optionally ``"test_records"``.
        model_name: Model name forwarded to ``DeepFace.represent``.
        threshold: Minimum cosine similarity for accepting the best match.

    Returns:
        None. Results are printed and plotted.
    """
    # NOTE: pickle.load can execute arbitrary code; only load encodings files
    # that come from a trusted source.
    with open(encodings_path, "rb") as f:
        data = pickle.load(f)

    embeddings = np.array(data["embeddings"])
    ids = data["ids"]
    test_records = data.get("test_records", [])

    # cosine_similarity needs a non-empty 2-D gallery; without this guard every
    # test image would fail and the run would just report "0/0".
    if embeddings.ndim != 2 or embeddings.shape[0] == 0:
        print(f"No enrolled embeddings in {encodings_path}; nothing to evaluate against.")
        return

    total, correct, skipped = 0, 0, 0
    # NOTE(review): statistics are keyed by name only, so two students sharing
    # a name but not an id are merged in the per-student report.
    per_student = {}
    sims_correct, sims_incorrect = [], []

    for img_path, true_name, true_id in test_records:
        # Best effort per image: only the external calls are guarded, and a
        # failure is reported rather than silently ignored.
        try:
            rep = DeepFace.represent(
                img_path=img_path,
                model_name=model_name,
                detector_backend="opencv",
                enforce_detection=False
            )
            if not rep or "embedding" not in rep[0]:
                print(f"[evaluate] skipped {img_path}: no embedding returned")
                skipped += 1
                continue

            emb = np.array(rep[0]["embedding"]).reshape(1, -1)
            sims = cosine_similarity(emb, embeddings)[0]
            best_idx = np.argmax(sims)
            best_sim = sims[best_idx]
            pred_id = None if best_sim < threshold else ids[best_idx]
        except Exception as exc:
            print(f"[evaluate] skipped {img_path}: {exc}")
            skipped += 1
            continue

        is_correct = pred_id == true_id
        total += 1

        stats = per_student.setdefault(true_name, {"correct": 0, "total": 0})
        stats["total"] += 1

        if is_correct:
            correct += 1
            stats["correct"] += 1
            sims_correct.append(best_sim)
        else:
            sims_incorrect.append(best_sim)

    acc = (correct / total * 100) if total > 0 else 0
    print(f"Correct: {correct}/{total}")
    print(f"Accuracy: {acc:.2f}%")
    if skipped:
        print(f"Skipped: {skipped} image(s) could not be scored")

    for student, stats in per_student.items():
        s_acc = (stats['correct'] / stats['total'] * 100) if stats['total'] > 0 else 0
        print(f"{student}: {stats['correct']}/{stats['total']} = {s_acc:.2f}%")

    if sims_correct or sims_incorrect:
        plt.hist(sims_correct, bins=30, alpha=0.6, label="Correct Matches", color="g")
        plt.hist(sims_incorrect, bins=30, alpha=0.6, label="Incorrect Matches", color="r")
        plt.axvline(threshold, color="b", linestyle="--", label=f"Threshold = {threshold}")
        plt.xlabel("Cosine Similarity")
        plt.ylabel("Frequency")
        plt.title("Similarity Distributions (Correct vs Incorrect)")
        plt.legend()
        plt.show()

# Entry point: run the evaluation with the default arguments when this code is
# executed directly rather than imported.
if __name__ == "__main__":
    evaluate()
