In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
# Install required packages
!pip install -U insightface onnxruntime-gpu onnx opencv-python rarfile
!apt-get install -y unrar

# Imports
import os
import shutil
import glob
import cv2
import numpy as np
import onnx
import onnxruntime as ort
import rarfile
from pathlib import Path
from google.colab import drive


In [None]:
# Trim the ONNX model so that a single intermediate tensor becomes its only output,
# then open an inference session on the trimmed file.

import onnx.helper, onnx.shape_inference

orig_model    = "" # Original ONNX file
trimmed_model = "" # Trimmed model
output_tensor = "avg_pool"

# 1) Load the graph, drop every declared output and expose `output_tensor` instead.
model = onnx.load(orig_model)
del model.graph.output[:]
model.graph.output.extend([onnx.helper.ValueInfoProto(name=output_tensor)])
# Re-run shape inference on the modified graph before writing it back to disk.
model = onnx.shape_inference.infer_shapes(model)
onnx.save(model, trimmed_model)
print(f"Trimmed model saved → {trimmed_model}")

# 2) Create the ONNX Runtime session (providers listed in order of preference).
sess = ort.InferenceSession(
    trimmed_model,
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)

# The first model input provides the tensor name and the spatial size used later on.
# NOTE(review): the unpacking assumes a 4-D input laid out as (N, C, H, W) — confirm for your model.
inp = sess.get_inputs()[0]
input_name = inp.name
_, _, H, W = inp.shape
print(f"Session ready: input='{input_name}', size={H}×{W}")


In [None]:
# 1) Mount Google Drive (forcing a re-mount if it is already mounted)
drive.mount('/content/drive', force_remount=True)

# 2) Workspace layout: three dataset folders under BASE plus a scratch folder
#    for the extracted archive. All paths are relative to the working directory.
BASE    = "face_recognition_data"
GALLERY = os.path.join(BASE, "gallery")
PROBE   = os.path.join(BASE, "probe")
VERIF   = os.path.join(BASE, "verification")
TEMP    = "temp_extracted"

workspace_dirs = (GALLERY, PROBE, VERIF, TEMP)

# First pass: delete whatever an earlier run left behind ...
for p in workspace_dirs:
    if os.path.exists(p):
        shutil.rmtree(p)

# ... second pass: recreate every folder empty.
for p in workspace_dirs:
    os.makedirs(p)


In [None]:
rar_path = "" # Path to your RAR on Drive

# Make sure TEMP exists and is empty before unpacking into it.
if os.path.exists(TEMP):
    shutil.rmtree(TEMP)
os.makedirs(TEMP, exist_ok=True)

# Unpack the complete archive into TEMP; the context manager closes the archive afterwards.
with rarfile.RarFile(rar_path) as rf:
    rf.extractall(path=TEMP)

print("Extracted all images to", TEMP)


In [None]:
def get_emb(img):
    """Return the unit-length embedding of one image.

    The image is converted from BGR to RGB, cast to float32, resized to the
    session's input size (W x H), rearranged to channel-first layout with a
    leading batch axis and fed to the global ONNX Runtime session ``sess``.
    The first session output is squeezed and divided by its Euclidean norm.

    Parameters
    ----------
    img : array as returned by ``cv2.imread`` (BGR channel order).

    Returns
    -------
    The squeezed first model output, scaled to unit L2 norm.

    Notes
    -----
    No mean/scale normalisation is applied to the pixel values.
    NOTE(review): confirm the model expects raw float32 pixel values.
    """
    rgb_f32 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype("float32")
    resized = cv2.resize(rgb_f32, (W, H))        # cv2 takes the size as (width, height)
    batch = resized.transpose(2, 0, 1)[None]     # HWC -> CHW, then add the batch axis
    raw = sess.run(None, {input_name: batch})[0]
    vec = raw.squeeze()
    return vec / np.linalg.norm(vec)

# Traverse TEMP and embed every readable image.
# Each record keeps the identity (name of the folder holding the file),
# the file path and the embedding.
records = []
exts = {".jpg", ".jpeg", ".png", ".bmp", ".tiff"}

for root, _, files in os.walk(TEMP):
    identity = os.path.basename(root)
    for fname in files:
        # Skip anything that does not carry a known image extension.
        if Path(fname).suffix.lower() not in exts:
            continue
        fp = os.path.join(root, fname)
        img = cv2.imread(fp)
        # cv2.imread yields None for files it cannot decode; those are skipped silently.
        if img is not None:
            records.append(dict(id=identity, path=fp, emb=get_emb(img)))

print(f"Computed embeddings for {len(records)} images from {len(set(r['id'] for r in records))} identities.")


In [None]:
# Label -> embedding maps for the three evaluation sets, plus records grouped by identity.
gallery_embs = {}
probe_embs   = {}
verif_embs   = {}
by_id        = {}

# Collect the records of each identity.
for rec in records:
    if rec["id"] not in by_id:
        by_id[rec["id"]] = []
    by_id[rec["id"]].append(rec)

for id_, recs in by_id.items():
    # Order by path so the split does not depend on directory traversal order.
    recs_sorted = sorted(recs, key=lambda rec: rec["path"])

    # Images before this index go to the gallery, the remaining ones to the probe set.
    half = len(recs_sorted) // 2

    # One pass: every image lands in the verification set and in exactly one
    # of gallery / probe. Labels have the form "<identity>_<index>".
    for idx, rec in enumerate(recs_sorted):
        name = f"{id_}_{idx}"
        target = gallery_embs if idx < half else probe_embs
        target[name] = rec["emb"]
        verif_embs[name] = rec["emb"]


print(f"Gallery: {len(gallery_embs)}, Probe: {len(probe_embs)}, Verification: {len(verif_embs)}")


In [None]:
# Identification: rank-1 matching of every probe embedding against the gallery.

def _identity_of(label):
    """Return the identity part of a ``<identity>_<index>`` embedding label.

    Only the trailing ``_<index>`` field is removed, so identities whose names
    themselves contain underscores (e.g. ``John_Doe``) are kept intact instead
    of being truncated at the first underscore.
    """
    return label.rsplit("_", 1)[0]


# Fail early with a clear message instead of an opaque np.vstack error.
if not gallery_embs or not probe_embs:
    raise ValueError("Identification needs at least one gallery and one probe embedding.")

# Create lists of labels and embedding matrices (one row per embedding)
gallery_labels = list(gallery_embs)
gallery_matrix = np.vstack([gallery_embs[label] for label in gallery_labels])

probe_labels = list(probe_embs)
probe_matrix = np.vstack([probe_embs[label] for label in probe_labels])

# Similarity between every probe (rows) and every gallery entry (columns).
# Embeddings are divided by their norm when produced, so the dot product is the cosine similarity.
similarity_matrix = probe_matrix.dot(gallery_matrix.T)

# Index of the most similar gallery embedding for each probe
best_match_indices = similarity_matrix.argmax(axis=1)

correct_matches = 0

# Compare the predicted gallery identity with the probe identity
for i, probe_label in enumerate(probe_labels):
    best_gallery_label = gallery_labels[best_match_indices[i]]
    similarity_score = similarity_matrix[i, best_match_indices[i]]

    # A match is correct when both labels belong to the same identity
    if _identity_of(probe_label) == _identity_of(best_gallery_label):
        correct_matches += 1
        print(f"Correct: {probe_label} → {best_gallery_label} ({similarity_score:.4f})")
    else:
        print(f"Incorrect: {probe_label} → {best_gallery_label} ({similarity_score:.4f})")

# Identification accuracy as a percentage of all probes
accuracy = (correct_matches / len(probe_labels)) * 100
print(f"\nRank-1 Rate: {accuracy:.2f}%")

In [None]:
# Build the verification pairs.
# Every entry is (embedding_a, embedding_b, label) with label 0 = genuine
# (same identity) and label 1 = impostor (different identities).
pairs = []

# Genuine pairs: each unordered pair of images inside one identity.
for images in by_id.values():
    for i, first in enumerate(images):
        pairs.extend((first["emb"], second["emb"], 0) for second in images[i + 1:])

# Impostor pairs: each image of one identity against each image of every
# identity that comes later in sorted order, so no pair is produced twice.
identities = sorted(by_id)
for i, id_a in enumerate(identities):
    for id_b in identities[i + 1:]:
        pairs.extend(
            (img1["emb"], img2["emb"], 1)
            for img1 in by_id[id_a]
            for img2 in by_id[id_b]
        )


# Report how many pairs of each kind were produced.
num_genuine  = sum(lbl == 0 for *_, lbl in pairs)
num_imposter = sum(lbl == 1 for *_, lbl in pairs)
print(f"Total genuine pairs:  {num_genuine}")
print(f"Total impostor pairs: {num_imposter}\n")

In [None]:
# FAR and FRR at a single, fixed decision threshold.

threshold = 0.5 # Chosen threshold

# Similarity score of every pair and its ground-truth label (0 = genuine, 1 = impostor).
scores = np.array([float(np.dot(e1, e2)) for e1, e2, _ in pairs])
labels = np.array([pair[-1] for pair in pairs])

# Decision per pair: 1 = rejected (score below the threshold), 0 = accepted.
predictions = (scores < threshold).astype(int)

is_genuine = labels == 0
is_impostor = labels == 1
rejected = predictions == 1

# Errors: genuine pairs that were rejected, impostor pairs that were accepted.
genuine_errors = (rejected & is_genuine).sum()
imposter_errors = (~rejected & is_impostor).sum()

# Number of pairs of each kind.
total_genuine = is_genuine.sum()
total_imposter = is_impostor.sum()

# Rates in percent; a class without any pair reports 0.
FRR = 0
if total_genuine > 0:
    FRR = genuine_errors / total_genuine * 100
FAR = 0
if total_imposter > 0:
    FAR = imposter_errors / total_imposter * 100

print(f"Threshold at: {threshold}")
print(f"FRR: {FRR:.2f}%")
print(f"FAR at {FAR:.2f}%")

In [None]:
# Calculation of EER (equal error rate): sweep the decision threshold over the
# observed score range and keep the point where FAR and FRR are closest.

import numpy as np

# Similarity score of every pair and its ground-truth label (0 = genuine, 1 = impostor)
scores = np.array([
    float(e1.dot(e2))
    for e1, e2, _ in pairs
])
labels = np.array([lbl for *_, lbl in pairs])

# Fail with a clear message instead of an opaque "zero-size array" reduction error.
if scores.size == 0:
    raise ValueError("No pairs available: build the genuine/impostor pairs before computing the EER.")

# These do not depend on the threshold, so they are computed once, outside the sweep.
genuine_mask = labels == 0
impostor_mask = labels == 1
total_genuine = genuine_mask.sum()
total_imposter = impostor_mask.sum()

# Compute EER
thresholds = np.linspace(scores.min(), scores.max(), 1000)
best_diff = 1e9
# The loop names are deliberately distinct from `threshold`, `FRR`, `FAR` and
# `predictions`, so the fixed-threshold results computed earlier in the
# notebook are not overwritten by the sweep.
for candidate in thresholds:
    rejected = scores < candidate  # True = rejected, False = accepted

    # Rates in percent; a class without any pair reports 0.
    frr_at_candidate = ((rejected & genuine_mask).sum() / total_genuine * 100) if total_genuine > 0 else 0
    far_at_candidate = ((~rejected & impostor_mask).sum() / total_imposter * 100) if total_imposter > 0 else 0

    # Keep the first threshold at which the two rates are closest.
    gap = abs(frr_at_candidate - far_at_candidate)
    if gap < best_diff:
        best_diff = gap
        eer = (frr_at_candidate + far_at_candidate) / 2
        eer_threshold = candidate

print(f"EER ≈ {eer:.2f}% at threshold {eer_threshold:.4f}")


In [None]:
import matplotlib.pyplot as plt
# FAR/FRR curves over a threshold sweep, the EER read off those curves, and a plot of both.

# 1) Candidate thresholds spanning the observed score range
ths = np.linspace(scores.min(), scores.max(), 1000)

# 2) Error-rate curves in percent. The per-class score subsets do not depend
#    on the threshold, so they are selected once up front.
impostor_scores = scores[labels == 1]
genuine_scores = scores[labels == 0]
FAR_curve = [(impostor_scores >= t).mean()*100 for t in ths]  # impostors accepted
FRR_curve = [(genuine_scores < t).mean()*100 for t in ths]    # genuines rejected

# 3) EER: the threshold where the two curves are closest; report their midpoint
diffs = np.abs(np.array(FAR_curve) - np.array(FRR_curve))
idx = diffs.argmin()
eer = (FAR_curve[idx] + FRR_curve[idx]) / 2
eer_thr = ths[idx]

print(f"EER ≈ {eer:.2f}% at threshold {eer_thr:.4f}")

# 4) Plot both curves and mark the EER point
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(ths, FAR_curve, label='FAR')
ax.plot(ths, FRR_curve, label='FRR')
ax.scatter([eer_thr], [eer], s=80, c='blue', label=f'EER = {eer:.2f}%')
ax.set_xlabel('Cosine Threshold')
ax.set_ylabel('Error Rate (%)')
ax.set_title('FAR and FRR vs Threshold')
ax.legend()
ax.grid(True)
plt.show()
