In [3]:
%pip install insightface faiss-cpu opencv-python tqdm onnxruntime


Collecting onnxruntime
  Using cached onnxruntime-1.22.0-cp312-cp312-win_amd64.whl.metadata (5.0 kB)
Collecting coloredlogs (from onnxruntime)
  Using cached coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting flatbuffers (from onnxruntime)
  Using cached flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Using cached humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Collecting pyreadline3 (from humanfriendly>=9.1->coloredlogs->onnxruntime)
  Using cached pyreadline3-3.5.4-py3-none-any.whl.metadata (4.7 kB)
Using cached onnxruntime-1.22.0-cp312-cp312-win_amd64.whl (12.7 MB)
Using cached coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
Using cached humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
Using cached flatbuffers-25.2.10-py2.py3-none-any.whl (30 kB)
Using cached pyreadline3-3.5.4-py3-none-any.whl (83 kB)
Installing collected packages: flatbuffers, pyreadline3, humanfriendly, coloredl

In [5]:
%pip install --upgrade insightface

Note: you may need to restart the kernel to use updated packages.


In [11]:
import os
import shutil
import numpy as np
import cv2
import faiss
from PIL import Image
from insightface.model_zoo import get_model
from insightface.app import FaceAnalysis

# ---------- 1. Load ArcFace Model ----------
# Load ArcFace model from InsightFace
# model = get_model("antelopev2").prepare(ctx_id=-1)  # Use CPU
model = FaceAnalysis("antelopev2")
if model is None:
    raise ValueError("Model 'antelopev2' not found. Check the model name and InsightFace version.")
print("Model loaded:", model)
model = model.prepare(ctx_id=-1)

# ---------- 2. Preprocessing and Face Detection ----------
# Load OpenCV face detector
detector = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

# Preprocessing function
def preprocess_face(image_path):
    """Detect and preprocess the face (crop and align)."""
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = detector.detectMultiScale(gray, 1.3, 5)  # Detect faces

    if len(faces) == 0:
        print(f"[warn] No faces detected in {image_path}")
        return None

    # Crop the first detected face (you can extend this to handle multiple faces if needed)
    x, y, w, h = faces[0]
    face = img[y:y+h, x:x+w]  # Crop the face

    # Resize to 112x112 (required by ArcFace)
    face_resized = cv2.resize(face, (112, 112))
    return face_resized

# ---------- 3. Extract Embeddings ----------
def get_face_embedding(face_img):
    """Get the embedding of a face image."""
    embedding = model.get_embedding(face_img)  # Get 512-dimensional embedding
    return embedding / np.linalg.norm(embedding)  # Normalize the embedding

# ---------- 4. Build FAISS Index ----------
def build_faiss_index(embeddings):
    """Build a FAISS index for fast similarity search."""
    d = embeddings.shape[1]
    index = faiss.IndexFlatL2(d)  # L2 distance index (cosine similarity is L2 for unit vectors)
    index.add(embeddings.astype("float32"))
    return index

# ---------- 5. Collect Reference Faces and Gallery ----------
def collect_references(ref_dir):
    """Collect references (suspect images) and generate embeddings."""
    ref_embeddings = []
    ref_paths = []
    for img_path in os.listdir(ref_dir):
        full_path = os.path.join(ref_dir, img_path)
        if os.path.isfile(full_path) and full_path.lower().endswith(('.jpg', '.jpeg', '.png')):
            face = preprocess_face(full_path)
            if face is not None:
                embedding = get_face_embedding(face)
                ref_embeddings.append(embedding)
                ref_paths.append(full_path)
    return np.array(ref_embeddings), ref_paths

def collect_gallery(gallery_dir):
    """Collect gallery images and generate embeddings."""
    gallery_embeddings = []
    gallery_paths = []
    for img_path in os.listdir(gallery_dir):
        full_path = os.path.join(gallery_dir, img_path)
        if os.path.isfile(full_path) and full_path.lower().endswith(('.jpg', '.jpeg', '.png')):
            face = preprocess_face(full_path)
            if face is not None:
                embedding = get_face_embedding(face)
                gallery_embeddings.append(embedding)
                gallery_paths.append(full_path)
    return np.array(gallery_embeddings), gallery_paths

# ---------- 6. Perform Similarity Search and Copy Matches ----------
def search_and_copy(ref_embeddings, gallery_embeddings, gallery_paths, out_dir, threshold=0.6, top_k=5):
    """Search the gallery for matching faces and copy matches."""
    os.makedirs(out_dir, exist_ok=True)
    
    # Build FAISS index for the gallery embeddings
    index = build_faiss_index(gallery_embeddings)
    
    # Search for matches
    hits = set()
    for ref_embedding in ref_embeddings:
        distances, indices = index.search(np.array([ref_embedding], dtype='float32'), top_k)
        for i, dist in zip(indices[0], distances[0]):
            if dist < threshold:  # Match based on distance (lower distance = more similar)
                hits.add(i)
    
    # Copy matching images to the output directory
    for i in hits:
        src_path = gallery_paths[i]
        dst_path = os.path.join(out_dir, os.path.basename(src_path))
        shutil.copy2(src_path, dst_path)
    print(f"Copied {len(hits)} matching images to {out_dir}")

# ---------- 7. Main Function ----------
def main(ref_dir, gallery_dir, out_dir, threshold=0.6, top_k=5):
    ref_embeddings, ref_paths = collect_references(ref_dir)
    gallery_embeddings, gallery_paths = collect_gallery(gallery_dir)
    
    search_and_copy(ref_embeddings, gallery_embeddings, gallery_paths, out_dir, threshold, top_k)

if __name__ == "__main__":
    ref_dir = "../datasets/images/face/reference_images"  # Directory containing suspect images (3–4 faces)
    gallery_dir = "../datasets/images/face/gallery"  # Directory containing gallery images
    out_dir = "../datasets/images/face/matched_images"  # Directory to save matched images
    
    # Run the face similarity search and copy matches
    main(ref_dir, gallery_dir, out_dir, threshold=0.6, top_k=5)


AssertionError: 