# 📚 Required Libraries
### The following Python libraries are required for this notebook to run successfully:

In [1]:
import os                           # For file and directory operations
import cv2                          # OpenCV for image and video processing
import numpy as np                  # For numerical operations on arrays
import shutil                       # For high-level file operations (e.g., moving, copying files)
from glob import glob               # For pattern matching and file path retrieval
from tqdm import tqdm               # For displaying progress bars in loops
from insightface.app import FaceAnalysis  # For face detection and analysis

## 🔍 Face Embedding & Similarity – Step-by-Step
### Step 1: Load InsightFace Model
- Loads the InsightFace model with the `buffalo_l` configuration.
- `ctx_id=0` enables GPU acceleration if available; set `ctx_id=-1` to force CPU usage.

In [2]:
# Model configuration name and execution device, kept as named settings so
# they are easy to spot and change.
MODEL_NAME = "buffalo_l"
DEVICE_CTX_ID = 0  # 0 selects the GPU; switch to -1 to fall back to the CPU

# Build the InsightFace analysis pipeline and prepare it on the chosen device.
app = FaceAnalysis(name=MODEL_NAME)
app.prepare(ctx_id=DEVICE_CTX_ID)

Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CUDAExecutionProvider': {'device_id': '0', 'has_user_compute_stream': '0', 'cudnn_conv1d_pad_to_nc1d': '0', 'user_compute_stream': '0', 'gpu_external_alloc': '0', 'gpu_mem_limit': '18446744073709551615', 'enable_cuda_graph': '0', 'gpu_external_free': '0', 'gpu_external_empty_cache': '0', 'arena_extend_strategy': 'kNextPowerOfTwo', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'do_copy_in_default_stream': '1', 'cudnn_conv_use_max_workspace': '1', 'tunable_op_enable': '0', 'tunable_op_tuning_enable': '0', 'tunable_op_max_tuning_duration_ms': '0', 'enable_skip_layer_norm_strict_mode': '0', 'prefer_nhwc': '0', 'use_ep_level_unified_stream': '0', 'use_tf32': '1', 'sdpa_kernel': '0', 'fuse_conv_bias': '0'}, 'CPUExecutionProvider': {}}
find model: C:\Users\ACER/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']

### Step 2: Extract Face Embedding from Image
- Detects faces in the input image.
- Returns the normalized face embedding for the first detected face.
- Returns None if no face is found.

In [3]:
def get_face_embedding(image, analyzer=None):
    """Return the normalized embedding of the first face detected in ``image``.

    Args:
        image: Image array as returned by ``cv2.imread``. ``None`` (what
            ``cv2.imread`` returns when loading fails) and empty arrays are
            accepted and treated as "no face".
        analyzer: Optional object exposing ``get(image)`` that returns a
            sequence of detected faces. Defaults to the notebook-level ``app``
            created in the model-loading cell.

    Returns:
        The ``normed_embedding`` of the first detected face, or ``None`` when
        the image is missing/empty or no face is detected.
    """
    # Guard against unreadable images so the detector is never handed None.
    if image is None or getattr(image, "size", 1) == 0:
        return None

    detector = app if analyzer is None else analyzer
    faces = detector.get(image)
    if not faces:
        return None

    # Assumes a single person in the reference image: only the first
    # detection is used.
    return faces[0].normed_embedding

### Step 3: Compute Cosine Similarity
- Computes the cosine similarity between two face embeddings.
- Values range from -1 (vectors point in opposite directions) to 1 (vectors point in the same direction); 0 means the embeddings are unrelated. Higher values indicate more similar faces.

In [4]:
def cosine_similarity(a, b):
    """Compute the cosine similarity between two embedding vectors.

    Args:
        a: First vector (any array-like; flattened to 1-D before use).
        b: Second vector (any array-like; flattened to 1-D before use).

    Returns:
        A Python float in [-1, 1] (up to rounding). Returns 0.0 when either
        vector has zero length, instead of dividing by zero and producing NaN.
    """
    vec_a = np.asarray(a).ravel()
    vec_b = np.asarray(b).ravel()

    denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if denominator == 0:
        # A zero vector has no direction; treat it as "not similar" so that
        # threshold comparisons behave predictably rather than seeing NaN.
        return 0.0

    return float(np.dot(vec_a, vec_b) / denominator)

### 🖼️ Step 4: Load and Process Reference Image
- Loads a reference image of the target person using OpenCV.
- The image path should point to a clear, front-facing photo of the person you want to detect in other images.
- If the image fails to load, it raises an error.
- Extracts the face embedding using get_face_embedding().
- If no face is detected in the reference image, another error is raised.
- ✅ Ensure the reference image has a clear, unobstructed view of the person's face for best results.

In [None]:
# Path to a clear, front-facing photo of the target person.
# This is a raw string, so each path separator is written as a single
# backslash (doubling them would put literal "\\" sequences in the path).
reference_image_path = r"F:\projects\Face Detector\reference.jpg"  # Replace with your clear face image

# cv2.imread does not raise on failure; it returns None, so check explicitly.
ref_image = cv2.imread(reference_image_path)
if ref_image is None:
    raise ValueError("❌ Couldn't load reference image.")

# Embedding of the reference face; every candidate image is compared to it.
ref_embedding = get_face_embedding(ref_image)
if ref_embedding is None:
    raise ValueError("❌ No face found in the reference image.")

### 📁 Step 5: Load Image Paths from Multiple Folders (Recursively)
- Defines a list of input folders containing images.
- Uses glob with recursive=True to search all subdirectories inside each folder.
- Filters the results to include only valid image files (.jpg, .jpeg, .png).
- The result is a complete list of image paths to be processed later.
- 📌 Replace "Folder_1_path", etc., with the actual folder paths on your system.

In [None]:
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def collect_image_paths(folders, extensions=IMAGE_EXTENSIONS):
    """Recursively collect image file paths from several folders.

    Args:
        folders: Iterable of directory paths to search (subdirectories are
            included).
        extensions: File-name suffixes (lower-case, with leading dot) that
            count as images.

    Returns:
        A sorted list of unique, normalized file paths. Folders that do not
        exist are reported and skipped rather than silently ignored.
    """
    # `glob` in this notebook is the function, so fetch `escape` locally.
    from glob import escape as glob_escape

    suffixes = tuple(extensions)
    found = set()  # a set removes duplicates when folders overlap or repeat
    for folder in folders:
        if not os.path.isdir(folder):
            print(f"Warning: input folder not found, skipping: {folder}")
            continue
        # Escape the folder so names containing [, ], * or ? are matched
        # literally instead of being interpreted as glob patterns.
        pattern = os.path.join(glob_escape(folder), "**", "*.*")
        for path in glob(pattern, recursive=True):
            # isfile() drops directories whose names happen to end in ".jpg" etc.
            if os.path.isfile(path) and path.lower().endswith(suffixes):
                found.add(os.path.normpath(path))
    # Sorting gives a deterministic processing order across runs.
    return sorted(found)


input_folders = ["Folder_1_path", "Folder_2_path", "Folder_3_path"]  # Replace with your paths
image_paths = collect_image_paths(input_folders)

### 📂 Step 6: Create Output Folder for Detected Photos
- Defines the name of the output folder where matched images (i.e., images containing your face) will be saved.
- `os.makedirs(..., exist_ok=True)` ensures the folder is created if it doesn’t already exist.
- Prevents errors in case the folder already exists by setting exist_ok=True.
- 📁 This folder will hold all the images where your face is detected with high similarity.

In [None]:
# Destination directory for every image in which the reference face is found.
output_folder = "my_face_photos"

# Create the directory only when it is missing; an existing one is reused.
if not os.path.isdir(output_folder):
    os.makedirs(output_folder, exist_ok=True)

### 🧠 Step 7: Match Faces and Save Similar Images
#### 📝 Description:
- SIMILARITY_THRESHOLD: Controls how strict the face match is. A higher threshold (e.g. 0.7–0.8) means stricter matching; a lower threshold allows looser matches.

- Loops over all images with a progress bar (tqdm) and, for each image:
    1. Loads the image using OpenCV (unreadable images are skipped).
    2. Detects faces using InsightFace.
    3. Calculates the cosine similarity between each detected face and the reference face.
    4. If similarity ≥ threshold:
        - Copies the image to the output_folder.
        - Stops checking other faces in that image.

* ⚠️ You may need to tune the threshold depending on lighting, angles, or image quality. Start with 0.5–0.6 and adjust as needed.

In [None]:
SIMILARITY_THRESHOLD = 0.5  # Tune based on results

print(f"🔍 Scanning {len(image_paths)} images...")

# Destination file names already used during this run (lower-cased, because
# some file systems are case-insensitive). Images are gathered from several
# folders, so two different photos can share a base name; without this, the
# later copy would silently overwrite the earlier one.
claimed_names = set()
matched_count = 0

for image_path in tqdm(image_paths, desc="Processing images"):
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None for unreadable files; skip them.
        continue

    faces = app.get(img)
    # any() stops at the first face that reaches the threshold, so the
    # remaining faces in the image are not compared.
    is_match = any(
        cosine_similarity(ref_embedding, face.normed_embedding) >= SIMILARITY_THRESHOLD
        for face in faces
    )
    if not is_match:
        continue

    # Pick a destination name that has not been used yet in this run:
    # photo.jpg, photo_1.jpg, photo_2.jpg, ...
    stem, ext = os.path.splitext(os.path.basename(image_path))
    dest_name = stem + ext
    suffix = 1
    while dest_name.lower() in claimed_names:
        dest_name = f"{stem}_{suffix}{ext}"
        suffix += 1
    claimed_names.add(dest_name.lower())

    shutil.copy(image_path, os.path.join(output_folder, dest_name))
    matched_count += 1

print(f"✅ Copied {matched_count} matching images.")
print("🎉 Done! All matching images are saved in:", output_folder)

🔍 Scanning 5770 images...


Processing images: 100%|██████████| 5770/5770 [53:43<00:00,  1.79it/s]  

🎉 Done! All matching images are saved in: my_face_photos



