# Dataset Loading

In [1]:
# Import necessary libraries
import cv2 as cv
import numpy as np
import os
import pickle

from insightface.app import FaceAnalysis
from huggingface_hub import hf_hub_download

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
temp_folder = "temp_captures"
target_size = (224,244)
threshold = 0.5
window_size = 5

In [3]:
# Initialize temporary image folder
if not os.path.exists(temp_folder):
    os.makedirs(temp_folder)

In [4]:
# Initialize face detector
detector = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider'])
detector.prepare(ctx_id=0, det_size=(640, 640), det_thresh=0.5)



Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\User/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\User/.insightface\models\buffalo_l\2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\User/.insightface\models\buffalo_l\det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\User/.insightface\models\buffalo_l\genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\User/.insightface\models\buffalo_l\w600k_r50.onnx recognition ['None', 3, 112, 112] 127.5 127

In [5]:
class RecognitionSmoother:
    def __init__(self, window_size=window_size):
        self.window_size = window_size
        self.history = []
    
    def add_recognition(self, person_id, score):
        self.history.append((person_id, score))
        if len(self.history) > self.window_size:
            self.history.pop(0)
    
    def get_smoothed_result(self):
        if not self.history:
            return None, 0

        weights = np.linspace(0.5, 1.5, len(self.history))
        scores = {}
        
        for (pid, score), weight in zip(self.history, weights):
            if pid not in scores:
                scores[pid] = []
            scores[pid].append(score * weight)
        
        avg_scores = {pid: np.mean(vals) for pid, vals in scores.items()}
        best_pid = max(avg_scores.items(), key=lambda x: x[1])[0]
        best_score = avg_scores[best_pid]
        
        return best_pid, best_score

# Initialize smoother
smoother = RecognitionSmoother(window_size=5)

In [6]:
# Load the dataset from HuggingFace
file_path = hf_hub_download(
    repo_id="jesmine0820/assignment_face_recognition",   
    filename="face_embeddings.pkl",  
    repo_type="dataset"
)

# Load the pickle file
with open(file_path, "rb") as f:
    embeddings_data = pickle.load(f)

# Image Processing

In [7]:
# Detect the brightness
def detect_brightness(image):
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    return np.mean(gray)

# Adjust gamma
def adjust_gamma(image, gamma):
    inv_gamma = 1.0 / gamma
    table = np.array([(i / 255.0) ** inv_gamma * 255 for i in np.arange(256)]).astype("uint8")
    return cv.LUT(image, table)

def preprocess_image(img, target_size=target_size):
    lab = cv.cvtColor(img, cv.COLOR_BGR2LAB)
    l,a,b = cv.split(lab)

    clahe = cv.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    l = clahe.apply(l)

    lab = cv.merge((l, a, b))
    img = cv.cvtColor(lab, cv.COLOR_LAB2BGR)

    # Gamma correction based on brightness
    brightness = detect_brightness(img)
    if brightness > 180:
        img = adjust_gamma(img, gamma=1.5)
    elif brightness < 70:
        img = adjust_gamma(img, gamma=0.5)
        
    # Normalize and resize
    img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    img_resized = cv.resize(img_rgb, target_size, interpolation=cv.INTER_AREA)
    
    # Smart blurring
    if cv.Laplacian(img_resized, cv.CV_64F).var() < 100:
        img_resized = cv.GaussianBlur(img_resized, (3, 3), 0)
    
    return img_resized


# Face Detection

In [8]:
def crop_best_face(image):
    faces = detector.get(image)
    if not faces:
        return None, None

    img_center = np.array([image.shape[1] / 2, image.shape[0] / 2])
    
    # Score faces based on multiple factors
    scored_faces = []
    for face in faces:
        bbox = face.bbox.astype(int)
        
        # Center proximity 
        face_center = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2])
        center_score = 1 - (np.linalg.norm(face_center - img_center) / 
                         np.linalg.norm(img_center))
        
        # Face size 
        face_area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        size_score = face_area / (image.shape[0] * image.shape[1])
        
        # Detection confidence 
        det_score = face.det_score
        
        # Sharpness
        face_roi = image[bbox[1]:bbox[3], bbox[0]:bbox[2]]
        sharpness = cv.Laplacian(cv.cvtColor(face_roi, cv.COLOR_BGR2GRAY), cv.CV_64F).var()
        sharpness_score = sharpness / 1000
        
        total_score = (0.4 * center_score + 0.3 * size_score + 
                      0.2 * det_score + 0.1 * sharpness_score)
        
        scored_faces.append((total_score, face))
    
    if not scored_faces:
        return None, None
        
    scored_faces.sort(reverse=True, key=lambda x: x[0])
    best_face = scored_faces[0][1]
    bbox = best_face.bbox.astype(int)
    cropped_face = image[bbox[1]:bbox[3], bbox[0]:bbox[2]]
    
    return cropped_face, best_face


# Feature Extraction

In [9]:
def get_face_embedding_from_obj(face_obj):
    return face_obj.embedding

# Face Recognition

In [10]:
def recognize_face(embedding, dataset):
    best_score = -1
    best_id = None
    best_name = None
    
    for entry in dataset:
        db_embedding = entry["embedding"]
        
        # Cosine similarity
        cos_sim = np.dot(embedding, db_embedding) / (
            np.linalg.norm(embedding) * np.linalg.norm(db_embedding)
        )
        
        # Euclidean distance 
        eucl_dist = np.linalg.norm(embedding - db_embedding)
        eucl_sim = 1 / (1 + eucl_dist) 
        
        # Combined score 
        similarity = 0.7 * cos_sim + 0.3 * eucl_sim
        
        if similarity > best_score:
            best_score = similarity
            best_id = entry["id"]
            best_name = entry["image_name"]
    
    return best_id, best_name, best_score


# Post Processing

In [11]:
def draw_result(image, face, name, score):
    if face is None:
        return image
        
    bbox = face.bbox.astype(int)

    cv.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)

    if hasattr(face, 'landmark') and face.landmark is not None:
        try:
            landmarks = face.landmark.astype(int)
            for point in landmarks:
                cv.circle(image, tuple(point), 2, (0, 0, 255), -1)
        except (AttributeError, cv.error):
            pass 
    
    bar_length = int(100 * min(score, 1.0)) 
    cv.rectangle(image, (bbox[0], bbox[1] - 25),
                (bbox[0] + bar_length, bbox[1] - 5), (0, 255, 0), -1)
    cv.rectangle(image, (bbox[0], bbox[1] - 25),
                (bbox[0] + 100, bbox[1] - 5), (255, 255, 255), 1)

    label = f"{name} ({score:.2f})"
    cv.putText(image, label, (bbox[0], bbox[1] - 30),
              cv.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    
    return image

# Image Pipeline

In [16]:
img = cv.imread("photos/24WMR08866.jpg")
processed_img = preprocess_image(img)
cropped_face, face_obj = crop_best_face(processed_img)
embedding = get_face_embedding_from_obj(face_obj)
person_id, name, score = recognize_face(embedding, embeddings_data)
smoother.add_recognition(person_id, score)
smoothed_id, smoothed_score = smoother.get_smoothed_result()
print(smoothed_id)
print(smoothed_score)

frame = draw_result(img, face_obj, name, smoothed_score)

cv.imshow("Original", img)
cv.waitKey(0)

cv.imshow("Processed image", processed_img)
cv.waitKey(0)

cv.imshow("Face obj", cropped_face)
cv.waitKey(0)

cv.imshow("Frame", frame)
cv.waitKey(0)

cv.destroyAllWindows()

24WMR08866
0.4918338142736108


# Real Time Pipeline