# Video face tracking and recognition

The idea of this project is to build an example flow for real-time video understanding.

The process consists of the following steps:
Person Detection -> Face detection -> Face recognition -> Face store
  |-> Video Understanding -> 


## Virtual Environment
We need a virtual environment to isolate the project from system-installed pip packages; we use Python 3's built-in `venv` module.

In [1]:
from IPython.display import clear_output
# Refresh the apt package index; stdout is discarded to keep the cell output short.
!apt update > /dev/null
# Install the Python 3.10 venv package together with cmake.
!apt install -y python3.10-venv cmake
# Create the virtual environment in ./.venv.
!python -m venv .venv
# NOTE(review): each `!` line presumably runs in its own subshell, so this
# activation would not carry over to later cells or to the kernel — confirm.
!source .venv/bin/activate
# Clear the installer output so only the confirmation message below remains.
clear_output(wait=True)
print("Virtual Environment Created")

Virtual Environment Created


## Initialize face detection and tracking

In [None]:
from ultralytics import YOLO

# Export the PyTorch face-detection weights with format="engine" (TensorRT in
# Ultralytics), then load the exported engine file for inference.
base_model = YOLO("yolov11n-face.pt")
base_model.export(format="engine")
model = YOLO("yolov11n-face.engine")

# stream=True makes track() hand back a lazy generator of per-frame results,
# so no frame is processed until the generator is iterated.
stream = model.track(
    "./bar_cam_1.mp4",
    conf=0.75,  # detections below this confidence are dropped by the tracker call
    stream=True,
    # verbose=False,
)

In [None]:
# Face crops grouped by tracker id:
#   detections[track id][frame index] = {'face': crop, 'confidence': float | None}
detections = {}

for frame_i, detection in enumerate(stream):
    for box in detection.boxes:
        # Boxes without a tracker id cannot be grouped into a track; skip them.
        track_id = getattr(box, 'id', None)
        if track_id is None:
            continue

        detectionid = int(track_id.item())
        confidence = float(box.conf.item()) if hasattr(box, 'conf') else None

        x1, y1, x2, y2 = map(int, box.xyxy.cpu().numpy()[0])

        # Copy the crop: a plain slice is a view whose base is the full frame,
        # so storing it would keep every source frame alive in memory.
        face = detection.orig_img[y1:y2, x1:x2].copy()

        # Integer truncation can collapse a very thin box to zero area; an
        # empty crop is useless downstream, so it is not stored.
        if face.size == 0:
            continue

        detections.setdefault(detectionid, {})[frame_i] = {
            'face': face,
            'confidence': confidence,
        }


## Display faces
Display the detected face tracking groups

In [None]:
import matplotlib.pyplot as plt
import math
import numpy as np
import cv2

# Tracks that survive the confidence / minimum-length filter further down in
# this cell, keyed by detection id; each value maps frame index -> face data.
filtered_detections = {}

def reduce_list(items, factor=2, limit=200):
    """Thin *items* down to an evenly spaced subset.

    The target size is ``sqrt(len(items)) * factor`` truncated to an int,
    capped at *limit* and then raised to a floor of 10. When *items* is
    already no longer than that target it is returned unchanged (the same
    object); otherwise a new list is built from positions spread evenly from
    the first to the last element, both of which are always included.
    """
    total = len(items)

    # Cap first, then apply the floor of 10 (the floor wins over a small limit).
    capped = min(int(np.sqrt(total) * factor), limit)
    keep_count = capped if capped > 10 else 10

    # Nothing to drop: hand the caller's sequence straight back.
    if total <= keep_count:
        return items

    positions = np.linspace(0, total - 1, num=keep_count, dtype=int)
    return [items[pos] for pos in positions]

# Filter frames based on confidence and reduce list size
MIN_FACE_CONFIDENCE = 0.8  # per-frame confidence required to keep a crop
MIN_TRACK_FRAMES = 6  # tracks with fewer surviving frames are discarded

for detectionid, frames in detections.items():
    # Keep only confident frames; a missing (None) confidence counts as low
    # instead of raising TypeError on the comparison.
    filtered_frames = {
        frame_i: face_data
        for frame_i, face_data in frames.items()
        if face_data['confidence'] is not None
        and face_data['confidence'] >= MIN_FACE_CONFIDENCE
    }

    # Only keep detections with at least MIN_TRACK_FRAMES frames after filtering
    if len(filtered_frames) < MIN_TRACK_FRAMES:
        continue

    # Sort by frame number, thin the list out, then convert back to a dict
    reduced_frames = reduce_list(sorted(filtered_frames.items()))
    filtered_detections[detectionid] = dict(reduced_frames)

# Display detections: one figure per track, faces laid out on a grid
for detectionid, frames in filtered_detections.items():
    print(f"Detection: {detectionid}")
    num_frames = len(frames)
    if num_frames == 0:
        # Nothing to draw; also avoids dividing by zero columns below
        continue

    cols = min(16, num_frames)  # Set max 16 columns per row
    rows = math.ceil(num_frames / cols)

    # squeeze=False always returns a 2-D array of axes, so flattening works
    # for a 1x1 grid as well as for larger ones.
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 2, rows * 2), squeeze=False)
    axes = axes.flatten()

    for ax, face_data in zip(axes, frames.values()):
        # Crops are assumed to be in OpenCV's BGR channel order; matplotlib
        # expects RGB, so swap the channels before drawing.
        ax.imshow(cv2.cvtColor(face_data['face'], cv2.COLOR_BGR2RGB))
        ax.axis("off")

    # Hide unused subplots in the last row
    for ax in axes[num_frames:]:
        fig.delaxes(ax)

    plt.tight_layout()
    plt.show()

In [None]:
from insightface.model_zoo import model_zoo

# ONNX recognition weights taken from the insightface buffalo_l Kaggle input
# (per the path); loaded through insightface's model zoo.
# NOTE(review): rec_model is not used by the cells visible here, which compute
# embeddings with face_recognition instead — confirm whether it is still needed.
rec_model_path = '/kaggle/input/insightface-buffalo_l/onnx/default/1/w600k_r50.onnx'
rec_model = model_zoo.get_model(rec_model_path)

In [None]:
import chromadb

# On-disk Chroma client; its data lives under ./facedb.
client = chromadb.PersistentClient(path="./facedb")

# Reuse the 'facedb' collection when it already exists, otherwise create it.
# The metadata asks for cosine as the HNSW space of the collection.
facedb = client.get_or_create_collection(name='facedb', metadata={"hnsw:space": 'cosine'})

In [None]:
import matplotlib.pyplot as plt
import os
import cv2
import face_recognition

# For every kept face crop, compute an embedding and look up its nearest
# neighbours in the face collection.
for detectionid, frames in filtered_detections.items():
    query_results = []  # query responses for the current track only
    print(f"Detection {detectionid}")
    for frame_i, face_data in frames.items():
        # Crops are assumed to be in OpenCV's BGR order, while face_recognition
        # works on RGB images, so swap the channels first.
        face = cv2.cvtColor(face_data['face'], cv2.COLOR_BGR2RGB)

        # The crop already is the face, so the "location" is the whole image,
        # given in face_recognition's (top, right, bottom, left) order.
        height, width = face.shape[:2]
        known_face_location = [(0, width, height, 0)]
        embed = face_recognition.face_encodings(face, known_face_locations=known_face_location)[0]

        qresult = facedb.query(
            query_embeddings=[embed],
            n_results=20,
        )
        print(qresult)
        print("\n")
        query_results.append(qresult)

        # TODO: persist the embedding once matching is validated.
        # facedb.add(
        #     ids=[str(detectionid) + ":" +  str(frame_i)],
        #     embeddings=[embed],  # Replace with your embeddings
        #     metadatas=[{'detection': detectionid}]
        # )