In [7]:
import os
from datetime import timedelta

import cv2
import joblib
import numpy as np
import torch
from facenet_pytorch import MTCNN, InceptionResnetV1

In [8]:
def process_video(video_path, output_video_path="Output/output.mp4", log_path="recognition_log.txt",
                  MODEL_PATH=None, threshold=0.6):
    """Detect and label faces in a video, writing an annotated copy and a text log.

    Each frame is passed through MTCNN for face detection. Every detected face
    is cropped, resized to 160x160, embedded with InceptionResnetV1 and scored
    with the classifier stored in the joblib bundle. The frame is annotated
    with a box and a "label (probability)" caption, and one log line per face
    is written as "[timestamp] label (probability)".

    Args:
        video_path: Path of the input video handed to cv2.VideoCapture.
        output_video_path: Destination of the annotated video (mp4v codec).
            Missing parent directories are created.
        log_path: Destination of the text log. Missing parent directories are
            created; an existing file is overwritten.
        MODEL_PATH: Path to a joblib file containing a dict with the keys
            "classifier" (must provide predict_proba) and "encoder" (must
            provide inverse_transform). Required despite the None default.
        threshold: A prediction is labelled "Unknown" unless its highest
            class probability is strictly greater than this value.

    Raises:
        ValueError: If MODEL_PATH is None.
        OSError: If the input video cannot be opened or the output video
            writer cannot be created.
    """
    # Fail fast, before loading any network weights, with an actionable message
    # instead of the opaque error joblib.load(None) would produce.
    if MODEL_PATH is None:
        raise ValueError(
            "MODEL_PATH is required: pass the path to the joblib file holding "
            "the 'classifier' and 'encoder' objects."
        )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    mtcnn = MTCNN(image_size=160, margin=20, device=device)
    resnet = InceptionResnetV1(pretrained="vggface2").eval().to(device)

    # SECURITY: joblib.load unpickles arbitrary objects; only load trusted files.
    data = joblib.load(MODEL_PATH)
    clf = data["classifier"]
    encoder = data["encoder"]

    cap = cv2.VideoCapture(video_path)
    out = None
    frame_idx = 0
    try:
        if not cap.isOpened():
            raise OSError(f"Could not open input video: {video_path}")

        # Read and verify FPS (some containers report 0, NaN or a negative value).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if np.isnan(fps) or fps <= 0:
            fps = 25.0  # Safe default
            print("⚠️ FPS was 0 or invalid. Defaulting to 25 FPS.")
        else:
            print(f"🎞️ Detected FPS: {fps}")

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration_sec = total_frames / fps
        print(f"🎥 Input video has {total_frames} frames, duration ~ {duration_sec:.2f} seconds.")

        # Neither cv2.VideoWriter nor open() creates missing directories.
        for target in (output_video_path, log_path):
            parent = os.path.dirname(target)
            if parent:
                os.makedirs(parent, exist_ok=True)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise OSError(f"Could not open output video for writing: {output_video_path}")

        # UTF-8 so that non-ASCII labels never fail on a locale-default encoding.
        with open(log_path, "w", encoding="utf-8") as log:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                boxes, _ = mtcnn.detect(img_rgb)

                if boxes is not None:
                    frame_h, frame_w = frame.shape[:2]
                    # Identical for every face in this frame, so compute it once.
                    timestamp = str(timedelta(seconds=frame_idx / fps))

                    for box in boxes:
                        # Clamp the detection box to the frame before cropping.
                        x1, y1, x2, y2 = map(int, box)
                        x1, y1 = max(x1, 0), max(y1, 0)
                        x2, y2 = min(x2, frame_w), min(y2, frame_h)

                        face = img_rgb[y1:y2, x1:x2]
                        if face.size == 0:
                            continue
                        face_resized = cv2.resize(face, (160, 160))
                        # NOTE(review): pixels are scaled to [0, 1] here. This must match
                        # the preprocessing used when the classifier's training embeddings
                        # were produced -- confirm before changing it.
                        face_tensor = torch.tensor(face_resized).permute(2, 0, 1).float().unsqueeze(0) / 255.0
                        face_tensor = face_tensor.to(device)

                        with torch.no_grad():
                            embedding = resnet(face_tensor).cpu().numpy().flatten()

                        probs = clf.predict_proba([embedding])[0]
                        best_idx = int(np.argmax(probs))
                        max_prob = probs[best_idx]
                        if max_prob > threshold:
                            label = encoder.inverse_transform([best_idx])[0]
                        else:
                            label = "Unknown"

                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                        # Keep the caption visible when the face touches the top edge:
                        # draw it just inside the box instead of above the frame.
                        text_y = y1 - 10 if y1 - 10 > 10 else y1 + 20
                        cv2.putText(frame, f"{label} ({max_prob:.2f})", (x1, text_y),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

                        log.write(f"[{timestamp}] {label} ({max_prob:.2f})\n")

                out.write(frame)
                frame_idx += 1
    finally:
        # Always free the capture/writer handles, even if a frame fails midway.
        cap.release()
        if out is not None:
            out.release()

    output_duration = frame_idx / fps
    print(f"Finished. Total frames written: {frame_idx} (~{output_duration:.2f} seconds at {fps} FPS).")
    print(f"Output video saved to: {output_video_path}")
    print(f"Log saved to: {log_path}")


In [9]:
# Path of the joblib bundle that process_video loads; the loaded object is
# indexed with the keys "classifier" and "encoder".
MODEL_PATH = "Models/face_recognition2.pkl"

In [10]:
# Run face recognition on the sample clip. The annotated video and the text log
# are written to process_video's default output locations.
input_video = "test_files/aa.webm"
process_video(video_path=input_video, MODEL_PATH=MODEL_PATH)

🎞️ Detected FPS: 29.97002997002997
🎥 Input video has 215 frames, duration ~ 7.17 seconds.
Finished. Total frames written: 215 (~7.17 seconds at 29.97002997002997 FPS).
Output video saved to: Output/output.mp4
Log saved to: recognition_log.txt
