<a href="https://colab.research.google.com/github/adamxandria/ITP2/blob/main/FaceDetector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install facenet-pytorch
!pip install efficientnet-pytorch
!pip install filetype
!pip install face_recognition



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
video_path= '/content/drive/MyDrive/deepfake-test-files/multi faces/Elon Musks Deep Fake Video Promoting a Crypto Scam.mp4'

In [5]:
import os

# Create folder to store frames if it doesn't exist
output_folder = '/content/Threshold-0.6'
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

In [6]:
import warnings
import gc
import cv2
import filetype
from PIL import Image
import numpy as np
from facenet_pytorch import MTCNN
import os
import face_recognition

import torch
from torchvision.transforms import transforms
from efficientnet_pytorch import EfficientNet as _EfficientNet
import torch.nn as nn


warnings.filterwarnings('ignore') # ignore all warnings
torch.manual_seed(1) # Seed for generating random numbers

DEFAULT_FACE_MIN_CONF = 0.95

In [7]:
class VideoTask:
    """
    Class for media analysis, including face detection and recognition
    """
    def __init__(self):
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.mtcnn = MTCNN(select_largest=False, post_process=False, device=self.device)
        self.known_faces = [] #to store known face encondings and names
        self.tracked_faces = [] #to store tracked face encodings and IDs
        self.next_face_id = 0 #to assign unique IDs to each face

    def assign_face_id(self, detected_encoding):
        """
        Assign a unique ID to the face if it matches a previously detected face.
        Args:
        - detected_encoding: The face encoding of the newly detected face.
        Returns:
        - face_id: The unique ID of the face (either a new ID or an existing one).
        """

        if not self.tracked_faces:  # No faces tracked yet
            face_id = self.next_face_id
            self.tracked_faces.append({'encoding': detected_encoding, 'id': face_id})
            self.next_face_id += 1
            return face_id

        # Compare the detected face encoding to previous face encodings
        encodings = [face['encoding'] for face in self.tracked_faces]
        distances = face_recognition.face_distance(encodings, detected_encoding)
        min_distance = np.min(distances)
        best_match_index = np.argmin(distances)

        # Threshold to determine if it's the same person (adjust based on accuracy)
        if min_distance < 0.6:  # Found a match, use the same face ID
            return self.tracked_faces[best_match_index]['id']
        else:  # No match found, assign a new face ID
            face_id = self.next_face_id
            self.tracked_faces.append({'encoding': detected_encoding, 'id': face_id})
            self.next_face_id += 1
            return face_id

    def load_known_faces(self, known_face_images, known_face_names):
        """
        Load and encode known faces.
        Args:
        - known_face_images: List of file paths to images of known individuals.
        - known_face_names: List of names corresponding to the faces in known_face_images.
        """
        for image_path, name in zip(known_face_images, known_face_names):
            image = face_recognition.load_image_file(image_path)
            encoding = face_recognition.face_encodings(image)[0]
            self.known_faces.append({'encoding': encoding, 'name': name})


    def recognize_faces(self, face_tensor):
        """
        Recognize faces using the loaded known faces.
        Args:
        - face_tensor: A tensor representation of a detected face.
        Returns:
        - recognized_name: Name of the recognized face or 'Unknown' if not recognized.
        """
        # Convert the tensor back to an image for face_recognition
        face_image = transforms.ToPILImage()(face_tensor).convert('RGB')
        face_image_np = np.array(face_image)

        # Encode the detected face
        detected_face_encoding = face_recognition.face_encodings(face_image_np)

        # If no encoding is found, skip the frame
        if not detected_face_encoding:
            return 'Unknown'

        # Compare the detected face to the known faces
        matches = face_recognition.compare_faces(
            [face['encoding'] for face in self.known_faces],
            detected_face_encoding[0]
        )

        face_distances = face_recognition.face_distance(
            [face['encoding'] for face in self.known_faces],
            detected_face_encoding[0]
        )

        # Get the best match (smallest distance)
        best_match_index = np.argmin(face_distances)
        if matches[best_match_index]:
            recognized_name = self.known_faces[best_match_index]['name']
        else:
            recognized_name = 'Unknown'

        return recognized_name

    def get_boundingbox(self, var_x1, var_y1, var_x2, var_y2):
        var_y2 += (var_y2 - var_y1) / 10
        var_w = var_x2 - var_x1
        var_h = var_y2 - var_y1
        diff_h_w = (var_h - var_w) / 2
        var_x1 -= diff_h_w
        var_x2 += diff_h_w
        return var_x1, var_y1, var_x2, var_y2

    def crop_face(self, frame, prob, boxes):
        crop_frame = []
        if prob >= DEFAULT_FACE_MIN_CONF:
            xmin, ymin, xmax, ymax = boxes
            xmin, ymin, xmax, ymax = self.get_boundingbox(xmin, ymin, xmax, ymax)
            crop_frame = frame.crop((xmin, ymin, xmax, ymax))  # Crop and Resize
            crop_frame = crop_frame.resize((224, 224))
        return crop_frame

    def image_face(self, frame, face_min_conf):
        boxes, prob = self.mtcnn.detect(frame)
        crop_faces = []
        face_boxes = []
        if prob is not None:
            for p, box in zip(prob, boxes):
                if p >= face_min_conf:
                    cropped_face = self.crop_face(frame, p, box)
                    crop_faces.append(cropped_face)
                    face_boxes.append(np.array(box))  # Store bounding box as an array (float)

        return crop_faces, face_boxes

    def image_norm_tensor(self, frame, face_min_conf):
        crop_faces, face_boxes = self.image_face(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)), face_min_conf)
        tensor_faces = [transforms.ToTensor()(face) for face in crop_faces]  # Convert cropped faces to tensors
        return tensor_faces, face_boxes

    def video_process(self, infile, face_min_conf, max_fr):
        length = 0
        face_data_list = []  # List to store all face data

        if filetype.is_video(infile):
            cap = cv2.VideoCapture(infile)
            length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            max_frame = int(max_fr * length)

            # Correct frame list generation
            # the_list = np.round(np.linspace(0, length, max_frame, endpoint=False)).astype(int)
            the_list = list(range(0, length))  # Process every frame in sequence
            counter_frame = 0  # Frame counter
            frame_number = 0  # Frame number for saving

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                try:
                    if counter_frame in the_list:
                        crop_faces, face_boxes = self.image_norm_tensor(frame, face_min_conf)

                        # Store faces, frame number, and bounding boxes in the list
                        for i, (face, box) in enumerate(zip(crop_faces, face_boxes)):

                            # recognized_name = self.recognize_faces(face)

                            face_image = transforms.ToPILImage()(face).convert('RGB')
                            face_image_np = np.array(face_image)

                            # Encode the detected face
                            detected_face_encoding = face_recognition.face_encodings(face_image_np)
                            if detected_face_encoding:
                                face_id= self.assign_face_id(detected_face_encoding[0])
                                print(f"Assigned ID {face_id} to detected face")



                            # Add to the face data list
                            face_data_list.append({
                                'frame_number': counter_frame,  # Label each face with frame number
                                'face_tensor': face,  # Tensor of the cropped face
                                'bounding_box': box,  # Bounding box coordinates of the face
                                'face_id': face_id
                            })

                            # Convert tensor to PIL image (removing unusual color maps)
                            pil_image = transforms.ToPILImage()(face)

                            # Save the frame as an image
                            frame_filename = f'{output_folder}/frame_{frame_number:04d}_face_{i}_{face_id}.jpg'
                            pil_image.save(frame_filename)
                            print(f'Saved {frame_filename} - assigned ID: {face_id}')
                        frame_number += 1
                except Exception as e:
                    print(f"Error processing frame {counter_frame}: {e}")
                counter_frame += 1
            cap.release()  # Release the video capture object

        return face_data_list  # Return the list containing all face data

# Initialize the VideoTask class
video_task = VideoTask()

# known_faces_paths = ['/content/faces/face1.jpg', '/content/faces/face1.jpg']  # Add paths to your known face images
# known_face_names = ['Person 1', 'Person 2']  # Add corresponding names for the known faces
# video_task.load_known_faces(known_faces_paths, known_face_names)

frame_percent = 0.1  # Process 10% of the video frames
face_data_list = video_task.video_process(video_path, DEFAULT_FACE_MIN_CONF, frame_percent)

# Output the face data for verification
for face_data in face_data_list:
    print(f"Frame: {face_data['frame_number']}, Bounding Box: {face_data['bounding_box']}")

Error processing frame 0: local variable 'face_id' referenced before assignment
Error processing frame 1: local variable 'face_id' referenced before assignment
Error processing frame 2: local variable 'face_id' referenced before assignment
Error processing frame 3: local variable 'face_id' referenced before assignment
Error processing frame 4: local variable 'face_id' referenced before assignment
Error processing frame 5: local variable 'face_id' referenced before assignment
Error processing frame 6: local variable 'face_id' referenced before assignment
Error processing frame 7: local variable 'face_id' referenced before assignment
Error processing frame 8: local variable 'face_id' referenced before assignment
Error processing frame 9: local variable 'face_id' referenced before assignment
Error processing frame 10: local variable 'face_id' referenced before assignment
Error processing frame 11: local variable 'face_id' referenced before assignment
Error processing frame 12: local varia

In [8]:
print(face_data_list)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

        [[0.5804, 0.5804, 0.5804,  ..., 0.0000, 0.0000, 0.0000],
         [0.5804, 0.5804, 0.5804,  ..., 0.0000, 0.0000, 0.0000],
         [0.5804, 0.5804, 0.5804,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.7608, 0.7059, 0.6588,  ..., 0.0784, 0.0745, 0.0510],
         [0.5608, 0.5137, 0.4784,  ..., 0.1529, 0.1490, 0.1176],
         [0.4118, 0.3843, 0.3529,  ..., 0.1569, 0.1529, 0.1216]]]), 'bounding_box': array([640.5028686523438, 102.36924743652344, 798.8460693359375,
       312.7557373046875], dtype=object), 'face_id': 2}, {'frame_number': 692, 'face_tensor': tensor([[[0.3412, 0.3412, 0.3412,  ..., 0.4471, 0.4471, 0.4471],
         [0.3412, 0.3412, 0.3412,  ..., 0.4471, 0.4471, 0.4471],
         [0.3412, 0.3412, 0.3412,  ..., 0.4471, 0.4471, 0.4471],
         ...,
         [0.3255, 0.2902, 0.2667,  ..., 0.3137, 0.3373, 0.3412],
         [0.2745, 0.2510, 0.2471,  ..., 0.3098, 0.3333, 0.3373],
         [0.2