In [1]:
# For MacOS using MPS with PyTorch
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

%pip install facenet-pytorch opencv-python pillow tqdm pandas timm

%pip install efficientnet_pytorch scikit-learn



Collecting facenet-pytorch
  Using cached facenet_pytorch-2.6.0-py3-none-any.whl (1.9 MB)
Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl (37.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.3/37.3 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pillow
  Downloading pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.5/78.5 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pandas
  Downloading pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl (11.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hColl

In [1]:
# Checking MPS
import torch
print(torch.backends.mps.is_available())  # True if MPS is supported


True


In [1]:
!which python3


/Users/aayushshah/Work/Programming/Deep-Fake-Detection/venv/bin/python3


In [None]:
#MTCNN Implementation


import torch
import cv2
import os
import numpy as np
from facenet_pytorch import MTCNN
from tqdm import tqdm
from PIL import Image

# Set device
device = torch.device("cpu")

# Load MTCNN model
mtcnn = MTCNN(keep_all=False, device=device)

# Define target size (XceptionNet requires 299x299)
TARGET_SIZE = (299, 299)

def resize_with_padding(image, target_size):
    """Resize image while maintaining aspect ratio by padding with a white background."""
    old_size = image.size  # (width, height)
    ratio = min(target_size[0] / old_size[0], target_size[1] / old_size[1])
    new_size = tuple([int(x * ratio) for x in old_size])

    # Resize image
    image = image.resize(new_size, Image.Resampling.LANCZOS)

    # Create new white background image
    new_img = Image.new("RGB", target_size, (255, 255, 255))
    new_img.paste(image, ((target_size[0] - new_size[0]) // 2, (target_size[1] - new_size[1]) // 2))

    return new_img

def detect_and_save_faces(frames, frame_indices, video_path, save_dir):
    """Detect faces from a batch of frames, crop them manually, and resize to a fixed size."""
    boxes, _ = mtcnn.detect(frames)  # Detect faces (bounding boxes)

    for i, (frame, box) in enumerate(zip(frames, boxes)):
        if box is not None and len(box) > 0:  # Ensure at least one face is detected
            frame_np = np.array(frame)  # Convert PIL image to NumPy
            h, w, _ = frame_np.shape  # Get frame dimensions

            # Extract bounding box coordinates
            x1, y1, x2, y2 = map(int, box[0])  # Use only the first detected face

            # Ensure bounding box is within frame limits
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(w, x2), min(h, y2)

            # Ensure nonzero width and height
            if x2 > x1 and y2 > y1:
                face_crop = frame_np[y1:y2, x1:x2]  # Crop face
                face_pil = Image.fromarray(face_crop)  # Convert back to PIL

                # Resize while maintaining aspect ratio
                face_pil = resize_with_padding(face_pil, TARGET_SIZE)

                # Save face image
                face_path = os.path.join(save_dir, f"{os.path.basename(video_path)}_{frame_indices[i]}.jpg")
                face_pil.save(face_path)

def extract_faces(video_path, save_dir, frames_per_video=10, batch_size=4):
    """Extract faces from a video and save them as uniformly resized images."""
    os.makedirs(save_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open {video_path}")
        return

    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = max(1, frame_count // frames_per_video)

    frame_batch = []
    frame_indices = []

    for i in range(0, frame_count, step):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if not ret:
            continue

        # Convert to RGB before face detection
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_img = Image.fromarray(frame_rgb)

        frame_batch.append(pil_img)
        frame_indices.append(i)

        if len(frame_batch) >= batch_size:
            detect_and_save_faces(frame_batch, frame_indices, video_path, save_dir)
            frame_batch.clear()
            frame_indices.clear()

    if frame_batch:
        detect_and_save_faces(frame_batch, frame_indices, video_path, save_dir)

    cap.release()

# Paths
video_folder = os.path.abspath("CelebDF")
output_folder = "Preprocessing"
video_extensions = {".mp4", ".avi", ".mov", ".mkv"}

# Process all videos
for subfolder in os.listdir(video_folder):
    subfolder_path = os.path.join(video_folder, subfolder)

    if not os.path.isdir(subfolder_path):
        continue

    for video in tqdm(os.listdir(subfolder_path), desc=f"Processing {subfolder}", mininterval=2):
        video_path = os.path.join(subfolder_path, video)
        if os.path.isfile(video_path) and any(video.lower().endswith(ext) for ext in video_extensions):
            extract_faces(video_path, output_folder)


Processing Celeb-synthesis:   0%|          | 4/5639 [00:05<2:08:40,  1.37s/it]


KeyboardInterrupt: 

In [2]:
%pip install insightface
%pip install onnxruntime

Collecting insightface
  Downloading insightface-0.7.3.tar.gz (439 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m439.5/439.5 kB[0m [31m686.2 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting onnx
  Downloading onnx-1.17.0-cp311-cp311-macosx_12_0_universal2.whl (16.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.6/16.6 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting matplotlib
  Downloading matplotlib-3.10.1-cp311-cp311-macosx_11_0_arm64.whl (8.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.0/8.0 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
Collecting scikit-image
  Downloading scikit_image-0.25.2-cp311-cp311-macosx_12_0_arm64.whl (13.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Collecting onnxruntime
  Downloading onnxruntime-1.20.1-cp311-cp311-macosx_13_0_universal2.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting flatbuffers (from onnxruntime)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime-1.20.1-cp311-cp311-macosx_13_0_universal2.whl (31.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.0/31.0 MB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
Downloading flatbuffers-25.2.10-py2.py3-none-any.whl (30 kB)
Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
Installing collected packages: flatbuffers, humanfriendly, coloredlogs, onnxruntime
Successfully installed coloredlogs

In [None]:
import os
import cv2
import torch
import torchvision.transforms as T
import numpy as np
from insightface.app import FaceAnalysis  
from tqdm import tqdm
from PIL import Image

# Check if MPS is available, else fallback to CPU
device = torch.device("cpu")  # Must use CPU due to MPS limitations

# Initialize InsightFace RetinaFace
face_detector = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])  # ✅ Load RetinaFace
face_detector.prepare(ctx_id=-1)  # ✅ Run on CPU

# Define target size (Xception requires 299x299)
TARGET_SIZE = (299, 299)

# Torchvision transform for resizing and padding
def resize_with_padding(image, target_size):
    """Resize image while maintaining aspect ratio with padding (using PyTorch)."""
    transform = T.Compose([
        T.Resize(target_size, interpolation=T.InterpolationMode.BILINEAR),
        T.CenterCrop(target_size),  # Ensures it remains square after resizing
        T.ToTensor()
    ])
    return transform(image)

def detect_and_save_faces(frames, frame_indices, video_path, save_dir):
    """Detect faces in a batch of frames using InsightFace RetinaFace and save cropped images."""
    for i, frame in enumerate(frames):
        frame_np = np.array(frame)  # Convert PIL image to NumPy
        h, w, _ = frame_np.shape

        # Detect faces using InsightFace
        faces = face_detector.get(frame_np)  # ✅ Correct method

        if faces:  # Ensure at least one face is detected
            for face in faces:
                x1, y1, x2, y2 = face.bbox.astype(int)  # ✅ Correct way to get bounding box

                # Ensure bounding box stays within frame
                x1, y1 = max(0, x1), max(0, y1)
                x2, y2 = min(w, x2), min(h, y2)

                # Extract face region
                if x2 > x1 and y2 > y1:
                    face_crop = frame_np[y1:y2, x1:x2]
                    face_pil = Image.fromarray(face_crop)

                    # Resize while keeping aspect ratio
                    face_tensor = resize_with_padding(face_pil, TARGET_SIZE)

                    # Convert back to PIL for saving
                    face_pil = T.ToPILImage()(face_tensor)

                    # Save face image
                    face_path = os.path.join(save_dir, f"{os.path.basename(video_path)}_{frame_indices[i]}.jpg")
                    face_pil.save(face_path)

def extract_faces(video_path, save_dir, frames_per_video=10, batch_size=4):
    """Extract faces from a video and save them as uniformly resized images."""
    os.makedirs(save_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open {video_path}")
        return

    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = max(1, frame_count // frames_per_video)

    frame_batch = []
    frame_indices = []

    for i in range(0, frame_count, step):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if not ret:
            continue

        # Convert to RGB before face detection
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_img = Image.fromarray(frame_rgb)

        frame_batch.append(pil_img)
        frame_indices.append(i)

        if len(frame_batch) >= batch_size:
            detect_and_save_faces(frame_batch, frame_indices, video_path, save_dir)
            frame_batch.clear()
            frame_indices.clear()

    if frame_batch:
        detect_and_save_faces(frame_batch, frame_indices, video_path, save_dir)

    cap.release()

# Paths
video_folder = os.path.abspath("CelebDF")
output_folder = "Preprocessing"
video_extensions = {".mp4", ".avi", ".mov", ".mkv"}

# Process all videos
for subfolder in os.listdir(video_folder):
    subfolder_path = os.path.join(video_folder, subfolder)

    if not os.path.isdir(subfolder_path):
        continue

    for video in tqdm(os.listdir(subfolder_path), desc=f"Processing {subfolder}", mininterval=2):
        video_path = os.path.join(subfolder_path, video)
        
        output_check = os.path.join(output_folder, f"{video}_0.jpg")
        if os.path.exists(output_check):
            continue 
        
        if os.path.isfile(video_path) and any(video.lower().endswith(ext) for ext in video_extensions):
            extract_faces(video_path, output_folder)
            
    


Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/aayushshah/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/aayushshah/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/aayushshah/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/aayushshah/.insightface/models/buffalo_l/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/aayushshah/.insightface/models/buffalo_l/w600k_r50.onnx recognition ['None', 3,

Processing Celeb-synthesis:  53%|█████▎    | 3000/5639 [2:11:03<1:55:16,  2.62s/it]   


KeyboardInterrupt: 

In [None]:
import os
import cv2
import numpy as np

image_folder = "Preprocessing"
black_images = 0


for img_file in os.listdir(image_folder):
    img_path = os.path.join(image_folder, img_file)
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    
    if img is not None and np.mean(img) < 5:  # Very dark image
        black_images += 1

print(f"Total black images: {black_images}")




Total black images: 14372


NameError: name 'size' is not defined

In [17]:
import os
import cv2
import numpy as np

image_folder = "Preprocessing"
black_images = 0

for img_file in os.listdir(image_folder):
    img_path = os.path.join(image_folder, img_file)
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    
    if img is not None and np.mean(img) < 5:  # Very dark image
        os.remove(img_path)
        black_images += 1

print(f"Deleted {black_images} black images.")


Deleted 14372 black images.
