In [None]:
pip install opencv-python mtcnn tqdm


Collecting mtcnn
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting lz4>=4.3.3 (from mtcnn)
  Downloading lz4-4.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading mtcnn-1.0.0-py3-none-any.whl (1.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.9/1.9 MB 37.9 MB/s eta 0:00:00
Downloading lz4-4.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.3/1.3 MB 45.2 MB/s eta 0:00:00
Installing collected packages: lz4, mtcnn
Successfully installed lz4-4.4.3 mtcnn-1.0.0


In [None]:
import os
import cv2
import numpy as np
from mtcnn import MTCNN
from tqdm import tqdm

In [None]:
# Locations of the source videos and of the face-only videos that get written.
_DATASET_ROOT = "/content/drive/MyDrive/STUDY MATERIAL/MINOR_2"
INPUT_DIR = f"{_DATASET_ROOT}/DFDC_FAKE_Face_only_data/"
OUTPUT_DIR = f"{_DATASET_ROOT}/FF_Face_only_data/"

# Per-video processing parameters.
FRAME_THRESHOLD = 100    # maximum number of frames read from each video
FRAME_SIZE = (112, 112)  # size every face crop is resized to before writing
FPS = 30                 # frame rate of the written output video

In [None]:
# Create the output directory (and any missing parents); exist_ok=True makes
# re-running this cell a no-op when the directory is already there.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# Build the face detector once at module level; extract_faces() reads this
# global and reuses the same instance for every frame of every video.
detector = MTCNN()

In [None]:
def extract_faces(video_path, output_video_path):
    """Crop the first detected face from each frame and save the crops as a video.

    At most ``FRAME_THRESHOLD`` frames are read from ``video_path``. For each
    frame in which the module-level ``detector`` reports a face, the first
    detection's bounding box is clipped to the frame, cropped, resized to
    ``FRAME_SIZE`` and collected. If at least one crop was collected, the
    crops are written to ``output_video_path`` with the ``mp4v`` codec at
    ``FPS`` frames per second; otherwise no output file is created.

    Args:
        video_path: Path of the source video to read.
        output_video_path: Path of the face-only video to write.

    Returns:
        None.
    """
    cap = cv2.VideoCapture(video_path)
    frame_list = []
    frame_count = 0

    try:
        # Checking the threshold before reading avoids decoding one extra frame.
        while cap.isOpened() and frame_count < FRAME_THRESHOLD:
            ret, frame = cap.read()
            if not ret:
                break  # End of video (or a decode failure)
            frame_count += 1

            # OpenCV decodes frames as BGR, while MTCNN expects RGB input.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            faces = detector.detect_faces(rgb_frame)
            if not faces:
                continue

            x, y, w, h = faces[0]['box']  # Bounding box of the first detected face
            frame_h, frame_w = frame.shape[:2]

            # Clip all four edges to the frame. The far edges are computed from
            # the unclamped origin so a negative x/y does not shift the window.
            x1, y1 = max(0, x), max(0, y)
            x2, y2 = min(frame_w, x + w), min(frame_h, y + h)
            if x2 <= x1 or y2 <= y1:
                continue  # Degenerate box: an empty crop would make cv2.resize raise

            # Crop from the original BGR frame, since VideoWriter expects BGR.
            cropped_face = frame[y1:y2, x1:x2]
            frame_list.append(cv2.resize(cropped_face, FRAME_SIZE))
    finally:
        # Release the capture even if detection or resizing raised.
        cap.release()

    # Only create an output file when at least one face was found.
    if not frame_list:
        return

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # MP4 format
    out = cv2.VideoWriter(output_video_path, fourcc, FPS, FRAME_SIZE)
    try:
        for face_frame in frame_list:
            out.write(face_frame)
    finally:
        # Always finalize the file so the container is closed properly.
        out.release()

In [None]:
# Collect the .mp4 files of the dataset. os.listdir() returns entries in
# arbitrary order, so sort them to make the processing order reproducible.
video_files = sorted(f for f in os.listdir(INPUT_DIR) if f.endswith('.mp4'))

In [None]:
# Process every video. Videos whose output already exists are skipped, so an
# interrupted run can be resumed simply by re-running this cell.
for video_file in tqdm(video_files, desc="Processing videos"):
    input_video_path = os.path.join(INPUT_DIR, video_file)
    output_video_path = os.path.join(OUTPUT_DIR, video_file)

    if os.path.exists(output_video_path):
        continue  # Already produced by an earlier run

    # Write under a temporary name and rename on success, so that a file left
    # behind by an interrupted run is never mistaken for a finished output.
    partial_video_path = os.path.join(OUTPUT_DIR, "partial_" + video_file)
    if os.path.exists(partial_video_path):
        os.remove(partial_video_path)  # Stale leftover from an interrupted run

    extract_faces(input_video_path, partial_video_path)

    # extract_faces creates no file when no face was detected in the video.
    if os.path.exists(partial_video_path):
        os.replace(partial_video_path, output_video_path)

print("Processing complete! All face-cropped videos saved.")

Processing videos:  18%|█▊        | 289/1566 [59:58<4:25:29, 12.47s/it]