In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import glob
from mtcnn import MTCNN


In [None]:
def frame_extract(path):
    """Yield the frames of the video at ``path`` one by one, in decode order.

    Parameters
    ----------
    path : str
        Location of the video file, handed directly to ``cv2.VideoCapture``.

    Yields
    ------
    image
        Whatever ``VideoCapture.read()`` returns as the frame for each
        successful read. Iteration stops at the first unsuccessful read
        (end of stream or unreadable file), so an unopenable video simply
        yields nothing.

    Notes
    -----
    The capture handle is released in a ``finally`` clause, so it is freed
    both when the video is exhausted and when the consumer stops early
    (``break`` / ``generator.close()`` / garbage collection of the generator).
    """
    vidObj = cv2.VideoCapture(path)
    try:
        while True:
            success, image = vidObj.read()
            if not success:
                break
            yield image
    finally:
        # Without this the underlying file/decoder handle stays open for
        # every processed video until the interpreter reclaims it.
        vidObj.release()

# Create one shared MTCNN face detector at module level.
# create_face_videos() reads this global `detector` for every frame, so this
# cell must be executed before that function is called.
detector = MTCNN()

In [2]:
def _crop_face(frame, box):
    """Return the part of ``frame`` covered by ``box``, clamped to the frame.

    ``box`` is ``(x, y, w, h)``. The detector may report boxes that extend
    past the image (including negative ``x``/``y``); unclamped negative
    indices would wrap around and produce an empty or wrong slice.
    Returns ``None`` when nothing of the box lies inside the frame.
    """
    frame_h, frame_w = frame.shape[:2]
    x, y, w, h = box
    x0, y0 = max(0, x), max(0, y)
    x1, y1 = min(frame_w, x + w), min(frame_h, y + h)
    if x1 <= x0 or y1 <= y0:
        return None
    return frame[y0:y1, x0:x1]


def create_face_videos(path_list, out_dir, max_frames=151, fps=30, size=(112, 112)):
    """Write one face-cropped video into ``out_dir`` for each video in ``path_list``.

    For every input video the first detected face of each frame is cropped,
    resized to ``size`` and appended to an output video that has the same
    file name as the input. Inputs whose output file already exists are
    skipped, so the function can be re-run to resume an interrupted job.

    Relies on the module-level ``detector`` (MTCNN instance) and on
    ``frame_extract`` defined elsewhere in this notebook.

    Parameters
    ----------
    path_list : iterable of str
        Paths of the source videos.
    out_dir : str
        Existing directory that receives the cropped videos.
    max_frames : int or None, optional
        Number of leading frames to examine per video. The default of 151
        matches the previous hard-coded ``idx <= 150`` cutoff; ``None``
        processes the whole video.
    fps : int, optional
        Frame rate of the written video (default 30).
    size : tuple of int, optional
        ``(width, height)`` of the written frames (default ``(112, 112)``).

    Returns
    -------
    None
    """
    existing_videos = glob.glob(os.path.join(out_dir, '*.mp4'))
    print("No of videos already present:", len(existing_videos))

    for path in tqdm(path_list):
        video_name = os.path.basename(path)  # e.g. id60_0001.mp4
        out_path = os.path.join(out_dir, video_name)

        if os.path.exists(out_path):
            print(f"File already exists: {out_path}")
            continue

        # H.264 ('avc1') output; availability depends on the local OpenCV build.
        writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'avc1'), fps, size)
        if not writer.isOpened():
            # A writer that failed to open silently discards every frame.
            writer.release()
            print(f"Could not open video writer for: {out_path}")
            continue

        finished = False
        try:
            for idx, frame in enumerate(frame_extract(path)):
                if max_frames is not None and idx >= max_frames:
                    # Stop decoding instead of reading the rest of the video
                    # only to ignore it.
                    break

                # OpenCV decodes to BGR while MTCNN is documented for RGB
                # input; convert for detection only and crop from the
                # original BGR frame, which is what VideoWriter expects.
                detections = detector.detect_faces(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                if not detections:
                    continue

                face_frame = _crop_face(frame, detections[0]['box'])
                if face_frame is None:
                    continue

                try:
                    writer.write(cv2.resize(face_frame, size))
                except cv2.error as e:
                    print(f"Error processing frame: {e}")
            finished = True
        finally:
            writer.release()
            if not finished and os.path.exists(out_path):
                # An interrupted run would otherwise leave a truncated file
                # that the "already exists" check skips forever.
                try:
                    os.remove(out_path)
                except OSError as e:
                    print(f"Could not remove partial output {out_path}: {e}")

In [3]:
# Input/output folder pairs, keyed by "<split>_<label>".
# Each value is (directory holding the source videos, directory that receives
# the face-cropped videos). Change DATASET_ROOT to point at your own dataset.
DATASET_ROOT = r'C:\Users\Systems\Documents\dataset'

# (key prefix, directory name) for each split; note that the key uses the
# short form 'valid' while the directories on disk are named 'validation'.
_SPLITS = (('train', 'train'), ('valid', 'validation'), ('test', 'test'))
_LABELS = ('real', 'fake')

folders = {
    f'{key_prefix}_{label}': (
        '\\'.join((DATASET_ROOT, 'testing videos', split_dir, label)),
        '\\'.join((DATASET_ROOT, 'processed_faces', f'{split_dir}_{label}')),
    )
    for key_prefix, split_dir in _SPLITS
    for label in _LABELS
}


In [None]:
def process_multiple_folders(folder_dict):
    """Run face extraction for every folder pair in ``folder_dict``.

    Parameters
    ----------
    folder_dict : dict
        Maps a label (used only in the progress message) to a
        ``(video_path, output_path)`` pair: the directory searched for
        ``*.mp4`` files and the directory that receives the results.

    For each entry the output directory is created if missing, then the
    matching video files are handed to ``create_face_videos``.
    """
    for label in folder_dict:
        source_dir, target_dir = folder_dict[label]
        print(f"Processing {label} videos...")
        mp4_paths = glob.glob(source_dir + '/*.mp4')
        # exist_ok=True so re-running on an already prepared tree is harmless.
        os.makedirs(target_dir, exist_ok=True)
        create_face_videos(mp4_paths, target_dir)

# Run face extraction for every entry of `folders`.
# Videos whose output file already exists are skipped by create_face_videos,
# so re-running this cell continues from where a previous run stopped.
process_multiple_folders(folders)