In [None]:
import glob
import numpy as np
import cv2

# Root folder that contains one sub-folder per source dataset
base_path = 'C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/dataset'

# One glob pattern per (dataset, label) folder; within each dataset the
# "real" folder is listed before the "fake" one.
video_paths = [
    f'{base_path}/{dataset}/{label}/*.mp4'
    for dataset in ('Celeb-DF', 'DFDC', 'FF++')
    for label in ('real', 'fake')
]

# Gather the matches of every pattern, reporting the per-pattern count as we go
video_files = []
for path in video_paths:
    found_files = glob.glob(path)
    video_files += found_files
    print(f'Searched in: {path}, Found: {len(found_files)} files')

if video_files:
    print(f'Total videos found before filtering: {len(video_files)}')
else:
    print("No video files found! Check your paths.")

# Keep only the videos whose reported frame count is at least 150.
# `valid_videos` and `frame_count` are parallel lists: entry i of one
# belongs to entry i of the other.
frame_count = []
valid_videos = []

for video_file in video_files:
    # Open the file only long enough to read its frame-count property.
    capture = cv2.VideoCapture(video_file)
    n_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    capture.release()

    if n_frames < 150:
        continue

    valid_videos.append(video_file)
    frame_count.append(n_frames)

print(f"Total valid videos (with 150+ frames): {len(valid_videos)}")
print('Average frames per valid video:', 'N/A' if not frame_count else np.mean(frame_count))


Searched in: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/dataset/Celeb-DF/real/*.mp4, Found: 588 files
Searched in: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/dataset/Celeb-DF/fake/*.mp4, Found: 580 files
Searched in: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/dataset/DFDC/real/*.mp4, Found: 1726 files
Searched in: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/dataset/DFDC/fake/*.mp4, Found: 1565 files
Searched in: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/dataset/FF++/real/*.mp4, Found: 993 files
Searched in: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/dataset/FF++/fake/*.mp4, Found: 996 files
Total videos found before filtering: 6448
Total valid videos (with 150+ frames): 6448
Average frames per valid vid

In [4]:
import glob
import os
import cv2
import face_recognition
from tqdm import tqdm


In [5]:
# Extract frames from a video
def frame_extract(path):
    """Yield the frames of the video at ``path`` one at a time, in order.

    Args:
        path: Location of the video, passed unchanged to ``cv2.VideoCapture``.

    Yields:
        Each frame exactly as returned by ``VideoCapture.read``. Iteration
        stops at the first read that reports failure.

    The capture is released when the generator finishes, is closed, or is
    discarded before being exhausted (e.g. the consumer breaks out early).
    """
    vidObj = cv2.VideoCapture(path)
    try:
        while True:
            success, image = vidObj.read()
            if not success:
                break
            yield image
    finally:
        # Runs on normal exhaustion and on generator close, so the underlying
        # file handle / decoder is never left open.
        vidObj.release()

In [6]:
# Process frames to extract faces and create face-only videos
def create_face_videos(path_list, out_dir, max_frames=150, frame_size=(112, 112), fps=30):
    """Write one face-only video per input video into ``out_dir``.

    For each input, up to ``max_frames`` frames are read; on every frame where
    a face is detected, the first detected face is cropped, resized to
    ``frame_size`` and appended to the output video. Frames without a
    detection are skipped, so an output may hold fewer than ``max_frames``
    frames.

    The output file is named after the input's basename, so inputs that share
    a filename map to the same output. An input whose output file already
    exists is skipped, which lets an interrupted run be resumed.

    Args:
        path_list: Iterable of input video paths.
        out_dir: Directory for the output videos; created if missing.
        max_frames: Maximum number of input frames examined per video.
        frame_size: ``(width, height)`` of the written face crops.
        fps: Frame rate recorded in the output videos.

    Errors raised while processing a video are printed and the loop moves on
    to the next video. If processing is aborted by something that is not an
    ``Exception`` (e.g. ``KeyboardInterrupt``), the partially written output
    is deleted before the interruption propagates, so a later run does not
    mistake it for a finished file.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_dir, exist_ok=True)

    already_present_count = len(glob.glob(os.path.join(out_dir, '*.mp4')))
    print("No of videos already present:", already_present_count)

    for path in tqdm(path_list):
        out_path = os.path.join(out_dir, os.path.basename(path))

        # Skip if the file already exists
        if os.path.exists(out_path):
            print(f"File Already exists: {out_path}")
            continue

        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, frame_size)

        finished = False
        try:
            _write_face_frames(path, out, max_frames, frame_size)
            finished = True
        except Exception as e:
            # Best effort: report, keep whatever was written, carry on.
            print(f"Error processing video {path}: {e}")
            finished = True
        finally:
            # Always release the writer, even when interrupted.
            out.release()
            if not finished and os.path.exists(out_path):
                # Interrupted mid-video: drop the partial file so the
                # "already exists" check retries it on the next run.
                os.remove(out_path)


def _write_face_frames(path, out, max_frames, frame_size):
    """Append the resized first-face crop of up to ``max_frames`` frames of ``path`` to ``out``."""
    frames = frame_extract(path)
    try:
        for idx, frame in enumerate(frames):
            if idx >= max_frames:
                break

            # OpenCV decodes frames as BGR, while face_recognition works on
            # RGB images; detect on a converted copy and crop the original.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            detections = face_recognition.batch_face_locations(
                [rgb_frame], number_of_times_to_upsample=0, batch_size=1
            )
            if not detections or len(detections[0]) == 0:
                continue

            # Only the first detected face of the frame is kept.
            top, right, bottom, left = detections[0][0]
            try:
                face_crop = cv2.resize(frame[top:bottom, left:right], frame_size)
                out.write(face_crop)
            except Exception as e:
                print(f"Face extraction error in {path}: {e}")
    finally:
        # Close the frame generator explicitly so it can clean up right away
        # when we stop before the video is exhausted.
        frames.close()


In [7]:
# Destination folder for the face-only videos.
output_dir = 'C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data'
# Process the filtered list: `video_files` still contains any clip that
# reported fewer than 150 frames, which the filtering cell excluded from
# `valid_videos`.
create_face_videos(valid_videos, output_dir)

No of videos already present: 6448


 10%|█         | 676/6448 [00:00<00:00, 6555.25it/s]

File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\id0_0000.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\id0_0001.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\id0_0002.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\id0_0003.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\id0_0004.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\id0_0005.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\id0_0006.mp4
File Already exists:

 44%|████▎     | 2820/6448 [00:00<00:00, 9984.48it/s]

File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\djmyetudka.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\dkjxtelpyg.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\dkkvcedpkn.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\dkntaifhkt.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\dmegwsvycz.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\dmmvuaikkv.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\dmtsxclujy.mp4
File A

100%|██████████| 6448/6448 [00:00<00:00, 11325.99it/s]

File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\csflwluoeb.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\cshdgzvfpe.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\cshlbvfpzi.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\csikkbsnia.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\cslsbufmpu.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\csmvoigpmo.mp4
File Already exists: C:/Users/dwije/Desktop/VIII_SEM_PROJECT/Deepfake_detection_using_ResNet/DataSetPreparation/Face_Only_Data\csnkohqxdv.mp4
File A


