# Statistics of Celeb-DF 

In [4]:
pip install opencv-python

Defaulting to user installation because normal site-packages is not writeable
Collecting opencv-python
  Downloading opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Downloading opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl (54.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.8/54.8 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: opencv-python
Successfully installed opencv-python-4.10.0.84
Note: you may need to restart the kernel to use updated packages.


In [5]:
import os
import cv2
import shutil

In [6]:
def calculate_video_statistics(video_folder):
    total_frame_count = 0
    total_video_length = 0
    total_video_count = 0

    for root, dirs, files in os.walk(video_folder):
        for file in files:
            if file.endswith(".mp4"):  # Assuming all video files are in mp4 format
                video_path = os.path.join(root, file)
                cap = cv2.VideoCapture(video_path)
                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                fps = int(cap.get(cv2.CAP_PROP_FPS))

                # Ensure FPS is not zero to avoid division by zero error
                if fps != 0:
                    video_length = frame_count / fps
                    total_frame_count += frame_count
                    total_video_length += video_length
                    total_video_count += 1
                cap.release()

    # Calculate average frames per video
    average_frame_per_video = total_frame_count / total_video_count if total_video_count != 0 else 0
    # Calculate average video length per video
    average_video_length_per_video = total_video_length / total_video_count if total_video_count != 0 else 0
    
    return total_frame_count, total_video_length, average_frame_per_video, average_video_length_per_video

def merge_video_folders(destination_folder, *folders):
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    all_files = []
    for folder in folders:
        for root, dirs, files in os.walk(folder):
            for file in files:
                if file.endswith(".mp4"):
                    source_file = os.path.join(root, file)
                    destination_file = os.path.join(destination_folder, file)
                    if not os.path.exists(destination_file):
                        shutil.copy2(source_file, destination_file)
                    all_files.append(destination_file)
    return all_files

def calculate_statistics_from_file_list(file_list):
    total_frame_count = 0
    total_video_length = 0
    total_video_count = 0

    for video_path in file_list:
        cap = cv2.VideoCapture(video_path)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))

        # Ensure FPS is not zero to avoid division by zero error
        if fps != 0:
            video_length = frame_count / fps
            total_frame_count += frame_count
            total_video_length += video_length
            total_video_count += 1
        cap.release()

    # Calculate average frames per video
    average_frame_per_video = total_frame_count / total_video_count if total_video_count != 0 else 0
    # Calculate average video length per video
    average_video_length_per_video = total_video_length / total_video_count if total_video_count != 0 else 0
    
    return total_frame_count, total_video_length, average_frame_per_video, average_video_length_per_video


In [7]:
# Define file paths
celebdf_fake_data = '/Users/faishalkamil/Downloads/documents skripsi/Celeb-synthesis'
celebdf_real_data = '/Users/faishalkamil/Downloads/documents skripsi/Celeb-real'

# Calculate statistics for each folder
celebdf_fake_stats = calculate_video_statistics(celebdf_fake_data)
celebdf_real_stats = calculate_video_statistics(celebdf_real_data)


In [8]:
print("Celeb-DF Fake Data:")
print("Total Frame Count:", celebdf_fake_stats[0])
print("Total Video Length (seconds):", celebdf_fake_stats[1])
print("Average Frames per Video:", celebdf_fake_stats[2])
print("Average Video Length per Video (seconds):", celebdf_fake_stats[3])
print()

Celeb-DF Fake Data:
Total Frame Count: 2116768
Total Video Length (seconds): 70633.33711001606
Average Frames per Video: 375.3800319205533
Average Video Length per Video (seconds): 12.525862229121486



In [9]:
print("Celeb-DF Real Data:")
print("Total Frame Count:", celebdf_real_stats[0])
print("Total Video Length (seconds):", celebdf_real_stats[1])
print("Average Frames per Video:", celebdf_real_stats[2])
print("Average Video Length per Video (seconds):", celebdf_real_stats[3])
print()

Celeb-DF Real Data:
Total Frame Count: 225390
Total Video Length (seconds): 7524.276026272573
Average Frames per Video: 382.0169491525424
Average Video Length per Video (seconds): 12.75301021402131



# Data Prepcosessing

In [16]:
pip install dlib

Defaulting to user installation because normal site-packages is not writeable
Collecting dlib
  Using cached dlib-19.24.6-cp39-cp39-macosx_14_0_universal2.whl
Installing collected packages: dlib
Successfully installed dlib-19.24.6
Note: you may need to restart the kernel to use updated packages.


In [17]:
pip install face_recognition

Defaulting to user installation because normal site-packages is not writeable
Collecting face_recognition
  Using cached face_recognition-1.3.0-py2.py3-none-any.whl.metadata (21 kB)
Collecting Click>=6.0 (from face_recognition)
  Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)
Using cached face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Using cached click-8.1.7-py3-none-any.whl (97 kB)
Installing collected packages: Click, face_recognition
Successfully installed Click-8.1.7 face_recognition-1.3.0
Note: you may need to restart the kernel to use updated packages.


In [18]:
import os
import cv2
import face_recognition
import random

## Real video of Celeb-DF

In [20]:
# Define file paths
celebdf_fake_data = '/Users/faishalkamil/Downloads/documents skripsi/Celeb-synthesis'
celebdf_real_data = '/Users/faishalkamil/Downloads/documents skripsi/Celeb-real'

# Define output parent directory for extracted frames
extracted_frames_output_parent_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new'

# Define subfolders for extracted data
output_folders = {
    'celebdf_real': os.path.join(extracted_frames_output_parent_dir, 'celebdf real')
    }

In [None]:

# Create subfolders if they don't exist
for folder in output_folders.values():
    os.makedirs(folder, exist_ok=True)

# Function to extract frames from a video
def frame_extract(path):
    vidObj = cv2.VideoCapture(path)
    success = 1
    while success:
        success, image = vidObj.read()
        if success:
            yield image

# Process the frames and detect faces
def create_face_videos_in_batches(path_list, parent_out_dir, max_videos=590, batch_size=20, seed=42):
    random.seed(seed)

    for folder_path, output_folder in path_list:
        video_files = [file for file in os.listdir(folder_path) if file.endswith(".mp4")]
        random.shuffle(video_files)

        if output_folder == output_folders['celebdf_real']:
            video_files = video_files[:max_videos]

        total_files = len(video_files)

        for start_idx in range(0, total_files, batch_size):
            end_idx = min(start_idx + batch_size, total_files)
            batch_files = video_files[start_idx:end_idx]

            for file in batch_files:
                file_path = os.path.join(folder_path, file)
                file_name = os.path.splitext(file)[0]
                out_path = os.path.join(output_folder, f'{file_name}_faces.mp4')

                if os.path.exists(out_path):
                    print("File already exists:", out_path)
                    continue

                out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), 30, (256, 256))
                frames = []
                for idx, frame in enumerate(frame_extract(file_path)):
                    if idx <= 150:  # Adjust the number of frames to extract if needed
                        face_locations = face_recognition.face_locations(frame)
                        if face_locations:
                            top, right, bottom, left = face_locations[0]  # Use the first detected face
                            face_frame = frame[top:bottom, left:right]
                            face_frame = cv2.resize(face_frame, (256, 256))
                            frames.append(face_frame)
                            if len(frames) == 4:
                                for frame in frames:
                                    out.write(frame)
                                frames = []
                out.release()
                print("Processed:", out_path)

# Define video folders and their corresponding output folders
video_folders = [
    (celebdf_real_data, output_folders['celebdf_real'])
]

# Process the videos in batches
create_face_videos_in_batches(video_folders, extracted_frames_output_parent_dir,max_videos=590, batch_size=20, seed=42)

Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id29_0007_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id28_0001_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id3_0007_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id41_0004_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id37_0006_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id56_0000_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id40_0003_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id30_0005_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skrips

In [26]:
pip install seed

Defaulting to user installation because normal site-packages is not writeable
Collecting seed
  Downloading seed-0.11.3.tar.gz (366 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting path.py<11,>=10 (from seed)
  Downloading path.py-10.6-py2.py3-none-any.whl.metadata (5.0 kB)
Downloading path.py-10.6-py2.py3-none-any.whl (32 kB)
Building wheels for collected packages: seed
  Building wheel for seed (setup.py) ... [?25ldone
[?25h  Created wheel for seed: filename=seed-0.11.3-py3-none-any.whl size=17755 sha256=6cda7ff04432d1f02d928d2bf7de10941986da5d19b8db1ef7d67b41790e7450
  Stored in directory: /Users/faishalkamil/Library/Caches/pip/wheels/ad/c0/5d/12e840455766f46b7b93a9d9ee70afce9c6614fcc09b48c00b
Successfully built seed
Installing collected packages: path.py, seed
Successfully installed path.py-10.6 seed-0.11.3
Note: you may need to restart the kernel to use updated packages.


In [28]:

import seed 

# Define subfolders for extracted data
output_folders = {
    'celebdf_fake': os.path.join(extracted_frames_output_parent_dir, 'celebdf fake')
    }

In [None]:

# Create subfolders if they don't exist
for folder in output_folders.values():
    os.makedirs(folder, exist_ok=True)

# Function to extract frames from a video
def frame_extract(path):
    vidObj = cv2.VideoCapture(path)
    success = 1
    while success:
        success, image = vidObj.read()
        if success:
            yield image

# Process the frames and detect faces
def create_face_videos_in_batches(path_list, parent_out_dir, max_videos=590, batch_size=20, seed=42):
    random.seed(seed)

    for folder_path, output_folder in path_list:
        video_files = [file for file in os.listdir(folder_path) if file.endswith(".mp4")]
        random.shuffle(video_files)

        if output_folder == output_folders['celebdf_fake']:
            video_files = video_files[:max_videos]

        total_files = len(video_files)

        for start_idx in range(0, total_files, batch_size):
            end_idx = min(start_idx + batch_size, total_files)
            batch_files = video_files[start_idx:end_idx]

            for file in batch_files:
                file_path = os.path.join(folder_path, file)
                file_name = os.path.splitext(file)[0]
                out_path = os.path.join(output_folder, f'{file_name}_faces.mp4')

                if os.path.exists(out_path):
                    print("File already exists:", out_path)
                    continue

                out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), 30, (256, 256))
                frames = []
                for idx, frame in enumerate(frame_extract(file_path)):
                    if idx <= 150:  # Adjust the number of frames to extract if needed
                        face_locations = face_recognition.face_locations(frame)
                        if face_locations:
                            top, right, bottom, left = face_locations[0]  # Use the first detected face
                            face_frame = frame[top:bottom, left:right]
                            face_frame = cv2.resize(face_frame, (256, 256))
                            frames.append(face_frame)
                            if len(frames) == 4:
                                for frame in frames:
                                    out.write(frame)
                                frames = []
                out.release()
                print("Processed:", out_path)

# Define video folders and their corresponding output folders
video_folders = [
    (celebdf_fake_data, output_folders['celebdf_fake'])
]

# Process the videos in batches
create_face_videos_in_batches(video_folders, extracted_frames_output_parent_dir,max_videos=590, batch_size=20, seed=42)

Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake/id40_id41_0006_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake/id21_id16_0006_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake/id23_id0_0003_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake/id30_id4_0006_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake/id28_id20_0004_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake/id22_id25_0005_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake/id27_id19_0001_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake/id45_id48_0000_faces.mp4
Processed: /Users/