# Statistics of FaceForensics++

In [1]:
import os
import cv2
import shutil

In [3]:
def calculate_video_statistics(video_folder):
    """Aggregate frame-count and duration statistics for the ``.mp4`` files under a folder.

    The folder is walked recursively with ``os.walk``. Every file whose name
    ends in ``.mp4`` is opened with OpenCV, and the frame count and frame rate
    reported by the capture are read. Videos whose reported frame rate is not
    a positive number are left out of every total and average (presumably
    this is what OpenCV reports for files it cannot open).

    Args:
        video_folder: Root directory to search for ``.mp4`` files.

    Returns:
        A 4-tuple ``(total_frame_count, total_video_length,
        average_frame_per_video, average_video_length_per_video)``.
        Lengths are in seconds. All four values are 0 when no usable video
        was found.
    """
    total_frame_count = 0
    total_video_length = 0
    total_video_count = 0

    for root, _dirs, files in os.walk(video_folder):
        for file in files:
            if not file.endswith(".mp4"):  # Assuming all video files are in mp4 format
                continue

            cap = cv2.VideoCapture(os.path.join(root, file))
            try:
                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                # Keep the frame rate as a float: truncating it to an int turns
                # e.g. 29.97 fps into 29 and overstates the duration, and turns
                # anything below 1 fps into 0 so the video is skipped entirely.
                fps = cap.get(cv2.CAP_PROP_FPS)
            finally:
                # Release the capture even if reading a property fails.
                cap.release()

            # A non-positive (or NaN) frame rate cannot yield a duration.
            if fps > 0:
                total_frame_count += frame_count
                total_video_length += frame_count / fps
                total_video_count += 1

    if total_video_count == 0:
        return total_frame_count, total_video_length, 0, 0

    # Per-video averages over the videos that were actually counted.
    average_frame_per_video = total_frame_count / total_video_count
    average_video_length_per_video = total_video_length / total_video_count

    return total_frame_count, total_video_length, average_frame_per_video, average_video_length_per_video

def merge_video_folders(destination_folder, *folders):
    """Copy every ``.mp4`` found under the given folders into one flat folder.

    Each source folder is walked recursively. A file is copied with
    ``shutil.copy2`` only when no file of the same name already exists in
    ``destination_folder``, so the first file seen with a given name wins and
    later files with that name are not copied.

    Args:
        destination_folder: Folder that receives the copies; created if missing.
        *folders: Source folders to search for ``.mp4`` files.

    Returns:
        List of destination paths, one entry per distinct file name, in the
        order the names were first encountered. A name that occurs in several
        source folders is listed once, so statistics computed from this list
        do not count the same merged file twice.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(destination_folder, exist_ok=True)

    merged_files = []
    already_listed = set()
    for folder in folders:
        for root, _dirs, files in os.walk(folder):
            for file in files:
                if not file.endswith(".mp4"):
                    continue

                destination_file = os.path.join(destination_folder, file)
                if not os.path.exists(destination_file):
                    shutil.copy2(os.path.join(root, file), destination_file)

                # The same basename maps to the same destination path; report
                # that path only once.
                if destination_file not in already_listed:
                    already_listed.add(destination_file)
                    merged_files.append(destination_file)
    return merged_files

def calculate_statistics_from_file_list(file_list):
    """Aggregate frame-count and duration statistics for an explicit list of videos.

    Each path is opened with OpenCV, and the frame count and frame rate
    reported by the capture are read. Videos whose reported frame rate is not
    a positive number are left out of every total and average (presumably
    this is what OpenCV reports for files it cannot open).

    Args:
        file_list: Iterable of video file paths.

    Returns:
        A 4-tuple ``(total_frame_count, total_video_length,
        average_frame_per_video, average_video_length_per_video)``.
        Lengths are in seconds. All four values are 0 when no usable video
        was found.
    """
    total_frame_count = 0
    total_video_length = 0
    total_video_count = 0

    for video_path in file_list:
        cap = cv2.VideoCapture(video_path)
        try:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # Keep the frame rate as a float: truncating it to an int turns
            # e.g. 29.97 fps into 29 and overstates the duration, and turns
            # anything below 1 fps into 0 so the video is skipped entirely.
            fps = cap.get(cv2.CAP_PROP_FPS)
        finally:
            # Release the capture even if reading a property fails.
            cap.release()

        # A non-positive (or NaN) frame rate cannot yield a duration.
        if fps > 0:
            total_frame_count += frame_count
            total_video_length += frame_count / fps
            total_video_count += 1

    if total_video_count == 0:
        return total_frame_count, total_video_length, 0, 0

    # Per-video averages over the videos that were actually counted.
    average_frame_per_video = total_frame_count / total_video_count
    average_video_length_per_video = total_video_length / total_video_count

    return total_frame_count, total_video_length, average_frame_per_video, average_video_length_per_video


In [4]:
# Folder that holds the real (actor) FaceForensics++ videos
ff_real_data = '/Users/faishalkamil/Downloads/documents skripsi/Faceforensics++/actors/c23/videos'

# Totals and per-video averages for every .mp4 found under that folder
ff_real_stats = calculate_video_statistics(video_folder=ff_real_data)

In [5]:
# Report the four statistics in the order the tuple returns them:
# total frames, total seconds, mean frames per video, mean seconds per video.
print("Faceforensics++ Real Data:")
print(f"Total Frame Count: {ff_real_stats[0]}")
print(f"Total Video Length (seconds): {ff_real_stats[1]}")
print(f"Average Frames per Video: {ff_real_stats[2]}")
print(f"Average Video Length per Video (seconds): {ff_real_stats[3]}")
print()

Faceforensics++ Real Data:
Total Frame Count: 315381
Total Video Length (seconds): 13140.874999999998
Average Frames per Video: 868.8181818181819
Average Video Length per Video (seconds): 36.20075757575757



## Data Preprocessing: Real Data

In [2]:
import os
import cv2
import face_recognition
import random

In [7]:
# Source folder: real (actor) FaceForensics++ videos
ff_real_data = '/Users/faishalkamil/Downloads/documents skripsi/Faceforensics++/actors/c23/videos'

# Root folder under which the extracted face videos are stored
extracted_frames_output_parent_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new'

# One output subfolder per dataset key, located under the root above
output_folders = dict(
    ff_real=os.path.join(extracted_frames_output_parent_dir, 'ff real'),
)

In [None]:

# Create subfolders if they don't exist
for folder in output_folders.values():
    # exist_ok=True keeps this cell safe to re-run once the folders are there
    os.makedirs(folder, exist_ok=True)

# Function to extract frames from a video
def frame_extract(path):
    """Yield the frames of a video one at a time, in order.

    Args:
        path: Path of the video file to open with ``cv2.VideoCapture``.

    Yields:
        Each image returned by ``VideoCapture.read()`` until a read reports
        failure, which ends the iteration.
    """
    vidObj = cv2.VideoCapture(path)
    try:
        while True:
            success, image = vidObj.read()
            if not success:
                break
            yield image
    finally:
        # Runs when the video is exhausted and also when the consumer stops
        # early (generator closed), so the capture handle is never leaked.
        vidObj.release()

# Process the frames and detect faces
def create_face_videos_in_batches(path_list, parent_out_dir, max_videos=364, batch_size=20, seed=42):
    """Write a 256x256 face-crop video for each source video.

    For every ``(folder_path, output_folder)`` pair, the ``.mp4`` files
    directly inside ``folder_path`` are shuffled and each one is converted to
    ``<name>_faces.mp4`` in ``output_folder`` (mp4v codec, 30 fps). Source
    videos whose output file already exists are skipped.

    Only frames with index 0..150 of a source video are examined. In each of
    them the first face reported by ``face_recognition.face_locations`` is
    cropped and resized to 256x256; frames without a detected face are
    ignored. Crops are written in groups of four, so up to three trailing
    crops of a video are not written.

    Args:
        path_list: Iterable of ``(folder_path, output_folder)`` pairs.
        parent_out_dir: Not used by this function; kept so existing calls
            keep working.
        max_videos: Maximum number of videos to process. Applied only when
            ``output_folder`` equals the notebook-level
            ``output_folders['ff_real']``.
        batch_size: Number of videos taken from the shuffled list per batch.
        seed: Seed passed to ``random.seed`` before shuffling.
    """
    random.seed(seed)

    for folder_path, output_folder in path_list:
        video_files = [file for file in os.listdir(folder_path) if file.endswith(".mp4")]
        # NOTE(review): os.listdir returns names in arbitrary order, so the
        # seeded shuffle is only repeatable while the listing order is stable.
        random.shuffle(video_files)

        if output_folder == output_folders['ff_real']:
            video_files = video_files[:max_videos]

        total_files = len(video_files)

        for start_idx in range(0, total_files, batch_size):
            batch_files = video_files[start_idx:start_idx + batch_size]

            for file in batch_files:
                file_path = os.path.join(folder_path, file)
                file_name = os.path.splitext(file)[0]
                out_path = os.path.join(output_folder, f'{file_name}_faces.mp4')

                if os.path.exists(out_path):
                    print("File already exists:", out_path)
                    continue

                out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), 30, (256, 256))
                completed = False
                try:
                    pending_faces = []
                    for idx, frame in enumerate(frame_extract(file_path)):
                        if idx > 150:  # Adjust the number of frames to extract if needed
                            # Later frames are never used, so stop decoding
                            # instead of reading the rest of the video.
                            break
                        face_locations = face_recognition.face_locations(frame)
                        if not face_locations:
                            continue
                        top, right, bottom, left = face_locations[0]  # Use the first detected face
                        face_frame = cv2.resize(frame[top:bottom, left:right], (256, 256))
                        pending_faces.append(face_frame)
                        if len(pending_faces) == 4:
                            # Distinct loop name: the original reused `frame`
                            # here and shadowed the outer loop variable.
                            for face in pending_faces:
                                out.write(face)
                            pending_faces = []
                    completed = True
                finally:
                    out.release()
                    # A half-written file would be reported as "already
                    # exists" and skipped on the next run, so remove it when
                    # processing was interrupted or failed.
                    if not completed and os.path.exists(out_path):
                        os.remove(out_path)
                print("Processed:", out_path)

# Pair each source folder with the folder that receives its face videos
video_folders = [(ff_real_data, output_folders['ff_real'])]

# Run the face extraction over every configured pair
create_face_videos_in_batches(
    video_folders,
    extracted_frames_output_parent_dir,
    max_videos=364,
    batch_size=20,
    seed=42,
)

Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real/28__exit_phone_room_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real/06__walking_down_indoor_hall_disgust_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real/15__walk_down_hall_angry_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real/17__outside_talking_still_laughing_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real/15__exit_phone_room_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real/27__outside_talking_pan_laughing_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real/21__walking_and_outside_surprised_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw da

## Data Preprocessing: Fake Data

In [3]:
# Source folder: manipulated (fake) FaceForensics++ DeepFakeDetection videos
ff_fake_data = '/Users/faishalkamil/Downloads/documents skripsi/Faceforensics++/fake/DeepFakeDetection/c23/videos'

# Root folder under which the extracted face videos are stored
extracted_frames_output_parent_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new'

# One output subfolder per dataset key, located under the root above
output_folders = dict(
    ff_fake=os.path.join(extracted_frames_output_parent_dir, 'ff fake'),
)

In [None]:

# Create subfolders if they don't exist
for folder in output_folders.values():
    # exist_ok=True keeps this cell safe to re-run once the folders are there
    os.makedirs(folder, exist_ok=True)

# Function to extract frames from a video
def frame_extract(path):
    """Yield the frames of a video one at a time, in order.

    Args:
        path: Path of the video file to open with ``cv2.VideoCapture``.

    Yields:
        Each image returned by ``VideoCapture.read()`` until a read reports
        failure, which ends the iteration.
    """
    vidObj = cv2.VideoCapture(path)
    try:
        while True:
            success, image = vidObj.read()
            if not success:
                break
            yield image
    finally:
        # Runs when the video is exhausted and also when the consumer stops
        # early (generator closed), so the capture handle is never leaked.
        vidObj.release()

# Process the frames and detect faces
def create_face_videos_in_batches(path_list, parent_out_dir, max_videos=364, batch_size=20, seed=42):
    """Write a 256x256 face-crop video for each source video.

    For every ``(folder_path, output_folder)`` pair, the ``.mp4`` files
    directly inside ``folder_path`` are shuffled and each one is converted to
    ``<name>_faces.mp4`` in ``output_folder`` (mp4v codec, 30 fps). Source
    videos whose output file already exists are skipped.

    Only frames with index 0..150 of a source video are examined. In each of
    them the first face reported by ``face_recognition.face_locations`` is
    cropped and resized to 256x256; frames without a detected face are
    ignored. Crops are written in groups of four, so up to three trailing
    crops of a video are not written.

    Args:
        path_list: Iterable of ``(folder_path, output_folder)`` pairs.
        parent_out_dir: Not used by this function; kept so existing calls
            keep working.
        max_videos: Maximum number of videos to process. Applied only when
            ``output_folder`` equals the notebook-level
            ``output_folders['ff_fake']``.
        batch_size: Number of videos taken from the shuffled list per batch.
        seed: Seed passed to ``random.seed`` before shuffling.
    """
    random.seed(seed)

    for folder_path, output_folder in path_list:
        video_files = [file for file in os.listdir(folder_path) if file.endswith(".mp4")]
        # NOTE(review): os.listdir returns names in arbitrary order, so the
        # seeded shuffle is only repeatable while the listing order is stable.
        random.shuffle(video_files)

        if output_folder == output_folders['ff_fake']:
            video_files = video_files[:max_videos]

        total_files = len(video_files)

        for start_idx in range(0, total_files, batch_size):
            batch_files = video_files[start_idx:start_idx + batch_size]

            for file in batch_files:
                file_path = os.path.join(folder_path, file)
                file_name = os.path.splitext(file)[0]
                out_path = os.path.join(output_folder, f'{file_name}_faces.mp4')

                if os.path.exists(out_path):
                    print("File already exists:", out_path)
                    continue

                out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), 30, (256, 256))
                completed = False
                try:
                    pending_faces = []
                    for idx, frame in enumerate(frame_extract(file_path)):
                        if idx > 150:  # Adjust the number of frames to extract if needed
                            # Later frames are never used, so stop decoding
                            # instead of reading the rest of the video.
                            break
                        face_locations = face_recognition.face_locations(frame)
                        if not face_locations:
                            continue
                        top, right, bottom, left = face_locations[0]  # Use the first detected face
                        face_frame = cv2.resize(frame[top:bottom, left:right], (256, 256))
                        pending_faces.append(face_frame)
                        if len(pending_faces) == 4:
                            # Distinct loop name: the original reused `frame`
                            # here and shadowed the outer loop variable.
                            for face in pending_faces:
                                out.write(face)
                            pending_faces = []
                    completed = True
                finally:
                    out.release()
                    # A half-written file would be reported as "already
                    # exists" and skipped on the next run, so remove it when
                    # processing was interrupted or failed.
                    if not completed and os.path.exists(out_path):
                        os.remove(out_path)
                print("Processed:", out_path)

# Pair each source folder with the folder that receives its face videos
video_folders = [(ff_fake_data, output_folders['ff_fake'])]

In [5]:

# Run the face extraction over every configured (source, output) pair
create_face_videos_in_batches(
    video_folders,
    extracted_frames_output_parent_dir,
    max_videos=364,
    batch_size=20,
    seed=42,
)

Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff fake/23_19__walking_and_outside_surprised__H4SUVFTL_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff fake/26_13__podium_speech_happy__U5ELHUN5_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff fake/07_09__kitchen_still__N9CWME71_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff fake/15_02__talking_against_wall__MZWH8ATN_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff fake/06_07__outside_talking_still_laughing__NMGYPBXE_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff fake/03_04__talking_angry_couch__T04P6ELC_faces.mp4
Processed: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff fake/03_07__kitchen_pan__WPT3Z2KN_faces.mp4
Processed: /