# Data Preparation (After Cropped Face Data)

## Statistics Preprocessed Data

In [1]:
import os
import cv2
import shutil

In [2]:
def calculate_video_statistics(video_folder):
    """Aggregate frame-count and duration statistics for the ``.mp4`` files under a folder.

    The folder is walked recursively. Every file whose name ends with ``.mp4``
    is opened with ``cv2.VideoCapture`` and its reported frame count and FPS
    are read; the duration of one video is ``frame_count / fps``.

    A video whose reported FPS is not greater than zero is left out of every
    total and average (presumably this is what OpenCV reports for a file it
    cannot read -- TODO confirm).

    Args:
        video_folder: Root directory to search.

    Returns:
        Tuple ``(total_frame_count, total_video_length,
        average_frame_per_video, average_video_length_per_video)``. Lengths
        are in seconds; both averages are ``0`` when no video was counted.
    """
    total_frame_count = 0
    total_video_length = 0
    total_video_count = 0

    for root, _dirs, files in os.walk(video_folder):
        for file in files:
            if not file.endswith(".mp4"):  # Assuming all video files are in mp4 format
                continue

            cap = cv2.VideoCapture(os.path.join(root, file))
            try:
                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                # Keep the FPS as a float: int() would turn 29.97 into 29
                # (skewing the duration) and any rate below 1 into 0
                # (silently dropping the video).
                fps = cap.get(cv2.CAP_PROP_FPS)
            finally:
                # Release the capture even if reading a property raises.
                cap.release()

            # `fps > 0` is also False for NaN, so the division is always safe.
            if fps > 0:
                total_frame_count += frame_count
                total_video_length += frame_count / fps
                total_video_count += 1

    if total_video_count == 0:
        return total_frame_count, total_video_length, 0, 0

    return (
        total_frame_count,
        total_video_length,
        total_frame_count / total_video_count,
        total_video_length / total_video_count,
    )

def merge_video_folders(destination_folder, *folders):
    """Copy every ``.mp4`` found under ``folders`` into one flat destination folder.

    Each source folder is walked recursively and files are copied with
    ``shutil.copy2`` under their bare file name. A file that already exists in
    the destination (for example from an earlier run) is not copied again but
    is still reported.

    Because the destination is flat, two sources with the same file name map
    to the same destination path. Only the first one seen during a call is
    used; later ones are reported on stdout and skipped, so the returned list
    never contains the same path twice.

    Args:
        destination_folder: Folder that receives the copies; created if missing.
        *folders: Source folders to search.

    Returns:
        List of destination paths, one per distinct merged video.
    """
    os.makedirs(destination_folder, exist_ok=True)

    all_files = []
    claimed_by = {}  # destination path -> source path that claimed it in this call
    for folder in folders:
        for root, _dirs, files in os.walk(folder):
            for file in files:
                if not file.endswith(".mp4"):
                    continue

                source_file = os.path.join(root, file)
                destination_file = os.path.join(destination_folder, file)

                if destination_file in claimed_by:
                    # Same basename already merged in this call: listing the
                    # destination again would double-count the first video.
                    print(
                        f"Skipping {source_file}: file name already used by "
                        f"{claimed_by[destination_file]}"
                    )
                    continue
                claimed_by[destination_file] = source_file

                if not os.path.exists(destination_file):
                    shutil.copy2(source_file, destination_file)
                all_files.append(destination_file)
    return all_files

def calculate_statistics_from_file_list(file_list):
    """Aggregate frame-count and duration statistics for an explicit list of videos.

    Every path is opened with ``cv2.VideoCapture`` and its reported frame
    count and FPS are read; the duration of one video is ``frame_count / fps``.
    A video whose reported FPS is not greater than zero is left out of every
    total and average.

    Args:
        file_list: Iterable of video file paths.

    Returns:
        Tuple ``(total_frame_count, total_video_length,
        average_frame_per_video, average_video_length_per_video)``. Lengths
        are in seconds; both averages are ``0`` when no video was counted.
    """
    total_frame_count = 0
    total_video_length = 0
    total_video_count = 0

    for video_path in file_list:
        cap = cv2.VideoCapture(video_path)
        try:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # Keep the FPS as a float: int() would turn 29.97 into 29 (skewing
            # the duration) and any rate below 1 into 0 (dropping the video).
            fps = cap.get(cv2.CAP_PROP_FPS)
        finally:
            # Release the capture even if reading a property raises.
            cap.release()

        # `fps > 0` is also False for NaN, so the division is always safe.
        if fps > 0:
            total_frame_count += frame_count
            total_video_length += frame_count / fps
            total_video_count += 1

    if total_video_count == 0:
        return total_frame_count, total_video_length, 0, 0

    return (
        total_frame_count,
        total_video_length,
        total_frame_count / total_video_count,
        total_video_length / total_video_count,
    )


In [None]:
# Folders holding the cropped-face videos, one per dataset/label combination.
# NOTE(review): these are absolute paths on one machine; the cell only runs where
# this directory layout exists.
celebdf_fake_processed_data = '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake'
celebdf_real_processed_data = '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real'
ff_real_processed_data = '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real'
ff_fake_processed_data = '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff fake'

# Calculate statistics for each folder. Each result is the 4-tuple returned by
# calculate_video_statistics: (total frames, total seconds, average frames per
# video, average seconds per video). The print cells below index into these.
celebdf_fake_processed_stats = calculate_video_statistics(celebdf_fake_processed_data)
celebdf_real_processed_stats = calculate_video_statistics(celebdf_real_processed_data)
ff_real_processed_stats = calculate_video_statistics(ff_real_processed_data)
ff_fake_processed_stats = calculate_video_statistics(ff_fake_processed_data)


OpenCV: Couldn't read video stream from file "/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id27_0005_faces.mp4"
OpenCV: Couldn't read video stream from file "/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real/17__walking_down_street_outside_angry_faces.mp4"


## General statistics for the original data

In [4]:
# Report the FaceForensics++ fake-video statistics, one "label value" line per entry.
print("FaceForensics++ Fake Data:")
print("\n".join(
    f"{label} {value}"
    for label, value in zip(
        (
            "Total Frame Count:",
            "Total Video Length (seconds):",
            "Average Frames per Video:",
            "Average Video Length per Video (seconds):",
        ),
        ff_fake_processed_stats,
    )
))
print()

FaceForensics++ Fake Data:
Total Frame Count: 52216
Total Video Length (seconds): 1740.533333333336
Average Frames per Video: 143.45054945054946
Average Video Length per Video (seconds): 4.781684981684989



In [7]:
# Report the FaceForensics++ real-video statistics, one "label value" line per entry.
print("FaceForensics++ Real Data:")
print("\n".join(
    f"{label} {value}"
    for label, value in zip(
        (
            "Total Frame Count:",
            "Total Video Length (seconds):",
            "Average Frames per Video:",
            "Average Video Length per Video (seconds):",
        ),
        ff_real_processed_stats,
    )
))
print()

FaceForensics++ Real Data:
Total Frame Count: 48944
Total Video Length (seconds): 1631.466666666667
Average Frames per Video: 135.20441988950276
Average Video Length per Video (seconds): 4.506813996316759



In [15]:
# Report the Celeb-DF fake-video statistics, one "label value" line per entry.
print("Celeb-DF Fake Data:")
print("\n".join(
    f"{label} {value}"
    for label, value in zip(
        (
            "Total Frame Count:",
            "Total Video Length (seconds):",
            "Average Frames per Video:",
            "Average Video Length per Video (seconds):",
        ),
        celebdf_fake_processed_stats,
    )
))
print()

Celeb-DF Fake Data:
Total Frame Count: 87256
Total Video Length (seconds): 2908.5333333333506
Average Frames per Video: 147.8915254237288
Average Video Length per Video (seconds): 4.929717514124323



In [25]:
# Report the Celeb-DF real-video statistics, one "label value" line per entry.
print("Celeb-DF real Data:")
print("\n".join(
    f"{label} {value}"
    for label, value in zip(
        (
            "Total Frame Count:",
            "Total Video Length (seconds):",
            "Average Frames per Video:",
            "Average Video Length per Video (seconds):",
        ),
        celebdf_real_processed_stats,
    )
))
print()

Celeb-DF real Data:
Total Frame Count: 86580
Total Video Length (seconds): 2886.0000000000177
Average Frames per Video: 146.9949066213922
Average Video Length per Video (seconds): 4.8998302207131035



## Transformed data statistics (generated after the extraction from the transformed data is done)

In [12]:
import os
import cv2
import shutil

# Base directory where the transformed (augmented) extracted frames are stored
# NOTE(review): neither this name nor `datasets` is referenced by the functions
# defined in this cell -- confirm whether they are still needed here.
extracted_frames_main_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted frames/transformed'

# Define paths for the datasets (dataset key -> folder)
# NOTE(review): these point below "extracted raw dataset new/transformed", not
# below the frames directory above -- verify which location is intended.
datasets = {
    'celebdf_real': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/celebdf_real',
    'celebdf_fake': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/celebdf_fake',
    'ff_real': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/ff_real',
    'ff_fake': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/ff_fake'
}

def calculate_video_statistics(video_folder):
    """Aggregate frame-count and duration statistics for the ``.mp4`` files under a folder.

    NOTE(review): this re-defines the function of the same name declared
    earlier in the notebook and shadows it once this cell runs; keep the two
    in sync or delete one of them.

    The folder is walked recursively. Every file whose name ends with ``.mp4``
    is opened with ``cv2.VideoCapture`` and its reported frame count and FPS
    are read; the duration of one video is ``frame_count / fps``. A video
    whose reported FPS is not greater than zero is left out of every total
    and average.

    Args:
        video_folder: Root directory to search.

    Returns:
        Tuple ``(total_frame_count, total_video_length,
        average_frame_per_video, average_video_length_per_video)``. Lengths
        are in seconds; both averages are ``0`` when no video was counted.
    """
    total_frame_count = 0
    total_video_length = 0
    total_video_count = 0

    for root, _dirs, files in os.walk(video_folder):
        for file in files:
            if not file.endswith(".mp4"):  # Assuming all video files are in mp4 format
                continue

            cap = cv2.VideoCapture(os.path.join(root, file))
            try:
                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                # Keep the FPS as a float: int() would turn 29.97 into 29
                # (skewing the duration) and any rate below 1 into 0
                # (silently dropping the video).
                fps = cap.get(cv2.CAP_PROP_FPS)
            finally:
                # Release the capture even if reading a property raises.
                cap.release()

            # `fps > 0` is also False for NaN, so the division is always safe.
            if fps > 0:
                total_frame_count += frame_count
                total_video_length += frame_count / fps
                total_video_count += 1

    if total_video_count == 0:
        return total_frame_count, total_video_length, 0, 0

    return (
        total_frame_count,
        total_video_length,
        total_frame_count / total_video_count,
        total_video_length / total_video_count,
    )

def merge_video_folders(destination_folder, *folders):
    """Copy every ``.mp4`` found under ``folders`` into one flat destination folder.

    NOTE(review): this re-defines the function of the same name declared
    earlier in the notebook and shadows it once this cell runs; keep the two
    in sync or delete one of them.

    Each source folder is walked recursively and files are copied with
    ``shutil.copy2`` under their bare file name. A file that already exists in
    the destination is not copied again but is still reported. When two
    sources share a file name, only the first one seen during a call is used;
    later ones are reported on stdout and skipped, so the returned list never
    contains the same path twice.

    Args:
        destination_folder: Folder that receives the copies; created if missing.
        *folders: Source folders to search.

    Returns:
        List of destination paths, one per distinct merged video.
    """
    os.makedirs(destination_folder, exist_ok=True)

    all_files = []
    claimed_by = {}  # destination path -> source path that claimed it in this call
    for folder in folders:
        for root, _dirs, files in os.walk(folder):
            for file in files:
                if not file.endswith(".mp4"):
                    continue

                source_file = os.path.join(root, file)
                destination_file = os.path.join(destination_folder, file)

                if destination_file in claimed_by:
                    # Same basename already merged in this call: listing the
                    # destination again would double-count the first video.
                    print(
                        f"Skipping {source_file}: file name already used by "
                        f"{claimed_by[destination_file]}"
                    )
                    continue
                claimed_by[destination_file] = source_file

                if not os.path.exists(destination_file):
                    shutil.copy2(source_file, destination_file)
                all_files.append(destination_file)
    return all_files

def calculate_statistics_from_file_list(file_list):
    """Aggregate frame-count and duration statistics for an explicit list of videos.

    NOTE(review): this re-defines the function of the same name declared
    earlier in the notebook and shadows it once this cell runs; keep the two
    in sync or delete one of them.

    Every path is opened with ``cv2.VideoCapture`` and its reported frame
    count and FPS are read; the duration of one video is ``frame_count / fps``.
    A video whose reported FPS is not greater than zero is left out of every
    total and average.

    Args:
        file_list: Iterable of video file paths.

    Returns:
        Tuple ``(total_frame_count, total_video_length,
        average_frame_per_video, average_video_length_per_video)``. Lengths
        are in seconds; both averages are ``0`` when no video was counted.
    """
    total_frame_count = 0
    total_video_length = 0
    total_video_count = 0

    for video_path in file_list:
        cap = cv2.VideoCapture(video_path)
        try:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # Keep the FPS as a float: int() would turn 29.97 into 29 (skewing
            # the duration) and any rate below 1 into 0 (dropping the video).
            fps = cap.get(cv2.CAP_PROP_FPS)
        finally:
            # Release the capture even if reading a property raises.
            cap.release()

        # `fps > 0` is also False for NaN, so the division is always safe.
        if fps > 0:
            total_frame_count += frame_count
            total_video_length += frame_count / fps
            total_video_count += 1

    if total_video_count == 0:
        return total_frame_count, total_video_length, 0, 0

    return (
        total_frame_count,
        total_video_length,
        total_frame_count / total_video_count,
        total_video_length / total_video_count,
    )


In [None]:
# Define file paths based on the transformed datasets
# NOTE(review): the saved output of the print cell that follows reports all zeros
# for ff_fake, which calculate_video_statistics returns only when it counted no
# .mp4 under the folder -- confirm these folders actually contain videos.
celebdf_fake_transformed_data = '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/celebdf_fake'
celebdf_real_transformed_data = '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/celebdf_real'
ff_real_transformed_data = '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/ff_real'
ff_fake_transformed_data = '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/ff_fake'

# Calculate statistics for each folder; each result is the 4-tuple
# (total frames, total seconds, average frames per video, average seconds per video).
celebdf_fake_transformed_stats = calculate_video_statistics(celebdf_fake_transformed_data)
celebdf_real_transformed_stats = calculate_video_statistics(celebdf_real_transformed_data)
ff_real_transformed_stats = calculate_video_statistics(ff_real_transformed_data)
ff_fake_transformed_stats = calculate_video_statistics(ff_fake_transformed_data)


In [11]:
# Report the transformed FaceForensics++ fake statistics, one "label value" line per entry.
print("FaceForensics++ Fake Data Transformed:")
print("\n".join(
    f"{label} {value}"
    for label, value in zip(
        (
            "Total Frame Count:",
            "Total Video Length (seconds):",
            "Average Frames per Video:",
            "Average Video Length per Video (seconds):",
        ),
        ff_fake_transformed_stats,
    )
))
print()

FaceForensics++ Fake Data Transformed:
Total Frame Count: 0
Total Video Length (seconds): 0
Average Frames per Video: 0
Average Video Length per Video (seconds): 0



## Extract the frames for all datasets (FaceForensics++ and Celeb-DF, both real and fake data)

## Not used

In [17]:
import os
import cv2
from torch.utils.data import Dataset

In [18]:
# Root folder that receives one sub-folder per dataset, each holding one
# sub-folder of .jpg frames per video (see VideoDataset.extract_frames).
extracted_frames_main_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted frames'

# Create the directory if it doesn't exist
if not os.path.exists(extracted_frames_main_dir):
    os.makedirs(extracted_frames_main_dir)

# Custom dataset class to load videos and extract frames
class VideoDataset(Dataset):
    """Dataset over the readable ``.mp4`` files of one directory.

    Besides serving ``(frames, label)`` items, it can dump every frame of
    every readable video to disk via :meth:`extract_frames`.

    Args:
        video_dir: Directory scanned (non-recursively) for ``.mp4`` files.
        output_dir: Directory under which ``extract_frames`` creates one
            sub-folder per video.
        max_frames: Optional fixed clip length used by ``__getitem__``.
            Shorter videos are padded with all-zero frames and longer ones are
            truncated. ``None`` (the default) returns every decoded frame.
    """

    def __init__(self, video_dir, output_dir, max_frames=None):
        self.video_dir = video_dir
        self.output_dir = output_dir
        # Previously read by __getitem__ but never assigned anywhere.
        self.max_frames = max_frames
        # Sorted so that item indices do not depend on the order os.listdir returns.
        self.video_files = sorted(
            os.path.join(video_dir, f) for f in os.listdir(video_dir) if f.endswith('.mp4')
        )
        self.valid_video_files = self._filter_valid_videos()

    def _filter_valid_videos(self):
        """Return the videos that open and yield at least one frame; report the others."""
        valid_videos = []
        for video_path in self.video_files:
            cap = cv2.VideoCapture(video_path)
            try:
                valid = cap.isOpened() and cap.read()[0]
            finally:
                cap.release()
            if valid:
                valid_videos.append(video_path)
            else:
                print(f"No frames extracted from video: {video_path}")
        return valid_videos

    def __len__(self):
        """Number of readable videos."""
        return len(self.valid_video_files)

    def __getitem__(self, idx):
        """Decode one video completely and return ``(frames, label)``.

        Returns:
            ``frames`` is a list of the frames returned by ``cap.read()``
            (padded or truncated to ``max_frames`` when that is set);
            ``label`` is ``1`` when the path contains ``'fake'``, else ``0``.

        Raises:
            RuntimeError: If no frame could be decoded.
        """
        # Local import: the notebook's import cell does not provide numpy.
        import numpy as np

        video_path = self.valid_video_files[idx]
        label = 1 if 'fake' in video_path else 0  # Assuming you determine labels based on file path

        frames = []
        cap = cv2.VideoCapture(video_path)
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(frame)
        finally:
            cap.release()

        if len(frames) == 0:
            raise RuntimeError(f"No frames extracted from video: {video_path}")

        # Pad or truncate to max_frames (skipped when no fixed length was requested)
        if self.max_frames is not None:
            if len(frames) < self.max_frames:
                pad_size = self.max_frames - len(frames)
                # Decoded frames are NumPy arrays, so the padding must be too
                # (torch.zeros_like would reject them).
                frames.extend(np.zeros_like(frames[0]) for _ in range(pad_size))
            elif len(frames) > self.max_frames:
                frames = frames[:self.max_frames]

        return frames, label  # Only return frames and label

    def extract_frames(self):
        """
        Extracts frames from each video and saves them in a structured directory.

        Layout: ``<output_dir>/<video name without extension>/frame_NNNN.jpg``.
        One summary line per video is printed.
        """
        for video_path in self.valid_video_files:
            video_name = os.path.splitext(os.path.basename(video_path))[0]

            # Define the output directory for this video's frames
            video_output_dir = os.path.join(self.output_dir, video_name)
            os.makedirs(video_output_dir, exist_ok=True)

            frame_count = 0
            cap = cv2.VideoCapture(video_path)
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break

                    # Save each frame as a .jpg file
                    frame_filename = os.path.join(video_output_dir, f'frame_{frame_count:04d}.jpg')
                    cv2.imwrite(frame_filename, frame)
                    frame_count += 1
            finally:
                # Release the capture even if writing a frame raises.
                cap.release()
            print(f"Extracted {frame_count} frames from video: {video_name}")

In [19]:
# Define paths for the datasets: key -> folder of cropped-face .mp4 videos.
# The key is reused as the name of the per-dataset output folder by the loop that follows.
datasets = {
    'celebdf_real': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real',
    'celebdf_fake': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake',
    'ff_real': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real',
    'ff_fake': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff fake'
}

In [20]:
# Extract frames for each dataset
# Each iteration writes <extracted_frames_main_dir>/<dataset_name>/<video>/frame_NNNN.jpg.
# NOTE(review): the recorded output of this cell ends in KeyboardInterrupt, so
# the saved run did not finish.
for dataset_name, dataset_path in datasets.items():
    print(f"Processing dataset: {dataset_name}")

    # Define the output directory for extracted frames for this dataset
    output_dir = os.path.join(extracted_frames_main_dir, dataset_name)
    os.makedirs(output_dir, exist_ok=True)

    # Create a VideoDataset instance for this dataset
    # (construction already opens every video once to filter out unreadable ones)
    video_dataset = VideoDataset(video_dir=dataset_path, output_dir=output_dir)

    # Extract frames for all videos in the dataset
    video_dataset.extract_frames()

Processing dataset: celebdf_real
No frames extracted from video: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id27_0005_faces.mp4


OpenCV: Couldn't read video stream from file "/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id27_0005_faces.mp4"


Extracted 148 frames from video: id6_0003_faces
Extracted 148 frames from video: id7_0008_faces
Extracted 148 frames from video: id46_0002_faces
Extracted 148 frames from video: id53_0002_faces
Extracted 148 frames from video: id17_0001_faces
Extracted 148 frames from video: id52_0009_faces
Extracted 148 frames from video: id47_0009_faces
Extracted 148 frames from video: id28_0008_faces
Extracted 148 frames from video: id0_0005_faces
Extracted 148 frames from video: id29_0003_faces
Extracted 148 frames from video: id11_0007_faces
Extracted 148 frames from video: id40_0004_faces
Extracted 148 frames from video: id55_0004_faces
Extracted 148 frames from video: id22_0002_faces
Extracted 148 frames from video: id37_0002_faces
Extracted 148 frames from video: id36_0009_faces
Extracted 148 frames from video: id23_0009_faces
Extracted 148 frames from video: id59_0008_faces
Extracted 148 frames from video: id58_0003_faces
Extracted 148 frames from video: id60_0007_faces
Extracted 148 frames fr

KeyboardInterrupt: 

## Extract frames for the transformed data

In [5]:
import os
import cv2
from PIL import Image
from torchvision import transforms

In [None]:
# Set the main directory path for extracted frames and transformed frames.
# The transformed root is a sub-folder of the raw-frames root.
extracted_frames_main_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted frames'
transformed_frames_main_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted frames/transformed'

# Create the directory for transformed frames if it doesn't exist
if not os.path.exists(transformed_frames_main_dir):
    os.makedirs(transformed_frames_main_dir)

# Transformation for the frames with data augmentation.
# Input is an RGB array (converted to PIL first); output is a tensor from ToTensor.
# NOTE(review): ColorJitter draws new random parameters on every call and this
# pipeline is applied frame by frame, so consecutive frames of one video get
# different jitter -- confirm that this is intended. No random seed is set.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((112, 112)),  # every saved transformed frame is 112x112
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor()
])

# Custom class to load videos, extract frames, and save transformed frames
class VideoDataset:
    """Writes the raw and the augmented frames of every readable ``.mp4`` in a directory.

    ``video_dir`` is listed (non-recursively) at construction time and each
    candidate video is probed once; only videos that yield a first frame are
    kept in ``valid_video_files``. Augmentation uses the module-level
    ``transform`` pipeline.
    """

    def __init__(self, video_dir, output_dir, transformed_output_dir):
        self.video_dir = video_dir
        self.output_dir = output_dir
        self.transformed_output_dir = transformed_output_dir

        candidates = []
        for f in os.listdir(video_dir):
            if f.endswith('.mp4'):
                candidates.append(os.path.join(video_dir, f))
        self.video_files = candidates
        self.valid_video_files = self._filter_valid_videos()

    def _filter_valid_videos(self):
        """Return the videos from which a first frame can be read; report the rest."""
        def yields_a_frame(path):
            # Usable means: the capture opens and the first read succeeds.
            cap = cv2.VideoCapture(path)
            readable = bool(cap.isOpened() and cap.read()[0])
            cap.release()
            return readable

        valid_videos = []
        for video_path in self.video_files:
            if not yields_a_frame(video_path):
                print(f"No frames extracted from video: {video_path}")
                continue
            valid_videos.append(video_path)
        return valid_videos

    def extract_and_transform_frames(self):
        """
        Saves, for every valid video, each original frame and its transformed
        counterpart as .jpg files in per-video folders under ``output_dir`` and
        ``transformed_output_dir`` respectively, then prints a per-video summary.
        """
        for video_path in self.valid_video_files:
            cap = cv2.VideoCapture(video_path)
            video_name = os.path.splitext(os.path.basename(video_path))[0]

            # One folder per video for the raw frames and one for the transformed ones.
            video_output_dir = os.path.join(self.output_dir, video_name)
            transformed_output_dir = os.path.join(self.transformed_output_dir, video_name)
            for directory in (video_output_dir, transformed_output_dir):
                os.makedirs(directory, exist_ok=True)

            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Raw frame, exactly as decoded.
                cv2.imwrite(os.path.join(video_output_dir, f'frame_{frame_count:04d}.jpg'), frame)

                # BGR -> RGB for the transform, then the result is rescaled
                # by 255, cast to uint8 and converted back to BGR for saving.
                transformed_frame = transform(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                transformed_frame_np = (transformed_frame.permute(1, 2, 0).numpy() * 255).astype('uint8')
                cv2.imwrite(
                    os.path.join(transformed_output_dir, f'transformed_frame_{frame_count:04d}.jpg'),
                    cv2.cvtColor(transformed_frame_np, cv2.COLOR_RGB2BGR),
                )

                frame_count += 1

            cap.release()
            print(f"Extracted and transformed {frame_count} frames from video: {video_name}")

# Define paths for the datasets: key -> folder of cropped-face .mp4 videos.
# The key is reused as the per-dataset output folder name by the driver loop.
datasets = {
    'celebdf_real': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real',
    'celebdf_fake': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake',
    'ff_real': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real',
    'ff_fake': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff fake'
}


In [9]:
# Extract and transform frames for each dataset.
# Raw frames go to <extracted_frames_main_dir>/<dataset_name>/..., transformed
# frames to <transformed_frames_main_dir>/<dataset_name>/...
for dataset_name, dataset_path in datasets.items():
    print(f"Processing dataset: {dataset_name}")

    # Define the output directories for extracted and transformed frames for this dataset
    output_dir = os.path.join(extracted_frames_main_dir, dataset_name)
    transformed_output_dir = os.path.join(transformed_frames_main_dir, dataset_name)

    # Create directories if they do not exist
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(transformed_output_dir, exist_ok=True)

    # Create a VideoDataset instance for this dataset
    # (construction already opens every video once to filter out unreadable ones)
    video_dataset = VideoDataset(video_dir=dataset_path, output_dir=output_dir, transformed_output_dir=transformed_output_dir)

    # Extract and transform frames for all videos in the dataset
    video_dataset.extract_and_transform_frames()

Processing dataset: celebdf_real
No frames extracted from video: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id27_0005_faces.mp4


OpenCV: Couldn't read video stream from file "/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real/id27_0005_faces.mp4"


Extracted and transformed 148 frames from video: id6_0003_faces
Extracted and transformed 148 frames from video: id7_0008_faces
Extracted and transformed 148 frames from video: id46_0002_faces
Extracted and transformed 148 frames from video: id53_0002_faces
Extracted and transformed 148 frames from video: id17_0001_faces
Extracted and transformed 148 frames from video: id52_0009_faces
Extracted and transformed 148 frames from video: id47_0009_faces
Extracted and transformed 148 frames from video: id28_0008_faces
Extracted and transformed 148 frames from video: id0_0005_faces
Extracted and transformed 148 frames from video: id29_0003_faces
Extracted and transformed 148 frames from video: id11_0007_faces
Extracted and transformed 148 frames from video: id40_0004_faces
Extracted and transformed 148 frames from video: id55_0004_faces
Extracted and transformed 148 frames from video: id22_0002_faces
Extracted and transformed 148 frames from video: id37_0002_faces
Extracted and transformed 14

OpenCV: Couldn't read video stream from file "/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real/17__walking_down_street_outside_angry_faces.mp4"


No frames extracted from video: /Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real/17__walking_down_street_outside_angry_faces.mp4
Extracted and transformed 132 frames from video: 14__kitchen_pan_faces
Extracted and transformed 148 frames from video: 10__talking_against_wall_faces
Extracted and transformed 148 frames from video: 01__walking_down_indoor_hall_disgust_faces
Extracted and transformed 148 frames from video: 24__talking_angry_couch_faces
Extracted and transformed 144 frames from video: 14__walking_outside_cafe_disgusted_faces
Extracted and transformed 148 frames from video: 22__walking_down_indoor_hall_disgust_faces
Extracted and transformed 120 frames from video: 19__secret_conversation_faces
Extracted and transformed 148 frames from video: 12__walking_and_outside_surprised_faces
Extracted and transformed 136 frames from video: 21__walking_down_street_outside_angry_faces
Extracted and transformed 148 frames from video: 23__talking_against_wall

## Statistics: Calculate Average Extracted Frames

In [None]:
import os

# Base directory where the extracted frames are stored
extracted_frames_main_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted frames'

# List of datasets and categories; calculate_average_frames joins them as
# "<dataset>_<category>" to form the folder name below the base directory.
datasets = ['celebdf', 'ff']
categories = ['real', 'fake']

# Function to calculate average extracted frames for each dataset and category
def calculate_average_frames(dataset, category):
    """Print the mean number of ``.jpg`` frames per video folder for one dataset/category.

    The folder inspected is ``<extracted_frames_main_dir>/<dataset>_<category>``;
    every sub-directory in it counts as one video, including empty ones.
    Nothing is returned -- the result (or the reason there is none) is printed.
    """
    path = os.path.join(extracted_frames_main_dir, f"{dataset}_{category}")
    if not os.path.exists(path):
        print(f"No extracted frames found for {dataset} {category}.")
        return

    # One entry per video folder: how many .jpg files it contains.
    frames_per_video = [
        sum(1 for name in os.listdir(video_dir) if name.endswith('.jpg'))
        for video_dir in (os.path.join(path, entry) for entry in os.listdir(path))
        if os.path.isdir(video_dir)
    ]

    if not frames_per_video:
        print(f"No valid videos found for {dataset} {category}.")
        return

    average_frames = sum(frames_per_video) / len(frames_per_video)
    print(f"Average extracted frames for {dataset} {category}: {average_frames:.2f}")

# Iterate through each dataset and category; every call prints one line
# (the average, or a message explaining why none could be computed).
for dataset in datasets:
    for category in categories:
        calculate_average_frames(dataset, category)


Average extracted frames for celebdf real: 146.99
Average extracted frames for celebdf fake: 147.89
Average extracted frames for ff real: 135.20
Average extracted frames for ff fake: 143.45


## Statistics: Calculate Average Extracted Transformed Frames

In [5]:
import os

# Base directory where the transformed frames are stored
transformed_frames_main_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted frames/transformed'

# Define paths for the datasets: key -> folder that holds one sub-folder per video.
# Note that this rebinds `datasets` from the list used in the previous section to a dict.
datasets = {
    'celebdf_real': os.path.join(transformed_frames_main_dir, 'celebdf_real'),
    'celebdf_fake': os.path.join(transformed_frames_main_dir, 'celebdf_fake'),
    'ff_real': os.path.join(transformed_frames_main_dir, 'ff_real'),
    'ff_fake': os.path.join(transformed_frames_main_dir, 'ff_fake')
}

# Function to calculate the average extracted frames for each dataset and category
def calculate_average_frames(dataset_name, category_path):
    """Print the mean number of ``.jpg`` frames per video folder found in ``category_path``.

    Every sub-directory of ``category_path`` counts as one video, including
    empty ones. ``dataset_name`` is only used in the printed messages
    (underscores are shown as spaces in the success message). Returns ``None``.
    """
    if not os.path.exists(category_path):
        print(f"No extracted frames found for {dataset_name}.")
        return

    # One entry per video folder: how many .jpg files it contains.
    frames_per_video = []
    for entry in os.listdir(category_path):
        video_folder_path = os.path.join(category_path, entry)
        if not os.path.isdir(video_folder_path):
            continue
        jpg_names = [name for name in os.listdir(video_folder_path) if name.endswith('.jpg')]
        frames_per_video.append(len(jpg_names))

    if len(frames_per_video) == 0:
        print(f"No valid videos found for {dataset_name}.")
        return

    average_frames = sum(frames_per_video) / len(frames_per_video)
    print(f"Average extracted frames for {dataset_name.replace('_', ' ') } transformed: {average_frames:.2f}")

# Iterate through each dataset folder; every call prints one line
# (the average, or a message explaining why none could be computed).
for dataset_name, dataset_path in datasets.items():
    calculate_average_frames(dataset_name, dataset_path)


Average extracted frames for celebdf real transformed: 146.99
Average extracted frames for celebdf fake transformed: 147.89
Average extracted frames for ff real transformed: 135.20
Average extracted frames for ff fake transformed: 143.45


##  Statistics: Calculate the Mode of Extracted Transformed Frames

In [6]:
import os
from collections import Counter

# Base directory where the transformed frames are stored
transformed_frames_main_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted frames/transformed'

# Define paths for the datasets: key -> folder that holds one sub-folder per video
datasets = {
    'celebdf_real': os.path.join(transformed_frames_main_dir, 'celebdf_real'),
    'celebdf_fake': os.path.join(transformed_frames_main_dir, 'celebdf_fake'),
    'ff_real': os.path.join(transformed_frames_main_dir, 'ff_real'),
    'ff_fake': os.path.join(transformed_frames_main_dir, 'ff_fake')
}

# Function to calculate the mode of extracted frames for each dataset
def calculate_mode_frames(dataset_name, dataset_path):
    """Print the most common per-video ``.jpg`` frame count in ``dataset_path`` and its frequency.

    Every sub-directory of ``dataset_path`` counts as one video. The mode is
    taken from ``Counter.most_common(1)``. Returns ``None``; the result (or the
    reason there is none) is printed.
    """
    if not os.path.exists(dataset_path):
        print(f"No extracted frames found for {dataset_name}.")
        return

    # Number of .jpg files in each video folder, in directory-listing order.
    video_dirs = (os.path.join(dataset_path, entry) for entry in os.listdir(dataset_path))
    frame_counts = [
        sum(name.endswith('.jpg') for name in os.listdir(video_dir))
        for video_dir in video_dirs
        if os.path.isdir(video_dir)
    ]

    if not frame_counts:
        print(f"No valid videos found for {dataset_name}.")
        return

    mode_value, frequency = Counter(frame_counts).most_common(1)[0]
    print(f"Mode of extracted frames for {dataset_name}: {mode_value} frames (Frequency: {frequency})")

# Iterate through each dataset folder; every call prints one line
# (the mode and its frequency, or a message explaining why none could be computed).
for dataset_name, dataset_path in datasets.items():
    calculate_mode_frames(dataset_name, dataset_path)


Mode of extracted frames for celebdf_real: 148 frames (Frequency: 566)
Mode of extracted frames for celebdf_fake: 148 frames (Frequency: 581)
Mode of extracted frames for ff_real: 148 frames (Frequency: 201)
Mode of extracted frames for ff_fake: 148 frames (Frequency: 300)


##  Statistics: Calculate the Mode of Extracted Frames

In [1]:
import os
from collections import Counter

# Base directory where the extracted (non-transformed) frames are stored
extracted_frames_main_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted frames'

# Define paths for the datasets: key -> folder that holds one sub-folder per video
datasets = {
    'celebdf_real': os.path.join(extracted_frames_main_dir, 'celebdf_real'),
    'celebdf_fake': os.path.join(extracted_frames_main_dir, 'celebdf_fake'),
    'ff_real': os.path.join(extracted_frames_main_dir, 'ff_real'),
    'ff_fake': os.path.join(extracted_frames_main_dir, 'ff_fake')
}

# Function to calculate the mode of extracted frames for each dataset
def calculate_mode_frames(dataset, path):
    """Print the most common per-video frame count for one dataset.

    Every immediate subdirectory of ``path`` is treated as one video; its
    frame count is the number of entries whose name ends with ``.jpg``.

    Args:
        dataset: Label used in the printed messages.
        path: Directory containing one subdirectory per video.

    Returns:
        None. The result (or the reason nothing was computed) is printed.
    """
    # isdir instead of exists: a path that points at a regular file would pass
    # an existence check and then make os.listdir raise NotADirectoryError.
    if not os.path.isdir(path):
        print(f"No extracted frames found at path: {path}")
        return

    # One frame count per video directory
    frame_counts = []
    for video_folder in os.listdir(path):
        video_path = os.path.join(path, video_folder)

        # Loose files next to the video folders are not videos
        if os.path.isdir(video_path):
            frame_counts.append(
                sum(1 for f in os.listdir(video_path) if f.endswith('.jpg'))
            )

    if frame_counts:
        # most_common(1) yields [(value, occurrences)]. On a tie the value seen
        # first wins, which follows the (arbitrary) order of os.listdir.
        mode_value, frequency = Counter(frame_counts).most_common(1)[0]
        print(f"Mode of extracted frames for {dataset}: {mode_value} frames (Frequency: {frequency})")
    else:
        print(f"No valid videos found in {dataset} directory.")

# Print the mode frame count for every (label, directory) pair in `datasets`
for dataset, path in datasets.items():
    calculate_mode_frames(dataset, path)


Mode of extracted frames for celebdf_real: 148 frames (Frequency: 566)
Mode of extracted frames for celebdf_fake: 148 frames (Frequency: 581)
Mode of extracted frames for ff_real: 148 frames (Frequency: 201)
Mode of extracted frames for ff_fake: 148 frames (Frequency: 300)


## Statistics: Count the Extracted Frames (Non-Transformed Dataset)

In [1]:
import os

# Base directory where the extracted frames are stored (one sub-folder per
# dataset key). The path is machine-specific; adjust it when running elsewhere.
extracted_frames_main_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted frames'

# Dataset labels and their paths.
# NOTE(review): the counting loop in this cell only uses the keys of this dict;
# it builds each directory as extracted_frames_main_dir/<key>, so the path
# values below (under 'extracted raw dataset new') are never read here.
# Confirm which location is the intended one.
datasets = {
    'celebdf_real': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf real',
    'celebdf_fake': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/celebdf fake',
    'ff_real': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff real',
    'ff_fake': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/ff fake'
}

# Function to count extracted frames for each dataset and category
def count_extracted_frames(category_path):
    """Count the ``.jpg`` frames across all video folders of one category.

    Every immediate subdirectory of ``category_path`` is treated as one video;
    only entries directly inside it whose name ends with ``.jpg`` are counted.

    Args:
        category_path: Directory containing one subdirectory per video.

    Returns:
        int: Total number of ``.jpg`` entries found, or 0 (after printing a
        message) when ``category_path`` is not an existing directory.
    """
    # Report a missing or non-directory path and count it as zero frames,
    # instead of letting os.listdir abort the whole cell with an OSError.
    if not os.path.isdir(category_path):
        print(f"No extracted frames found at path: {category_path}")
        return 0

    total_frames = 0
    for video_folder in os.listdir(category_path):
        video_folder_path = os.path.join(category_path, video_folder)

        # Loose files next to the video folders are not videos
        if os.path.isdir(video_folder_path):
            total_frames += sum(
                1 for f in os.listdir(video_folder_path) if f.endswith('.jpg')
            )

    return total_frames

# Count and print the total number of extracted frames per dataset.
# NOTE(review): `dataset_path` is unpacked but never used; the directory that
# is actually counted is extracted_frames_main_dir/<dataset_name> - confirm
# that this is the intended location.
for dataset_name, dataset_path in datasets.items():
    # Build the frame directory from the base directory and the dataset key
    category_path = os.path.join(extracted_frames_main_dir, dataset_name)

    # Count the extracted frames for the current dataset category
    total_frames = count_extracted_frames(category_path)

    # Print the total with underscores in the key shown as spaces
    print(f"{dataset_name.replace('_', ' ')}: {total_frames} frames")


celebdf real: 86580 frames
celebdf fake: 87256 frames
ff real: 48944 frames
ff fake: 52216 frames


## Statistics: Count the Extracted Frames (Transformed Dataset)

In [4]:
import os

# Base directory where the transformed frames are stored (one sub-folder per
# dataset key). The path is machine-specific; adjust it when running elsewhere.
extracted_frames_main_dir = '/Users/faishalkamil/Downloads/documents skripsi/extracted frames/transformed'

# Dataset labels and their paths.
# NOTE(review): the counting loop in this cell only uses the keys of this dict;
# it builds each directory as extracted_frames_main_dir/<key>, so the path
# values below (under 'extracted raw dataset new/transformed') are never read
# here. Confirm which location is the intended one.
datasets = {
    'celebdf_real': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/celebdf_real',
    'celebdf_fake': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/celebdf_fake',
    'ff_real': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/ff_real',
    'ff_fake': '/Users/faishalkamil/Downloads/documents skripsi/extracted raw dataset new/transformed/ff_fake'
}

# Function to count extracted frames for each dataset and category
def count_extracted_frames(category_path):
    """Count the ``.jpg`` frames across all video folders of one category.

    Every immediate subdirectory of ``category_path`` is treated as one video;
    only entries directly inside it whose name ends with ``.jpg`` are counted.

    Args:
        category_path: Directory containing one subdirectory per video.

    Returns:
        int: Total number of ``.jpg`` entries found, or 0 (after printing a
        message) when ``category_path`` is not an existing directory.
    """
    # Report a missing or non-directory path and count it as zero frames,
    # instead of letting os.listdir abort the whole cell with an OSError.
    if not os.path.isdir(category_path):
        print(f"No extracted frames found at path: {category_path}")
        return 0

    total_frames = 0
    for video_folder in os.listdir(category_path):
        video_folder_path = os.path.join(category_path, video_folder)

        # Loose files next to the video folders are not videos
        if os.path.isdir(video_folder_path):
            total_frames += sum(
                1 for f in os.listdir(video_folder_path) if f.endswith('.jpg')
            )

    return total_frames

# Count and print the total number of transformed frames per dataset.
# NOTE(review): `dataset_path` is unpacked but never used; the directory that
# is actually counted is extracted_frames_main_dir/<dataset_name> - confirm
# that this is the intended location.
for dataset_name, dataset_path in datasets.items():
    # Build the frame directory from the base directory and the dataset key
    category_path = os.path.join(extracted_frames_main_dir, dataset_name)

    # Count the extracted frames for the current dataset category
    total_frames = count_extracted_frames(category_path)

    # Print the total with underscores in the key shown as spaces
    print(f"{dataset_name.replace('_', ' ')}: {total_frames} frames")


celebdf real: 86580 frames
celebdf fake: 87256 frames
ff real: 48944 frames
ff fake: 52216 frames
