# __Project 1: Computer Vision__

We'll use Celeb-DF rather than Celeb-DF-v2 because it is less likely to cause problems related to class imbalance.

In [None]:
from ultralytics import YOLOWorld
import torch

# For Preprocessing
import os
import shutil
import random

In [2]:
# Report whether PyTorch can see a CUDA device. The device name is only
# queried when CUDA is available, because torch.cuda.get_device_name raises
# on a machine without a usable CUDA device.
cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {cuda_available}")
if cuda_available:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU: none detected (running on CPU)")

CUDA Available: True
GPU: NVIDIA GeForce RTX 4060 Laptop GPU


### __Data Preprocessing__

In [None]:
def create_splits(root_dir,
                  test_list_filename="List_of_testing_videos.txt",
                  subfolders=("Celeb-synthesis", "Youtube-real", "Celeb-real"),
                  train_ratio=0.8,
                  seed=42):
    """
    Copy the videos under root_dir into flat 'test', 'train' and 'val' folders.

    The test list file is read line by line; the first whitespace-separated
    token on each line is ignored and the rest is taken as a relative video
    path (e.g. "Youtube-real/00170.mp4"). Every ".mp4" file found directly
    inside one of the given subfolders whose relative path appears in that
    list (compared case-insensitively) is copied to '<root_dir>/test'. The
    remaining videos are shuffled and split between '<root_dir>/train' and
    '<root_dir>/val' according to train_ratio.

    The shuffle is seeded and the directory listings are sorted, so calling
    the function again on the same data reproduces the same split instead of
    copying a different one on top of the existing train/val folders.

    Note: files are copied by base name only, so two videos with the same
    file name in different subfolders would overwrite each other in the
    output folder.

    Parameters:
      root_dir (str): The path to the dataset folder containing the subfolders
        and the test list file.
      test_list_filename (str): Name of the text file containing test video paths.
      subfolders (sequence of str): The subfolders (relative to root_dir) to search.
      train_ratio (float): Fraction of the remaining (non-test) videos to go to
        train; must lie in [0, 1].
      seed (int or None): Seed for the train/val shuffle. Pass None for a
        different, non-reproducible split on every call.

    Raises:
      ValueError: If train_ratio is outside [0, 1].
      OSError: If the test list file cannot be opened (e.g. FileNotFoundError).
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    # Parse the test list before creating anything on disk, so a missing or
    # unreadable list does not leave empty output folders behind.
    # Each line is assumed to be: "<number> <relative_path>"
    test_list_path = os.path.join(root_dir, test_list_filename)
    test_paths = set()
    with open(test_list_path, "r", encoding="utf-8") as f:
        for line in f:
            # maxsplit=1 keeps a path containing spaces in one piece.
            parts = line.split(maxsplit=1)
            if len(parts) == 2:
                # Normalise separators and case for robust matching.
                test_paths.add(parts[1].strip().replace("\\", "/").lower())

    # Create output directories
    test_dir = os.path.join(root_dir, "test")
    train_dir = os.path.join(root_dir, "train")
    val_dir = os.path.join(root_dir, "val")
    for out_dir in (test_dir, train_dir, val_dir):
        os.makedirs(out_dir, exist_ok=True)

    remaining_files = []  # Paths of the videos that are not in the test list

    for folder in subfolders:
        folder_path = os.path.join(root_dir, folder)
        if not os.path.isdir(folder_path):
            print(f"Folder '{folder_path}' not found. Skipping.")
            continue

        # os.listdir order is arbitrary; sorting gives the seeded shuffle
        # below the same starting order on every run.
        for file_name in sorted(os.listdir(folder_path)):
            if not file_name.lower().endswith(".mp4"):
                continue

            source_file = os.path.join(folder_path, file_name)
            # Relative path as it would appear in the test list,
            # for example: "Youtube-real/00170.mp4"
            relative_path = os.path.join(folder, file_name).replace("\\", "/")
            if relative_path.lower() in test_paths:
                dest_file = os.path.join(test_dir, file_name)
                print(f"Copying {source_file} to {dest_file} (test set)")
                shutil.copy2(source_file, dest_file)
            else:
                remaining_files.append(source_file)

    # Shuffle with a private generator so the global random state is untouched.
    random.Random(seed).shuffle(remaining_files)
    num_remaining = len(remaining_files)
    num_train = int(train_ratio * num_remaining)

    train_files = remaining_files[:num_train]
    val_files = remaining_files[num_train:]

    # Copy the train and validation files into their folders
    for files, dest_dir, label in ((train_files, train_dir, "train"),
                                   (val_files, val_dir, "val")):
        for source_file in files:
            dest_file = os.path.join(dest_dir, os.path.basename(source_file))
            print(f"Copying {source_file} to {dest_file} ({label} set)")
            shutil.copy2(source_file, dest_file)

    print(f"Done. Total non-test videos: {num_remaining}. Train: {len(train_files)}, Val: {len(val_files)}.")

# Dataset root: the folder that holds the video subfolders and the test list.
root_directory = "Celeb-DF"
# Build the test/train/val folders inside the dataset root.
create_splits(root_dir=root_directory)

Copying Celeb-DF\Celeb-synthesis\id10_id11_0001.mp4 to Celeb-DF\test\id10_id11_0001.mp4 (test set)
Copying Celeb-DF\Celeb-synthesis\id10_id11_0004.mp4 to Celeb-DF\test\id10_id11_0004.mp4 (test set)
Copying Celeb-DF\Celeb-synthesis\id10_id12_0001.mp4 to Celeb-DF\test\id10_id12_0001.mp4 (test set)
Copying Celeb-DF\Celeb-synthesis\id10_id12_0004.mp4 to Celeb-DF\test\id10_id12_0004.mp4 (test set)
Copying Celeb-DF\Celeb-synthesis\id10_id13_0001.mp4 to Celeb-DF\test\id10_id13_0001.mp4 (test set)
Copying Celeb-DF\Celeb-synthesis\id10_id13_0004.mp4 to Celeb-DF\test\id10_id13_0004.mp4 (test set)
Copying Celeb-DF\Celeb-synthesis\id10_id7_0001.mp4 to Celeb-DF\test\id10_id7_0001.mp4 (test set)
Copying Celeb-DF\Celeb-synthesis\id10_id7_0004.mp4 to Celeb-DF\test\id10_id7_0004.mp4 (test set)
Copying Celeb-DF\Celeb-synthesis\id11_id7_0008.mp4 to Celeb-DF\test\id11_id7_0008.mp4 (test set)
Copying Celeb-DF\Celeb-synthesis\id16_id0_0011.mp4 to Celeb-DF\test\id16_id0_0011.mp4 (test set)
Copying Celeb-DF\C

### __YOLO Implementation to isolate faces (Object Detection)__

In [4]:
# Instantiate the YOLO-World detector from the 'yolov8s-worldv2.pt' weights
# file; per this section's heading it is intended for isolating faces.
model = YOLOWorld('yolov8s-worldv2.pt')

### __LaDeDa/Tiny-LaDeDa Implementation to discern between fake and real videos (Classification)__

### __Non-DL Implementation__

### __Naive Approach__