In [7]:
!pip install opencv-python-headless
!pip install numpy
!pip install tensorflow
!pip install scikit-learn
!pip install matplotlib

Collecting opencv-python-headless
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting numpy<2.3.0,>=2 (from opencv-python-headless)
  Using cached numpy-2.2.6-cp312-cp312-win_amd64.whl.metadata (60 kB)
Downloading opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl (38.9 MB)
   ---------------------------------------- 0.0/38.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.9 MB ? eta -:--:--
    --------------------------------------- 0.5/38.9 MB 1.5 MB/s eta 0:00:26
   - -------------------------------------- 1.3/38.9 MB 2.8 MB/s eta 0:00:14
   -- ------------------------------------- 2.9/38.9 MB 4.5 MB/s eta 0:00:08
   ----- ---------------------------------- 5.0/38.9 MB 6.0 MB/s eta 0:00:06
   ----- ---------------------------------- 5.2/38.9 MB 6.1 MB/s eta 0:00:06
   --------- ------------------------------ 9.4/38.9 MB 7.7 MB/s eta 0:00:04


  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
contourpy 1.2.0 requires numpy<2.0,>=1.20, but you have numpy 2.2.6 which is incompatible.
gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.2.6 which is incompatible.
numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.2.6 which is incompatible.
streamlit 1.37.1 requires protobuf<6,>=3.20, but you have protobuf 6.32.0 which is incompatible.


Collecting numpy>=1.23 (from matplotlib)
  Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl (15.5 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.6
    Uninstalling numpy-2.2.6:
      Successfully uninstalled numpy-2.2.6
Successfully installed numpy-1.26.4


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
streamlit 1.37.1 requires protobuf<6,>=3.20, but you have protobuf 6.32.0 which is incompatible.


In [77]:
# ==========================
# 1. Imports
# ==========================
import os
import cv2
import numpy as np
import time
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [79]:
# Disable oneDNN for reproducibility
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

In [81]:
# ==========================
# 2. Functions
# ==========================
def extract_frames(video_path, label, max_frames=10):
    frames = []
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    while cap.isOpened() and frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (224, 224))
        frames.append([np.array(frame), label])
        frame_count += 1
    cap.release()
    return frames

def load_data(paths, label, max_frames=10):
    data = []
    for path in paths:
        print(f"Processing path: {path}")
        for video in os.listdir(path):
            video_path = os.path.join(path, video)
            try:
                data.extend(extract_frames(video_path, label, max_frames))
            except Exception as e:
                print(f"Error processing {video_path}: {e}")
    return data

def batch_generator(data, batch_size=32, augmentor=None, class_weights=None):
    n = len(data)
    while True:
        np.random.shuffle(data)
        for i in range(0, n, batch_size):
            batch = data[i:i+batch_size]
            X_batch = np.array([x[0] for x in batch], dtype=np.float32)
            y_batch = np.array([x[1] for x in batch], dtype=np.float32)
            
            if class_weights:
                y_batch = np.array([y * class_weights[int(y)] for y in y_batch], dtype=np.float32)
            
            if augmentor:
                X_batch = augmentor.flow(X_batch, batch_size=batch_size, shuffle=False).next()
            
            X_batch /= 255.0
            yield X_batch, y_batch

def val_generator(data, batch_size=32):
    n = len(data)
    for i in range(0, n, batch_size):
        batch = data[i:i+batch_size]
        X_batch = np.array([x[0] for x in batch], dtype=np.float32) / 255.0
        y_batch = np.array([x[1] for x in batch], dtype=np.float32)
        yield X_batch, y_batch

def build_model():
    model = Sequential([
        Input(shape=(224, 224, 3)),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [52]:
real_paths = [
    r"C:\Dataset_Celeb_df\Celeb-DF\Celeb-real",
    r"C:\Dataset_Celeb_df\Celeb-DF\YouTube-real",
    r"C:\Dataset_Celeb_df\Celeb-DF-v2\Celeb-real",
    r"C:\Dataset_Celeb_df\Celeb-DF-v2\YouTube-real"
]

synthetic_paths = [
    r"C:\Dataset_Celeb_df\Celeb-DF-v2\Celeb-synthesis",
    r"C:\Dataset_Celeb_df\Celeb-DF\Celeb-synthesis"
]

In [83]:
# ==========================
# 4. Load Data
# ==========================
real_data = load_data(real_paths, label=0, max_frames=5)
synthetic_data = load_data(synthetic_paths, label=1, max_frames=5)
all_data = real_data + synthetic_data
X, y = zip(*all_data)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Processing path: C:\Dataset_Celeb_df\Celeb-DF\Celeb-real
Processing path: C:\Dataset_Celeb_df\Celeb-DF\YouTube-real
Processing path: C:\Dataset_Celeb_df\Celeb-DF-v2\Celeb-real
Processing path: C:\Dataset_Celeb_df\Celeb-DF-v2\YouTube-real
Processing path: C:\Dataset_Celeb_df\Celeb-DF-v2\Celeb-synthesis
Processing path: C:\Dataset_Celeb_df\Celeb-DF\Celeb-synthesis


In [85]:
# ==========================
# 5. Data Augmentation
# ==========================
train_datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [71]:
# ==========================
# 6. Class Weights
# ==========================
from sklearn.utils import class_weight
class_weights_array = class_weight.compute_class_weight(
    class_weight='balanced', 
    classes=np.unique(y_train), 
    y=y_train
)
class_weights_dict = dict(enumerate(class_weights_array))
print("Class weights:", class_weights_dict)

Class weights: {0: 3.4980226733456368, 1: 0.5833882952996526}


In [73]:
# 7. Build Model
# ==========================
my_model = build_model()
my_model.summary()


In [89]:
# ==========================
# 8. Training
# ==========================
def batch_generator(data, batch_size=32, augmentor=None, class_weights=None):
    n = len(data)
    while True:
        np.random.shuffle(data)
        for i in range(0, n, batch_size):
            batch = data[i:i+batch_size]
            X_batch = np.array([x[0] for x in batch], dtype=np.float32)
            y_batch = np.array([x[1] for x in batch], dtype=np.float32)
            
            # Apply class weights manually
            if class_weights:
                y_batch = np.array([y * class_weights[int(y)] for y in y_batch], dtype=np.float32)
            
            # Apply augmentation
            if augmentor:
                X_batch = next(augmentor.flow(X_batch, batch_size=batch_size, shuffle=False))
            
            X_batch /= 255.0  # rescale
            yield X_batch, y_batch


In [63]:
# ==========================
# 9. Evaluation
# ==========================
def evaluate_model(model, data, batch_size=32):
    val_gen = val_generator(data, batch_size=batch_size)
    steps = len(data) // batch_size
    if len(data) % batch_size != 0:
        steps += 1
    loss, accuracy = model.evaluate(val_gen, steps=steps)
    print(f"Test Accuracy: {accuracy*100:.2f}%")

evaluate_model(my_model, list(zip(X_test, y_test)), batch_size=batch_size)

[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 71ms/step - accuracy: 0.1690 - loss: 0.7146
Test Accuracy: 16.90%
