In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, GlobalAveragePooling2D, LSTM, Dense
from tensorflow.keras.optimizers import Adam

In [3]:

# ========= المسارات ==========
real_path = "/Users/abdelrhman/Downloads/SDFVD Small-scale Deepfake Forgery Video Dataset/SDFVD/videos_real"
fake_path = "/Users/abdelrhman/Downloads/SDFVD Small-scale Deepfake Forgery Video Dataset/SDFVD/videos_fake"

# ========= إعدادات ==========
IMG_SIZE = 224
SEQ_LENGTH = 10  # عدد الفريمات من كل فيديو

def extract_frames(video_path, seq_length=SEQ_LENGTH):
    cap = cv2.VideoCapture(video_path)
    frames = []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_idxs = np.linspace(0, total_frames - 1, seq_length).astype(int)
    
    for i in range(total_frames):
        ret, frame = cap.read()
        if not ret:
            break
        if i in frame_idxs:
            frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
            frame = frame / 255.0
            frames.append(frame)
    cap.release()
    
    # padding لو الفيديو فيه عدد فريمات أقل من المطلوب
    while len(frames) < seq_length:
        frames.append(np.zeros((IMG_SIZE, IMG_SIZE, 3)))
        
    return np.array(frames)

In [5]:

# ========= تحميل البيانات ==========
def load_dataset(real_path, fake_path):
    X, y = [], []
    print("[INFO] Loading real videos...")
    for file in tqdm(os.listdir(real_path)):
        if not file.endswith(".mp4"): continue
        frames = extract_frames(os.path.join(real_path, file))
        X.append(frames)
        y.append(0)  # real

    print("[INFO] Loading fake videos...")
    for file in tqdm(os.listdir(fake_path)):
        if not file.endswith(".mp4"): continue
        frames = extract_frames(os.path.join(fake_path, file))
        X.append(frames)
        y.append(1)  # fake

    return np.array(X), np.array(y)

print("[INFO] Preparing dataset...")
X, y = load_dataset(real_path, fake_path)

# One-hot encoding
y = to_categorical(y, 2)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


[INFO] Preparing dataset...
[INFO] Loading real videos...


100%|███████████████████████████████████████████| 53/53 [00:04<00:00, 10.87it/s]


[INFO] Loading fake videos...


100%|███████████████████████████████████████████| 53/53 [00:04<00:00, 10.84it/s]


In [7]:
# ========= بناء النموذج ==========
print("[INFO] Building the model...")
base_cnn = MobileNetV2(include_top=False, weights='imagenet', input_shape=(IMG_SIZE, IMG_SIZE, 3))
base_cnn.trainable = False  # Freeze base model

model = Sequential([
    TimeDistributed(base_cnn, input_shape=(SEQ_LENGTH, IMG_SIZE, IMG_SIZE, 3)),
    TimeDistributed(GlobalAveragePooling2D()),
    LSTM(64, return_sequences=False),
    Dense(64, activation='relu'),
    Dense(2, activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

[INFO] Building the model...


  super().__init__(**kwargs)


In [9]:
print("[INFO] Training the model...")
history = model.fit(X_train, y_train, epochs=10, batch_size=4, validation_data=(X_test, y_test))

# ========= التقييم ==========
loss, acc = model.evaluate(X_test, y_test)
print(f"🎯 Test Accuracy: {acc * 100:.2f}%")

[INFO] Training the model...
Epoch 1/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 481ms/step - accuracy: 0.5208 - loss: 0.7331 - val_accuracy: 0.4545 - val_loss: 0.7000
Epoch 2/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 305ms/step - accuracy: 0.5897 - loss: 0.6755 - val_accuracy: 0.3636 - val_loss: 0.7263
Epoch 3/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 305ms/step - accuracy: 0.7074 - loss: 0.6708 - val_accuracy: 0.2727 - val_loss: 0.7534
Epoch 4/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 300ms/step - accuracy: 0.7093 - loss: 0.6577 - val_accuracy: 0.2273 - val_loss: 0.8006
Epoch 5/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 327ms/step - accuracy: 0.7037 - loss: 0.6457 - val_accuracy: 0.0909 - val_loss: 0.8272
Epoch 6/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 314ms/step - accuracy: 0.7938 - loss: 0.6218 - val_accuracy: 0.1364 - val_loss: 0.883

In [15]:
def predict_video(video_path, model, seq_length=SEQ_LENGTH):
    print(f"[INFO] Predicting video: {video_path}")
    frames = extract_frames(video_path, seq_length=seq_length)
    input_array = np.expand_dims(frames, axis=0)  # (1, SEQ_LENGTH, IMG_SIZE, IMG_SIZE, 3)

    prediction = model.predict(input_array)
    class_idx = np.argmax(prediction)
    confidence = prediction[0][class_idx]

    label = "REAL" if class_idx == 0 else "FAKE"
    print(f"✅ Prediction: {label} ({confidence * 100:.2f}%)")
    return label, confidence


In [19]:
# مسار الفيديو الجديد اللي عايز تجرب عليه
test_video = "vs38.mp4"

# تحميل الموديل (لو ما زال محمّل في الذاكرة مش لازم تعيد)
# model = tf.keras.models.load_model("deepfake_detector_lstm.h5")

# توقع
predict_video(test_video, model)

[INFO] Predicting video: vs38.mp4
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
✅ Prediction: FAKE (58.38%)


OpenCV: Couldn't read video stream from file "vs38.mp4"


('FAKE', 0.58376724)

In [21]:
model.save("deepfake_detector_lstm.h5")

