In [5]:
import cv2
import os

def extract_frames_from_video(video_path, max_frames=10, resize_shape=(128, 128)):
    import cv2
    frames = []
    cap = cv2.VideoCapture(video_path)
    count = 0
    while cap.isOpened() and count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, resize_shape)
        frame = frame / 255.0  # normalize
        frames.append(frame)
        count += 1
    cap.release()
    return frames


In [7]:
X, y = [], []

base_path = "/content/drive/MyDrive/deepfake_dataset/train"
for label, folder in enumerate(["real", "fake"]):
    folder_path = os.path.join(base_path, folder)
    if not os.path.exists(folder_path):
        print(f"Folder not found: {folder_path}")
        continue
    print(f"Loading videos from {folder_path}")
    for file in os.listdir(folder_path):
        if not file.lower().endswith((".mp4", ".avi", ".mov")):
            continue

        video_path = os.path.join(folder_path, file)
        frames = extract_frames_from_video(video_path)
        if len(frames) == 0:
            print(f"No frames extracted from {video_path}")
            continue
        X.extend(frames)  # Add all frames from this video
        y.extend([label] * len(frames))  # Same label for all frames

Loading videos from /content/drive/MyDrive/deepfake_dataset/train/real
Loading videos from /content/drive/MyDrive/deepfake_dataset/train/fake


In [8]:
import numpy as np

X = np.array(X).astype('float32') / 255.0
y = np.array(y).astype('float32')

print("Total frames:", X.shape[0])
print("Shape of each frame:", X.shape[1:])
print("Labels shape:", y.shape)


Total frames: 3600
Shape of each frame: (128, 128, 3)
Labels shape: (3600,)


In [9]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [31]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

IMG_SIZE = 128

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    MaxPooling2D(2, 2),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer=Adam(0.0001), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

history = model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_val, y_val)
)


Epoch 1/200
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 36ms/step - accuracy: 0.5068 - loss: 0.6934 - val_accuracy: 0.5000 - val_loss: 0.6930
Epoch 2/200
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.5219 - loss: 0.6930 - val_accuracy: 0.5000 - val_loss: 0.6923
Epoch 3/200
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.5150 - loss: 0.6926 - val_accuracy: 0.6403 - val_loss: 0.6908
Epoch 4/200
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 0.5607 - loss: 0.6914 - val_accuracy: 0.6764 - val_loss: 0.6861
Epoch 5/200
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.5934 - loss: 0.6872 - val_accuracy: 0.6097 - val_loss: 0.6716
Epoch 6/200
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.5925 - loss: 0.6775 - val_accuracy: 0.7264 - val_loss: 0.6484
Epoch 7/200
[1m90/90[0m [

In [32]:
def predict_video(video_path):
    frames = extract_frames_from_video(video_path)
    frames = np.array(frames).astype('float32') / 255.0
    preds = model.predict(frames)
    avg_pred = preds.mean()
    print(f"Average prediction score: {avg_pred:.4f}")
    if avg_pred >= 0.5:
        print("🔍 Video predicted as FAKE")
    else:
        print("🔍 Video predicted as REAL")


In [41]:
model.save("deepfake_detection_model.h5")
print("✅ Model saved to deepfake_detection_model.h5")




✅ Model saved to deepfake_detection_model.h5


In [34]:
from tensorflow.keras.models import load_model

model = load_model("deepfake_detection_model.h5")
print("✅ Model loaded from disk")




✅ Model loaded from disk


In [35]:
from sklearn.metrics import classification_report

# Predict on test frames
y_pred_probs = model.predict(X_val)
y_pred = (y_pred_probs > 0.5).astype(int)

print(classification_report(y_val, y_pred))


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
              precision    recall  f1-score   support

         0.0       0.91      0.78      0.84       360
         1.0       0.81      0.92      0.86       360

    accuracy                           0.85       720
   macro avg       0.86      0.85      0.85       720
weighted avg       0.86      0.85      0.85       720



In [36]:
def predict_video(video_path):
    frames = extract_frames_from_video(video_path)
    frames = np.array(frames).astype('float32') / 255.0
    preds = model.predict(frames)
    avg_pred = preds.mean()
    label = "FAKE" if avg_pred >= 0.5 else "REAL"
    print(f"Video prediction: {label} (score: {avg_pred:.4f})")


In [39]:
predict_video('/content/drive/MyDrive/deepfake_dataset/test/real/01__podium_speech_happy.mp4')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Video prediction: FAKE (score: 0.9579)
