In [1]:
%pip install tensorflow opencv-python scikit-learn matplotlib streamlit

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import cv2
import os

# Input video folders
real_videos_path =r"C:\Users\akans\OneDrive\Desktop\FF++\original"
fake_videos_path =r"C:\Users\akans\OneDrive\Desktop\FF++\Deepfakes"

# Output dataset folders
output_real_path = "dataset/real"
output_fake_path = "dataset/fake"

# Create output dirs if not exist
os.makedirs(output_real_path, exist_ok=True)
os.makedirs(output_fake_path, exist_ok=True)

def extract_frames(video_path, output_folder, prefix, frame_interval=30):
    """
    Extract frames from video every `frame_interval` frames.
    Saves them with video filename included.
    """
    video_name = os.path.splitext(os.path.basename(video_path))[0]  # e.g., video1
    cap = cv2.VideoCapture(video_path)

    frame_count = 0
    saved_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Save every `frame_interval` frames
        if frame_count % frame_interval == 0:
            frame_filename = f"{video_name}_{saved_count}.jpg"
            frame_path = os.path.join(output_folder, frame_filename)
            cv2.imwrite(frame_path, frame)
            saved_count += 1

        frame_count += 1

    cap.release()
    print(f"Extracted {saved_count} frames from {video_path}")

# Process all real videos
for file in os.listdir(real_videos_path):
    if file.endswith(".mp4"):
        extract_frames(os.path.join(real_videos_path, file), output_real_path, "real")

# Process all fake videos
for file in os.listdir(fake_videos_path):
    if file.endswith(".mp4"):
        extract_frames(os.path.join(fake_videos_path, file), output_fake_path, "fake")


Extracted 14 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\000.mp4
Extracted 16 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\001.mp4
Extracted 24 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\002.mp4
Extracted 11 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\003.mp4
Extracted 11 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\004.mp4
Extracted 13 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\005.mp4
Extracted 11 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\006.mp4
Extracted 17 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\007.mp4
Extracted 22 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\008.mp4
Extracted 20 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\009.mp4
Extracted 16 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\010.mp4
Extracted 22 frames from C:\Users\akans\OneDrive\Desktop\FF++\original\011.mp4
Extracted 13 frames from C:\Users\akans\OneDrive\Des

In [3]:
%pip install tensorflow

import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report
import seaborn as sns
from tensorflow.keras.preprocessing.image import ImageDataGenerator # type: ignore
from tensorflow.keras.applications.xception import Xception, preprocess_input # type: ignore
from tensorflow.keras.models import Model # type: ignore
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore
from tensorflow.keras.callbacks import ModelCheckpoint # type: ignore

# -----------------------------
# 1. Extract frames from videos
# -----------------------------
def extract_frames(video_path, output_dir, label, max_frames=20):
    cap = cv2.VideoCapture(video_path)
    count = 0
    saved = 0
    os.makedirs(output_dir, exist_ok=True)
    while cap.isOpened() and saved < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        if count % 10 == 0:  # sample every 10th frame
            frame_path = os.path.join(output_dir, f"{label}_{os.path.basename(video_path)}_{saved}.jpg")
            cv2.imwrite(frame_path, frame)
            saved += 1
        count += 1
    cap.release()

def prepare_dataset(real_videos_path, fake_videos_path, frame_dir="frames"):
    os.makedirs(frame_dir, exist_ok=True)
    all_frames = []
    labels = []

    # extract frames from real videos
    for vid in os.listdir(real_videos_path):
        vpath = os.path.join(real_videos_path, vid)
        extract_frames(vpath, frame_dir, "real")
    
    # extract frames from fake videos
    for vid in os.listdir(fake_videos_path):
        vpath = os.path.join(fake_videos_path, vid)
        extract_frames(vpath, frame_dir, "fake")

    # collect all frames
    for img in os.listdir(frame_dir):
        if img.endswith(".jpg"):
            all_frames.append(os.path.join(frame_dir, img))
            labels.append(0 if "real_" in img else 1)  # 0=real, 1=fake

    return all_frames, labels

# -----------------------------
# 2. Create generators
# -----------------------------
def create_generator(paths, labels, batch_size=8, is_train=True):
    datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input,
        horizontal_flip=is_train,
        rotation_range=10 if is_train else 0
    )
    
    df = pd.DataFrame({
        "filename": paths,
        "label": labels
    })
    
    generator = datagen.flow_from_dataframe(
        dataframe=df,
        x_col="filename",
        y_col="label",
        target_size=(150, 150),  # smaller for CPU
        class_mode="raw",        # labels are numeric
        batch_size=batch_size,
        shuffle=is_train
    )
    return generator, df

# -----------------------------
# 3. Define model
# -----------------------------
def build_model():
    base = Xception(weights="imagenet", include_top=False, input_shape=(150, 150, 3))
    x = GlobalAveragePooling2D()(base.output)
    out = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=base.input, outputs=out)
    model.compile(optimizer=Adam(1e-4), loss="binary_crossentropy", metrics=["accuracy"])
    return model

# -----------------------------
# 4. Plot graphs + metrics
# -----------------------------
def plot_metrics(history, model, val_gen, val_df, output_dir="plots"):
    os.makedirs(output_dir, exist_ok=True)

    # Accuracy
    plt.figure(figsize=(8,6))
    plt.plot(history.history["accuracy"], label="Train Accuracy")
    plt.plot(history.history["val_accuracy"], label="Val Accuracy")
    plt.title("Model Accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.savefig(os.path.join(output_dir, "accuracy.png"))
    plt.close()

    # Loss
    plt.figure(figsize=(8,6))
    plt.plot(history.history["loss"], label="Train Loss")
    plt.plot(history.history["val_loss"], label="Val Loss")
    plt.title("Model Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.savefig(os.path.join(output_dir, "loss.png"))
    plt.close()

    # ROC Curve
    val_gen.reset()
    y_true = val_df["label"].values
    y_pred = model.predict(val_gen, verbose=1)
    fpr, tpr, _ = roc_curve(y_true, y_pred)
    roc_auc = auc(fpr, tpr)

    plt.figure(figsize=(8,6))
    plt.plot(fpr, tpr, color="blue", lw=2, label=f"ROC Curve (AUC = {roc_auc:.2f})")
    plt.plot([0, 1], [0, 1], color="red", lw=2, linestyle="--")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curve")
    plt.legend(loc="lower right")
    plt.savefig(os.path.join(output_dir, "roc_curve.png"))
    plt.close()

    # Confusion Matrix
    y_pred_class = (y_pred > 0.5).astype(int)
    cm = confusion_matrix(y_true, y_pred_class)
    plt.figure(figsize=(6,6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=["REAL", "FAKE"],
                yticklabels=["REAL", "FAKE"])
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.savefig(os.path.join(output_dir, "confusion_matrix.png"))
    plt.close()

    # Classification Report
    report = classification_report(y_true, y_pred_class, target_names=["REAL", "FAKE"])
    with open(os.path.join(output_dir, "classification_report.txt"), "w") as f:
        f.write(report)

    print(f" Training metrics saved in '{output_dir}/'")
    print(report)

# -----------------------------
# 5. Main training
# -----------------------------
if __name__ == "__main__":
    real_videos_path = r"C:\Users\akans\OneDrive\Desktop\FF++\original"   
    fake_videos_path = r"C:\Users\akans\OneDrive\Desktop\FF++\Deepfakes"

    print("Extracting frames...")
    all_frames, labels = prepare_dataset(real_videos_path, fake_videos_path)
    print(f"Found {len([l for l in labels if l==0])} real frames, {len([l for l in labels if l==1])} fake frames")

    # train/val split
    train_paths, val_paths, y_train, y_val = train_test_split(
        all_frames, labels, test_size=0.2, random_state=42, stratify=labels
    )

    # generators
    train_gen, train_df = create_generator(train_paths, y_train, batch_size=8, is_train=True)
    val_gen, val_df = create_generator(val_paths, y_val, batch_size=8, is_train=False)

    # model
    model = build_model()

    # save best model
    checkpoint = ModelCheckpoint("deepfake_xception_demo.h5", save_best_only=True, monitor="val_accuracy", mode="max")

    print("Training model...")
    history = model.fit(train_gen, validation_data=val_gen, epochs=5, callbacks=[checkpoint])

    print(" Training complete! Model saved as deepfake_xception_demo.h5")

    # plot training graphs + metrics
    plot_metrics(history, model, val_gen, val_df)



[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.
Extracting frames...
Found 400 real frames, 400 fake frames
Found 640 validated image filenames.
Found 160 validated image filenames.
Training model...


  self._warn_if_super_not_called()


Epoch 1/5
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.6261 - loss: 0.6274



[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m152s[0m 1s/step - accuracy: 0.6687 - loss: 0.5696 - val_accuracy: 0.7312 - val_loss: 0.5268
Epoch 2/5
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.7723 - loss: 0.4179



[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 2s/step - accuracy: 0.7484 - loss: 0.4374 - val_accuracy: 0.7812 - val_loss: 0.4042
Epoch 3/5
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.8205 - loss: 0.3888



[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 1s/step - accuracy: 0.8000 - loss: 0.3749 - val_accuracy: 0.8250 - val_loss: 0.3029
Epoch 4/5
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 1s/step - accuracy: 0.8125 - loss: 0.3332 - val_accuracy: 0.8250 - val_loss: 0.4365
Epoch 5/5
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 1s/step - accuracy: 0.8266 - loss: 0.3439 - val_accuracy: 0.8125 - val_loss: 0.3272
 Training complete! Model saved as deepfake_xception_demo.h5
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 263ms/step
 Training metrics saved in 'plots/'
              precision    recall  f1-score   support

        REAL       0.82      0.80      0.81        80
        FAKE       0.80      0.82      0.81        80

    accuracy                           0.81       160
   macro avg       0.81      0.81      0.81       160
weighted avg       0.81      0.81      0.81       160



In [4]:
import tensorflow as tf
from tensorflow.keras.models import load_model # type: ignore
from tensorflow.keras.applications.xception import preprocess_input # type: ignore
from tensorflow.keras.preprocessing import image # type: ignore
import numpy as np
import cv2
import os

# Load trained model
model = load_model("deepfake_xception_demo.h5")
print(" Model loaded")



 Model loaded


In [5]:
def predict_frame(frame):
    # Resize to 150x150 (same as training)
    frame_resized = cv2.resize(frame, (150, 150))
    img_array = image.img_to_array(frame_resized)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)

    # Prediction: output close to 0 = real, close to 1 = fake
    pred = model.predict(img_array)[0][0]
    return pred

In [6]:
def predict_video(video_path, max_frames=50):
    cap = cv2.VideoCapture(video_path)
    preds = []
    count = 0
    
    while cap.isOpened() and count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        pred = predict_frame(frame)
        preds.append(pred)
        count += 1
    
    cap.release()
    avg_pred = np.mean(preds)
    print(f" Average prediction score: {avg_pred:.4f}")
    if avg_pred > 0.5:
        print("DEEPFAKE")
    else:
        print("REAL")
    return avg_pred


In [7]:
# Replace with one of your saved test videos
video_path = r"C:\Users\akans\OneDrive\Desktop\FF++\Deepfakes\000_003.mp4"
predict_video(video_path, max_frames=30)

video_path = r"C:\Users\akans\OneDrive\Desktop\FF++\original\012.mp4"
predict_video(video_path, max_frames=30)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97

np.float32(0.009155247)