
# 🧪 Deepfake Detection — Jupyter Notebook (Keras + Gradio)

This notebook lets you **train** and **demo** a Deepfake Detector end‑to‑end:

- ✅ Dataset loaders (train/val split via `flow_from_directory`)
- ✅ Choice of **Simple CNN** or **EfficientNetB0** (transfer learning)
- ✅ Saves `models/deepfake_detector_keras.h5` and `models/labels.json`
- ✅ Quick evaluation & **threshold** suggestion
- ✅ **Gradio UI** for uploads and **webcam** (if supported)
- ✅ **Mock Mode** fallback if no model is loaded

> **Dataset format (binary classes):**
>
> ```
> data/dataset/
> ├── real/
> └── fake/
> ```


## 1) Setup

In [1]:

# If needed, install packages (uncomment as necessary)
# %pip install tensorflow==2.15.0 pillow numpy opencv-python-headless gradio==4.44.0 scikit-learn

import os, json, math, time, pathlib, typing, warnings
from pathlib import Path
warnings.filterwarnings('ignore')

import numpy as np
from PIL import Image

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, optimizers, callbacks

print("TensorFlow:", tf.__version__)
print("GPU Available:", tf.config.list_physical_devices('GPU'))


TensorFlow: 2.19.0
GPU Available: []


## 2) Configure paths

In [2]:

# Change these if your structure differs
BASE_DIR = Path.cwd()
DATA_DIR = BASE_DIR / "data" / "dataset"   # expects subfolders: real/, fake/
MODEL_DIR = BASE_DIR / "models"
MODEL_DIR.mkdir(parents=True, exist_ok=True)

MODEL_PATH = MODEL_DIR / "deepfake_detector_keras.h5"
LABELS_PATH = MODEL_DIR / "labels.json"

IMG_SIZE = (128, 128)   # can override to (224,224) for EfficientNet
BATCH = 64
VAL_SPLIT = 0.2
EPOCHS = 3              # start with 8–15, tune later

print("BASE_DIR:", BASE_DIR)
print("DATA_DIR exists:", DATA_DIR.exists())
print("MODEL_DIR:", MODEL_DIR)


BASE_DIR: d:\Mtech\Research paper\DeepFake Detection\Draft 4
DATA_DIR exists: True
MODEL_DIR: d:\Mtech\Research paper\DeepFake Detection\Draft 4\models


## 3) Data generators (train/val split)

In [3]:

if not DATA_DIR.exists():
    raise SystemExit(f"Dataset folder not found: {DATA_DIR}. Put images under real/ and fake/ subfolders.")

train_datagen = ImageDataGenerator(
    rescale=1/255.0,
    validation_split=VAL_SPLIT,
    horizontal_flip=True,
    rotation_range=10,
    width_shift_range=0.02,
    height_shift_range=0.02,
    zoom_range=0.5,
)

train_gen = train_datagen.flow_from_directory(
    DATA_DIR.as_posix(),
    target_size=IMG_SIZE,
    batch_size=BATCH,
    class_mode="binary",
    subset="training",
    shuffle=True
)

val_gen = train_datagen.flow_from_directory(
    DATA_DIR.as_posix(),
    target_size=IMG_SIZE,
    batch_size=BATCH,
    class_mode="binary",
    subset="validation",
    shuffle=False
)

# Save class indices for later (UI needs to know which class is index 1)
with open(LABELS_PATH, "w") as f:
    json.dump(train_gen.class_indices, f)
print("Saved class indices:", train_gen.class_indices)
print("labels.json:", LABELS_PATH)


Found 16002 images belonging to 2 classes.
Found 4000 images belonging to 2 classes.
Saved class indices: {'Fake': 0, 'Real': 1}
labels.json: d:\Mtech\Research paper\DeepFake Detection\Draft 4\models\labels.json


## 4) Choose model: Simple CNN or EfficientNetB0

In [4]:

USE_EFFICIENTNET = False   # <-- flip to True for transfer learning

def build_simple_cnn(input_shape=(128,128,3)):
    inputs = layers.Input(shape=input_shape)
    x = layers.Conv2D(32, 3, activation="relu", padding="same")(inputs)
    x = layers.MaxPooling2D()(x)
    x = layers.Conv2D(64, 3, activation="relu", padding="same")(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Conv2D(128, 3, activation="relu", padding="same")(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(64, activation="relu")(x)
    outputs = layers.Dense(1, activation="sigmoid")(x)
    model = models.Model(inputs, outputs)
    model.compile(optimizer=optimizers.Adam(1e-3), loss="binary_crossentropy", metrics=["accuracy"])
    return model

def build_efficientnet_b0(input_shape=(224,224,3)):
    from tensorflow.keras.applications import EfficientNetB0
    from tensorflow.keras.applications.efficientnet import preprocess_input

    base = EfficientNetB0(include_top=False, input_shape=input_shape, weights="imagenet")
    base.trainable = False  # freeze first stage

    inputs = layers.Input(shape=input_shape)
    x = preprocess_input(inputs)
    x = base(x, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.35)(x)
    outputs = layers.Dense(1, activation="sigmoid")(x)
    model = models.Model(inputs, outputs)
    model.compile(optimizer=optimizers.Adam(1e-3), loss="binary_crossentropy", metrics=["accuracy"])
    return model

# pick model
if USE_EFFICIENTNET:
    IMG_SIZE = (224,224)
    # re-build generators with new image size
    train_gen = train_datagen.flow_from_directory(
        DATA_DIR.as_posix(),
        target_size=IMG_SIZE, batch_size=BATCH, class_mode="binary", subset="training", shuffle=True
    )
    val_gen = train_datagen.flow_from_directory(
        DATA_DIR.as_posix(),
        target_size=IMG_SIZE, batch_size=BATCH, class_mode="binary", subset="validation", shuffle=False
    )
    model = build_efficientnet_b0(input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
else:
    model = build_simple_cnn(input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))

model.summary()


## 5) Train

In [5]:

ckpt = callbacks.ModelCheckpoint(MODEL_PATH.as_posix(), monitor="val_accuracy", save_best_only=True, mode="max")
es = callbacks.EarlyStopping(monitor="val_accuracy", patience=3, restore_best_weights=True)

history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=EPOCHS,
    callbacks=[ckpt, es],
    verbose=1
)

model.save(MODEL_PATH.as_posix())
print("Saved model to:", MODEL_PATH)


Epoch 1/3
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 862ms/step - accuracy: 0.5281 - loss: 0.6914



[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 1s/step - accuracy: 0.5296 - loss: 0.6913 - val_accuracy: 0.5203 - val_loss: 0.6911
Epoch 2/3
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 342ms/step - accuracy: 0.5414 - loss: 0.6893



[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 404ms/step - accuracy: 0.5451 - loss: 0.6871 - val_accuracy: 0.5370 - val_loss: 0.6911
Epoch 3/3
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 358ms/step - accuracy: 0.5484 - loss: 0.6873



[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 422ms/step - accuracy: 0.5594 - loss: 0.6834 - val_accuracy: 0.6430 - val_loss: 0.6631




Saved model to: d:\Mtech\Research paper\DeepFake Detection\Draft 4\models\deepfake_detector_keras.h5


## 6) Quick evaluation & threshold suggestion

In [6]:

import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.metrics import f1_score


val_gen.reset()
p1 = model.predict(val_gen, verbose=0).ravel()  # prob of class index 1
y_true = val_gen.classes

# Load mapping and infer which class is index 1
with open(LABELS_PATH, "r") as f:
    idx = json.load(f)  # e.g., {'fake': 0, 'real': 1}
inv = {v:k for k,v in idx.items()}
class1 = inv.get(1, None)
print("Class index 1 is:", class1)

# p(fake) depends on what class 1 represents
if class1 and class1.lower() == "real":
    p_fake = 1.0 - p1
else:
    p_fake = p1  # class 1 is 'fake' or unknown mapping

ths = np.linspace(0.3, 0.7, 41)
best_f1, best_th = -1, 0.5
for th in ths:
    y_hat = (p_fake >= th).astype(int)
    f1 = f1_score(y_true, y_hat, average="macro")
    if f1 > best_f1:
        best_f1, best_th = f1, th

print(f"Suggested threshold: {best_th:.2f} (F1={best_f1:.3f})")
print(confusion_matrix(y_true, (p_fake >= best_th).astype(int)))
print(classification_report(y_true, (p_fake >= best_th).astype(int), target_names=[inv.get(0,'class0'), inv.get(1,'class1')]))


Class index 1 is: Real
Suggested threshold: 0.46 (F1=0.350)
[[ 157 1843]
 [ 435 1565]]
              precision    recall  f1-score   support

        Fake       0.27      0.08      0.12      2000
        Real       0.46      0.78      0.58      2000

    accuracy                           0.43      4000
   macro avg       0.36      0.43      0.35      4000
weighted avg       0.36      0.43      0.35      4000



## 7) Gradio demo (upload & webcam)

In [13]:
%pip install opencv-python
%pip install gradio==4.44.0


Note: you may need to restart the kernel to use updated packages.
Collecting gradio==4.44.0
  Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio==4.44.0)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio==4.44.0)
  Downloading fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)
Collecting ffmpy (from gradio==4.44.0)
  Downloading ffmpy-0.6.1-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio==4.44.0)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio==4.44.0)
  Downloading orjson-3.11.1-cp312-cp312-win_amd64.whl.metadata (43 kB)
Collecting pydub (from gradio==4.44.0)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gradio==4.44.0)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio==4.44.0)

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
pyppeteer 2.0.0 requires pyee<12.0.0,>=11.0.0, but you have pyee 12.0.0 which is incompatible.
pyppeteer 2.0.0 requires urllib3<2.0.0,>=1.25.8, but you have urllib3 2.5.0 which is incompatible.


In [14]:

import io, base64, cv2, gradio as gr
from PIL import Image, ImageFilter

# Reload model (for safety if you restart kernel cells)
try:
    mdl = tf.keras.models.load_model(MODEL_PATH.as_posix())
    with open(LABELS_PATH, "r") as f:
        idx = json.load(f)
    inv = {v:k for k,v in idx.items()}
    class1 = inv.get(1, None)
    model_loaded = True
except Exception as e:
    print("[WARN] Failed to load model, using Mock Mode:", e)
    mdl = None
    class1 = None
    model_loaded = False

THRESH = float(globals().get("best_th", 0.5))  # pick the suggested threshold if available

def preprocess_pil(img: Image.Image, size=(128,128)):
    img = img.convert("RGB").resize(size)
    arr = np.asarray(img).astype("float32") / 255.0
    arr = np.expand_dims(arr, axis=0)
    return arr

def mock_fake_probability(img: Image.Image) -> float:
    try:
        img_cv = cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2BGR)
        var_lap = cv2.Laplacian(img_cv, cv2.CV_64F).var()
    except Exception:
        var_lap = 50.0
    edges = img.convert("L").filter(ImageFilter.FIND_EDGES)
    edge_mean = np.array(edges).mean()
    sharp = np.tanh(var_lap / 200.0)
    edginess = np.tanh(edge_mean / 64.0)
    score = 0.6 * (1 - sharp) + 0.4 * (1 - edginess)
    return float(np.clip(score, 0, 1))

def predict_image(pil_image):
    if pil_image is None:
        return "No image", 0.0, "Provide an image."
    if model_loaded and mdl is not None:
        size = mdl.inputs[0].shape[1:3]
        size = (int(size[0]), int(size[1]))
        arr = preprocess_pil(pil_image, size=size)
        p1 = float(mdl.predict(arr, verbose=0)[0][0])  # prob of class index 1
        if class1 and class1.lower() == "real":
            p_fake = 1.0 - p1
        else:
            p_fake = p1
    else:
        p_fake = mock_fake_probability(pil_image)

    label = "FAKE" if p_fake >= THRESH else "REAL"
    note = "Using trained model" if model_loaded else "Using Mock Mode (demo heuristic)"
    return label, p_fake, note

title = "Deepfake Detector (Notebook Demo)"
desc = "Upload an image or use webcam. The app returns a label and a fake probability. Shows demo heuristic if model isn't loaded."

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"""### {title}
{desc}

**Model loaded:** {model_loaded}  
**Threshold:** {THRESH:.2f}""")
    with gr.Row():
        with gr.Column():
            img_in = gr.Image(type="pil", label="Input image", sources=["upload", "webcam"])
            btn = gr.Button("Analyze")
        with gr.Column():
            out_label = gr.Textbox(label="Prediction", interactive=False)
            out_prob = gr.Slider(0, 1, value=0.0, step=0.001, label="Fake probability", interactive=False)
            out_note = gr.Textbox(label="Note", interactive=False)
    btn.click(fn=predict_image, inputs=img_in, outputs=[out_label, out_prob, out_note])

demo




Gradio Blocks instance: 1 backend functions
-------------------------------------------
fn_index=0
 inputs:
 |-<gradio.components.image.Image object at 0x000001C077E234D0>
 outputs:
 |-<gradio.components.textbox.Textbox object at 0x000001C05601C530>
 |-<gradio.components.slider.Slider object at 0x000001C077F2FBC0>
 |-<gradio.components.textbox.Textbox object at 0x000001C077F5CE30>