In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("tapakah68/face-segmentation")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/tapakah68/face-segmentation?dataset_version_number=4...


100%|██████████| 42.1M/42.1M [00:00<00:00, 100MB/s] 

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/tapakah68/face-segmentation/versions/4


In [2]:
!pip install opencv-python --quiet

import os
import cv2
import numpy as np
import random
from glob import glob

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.utils import to_categorical


In [10]:
# ====== CONFIG ======
DATA_DIR = "/root/.cache/kagglehub/datasets/tapakah68/face-segmentation/versions/4"  # <-- CHANGE THIS TO YOUR FOLDER

IMG_HEIGHT = 224
IMG_WIDTH = 224
CHANNELS = 3

PATCH_ROWS = 4
PATCH_COLS = 4

FRAMES_PER_VIDEO = 16        # how many frames to sample per video
BATCH_SIZE = 4               # adjust based on GPU memory
EPOCHS = 5                   # increase later
TEST_SPLIT = 0.2
VAL_SPLIT = 0.1              # from remaining train set

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [7]:
real_images = glob(os.path.join(DATA_DIR, "img", "*.jpg")) + \
              glob(os.path.join(DATA_DIR, "img", "*.png")) + \
              glob(os.path.join(DATA_DIR, "img", "*.jpeg"))

fake_images = glob(os.path.join(DATA_DIR, "masks", "*.jpg")) + \
              glob(os.path.join(DATA_DIR, "masks", "*.png")) + \
              glob(os.path.join(DATA_DIR, "masks", "*.jpeg"))

print("Real images:", len(real_images))
print("Fake images:", len(fake_images))

image_paths = real_images + fake_images
labels = [0] * len(real_images) + [1] * len(fake_images)  # 0=real, 1=fake

# Shuffle together
combined = list(zip(image_paths, labels))
random.shuffle(combined)
image_paths, labels = zip(*combined)
image_paths, labels = list(image_paths), list(labels)


Real images: 20
Fake images: 20


In [8]:
total = len(image_paths)
test_size = int(total * TEST_SPLIT)
train_val_size = total - test_size
val_size = int(train_val_size * VAL_SPLIT)

train_paths = image_paths[:train_val_size - val_size]
train_labels = labels[:train_val_size - val_size]

val_paths = image_paths[train_val_size - val_size:train_val_size]
val_labels = labels[train_val_size - val_size:train_val_size]

test_paths = image_paths[train_val_size:]
test_labels = labels[train_val_size:]

print(f"Train: {len(train_paths)}, Val: {len(val_paths)}, Test: {len(test_paths)}")


Train: 29, Val: 3, Test: 8


In [11]:
def image_to_patches(img_path,
                     img_height=IMG_HEIGHT,
                     img_width=IMG_WIDTH,
                     patch_rows=PATCH_ROWS,
                     patch_cols=PATCH_COLS):
    """
    Load image, resize, split into patches, preprocess.
    Returns array of shape (TIME_STEPS, patch_h, patch_w, 3)
    """
    # Read image
    img = cv2.imread(img_path)
    if img is None:
        # If broken, return zeros
        patch_h = img_height // patch_rows
        patch_w = img_width  // patch_cols
        return np.zeros((patch_rows * patch_cols, patch_h, patch_w, 3), dtype=np.float32)

    # Convert BGR->RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Resize
    img = cv2.resize(img, (img_width, img_height))

    patch_h = img_height // patch_rows
    patch_w = img_width  // patch_cols

    patches = []
    for r in range(patch_rows):
        for c in range(patch_cols):
            y1 = r * patch_h
            y2 = (r + 1) * patch_h
            x1 = c * patch_w
            x2 = (c + 1) * patch_w
            patch = img[y1:y2, x1:x2, :]
            patches.append(patch)

    patches = np.array(patches, dtype=np.float32)
    # Preprocess for EfficientNet
    patches = preprocess_input(patches)
    return patches  # (TIME_STEPS, patch_h, patch_w, 3)


In [12]:
class ImageSequence(tf.keras.utils.Sequence):
    def __init__(self, image_paths, labels, batch_size=BATCH_SIZE, shuffle=True):
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indices = np.arange(len(self.image_paths))
        self.on_epoch_end()

        # Determine patch size using 1 sample
        sample = image_to_patches(self.image_paths[0])
        self.time_steps, self.patch_h, self.patch_w, _ = sample.shape

    def __len__(self):
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, idx):
        batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_paths = [self.image_paths[i] for i in batch_indices]
        batch_labels = [self.labels[i] for i in batch_indices]

        X = np.zeros((len(batch_paths),
                      self.time_steps,
                      self.patch_h,
                      self.patch_w,
                      CHANNELS),
                     dtype=np.float32)
        y = np.array(batch_labels, dtype=np.float32)

        for i, p in enumerate(batch_paths):
            X[i] = image_to_patches(p)

        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)


In [13]:
train_seq = ImageSequence(train_paths, train_labels, batch_size=BATCH_SIZE)
val_seq   = ImageSequence(val_paths,   val_labels,   batch_size=BATCH_SIZE, shuffle=False)
test_seq  = ImageSequence(test_paths,  test_labels,  batch_size=BATCH_SIZE, shuffle=False)

# Check shapes
X_batch, y_batch = next(iter(train_seq))
print("X_batch:", X_batch.shape, "y_batch:", y_batch.shape)


X_batch: (4, 16, 56, 56, 3) y_batch: (4,)


In [14]:
# Get patch size from generator
_, TIME_STEPS, PATCH_H, PATCH_W, _ = X_batch.shape

input_shape = (TIME_STEPS, PATCH_H, PATCH_W, CHANNELS)
inputs = layers.Input(shape=input_shape)

# Pretrained CNN base (no top)
cnn_base = EfficientNetB0(include_top=False, weights="imagenet", pooling="avg")
cnn_base.trainable = False  # freeze for initial training

# Apply CNN to each patch (TimeDistributed)
x = layers.TimeDistributed(cnn_base)(inputs)  # (batch, TIME_STEPS, feature_dim)

# RNN over patch sequence
x = layers.LSTM(128, return_sequences=False)(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(64, activation="relu")(x)
x = layers.Dropout(0.3)(x)

outputs = layers.Dense(1, activation="sigmoid")(x)

model = models.Model(inputs, outputs)
model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [16]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

history = model.fit(
    train_seq,
    validation_data=val_seq,
    epochs=EPOCHS
)


Epoch 1/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 772ms/step - accuracy: 0.4781 - loss: 0.8632

  self._warn_if_super_not_called()


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 8s/step - accuracy: 0.4863 - loss: 0.8626 - val_accuracy: 0.6667 - val_loss: 0.6630
Epoch 2/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 701ms/step - accuracy: 0.7159 - loss: 0.6434 - val_accuracy: 0.6667 - val_loss: 0.6230
Epoch 3/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 781ms/step - accuracy: 0.5924 - loss: 0.6291 - val_accuracy: 0.6667 - val_loss: 0.5752
Epoch 4/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 667ms/step - accuracy: 0.7340 - loss: 0.5166 - val_accuracy: 0.6667 - val_loss: 0.4693
Epoch 5/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 817ms/step - accuracy: 0.7351 - loss: 0.4847 - val_accuracy: 1.0000 - val_loss: 0.3617


In [18]:
from sklearn.metrics import confusion_matrix, classification_report

test_loss, test_acc = model.evaluate(test_seq)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

# Detailed metrics
y_true = np.array(test_labels)
y_pred_probs = model.predict(test_seq).ravel()
y_pred = (y_pred_probs >= 0.5).astype(int)

print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))

print("\nClassification Report:")
print(classification_report(y_true, y_pred, digits=4))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 1.0000 - loss: 0.3078
Test Loss: 0.2870, Test Accuracy: 1.0000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 254ms/step
Confusion Matrix:
[[8]]

Classification Report:
              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000         8

    accuracy                         1.0000         8
   macro avg     1.0000    1.0000    1.0000         8
weighted avg     1.0000    1.0000    1.0000         8





In [19]:
# After training
model.save("deepfake_cnn_rnn.h5")
print("Model saved.")




Model saved.


In [20]:
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.efficientnet import preprocess_input

loaded_model = load_model("deepfake_cnn_rnn.h5")
model = loaded_model  # just to keep same name




In [21]:
!pip install gradio --quiet
import gradio as gr
import numpy as np
import cv2


In [23]:
def preprocess_image_for_gradio(image):
    """
    Preprocesses a raw image (NumPy array) from Gradio for model prediction.
    Resizes it, splits it into patches, applies preprocessing, and adds the batch dimension.
    """
    if image is None:
        return None

    # Resize the image
    img = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))

    # Calculate patch dimensions
    patch_h = IMG_HEIGHT // PATCH_ROWS
    patch_w = IMG_WIDTH  // PATCH_COLS

    patches = []
    # Extract patches
    for r in range(PATCH_ROWS):
        for c in range(PATCH_COLS):
            y1 = r * patch_h
            y2 = (r + 1) * patch_h
            x1 = c * patch_w
            x2 = (c + 1) * patch_w
            patch = img[y1:y2, x1:x2, :]
            patches.append(patch)

    # Convert to NumPy array and apply EfficientNet preprocessing
    patches = np.array(patches, dtype=np.float32)
    patches = preprocess_input(patches)

    return patches

print("preprocess_image_for_gradio function defined.")

preprocess_image_for_gradio function defined.


In [27]:
def predict_image(gradio_image):
    """
    Predicts whether an image is 'Real' or 'Fake' using the trained model.
    """
    if gradio_image is None:
        # Handle case where no image is provided, return default values for both outputs
        return {"No image provided.": 0.0}, 0.0

    # Preprocess the image using the previously defined function
    preprocessed_patches = preprocess_image_for_gradio(gradio_image)

    if preprocessed_patches is None:
        # Handle preprocessing error, return default values
        return {"Error: Could not preprocess image.": 0.0}, 0.0

    # Add batch dimension: (1, TIME_STEPS, patch_h, patch_w, CHANNELS)
    input_tensor = np.expand_dims(preprocessed_patches, axis=0)

    # Make prediction
    prediction_probability = model.predict(input_tensor)[0][0]

    # For gr.Label(num_top_classes=2), a dictionary of {class_name: probability} is ideal
    label_output = {
        "Real": (1 - prediction_probability),
        "Fake": prediction_probability
    }

    # For gr.Number, return the raw prediction probability
    number_output = prediction_probability

    return label_output, number_output

print("predict_image function defined.")

predict_image function defined.


In [28]:
title = "Deepfake Image Detector (CNN + RNN)"
description = """
Upload a face image. The model analyses patch-wise features with a CNN + RNN architecture and predicts whether it is REAL or FAKE.
"""

demo = gr.Interface(
    fn=predict_image,
    inputs=gr.Image(type="numpy", label="Upload face image"),
    outputs=[
        gr.Label(num_top_classes=2, label="Prediction (REAL vs FAKE)"),
        gr.Number(label="Fake probability (0 to 1)")
    ],
    title=title,
    description=description,
    examples=None
)

demo.launch(debug=True)

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://11bc788e243bef5778.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 44s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 284ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step
Keyboard interruption in main thread... closing server.


KeyboardInterrupt: 