In [None]:
# Pre-Trained Model base DeepFake vs Real images classifier
!pip install gradio transformers timm datasets evaluate transformers accelerate evaluate torch torchvision matplotlib opencv-python scikit-learn --quiet
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import timm
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import os

import gradio as gr
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import io
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Dataset
# Upload kaggle.json (the Kaggle API credentials file) through the Colab upload widget.
from google.colab import files
files.upload()

# Copy the credentials into ~/.kaggle/ and restrict the file to owner
# read/write (mode 600). Presumably this is where the kaggle CLI looks for
# its credentials -- confirm against the Kaggle API documentation.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the dataset archive from Kaggle into the current working directory.
!kaggle datasets download -d manjilkarki/deepfake-and-real-images

# Unzip the archive quietly (-q) into the dataset/ folder.
!unzip -q deepfake-and-real-images.zip -d dataset

# ------------------------------
# Pretrained deepfake detector (loaded by name via from_pretrained)
# ------------------------------
MODEL_NAME = "dima806/deepfake_vs_real_image_detection"

# The processor turns images into model inputs; the model is switched to
# evaluation mode right away (nn.Module.eval() returns the module itself).
processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
model = AutoModelForImageClassification.from_pretrained(MODEL_NAME).eval()

# Class-index -> label-name mapping stored in the model config.
id2label = model.config.id2label

# ------------------------------
# Grad-CAM utility
# ------------------------------
def generate_gradcam(img, model, processor, target_class=None):
    """Build a gradient-based saliency overlay for an input image.

    Despite the name, no intermediate layer is hooked: the map is computed
    from the gradient of the target-class logit with respect to the
    preprocessed input pixels. Each input channel is weighted by its mean
    gradient, the weighted channels are summed, negative values are clipped,
    and the result is min-max normalised and blended over the image.

    Args:
        img: PIL image to explain.
        model: Classifier whose output exposes ``.logits``.
        processor: Callable that returns a mapping containing a
            ``"pixel_values"`` tensor when invoked as
            ``processor(images=..., return_tensors="pt")``.
        target_class: Index of the logit to explain. Defaults to the
            top-scoring class.

    Returns:
        An RGB ``PIL.Image`` of the same size as ``img`` with the heatmap
        blended on top (60% image, 40% heatmap).
    """
    rgb_img = img.convert("RGB")
    pixel_values = processor(images=rgb_img, return_tensors="pt")["pixel_values"]

    # Run on whatever device the model lives on so CPU and GPU models both work.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        pixel_values = pixel_values.to(first_param.device)
    pixel_values = pixel_values.detach().requires_grad_(True)

    # Forward pass
    logits = model(pixel_values).logits
    if target_class is None:
        target_class = logits.argmax(dim=1).item()
    score = logits[0, int(target_class)]

    # autograd.grad returns the input gradient directly and, unlike
    # score.backward(), does not accumulate gradients into the model
    # parameters on every call.
    (input_grad,) = torch.autograd.grad(score, pixel_values)

    # Per-channel weights (mean gradient) applied to the input channels.
    channel_weights = input_grad[0].mean(dim=(1, 2))
    channels = pixel_values[0].detach()
    cam = (channel_weights[:, None, None] * channels).sum(dim=0)
    cam = cam.clamp(min=0).cpu().numpy().astype(np.float32)

    # PIL's size is (width, height), which is also the order cv2.resize expects.
    width, height = rgb_img.size
    cam = cv2.resize(cam, (width, height))
    cam = cam - cam.min()
    peak = cam.max()
    if peak > 0:
        cam = cam / peak

    heatmap = cv2.applyColorMap((cam * 255).astype(np.uint8), cv2.COLORMAP_JET)
    # applyColorMap produces BGR; convert so the channels line up with the
    # RGB image it is blended with (otherwise hot regions render as blue).
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

    # Overlay heatmap on image
    overlay = cv2.addWeighted(np.array(rgb_img), 0.6, heatmap, 0.4, 0)
    return Image.fromarray(overlay)

# ------------------------------
# Prediction Function
# ------------------------------
def predict(img):
    """Classify an image and build its saliency overlay for the Gradio UI.

    Args:
        img: PIL image supplied by the Gradio input, or ``None`` when nothing
            was uploaded.

    Returns:
        A 3-tuple ``(label_output, original_image, overlay_image)``. On
        success ``label_output`` maps every class name to its probability.
        On failure it is a plain error string and the input image is
        returned in both image slots.
    """
    if img is None:
        return "error: no image provided", None, None

    try:
        inputs = processor(images=img, return_tensors="pt")
        with torch.no_grad():
            logits = model(**inputs).logits

        probs = torch.softmax(logits, dim=-1)[0].cpu().numpy()
        results = {id2label[idx]: float(p) for idx, p in enumerate(probs)}

        # Generate the overlay for the predicted class.
        cam_img = generate_gradcam(
            img, model, processor, target_class=int(probs.argmax())
        )
        return results, img, cam_img

    except Exception as e:
        # Best-effort UI boundary: report the problem in the label slot.
        # A string is used rather than {"error": str(e)} because the label
        # component expects float confidences for dict values.
        return f"error: {e}", img, img

# -----------------
# Dataset Evaluation
# -----------------
from transformers import pipeline
from tqdm import tqdm

# Image-classification pipeline for MODEL_NAME, placed on the selected device.
classifier = pipeline(task="image-classification", model=MODEL_NAME, device=device)

# Root folder of the unzipped dataset; split sub-folders are joined onto it.
dataset_root = "dataset/Dataset/"

def evaluate_dataset(folder_path):
    """Score the pipeline classifier on a folder of labelled images.

    ``folder_path`` is expected to contain ``Real`` and ``Fake``
    sub-folders. The sub-folder name is the ground-truth label and is
    compared with the top-scoring label returned by ``classifier``.
    Precision, recall and F1 treat ``"Fake"`` as the positive class.
    Files that cannot be opened or classified are reported and skipped.

    Args:
        folder_path: Directory holding the ``Real``/``Fake`` sub-folders.

    Returns:
        Dict with "Accuracy", "Precision", "Recall" and "F1-score", each a
        percentage rounded to two decimals.

    Raises:
        ValueError: If no image could be evaluated (for example, both class
            folders are missing or empty).
    """
    y_true, y_pred = [], []

    for label in ("Real", "Fake"):
        class_dir = os.path.join(folder_path, label)
        if not os.path.isdir(class_dir):
            print(f"⚠️ Skipping {class_dir}, not found")
            continue

        # Sorted so the processing order (and any error log) is reproducible.
        img_names = sorted(os.listdir(class_dir))
        for img_name in tqdm(img_names, desc=f"Processing {label}"):
            img_path = os.path.join(class_dir, img_name)

            try:
                # The context manager closes the file handle as soon as the
                # pixel data has been copied by convert().
                with Image.open(img_path) as raw:
                    img = raw.convert("RGB")
                preds = classifier(img)
                pred_label = max(preds, key=lambda p: p["score"])["label"]
            except Exception as e:
                # Best effort: one unreadable file must not abort the run.
                print(f"Error processing {img_path}: {e}")
                continue

            y_true.append(label)
            y_pred.append(pred_label)

    if not y_true:
        raise ValueError(f"No images could be evaluated under {folder_path!r}")

    # Compute metrics
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, pos_label="Fake")
    rec = recall_score(y_true, y_pred, pos_label="Fake")
    f1 = f1_score(y_true, y_pred, pos_label="Fake")

    return {
        "Accuracy": round(acc * 100, 2),
        "Precision": round(prec * 100, 2),
        "Recall": round(rec * 100, 2),
        "F1-score": round(f1 * 100, 2),
    }

# Evaluate on the Test split of the dataset and report the metrics.
test_folder = os.path.join(dataset_root, "Test")
results = evaluate_dataset(test_folder)

# Heading and metrics dict go on separate lines.
print("📊 Evaluation Results on Test Set:", results, sep="\n")

# -----------------
# Gradio App
# -----------------
# Input: one uploaded picture, handed to predict() as a PIL image.
face_input = gr.Image(type="pil", label="Upload Face Image")

# Outputs, in the order predict() returns them.
prediction_outputs = [
    gr.Label(num_top_classes=2, label="Prediction"),
    gr.Image(type="pil", label="Original Image"),
    gr.Image(type="pil", label="Grad-CAM Heatmap"),
]

demo = gr.Interface(
    fn=predict,
    inputs=face_input,
    outputs=prediction_outputs,
    title="🛡 DeepShield: Deepfake Detection Demo",
    description="Upload a face image. The model predicts Real vs Fake and shows manipulated regions using Grad-CAM.",
    examples=[
        ["https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cat.png"],
    ],
)

demo.launch()


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hUsing device: cpu


Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/manjilkarki/deepfake-and-real-images
License(s): unknown
Downloading deepfake-and-real-images.zip to /content
 99% 1.67G/1.68G [00:14<00:00, 104MB/s] 
100% 1.68G/1.68G [00:14<00:00, 125MB/s]


Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


preprocessor_config.json:   0%|          | 0.00/325 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/343M [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/140002 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/140002 [00:00<?, ?files/s]

Generating train split: 0 examples [00:00, ? examples/s]

Resolving data files:   0%|          | 0/39428 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/39428 [00:00<?, ?files/s]

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 140002
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 39428
    })
})
Dataset features: {'image': Image(mode=None, decode=True), 'label': ClassLabel(names=['Fake', 'Real'])}
✅ Using image column: image


Map:   0%|          | 0/140002 [00:00<?, ? examples/s]

OSError: [Errno 28] No space left on device

In [None]:
# ----------------------------
# Deep Learning based Deepfake vs Real Classifier
# ----------------------------

import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import gradio as gr

# Pick the compute device: CUDA when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Dataset
# STEP 1: Upload kaggle.json (the Kaggle API credentials file) through the
# Colab upload widget, copy it into ~/.kaggle/ and restrict it to owner
# read/write (mode 600). Presumably ~/.kaggle/ is where the kaggle CLI looks
# for its credentials -- confirm against the Kaggle API documentation.
from google.colab import files
files.upload()
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# STEP 2: Download the dataset archive from Kaggle into the current working directory.
!kaggle datasets download -d manjilkarki/deepfake-and-real-images

# STEP 3: Unzip the archive quietly (-q) into the dataset/ folder.
!unzip -q deepfake-and-real-images.zip -d dataset/

# ----------------------------
# 1. Data Loading
# ----------------------------
data_dir = "dataset"

# Every image is resized to 128x128, converted to a tensor, and normalised
# per channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
])

# One ImageFolder per split; the class sub-folder names become the labels.
train_data, val_data, test_data = (
    datasets.ImageFolder(os.path.join(data_dir, split_dir), transform=transform)
    for split_dir in ("Dataset/Train", "Dataset/Validation", "Dataset/Test")
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(split_data, batch_size=32, shuffle=False)
    for split_data in (val_data, test_data)
)

class_names = train_data.classes
print("Classes:", class_names)

# ----------------------------
# 2. Model
# ----------------------------
# ResNet-18 built with weights=None (no pretrained weights); the final layer
# is replaced so that it emits one logit per dataset class.
model = models.resnet18(weights=None)
model.fc = nn.Linear(
    in_features=model.fc.in_features,
    out_features=len(class_names),
)
model.to(device)  # nn.Module.to moves the parameters in place

# Cross-entropy loss, optimised with Adam at a learning rate of 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# ----------------------------
# 3. Training Loop
# ----------------------------
def train_model(epochs=3):
    """Train the module-level ``model`` on ``train_loader``.

    Uses the module-level ``criterion`` and ``optimizer``. After each epoch
    it prints the mean per-sample loss and the training accuracy, so the
    reported loss is comparable across dataset and batch sizes (a plain sum
    of batch losses grows with the number of batches).

    Args:
        epochs: Number of full passes over ``train_loader``.
    """
    for epoch in range(epochs):
        model.train()
        loss_sum, correct, seen = 0.0, 0, 0
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_size = labels.size(0)
            # Assumes criterion returns the batch mean (the default for
            # nn.CrossEntropyLoss); weighting by batch size keeps a smaller
            # final batch from being over-counted.
            loss_sum += loss.item() * batch_size
            correct += (outputs.argmax(1) == labels).sum().item()
            seen += batch_size

        # Guard against an empty loader instead of dividing by zero.
        avg_loss = loss_sum / seen if seen else 0.0
        acc = 100 * correct / seen if seen else 0.0
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Train Acc: {acc:.2f}%")

train_model(epochs=2)  # train for 2 epochs (increase if needed)

# ----------------------------
# 4. Evaluation
# ----------------------------
def evaluate(loader):
    """Return the accuracy (in percent) of the module-level ``model`` on ``loader``.

    The model is put in evaluation mode and gradients are disabled while
    the batches are scored. The hit count is divided by
    ``len(loader.dataset)``.
    """
    model.eval()
    hits = 0
    with torch.no_grad():
        for batch, targets in loader:
            batch = batch.to(device)
            targets = targets.to(device)
            predicted = model(batch).argmax(1)
            hits += (predicted == targets).sum().item()
    total = len(loader.dataset)
    return 100 * hits / total

# Score the validation split first, then the test split.
val_accuracy = evaluate(val_loader)
print("Validation Accuracy:", val_accuracy)
test_accuracy = evaluate(test_loader)
print("Test Accuracy:", test_accuracy)

# ----------------------------
# 5. Gradio App
# ----------------------------
from PIL import Image

def predict(img):
    """Classify one image with the trained model for the Gradio UI.

    Args:
        img: Image from the Gradio input. PIL images are converted to RGB
            first so that grayscale or RGBA uploads match the 3-channel
            normalisation applied by ``transform``.

    Returns:
        Dict mapping each name in ``class_names`` to its softmax probability.
    """
    model.eval()
    if isinstance(img, Image.Image):
        img = img.convert("RGB")
    batch = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        probs = torch.softmax(model(batch), dim=1)[0]
    # tolist() yields plain Python floats, one per class.
    return dict(zip(class_names, probs.tolist()))

# Gradio front end: a single PIL image in, a two-class label out.
image_input = gr.Image(type="pil")
label_output = gr.Label(num_top_classes=2)

demo = gr.Interface(
    fn=predict,
    inputs=image_input,
    outputs=label_output,
    title="Deepfake vs Real Classifier",
    description="Upload an image to classify as Real or Fake",
)

demo.launch()

Using device: cuda


Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/manjilkarki/deepfake-and-real-images
License(s): unknown
Downloading deepfake-and-real-images.zip to /content
 95% 1.60G/1.68G [00:16<00:01, 49.9MB/s]
100% 1.68G/1.68G [00:16<00:00, 107MB/s] 
Classes: ['Fake', 'Real']
Epoch 1/2, Loss: 1052.0222, Train Acc: 89.53%
Epoch 2/2, Loss: 538.2255, Train Acc: 95.17%
Validation Accuracy: 94.59521152480471
Test Accuracy: 92.64557542411738
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://82b3547c1731c87be4.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https

