# Paper 3 – Deepfake Detection Benchmark


This notebook implements the **Paper 3** variant of the deepfake detector.



The code cells below typically follow this structure:

- Import libraries and configure dataset paths and hyperparameters.
- Construct the dataset and data loaders for training and evaluation.
- Define the model architecture corresponding to Paper 3.
- Train the model and report performance on FF++ and cross-dataset tests.



> Run the cells from top to bottom to reproduce the results reported for Paper 3.

Paper link : https://arxiv.org/pdf/2503.19683 (2503.19683v1.pdf)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import open_clip
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from PIL import Image
import os
from tqdm import tqdm


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

IMG_SIZE = 224
BATCH_SIZE = 8
EPOCHS = 5
LR = 8e-5   # paper uses ~8e-5 initial LR

FFPP_REAL_PATH = "PATH_TO_REAL"
FFPP_FAKE_PATH = "PATH_TO_FAKE"

MODE = "ln_tuning"
# options:
# "linear_probe"
# "ln_tuning"


In [None]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

IMG_SIZE = 224
BATCH_SIZE = 8
EPOCHS = 5
LR = 8e-5   # paper uses ~8e-5 initial LR


FFPP_REAL_PATH = r""
FFPP_FAKE_PATH = r""

MODE = "ln_tuning"
# options:
# "linear_probe"
# "ln_tuning"


In [4]:
class FFPPDataset(Dataset):
    def __init__(self, real_path, fake_path):
        self.samples = []

        for f in os.listdir(real_path):
            self.samples.append((os.path.join(real_path,f),0))

        for f in os.listdir(fake_path):
            self.samples.append((os.path.join(fake_path,f),1))

        self.transform = T.Compose([
            T.Resize((IMG_SIZE,IMG_SIZE)),
            T.ToTensor(),
            T.Normalize([0.5]*3,[0.5]*3)
        ])

    def __len__(self):
        return len(self.samples)

    def __getitem__(self,idx):
        path,label=self.samples[idx]
        img=Image.open(path).convert("RGB")
        img=self.transform(img)
        return img,label


In [5]:
clip_model, _, _ = open_clip.create_model_and_transforms(
    "ViT-B-16",
    pretrained="openai"
)

clip_model = clip_model.to(DEVICE)

for p in clip_model.parameters():
    p.requires_grad = False




In [6]:
def enable_ln_tuning(model):
    for name, param in model.named_parameters():
        if "ln" in name.lower():
            param.requires_grad = True

if MODE == "ln_tuning":
    enable_ln_tuning(clip_model)


In [7]:
class HypersphereNorm(nn.Module):
    def forward(self,x):
        return F.normalize(x, dim=-1)



In [8]:
class CLIPHiddenPotential(nn.Module):
    def __init__(self):
        super().__init__()
        self.clip = clip_model
        self.norm = HypersphereNorm()
        self.classifier = nn.Linear(512,2)

    def forward(self,x):
        feat = self.clip.encode_image(x)

        # hypersphere projection (paper core idea)
        feat = self.norm(feat)

        logits = self.classifier(feat)
        return logits


In [9]:
model = CLIPHiddenPotential().to(DEVICE)

opt = torch.optim.Adam(
    filter(lambda p:p.requires_grad, model.parameters()),
    lr=LR
)

criterion = nn.CrossEntropyLoss()


In [12]:
dataset = FFPPDataset(FFPP_REAL_PATH,FFPP_FAKE_PATH)
loader = DataLoader(dataset,batch_size=BATCH_SIZE,shuffle=True)

for epoch in range(EPOCHS):

    model.train()
    total_loss=0

    for imgs,labels in tqdm(loader):
        imgs=imgs.to(DEVICE)
        labels=labels.to(DEVICE)

        opt.zero_grad()
        logits=model(imgs)
        loss=criterion(logits,labels)
        loss.backward()
        opt.step()

        total_loss+=loss.item()

    print("Epoch",epoch+1,"Loss:",total_loss/len(loader))


100%|██████████| 4782/4782 [2:01:18<00:00,  1.52s/it]  


Epoch 1 Loss: 0.4399120878238918


100%|██████████| 4782/4782 [37:02<00:00,  2.15it/s] 


Epoch 2 Loss: 0.26639035208963263


100%|██████████| 4782/4782 [36:50<00:00,  2.16it/s] 


Epoch 3 Loss: 0.22321713484139333


100%|██████████| 4782/4782 [36:59<00:00,  2.15it/s]  


Epoch 4 Loss: 0.19441754860660923


100%|██████████| 4782/4782 [36:55<00:00,  2.16it/s] 

Epoch 5 Loss: 0.1740376332361681





In [None]:
model.eval()
correct=0
total=0

with torch.no_grad():
    for imgs,labels in loader:
        imgs=imgs.to(DEVICE)
        labels=labels.to(DEVICE)

        preds=model(imgs).argmax(1)
        correct+=(preds==labels).sum().item()
        total+=labels.size(0)

print("Accuracy:",correct/total)


In [14]:
SAVE_DIR = "./checkpoints"
os.makedirs(SAVE_DIR, exist_ok=True)

MODEL_NAME = "paper3_model"   # change per notebook
best_loss = float("inf")

def save_checkpoint(model, optimizer, epoch, loss):
    path = os.path.join(SAVE_DIR, f"{MODEL_NAME}_BEST.pth")
    torch.save({
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "loss": loss
    }, path)
    print("Saved BEST checkpoint:", path)
save_checkpoint(model, opt, epoch+1, total_loss/len(loader))

Saved BEST checkpoint: ./checkpoints\paper3_model_BEST.pth


In [15]:
# %% =========================
# LOAD BEST MODEL FOR TESTING
# =========================

BEST_MODEL_PATH = "checkpoints/paper3_model_BEST.pth"

print("\nLoading best trained model from:", BEST_MODEL_PATH)

# Create fresh model instance
model = CLIPHiddenPotential().to(DEVICE)

# Load weights
state_dict = torch.load(BEST_MODEL_PATH, map_location=DEVICE)
model.load_state_dict(state_dict["model_state_dict"])

model.eval()

print("✔ Best model loaded successfully")



Loading best trained model from: checkpoints/paper3_model_BEST.pth


  state_dict = torch.load(BEST_MODEL_PATH, map_location=DEVICE)


✔ Best model loaded successfully


In [16]:
# %% =========================
# Evaluation Utilities (Paper1)
# =============================

import numpy as np
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
)
from tqdm import tqdm
import torch.nn.functional as F


@torch.no_grad()
def evaluate(loader, model):
    model.eval()

    all_probs = []
    all_preds = []
    all_labels = []

    for imgs, labels in tqdm(loader, desc="Evaluating", leave=False):
        imgs = imgs.to(DEVICE)

        logits = model(imgs)                  # (B,2)
        probs = F.softmax(logits, dim=1)[:,1]   # fake prob

        preds = (probs >= 0.5).long().cpu()

        all_probs.append(probs.cpu())
        all_preds.append(preds)
        all_labels.append(labels)

    probs = torch.cat(all_probs).numpy()
    preds = torch.cat(all_preds).numpy()
    labels = torch.cat(all_labels).numpy()

    return {
        "acc": accuracy_score(labels, preds),
        "auc": roc_auc_score(labels, probs),
        "precision": precision_score(labels, preds, zero_division=0),
        "recall": recall_score(labels, preds, zero_division=0),
        "f1": f1_score(labels, preds, zero_division=0),
    }


In [None]:
# %% =========================
# FF++ TEST SET | 3-RUN AVG
# =========================

print("\n===== FF++ TEST (Paper1) | 3-RUN AVG =====")

FFPP_REAL_PATH = r""
FFPP_FAKE_PATH = r""

NUM_RUNS = 1
all_metrics = []

ffpp_test_dataset = FFPPDataset(FFPP_REAL_PATH, FFPP_FAKE_PATH)
ffpp_test_loader = DataLoader(
    ffpp_test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

for run in range(NUM_RUNS):
    print(f"\nRun {run+1}/{NUM_RUNS}")

    metrics = evaluate(ffpp_test_loader, model)
    all_metrics.append(metrics)
    print(metrics)

# Average
avg = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0]}
print("\nAVG:", avg)



===== FF++ TEST (Paper1) | 3-RUN AVG =====

Run 1/1


                                                                

{'acc': 0.8839200059036233, 'auc': 0.9386293973816205, 'precision': 0.9536800227337312, 'recall': 0.9027977044476327, 'f1': 0.9275415726196509}

AVG: {'acc': np.float64(0.8839200059036233), 'auc': np.float64(0.9386293973816205), 'precision': np.float64(0.9536800227337312), 'recall': np.float64(0.9027977044476327), 'f1': np.float64(0.9275415726196509)}


In [19]:
# %% =========================
# JPEG COMPRESSION TEST
# =========================

from PIL import Image
import io

class JPEGCompression:
    def __init__(self, quality):
        self.quality = quality

    def __call__(self, img_tensor):

        # UNNORMALIZE
        img = img_tensor.clone()
        img = img * 0.5 + 0.5     # [-1,1] -> [0,1]
        img = img.clamp(0,1)

        img = img.permute(1,2,0).cpu().numpy()
        img = (img * 255).astype(np.uint8)

        pil_img = Image.fromarray(img)
        buffer = io.BytesIO()
        pil_img.save(buffer, format="JPEG", quality=self.quality)
        buffer.seek(0)

        comp = Image.open(buffer).convert("RGB")
        comp = np.array(comp) / 255.0
        comp = torch.tensor(comp).permute(2,0,1).float()

        # RENORMALIZE
        comp = (comp - 0.5) / 0.5

        return comp



print("\n===== JPEG COMPRESSION TEST (Paper1) | 3-RUN AVG =====")

jpeg_qualities = [100, 90, 75, 50, 30]

for q in jpeg_qualities:
    print(f"\n--- JPEG Quality {q} ---")

    class JPEGWrapper(torch.utils.data.Dataset):
        def __init__(self, base_dataset, quality):
            self.base = base_dataset
            self.comp = JPEGCompression(quality)

        def __len__(self):
            return len(self.base)

        def __getitem__(self, idx):
            img, label = self.base[idx]
            img = self.comp(img)
            return img, label

    metrics_runs = []

    for run in range(NUM_RUNS):

        jpeg_dataset = JPEGWrapper(ffpp_test_dataset, q)
        jpeg_loader = DataLoader(
            jpeg_dataset,
            batch_size=BATCH_SIZE,
            shuffle=False,
            num_workers=0,
        )

        metrics = evaluate(jpeg_loader, model)
        metrics_runs.append(metrics)

    avg = {k: np.mean([m[k] for m in metrics_runs]) for k in metrics_runs[0]}
    print("AVG:", avg)



===== JPEG COMPRESSION TEST (Paper1) | 3-RUN AVG =====

--- JPEG Quality 100 ---


Evaluating:   0%|          | 0/1694 [00:00<?, ?it/s]

                                                               

AVG: {'acc': np.float64(0.8665043170245739), 'auc': np.float64(0.9356247790955461), 'precision': np.float64(0.9602916543501823), 'recall': np.float64(0.8739239598278336), 'f1': np.float64(0.915074409652129)}

--- JPEG Quality 90 ---


                                                               

AVG: {'acc': np.float64(0.8178732196885838), 'auc': np.float64(0.9208130607085807), 'precision': np.float64(0.9671831289003658), 'recall': np.float64(0.8060437589670014), 'f1': np.float64(0.8792917930157488)}

--- JPEG Quality 75 ---


                                                               

AVG: {'acc': np.float64(0.8637000959338794), 'auc': np.float64(0.8967784879280761), 'precision': np.float64(0.9018049917954918), 'recall': np.float64(0.9363342898134863), 'f1': np.float64(0.9187453257665743)}

--- JPEG Quality 50 ---


                                                               

AVG: {'acc': np.float64(0.8183897867316066), 'auc': np.float64(0.84573565444234), 'precision': np.float64(0.9046466151410746), 'recall': np.float64(0.8711441893830703), 'f1': np.float64(0.8875793705175643)}

--- JPEG Quality 30 ---


                                                               

AVG: {'acc': np.float64(0.8185373773153273), 'auc': np.float64(0.8246097691051328), 'precision': np.float64(0.8952441574974993), 'recall': np.float64(0.8828012912482066), 'f1': np.float64(0.8889791864192514)}




In [None]:
# %% =========================
# DFDC CROSS DATASET TEST
# ============================

DFDC_REAL_PATH = r""
DFDC_FAKE_PATH = r""
print("\n===== DFDC CROSS-DATASET (Paper1) | 3-RUN AVG =====")

dfdc_dataset = FFPPDataset(DFDC_REAL_PATH, DFDC_FAKE_PATH)
dfdc_loader = DataLoader(
    dfdc_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

metrics_runs = []

for run in range(NUM_RUNS):
    print(f"Run {run+1}/{NUM_RUNS}")
    metrics = evaluate(dfdc_loader, model)
    metrics_runs.append(metrics)

avg = {k: np.mean([m[k] for m in metrics_runs]) for k in metrics_runs[0]}
print("\nAVG:", avg)


===== DFDC CROSS-DATASET (Paper1) | 3-RUN AVG =====
Run 1/1


                                                                  


AVG: {'acc': np.float64(0.6569848593012477), 'auc': np.float64(0.5270011643382294), 'precision': np.float64(0.7813216851416876), 'recall': np.float64(0.7775514667687344), 'f1': np.float64(0.7794320167174814)}


In [None]:
# %% =========================
# CELEB-DF CROSS DATASET TEST
# =========================

CELEB_REAL_PATH = r""
CELEB_FAKE_PATH = r""

print("\n===== CELEB-DF CROSS-DATASET (Paper1) | 3-RUN AVG =====")

celeb_dataset = FFPPDataset(CELEB_REAL_PATH, CELEB_FAKE_PATH)
celeb_loader = DataLoader(
    celeb_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

metrics_runs = []

for run in range(NUM_RUNS):
    print(f"Run {run+1}/{NUM_RUNS}")
    metrics = evaluate(celeb_loader, model)
    metrics_runs.append(metrics)

avg = {k: np.mean([m[k] for m in metrics_runs]) for k in metrics_runs[0]}
print("\nAVG:", avg)



===== CELEB-DF CROSS-DATASET (Paper1) | 3-RUN AVG =====
Run 1/1


                                                               


AVG: {'acc': np.float64(0.8286710384328049), 'auc': np.float64(0.7344185636518228), 'precision': np.float64(0.9236711244146188), 'recall': np.float64(0.8825130310438674), 'f1': np.float64(0.9026231364408379)}
