# Paper 5 – Deepfake Detection Benchmark


This notebook implements the **Paper 5** variant of the deepfake detector.



The code cells below typically follow this structure:

- Import all required libraries and define configuration variables.
- Create datasets and data loaders for FF++ and any cross-dataset tests.
- Build the model architecture described in Paper 5.
- Train, validate, and test the model, logging performance metrics.



> Run the cells from top to bottom to reproduce the results reported for Paper 5.

Paper link : https://arxiv.org/pdf/2411.19715 (Cui_Forensics_Adapter_Adapting_CLIP_for_Generalizable_Face_Forgery_Detection_CVPR_2025_paper.pdf)

In [1]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as T
import open_clip

from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

IMG_SIZE = 224
BATCH_SIZE = 8
EPOCHS = 5
LR = 1e-4

FFPP_REAL_PATH = r""
FFPP_FAKE_PATH = r""


In [5]:
class ImageDataset(Dataset):
    def __init__(self, real_path, fake_path, jpeg_quality=None):

        self.samples=[]

        for f in os.listdir(real_path):
            self.samples.append((os.path.join(real_path,f),0))

        for f in os.listdir(fake_path):
            self.samples.append((os.path.join(fake_path,f),1))

        self.jpeg_quality=jpeg_quality

        self.tf=T.Compose([
            T.Resize((IMG_SIZE,IMG_SIZE)),
            T.ToTensor(),
            T.Normalize([0.5]*3,[0.5]*3)
        ])

    def __len__(self):
        return len(self.samples)

    def __getitem__(self,idx):

        path,label=self.samples[idx]
        img=Image.open(path).convert("RGB")

        if self.jpeg_quality:
            from io import BytesIO
            buf=BytesIO()
            img.save(buf,"JPEG",quality=self.jpeg_quality)
            img=Image.open(buf)

        img=self.tf(img)
        return img,label


In [6]:
clip_model,_,_=open_clip.create_model_and_transforms(
    "ViT-B-16",
    pretrained="openai"
)

clip_model=clip_model.to(DEVICE)

for p in clip_model.parameters():
    p.requires_grad=False




In [7]:
class ForensicsAdapter(nn.Module):
    def __init__(self,dim=512):
        super().__init__()

        self.fc1=nn.Linear(dim,dim//2)
        self.fc2=nn.Linear(dim//2,dim)

    def forward(self,x):
        z=F.relu(self.fc1(x))
        z=self.fc2(z)
        return x+z


In [8]:
class CLIPForensicsAdapter(nn.Module):

    def __init__(self):
        super().__init__()

        self.clip=clip_model
        self.adapter=ForensicsAdapter()
        self.classifier=nn.Linear(512,2)

    def forward(self,x):

        feat=self.clip.encode_image(x)

        # adapter interaction (knowledge transfer idea)
        feat=self.adapter(feat)

        logits=self.classifier(feat)
        return logits


In [9]:
model=CLIPForensicsAdapter().to(DEVICE)

optimizer=torch.optim.AdamW(
    filter(lambda p:p.requires_grad,model.parameters()),
    lr=LR
)

criterion=nn.CrossEntropyLoss()

train_loader=DataLoader(
    ImageDataset(FFPP_REAL_PATH,FFPP_FAKE_PATH),
    batch_size=BATCH_SIZE,
    shuffle=True
)


In [10]:
for epoch in range(EPOCHS):

    model.train()
    total_loss=0

    for imgs,labels in tqdm(train_loader):

        imgs=imgs.to(DEVICE)
        labels=labels.to(DEVICE)

        optimizer.zero_grad()

        logits=model(imgs)
        loss=criterion(logits,labels)

        loss.backward()
        optimizer.step()

        total_loss+=loss.item()

    print("Epoch",epoch+1,"Loss:",total_loss/len(train_loader))


100%|██████████| 4782/4782 [21:33<00:00,  3.70it/s] 


Epoch 1 Loss: 0.371468704170265


100%|██████████| 4782/4782 [19:57<00:00,  3.99it/s]


Epoch 2 Loss: 0.2552276040158721


100%|██████████| 4782/4782 [19:37<00:00,  4.06it/s]


Epoch 3 Loss: 0.19329743511679368


100%|██████████| 4782/4782 [19:56<00:00,  4.00it/s]


Epoch 4 Loss: 0.15711199014734967


100%|██████████| 4782/4782 [20:43<00:00,  3.85it/s]

Epoch 5 Loss: 0.13236913136403788





In [20]:
SAVE_DIR = "./checkpoints"
os.makedirs(SAVE_DIR, exist_ok=True)

MODEL_NAME = "paper5_model"
best_loss = float("inf")

def save_checkpoint(model, optimizer, epoch, loss):
    path = os.path.join(SAVE_DIR, f"{MODEL_NAME}_BEST.pth")
    torch.save({
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "loss": loss
    }, path)
    print("Saved BEST checkpoint:", path)

# Save checkpoint after training (uses last epoch's stats)
save_checkpoint(model, optimizer, epoch+1, total_loss/len(train_loader))

Saved BEST checkpoint: ./checkpoints\paper5_model_BEST.pth


In [21]:
# Load best saved model for evaluation
BEST_MODEL_PATH = "checkpoints/paper5_model_BEST.pth"

print("\nLoading best trained model from:", BEST_MODEL_PATH)

best_model = CLIPForensicsAdapter().to(DEVICE)

state = torch.load(BEST_MODEL_PATH, map_location=DEVICE)
best_model.load_state_dict(state["model_state_dict"])

best_model.eval()
print("✔ Best model loaded successfully")


Loading best trained model from: checkpoints/paper5_model_BEST.pth


  state = torch.load(BEST_MODEL_PATH, map_location=DEVICE)


✔ Best model loaded successfully


In [22]:
from sklearn.metrics import (
    roc_auc_score,
    average_precision_score,
    roc_curve,
    precision_score,
    recall_score,
    f1_score
)

@torch.no_grad()
def evaluate_model(loader):

    model.eval()

    all_probs = []
    all_labels = []
    all_preds = []

    correct = 0
    total = 0

    # progress bar so large datasets (e.g., DFDC) are visible
    for imgs, labels in tqdm(loader, desc="Evaluating", leave=False):

        imgs = imgs.to(DEVICE)
        labels = labels.to(DEVICE)

        logits = model(imgs)

        probs = torch.softmax(logits, dim=1)[:,1]
        preds = torch.argmax(logits, dim=1)

        correct += (preds == labels).sum().item()
        total += labels.size(0)

        all_probs.extend(probs.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

    # ===== Core Metrics =====
    acc = correct / total
    auc = roc_auc_score(all_labels, all_probs)
    ap  = average_precision_score(all_labels, all_probs)

    precision = precision_score(all_labels, all_preds)
    recall    = recall_score(all_labels, all_preds)
    f1        = f1_score(all_labels, all_preds)

    # ===== EER =====
    fpr, tpr, _ = roc_curve(all_labels, all_probs)
    fnr = 1 - tpr
    eer = fpr[np.nanargmin(np.abs(fnr - fpr))]

    return {
        "ACC": acc,
        "AUC": auc,
        "Precision": precision,
        "Recall": recall,
        "F1": f1,
        "AP": ap,
        "EER": eer
    }

In [None]:
print("\n===== FF++ Evaluation (TEST SET) =====")

# Use FF++ test split for evaluation
FFPP_REAL_PATH = r""
FFPP_FAKE_PATH = r""
ffpp_loader=DataLoader(
    ImageDataset(FFPP_REAL_PATH,FFPP_FAKE_PATH),
    batch_size=BATCH_SIZE,
    shuffle=False
)

print(evaluate_model(ffpp_loader))


===== FF++ Evaluation (TEST SET) =====


                                                               

{'ACC': 0.7621577743339975, 'AUC': 0.7876746752442882, 'Precision': 0.8916329151437321, 'Recall': 0.8093615494978479, 'F1': 0.8485076380728555, 'AP': 0.9468427258579883, 'EER': np.float64(0.30220925385577324)}




In [None]:
CELEBDF_REAL_PATH = r""
CELEBDF_FAKE_PATH = r""

# Use DFDC validation split for faster cross-dataset evaluation
DFDC_REAL_PATH = r""
DFDC_FAKE_PATH = r""

print("\n===== Cross Dataset =====")

celeb_loader=DataLoader(ImageDataset(CELEBDF_REAL_PATH,CELEBDF_FAKE_PATH),batch_size=BATCH_SIZE)
dfdc_loader=DataLoader(ImageDataset(DFDC_REAL_PATH,DFDC_FAKE_PATH),batch_size=BATCH_SIZE)

print("CelebDF:",evaluate_model(celeb_loader))
print("DFDC:",evaluate_model(dfdc_loader))



===== Cross Dataset =====


                                                               

CelebDF: {'ACC': 0.8583464763011538, 'AUC': 0.6136435284759549, 'Precision': 0.9118787769459727, 'Recall': 0.9326998477789566, 'F1': 0.9221718012450687, 'AP': 0.927307062857764, 'EER': np.float64(0.42318840579710143)}


                                                               

DFDC: {'ACC': 0.8018769890238359, 'AUC': 0.5563413954834425, 'Precision': 0.8043903711918585, 'Recall': 0.9958005249343832, 'F1': 0.889919347563286, 'AP': 0.8331370580303815, 'EER': np.float64(0.45413833139824183)}


In [None]:
print("\n===== JPEG Robustness (FF++ TEST) =====")

# Ensure FF++ test split here
FFPP_REAL_PATH = r""
FFPP_FAKE_PATH = r""

for q in [90,70,50,30]:

    jpeg_loader=DataLoader(
        ImageDataset(FFPP_REAL_PATH,FFPP_FAKE_PATH,jpeg_quality=q),
        batch_size=BATCH_SIZE
    )

    print(f"JPEG {q}:",evaluate_model(jpeg_loader))


===== JPEG Robustness (FF++ TEST) =====


                                                               

JPEG 90: {'ACC': 0.7068851007305734, 'AUC': 0.7967087516438879, 'Precision': 0.9185124737701096, 'Recall': 0.7065100430416069, 'F1': 0.798682209832742, 'AP': 0.949289272927045, 'EER': np.float64(0.2922050854522718)}


                                                               

JPEG 70: {'ACC': 0.6572946646003985, 'AUC': 0.7367716544674581, 'Precision': 0.9007389162561577, 'Recall': 0.6558464849354376, 'F1': 0.7590286425902865, 'AP': 0.930761788593094, 'EER': np.float64(0.3397248853689037)}


                                                               

JPEG 50: {'ACC': 0.5629104863109734, 'AUC': 0.7121905020205095, 'Precision': 0.9190575412726398, 'Recall': 0.514167862266858, 'F1': 0.6594215398769478, 'AP': 0.9237217663235735, 'EER': np.float64(0.35556481867444767)}


                                                               

JPEG 30: {'ACC': 0.4867537451110619, 'AUC': 0.7178265371511204, 'Precision': 0.9360066486598795, 'Recall': 0.40396341463414637, 'F1': 0.5643595364860633, 'AP': 0.9224309207855355, 'EER': np.float64(0.35181325552313464)}
