# Paper 2 – Deepfake Detection Benchmark


This notebook implements the **Paper 2** variant of the deepfake detector.



The code cells below typically follow this structure:

- Import libraries and set up configuration (paths, hyperparameters, device).
- Define datasets and data loaders for FF++, DFDC, Celeb-DF, or related benchmarks.
- Build the model architecture specific to this paper.
- Train and evaluate the model, printing key metrics for comparison across papers.



> Run the cells from top to bottom to reproduce the results reported for Paper 2.

Paper link : https://arxiv.org/pdf/2402.12927 (2402.12927v1.pdf)

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import open_clip
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from PIL import Image
import os
from tqdm import tqdm


In [6]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

IMG_SIZE = 224
BATCH_SIZE = 8
EPOCHS = 5
LR = 1e-4

FFPP_REAL_PATH = r"C:\Users\vk200\OneDrive\Desktop\Benchmarking\FFPP_CViT\train\real"
FFPP_FAKE_PATH = r"C:\Users\vk200\OneDrive\Desktop\Benchmarking\FFPP_CViT\train\fake"

ADAPT_MODE = "prompt"  
# options:
# "linear"
# "adapter"
# "prompt"

In [7]:
class FFPPDataset(Dataset):
    def __init__(self, real_path, fake_path):
        self.samples = []

        for f in os.listdir(real_path):
            self.samples.append((os.path.join(real_path,f),0))

        for f in os.listdir(fake_path):
            self.samples.append((os.path.join(fake_path,f),1))

        self.transform = T.Compose([
            T.Resize((IMG_SIZE,IMG_SIZE)),
            T.ToTensor(),
            T.Normalize([0.5]*3,[0.5]*3)
        ])

    def __len__(self):
        return len(self.samples)

    def __getitem__(self,idx):
        path,label=self.samples[idx]
        img=Image.open(path).convert("RGB")
        img=self.transform(img)
        return img,label


In [8]:
clip_model, _, preprocess = open_clip.create_model_and_transforms(
    "ViT-B-16",
    pretrained="openai"
)

clip_model = clip_model.to(DEVICE)

for p in clip_model.parameters():
    p.requires_grad = False


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [9]:
class CLIPAdapter(nn.Module):
    def __init__(self, dim=512):
        super().__init__()
        self.fc1 = nn.Linear(dim, dim//4)
        self.fc2 = nn.Linear(dim//4, dim)

    def forward(self,x):
        z = F.relu(self.fc1(x))
        z = self.fc2(z)
        return x + z


In [10]:
class PromptLearner(nn.Module):
    def __init__(self, clip_model, n_ctx=8):
        super().__init__()
        self.n_ctx = n_ctx
        dim = clip_model.token_embedding.weight.shape[1]

        self.context = nn.Parameter(torch.randn(n_ctx, dim))

        self.class_tokens = ["real","fake"]

        tokenizer = open_clip.get_tokenizer("ViT-B-16")
        self.tokenized = tokenizer(self.class_tokens)

    def forward(self):
        return self.context


In [11]:
class CLIPDeceptionModel(nn.Module):
    def __init__(self, mode="linear"):
        super().__init__()

        self.clip = clip_model
        self.mode = mode

        self.adapter = CLIPAdapter() if mode=="adapter" else None
        self.prompt = PromptLearner(clip_model) if mode=="prompt" else None

        self.classifier = nn.Linear(512,2)

    def encode_image(self,x):
        img_feat = self.clip.encode_image(x)

        if self.adapter is not None:
            img_feat = self.adapter(img_feat)

        return img_feat

    def forward(self,x):
        feat = self.encode_image(x)
        logits = self.classifier(feat)
        return logits


In [12]:
model = CLIPDeceptionModel(mode=ADAPT_MODE).to(DEVICE)

opt = torch.optim.AdamW(
    filter(lambda p:p.requires_grad, model.parameters()),
    lr=LR
)

criterion = nn.CrossEntropyLoss()


In [13]:
dataset = FFPPDataset(FFPP_REAL_PATH,FFPP_FAKE_PATH)
loader = DataLoader(dataset,batch_size=BATCH_SIZE,shuffle=True)

for epoch in range(EPOCHS):

    model.train()
    total_loss=0

    for imgs,labels in tqdm(loader):
        imgs=imgs.to(DEVICE)
        labels=labels.to(DEVICE)

        opt.zero_grad()
        logits=model(imgs)
        loss=criterion(logits,labels)
        loss.backward()
        opt.step()

        total_loss+=loss.item()

    print("Epoch",epoch+1,"Loss:",total_loss/len(loader))


100%|██████████| 4782/4782 [22:17<00:00,  3.58it/s] 


Epoch 1 Loss: 0.4891992380452276


100%|██████████| 4782/4782 [50:36<00:00,  1.57it/s]  


Epoch 2 Loss: 0.43646546307092726


100%|██████████| 4782/4782 [51:44<00:00,  1.54it/s]  


Epoch 3 Loss: 0.41555371246639905


100%|██████████| 4782/4782 [54:23<00:00,  1.47it/s]  


Epoch 4 Loss: 0.4026165883325972


100%|██████████| 4782/4782 [1:18:50<00:00,  1.01it/s]

Epoch 5 Loss: 0.3929246483041135





In [14]:
model.eval()
correct=0
total=0

with torch.no_grad():
    for imgs,labels in loader:
        imgs=imgs.to(DEVICE)
        labels=labels.to(DEVICE)

        preds=model(imgs).argmax(1)
        correct+=(preds==labels).sum().item()
        total+=labels.size(0)

print("Accuracy:",correct/total)


Accuracy: 0.8075917599079787


In [17]:
SAVE_DIR = "./checkpoints"
os.makedirs(SAVE_DIR, exist_ok=True)

MODEL_NAME = "paper2_model"   # change per notebook
best_loss = float("inf")

def save_checkpoint(model, optimizer, epoch, loss):
    path = os.path.join(SAVE_DIR, f"{MODEL_NAME}_BEST.pth")
    torch.save({
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "loss": loss
    }, path)
    print("Saved BEST checkpoint:", path)
save_checkpoint(model, opt, epoch+1, total_loss/len(loader))

Saved BEST checkpoint: ./checkpoints\paper2_model_BEST.pth


In [20]:
# %% =========================
# LOAD BEST MODEL FOR TESTING
# =========================

BEST_MODEL_PATH = "checkpoints/paper2_model_BEST.pth"

print("\nLoading best trained model from:", BEST_MODEL_PATH)

# Create fresh model instance
model = CLIPDeceptionModel(mode=ADAPT_MODE).to(DEVICE)

# Load weights
state_dict = torch.load(BEST_MODEL_PATH, map_location=DEVICE)
model.load_state_dict(state_dict["model_state_dict"])

model.eval()

print("✔ Best model loaded successfully")



Loading best trained model from: checkpoints/paper2_model_BEST.pth


  state_dict = torch.load(BEST_MODEL_PATH, map_location=DEVICE)


✔ Best model loaded successfully


In [24]:
# %% =========================
# Evaluation Utilities (Paper1)
# =============================

import numpy as np
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
)
from tqdm import tqdm
import torch.nn.functional as F


@torch.no_grad()
def evaluate(loader, model):
    model.eval()

    all_probs = []
    all_preds = []
    all_labels = []

    for imgs, labels in tqdm(loader, desc="Evaluating", leave=False):
        imgs = imgs.to(DEVICE)

        logits = model(imgs)                  # (B,2)
        probs = F.softmax(logits, dim=1)[:,1]   # fake prob

        preds = (probs >= 0.5).long().cpu()

        all_probs.append(probs.cpu())
        all_preds.append(preds)
        all_labels.append(labels)

    probs = torch.cat(all_probs).numpy()
    preds = torch.cat(all_preds).numpy()
    labels = torch.cat(all_labels).numpy()

    return {
        "acc": accuracy_score(labels, preds),
        "auc": roc_auc_score(labels, probs),
        "precision": precision_score(labels, preds, zero_division=0),
        "recall": recall_score(labels, preds, zero_division=0),
        "f1": f1_score(labels, preds, zero_division=0),
    }


In [25]:
# %% =========================
# FF++ TEST SET | 3-RUN AVG
# =========================

print("\n===== FF++ TEST (Paper1) | 3-RUN AVG =====")
FFPP_REAL_PATH = r"C:\Users\vk200\OneDrive\Desktop\Benchmarking\FFPP_CViT\test\real"
FFPP_FAKE_PATH = r"C:\Users\vk200\OneDrive\Desktop\Benchmarking\FFPP_CViT\test\fake"

NUM_RUNS = 1
all_metrics = []

ffpp_test_dataset = FFPPDataset(FFPP_REAL_PATH, FFPP_FAKE_PATH)
ffpp_test_loader = DataLoader(
    ffpp_test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

for run in range(NUM_RUNS):
    print(f"\nRun {run+1}/{NUM_RUNS}")

    metrics = evaluate(ffpp_test_loader, model)
    all_metrics.append(metrics)
    print(metrics)

# Average
avg = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0]}
print("\nAVG:", avg)



===== FF++ TEST (Paper1) | 3-RUN AVG =====

Run 1/1


                                                               

{'acc': 0.8085750129141761, 'auc': 0.8221906597560079, 'precision': 0.8952521707001663, 'recall': 0.8690817790530847, 'f1': 0.8819728819728819}

AVG: {'acc': np.float64(0.8085750129141761), 'auc': np.float64(0.8221906597560079), 'precision': np.float64(0.8952521707001663), 'recall': np.float64(0.8690817790530847), 'f1': np.float64(0.8819728819728819)}


In [None]:
# %% =========================
# JPEG COMPRESSION TEST
# =========================

from PIL import Image
import io

class JPEGCompression:
    def __init__(self, quality):
        self.quality = quality-

    def __call__(self, img_tensor):

        # UNNORMALIZE
        img = img_tensor.clone()
        img = img * 0.5 + 0.5     # [-1,1] -> [0,1]
        img = img.clamp(0,1)

        img = img.permute(1,2,0).cpu().numpy()
        img = (img * 255).astype(np.uint8)

        pil_img = Image.fromarray(img)
        buffer = io.BytesIO()
        pil_img.save(buffer, format="JPEG", quality=self.quality)
        buffer.seek(0)

        comp = Image.open(buffer).convert("RGB")
        comp = np.array(comp) / 255.0
        comp = torch.tensor(comp).permute(2,0,1).float()

        # RENORMALIZE
        comp = (comp - 0.5) / 0.5

        return comp



print("\n===== JPEG COMPRESSION TEST (Paper1) | 3-RUN AVG =====")

jpeg_qualities = [100, 90, 75, 50, 30]

for q in jpeg_qualities:
    print(f"\n--- JPEG Quality {q} ---")

    class JPEGWrapper(torch.utils.data.Dataset):
        def __init__(self, base_dataset, quality):
            self.base = base_dataset
            self.comp = JPEGCompression(quality)

        def __len__(self):
            return len(self.base)

        def __getitem__(self, idx):
            img, label = self.base[idx]
            img = self.comp(img)
            return img, label

    metrics_runs = []

    for run in range(NUM_RUNS):

        jpeg_dataset = JPEGWrapper(ffpp_test_dataset, q)
        jpeg_loader = DataLoader(
            jpeg_dataset,
            batch_size=BATCH_SIZE,
            shuffle=False,
            num_workers=0,
        )

        metrics = evaluate(jpeg_loader, model)
        metrics_runs.append(metrics)

    avg = {k: np.mean([m[k] for m in metrics_runs]) for k in metrics_runs[0]}
    print("AVG:", avg)



===== JPEG COMPRESSION TEST (Paper1) | 3-RUN AVG =====

--- JPEG Quality 100 ---


                                                               

AVG: {'acc': np.float64(0.7983174673455834), 'auc': np.float64(0.8201353886393361), 'precision': np.float64(0.9023993882038046), 'recall': np.float64(0.8464849354375896), 'f1': np.float64(0.873548327395549)}

--- JPEG Quality 90 ---


                                                               

AVG: {'acc': np.float64(0.7135266769980075), 'auc': np.float64(0.8214622917966178), 'precision': np.float64(0.9303812455600284), 'recall': np.float64(0.7046269727403156), 'f1': np.float64(0.8019185631186856)}

--- JPEG Quality 75 ---


                                                               

AVG: {'acc': np.float64(0.783853590140949), 'auc': np.float64(0.7954025185649449), 'precision': np.float64(0.8863102508691159), 'recall': np.float64(0.8458572453371592), 'f1': np.float64(0.8656113787565956)}

--- JPEG Quality 50 ---


                                                               

AVG: {'acc': np.float64(0.6986938233340713), 'auc': np.float64(0.7727650636653364), 'precision': np.float64(0.9080937536081284), 'recall': np.float64(0.705254662840746), 'f1': np.float64(0.7939231817493565)}

--- JPEG Quality 30 ---


                                                               

AVG: {'acc': np.float64(0.6080732049295255), 'auc': np.float64(0.771129230675383), 'precision': np.float64(0.9375280898876405), 'recall': np.float64(0.561154949784792), 'f1': np.float64(0.7020811129186066)}




In [27]:
# %% =========================
# DFDC CROSS DATASET TEST
# ============================

DFDC_REAL_PATH = r"C:\Users\vk200\OneDrive\Desktop\Benchmarking\DFDC\train\real"
DFDC_FAKE_PATH = r"C:\Users\vk200\OneDrive\Desktop\Benchmarking\DFDC\train\fake"
print("\n===== DFDC CROSS-DATASET (Paper1) | 3-RUN AVG =====")

dfdc_dataset = FFPPDataset(DFDC_REAL_PATH, DFDC_FAKE_PATH)
dfdc_loader = DataLoader(
    dfdc_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

metrics_runs = []

for run in range(NUM_RUNS):
    print(f"Run {run+1}/{NUM_RUNS}")
    metrics = evaluate(dfdc_loader, model)
    metrics_runs.append(metrics)

avg = {k: np.mean([m[k] for m in metrics_runs]) for k in metrics_runs[0]}
print("\nAVG:", avg)



===== DFDC CROSS-DATASET (Paper1) | 3-RUN AVG =====
Run 1/1


                                                                  


AVG: {'acc': np.float64(0.7786005774988546), 'auc': np.float64(0.5126801438294898), 'precision': np.float64(0.7792707766793572), 'recall': np.float64(0.9988927468081035), 'f1': np.float64(0.8755189456342669)}


In [28]:
# %% =========================
# CELEB-DF CROSS DATASET TEST
# =========================

CELEB_REAL_PATH = r"C:\Users\vk200\OneDrive\Desktop\Benchmarking\CelebDF_images\train\real"
CELEB_FAKE_PATH = r"C:\Users\vk200\OneDrive\Desktop\Benchmarking\CelebDF_images\train\fake"

print("\n===== CELEB-DF CROSS-DATASET (Paper1) | 3-RUN AVG =====")

celeb_dataset = FFPPDataset(CELEB_REAL_PATH, CELEB_FAKE_PATH)
celeb_loader = DataLoader(
    celeb_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

metrics_runs = []

for run in range(NUM_RUNS):
    print(f"Run {run+1}/{NUM_RUNS}")
    metrics = evaluate(celeb_loader, model)
    metrics_runs.append(metrics)

avg = {k: np.mean([m[k] for m in metrics_runs]) for k in metrics_runs[0]}
print("\nAVG:", avg)



===== CELEB-DF CROSS-DATASET (Paper1) | 3-RUN AVG =====
Run 1/1


                                                               


AVG: {'acc': np.float64(0.8617498132315099), 'auc': np.float64(0.5896062222392854), 'precision': np.float64(0.9054627419782552), 'recall': np.float64(0.9450159140181743), 'f1': np.float64(0.9248166121205281)}


