# Deepfake detection using DINOv2

In [None]:
!apt-get install -y unrar
!pip install -q timm transformers accelerate scikit-learn huggingface_hub

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
unrar is already the newest version (1:6.1.5-1ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 41 not upgraded.


In [None]:
# Colab-only: mount Google Drive at /content/drive. Later cells save the
# trained checkpoints under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import io
import zipfile
import subprocess
import time
import torch
import torch.nn as nn
import timm
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.metrics import classification_report, accuracy_score
from huggingface_hub import hf_hub_download
from tqdm.auto import tqdm

class Config:
    """Central place for the notebook's settings: data locations, model and hyper-parameters."""

    # Hugging Face dataset repository the archives are downloaded from.
    HF_REPO_ID = "pujanpaudel/deepfake_face_classification"

    # Local folders for the downloaded archives and the extracted images.
    DOWNLOAD_DIR = '/content/downloads'
    DATA_ROOT = '/content/dataset'

    # Model identifier passed to timm.create_model.
    MODEL_NAME = "vit_small_patch14_reg4_dinov2"

    RESOLUTION = 224   # images are resized to RESOLUTION x RESOLUTION
    BATCH_SIZE = 128
    LR = 5e-6          # learning rate used by the full fine-tune optimizer
    LR_HEAD = 1e-4     # learning rate used by the head-only optimizer
    EPOCHS = 5

    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to 1 so DataLoader always receives an integer worker count.
    NUM_WORKERS = os.cpu_count() or 1

print(f"Configuration loaded. Device: {Config.DEVICE}")

Configuration loaded. Device: cuda


In [None]:
def _remove_partial(dest_dir):
    """Delete a partially extracted folder so the next run retries the extraction."""
    import shutil  # local import: only needed on the failure path

    shutil.rmtree(dest_dir, ignore_errors=True)


def _extract_zip(archive_path, dest_dir):
    """Extract a zip archive into ``dest_dir`` unless that folder already exists."""
    if os.path.exists(dest_dir):
        return
    try:
        with zipfile.ZipFile(archive_path, 'r') as z:
            z.extractall(dest_dir)
    except BaseException:
        # Without this, a half-written folder would make later runs skip extraction.
        _remove_partial(dest_dir)
        raise


def setup_data():
    """Download the dataset archives from the Hugging Face Hub and extract them.

    Fetches ``train.rar``, ``val.zip`` and ``test.zip`` from ``Config.HF_REPO_ID``
    into ``Config.DOWNLOAD_DIR`` and unpacks each into its own sub-folder of
    ``Config.DATA_ROOT``. A split whose destination folder already exists is
    treated as already extracted and is skipped. If an extraction fails or is
    interrupted, its destination folder is removed so a later call retries it.

    Returns:
        tuple[str, str, str]: Paths of the train, val and test folders.

    Raises:
        RuntimeError: If ``unrar`` exits with a non-zero status.
    """
    os.makedirs(Config.DOWNLOAD_DIR, exist_ok=True)

    files = ['train.rar', 'val.zip', 'test.zip']
    paths = {}
    print(f"Fetching data from {Config.HF_REPO_ID}...")

    for filename in files:
        paths[filename] = hf_hub_download(
            repo_id=Config.HF_REPO_ID,
            filename=filename,
            repo_type="dataset",
            local_dir=Config.DOWNLOAD_DIR
        )

    print("Extracting files...")
    train_dest = os.path.join(Config.DATA_ROOT, 'train')
    if not os.path.exists(train_dest):
        os.makedirs(train_dest)
        try:
            # The trailing separator marks the last argument as the destination folder.
            result = subprocess.run(
                ['unrar', 'x', paths['train.rar'], os.path.join(train_dest, '')],
                stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
            if result.returncode != 0:
                stderr_text = (result.stderr or b'').decode('utf-8', errors='replace').strip()
                raise RuntimeError(
                    f"unrar failed with exit code {result.returncode}: {stderr_text}")
        except BaseException:
            _remove_partial(train_dest)
            raise

    val_dest = os.path.join(Config.DATA_ROOT, 'val')
    _extract_zip(paths['val.zip'], val_dest)

    test_dest = os.path.join(Config.DATA_ROOT, 'test')
    _extract_zip(paths['test.zip'], test_dest)

    return train_dest, val_dest, test_dest

class RAMBufferedDataset(Dataset):
    """Two-class image dataset that keeps every file's encoded bytes in memory.

    The dataset looks for ``real/`` (label 0) and ``fake/`` (label 1) folders.
    When ``root_dir`` contains neither, a nested sub-folder is searched instead
    (archives are often extracted with one extra directory level). Files are
    read once in ``__init__`` and decoded on demand in ``__getitem__``.

    Args:
        root_dir: Folder that contains the class folders, directly or one level down.
        transform: Optional callable applied to the decoded RGB ``PIL.Image``.

    Raises:
        RuntimeError: If no image files are found for any class.
    """

    # Folder name -> integer label.
    CLASSES = {'real': 0, 'fake': 1}
    # Accepted file extensions (compared case-insensitively).
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.transform = transform
        self.samples = []
        self.targets = []

        search_dir = self._resolve_search_dir(root_dir)
        print(f"Indexing {search_dir}...")

        for class_name, label in self.CLASSES.items():
            class_path = os.path.join(search_dir, class_name)
            if not os.path.isdir(class_path):
                # Keep going (a split may hold a single class) but make it visible.
                print(f"   Warning: no '{class_name}' folder in {search_dir}")
                continue

            # Sort so the sample order does not depend on filesystem listing order.
            files = sorted(
                f for f in os.listdir(class_path)
                if f.lower().endswith(self.IMAGE_EXTENSIONS)
            )
            for img_name in tqdm(files, desc=f"   Loading {class_name}", leave=False):
                with open(os.path.join(class_path, img_name), 'rb') as f:
                    self.samples.append(f.read())  # store raw encoded bytes
                self.targets.append(label)

        if not self.samples:
            raise RuntimeError(
                f"No images found under {search_dir}; expected "
                f"{sorted(self.CLASSES)} folders containing {self.IMAGE_EXTENSIONS} files."
            )

    @classmethod
    def _resolve_search_dir(cls, root_dir):
        """Return the folder that holds the class folders: root_dir or a nested one."""
        entries = sorted(os.listdir(root_dir))
        if any(class_name in entries for class_name in cls.CLASSES):
            return root_dir

        subdirs = [
            os.path.join(root_dir, entry) for entry in entries
            if os.path.isdir(os.path.join(root_dir, entry))
        ]
        # Prefer a sub-folder that actually contains a class folder.
        for candidate in subdirs:
            if any(os.path.isdir(os.path.join(candidate, c)) for c in cls.CLASSES):
                return candidate
        return subdirs[0] if subdirs else root_dir

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        # convert() returns a new, fully loaded image, so the source can be closed.
        with Image.open(io.BytesIO(self.samples[idx])) as raw:
            image = raw.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, self.targets[idx]

def get_transforms(resolution):
    """Build the training and validation preprocessing pipelines.

    Both pipelines resize to ``(resolution, resolution)`` with bicubic
    interpolation, convert to a tensor and normalise per channel. The training
    pipeline additionally applies a random horizontal flip after the resize.

    Returns:
        tuple: ``(train_tf, val_tf)`` as ``transforms.Compose`` objects.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    target_size = (resolution, resolution)
    bicubic = transforms.InterpolationMode.BICUBIC

    def resize_step():
        return transforms.Resize(target_size, interpolation=bicubic)

    def tensor_steps():
        return [transforms.ToTensor(), transforms.Normalize(channel_mean, channel_std)]

    train_steps = [resize_step(), transforms.RandomHorizontalFlip()] + tensor_steps()
    val_steps = [resize_step()] + tensor_steps()

    return transforms.Compose(train_steps), transforms.Compose(val_steps)

In [None]:
def train_epoch(model, loader, optimizer, criterion):
    """Run one optimisation pass over ``loader``.

    Returns:
        tuple: ``(mean of the per-batch losses, fraction of samples classified correctly)``.
    """
    model.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0

    progress = tqdm(loader, desc="Training", leave=False)
    for batch_imgs, batch_lbls in progress:
        batch_imgs = batch_imgs.to(Config.DEVICE)
        batch_lbls = batch_lbls.to(Config.DEVICE)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_imgs)
        batch_loss = criterion(logits, batch_lbls)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        predicted = logits.argmax(1)
        n_correct += (predicted == batch_lbls).sum().item()
        n_seen += batch_lbls.size(0)

    mean_loss = running_loss / len(loader)
    accuracy = n_correct / n_seen
    return mean_loss, accuracy

def evaluate(model, loader):
    """Run inference over ``loader`` without tracking gradients.

    Returns:
        tuple: ``(accuracy, labels, predictions)`` where the last two are flat
        lists collected over all batches.
    """
    model.eval()
    predictions = []
    ground_truth = []

    with torch.no_grad():
        for batch_imgs, batch_lbls in tqdm(loader, desc="Evaluating", leave=False):
            logits = model(batch_imgs.to(Config.DEVICE))
            batch_preds = logits.argmax(1).cpu().numpy()
            predictions.extend(batch_preds)
            # Labels are never moved to the device, so they convert directly.
            ground_truth.extend(batch_lbls.numpy())

    accuracy = accuracy_score(ground_truth, predictions)
    return accuracy, ground_truth, predictions

In [None]:
# Download the archives and extract them; keep the three split folders for the dataset cell.
train_dir, val_dir, test_dir = setup_data()

Fetching data from pujanpaudel/deepfake_face_classification...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


train.rar:   0%|          | 0.00/3.94G [00:00<?, ?B/s]

val.zip:   0%|          | 0.00/617M [00:00<?, ?B/s]

test.zip:   0%|          | 0.00/857M [00:00<?, ?B/s]

Extracting files...


In [None]:
def _make_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader using the shared notebook settings."""
    num_workers = Config.NUM_WORKERS or 0
    return DataLoader(
        dataset,
        batch_size=Config.BATCH_SIZE,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=True,
        # DataLoader rejects persistent_workers=True when there are no workers.
        persistent_workers=num_workers > 0,
    )


train_tf, val_tf = get_transforms(Config.RESOLUTION)

print("\nLoading Datasets into RAM...")
train_ds = RAMBufferedDataset(train_dir, transform=train_tf)
# Validation and test use the deterministic pipeline (no random flip).
val_ds = RAMBufferedDataset(val_dir, transform=val_tf)
test_ds = RAMBufferedDataset(test_dir, transform=val_tf)

print(f"CReating DataLoaders (Batch Size: {Config.BATCH_SIZE})...")
# Only the training loader shuffles.
train_loader = _make_loader(train_ds, shuffle=True)
val_loader = _make_loader(val_ds, shuffle=False)
test_loader = _make_loader(test_ds, shuffle=False)


Loading Datasets into RAM...
Indexing /content/dataset/train/train...


   Loading real:   0%|          | 0/12848 [00:00<?, ?it/s]

   Loading fake:   0%|          | 0/12848 [00:00<?, ?it/s]

Indexing /content/dataset/val/val...


   Loading real:   0%|          | 0/1606 [00:00<?, ?it/s]

   Loading fake:   0%|          | 0/1606 [00:00<?, ?it/s]

Indexing /content/dataset/test/test...


   Loading real:   0%|          | 0/1606 [00:00<?, ?it/s]

   Loading fake:   0%|          | 0/1606 [00:00<?, ?it/s]

CReating DataLoaders (Batch Size: 128)...


### Full fine-tuning

In [None]:
# Full fine-tune setup: pretrained backbone with a fresh 2-way head (0=real, 1=fake).
model = timm.create_model(
    Config.MODEL_NAME,
    pretrained=True,
    num_classes=2,
    img_size=Config.RESOLUTION,
)
model = model.to(Config.DEVICE)

# Every parameter of the model is handed to the optimizer.
optimizer = torch.optim.AdamW(model.parameters(), lr=Config.LR)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

print("Model initialized")

model.safetensors:   0%|          | 0.00/88.2M [00:00<?, ?B/s]

Model initialized


In [None]:
# Full fine-tune run: train for Config.EPOCHS epochs, validating after each one.
print("\nStarting Training...")
start_time = time.time()

for epoch in range(Config.EPOCHS):
    loss, train_acc = train_epoch(model, train_loader, optimizer, criterion)
    val_acc = evaluate(model, val_loader)[0]  # only the accuracy is needed here

    epoch_summary = (
        f"Epoch {epoch+1}/{Config.EPOCHS} | "
        f"Loss: {loss:.4f} | Train Acc: {train_acc:.4%} | Val Acc: {val_acc:.4%}"
    )
    print(epoch_summary)

elapsed_minutes = (time.time() - start_time) / 60
print(f"\nTraining Finished in {elapsed_minutes:.1f} minutes...")

# Save the weights as they are after the last epoch.
checkpoint_path = "/content/drive/MyDrive/dinov2_reg4_deepfake.pth"
torch.save(model.state_dict(), checkpoint_path)
print("Model saved to google drive")


Starting Training...


Training:   0%|          | 0/201 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Epoch 1/5 | Loss: 0.3241 | Train Acc: 91.9287% | Val Acc: 98.9415%


Training:   0%|          | 0/201 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Epoch 2/5 | Loss: 0.2139 | Train Acc: 99.4007% | Val Acc: 96.8244%


Training:   0%|          | 0/201 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Epoch 3/5 | Loss: 0.2056 | Train Acc: 99.6887% | Val Acc: 98.8481%


Training:   0%|          | 0/201 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Epoch 4/5 | Loss: 0.2047 | Train Acc: 99.7081% | Val Acc: 99.0037%


Training:   0%|          | 0/201 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Epoch 5/5 | Loss: 0.2016 | Train Acc: 99.8599% | Val Acc: 99.0660%

Training Finished in 16.3 minutes...
Model saved to google drive


In [None]:
print("\n--- FULL FINE TUNE EVALUATION ---")

# Score the fully fine-tuned model on the test loader.
acc, true, pred = evaluate(model, test_loader)
print(f"Test Accuracy: {acc:.2%}")

# target_names follow the label order used by the dataset: 0 -> Real, 1 -> Fake.
report = classification_report(true, pred, target_names=['Real', 'Fake'], digits=4)
print(report)


--- FULL FINE TUNE EVALUATION ---


Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Test Accuracy: 97.60%
              precision    recall  f1-score   support

        Real     0.9619    0.9913    0.9764      1606
        Fake     0.9910    0.9608    0.9757      1606

    accuracy                         0.9760      3212
   macro avg     0.9765    0.9760    0.9760      3212
weighted avg     0.9765    0.9760    0.9760      3212



### Head-only training

In [None]:
# Head-only setup: same pretrained model, but only the classification head is trained.
model_head_only = timm.create_model(
    Config.MODEL_NAME,
    pretrained=True,
    num_classes=2,
    img_size=Config.RESOLUTION,
)
model_head_only = model_head_only.to(Config.DEVICE)

# Freeze everything except the parameters that belong to the head.
head_param_ids = {id(p) for p in model_head_only.head.parameters()}
for param in model_head_only.parameters():
    param.requires_grad = id(param) in head_param_ids

# NOTE: this rebinds the notebook-level `optimizer` and `criterion` names that
# the full fine-tune cells also use.
trainable_params = [p for p in model_head_only.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_params, lr=Config.LR_HEAD)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

print("Model initialized")

Model initialized


In [None]:
# Head-only run: train for Config.EPOCHS epochs, validating after each one.
print("\nStarting Training...")
start_time = time.time()

for epoch in range(Config.EPOCHS):
    loss, train_acc = train_epoch(model_head_only, train_loader, optimizer, criterion)
    val_acc = evaluate(model_head_only, val_loader)[0]  # only the accuracy is needed here

    epoch_summary = (
        f"Epoch {epoch+1}/{Config.EPOCHS} | "
        f"Loss: {loss:.4f} | Train Acc: {train_acc:.4%} | Val Acc: {val_acc:.4%}"
    )
    print(epoch_summary)

elapsed_minutes = (time.time() - start_time) / 60
print(f"\nTraining Finished in {elapsed_minutes:.1f} minutes...")

# Save the weights as they are after the last epoch.
checkpoint_path = "/content/drive/MyDrive/dinov2_reg4_deepfake_head_only.pth"
torch.save(model_head_only.state_dict(), checkpoint_path)
print("Model saved to google drive")


Starting Training...


Training:   0%|          | 0/201 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Epoch 1/5 | Loss: 0.6065 | Train Acc: 68.9718% | Val Acc: 83.2192%


Training:   0%|          | 0/201 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Epoch 2/5 | Loss: 0.4935 | Train Acc: 81.2500% | Val Acc: 84.8381%


Training:   0%|          | 0/201 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Epoch 3/5 | Loss: 0.4579 | Train Acc: 83.7601% | Val Acc: 84.4334%


Training:   0%|          | 0/201 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Epoch 4/5 | Loss: 0.4393 | Train Acc: 84.7330% | Val Acc: 85.6164%


Training:   0%|          | 0/201 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Epoch 5/5 | Loss: 0.4269 | Train Acc: 85.8227% | Val Acc: 85.8655%

Training Finished in 6.7 minutes...
Model saved to google drive


In [None]:
print("--- HEAD ONLY EVALUATION ---")

# Score the head-only model on the test loader.
acc, true, pred = evaluate(model_head_only, test_loader)
print(f"Test Accuracy: {acc:.2%}")

# target_names follow the label order used by the dataset: 0 -> Real, 1 -> Fake.
report = classification_report(true, pred, target_names=['Real', 'Fake'], digits=4)
print(report)

--- HEAD ONLY EVALUATION ---


Evaluating:   0%|          | 0/26 [00:00<?, ?it/s]

Test Accuracy: 87.64%
              precision    recall  f1-score   support

        Real     0.8684    0.8873    0.8777      1606
        Fake     0.8848    0.8655    0.8750      1606

    accuracy                         0.8764      3212
   macro avg     0.8766    0.8764    0.8764      3212
weighted avg     0.8766    0.8764    0.8764      3212

