In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# `google.colab` is only importable when running on Colab.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install third-party packages into the runtime via a shell escape (versions are not pinned).
!pip install numpy opencv-python timm einops scikit-learn matplotlib


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->timm)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->timm)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->timm)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch->tim

In [None]:
import os, random, glob, cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import timm
import re

# --- Configuration: everything a reader might want to tune -------------------
VIDEO_DIR = '/content/drive/MyDrive/Celebs'  # dataset root; 'real' and 'synthetic' subfolders are globbed for *.mp4
FRAME_SIZE = 224         # side length (pixels) of the square thumbnail fed to the models
FRAMES_PER_CLIP = 4      # number of frames read from each video and tiled into one thumbnail
GRID_SIZE = (2, 2)       # (rows, cols) of the thumbnail grid
MASK_SIZE = 32           # side length (pixels) of the square region zeroed out in every frame
BATCH_SIZE = 4           # samples per DataLoader batch
EPOCHS = 10              # intended number of training epochs per model
LEARNING_RATE = 1.5e-5   # learning rate passed to the AdamW optimizer
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Seed every RNG the pipeline draws from (mask position, loader shuffling,
# weight initialisation of the new heads) so a fresh "Run All" is repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


In [None]:
def create_id_mapping(real_paths, synthetic_paths):
    """Group synthetic videos under the real video they were derived from.

    The key for a real video is its file name without extension. A synthetic
    file whose stem starts with ``<idA>_<idB>_<NNNN>`` is attributed to the
    real key ``<idA>_<NNNN>``; synthetic files that do not match the pattern,
    or whose key has no real counterpart, are ignored.

    Args:
        real_paths: iterable of paths to real videos.
        synthetic_paths: iterable of paths to synthetic videos.

    Returns:
        dict mapping every real key to a list (possibly empty) of the
        synthetic paths attributed to it, in input order.
    """
    def stem(path):
        # File name with directory and extension stripped.
        return os.path.splitext(os.path.basename(path))[0]

    mapping = {stem(path): [] for path in real_paths}
    pattern = re.compile(r"(id\d+)_id\d+_(\d{4})")

    for candidate in synthetic_paths:
        found = pattern.match(stem(candidate))
        if found is None:
            continue
        # Rebuild the real key from the source identity and the clip number.
        source_key = "_".join(found.group(1, 2))
        bucket = mapping.get(source_key)
        if bucket is not None:
            bucket.append(candidate)
    return mapping

In [None]:
def generate_tall_thumbnail(frames, mask_size=MASK_SIZE):
    """Tile a clip into a single thumbnail image after masking a random square.

    A square of at most ``mask_size`` x ``mask_size`` pixels, centred at a
    random position and clipped at the frame borders, is zeroed at the same
    location in every frame. Each frame is then resized to one grid cell and
    the cells are laid out row by row according to ``GRID_SIZE``.

    Args:
        frames: sequence of ``rows * cols`` arrays of identical H x W x C shape.
        mask_size: side length in pixels of the zeroed square.

    Returns:
        One array holding the tiled thumbnail, with
        ``rows * (FRAME_SIZE // rows)`` rows and ``cols * (FRAME_SIZE // cols)``
        columns of pixels.

    Raises:
        ValueError: if the number of frames does not fill the grid exactly.
    """
    rows, cols = GRID_SIZE
    if len(frames) != rows * cols:
        raise ValueError(
            f"expected {rows * cols} frames for a {rows}x{cols} grid, got {len(frames)}"
        )

    height, width, _ = frames[0].shape

    # One mask shared by all frames, so the blanked region is spatially consistent.
    mask = np.ones((height, width), dtype=np.uint8)
    center_y, center_x = np.random.randint(height), np.random.randint(width)
    half = mask_size // 2
    top, bottom = max(0, center_y - half), min(height, center_y + half)
    left, right = max(0, center_x - half), min(width, center_x + half)
    mask[top:bottom, left:right] = 0

    # cv2.resize takes the target size as (width, height).
    cell_size = (FRAME_SIZE // cols, FRAME_SIZE // rows)
    cells = [cv2.resize(frame * mask[:, :, None], cell_size) for frame in frames]

    # Build the grid from GRID_SIZE instead of assuming a fixed 2x2 layout.
    grid_rows = [
        np.concatenate(cells[row * cols:(row + 1) * cols], axis=1)
        for row in range(rows)
    ]
    return np.concatenate(grid_rows, axis=0)

class DeepfakeDataset(Dataset):
    """Dataset yielding one masked thumbnail grid and its label per video.

    For each item the first ``FRAMES_PER_CLIP`` frames of the video are decoded,
    converted to RGB, tiled with ``generate_tall_thumbnail`` and optionally
    passed through ``transform``. A video that yields too few frames is
    skipped in favour of the next index (wrapping around), so the returned
    sample and label may belong to a neighbouring video.

    Args:
        video_paths: sequence of video file paths.
        labels: sequence of labels aligned with ``video_paths``.
        transform: optional callable applied to the thumbnail array.
    """

    def __init__(self, video_paths, labels, transform=None):
        self.video_paths = video_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.video_paths)

    @staticmethod
    def _read_leading_frames(path, count):
        """Decode up to ``count`` RGB frames from the start of the video at ``path``."""
        cap = cv2.VideoCapture(path)
        frames = []
        try:
            while len(frames) < count:
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        finally:
            # Release the capture handle even if decoding or conversion raises.
            cap.release()
        return frames

    def __getitem__(self, idx):
        """Return ``(thumbnail, label)`` for ``idx`` or the next readable video.

        Raises:
            IndexError: if ``idx`` is out of range for ``video_paths``.
            RuntimeError: if no video in the dataset yields enough frames.
        """
        total = len(self)
        candidate = idx
        # Try each video at most once; the original unbounded recursion would
        # never terminate cleanly when every file is unreadable.
        for _ in range(max(total, 1)):
            # Only the leading frames are read, so the clip always starts at
            # frame 0 (the former random start offset could only ever be 0).
            clip = self._read_leading_frames(self.video_paths[candidate], FRAMES_PER_CLIP)
            if len(clip) == FRAMES_PER_CLIP:
                thumb = generate_tall_thumbnail(clip)
                if self.transform:
                    thumb = self.transform(thumb)
                return thumb, self.labels[candidate]
            candidate = (candidate + 1) % total

        raise RuntimeError(
            f"no video in the dataset provides {FRAMES_PER_CLIP} readable frames"
        )


In [None]:
# Collect the video files. glob returns entries in filesystem order, so sort
# them to make the seeded train/validation split reproducible across runs.
real_paths = sorted(glob.glob(os.path.join(VIDEO_DIR, 'real', '*.mp4')))
synthetic_paths = sorted(glob.glob(os.path.join(VIDEO_DIR, 'synthetic', '*.mp4')))

# Fail early with a clear message instead of an obscure split error later.
if not real_paths:
    raise FileNotFoundError(f"No .mp4 files found in {os.path.join(VIDEO_DIR, 'real')}")

id_mapping = create_id_mapping(real_paths, synthetic_paths)

# Label convention: 0 = real, 1 = synthetic. Only synthetic videos that map
# back to a real video present in the dataset are included.
all_video_paths, all_labels = [], []
for real_path in real_paths:
    real_id = os.path.splitext(os.path.basename(real_path))[0]
    all_video_paths.append(real_path)
    all_labels.append(0)
    for synth_path in id_mapping.get(real_id, []):
        all_video_paths.append(synth_path)
        all_labels.append(1)

# NOTE(review): the split is per video, so a real video and the fakes derived
# from it can land on different sides of the split -- consider a group-aware
# split if identity leakage matters for the reported metrics.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    all_video_paths, all_labels, test_size=0.2, random_state=42, stratify=all_labels)

# Both backbones are ImageNet-pretrained, so inputs are normalised with the
# ImageNet channel statistics those weights were trained with.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((FRAME_SIZE, FRAME_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

train_dataset = DeepfakeDataset(train_paths, train_labels, transform=transform)
val_dataset = DeepfakeDataset(val_paths, val_labels, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [None]:
from torchvision import models

# ResNet-50 baseline: ImageNet weights with the classifier replaced by a
# single-logit head. The weights enum replaces the deprecated
# `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that flag loaded.
resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
resnet.fc = nn.Linear(resnet.fc.in_features, 1)
resnet = resnet.to(DEVICE)

# Swin-Tiny from timm, created directly with a single-logit head.
swin = timm.create_model('swin_tiny_patch4_window7_224', pretrained=True, num_classes=1)
swin = swin.to(DEVICE)

# Binary cross-entropy on raw logits; used by `evaluate`.
loss_fn = nn.BCEWithLogitsLoss()




In [None]:
def train(model, dataloader, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Each batch is moved to ``DEVICE``, labels are cast to float and reshaped to
    a column, and the model is updated with binary cross-entropy on its logits.

    Args:
        model: module mapping a batch of inputs to one logit per sample.
        dataloader: sized iterable of ``(inputs, labels)`` batches.
        optimizer: optimizer bound to ``model``'s parameters.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    criterion = nn.BCEWithLogitsLoss()  # stateless, so one instance serves every batch
    running_loss = 0

    for batch_inputs, batch_labels in dataloader:
        batch_inputs = batch_inputs.to(DEVICE)
        targets = batch_labels.to(DEVICE).float().view(-1, 1)

        optimizer.zero_grad()
        logits = model(batch_inputs)

        # Align the logits with the column-shaped targets when the model
        # returns a different layout.
        if logits.shape != targets.shape:
            logits = logits.view(targets.shape)

        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss / len(dataloader)

In [None]:
def evaluate(model, loader):
    """Score ``model`` on ``loader`` without updating it.

    Batches are moved to ``DEVICE``; the loss comes from the module-level
    ``loss_fn`` and probabilities from a sigmoid over the logits.

    Args:
        model: module mapping a batch of inputs to one logit per sample.
        loader: sized iterable of ``(inputs, labels)`` batches.

    Returns:
        Tuple ``(mean_batch_loss, accuracy, auc, fpr, tpr)`` where accuracy
        thresholds the probabilities at 0.5 and ``fpr``/``tpr`` come from
        ``roc_curve``.
    """
    model.eval()
    running_loss = 0
    scores, targets = [], []

    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(DEVICE)
            batch_labels = batch_labels.float().unsqueeze(1).to(DEVICE)

            logits = model(batch_inputs)
            running_loss += loss_fn(logits, batch_labels).item()

            scores.extend(torch.sigmoid(logits).cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())

    scores = np.array(scores)
    targets = np.array(targets)

    auc = roc_auc_score(targets, scores)
    hard_predictions = (scores > 0.5).astype(int)
    acc = accuracy_score(targets, hard_predictions)
    fpr, tpr, _ = roc_curve(targets, scores)

    mean_loss = running_loss / len(loader)
    return mean_loss, acc, auc, fpr, tpr


In [None]:
# Fine-tune each backbone in turn and plot the ROC curve of its final epoch.
for model_name, model in [('Swin', swin), ('ResNet', resnet)]:
    print(f"Training {model_name}...")
    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

    # Reset per model so a curve from the previous model is never re-plotted.
    fpr = tpr = None
    # Use the configured epoch count rather than a hard-coded literal.
    for epoch in range(EPOCHS):
        train_loss = train(model, train_loader, optimizer)
        val_loss, acc, auc, fpr, tpr = evaluate(model, val_loader)
        print(f"[{model_name} Epoch {epoch+1}] Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Acc: {acc:.4f}, AUC: {auc:.4f}")

    if fpr is None:
        # No epoch ran (EPOCHS == 0), so there is nothing to plot.
        continue

    fig, ax = plt.subplots()
    ax.plot(fpr, tpr)
    ax.set_title(f'ROC Curve - {model_name}')
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.grid(True)
    plt.show()


Training Swin...
[Swin Epoch 1] Train Loss: 0.3883, Val Loss: 0.3393, Acc: 0.8780, AUC: 0.7253
[Swin Epoch 2] Train Loss: 0.3480, Val Loss: 0.3402, Acc: 0.8780, AUC: 0.6967
[Swin Epoch 3] Train Loss: 0.3255, Val Loss: 0.3388, Acc: 0.8780, AUC: 0.6833
[Swin Epoch 4] Train Loss: 0.3097, Val Loss: 0.3565, Acc: 0.8585, AUC: 0.6382
[Swin Epoch 5] Train Loss: 0.2903, Val Loss: 0.3599, Acc: 0.8683, AUC: 0.6640
