# 02 – Model Development: Deepfake Detection

Goal: extract image frames from videos, build a train/val split, prepare data
loaders, and train and save a CNN-based deepfake classifier.

Imports and setup

In [1]:
from pathlib import Path
import json

import pandas as pd
import cv2

import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as T
import torchvision.models as models

# Report the installed PyTorch build and whether CUDA can be used.
print(f"Torch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

Torch version: 2.9.1+cpu
CUDA available: False


Load metadata again

In [None]:
from pathlib import Path
import json
import pandas as pd

# Anchor every data path on the project root, one level above notebooks/.
project_root = Path("..").resolve()
data_root = project_root / "data" / "raw"

# metadata.json is keyed by video filename; each entry carries a label and,
# optionally, the name of the original video.
meta_path = data_root / "train_sample_videos" / "metadata.json"
print(meta_path, meta_path.exists())

with meta_path.open("r") as f:
    meta = json.load(f)

# One record per video, built in a single pass.
rows = [
    {
        "filename": fname,
        "label": info["label"],            # "FAKE" or "REAL"
        "original": info.get("original"),  # original real video, if available
    }
    for fname, info in meta.items()
]

df = pd.DataFrame(rows)
df.head(), df["label"].value_counts()

C:\Users\adamc\Documents\Fall 25\Machine and Deep Learning\CSC422_DeepfakeDetection_Final_Aguilar_Adam\data\raw\train_sample_videos\metadata.json True


(         filename label        original
 0  aagfhgtpmv.mp4  FAKE  vudstovrck.mp4
 1  aapnvogymq.mp4  FAKE  jdubbvfswz.mp4
 2  abarnvbtwb.mp4  REAL            None
 3  abofeumbvv.mp4  FAKE  atvmxvwyns.mp4
 4  abqwwspghj.mp4  FAKE  qzimuostzz.mp4,
 label
 FAKE    323
 REAL     77
 Name: count, dtype: int64)

Create processed frame folders

In [4]:
# Frames are stored as <frames_root>/train/<LABEL>/<video>.jpg.
# NOTE(review): this path is relative to the current working directory, whereas
# data_root is anchored on project_root — confirm that is intended.
frames_root = Path("data") / "processed" / "frames"
train_frames_root = frames_root / "train"

# One sub-folder per class label.
for label in ("REAL", "FAKE"):
    class_dir = train_frames_root / label
    class_dir.mkdir(parents=True, exist_ok=True)

train_frames_root

WindowsPath('data/processed/frames/train')

Helper to extract a center frame from each video

In [5]:
def extract_center_frame(video_path: Path, out_path: Path) -> bool:
    """Save the middle frame of a video as an image file.

    Args:
        video_path: Video file to read.
        out_path: Destination image path; its parent directory is created
            if it does not exist.

    Returns:
        True if the frame was read and written. False (after printing the
        reason) if the video could not be opened, reports no frames, the
        frame could not be read, or the image could not be written.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            print("Could not open", video_path)
            return False

        # Treat any non-positive frame count as "nothing to read".
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0:
            print("No frames in", video_path)
            return False

        # Seek to the middle of the clip and decode a single frame.
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count // 2)
        ok, frame = cap.read()
    finally:
        # Free the capture handle on every path, including early returns.
        cap.release()

    if not ok:
        print("Could not read frame from", video_path)
        return False

    # The frame is passed to imwrite in the channel order cap.read() returned
    # (BGR), so no colour conversion is needed before saving.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # imwrite reports failure through its return value; do not count an
    # unwritten frame as a success.
    if not cv2.imwrite(str(out_path), frame):
        print("Could not write frame to", out_path)
        return False
    return True

Run frame extraction (one per video)

In [None]:
from tqdm.auto import tqdm

# Tallies cover only videos processed in this run; frames that already exist
# on disk are skipped and counted in neither.
success, fail = 0, 0
videos_dir = data_root / "train_sample_videos"

for row in tqdm(df.itertuples(), total=len(df)):
    fname = row.filename
    label = row.label.upper()          # "REAL" or "FAKE"

    video_path = videos_dir / fname
    out_name = fname.replace(".mp4", ".jpg")
    out_path = train_frames_root / label / out_name

    if not out_path.exists():
        extracted = extract_center_frame(video_path, out_path)
        if extracted:
            success += 1
        else:
            fail += 1

success, fail

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 400/400 [01:49<00:00,  3.67it/s]


(400, 0)

Filter to videos that actually have frames

In [None]:
from pathlib import Path

# Same frame location that was set up when the folders were created.
frames_root = Path("data/processed/frames")
train_frames_root = frames_root / "train"

def has_extracted_frame(row):
    """Return True if the frame image for this metadata row exists on disk."""
    frame_name = row["filename"].replace(".mp4", ".jpg")
    frame_path = train_frames_root / row["label"].upper() / frame_name
    return frame_path.exists()

# Keep only the rows whose frame file is present.
frame_present = df.apply(has_extracted_frame, axis=1)
df_frames = df[frame_present].reset_index(drop=True)
len(df), len(df_frames), df_frames["label"].value_counts()

(400,
 400,
 label
 FAKE    323
 REAL     77
 Name: count, dtype: int64)

Create a dataset and dataloaders

In [9]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as T

class FrameDataset(Dataset):
    """Dataset of extracted video frames stored as <root>/<LABEL>/<name>.jpg.

    Args:
        root: Directory containing one sub-folder per label.
        df: DataFrame with a "filename" column (video names; ".mp4" is
            replaced by ".jpg" to find the frame) and a "label" column
            ("REAL" or "FAKE", in any case).
        transform: Optional callable applied to the RGB image.
    """

    def __init__(self, root, df, transform=None):
        self.root = Path(root)
        # Keep a private frame with a fresh 0..n-1 index.
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.label_map = {"REAL": 0, "FAKE": 1}

    def __len__(self):
        """Return the number of frames in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return (image, label) for row idx; label is 0 for REAL, 1 for FAKE.

        Raises:
            KeyError: If the row's label is neither "REAL" nor "FAKE".
        """
        row = self.df.iloc[idx]
        label_str = row["label"].upper()
        label = self.label_map[label_str]

        fname = row["filename"].replace(".mp4", ".jpg")
        img_path = self.root / label_str / fname

        # Close the file as soon as the pixels are converted instead of
        # leaving the handle to the garbage collector.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        # Compare against None so a callable that happens to be falsy
        # (e.g. an empty container-style transform) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation, stratified on the label so both
# splits keep the same FAKE/REAL ratio.
train_df, val_df = train_test_split(
    df_frames,
    test_size=0.2,
    random_state=42,
    stratify=df_frames["label"],
)

image_size = 224

# Steps shared by both pipelines.
# NOTE(review): the mean/std values look like the usual ImageNet statistics,
# which would match a pretrained backbone — confirm.
resize = T.Resize((image_size, image_size))
normalize = T.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Only the training pipeline applies random augmentation.
train_transform = T.Compose([resize, T.RandomHorizontalFlip(), T.ToTensor(), normalize])
val_transform = T.Compose([resize, T.ToTensor(), normalize])

train_ds = FrameDataset(train_frames_root, train_df, transform=train_transform)
val_ds = FrameDataset(train_frames_root, val_df, transform=val_transform)

batch_size = 16
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=0)

len(train_ds), len(val_ds)

(320, 80)

Define the model, loss, and optimizer (ResNet18 with its final layer replaced by a 2-class head)

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

# Seed PyTorch's global RNG before any random work so that the new head's
# initial weights, the training shuffle order, and the random flips are
# repeatable from one run to the next.
torch.manual_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

num_classes = 2

# Start from the default ResNet18 weights and replace the final
# fully-connected layer with a fresh 2-class head.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

Using device: cpu
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\adamc/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:02<00:00, 21.5MB/s]


Training loop (keep the number of epochs small so it doesn't take forever)

In [13]:
from tqdm.auto import tqdm

def run_epoch(loader, train=True):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With ``train=True`` the model is put in training mode and the optimizer
    steps after every batch. With ``train=False`` the model is put in eval
    mode and gradient tracking is disabled. Relies on the notebook-level
    ``model``, ``criterion``, ``optimizer`` and ``device``.
    """
    # train(False) is the same switch that eval() flips.
    model.train(train)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(train):
        for images, labels in tqdm(loader, leave=False):
            images, labels = images.to(device), labels.to(device)
            n_batch = labels.size(0)

            if train:
                optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            if train:
                loss.backward()
                optimizer.step()

            # Weight the batch loss by its size so the epoch figure is a
            # per-sample average (assumes criterion returns a batch mean).
            loss_sum += loss.item() * n_batch
            preds = outputs.argmax(dim=1)
            n_correct += (preds == labels).sum().item()
            n_seen += n_batch

    return loss_sum / n_seen, n_correct / n_seen

num_epochs = 5  # you can bump to 8–10 if it’s still fast enough

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")

    # Train first, then evaluate on the validation loader.
    train_loss, train_acc = run_epoch(train_loader, train=True)
    val_loss, val_acc = run_epoch(val_loader, train=False)

    # Pad the split name to five characters so the two lines align.
    for split_name, split_loss, split_acc in (
        ("Train", train_loss, train_acc),
        ("Val", val_loss, val_acc),
    ):
        print(f"  {split_name:<5} loss: {split_loss:.4f}  acc: {split_acc:.3f}")


Epoch 1/5


                                               

  Train loss: 0.7157  acc: 0.594
  Val   loss: 0.4969  acc: 0.800

Epoch 2/5


                                               

  Train loss: 0.4834  acc: 0.787
  Val   loss: 0.5102  acc: 0.750

Epoch 3/5


                                               

  Train loss: 0.4033  acc: 0.812
  Val   loss: 0.5025  acc: 0.800

Epoch 4/5


                                               

  Train loss: 0.3592  acc: 0.831
  Val   loss: 0.5418  acc: 0.775

Epoch 5/5


                                               

  Train loss: 0.3821  acc: 0.825
  Val   loss: 0.4773  acc: 0.812




Save the trained model for notebook 3

In [14]:
# Write only the model's state_dict to <project_root>/models so it can be
# loaded later for evaluation.
project_root = Path("..").resolve()
models_dir = project_root / "models"
models_dir.mkdir(parents=True, exist_ok=True)

model_path = models_dir / "deepfake_resnet18.pth"
state = model.state_dict()
torch.save(state, model_path)
print(f"Saved model to: {model_path}")

Saved model to: C:\Users\adamc\Documents\Fall 25\Machine and Deep Learning\CSC422_DeepfakeDetection_Final_Aguilar_Adam\models\deepfake_resnet18.pth
