In [105]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
import cv2
import numpy as np
from PIL import Image
import os
import shutil
import random
from sklearn.metrics import classification_report, confusion_matrix

In [107]:
# Distributing images. Mixtures of fake and real go to test and train folders
IMAGE_PATH = "/Users/Hung.Le/Downloads/df-training-images/"
SOURCE = 'source/'
REAL = 'real/'
FAKE = 'fake/'
AUTHENTIC = IMAGE_PATH + SOURCE + REAL
TAMPERED = IMAGE_PATH + SOURCE + FAKE
ELA = 'ela/'
EDGE = 'edge/'
LAPLACIAN = 'laplacian/'
TRAIN_IMAGE_FOLDER = "./train_images"
TEST_IMAGE_FOLDER = "./test_images"
TRAIN_PERCENTAGE = 0.9
TEST_PERCENTAGE = 1 - TRAIN_PERCENTAGE

def create_labels() -> dict:
    labels = {}
    for f in os.listdir(AUTHENTIC):
        labels[f] = 0
    for f in os.listdir(TAMPERED):
        labels[f] = 1
    return labels

def create_data_folder(target_image_dir: str, percent: float) -> dict:
    labels = {}
    if os.path.isdir(target_image_dir):
        shutil.rmtree(target_image_dir) 
    os.mkdir(target_image_dir)
    
    authentic_list = os.listdir(AUTHENTIC)
    print(len(authentic_list))
    authentic_size = int(len(authentic_list) * percent)
    selected_authentic_list = random.sample(authentic_list, k=authentic_size)
    for f in selected_authentic_list:
        labels[f] = 0
        shutil.copy(AUTHENTIC + "/" + f, target_image_dir)
    
    tampered_list = os.listdir(TAMPERED)
    print(len(tampered_list))
    tampered_size = int(len(tampered_list) * percent)
    selected_tampered_list = random.sample(tampered_list, k=tampered_size)
    for f in selected_tampered_list:
        labels[f] = 1
        shutil.copy(TAMPERED + "/" + f, target_image_dir)

    print("Number of images = " + str(len(os.listdir(target_image_dir))))
    return labels


In [108]:
# Feature extraction functions
def convert_to_ela_image(image_path, quality=90):
    original = Image.open(image_path).convert("RGB")
    resaved_path = "temp.jpg"
    original.save(resaved_path, "JPEG", quality=quality)
    resaved = Image.open(resaved_path)
    
    ela_image = Image.fromarray(
        np.abs(np.array(original, dtype=np.int16) - np.array(resaved, dtype=np.int16)).astype(np.uint8)
    )
    return ela_image

def compute_edge_density(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    edges = cv2.Canny(img, 100, 200)
    return Image.fromarray(edges)

def compute_laplacian(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    laplacian = cv2.Laplacian(img, cv2.CV_64F)
    laplacian = np.uint8(np.abs(laplacian))
    return Image.fromarray(laplacian)

def laplacian_variance(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    return np.var(laplacian)

def compute_noise_map(image_path):
    # Read grayscale image
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # Apply Gaussian blur to smooth the image
    blurred = cv2.GaussianBlur(img, (5, 5), 0)
    # Compute residual noise (high-frequency component)
    noise_map = cv2.absdiff(img, blurred)
    # Normalize to full range [0, 255]
    norm_noise = cv2.normalize(noise_map, None, 0, 255, cv2.NORM_MINMAX)
    # Convert to PIL Image
    return Image.fromarray(norm_noise.astype(np.uint8))

def compute_frequency_map(image_path):
    # Read grayscale image
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # Convert to float32 and compute FFT
    f = np.fft.fft2(img)
    fshift = np.fft.fftshift(f)  # Shift zero freq to center
    # Compute magnitude spectrum (log scale)
    magnitude_spectrum = 20 * np.log(np.abs(fshift) + 1e-5)  # avoid log(0)
    # Normalize to [0, 255]
    magnitude_spectrum = cv2.normalize(magnitude_spectrum, None, 0, 255, cv2.NORM_MINMAX)
    # Convert to PIL image
    return Image.fromarray(magnitude_spectrum.astype(np.uint8))

In [109]:
# Class to set up dataset
class DeepfakeDataset(Dataset):
    def __init__(self, image_dir, labels, transform=None):
        self.image_dir = image_dir
        self.labels = labels
        self.transform = transform
        self.image_files = list(labels.keys())  # Image filenames

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        label = self.labels[img_name]

        img_path = os.path.join(self.image_dir, img_name)
        
        # Generate feature maps
        ela_img = convert_to_ela_image(img_path).convert("L")
        edge_img = compute_edge_density(img_path).convert("L")
        lap_img = compute_laplacian(img_path).convert("L")
        noise_img = compute_noise_map(img_path).convert("L")
        fft_img = compute_frequency_map(img_path).convert("L")

        # Convert to tensors
        ela_tensor = self.transform(ela_img)
        edge_tensor = self.transform(edge_img)
        lap_tensor = self.transform(lap_img)
        noise_tensor = self.transform(noise_img)
        fft_tensor = self.transform(fft_img)

        # Stack into 5-channel input
        input_tensor = torch.cat([ela_tensor, edge_tensor, lap_tensor, noise_tensor, fft_tensor], dim=0)
        return input_tensor, torch.tensor(label, dtype=torch.long)


In [110]:
# Get and Modify the model
def get_vgg16_model():
    model = models.vgg16(pretrained=True)
    old_conv = model.features[0]

    new_conv = nn.Conv2d(5, 64, kernel_size=3, stride=1, padding=1)
    with torch.no_grad():
        new_conv.weight[:, :3, :, :] = old_conv.weight
        nn.init.kaiming_normal_(new_conv.weight[:, 3:, :, :])
        new_conv.bias = old_conv.bias

    model.features[0] = new_conv
    model.classifier[6] = nn.Linear(4096, 2)
    return model

In [111]:
def train_model(model, train_loader, criterion, optimizer, device, epochs=10):
    model.to(device)
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = correct / total
        print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

    return model


In [112]:
def test_model(model, test_loader, device):
    model.to(device)
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    print("Confusion Matrix:")
    print(confusion_matrix(all_labels, all_preds))
    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds, target_names=["Real", "Fake"]))

In [None]:
# Define data transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

train_labels = create_data_folder(TRAIN_IMAGE_FOLDER, TRAIN_PERCENTAGE)
print(len(train_labels))
test_labels = create_data_folder(TEST_IMAGE_FOLDER, 1 - TRAIN_PERCENTAGE)
print(len(test_labels))

# Prepare datasets
train_dataset = DeepfakeDataset(TRAIN_IMAGE_FOLDER, train_labels, transform)
test_dataset = DeepfakeDataset(TEST_IMAGE_FOLDER, test_labels, transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4)

# Build and train
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = get_vgg16_model()
criterion = nn.CrossEntropyLoss()
# Tuning #1 - Changed learning rate from 1e-3 to 1e-5
optimizer = optim.Adam(model.parameters(), lr=1e-5)

train_model(model, train_loader, criterion, optimizer, device, epochs=10)

# Save model
torch.save(model.state_dict(), "vgg16_deepfake.pth")
print("Model saved!")

# Load and test
model = get_vgg16_model()
model.load_state_dict(torch.load("vgg16_deepfake.pth", map_location=device))
test_model(model, test_loader, device)


7437
5123
Number of images = 11303
11303
7437
5123
Number of images = 1255
1255




Epoch 1/10 - Loss: 0.6448, Accuracy: 0.6271
Epoch 2/10 - Loss: 0.5815, Accuracy: 0.6811
Epoch 3/10 - Loss: 0.5027, Accuracy: 0.7436
Epoch 4/10 - Loss: 0.4232, Accuracy: 0.7955
Epoch 5/10 - Loss: 0.3767, Accuracy: 0.8208
Epoch 6/10 - Loss: 0.3390, Accuracy: 0.8408
Epoch 7/10 - Loss: 0.3142, Accuracy: 0.8517
Epoch 8/10 - Loss: 0.2976, Accuracy: 0.8606
Epoch 9/10 - Loss: 0.2783, Accuracy: 0.8692
Epoch 10/10 - Loss: 0.2593, Accuracy: 0.8791
Model saved!
Confusion Matrix:
[[625 118]
 [ 27 485]]

Classification Report:
              precision    recall  f1-score   support

        Real       0.96      0.84      0.90       743
        Fake       0.80      0.95      0.87       512

    accuracy                           0.88      1255
   macro avg       0.88      0.89      0.88      1255
weighted avg       0.90      0.88      0.89      1255

