In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import shutil
import random
import timm

In [2]:
# Preprocessing pipelines applied to every image before it reaches the model.
# Both pipelines resize to 224x224, convert to a tensor and normalise each of
# the three channels with mean 0.5 / std 0.5.

# Training pipeline: additionally applies a random horizontal flip as augmentation.
train_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Evaluation pipeline: same steps without the random flip, so it is deterministic.
test_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [3]:
# Build a fresh train/test split on disk from the raw image folders.
#
# Each class ("authentic" -> real, "tampered" -> fake) is shuffled ONCE and then
# sliced, so a file lands in either the train or the test folder, never both.
# Drawing the two subsets with independent random samples would let test images
# also appear in the training set (train/test leakage).

# Root folder of the raw images; can be overridden with the DF_IMAGE_PATH
# environment variable so the notebook is not tied to one machine.
IMAGE_PATH = os.environ.get("DF_IMAGE_PATH", "/Users/Hung.Le/Downloads/df-training-images/")
AUTHENTIC = os.path.join(IMAGE_PATH, "authentic")
TAMPERED = os.path.join(IMAGE_PATH, "tampered")
TRAIN_REAL_PATH = 'dataset/train/real'
TRAIN_FAKE_PATH = 'dataset/train/fake'
TEST_REAL_PATH = 'dataset/test/real'
TEST_FAKE_PATH = 'dataset/test/fake'
PERCENT = 0.9      # fraction of each class that goes to the training split
SPLIT_SEED = 42    # fixed seed so Restart & Run All reproduces the same split


def _list_files(directory):
    """Return the sorted names of the regular files directly inside `directory`.

    Sorting makes the seeded shuffle independent of the OS listing order;
    sub-directories are skipped because `shutil.copy` cannot copy them.
    """
    return sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )


def _copy_split(src_dir, names, train_size, train_dst, test_dst, rng):
    """Shuffle `names` once, copy the first `train_size` files to `train_dst`
    and all remaining files to `test_dst`."""
    shuffled = list(names)
    rng.shuffle(shuffled)
    for name in shuffled[:train_size]:
        shutil.copy(os.path.join(src_dir, name), train_dst)
    for name in shuffled[train_size:]:
        shutil.copy(os.path.join(src_dir, name), test_dst)


authentic_list = _list_files(AUTHENTIC)
tampered_list = _list_files(TAMPERED)
train_real_size = int(len(authentic_list) * PERCENT)
train_fake_size = int(len(tampered_list) * PERCENT)

# Start from an empty output tree so files from a previous split cannot linger.
if os.path.exists('dataset'):
    shutil.rmtree('dataset')

sub_dirs = [
    TRAIN_REAL_PATH,
    TRAIN_FAKE_PATH,
    TEST_REAL_PATH,
    TEST_FAKE_PATH,
]

for sub_dir in sub_dirs:
    os.makedirs(sub_dir)

# A private RNG keeps the split reproducible without reseeding the global `random` state.
split_rng = random.Random(SPLIT_SEED)
_copy_split(AUTHENTIC, authentic_list, train_real_size, TRAIN_REAL_PATH, TEST_REAL_PATH, split_rng)
_copy_split(TAMPERED, tampered_list, train_fake_size, TRAIN_FAKE_PATH, TEST_FAKE_PATH, split_rng)

# Sanity checks: every source file was copied into exactly one of the two splits.
assert len(os.listdir(TRAIN_REAL_PATH)) == train_real_size
assert len(os.listdir(TRAIN_FAKE_PATH)) == train_fake_size
assert len(os.listdir(TEST_REAL_PATH)) == len(authentic_list) - train_real_size
assert len(os.listdir(TEST_FAKE_PATH)) == len(tampered_list) - train_fake_size

In [26]:
# Wrap the on-disk split in datasets and batch iterators.
# ImageFolder derives the label of each image from the name of its sub-folder.
train_dir, test_dir = 'dataset/train', 'dataset/test'

train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)
test_dataset = datasets.ImageFolder(test_dir, transform=test_transform)

# Batches of 16; only the training batches are reshuffled.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=16)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=16)

# Show which integer label each class folder received.
print("Class to Index Mapping:", train_dataset.class_to_idx)

Class to Index Mapping: {'fake': 0, 'real': 1}


In [27]:
# Build the classifier: a pretrained Xception from timm with a new 1-logit head.
model = timm.create_model('xception', pretrained=True)

# Freeze every pretrained weight so that only the new head below is trained.
model.requires_grad_(False)

# Replace the final fully-connected layer with a single-output layer for
# binary classification, and mark its parameters as trainable.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=1)
model.fc.requires_grad_(True)


In [28]:
# Training setup: compute device, loss function and optimizer.

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = model.to(device)

# Binary cross-entropy computed directly on the raw logit produced by the model.
criterion = nn.BCEWithLogitsLoss()
# Adam over the model's parameters with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)


In [29]:
# Train the dragon

def train_model(model, criterion, optimizer, num_epochs= 10, data_loader=None,
                save_path='deepfake_detector_w_xception.pth'):
    """Train `model` for `num_epochs` epochs and save its weights.

    After each epoch the mean per-sample loss and the training accuracy are
    printed. When training finishes, `model.state_dict()` is written to
    `save_path`.

    Args:
        model: Module producing one logit per sample, i.e. an output of shape
            (batch, 1).
        criterion: Loss called as `criterion(outputs, labels)`. Its
            `reduction` attribute (assumed 'mean' when absent) decides how the
            batch loss is folded into the epoch average.
        optimizer: Optimizer that updates the trainable parameters of `model`.
        num_epochs: Number of passes over the data.
        data_loader: Iterable of `(images, labels)` batches. Defaults to the
            notebook-level `train_loader`.
        save_path: Destination file for the trained weights.

    Raises:
        ValueError: If the loader yields no samples in an epoch.
    """
    loader = train_loader if data_loader is None else data_loader

    # Run on whatever device the model already lives on instead of depending
    # on a notebook-level `device` variable.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    sum_reduced = getattr(criterion, 'reduction', 'mean') == 'sum'

    for epoch in range(num_epochs):
        # NOTE(review): train mode also applies to BatchNorm layers inside a
        # frozen backbone, so their running statistics keep updating --
        # confirm this is intended.
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in loader:
            images = images.to(run_device)
            # Labels become float with shape (batch, 1) to line up with the logits.
            labels = labels.to(run_device).float().unsqueeze(1)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Back-propagation and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accuracy bookkeeping; no gradients are needed here.
            batch_size = labels.size(0)
            with torch.no_grad():
                preds = (torch.sigmoid(outputs) > 0.5).float()
                correct += (preds == labels).sum().item()
            total += batch_size

            # Accumulate the summed per-sample loss so that a smaller final
            # batch is not over-weighted in the epoch average.
            running_loss += loss.item() if sum_reduced else loss.item() * batch_size

        if total == 0:
            raise ValueError("train_model received a data loader that yielded no samples")

        epoch_loss = running_loss / total
        epoch_acc = 100 * correct / total
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")

    print("Training is complete")
    torch.save(model.state_dict(), save_path)

# Run the training loop for 10 epochs with the model, loss and optimizer configured above.
train_model(model=model, criterion=criterion, optimizer=optimizer, num_epochs=10)

Epoch [1/10], Loss: 0.5439, Accuracy: 77.81%
Epoch [2/10], Loss: 0.5158, Accuracy: 78.28%
Epoch [3/10], Loss: 0.5023, Accuracy: 78.29%
Epoch [4/10], Loss: 0.4920, Accuracy: 78.56%
Epoch [5/10], Loss: 0.4813, Accuracy: 78.80%
Epoch [6/10], Loss: 0.4749, Accuracy: 78.92%
Epoch [7/10], Loss: 0.4671, Accuracy: 79.27%
Epoch [8/10], Loss: 0.4660, Accuracy: 79.38%
Epoch [9/10], Loss: 0.4601, Accuracy: 79.44%
Epoch [10/10], Loss: 0.4590, Accuracy: 79.47%
Training is complete
