# Deepfake Detection with XceptionNet
This notebook loads a dataset of real and fake images, trains a classifier using the Xception model, and evaluates its performance.

In [1]:
# Install required libraries
# !pip install torch torchvision timm scikit-learn tqdm

In [2]:
# import os
# import shutil
# from sklearn.model_selection import train_test_split
# from glob import glob

# # Original folders
# ai_folder = 'archive/my_real_vs_ai_dataset/my_real_vs_ai_dataset/ai_images'
# real_folder = 'archive/my_real_vs_ai_dataset/my_real_vs_ai_dataset/real'

# # Destination folders
# train_fake = 'dataset/train/fake'
# train_real = 'dataset/train/real'
# val_fake = 'dataset/val/fake'
# val_real = 'dataset/val/real'

# # Create directories
# for folder in [train_fake, train_real, val_fake, val_real]:
#     os.makedirs(folder, exist_ok=True)

# # Get image file paths
# ai_images = glob(os.path.join(ai_folder, '*'))
# real_images = glob(os.path.join(real_folder, '*'))

# # Split into train and validation (80% train, 20% val)
# ai_train, ai_val = train_test_split(ai_images, test_size=0.2, random_state=42)
# real_train, real_val = train_test_split(real_images, test_size=0.2, random_state=42)

# # Function to move files
# def move_files(file_list, destination):
#     for file in file_list:
#         if os.path.isfile(file):
#             shutil.copy(file, os.path.join(destination, os.path.basename(file)))

# # Move files into respective directories
# move_files(ai_train, train_fake)
# move_files(ai_val, val_fake)
# move_files(real_train, train_real)
# move_files(real_val, val_real)

# print("✅ Dataset organized for ImageFolder format.")


In [1]:
# dataloader.py
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import os

def get_dataloaders(data_dir, batch_size):
    train_transforms = transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])

    val_transforms = transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])

    train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transforms)
    val_dataset = datasets.ImageFolder(os.path.join(data_dir, "val"), transform=val_transforms)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    return train_loader, val_loader, train_dataset.classes

In [2]:
import torch
print("CUDA Available:", torch.cuda.is_available())
print("Device Name:", torch.cuda.get_device_name(0))

CUDA Available: True
Device Name: NVIDIA GeForce RTX 3050 Laptop GPU


In [None]:
# model_xception.py
import torch.nn as nn
from timm import create_model

class XceptionNet(nn.Module):
    def __init__(self, num_classes=2):
        super(XceptionNet, self).__init__()
        self.model = create_model('xception', pretrained=True, num_classes=num_classes)

    def forward(self, x):
        return self.model(x)


In [8]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler

# Load your custom modules here
# Assuming your Xception model is in model.py
  # Assuming dataloader setup is here

# ✅ Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\n✅ Device: {device}")
if device.type == 'cuda':
    print(f"🚀 GPU Available: {torch.cuda.get_device_name(0)}")
else:
    print("⚠️ CUDA not available. Using CPU. Expect slower training.")

# ✅ Hyperparameters
EPOCHS = 7
BATCH_SIZE = 8
LR = 1e-4
PATIENCE = 2
DATA_DIR = "dataset"
CHECKPOINT_PATH = "checkpoint.pth"

# ✅ Data loaders
train_loader, val_loader, class_names = get_dataloaders(DATA_DIR, BATCH_SIZE)

# ✅ Model, Loss, Optimizer
model = XceptionNet(num_classes=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)
scaler = GradScaler()

# ✅ Load from checkpoint if exists
start_epoch = 0
best_val_loss = float('inf')
if os.path.exists(CHECKPOINT_PATH):
    checkpoint = torch.load(CHECKPOINT_PATH)
    model.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    scaler.load_state_dict(checkpoint["scaler_state_dict"])
    start_epoch = checkpoint["epoch"] + 1
    best_val_loss = checkpoint["best_val_loss"]
    print(f"🔄 Resumed from epoch {start_epoch}")



#for ploting 
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []



# ✅ Training loop
patience_counter = 0
for epoch in range(start_epoch, EPOCHS):
    model.train()
    total_loss, correct = 0, 0
    loop = tqdm(train_loader, desc=f"🧠 Epoch [{epoch+1}/{EPOCHS}]")

    for inputs, labels in loop:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()
        correct += (outputs.argmax(1) == labels).sum().item()
        loop.set_postfix(loss=loss.item(), accuracy=correct/len(train_loader.dataset))

    train_acc = correct / len(train_loader.dataset)

    # ✅ Validation
    model.eval()
    val_loss, val_correct = 0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            val_loss += loss.item()
            val_correct += (outputs.argmax(1) == labels).sum().item()

    val_acc = val_correct / len(val_loader.dataset)
    val_loss /= len(val_loader)

    print(f"\n📊 Epoch {epoch+1}: Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Val Loss: {val_loss:.4f}")

    #for ploting extra
    train_losses.append(total_loss / len(train_loader))
    val_losses.append(val_loss)
    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)


    # ✅ Save checkpoint
    torch.save({
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "scaler_state_dict": scaler.state_dict(),
        "best_val_loss": best_val_loss,
    }, CHECKPOINT_PATH)

    # ✅ Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_xception_model.pth")
        print("🎉 New best model saved.")
        patience_counter = 0
    else:
        patience_counter += 1
        print(f"🕓 No improvement. Patience: {patience_counter}/{PATIENCE}")
        if patience_counter >= PATIENCE:
            print("⛔ Early stopping.")
            break

# ✅ Save training history for plotting
import pickle

with open("training_history.pkl", "wb") as f:
    pickle.dump({
        "train_losses": train_losses,
        "val_losses": val_losses,
        "train_accuracies": train_accuracies,
        "val_accuracies": val_accuracies
    }, f)
print("📁 Training history saved to training_history.pkl")




✅ Device: cuda
🚀 GPU Available: NVIDIA GeForce RTX 3050 Laptop GPU


  scaler = GradScaler()


🔄 Resumed from epoch 6


  with autocast():
🧠 Epoch [7/7]:   0%|          | 85/20000 [00:21<1:23:45,  3.96it/s, accuracy=0.00419, loss=0.00102]  


KeyboardInterrupt: 

In [None]:
# evaluate.py
import torch
from sklearn.metrics import classification_report


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = XceptionNet(num_classes=2)
model.load_state_dict(torch.load("best_xception_model.pth", map_location=device))
model.to(device)
model.eval()

_, val_loader, class_names = get_dataloaders("dataset", batch_size=8)

y_true, y_pred = [], []
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        y_true.extend(labels.numpy())
        y_pred.extend(preds)

print(classification_report(y_true, y_pred, target_names=class_names))

              precision    recall  f1-score   support

        fake       0.96      0.97      0.96     20000
        real       0.97      0.96      0.96     20000

    accuracy                           0.96     40000
   macro avg       0.96      0.96      0.96     40000
weighted avg       0.96      0.96      0.96     40000



In [None]:
import pickle

with open("training_history.pkl", "wb") as f:
    pickle.dump({
        "train_losses": train_losses,
        "val_losses": val_losses,
        "train_accuracies": train_accuracies,
        "val_accuracies": val_accuracies
    }, f)

In [None]:
# plot_results.py
import matplotlib.pyplot as plt
import pickle
import torch
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc
import numpy as np

# Load training history
with open("training_history.pkl", "rb") as f:
    history = pickle.load(f)

train_losses = history["train_losses"]
val_losses = history["val_losses"]
train_acc = history["train_accuracies"]
val_acc = history["val_accuracies"]

# 1. Plot Loss
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label="Train Loss")
plt.plot(val_losses, label="Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Loss over Epochs")
plt.legend()
plt.grid(True)
plt.show()

# 2. Plot Accuracy
plt.figure(figsize=(10, 5))
plt.plot(train_acc, label="Train Accuracy")
plt.plot(val_acc, label="Validation Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Accuracy over Epochs")
plt.legend()
plt.grid(True)
plt.show()

# 3. Confusion Matrix + ROC (Optional: only if test_loader exists)
from model import XceptionNet  # your model class
from dataset_loader import get_dataloaders  # or your loader function

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = XceptionNet(num_classes=2).to(device)
model.load_state_dict(torch.load("best_xception_model.pth"))
model.eval()

_, test_loader, _ = get_dataloaders("dataset", 8)  # adjust if needed
all_preds = []
all_labels = []
all_probs = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        probs = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels.numpy())
        all_probs.extend(probs)

# Confusion Matrix
cm = confusion_matrix(all_labels, all_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap="Blues")
plt.title("Confusion Matrix")
plt.show()

# ROC Curve
fpr, tpr, _ = roc_curve(all_labels, all_probs)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f"ROC curve (AUC = {roc_auc:.2f})")
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Receiver Operating Characteristic")
plt.legend(loc="lower right")
plt.grid(True)
plt.show()
