In [None]:
import kagglehub
import os
import random
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import DataLoader, random_split, Subset
from sklearn.metrics import accuracy_score
from PIL import Image
import torch.nn.functional as F

In [None]:
# Pick the compute device: CUDA when a GPU is visible, otherwise the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Download dataset
# The value returned by kagglehub.dataset_download is kept in `path`; a later
# cell joins sub-directories onto it to locate the image folder.
path = kagglehub.dataset_download("xhlulu/140k-real-and-fake-faces")
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/140k-real-and-fake-faces


In [None]:
# 1. Locate the image folder (this notebook works from the dataset's 'test' directory)
data_dir = os.path.join(path, 'real_vs_fake', 'real-vs-fake', 'test')

# 2. Define image transforms
# The backbone loaded later is an ImageNet-pretrained ResNet-50, so inputs are
# normalised with the ImageNet channel statistics those weights were trained
# with rather than a generic 0.5 / 0.5 rescale.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((224, 224)),   # fixed input size for every image
    transforms.ToTensor(),           # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

In [None]:
# 3. Load full dataset
# ImageFolder takes its labels from the sub-directories of data_dir; the
# id <-> name mapping is available as dataset_tmp.class_to_idx / .classes.
# Every image goes through `transform` when it is read.
dataset_tmp = datasets.ImageFolder(root=data_dir, transform=transform)

In [None]:
# 4. Balanced subset: keep at most `samples_per_class` images of each class
SEED = 42  # fixes the shuffle and the split so Restart & Run All is repeatable
samples_per_class = float('inf')  # float('inf') keeps every image; set e.g. 500 for a quick run

# Label ids are assigned by ImageFolder (sorted folder names), so the names are
# read from the dataset instead of assuming which id means "real" or "fake".
class_names = dataset_tmp.classes
class_indices = {label: [] for label in range(len(class_names))}

for idx, (_, label) in enumerate(dataset_tmp.samples):
    if len(class_indices[label]) < samples_per_class:
        class_indices[label].append(idx)
    # With a finite cap, stop scanning as soon as every class is full.
    if all(len(lst) >= samples_per_class for lst in class_indices.values()):
        break

# Trim every class to the size of the smallest one so the subset stays
# balanced even when a class holds fewer images than the cap.
per_class = min(len(lst) for lst in class_indices.values())
class_indices = {label: lst[:per_class] for label, lst in class_indices.items()}

counts = ", ".join(
    f"{name}={len(class_indices[label])}" for label, name in enumerate(class_names)
)
print(f"Collected samples: {counts}")

final_indices = [idx for label in sorted(class_indices) for idx in class_indices[label]]
random.Random(SEED).shuffle(final_indices)  # private RNG: does not touch global random state

# Make balanced subset
dataset = Subset(dataset_tmp, final_indices)

# 5. Split into train, val, test (70% / 15% / remainder)
total_len = len(dataset)
train_len = int(0.7 * total_len)
val_len = int(0.15 * total_len)
test_len = total_len - train_len - val_len  # remainder, so the three parts always sum to total_len

train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_len, val_len, test_len],
    generator=torch.Generator().manual_seed(SEED),  # same split on every run
)

# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

print(f"Train samples: {train_len}, Val samples: {val_len}, Test samples: {test_len}")


Collected samples: Real=10000, Fake=10000
Train samples: 14000, Val samples: 3000, Test samples: 3000


In [None]:
# 6. Load pretrained ResNet-50 and modify final layer
# `pretrained=True` is deprecated in torchvision >= 0.13 in favour of `weights=`.
# IMAGENET1K_V1 is the checkpoint the old flag selected (resnet50-0676ba61.pth).
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Freeze the whole pretrained backbone
for param in model.parameters():
    param.requires_grad = False

# Replace final fully connected layer (for 2 classes). A freshly created
# nn.Linear has requires_grad=True, so the new head is the only trainable part.
model.fc = nn.Linear(model.fc.in_features, 2)

model = model.to(device)

# 7. Loss and optimizer (only parameters with requires_grad=True are updated)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 95.8MB/s]


In [None]:
# Print the module tree to inspect the architecture, including the replaced fc head.
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# 8. Training function
def train_model(model, train_loader, val_loader, epochs=5):
    """Run `epochs` training epochs on `model`, validating after each one.

    Relies on the notebook-level `optimizer`, `criterion` and `device`, which
    are looked up at call time. After every epoch it prints the mean of the
    per-batch training losses and the accuracy over `val_loader`.
    Returns nothing.
    """
    for epoch_idx in range(epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()

        mean_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch_idx+1}/{epochs}], Loss: {mean_loss:.4f}")

        # ---- validation pass (no gradients, eval-mode layers) ----
        model.eval()
        predicted, expected = [], []
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                logits = model(batch_x.to(device))
                batch_pred = torch.max(logits, 1)[1]  # index of the largest logit per row
                predicted.extend(batch_pred.cpu().numpy())
                expected.extend(batch_y.numpy())  # labels never left the CPU

        val_acc = accuracy_score(expected, predicted)
        print(f"Validation Accuracy: {val_acc*100:.2f}%\n")

# 9. Optionally, function to unfreeze some layers for fine-tuning
def unfreeze_model(model, unfreeze_from_layer=6):
    """Make `model.fc` plus the stages `layer{N}`..`layer4` trainable.

    Every parameter is frozen first, `fc` is re-enabled, then each stage from
    `unfreeze_from_layer` onwards is unfrozen. Parameters outside
    `layer1`..`layer4` and `fc` therefore stay frozen. Finally the
    notebook-level `optimizer` is replaced with a new Adam (lr=1e-5) over the
    trainable parameters, which discards any previous optimizer state.

    Args:
        model: module exposing `layer1`, `layer2`, `layer3`, `layer4` and `fc`.
        unfreeze_from_layer: first stage to unfreeze, 1 to 4. A value above 4
            (including the default) unfreezes no stage, leaving only `fc`
            trainable.

    Raises:
        ValueError: if `unfreeze_from_layer` is below 1. Such values used to
            wrap around through negative list indexing and unfreeze stages
            silently.
    """
    layers = [model.layer1, model.layer2, model.layer3, model.layer4]

    if unfreeze_from_layer < 1:
        raise ValueError(
            f"unfreeze_from_layer must be >= 1, got {unfreeze_from_layer}"
        )

    for param in model.parameters():
        param.requires_grad = False  # Freeze all first

    for param in model.fc.parameters():
        param.requires_grad = True  # Always train final fc layer

    # Unfreeze from the requested stage onwards (empty slice when N > 4)
    stages_to_unfreeze = layers[unfreeze_from_layer - 1:]
    for stage in stages_to_unfreeze:
        for param in stage.parameters():
            param.requires_grad = True

    if stages_to_unfreeze:
        print(f"Unfroze layers from layer{unfreeze_from_layer} onwards")
    else:
        # Report what actually happened instead of claiming a stage was unfrozen.
        print(
            f"No residual stages unfrozen (unfreeze_from_layer={unfreeze_from_layer}); "
            "only the fc layer is trainable"
        )

    # Reset optimizer to include unfrozen params; train_model reads this global.
    global optimizer
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-5)

# 10. Prediction function (without display)
def predict_image_without_display(image_path, model, transform, class_names):
    """Classify a single image file and return the predicted class name.

    Puts `model` in eval mode (it is not switched back afterwards), loads the
    image as RGB, applies `transform`, and runs one forward pass on the
    notebook-level `device`.

    Args:
        image_path: path of the image file to classify.
        model: classifier returning one row of logits per input image.
        transform: callable turning a PIL image into a tensor.
        class_names: sequence mapping a class index to its name.

    Returns:
        The entry of `class_names` for the highest-scoring class, or None if
        the image could not be loaded (the error is printed).
    """
    model.eval()
    try:
        # The context manager closes the underlying file once the RGB copy exists.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
    except Exception as e:
        print(f"Error loading image: {e}")
        return None

    input_tensor = transform(image).unsqueeze(0).to(device)  # add a batch dimension
    with torch.no_grad():
        output = model(input_tensor)
        # Softmax is monotonic, so the argmax of the logits is the predicted class.
        pred_idx = output.argmax(dim=1)

    return class_names[pred_idx.item()]


In [None]:
# 11. Stage one: train with the backbone frozen, so only the new fc head learns
train_model(model=model, train_loader=train_loader, val_loader=val_loader, epochs=5)


Epoch [1/5], Loss: 0.5925
Validation Accuracy: 74.67%

Epoch [2/5], Loss: 0.5217
Validation Accuracy: 78.07%

Epoch [3/5], Loss: 0.4940
Validation Accuracy: 79.83%

Epoch [4/5], Loss: 0.4772
Validation Accuracy: 79.70%

Epoch [5/5], Loss: 0.4636
Validation Accuracy: 80.27%



In [None]:
# 12. Evaluate on test set and report accuracy
def evaluate_test_set(model, test_loader, class_names):
    """Print the accuracy of `model` over every batch of `test_loader`.

    The model is switched to eval mode and run without gradients on the
    notebook-level `device`. `class_names` is accepted but not used here.
    Returns nothing.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            logits = model(batch_x.to(device))
            batch_pred = torch.max(logits, 1)[1]  # index of the largest logit per row
            predicted.extend(batch_pred.cpu().numpy())
            expected.extend(batch_y.numpy())
    test_acc = accuracy_score(expected, predicted)
    print(f"Test set accuracy: {test_acc*100:.2f}%")


In [None]:
# Test accuracy after training only the fc head
evaluate_test_set(model=model, test_loader=test_loader, class_names=dataset_tmp.classes)

Test set accuracy: 78.40%


In [None]:
# 13. Stage two: unfreeze layer3 onwards, then fine-tune
unfreeze_model(model=model, unfreeze_from_layer=3)
# unfreeze_model rebuilt the optimizer with lr=1e-5, so this run uses the lower rate
train_model(model=model, train_loader=train_loader, val_loader=val_loader, epochs=5)

Unfroze layers from layer3 onwards
Epoch [1/5], Loss: 0.2614
Validation Accuracy: 95.50%

Epoch [2/5], Loss: 0.0605
Validation Accuracy: 96.67%

Epoch [3/5], Loss: 0.0222
Validation Accuracy: 97.03%

Epoch [4/5], Loss: 0.0104
Validation Accuracy: 97.10%

Epoch [5/5], Loss: 0.0101
Validation Accuracy: 97.33%



In [None]:
# 14. Re-evaluate on the test set after fine-tuning the unfrozen layers
evaluate_test_set(model=model, test_loader=test_loader, class_names=dataset_tmp.classes)

Test set accuracy: 96.93%
