In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
# Define transformations for data augmentation and normalization
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


In [3]:
import os
import shutil
import random
from pathlib import Path

# Define the source directories
train_dir = '/kaggle/input/deepfake-and-real-images/Dataset/Train'
val_dir = '/kaggle/input/deepfake-and-real-images/Dataset/Validation'
test_dir = '/kaggle/input/deepfake-and-real-images/Dataset/Test'

# Define temporary directories for downsampled data
temp_data_dir = '/kaggle/temp_downsampled_data'
temp_train_dir = os.path.join(temp_data_dir, 'Train')
temp_val_dir = os.path.join(temp_data_dir, 'Validation')
temp_test_dir = os.path.join(temp_data_dir, 'Test')

# Function to copy a fraction of files from source to destination
def downsample_directory(source_dir, dest_dir, fraction=0.1):
    Path(dest_dir).mkdir(parents=True, exist_ok=True)
    for class_dir in os.listdir(source_dir):
        full_class_dir = os.path.join(source_dir, class_dir)
        dest_class_dir = os.path.join(dest_dir, class_dir)
        Path(dest_class_dir).mkdir(parents=True, exist_ok=True)
        
        # Get all files in the current class directory and shuffle
        files = os.listdir(full_class_dir)
        random.shuffle(files)
        
        # Calculate the number of files to copy
        num_files_to_copy = int(len(files) * fraction)
        selected_files = files[:num_files_to_copy]
        
        # Copy the selected files to the new directory
        for file in selected_files:
            shutil.copy2(os.path.join(full_class_dir, file), os.path.join(dest_class_dir, file))

# Downsample each dataset
downsample_directory(train_dir, temp_train_dir, fraction=0.25)
downsample_directory(val_dir, temp_val_dir, fraction=0.25)
downsample_directory(test_dir, temp_test_dir, fraction=0.25)

# Load the downsampled dataset with ImageFolder
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define transformations for data augmentation and normalization
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load datasets with ImageFolder
train_dataset = datasets.ImageFolder(root=temp_train_dir, transform=transform)
val_dataset = datasets.ImageFolder(root=temp_val_dir, transform=transform)
test_dataset = datasets.ImageFolder(root=temp_test_dir, transform=transform)

# Data loaders for the downsampled datasets
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Verify downsampled subset sizes
print(f"Downsampled Training dataset size: {len(train_dataset)}")
print(f"Downsampled Validation dataset size: {len(val_dataset)}")
print(f"Downsampled Test dataset size: {len(test_dataset)}")

Downsampled Training dataset size: 35000
Downsampled Validation dataset size: 9856
Downsampled Test dataset size: 2726


In [4]:
# Load a pre-trained model (Xception is not in torchvision; using ResNet here)
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 1)  # Adjust for binary classification

# Move the model to the GPU if available
model = model.to(device)
num_gpus = torch.cuda.device_count()
if num_gpus > 1:
    model = nn.DataParallel(model)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 153MB/s] 


In [5]:
# Define loss function and optimizer
criterion = nn.BCEWithLogitsLoss()  # Binary classification with logits
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [6]:
# Assuming all previous setup (model, dataloaders, etc.) is already done
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from tqdm import tqdm
import os
# Define the directory to save checkpoints
checkpoint_dir = '/kaggle/working/'
os.makedirs(checkpoint_dir, exist_ok=True)

# Training function with progress bar
def train_model(model, dataloader, criterion, optimizer, epoch):
    model.train()
    running_loss = 0.0
    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}", unit="batch")
    for images, labels in progress_bar:
        images, labels = images.to(device), labels.float().to(device).unsqueeze(1)
        
        # Zero the gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        progress_bar.set_postfix(train_loss=(running_loss / (progress_bar.n + 1)))
        
    return running_loss / len(dataloader)

In [7]:
# Validation function remains the same
def evaluate_model(model, dataloader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.float().to(device).unsqueeze(1)
            
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # Calculate accuracy
            predicted = torch.round(torch.sigmoid(outputs))
            correct += (predicted.cpu() == labels.cpu()).sum().item()
            total += labels.size(0)
            
            running_loss += loss.item()
    accuracy = 100 * correct / total
    return running_loss / len(dataloader), accuracy

In [8]:
num_epochs = 15
best_val_accuracy = 0.0
patience = 3  # Number of epochs to wait for improvement
epochs_no_improve = 0  # Counter for epochs without improvement

for epoch in range(num_epochs):
    train_loss = train_model(model, train_loader, criterion, optimizer, epoch)
    val_loss, val_accuracy = evaluate_model(model, val_loader, criterion)
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, "
          f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")
    
    # Check for improvement in validation accuracy
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        epochs_no_improve = 0  # Reset the counter
        checkpoint_path = os.path.join(checkpoint_dir, f"best_model_epoch_{epoch+1}.pth")
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Checkpoint saved at {checkpoint_path}")
    else:
        epochs_no_improve += 1  # Increment the counter
        
    # Check for early stopping
    if epochs_no_improve >= patience:
        print(f"Early stopping triggered. No improvement for {patience} epochs.")
        break  # Exit the training loop


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 1: 100%|██████████| 547/547 [04:42<00:00,  1.94batch/s, train_loss=0.169]


Epoch [1/15], Train Loss: 0.1688, Validation Loss: 0.2488, Validation Accuracy: 90.64%
Checkpoint saved at /kaggle/working/best_model_epoch_1.pth


Epoch 2: 100%|██████████| 547/547 [04:41<00:00,  1.94batch/s, train_loss=0.0986]


Epoch [2/15], Train Loss: 0.0986, Validation Loss: 0.2041, Validation Accuracy: 90.64%


Epoch 3: 100%|██████████| 547/547 [04:43<00:00,  1.93batch/s, train_loss=0.0781]


Epoch [3/15], Train Loss: 0.0781, Validation Loss: 0.3207, Validation Accuracy: 88.86%


Epoch 4: 100%|██████████| 547/547 [04:47<00:00,  1.90batch/s, train_loss=0.0685]


Epoch [4/15], Train Loss: 0.0685, Validation Loss: 0.1302, Validation Accuracy: 95.03%
Checkpoint saved at /kaggle/working/best_model_epoch_4.pth


Epoch 5: 100%|██████████| 547/547 [04:41<00:00,  1.94batch/s, train_loss=0.0608]


Epoch [5/15], Train Loss: 0.0608, Validation Loss: 0.1446, Validation Accuracy: 94.25%


Epoch 6: 100%|██████████| 547/547 [04:42<00:00,  1.94batch/s, train_loss=0.055] 


Epoch [6/15], Train Loss: 0.0550, Validation Loss: 0.1283, Validation Accuracy: 94.93%


Epoch 7: 100%|██████████| 547/547 [04:47<00:00,  1.90batch/s, train_loss=0.0502]


Epoch [7/15], Train Loss: 0.0502, Validation Loss: 0.1952, Validation Accuracy: 93.94%
Early stopping triggered. No improvement for 3 epochs.


In [9]:
# Test the model
test_loss, test_accuracy = evaluate_model(model, test_loader, criterion)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

Test Loss: 0.5442, Test Accuracy: 84.52%


In [2]:
from PIL import Image
import torch
import torch.nn.functional as F
from torchvision import transforms
import torch.nn as nn
from torchvision import models

# Define the transformation pipeline (must match training preprocessing)
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load the trained model and move it to the appropriate device
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
print(device)
checkpoint_path = '/kaggle/working/checkpoints/best_model_epoch_10.pth'  # Replace X with the appropriate epoch
model = models.resnet50(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, 1)
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model = model.to(device)  # Move model to the appropriate device
model.eval()

# Define a function to classify an image as Fake or Real
def classify_image(image_path):
    # Open the image file
    img = Image.open(image_path).convert('RGB')
    
    # Apply the transformations
    img_tensor = transform(img).unsqueeze(0)  # Add a batch dimension
    
    # Move the tensor to the appropriate device
    img_tensor = img_tensor.to(device)
    
    # Perform inference
    with torch.no_grad():
        output = model(img_tensor)
        prediction = torch.sigmoid(output).item()  # Get the probability
    
    # Classify based on the output
    if prediction >= 0.5:
        return "Real", prediction
    else:
        return "Fake", prediction

# Example usage
image_path = '/kaggle/input/deepfake-and-real-images/Dataset/Test/Real/real_1002.jpg'
label, confidence = classify_image(image_path)
print(f"The image is classified as {label} with a confidence of {confidence:.2f}")


cpu


  model.load_state_dict(torch.load(checkpoint_path, map_location=device))


FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/working/checkpoints/best_model_epoch_10.pth'