<a href="https://colab.research.google.com/github/VenkataBhanuTejaKonijeti/DeepfakeDetection/blob/main/test_accuracy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ttach

Collecting ttach
  Downloading ttach-0.0.3-py3-none-any.whl.metadata (5.2 kB)
Downloading ttach-0.0.3-py3-none-any.whl (9.8 kB)
Installing collected packages: ttach
Successfully installed ttach-0.0.3


In [None]:
from google.colab import drive
# Attach Google Drive under /content/drive (no forced remount here).
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import zipfile
import os
import shutil
import random
import pandas as pd
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import timm
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from google.colab import drive

# ================================
# 1️⃣ MOUNT GOOGLE DRIVE & SET PATHS
# ================================
# force_remount=True re-attaches Drive even when it is already mounted.
drive.mount('/content/drive', force_remount=True)
dataset_zip = "/content/drive/MyDrive/dataset.zip"
extract_path = "/content/dataset_extracted"

# ================================
# 2️⃣ EXTRACT THE DATASET
# ================================
# The archive is only unpacked when the target directory does not exist yet, so the
# success message is reported only when an extraction actually happened.
if not os.path.exists(extract_path):
    with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("✅ Dataset extracted successfully!")
else:
    print("⚡ Dataset already extracted!")

# ================================
# 3️⃣ ORGANIZE IMAGES INTO 'REAL' & 'FAKE' FOLDERS
# ================================
# Per-class input folders expected directly under the extraction directory.
train_real_folder = os.path.join(extract_path, "training_real")
train_fake_folder = os.path.join(extract_path, "training_fake")
# Roots of the <split>/<class>/ layout consumed by datasets.ImageFolder further down.
train_path = os.path.join(extract_path, "train")
test_path = os.path.join(extract_path, "test")

# Make sure train/{real,fake} and test/{real,fake} exist (no error if they already do).
for category in ["real", "fake"]:
    os.makedirs(os.path.join(train_path, category), exist_ok=True)
    os.makedirs(os.path.join(test_path, category), exist_ok=True)

def move_images(source_folder, dest_folder):
    """Move every image found directly inside ``source_folder`` into ``dest_folder``.

    An entry counts as an image when its lower-cased name ends with one of the
    recognised extensions; all other entries are left untouched. File names are
    preserved.

    Args:
        source_folder: Directory whose entries are scanned (not recursive).
        dest_folder: Directory that receives the matching entries.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp")
    for entry in os.listdir(source_folder):
        if not entry.lower().endswith(image_suffixes):
            continue
        origin = os.path.join(source_folder, entry)
        target = os.path.join(dest_folder, entry)
        shutil.move(origin, target)

# Relocate each raw class folder's images into train/<class>/.
for label, raw_folder in (("real", train_real_folder), ("fake", train_fake_folder)):
    move_images(raw_folder, os.path.join(train_path, label))

# ================================
# 4️⃣ SPLIT DATASET INTO TRAIN & TEST (80-20)
# ================================
def split_data(source_folder, train_dest, test_dest, split_ratio=0.8):
    """Randomly split the entries of ``source_folder`` into a train and a test folder.

    The (sorted) listing is shuffled with the global ``random`` state. The first
    ``int(len(files) * split_ratio)`` entries are the training share, the rest the
    test share. Test entries are always moved into ``test_dest``. Training entries
    are moved into ``train_dest`` unless that is the same directory as
    ``source_folder`` (the in-place split), in which case they stay where they are.

    Args:
        source_folder: Directory holding the files to split.
        train_dest: Directory for the training share; may equal ``source_folder``.
        test_dest: Directory for the test share.
        split_ratio: Fraction of entries assigned to the training share.
    """
    # os.listdir returns entries in arbitrary order; sorting first makes the split
    # repeatable whenever the caller seeds ``random``.
    files = sorted(os.listdir(source_folder))
    random.shuffle(files)
    split_index = int(len(files) * split_ratio)

    os.makedirs(train_dest, exist_ok=True)
    os.makedirs(test_dest, exist_ok=True)

    # Previously train_dest was ignored entirely; honour it when it is a different folder.
    if not os.path.samefile(source_folder, train_dest):
        for f in files[:split_index]:
            shutil.move(os.path.join(source_folder, f), os.path.join(train_dest, f))

    for f in files[split_index:]:
        shutil.move(os.path.join(source_folder, f), os.path.join(test_dest, f))

# Per class, the training folder is passed as both source and train destination;
# the remaining share (20% with the default ratio) goes to test/<class>/.
for label in ("real", "fake"):
    class_train_dir = os.path.join(train_path, label)
    split_data(class_train_dir, class_train_dir, os.path.join(test_path, label))

# ================================
# 5️⃣ LOAD DATASET USING PYTORCH
# ================================
# Training pipeline: resize, random augmentation, tensor conversion, normalisation.
transform = transforms.Compose([
    transforms.Resize((300, 300)),  # Reduced from 380x380 to 300x300 to save memory
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Evaluation pipeline: same resize and normalisation, but no random augmentation, so
# anything computed on the test split is repeatable from run to run.
eval_transform = transforms.Compose([
    transforms.Resize((300, 300)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

train_dataset = datasets.ImageFolder(root=train_path, transform=transform)
test_dataset = datasets.ImageFolder(root=test_path, transform=eval_transform)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=2)

# ================================
# 6️⃣ DEFINE MODEL (EfficientNet-B6 + AGSK)
# ================================
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is a debugging aid (synchronous kernel launches);
# consider removing it for real training runs.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # Debug mode
torch.backends.cuda.matmul.allow_tf32 = False  # Ensure precision

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Selecting a CUDA device is only valid when one exists; calling set_device
# unconditionally would fail on a CPU-only runtime before the fallback above is used.
if device.type == "cuda":
    torch.cuda.set_device(0)  # Ensure the correct GPU is used

efficientnet_b6 = timm.create_model("tf_efficientnet_b6_ns", pretrained=True)  # Fixed model name

class AGSK_EfficientNetB6(nn.Module):
    """Single-output sigmoid classifier built on a truncated backbone.

    The backbone's child modules, minus the last two, are chained into
    ``feature_extractor``. Its 4-D output is averaged over dimensions 2 and 3 and fed
    to a small fully connected head ending in ``Sigmoid``, so ``forward`` returns one
    value in [0, 1] per sample.

    NOTE(review): ``agsk`` is registered (and therefore part of ``state_dict``) but is
    never applied in ``forward``.
    """

    def __init__(self, base_model):
        """Build the model from ``base_model`` (any module exposing ``children()``)."""
        super().__init__()
        backbone_layers = list(base_model.children())[:-2]  # Adjusted feature extractor
        self.feature_extractor = nn.Sequential(*backbone_layers)
        self.agsk = nn.Conv2d(2304, 2304, kernel_size=3, padding=1, groups=2, bias=False)
        self.fc = nn.Sequential(
            nn.Linear(2304, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return per-sample sigmoid scores for a 4-D input batch.

        Raises:
            ValueError: if the input, or the feature extractor's output, is not 4-D.

        If any score is NaN, a message is printed and an all-zero tensor of the same
        shape is returned instead.
        NOTE(review): that zero tensor is freshly created, so it is not connected to
        the autograd graph - confirm this is acceptable during training.
        """
        if x.dim() != 4:
            raise ValueError(f"Unexpected input shape: {x.shape}")

        features = self.feature_extractor(x)
        if features.dim() != 4:
            raise ValueError(f"Unexpected feature extractor output shape: {features.shape}")

        pooled = features.mean(dim=[2, 3])  # Safely apply GAP
        scores = self.fc(pooled)

        if not torch.isnan(scores).any():
            return scores

        print("⚠️ NaN detected in model output! Skipping batch.")
        return torch.zeros_like(scores)  # Return dummy tensor to avoid crashes

# Wrap the backbone and place the whole model on the selected device.
model = AGSK_EfficientNetB6(efficientnet_b6).to(device)
print(f"✅ Model moved to {device}")

# ================================
# 7️⃣ TRAIN THE MODEL
# ================================
# The model ends in a Sigmoid, so plain binary cross-entropy on probabilities is used.
criterion = nn.BCELoss()
optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=1e-5)
num_epochs = 50
torch.cuda.empty_cache()

for epoch in range(num_epochs):
    model.train()
    running_loss, n_correct, n_seen = 0, 0, 0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        # Labels arrive as class indices; reshape to (batch, 1) floats to match the output.
        batch_labels = batch_labels.float().unsqueeze(1).to(device)

        optimizer.zero_grad()
        probabilities = model(batch_images)
        batch_loss = criterion(probabilities, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Bookkeeping for the per-epoch summary line.
        running_loss += batch_loss.item()
        hard_predictions = probabilities.gt(0.5).float()
        n_correct += hard_predictions.eq(batch_labels).sum().item()
        n_seen += batch_labels.size(0)

    accuracy = n_correct / n_seen
    mean_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}, Accuracy: {accuracy:.4f}")

# Weights are written once, after the last epoch.
torch.save(model.state_dict(), "efficientnet_b6_agsk.pth")
print("✅ Training completed and model saved!")


Mounted at /content/drive
✅ Dataset extracted successfully!


  model = create_fn(


✅ Model moved to cuda
Epoch [1/50], Loss: 0.6729, Accuracy: 0.6000
Epoch [2/50], Loss: 0.6065, Accuracy: 0.6759
Epoch [3/50], Loss: 0.5601, Accuracy: 0.7264
Epoch [4/50], Loss: 0.4977, Accuracy: 0.7716
Epoch [5/50], Loss: 0.4670, Accuracy: 0.7847
Epoch [6/50], Loss: 0.4219, Accuracy: 0.8107
Epoch [7/50], Loss: 0.3369, Accuracy: 0.8621
Epoch [8/50], Loss: 0.3049, Accuracy: 0.8728
Epoch [9/50], Loss: 0.2423, Accuracy: 0.9019
Epoch [10/50], Loss: 0.2172, Accuracy: 0.9134


In [None]:
# Persist only the model's state_dict, written to the current working directory.
weights_file = "efficientnet_b6_agsk_weights.pth"
torch.save(model.state_dict(), weights_file)
print("✅ Model weights saved successfully!")


✅ Model weights saved successfully!


In [None]:
# Serialise the entire module object rather than just its state_dict.
full_model_file = "efficientnet_b6_agsk_full.pth"
torch.save(model, full_model_file)
print("✅ Full model saved successfully!")


✅ Full model saved successfully!


In [None]:
# Initialize the model
# Note: this wraps the same `efficientnet_b6` instance as before, so the backbone
# modules are shared with any earlier model built from it.
model = AGSK_EfficientNetB6(efficientnet_b6).to(device)

# Load saved weights
# weights_only=True restricts unpickling to tensors and plain containers, which is all
# a state_dict contains, and makes the loading mode explicit instead of relying on the
# library default.
state_dict = torch.load(
    "efficientnet_b6_agsk_weights.pth", map_location=device, weights_only=True
)
model.load_state_dict(state_dict)
model.eval()  # Set to evaluation mode
print("✅ Model weights loaded successfully!")


  model.load_state_dict(torch.load("efficientnet_b6_agsk_weights.pth", map_location=device))


✅ Model weights loaded successfully!


In [None]:
# Bundle model and optimizer state into one file.
# Note: "epoch" stores the configured num_epochs value.
checkpoint_file = "efficientnet_b6_agsk_checkpoint.pth"
checkpoint = dict(
    epoch=num_epochs,
    model_state=model.state_dict(),
    optimizer_state=optimizer.state_dict(),
)

torch.save(checkpoint, checkpoint_file)
print("✅ Checkpoint saved successfully!")


✅ Checkpoint saved successfully!


In [None]:
# Initialize model
model = AGSK_EfficientNetB6(efficientnet_b6).to(device)
optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=1e-5)

# Load checkpoint
# The checkpoint is a plain dict of an int plus two state_dicts, so the restricted
# weights_only=True loader is sufficient and avoids unpickling arbitrary objects.
checkpoint = torch.load(
    "efficientnet_b6_agsk_checkpoint.pth", map_location=device, weights_only=True
)
model.load_state_dict(checkpoint["model_state"])
optimizer.load_state_dict(checkpoint["optimizer_state"])

print(f"✅ Resumed training from Epoch {checkpoint['epoch']}!")


  checkpoint = torch.load("efficientnet_b6_agsk_checkpoint.pth", map_location=device)


✅ Resumed training from Epoch 50!


In [None]:
from google.colab import drive
# Attach Google Drive under /content/drive (no forced remount here).
drive.mount("/content/drive")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import zipfile
import os
import random
import shutil

dataset_zip = "/content/drive/MyDrive/dataset.zip"
extract_path = "/content/dataset_extracted"

# Unpack the archive only if the target directory is missing; otherwise reuse it.
if os.path.exists(extract_path):
    print("⚡ Dataset already extracted!")
else:
    with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("✅ Dataset extracted successfully!")

# Per-class input folders expected directly under the extraction directory.
train_real_folder, train_fake_folder = (
    os.path.join(extract_path, folder) for folder in ("training_real", "training_fake")
)

# Roots of the <split>/<class>/ layout.
train_path, test_path = (
    os.path.join(extract_path, split) for split in ("train", "test")
)

# Ensure every <split>/<class>/ directory exists (existing ones are left alone).
for split_root in (train_path, test_path):
    for category in ("real", "fake"):
        os.makedirs(os.path.join(split_root, category), exist_ok=True)


⚡ Dataset already extracted!


In [None]:
# Per-class input folders under the extraction directory.
train_real_folder = os.path.join(extract_path, "training_real")
train_fake_folder = os.path.join(extract_path, "training_fake")

# Split roots; each gets a real/ and a fake/ sub-directory below.
train_path = os.path.join(extract_path, "train")
test_path = os.path.join(extract_path, "test")

# Create any missing <split>/<class>/ directory; existing ones are left untouched.
for category in ("real", "fake"):
    for split_root in (train_path, test_path):
        class_dir = os.path.join(split_root, category)
        os.makedirs(class_dir, exist_ok=True)


In [None]:
def split_data(source_folder, train_dest, test_dest, split_ratio=0.8):
    """Shuffle the entries of ``source_folder`` and move them into two folders.

    The first ``int(len(entries) * split_ratio)`` shuffled entries are moved to
    ``train_dest`` and the remainder to ``test_dest``, so ``source_folder`` ends up
    without any of the listed entries. Shuffling uses the global ``random`` state.

    Args:
        source_folder: Directory whose entries are split.
        train_dest: Existing directory receiving the training share.
        test_dest: Existing directory receiving the test share.
        split_ratio: Fraction of entries that go to ``train_dest``.
    """
    names = os.listdir(source_folder)
    random.shuffle(names)
    cut = int(len(names) * split_ratio)

    # Training share first, then the test share.
    assignments = ((train_dest, names[:cut]), (test_dest, names[cut:]))
    for destination, share in assignments:
        for name in share:
            shutil.move(os.path.join(source_folder, name), os.path.join(destination, name))

# One split per class: raw class folder -> train/<class>/ and test/<class>/.
class_sources = {"real": train_real_folder, "fake": train_fake_folder}
for label, raw_folder in class_sources.items():
    split_data(raw_folder, os.path.join(train_path, label), os.path.join(test_path, label))

print("✅ Dataset split into training and testing sets!")


✅ Dataset split into training and testing sets!


In [None]:
# Load trained weights
import pickle

# weights_only=True restricts unpickling to tensors and plain containers (all a
# state_dict needs). A file holding a pickled full model is rejected by that loader
# with UnpicklingError, which is mapped onto the same warning the dict check produced.
try:
    checkpoint = torch.load(
        "/content/drive/MyDrive/efficientnet_b6_agsk_weights.pth",
        map_location=device,
        weights_only=True,
    )
except pickle.UnpicklingError:
    checkpoint = None

if isinstance(checkpoint, dict):
    model.load_state_dict(checkpoint)  # Load weights properly
else:
    print("⚠️ The checkpoint file contains an entire model, not just state_dict.")



  checkpoint = torch.load("/content/drive/MyDrive/efficientnet_b6_agsk_weights.pth", map_location=device)


In [None]:
import zipfile
import os
import random
import shutil
import torch
import torch.nn as nn
import timm
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score

# Paths
dataset_zip = "/content/drive/MyDrive/dataset.zip"
extract_path = "/content/dataset_extracted"

# Extract dataset if not already extracted
if os.path.exists(extract_path):
    print("⚡ Dataset already extracted!")
else:
    with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("✅ Dataset extracted successfully!")

# Per-class input folders expected directly under the extraction directory.
train_real_folder, train_fake_folder = (
    os.path.join(extract_path, folder) for folder in ("training_real", "training_fake")
)

# Roots of the <split>/<class>/ layout read by datasets.ImageFolder below.
train_path, test_path = (
    os.path.join(extract_path, split) for split in ("train", "test")
)

# Create train/test directories (existing ones are left alone).
for split_root in (train_path, test_path):
    for category in ("real", "fake"):
        os.makedirs(os.path.join(split_root, category), exist_ok=True)

# ✅ Improved Data Splitting (Stratified & Copied)
def split_data(source_folder, train_dest, test_dest, split_ratio=0.8):
    """Copy the entries of ``source_folder`` into a train and a test folder.

    Entries whose name already exists in ``train_dest`` or ``test_dest`` are skipped.
    Because files are copied (the source is never emptied), a second call would
    otherwise draw a new random split and could place one file in both folders.
    The remaining entries are shuffled with the global ``random`` state; the first
    ``int(len(files) * split_ratio)`` are copied to ``train_dest`` and the rest to
    ``test_dest``.

    Args:
        source_folder: Directory holding the files to split; left unchanged.
        train_dest: Directory for the training share (created if missing).
        test_dest: Directory for the test share (created if missing).
        split_ratio: Fraction of not-yet-assigned entries that go to ``train_dest``.
    """
    os.makedirs(train_dest, exist_ok=True)
    os.makedirs(test_dest, exist_ok=True)

    # Anything already present in either destination keeps its earlier assignment.
    already_assigned = set(os.listdir(train_dest)) | set(os.listdir(test_dest))
    # Sorted so that a seeded ``random`` gives a repeatable split.
    files = [f for f in sorted(os.listdir(source_folder)) if f not in already_assigned]
    random.shuffle(files)
    split_index = int(len(files) * split_ratio)

    for f in files[:split_index]:  # Copy training data
        shutil.copy(os.path.join(source_folder, f), os.path.join(train_dest, f))

    for f in files[split_index:]:  # Copy testing data
        shutil.copy(os.path.join(source_folder, f), os.path.join(test_dest, f))

split_data(train_real_folder, os.path.join(train_path, "real"), os.path.join(test_path, "real"))
split_data(train_fake_folder, os.path.join(train_path, "fake"), os.path.join(test_path, "fake"))

print("✅ Dataset split into training and testing sets!")

# ✅ Data Preprocessing
# Evaluation-only pipeline: resize, tensor conversion and normalisation. The random
# flip/rotation were removed because they make the reported metrics vary between runs
# on the same weights and images.
# NOTE(review): the training cell of this notebook resizes to 300x300 while evaluation
# uses 256x256 - confirm the mismatch is intended.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

test_dataset = datasets.ImageFolder(root=test_path, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# ✅ Model Definition
class AGSK_EfficientNetB6(nn.Module):
    """Single-output sigmoid classifier on top of a truncated backbone.

    All child modules of ``base_model`` except the last two form
    ``feature_extractor``. Its output is averaged over dimensions 2 and 3 and passed
    through a fully connected head ending in ``Sigmoid``.

    NOTE(review): ``agsk`` is created (so its weights are part of ``state_dict``) but
    it is not used in ``forward``.
    """

    def __init__(self, base_model):
        """Assemble feature extractor, the (unused) ``agsk`` conv, and the head."""
        super().__init__()
        kept_children = list(base_model.children())[:-2]
        self.feature_extractor = nn.Sequential(*kept_children)
        self.agsk = nn.Conv2d(2304, 2304, kernel_size=3, padding=1, groups=2, bias=False)
        self.fc = nn.Sequential(
            nn.Linear(2304, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return one sigmoid score per sample of the input batch."""
        feature_maps = self.feature_extractor(x)
        pooled = feature_maps.mean(dim=[2, 3])  # Global Average Pooling
        return self.fc(pooled)

# ✅ Load Model & Weights
import pickle

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): pretrained=True downloads backbone weights that load_state_dict below
# replaces (it is strict by default) - pretrained=False would probably suffice; confirm.
base_model = timm.create_model("tf_efficientnet_b6_ns", pretrained=True, num_classes=0)
model = AGSK_EfficientNetB6(base_model).to(device)

# weights_only=True restricts unpickling to tensors and plain containers (all a
# state_dict needs). A file holding a pickled full model is rejected by that loader
# with UnpicklingError, which is mapped onto the same warning the dict check produced.
try:
    checkpoint = torch.load(
        "/content/drive/MyDrive/efficientnet_b6_agsk_weights.pth",
        map_location=device,
        weights_only=True,
    )
except pickle.UnpicklingError:
    checkpoint = None

if isinstance(checkpoint, dict):
    model.load_state_dict(checkpoint)
    print("✅ Model weights loaded successfully!")
else:
    print("⚠️ The checkpoint file contains an entire model, not just state_dict.")

# Inference mode for the evaluation that follows.
model.eval()

# ✅ Model Evaluation
def evaluate_model(model, test_loader):
    """Run ``model`` over ``test_loader`` and print the accuracy as a percentage.

    Each batch is moved to the module-level ``device``. Model outputs above 0.5 count
    as class 1, everything else as class 0. Gradients are disabled for the whole
    pass. Nothing is returned.

    Args:
        model: Callable mapping an image batch to per-sample scores.
        test_loader: Iterable yielding ``(images, labels)`` batches.
    """
    predicted, expected = [], []

    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            scores = model(batch_images)
            hard_predictions = (scores > 0.5).float().cpu().numpy().flatten()
            predicted.extend(hard_predictions)
            expected.extend(batch_labels.cpu().numpy())

    accuracy = accuracy_score(expected, predicted)
    print(f"🔥 Model Accuracy: {accuracy * 100:.2f}%")

evaluate_model(model, test_loader)


⚡ Dataset already extracted!
✅ Dataset split into training and testing sets!


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/173M [00:00<?, ?B/s]

  checkpoint = torch.load("/content/drive/MyDrive/efficientnet_b6_agsk_weights.pth", map_location=device)


✅ Model weights loaded successfully!
🔥 Model Accuracy: 96.58%


In [None]:
import zipfile
import os
import random
import shutil
import torch
import torch.nn as nn
import timm
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Paths
dataset_zip = "/content/drive/MyDrive/dataset.zip"
extract_path = "/content/dataset_extracted"

# Extract dataset if not already extracted
needs_extraction = not os.path.exists(extract_path)
if needs_extraction:
    with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
print("✅ Dataset extracted successfully!" if needs_extraction else "⚡ Dataset already extracted!")

# Per-class input folders under the extraction directory.
train_real_folder = os.path.join(extract_path, "training_real")
train_fake_folder = os.path.join(extract_path, "training_fake")

# Split roots; each receives real/ and fake/ sub-directories.
train_path = os.path.join(extract_path, "train")
test_path = os.path.join(extract_path, "test")

# Create train/test directories (existing ones are left alone).
for category in ("real", "fake"):
    for split_root in (train_path, test_path):
        os.makedirs(os.path.join(split_root, category), exist_ok=True)

# ✅ Improved Data Splitting (Stratified & Copied)
def split_data(source_folder, train_dest, test_dest, split_ratio=0.8):
    """Copy the entries of ``source_folder`` into a train and a test folder.

    Entries whose name is already present in ``train_dest`` or ``test_dest`` keep
    their earlier assignment and are skipped. Files are copied rather than moved, so
    without this check every re-run would draw a fresh random split and could put
    the same file into both folders. The remaining entries are shuffled with the
    global ``random`` state; the first ``int(len(files) * split_ratio)`` go to
    ``train_dest`` and the rest to ``test_dest``.

    Args:
        source_folder: Directory holding the files to split; left unchanged.
        train_dest: Directory for the training share (created if missing).
        test_dest: Directory for the test share (created if missing).
        split_ratio: Fraction of not-yet-assigned entries that go to ``train_dest``.
    """
    for destination in (train_dest, test_dest):
        os.makedirs(destination, exist_ok=True)

    already_assigned = set(os.listdir(train_dest)).union(os.listdir(test_dest))
    # Sorted so that a seeded ``random`` gives a repeatable split.
    files = [f for f in sorted(os.listdir(source_folder)) if f not in already_assigned]
    random.shuffle(files)
    split_index = int(len(files) * split_ratio)

    for f in files[:split_index]:  # Copy training data
        shutil.copy(os.path.join(source_folder, f), os.path.join(train_dest, f))

    for f in files[split_index:]:  # Copy testing data
        shutil.copy(os.path.join(source_folder, f), os.path.join(test_dest, f))

split_data(train_real_folder, os.path.join(train_path, "real"), os.path.join(test_path, "real"))
split_data(train_fake_folder, os.path.join(train_path, "fake"), os.path.join(test_path, "fake"))

print("✅ Dataset split into training and testing sets!")

# ✅ Data Preprocessing
# Evaluation-only pipeline: resize, tensor conversion and normalisation. Random
# flip/rotation are deliberately absent so that accuracy, precision, recall and F1
# are repeatable for a given set of weights and test images.
# NOTE(review): the training cell of this notebook resizes to 300x300 while evaluation
# uses 256x256 - confirm the mismatch is intended.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

test_dataset = datasets.ImageFolder(root=test_path, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# ✅ Model Definition
class AGSK_EfficientNetB6(nn.Module):
    """Truncated backbone followed by a sigmoid-terminated fully connected head.

    ``feature_extractor`` chains every child of ``base_model`` except the last two.
    ``forward`` averages its output over dimensions 2 and 3 and feeds the result to
    ``fc``, yielding one score per sample.

    NOTE(review): ``agsk`` is instantiated and therefore stored in ``state_dict``,
    yet ``forward`` never calls it.
    """

    def __init__(self, base_model):
        """Build the sub-modules from ``base_model``'s children."""
        super().__init__()
        self.feature_extractor = nn.Sequential(*tuple(base_model.children())[:-2])
        self.agsk = nn.Conv2d(2304, 2304, kernel_size=3, padding=1, groups=2, bias=False)
        self.fc = nn.Sequential(
            nn.Linear(2304, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the head's sigmoid output for the pooled backbone features."""
        # Global Average Pooling over dimensions 2 and 3.
        pooled_features = self.feature_extractor(x).mean(dim=[2, 3])
        return self.fc(pooled_features)

# ✅ Load Model & Weights
import pickle

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): pretrained=True downloads backbone weights that load_state_dict below
# replaces (it is strict by default) - pretrained=False would probably suffice; confirm.
base_model = timm.create_model("tf_efficientnet_b6_ns", pretrained=True, num_classes=0)
model = AGSK_EfficientNetB6(base_model).to(device)

# Load with weights_only=True so only tensors and plain containers are unpickled.
# That loader refuses a file containing a pickled full model (UnpicklingError); the
# refusal is reported through the same warning the dict check below already prints.
try:
    checkpoint = torch.load(
        "/content/drive/MyDrive/efficientnet_b6_agsk_weights.pth",
        map_location=device,
        weights_only=True,
    )
except pickle.UnpicklingError:
    checkpoint = None

if isinstance(checkpoint, dict):
    model.load_state_dict(checkpoint)
    print("✅ Model weights loaded successfully!")
else:
    print("⚠️ The checkpoint file contains an entire model, not just state_dict.")

# Inference mode for the evaluation that follows.
model.eval()

# ✅ Model Evaluation (with Accuracy, Precision, Recall, and F1)
def evaluate_model(model, test_loader):
    """Run ``model`` over ``test_loader`` and print accuracy, precision, recall and F1.

    Each batch is moved to the module-level ``device``. Model outputs above 0.5 count
    as class 1, everything else as class 0. Precision, recall and F1 use
    ``average="binary"``. Gradients are disabled for the whole pass. Nothing is
    returned.

    Args:
        model: Callable mapping an image batch to per-sample scores.
        test_loader: Iterable yielding ``(images, labels)`` batches.
    """
    predicted, expected = [], []

    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            scores = model(batch_images)
            hard_predictions = (scores > 0.5).float().cpu().numpy().flatten()
            predicted.extend(hard_predictions)
            expected.extend(batch_labels.cpu().numpy())

    # Same evaluation order as the printed report: accuracy, precision, recall, F1.
    accuracy = accuracy_score(expected, predicted)
    precision = precision_score(expected, predicted, average="binary")
    recall = recall_score(expected, predicted, average="binary")
    f1 = f1_score(expected, predicted, average="binary")

    report_lines = (
        f"🔥 Testing Accuracy: {accuracy * 100:.2f}%",
        f"🎯 Precision: {precision:.4f}",
        f"📌 Recall: {recall:.4f}",
        f"⚡ F1-Score: {f1:.4f}",
    )
    for line in report_lines:
        print(line)

evaluate_model(model, test_loader)


⚡ Dataset already extracted!
✅ Dataset split into training and testing sets!


  model = create_fn(
  checkpoint = torch.load("/content/drive/MyDrive/efficientnet_b6_agsk_weights.pth", map_location=device)


✅ Model weights loaded successfully!
🔥 Testing Accuracy: 95.84%
🎯 Precision: 0.9673
📌 Recall: 0.9539
⚡ F1-Score: 0.9606


In [None]:
# Count the directory entries in each test class folder and report the totals.
test_real_count, test_fake_count = (
    len(os.listdir(os.path.join(test_path, label))) for label in ("real", "fake")
)

total_test_images = test_real_count + test_fake_count
print(f"📊 Total Testing Images: {total_test_images} (Real: {test_real_count}, Fake: {test_fake_count})")


📊 Total Testing Images: 409 (Real: 217, Fake: 192)
