In [43]:
!pip install -q datasets timm


In [44]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [45]:
import os

BASE_DIR = "/content/drive/MyDrive/ai_image_dataset"
CLASSES = ["real", "ai_generated"]
SPLITS = ["train", "val", "test"]

for split in SPLITS:
    for cls in CLASSES:
        os.makedirs(f"{BASE_DIR}/{split}/{cls}", exist_ok=True)

print("Folders ready in Drive âœ…")

Folders ready in Drive âœ…


In [46]:
from datasets import load_dataset
from collections import defaultdict
import random
from PIL import Image

COUNTS_PER_CLASS = {"real": 200, "ai_generated": 200}
SPLIT_RATIO = {"train": 0.7, "val": 0.15, "test": 0.15}
IMAGE_SIZE = (224, 224)

def save_image(img, path):
    img = img.convert("RGB").resize(IMAGE_SIZE)
    img.save(path, "JPEG", quality=85)

dataset = load_dataset("Parveshiiii/AI-vs-Real", split="train", streaming=True)
saved_counts = defaultdict(lambda: defaultdict(int))

for sample in dataset:
    label = "real" if sample["binary_label"] == 0 else "ai_generated"

    total_class = sum(saved_counts[s][label] for s in SPLITS)
    if total_class >= COUNTS_PER_CLASS[label]:
        continue

    r = random.random()
    if r < SPLIT_RATIO["train"]:
        split = "train"
    elif r < SPLIT_RATIO["train"] + SPLIT_RATIO["val"]:
        split = "val"
    else:
        split = "test"

    max_split = int(COUNTS_PER_CLASS[label] * SPLIT_RATIO[split])
    if saved_counts[split][label] >= max_split:
        continue

    path = f"{BASE_DIR}/{split}/{label}/{saved_counts[split][label]}.jpg"
    save_image(sample["image"], path)
    saved_counts[split][label] += 1

    if all(sum(saved_counts[s][c] for s in SPLITS) >= COUNTS_PER_CLASS[c] for c in CLASSES):
        break

print("Dataset saved to Drive âœ…")


Dataset saved to Drive âœ…


In [47]:
!cp -r /content/drive/MyDrive/ai_image_dataset /content/


In [48]:
import torch
import timm
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import os

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DATA_DIR = "/content/ai_image_dataset"
MODEL_PATH = "/content/drive/MyDrive/ai_detector_efficientnet.pth"

BATCH_SIZE = 16
EPOCHS = 12
LR = 3e-4


In [49]:
train_tfms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(0.2,0.2,0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

val_tfms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])


In [50]:
class AIDataset(Dataset):
    def __init__(self, root, split, transform):
        self.paths, self.labels = [], []
        for idx, cls in enumerate(CLASSES):
            folder = os.path.join(root, split, cls)
            for f in os.listdir(folder):
                self.paths.append(os.path.join(folder, f))
                self.labels.append(idx)
        self.transform = transform

    def __len__(self): return len(self.paths)

    def __getitem__(self, i):
        img = Image.open(self.paths[i]).convert("RGB")
        return self.transform(img), self.labels[i]


In [51]:
train_ds = AIDataset(DATA_DIR, "train", train_tfms)
val_ds = AIDataset(DATA_DIR, "val", val_tfms)
test_ds = AIDataset(DATA_DIR, "test", val_tfms)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)

len(train_ds), len(val_ds), len(test_ds)


(280, 60, 60)

In [52]:
model = timm.create_model("efficientnet_b0", pretrained=True, num_classes=2)
model = model.to(DEVICE)

for name, param in model.named_parameters():
    if "blocks.6" not in name and "classifier" not in name:
        param.requires_grad = False

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=LR)


In [53]:
best_acc = 0

for epoch in range(EPOCHS):
    model.train()
    total, correct, loss_sum = 0, 0, 0

    for imgs, labels in train_loader:
        imgs, labels = imgs.to(DEVICE), torch.tensor(labels).to(DEVICE)
        optimizer.zero_grad()
        out = model(imgs)
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()*imgs.size(0)
        preds = out.argmax(1)
        total += labels.size(0)
        correct += (preds==labels).sum().item()

    train_acc = 100*correct/total

    model.eval()
    val_correct, val_total = 0,0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(DEVICE), torch.tensor(labels).to(DEVICE)
            out = model(imgs)
            preds = out.argmax(1)
            val_total += labels.size(0)
            val_correct += (preds==labels).sum().item()

    val_acc = 100*val_correct/val_total
    print(f"Epoch {epoch+1}: Train Acc {train_acc:.2f}% | Val Acc {val_acc:.2f}%")

    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), MODEL_PATH)
        print("âœ… Model Saved to Drive")


  imgs, labels = imgs.to(DEVICE), torch.tensor(labels).to(DEVICE)
  imgs, labels = imgs.to(DEVICE), torch.tensor(labels).to(DEVICE)


Epoch 1: Train Acc 68.93% | Val Acc 80.00%
âœ… Model Saved to Drive
Epoch 2: Train Acc 90.36% | Val Acc 83.33%
âœ… Model Saved to Drive
Epoch 3: Train Acc 92.86% | Val Acc 88.33%
âœ… Model Saved to Drive
Epoch 4: Train Acc 94.29% | Val Acc 88.33%
Epoch 5: Train Acc 95.00% | Val Acc 90.00%
âœ… Model Saved to Drive
Epoch 6: Train Acc 97.86% | Val Acc 90.00%
Epoch 7: Train Acc 98.21% | Val Acc 90.00%
Epoch 8: Train Acc 98.21% | Val Acc 90.00%
Epoch 9: Train Acc 97.86% | Val Acc 88.33%
Epoch 10: Train Acc 97.86% | Val Acc 86.67%
Epoch 11: Train Acc 95.71% | Val Acc 83.33%
Epoch 12: Train Acc 97.50% | Val Acc 90.00%


In [54]:
model.load_state_dict(torch.load(MODEL_PATH))
model.eval()

correct,total = 0,0
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs, labels = imgs.to(DEVICE), torch.tensor(labels).to(DEVICE)
        preds = model(imgs).argmax(1)
        total += labels.size(0)
        correct += (preds==labels).sum().item()

print(f"ðŸŽ¯ Test Accuracy: {100*correct/total:.2f}%")


ðŸŽ¯ Test Accuracy: 90.00%


  imgs, labels = imgs.to(DEVICE), torch.tensor(labels).to(DEVICE)


In [55]:
def predict_image(path):
    img = Image.open(path).convert("RGB")
    img = val_tfms(img).unsqueeze(0).to(DEVICE)
    model.eval()
    with torch.no_grad():
        out = model(img)
        prob = torch.softmax(out,1)
        cls = CLASSES[out.argmax(1).item()]
        conf = prob.max().item()*100
    print(f"Prediction: {cls} ({conf:.2f}%)")

# Example:
# predict_image("/content/test.jpg")


In [56]:
import os
print(os.path.exists("/content/drive/MyDrive/ai_detection_model.pth"))


False
