Group:

Members:

In [None]:
# Mount Google Drive at /content/drive.
# Later cells reach it through the relative path drive/MyDrive/..., which
# presumably relies on the working directory being /content — confirm if the
# notebook is run outside the default Colab setup.

from google.colab import drive

drive.mount('/content/drive')

In [None]:
!rm -r data/
!rm -r __MACOSX/
!rm -r test/
!rm -r train/
!rm -r valid/
!rm -r 0_real/
!rm -r 1_fake/
!rm data.*
!rm README.*

In [None]:
!mkdir data
!cp drive/MyDrive/CS4487/data.zip ./data.zip

In [None]:
!rm -r __MACOSX/
!rm -r AIGC-Detection-Dataset-2025

In [None]:
!unzip ./data.zip

In [None]:
!pip install git+https://github.com/openai/CLIP.git

In [None]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm
from timm import create_model
import shutil
from pathlib import Path
import random
import clip
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load OpenAI CLIP "ViT-L/14@336px" together with its image preprocessing callable.
# `preprocess` is the transform applied in data_loader.__getitem__.
# NOTE(review): in the visible notebook this module-level `model` is not used by
# any later cell (train_model builds its own model, and the training cell rebinds
# `device`); if only `preprocess` is needed, loading the full model here may be
# an avoidable memory cost — confirm before removing.
model, preprocess = clip.load("ViT-L/14@336px", device=device)

In [None]:
# @title
def split_train_to_val(source_dir, val_ratio=0.2, seed=42):
    """Move a random share of each class's training images into a sibling ``val`` folder.

    For each class folder (``0_real``, ``1_fake``) under ``source_dir``, images
    are moved to ``<source_dir>/../val/<class>`` until that folder holds
    ``max(1, int(total * val_ratio))`` images, where ``total`` counts the images
    currently in the class's train and val folders together. With an empty val
    folder this is the plain "move ``val_ratio`` of train" split; calling the
    function again moves nothing more, so the cell can be re-run safely.

    Args:
        source_dir: Training directory (``str`` or ``Path``) containing the
            class sub-folders.
        val_ratio: Fraction of each class to hold out, between 0 and 1. At
            least one image per class is held out, even for tiny ratios.
        seed: Seed of the private random generator used for the selection.

    Raises:
        ValueError: If ``val_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= val_ratio <= 1:
        raise ValueError(f"val_ratio must be between 0 and 1, got {val_ratio}")

    source_dir = Path(source_dir)
    val_dir = source_dir.parent / "val"

    # Private generator: same draws as random.seed(seed) + random.sample, but
    # without resetting the global random state used by the rest of the notebook.
    rng = random.Random(seed)

    # Define class subdirectories
    classes = ["0_real", "1_fake"]
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}

    def _list_images(directory):
        # Sorted by name: iterdir() order depends on the filesystem, and the
        # seeded sample is only reproducible if the input order is fixed.
        return sorted(
            (f for f in directory.iterdir()
             if f.suffix.lower() in image_extensions and f.is_file()),
            key=lambda p: p.name,
        )

    for class_name in classes:
        train_class_dir = source_dir / class_name
        val_class_dir = val_dir / class_name

        if not train_class_dir.exists():
            print(f"Warning: {train_class_dir} does not exist. Skipping.")
            continue

        # Create validation directory
        val_class_dir.mkdir(parents=True, exist_ok=True)

        files = _list_images(train_class_dir)
        if len(files) == 0:
            print(f"No images found in {train_class_dir}")
            continue

        # Images already held out by an earlier run count towards the target,
        # which is what makes a second call a no-op.
        already_in_val = len(_list_images(val_class_dir))
        target_in_val = max(1, int((len(files) + already_in_val) * val_ratio))  # at least 1 image
        num_to_move = min(len(files), target_in_val - already_in_val)

        if num_to_move <= 0:
            print(f"{class_name}: validation folder already holds {already_in_val} images. Nothing to move.")
            continue

        print(f"Moving {num_to_move}/{len(files)} images from {class_name} to validation")

        # Randomly select files and move them
        for file_path in rng.sample(files, num_to_move):
            dest_path = val_class_dir / file_path.name
            shutil.move(str(file_path), str(dest_path))

    print(f"\nDone! Validation set created at: {val_dir}")

In [None]:
# Hold out about 25% of each class from data/train into the sibling folder data/val.
split_train_to_val('data/train', val_ratio=0.25, seed=42)

In [None]:
class data_loader(Dataset):
    """Dataset of real vs. fake images read from two class sub-folders.

    ``data_dir`` must contain the folders ``0_real`` (label 0) and ``1_fake``
    (label 1). Files are listed in sorted order so that indices are stable
    across runs and machines.

    Args:
        data_dir: Directory holding the ``0_real`` and ``1_fake`` folders.
        transform: Optional callable applied to each RGB PIL image. When
            omitted, the module-level ``preprocess`` is used.
    """

    # Lower-case file suffixes that are treated as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp')

    def __init__(self, data_dir, transform=None):
        real = os.path.join(data_dir, '0_real')
        fake = os.path.join(data_dir, '1_fake')

        self.transform = transform

        self.full_filenames_real = self._list_images(real)
        self.full_filenames_fake = self._list_images(fake)
        self.full_filenames = self.full_filenames_real + self.full_filenames_fake

        self.labels = [0] * len(self.full_filenames_real) + [1] * len(self.full_filenames_fake)

    @classmethod
    def _list_images(cls, directory):
        """Return the full paths of the image files in ``directory``, sorted by name."""
        # os.listdir returns entries in arbitrary order; sort for reproducibility.
        return [os.path.join(directory, name)
                for name in sorted(os.listdir(directory))
                if name.lower().endswith(cls.IMAGE_EXTENSIONS)]

    def __len__(self):
        """Number of images across both classes."""
        return len(self.full_filenames)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the sample at ``idx``."""
        image_path = self.full_filenames[idx]
        # Close the file handle right away; convert() returns an independent
        # in-memory image, so it stays usable after the file is closed.
        with Image.open(image_path) as img:
            image = img.convert("RGB")
        label = self.labels[idx]
        transform = self.transform if self.transform is not None else preprocess
        return transform(image), label

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from transformers import CLIPVisionModel, CLIPImageProcessor
import os
from tqdm import tqdm # For progress bars

# ==========================================
# 1. Configuration
# ==========================================
# Paths: ImageFolder roots for training/validation and the checkpoint file
# written by train_model().
TRAIN_DIR = "./data/train"
VAL_DIR = "./data/val"
SAVE_PATH = "clip_aigc_detector.pth"

# Hyperparameters
# MODEL_NAME selects both the image processor and the vision backbone.
MODEL_NAME = "openai/clip-vit-base-patch32"
# Batch size shared by the training and validation DataLoaders.
BATCH_SIZE = 32
# Learning rate of the Adam optimizer that updates the classifier head.
LEARNING_RATE = 1e-4
# Number of passes over the training loader.
EPOCHS = 10
NUM_WORKERS = 4 # DataLoader worker processes; adjust based on your CPU cores

# Device setup
# This rebinds `device` (an earlier cell set it to a plain string) to a torch.device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ==========================================
# 2. Data Preparation
# ==========================================

# Image processor that belongs to the checkpoint named by MODEL_NAME.
processor = CLIPImageProcessor.from_pretrained(MODEL_NAME)

def clip_transform(image):
    """Convert one PIL image into the pixel tensor fed to the CLIP vision model.

    The Hugging Face processor takes care of resizing, center cropping and
    normalization and returns PyTorch tensors with a leading batch axis. That
    axis is squeezed away here because the DataLoader stacks the samples into
    batches itself.
    """
    encoded = processor(images=image, return_tensors="pt")
    pixel_values = encoded.pixel_values
    return pixel_values.squeeze(0)

print("Loading datasets...")

# ImageFolder assigns integer labels from the class folder names:
# 0_real -> Label 0
# 1_fake -> Label 1
# Every image goes through clip_transform when it is loaded.
train_dataset = ImageFolder(root=TRAIN_DIR, transform=clip_transform)
val_dataset = ImageFolder(root=VAL_DIR, transform=clip_transform)

# Sanity check of the label mapping: the validation code treats label 1 as "fake".
print(f"Classes found: {train_dataset.classes}") # Should print ['0_real', '1_fake']

# Training batches are reshuffled every epoch; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# ==========================================
# 3. Model Definition
# ==========================================

class CLIPBinaryClassifier(nn.Module):
    """Frozen CLIP vision backbone followed by a small trainable binary head.

    ``forward`` returns probabilities in [0, 1] with shape (batch, 1), i.e. the
    sigmoid of the head's single logit.
    """

    def __init__(self, base_model_name):
        super().__init__()

        # Pre-trained CLIP vision model used purely as a feature extractor:
        # all of its parameters are frozen so that only the head is trained.
        self.backbone = CLIPVisionModel.from_pretrained(base_model_name)
        self.backbone.requires_grad_(False)

        embedding_dim = self.backbone.config.hidden_size

        # Head: embedding -> 512 -> ReLU -> dropout -> one raw score (logit).
        head_layers = [
            nn.Linear(embedding_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 1),
        ]
        self.classifier = nn.Sequential(*head_layers)

        # Maps the logit to a probability between 0.0 and 1.0.
        self.sigmoid = nn.Sigmoid()

    def forward(self, pixel_values):
        """Return the per-image probability for a batch of preprocessed images."""
        # The pooled output of the backbone is one embedding per image,
        # shape [Batch_Size, Hidden_Size].
        features = self.backbone(pixel_values=pixel_values).pooler_output
        return self.sigmoid(self.classifier(features))

# ==========================================
# 4. Training Loop
# ==========================================

def train_model():
    """Train the classifier head and keep the checkpoint with the best validation accuracy.

    Everything is driven by module-level settings: MODEL_NAME, LEARNING_RATE,
    EPOCHS, SAVE_PATH, device, train_loader and val_loader. Each epoch makes one
    optimization pass over train_loader (Adam, binary cross-entropy on the
    model's probabilities) and then measures accuracy on val_loader with a 0.5
    threshold. Whenever that accuracy beats the best value so far, the model's
    state_dict is written to SAVE_PATH. Returns None.
    """
    model = CLIPBinaryClassifier(MODEL_NAME).to(device)

    criterion = nn.BCELoss()  # expects probabilities, which the model outputs
    # Only the head's parameters are handed to the optimizer; the backbone is frozen.
    optimizer = optim.Adam(model.classifier.parameters(), lr=LEARNING_RATE)

    def to_targets(batch_labels):
        # Float labels shaped (Batch, 1) so they line up with the model output.
        return batch_labels.float().unsqueeze(1).to(device)

    best_val_acc = 0.0

    for epoch in range(EPOCHS):
        print(f"\nEpoch {epoch+1}/{EPOCHS}")
        print("-" * 10)

        # ---- training pass ----
        model.train()
        loss_total = 0.0
        loop = tqdm(train_loader, leave=True)  # progress bar over the batches

        for batch_images, batch_labels in loop:
            batch_images = batch_images.to(device)
            targets = to_targets(batch_labels)

            optimizer.zero_grad()
            loss = criterion(model(batch_images), targets)
            loss.backward()
            optimizer.step()

            loss_total += loss.item()
            loop.set_description(f"Train Loss: {loss.item():.4f}")

        epoch_loss = loss_total / len(train_loader)
        print(f"Average Train Loss: {epoch_loss:.4f}")

        # ---- validation pass ----
        model.eval()
        n_correct = 0
        n_seen = 0

        with torch.no_grad():
            for batch_images, batch_labels in val_loader:
                targets = to_targets(batch_labels)
                probabilities = model(batch_images.to(device))

                # Probability above 0.5 -> class 1 (Fake), otherwise class 0 (Real).
                hits = (probabilities > 0.5).float() == targets

                n_seen += targets.size(0)
                n_correct += hits.sum().item()

        val_acc = 100 * n_correct / n_seen
        print(f"Validation Accuracy: {val_acc:.2f}%")

        # Nothing to save unless this epoch improved on the best accuracy.
        if val_acc <= best_val_acc:
            continue

        best_val_acc = val_acc
        torch.save(model.state_dict(), SAVE_PATH)
        print(f"New best model saved to {SAVE_PATH}")

if __name__ == "__main__":
    # Train only when both dataset folders are present; otherwise report the problem.
    if os.path.exists(TRAIN_DIR) and os.path.exists(VAL_DIR):
        train_model()
    else:
        print(f"Error: Data directories not found at {TRAIN_DIR} or {VAL_DIR}")

In [None]:
if __name__ == "__main__":
    # No `main` function is defined in the visible notebook, so an unconditional
    # main() call raises NameError and stops "Run all" before the checkpoint is
    # copied to Drive. Training is already started by train_model() in the
    # previous cell, so main() is only invoked if some other cell provides it.
    if callable(globals().get("main")):
        main()


In [None]:
# Copy the checkpoint file written to clip_aigc_detector.pth into the CS4487 folder on Google Drive.
!cp clip_aigc_detector.pth drive/MyDrive/CS4487/clip_aigc_detector.pth