In [13]:
!pip install git+https://github.com/openai/CLIP.git
!pip install open_clip_torch

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-b_zfvj70
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-b_zfvj70
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... done


In [14]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm
import open_clip

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [18]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
from tqdm import tqdm
from transformers import CLIPVisionModel, AutoProcessor

# -------------------------------------------------
# 1. Configuration & Setup
# -------------------------------------------------
# Identifier passed to `from_pretrained` for both the backbone and the processor.
MODEL_NAME = "openai/clip-vit-base-patch32"
# Fine-tuned detector weights (a state_dict file) loaded at inference time.
MODEL_PATH = "clip_aigc_detector.pth"

# Input folder of images to score, and the CSV the predictions are written to.
TEST_DIR = "./test"
OUTPUT_CSV = "submission.csv"

# Number of images per forward pass.
BATCH_SIZE = 128

# Prefer the GPU when CUDA is available; otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# -------------------------------------------------
# 2. The Model Architecture (Fixed Dimensions)
# -------------------------------------------------
class AIGCDetector(nn.Module):
    """Binary detector: frozen CLIP vision backbone followed by a small MLP head.

    The head produces a single raw logit per image; the caller is expected to
    apply a sigmoid to obtain a probability.

    Args:
        base_model_name: Identifier or path handed to
            ``CLIPVisionModel.from_pretrained`` to build the backbone.
        hidden_dim: Width of the hidden layer in the head. Keep the default
            (512) when loading a checkpoint saved with the original head.
        dropout: Dropout probability used inside the head.
    """

    def __init__(self, base_model_name, hidden_dim=512, dropout=0.1):
        super().__init__()
        self.backbone = CLIPVisionModel.from_pretrained(base_model_name)

        # Freeze the backbone so only the classifier head carries gradients.
        for param in self.backbone.parameters():
            param.requires_grad = False

        # Size the head from the backbone instead of hard-coding it; this is
        # 768 for the ViT-B/32 checkpoint this notebook uses.
        feature_dim = self.backbone.config.hidden_size

        # NOTE: the attribute names (`backbone`, `classifier`) and the position
        # of each layer inside the Sequential determine the state_dict keys, so
        # they must not change or previously saved checkpoints will not load.
        self.classifier = nn.Sequential(
            nn.Linear(feature_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1),  # single logit for binary classification
        )

    def forward(self, pixel_values):
        """Return one raw (pre-sigmoid) logit per input image.

        Args:
            pixel_values: Preprocessed image batch, passed straight to the
                backbone as ``pixel_values``.

        Returns:
            Tensor of logits whose last dimension has size 1.
        """
        outputs = self.backbone(pixel_values=pixel_values)
        features = outputs.pooler_output
        return self.classifier(features)

# -------------------------------------------------
# 3. Test Dataset
# -------------------------------------------------
class TestDataset(Dataset):
    """Unlabelled image-folder dataset yielding ``(pixel_values, img_id)`` pairs.

    Args:
        test_dir: Directory that directly contains the image files.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``
            whose result exposes a ``'pixel_values'`` entry.
    """

    # File extensions (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.webp')

    def __init__(self, test_dir, processor):
        self.test_dir = test_dir
        self.processor = processor
        # Sorted for a deterministic order. Directories whose names happen to
        # end with an image extension are skipped because they cannot be opened
        # as images.
        self.files = sorted(
            f for f in os.listdir(test_dir)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(test_dir, f))
        )

    def __len__(self):
        """Number of image files found in the directory."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load and preprocess the image at position ``idx``.

        Returns:
            Tuple ``(pixel_values, img_id)`` where ``img_id`` is the file name
            with its final extension removed.
        """
        file_name = self.files[idx]
        path = os.path.join(self.test_dir, file_name)

        # Open through a context manager so the underlying file handle is
        # released deterministically; convert() returns an independent copy.
        with Image.open(path) as raw_image:
            image = raw_image.convert("RGB")

        # The processor returns a batch of one; drop that leading dimension so
        # the DataLoader can stack samples itself.
        inputs = self.processor(images=image, return_tensors="pt")
        pixel_values = inputs['pixel_values'].squeeze(0)

        img_id = file_name.rsplit(".", 1)[0]
        return pixel_values, img_id

# -------------------------------------------------
# 4. Main Execution
# -------------------------------------------------
def test_model():
    """Run the saved detector over every image in TEST_DIR and write OUTPUT_CSV.

    Loads the processor and model named by MODEL_NAME, restores the weights
    stored at MODEL_PATH, predicts a 0/1 label per image (sigmoid probability
    thresholded at 0.5) and saves an ``ID,label`` CSV sorted by ID.

    Raises:
        FileNotFoundError: If MODEL_PATH or TEST_DIR does not exist.
    """
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(f"Model weights not found at {MODEL_PATH}")
    if not os.path.exists(TEST_DIR):
        raise FileNotFoundError(f"Test folder not found at {TEST_DIR}")

    print(f"Using device: {DEVICE}")

    # 1. Initialize Processor
    print(f"Loading processor: {MODEL_NAME}...")
    processor = AutoProcessor.from_pretrained(MODEL_NAME)

    # 2. Initialize Model
    print("Initializing model architecture...")
    model = AIGCDetector(MODEL_NAME).to(DEVICE)

    # 3. Load Weights
    print(f"Loading weights from {MODEL_PATH}...")
    # weights_only=True restricts unpickling to tensors and plain containers,
    # so a tampered checkpoint cannot execute arbitrary code on load.
    state_dict = torch.load(MODEL_PATH, map_location=DEVICE, weights_only=True)

    # The checkpoint holds the full model (backbone + classifier), so it is
    # loaded directly into the assembled module.
    model.load_state_dict(state_dict)
    model.eval()

    # 4. Prepare DataLoader
    test_dataset = TestDataset(TEST_DIR, processor)
    test_loader = DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=4,
        # Pinned host memory only helps host-to-GPU copies; skip it on CPU.
        pin_memory=(DEVICE == "cuda"),
    )

    print(f"Starting inference on {len(test_dataset)} images...")

    results = []

    with torch.no_grad():
        for pixel_values, ids in tqdm(test_loader, desc="Predicting"):
            pixel_values = pixel_values.to(DEVICE)

            # The model returns one logit per image; flatten to a 1-D vector
            # of probabilities.
            logits = model(pixel_values)
            probs = torch.sigmoid(logits).view(-1)

            # Threshold at 0.5 to obtain hard 0/1 labels.
            preds = (probs > 0.5).int()

            results.extend(
                {"ID": img_id, "label": pred}
                for img_id, pred in zip(ids, preds.cpu().tolist())
            )

    # 5. Save Submission
    # Explicit columns keep the frame well-formed (and sortable) even when the
    # test folder contains no images and `results` is empty.
    df = pd.DataFrame(results, columns=["ID", "label"])
    df = df.sort_values("ID").reset_index(drop=True)
    df.to_csv(OUTPUT_CSV, index=False)

    print(f"\nSuccess! {OUTPUT_CSV} saved with {len(df)} rows.")

# Entry point: run inference when this code is executed directly (as a script
# or notebook cell) rather than imported as a module.
if __name__ == "__main__":
    test_model()

Using device: cuda
Loading processor: openai/clip-vit-base-patch32...
Initializing model architecture...
Loading weights from clip_aigc_detector.pth...
Starting inference on 2500 images...


Predicting: 100%|██████████| 20/20 [00:05<00:00,  3.35it/s]


Success! submission.csv saved with 2500 rows.





In [20]:
# NOTE(review): looks like an unfilled placeholder — confirm the real dataset location before running.
data_root = "path_to_dataset"
# NOTE(review): differs from MODEL_PATH ("clip_aigc_detector.pth") used by the inference cell — confirm which checkpoint is intended.
model_path = "saved_model.pth"