In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Configuration
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): BATCH_SIZE is not referenced anywhere in the visible code.
BATCH_SIZE = 32
# Side length (pixels) that images are resized to before feature extraction.
IMAGE_SIZE = 224
# NOTE(review): NUM_CLASSES is not referenced anywhere in the visible code.
NUM_CLASSES = 2  # Real (0) vs AI (1)

def find_test_images(root_path='/kaggle/input'):
    """Locate the directory that holds the test images.

    Walks ``root_path`` in sorted (deterministic) order and looks for
    directories that directly contain at least one ``.jpg``/``.jpeg``/``.png``
    file. A directory whose path below ``root_path`` contains the substring
    "test" (case-insensitive) is returned as soon as it is found. If no such
    directory exists, the first image directory in walk order is returned, so
    datasets without a "test" folder keep working.

    Args:
        root_path: Directory tree to search.

    Returns:
        Path of the selected directory, as yielded by ``os.walk``.

    Raises:
        FileNotFoundError: If no directory under ``root_path`` contains an image.
    """
    image_exts = ('.jpg', '.jpeg', '.png')
    fallback = None

    for dirname, subdirs, filenames in os.walk(root_path):
        # Sorting in place fixes the order in which os.walk descends; the
        # default order follows the filesystem and is not guaranteed.
        subdirs.sort()

        if not any(fname.lower().endswith(image_exts) for fname in filenames):
            continue

        # Match on the path relative to the root so that a "test" substring in
        # root_path itself cannot make every directory look like a test folder.
        if 'test' in os.path.relpath(dirname, root_path).lower():
            print(f"Found test images in: {dirname}")
            return dirname

        if fallback is None:
            fallback = dirname

    if fallback is not None:
        print(f"Found test images in: {fallback}")
        return fallback
    raise FileNotFoundError(f"No test images found under {root_path}")

def load_and_preprocess_image(image_path, transform):
    """Load one image, convert it to RGB and apply ``transform``.

    This is deliberately best-effort: any failure while opening, decoding or
    transforming the image is reported on stdout and signalled with ``None``
    so that a single bad file does not abort a whole batch.

    Args:
        image_path: Path of the image file to open.
        transform: Callable applied to the RGB ``PIL.Image``; its result is
            returned unchanged.

    Returns:
        Whatever ``transform`` returns, or ``None`` if anything went wrong.
    """
    try:
        # Image.open is lazy and keeps the file open; the context manager
        # releases the handle as soon as the pixel data has been copied by
        # convert(), instead of leaving it to the garbage collector.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        return transform(image)
    except Exception as e:
        print(f"Error loading {image_path}: {str(e)}")
        return None

def extract_features(image_paths, model, transform):
    """Run every loadable image through ``model`` and collect the outputs.

    The model is switched to eval mode and called once per image with
    gradients disabled. Images that ``load_and_preprocess_image`` could not
    load (it returned ``None``) are skipped.

    Args:
        image_paths: Iterable of image file paths.
        model: Callable network; its output for a one-image batch is flattened
            into a 1-D feature vector.
        transform: Preprocessing callable forwarded to
            ``load_and_preprocess_image``.

    Returns:
        Tuple ``(features, valid_paths)``: ``features`` is ``np.array`` of the
        collected vectors and ``valid_paths`` lists, in the same order, the
        paths that were actually processed.
    """
    collected = []
    kept_paths = []

    model.eval()
    with torch.no_grad():
        for image_path in image_paths:
            tensor = load_and_preprocess_image(image_path, transform)
            if tensor is None:
                # Unreadable image: leave it out of both outputs.
                continue

            # Add the batch dimension and move the input to the compute device.
            single_batch = tensor.unsqueeze(0).to(DEVICE)
            output = model(single_batch)
            collected.append(output.cpu().numpy().flatten())
            kept_paths.append(image_path)

    return np.array(collected), kept_paths

def main():
    """Build ``/kaggle/working/submission.csv`` for the test images.

    Pipeline:
      1. Locate the test image directory and list its image files (sorted).
      2. Extract one feature vector per image with an ImageNet-pretrained
         EfficientNet-B0 whose classification head is replaced by identity.
      3. Fit a PLACEHOLDER random forest on random data and score the images.
         The resulting probabilities carry no signal; replace this step with
         a classifier trained on real data.
      4. Write one row per successfully loaded image.

    Raises:
        FileNotFoundError: Propagated from ``find_test_images`` when no image
            directory exists.
        ValueError: If the directory lists no image files, or none of them
            could be loaded.
    """
    # Fixed seed so the placeholder classifier (and therefore the submission
    # file) is identical on every Restart & Run All.
    seed = 42
    image_exts = ('.jpg', '.jpeg', '.png')

    # Find test images
    test_dir = find_test_images()
    # os.listdir order is arbitrary; sort so the submission rows are stable.
    test_images = sorted(
        os.path.join(test_dir, f) for f in os.listdir(test_dir)
        if f.lower().endswith(image_exts)
    )

    if not test_images:
        raise ValueError("No valid test images found")

    print(f"Found {len(test_images)} test images")

    # Load pre-trained model (EfficientNet works well for this task).
    # `pretrained=True` is deprecated in newer torchvision releases in favour
    # of the weights enum; use the enum when this torchvision provides it and
    # fall back to the legacy flag otherwise (both select the same weights).
    weights_enum = getattr(models, 'EfficientNet_B0_Weights', None)
    if weights_enum is not None:
        feature_extractor = models.efficientnet_b0(weights=weights_enum.IMAGENET1K_V1)
    else:
        feature_extractor = models.efficientnet_b0(pretrained=True)
    feature_extractor.classifier = torch.nn.Identity()  # Remove final classification layer
    feature_extractor = feature_extractor.to(DEVICE)

    # Image transformations (ImageNet mean/std normalisation)
    transform = transforms.Compose([
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Extract features from test images
    test_features, valid_paths = extract_features(test_images, feature_extractor, transform)

    # With zero loadable images the feature array is 1-D and `.shape[1]`
    # below would fail with an opaque IndexError; fail with a clear message.
    if len(valid_paths) == 0:
        raise ValueError("None of the test images could be loaded")

    skipped = len(test_images) - len(valid_paths)
    if skipped:
        print(f"Warning: {skipped} image(s) could not be loaded and are missing from the submission")

    # Load trained classifier (in a real scenario, you'd have this pre-trained)
    # For demo purposes, we'll create a dummy classifier - in practice you should:
    # 1. Train on a dataset like Real vs AI images
    # 2. Save the trained model
    # 3. Load it here
    print("Creating dummy classifier (replace with your trained model in production)")
    classifier = RandomForestClassifier(n_estimators=100, random_state=seed)

    # Dummy training data - REPLACE WITH YOUR ACTUAL TRAINING DATA
    # This is just for demonstration - you should train on proper datasets
    rng = np.random.default_rng(seed)
    dummy_features = rng.random((100, test_features.shape[1]))
    dummy_labels = rng.integers(0, 2, 100)
    classifier.fit(dummy_features, dummy_labels)

    # Predict on test images
    test_preds = classifier.predict_proba(test_features)

    # Prepare submission
    submission = []
    for path, probs in zip(valid_paths, test_preds):
        image_id = os.path.splitext(os.path.basename(path))[0]
        confidence = np.round(probs[1], 6)  # Probability it's AI-generated

        # For object detection tasks, you might want bounding boxes
        # Here we'll just use the whole image area
        prediction_string = f"1 {confidence} 0.5 0.5 1.0 1.0"  # class, conf, x,y,w,h

        submission.append({
            "image_id": image_id,
            "prediction_string": prediction_string
        })

    # Create DataFrame and save
    submission_df = pd.DataFrame(submission)
    submission_df.to_csv('/kaggle/working/submission.csv', index=False)
    print("✅ Submission file created successfully")

# Run the pipeline only when this code is executed as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

Found test images in: /kaggle/input/synthetic-2-real-object-detection-challenge-2/Synthetic to Real Object Detection Challenge 2/testImages/images
Found 159 test images


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 152MB/s]


Creating dummy classifier (replace with your trained model in production)
✅ Submission file created successfully
