In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('/Users/aarohiverma/Documents/ai_vs_real/resize_comp/train_split.csv')

In [3]:
df = df.drop(columns=['Unnamed: 0'])

## Train on original dataset

In [4]:
import torch
import clip
import pandas as pd
from PIL import Image
import numpy as np
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from tqdm import tqdm  # âœ… Import tqdm for progress bar

# Load CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-L/14", device=device)

# Dataset class for batch processing
class ImageDataset(Dataset):
    def __init__(self, df, base_path):
        self.df = df
        self.base_path = base_path
        self.transform = preprocess  # CLIP preprocessing

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = f"{self.base_path}/{self.df.iloc[idx]['file_name']}"
        label = self.df.iloc[idx]['label']
        image = self.transform(Image.open(img_path))
        return image, label

# Create DataLoader
batch_size = 64
dataset = ImageDataset(df, "/Users/aarohiverma/Documents/ai_vs_real/")
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=0)

# Extract features with progress bar
features = []
labels = []

with torch.no_grad():
    for images, lbls in tqdm(dataloader, desc="Extracting CLIP Features"):
        images = images.to(device)
        batch_features = model.encode_image(images).cpu().numpy()
        features.append(batch_features)
        labels.append(lbls.numpy())

# Convert and save
X = np.vstack(features)
y = np.concatenate(labels)

np.save("resize_train_clip_features.npy", X)
np.save("resize_train_clip_labels.npy", y)

print("Feature extraction completed!")


Extracting CLIP Features: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1000/1000 [3:36:17<00:00, 12.98s/it] 

Feature extraction completed!





In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# Load CLIP features and labels
features = np.load("/Users/aarohiverma/Documents/ai_vs_real/resize_comp/resize_train_clip_features.npy")
labels = np.load("//Users/aarohiverma/Documents/ai_vs_real/resize_comp/resize_train_clip_labels.npy")

# Normalize Features
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
features = scaler.fit_transform(features)

indices = np.random.permutation(len(features))
features = features[indices]
labels = labels[indices] 

# Convert to Torch Tensors
features = torch.tensor(features, dtype=torch.float32)
labels = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)  # Ensure it's (N,1)

# Split into Train & Test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Create PyTorch DataLoaders
batch_size = 64
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size, shuffle=False)

# Define MLP Model
class MLPClassifier(nn.Module):
    def __init__(self, input_dim):
        super(MLPClassifier, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.6),

            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.6),

            nn.Linear(256, 256),  # ðŸ”¹ Keep 256 Instead of 128
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.7),

            nn.Linear(256, 1),
            nn.Sigmoid()
        )


    def forward(self, x):
        return self.model(x)


# Initialize Model
input_dim = features.shape[1]  # Either 768 or 1024
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLPClassifier(input_dim).to(device)

# Loss & Optimizer
criterion = nn.BCELoss()  # Binary Cross Entropy for Classification
optimizer = optim.AdamW(model.parameters(), lr=1e-4)  #  AdamW 

# Training Loop
num_epochs = 40
for epoch in range(num_epochs):
    model.train()
    total_loss, correct, total = 0, 0, 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        # Metrics
        total_loss += loss.item()
        preds = (outputs > 0.5).float()
        correct += (preds == y_batch).sum().item()
        total += y_batch.size(0)

    acc = 100 * correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}, Accuracy: {acc:.2f}%")

Epoch 1/40, Loss: 0.3509, Accuracy: 83.64%
Epoch 2/40, Loss: 0.1330, Accuracy: 95.16%
Epoch 3/40, Loss: 0.0994, Accuracy: 96.35%
Epoch 4/40, Loss: 0.0823, Accuracy: 97.07%
Epoch 5/40, Loss: 0.0712, Accuracy: 97.42%
Epoch 6/40, Loss: 0.0671, Accuracy: 97.58%
Epoch 7/40, Loss: 0.0598, Accuracy: 97.77%
Epoch 8/40, Loss: 0.0545, Accuracy: 98.06%
Epoch 9/40, Loss: 0.0521, Accuracy: 98.08%
Epoch 10/40, Loss: 0.0456, Accuracy: 98.36%
Epoch 11/40, Loss: 0.0449, Accuracy: 98.34%
Epoch 12/40, Loss: 0.0422, Accuracy: 98.45%
Epoch 13/40, Loss: 0.0411, Accuracy: 98.50%
Epoch 14/40, Loss: 0.0364, Accuracy: 98.68%
Epoch 15/40, Loss: 0.0355, Accuracy: 98.74%
Epoch 16/40, Loss: 0.0342, Accuracy: 98.74%
Epoch 17/40, Loss: 0.0320, Accuracy: 98.83%
Epoch 18/40, Loss: 0.0324, Accuracy: 98.85%
Epoch 19/40, Loss: 0.0302, Accuracy: 98.84%
Epoch 20/40, Loss: 0.0276, Accuracy: 99.00%
Epoch 21/40, Loss: 0.0286, Accuracy: 98.91%
Epoch 22/40, Loss: 0.0270, Accuracy: 99.03%
Epoch 23/40, Loss: 0.0249, Accuracy: 99.0

In [None]:
import math
transformations = {
    'jpeg75': lambda im: jpeg_compress(im, quality=75),
    'jpeg50': lambda im: jpeg_compress(im, quality=50),
    'jpeg30': lambda im: jpeg_compress(im, quality=30),
    'blur': gaussian_blur,
    'resize128': resize_and_restore,
    'crop_pad': random_crop_pad,
    'color_jitter': lambda im: color_jitter(im),
}

for name, transform_fn in transformations.items():
    emb_list = []
    for _, row in test_df.iterrows():
        img = Image.open(row['filepath']).convert('RGB')
        img_t = transform_fn(img)
        emb = compute_clip_embedding(img_t)
        emb_list.append(emb)
    emb_stack = torch.cat(emb_list, dim=0)
    torch.save({'embeddings': emb_stack, 'labels': torch.tensor(clean_labels)}, f'test_{name}_clip_embeddings.pt')
    print(f"Saved embeddings for {name}: {emb_stack.shape}")


### Test on transformed images

In [11]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


model.eval()  # Set model to evaluation mode

# Define a function to evaluate the model on a transformation
def evaluate_model_on_transformation(transformation_name, batch_size=64):
    # Load the saved embeddings
    data = torch.load(f'test_{transformation_name}_clip_embeddings.pt')
    embeddings = data['embeddings']
    labels = data['labels']
    
    # Normalize the features (scale them using the same scaler used for training)
    scaler = StandardScaler()
    embeddings = scaler.fit_transform(embeddings)
    embeddings = torch.tensor(embeddings, dtype=torch.float32).to(device)
    labels = labels.to(device)

    # Create DataLoader for the current transformation's embeddings
    test_loader = DataLoader(TensorDataset(embeddings, labels), batch_size=batch_size, shuffle=False)

    # Evaluate the model on the test set
    total_preds, total_labels = [], []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            preds = (outputs > 0.5).float()
            total_preds.append(preds.cpu().numpy())
            total_labels.append(y_batch.cpu().numpy())
    
    # Flatten the lists to compute accuracy
    total_preds = np.concatenate(total_preds)
    total_labels = np.concatenate(total_labels)
    acc = accuracy_score(total_labels, total_preds)

    print(f"Accuracy for {transformation_name} transformation: {acc * 100:.2f}%")

    # Precision, Recall, F1-Score
    print(f"Classification Report for {transformation_name}:")
    print(classification_report(total_labels, total_preds))

    # Confusion Matrix
    cm = confusion_matrix(total_labels, total_preds)
    print(f"Confusion Matrix for {transformation_name}:")
    print(cm)

# List of transformations (names should match your saved files)
transformations = ['jpeg75', 'jpeg50', 'jpeg30', 'blur', 'resize128']

# Evaluate the model on each transformation
for transform in transformations:
    evaluate_model_on_transformation(transform)

Accuracy for jpeg75 transformation: 98.52%
Classification Report for jpeg75:
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      8002
           1       0.99      0.98      0.99      7988

    accuracy                           0.99     15990
   macro avg       0.99      0.99      0.99     15990
weighted avg       0.99      0.99      0.99     15990

Confusion Matrix for jpeg75:
[[7894  108]
 [ 129 7859]]
Accuracy for jpeg50 transformation: 96.50%
Classification Report for jpeg50:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97      8002
           1       0.97      0.96      0.97      7988

    accuracy                           0.97     15990
   macro avg       0.97      0.97      0.97     15990
weighted avg       0.97      0.97      0.97     15990

Confusion Matrix for jpeg50:
[[7723  279]
 [ 280 7708]]
Accuracy for jpeg30 transformation: 96.63%
Classification Report for jpeg30:
   