In [None]:
!unzip -n -q CollabDiff.zip


In [None]:
!pip install gdown
!gdown 'https://drive.google.com/uc?id=1A0xoL44Yg68ixd-FuIJn2VC4vdZ6M2gn'



In [1]:
from pathlib import PosixPath
image_path = PosixPath("e4e")  # Standard quotes
train_dir = image_path / "train"  # Standard quotes
test_dir = image_path / "val"  # Standard quotes

In [30]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
import matplotlib.pyplot as plt
from tqdm import tqdm 

# Custom dataset class
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None, limit=None):
        self.root_dir = root_dir
        self.transform = transform

        self.image_files = []
        self.labels = []
        for label_folder in tqdm(['0_real', '1_fake'], desc="Loading dataset"):
            full_path = os.path.join(root_dir, label_folder)
            for idx, file_name in enumerate(os.listdir(full_path)):
                if limit and idx >= limit:
                    break  # Limit the number of files loaded
                if file_name.endswith(('.jpg', '.png', '.jpeg')):  # Ensure image files
                    self.image_files.append(os.path.join(full_path, file_name))
                    if 'real' in label_folder:
                        self.labels.append(0)  # Label 0 for real images
                    else:
                        self.labels.append(1)  # Label 1 for fake images

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        image = Image.open(img_path).convert("RGB")  # Ensure image is 3 channels
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

# Data transformations
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Paths to the training and validation directories
train_dir = "e4e/train"
val_dir = "e4e/val"

# Load datasets
train_dataset = CustomDataset(root_dir=train_dir, transform=data_transforms)
val_dataset = CustomDataset(root_dir=val_dir, transform=data_transforms)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=4, pin_memory=True)

Loading dataset: 100%|██████████| 2/2 [00:00<00:00, 71.46it/s]
Loading dataset: 100%|██████████| 2/2 [00:00<00:00, 2449.94it/s]


In [9]:
!pip install --upgrade certifi
import ssl
import certifi

# Set the SSL context to use certifi's certificates
ssl._create_default_https_context = ssl.create_default_context
ssl._create_default_https_context = lambda: ssl.create_default_context(cafile=certifi.where())


Requirement already up-to-date: certifi in /scratch/user/nkolloju/venv_name/lib/python3.6/site-packages


In [5]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [31]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.utils.data import DataLoader
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
import matplotlib.pyplot as plt

# Load pre-trained ResNet50 and fine-tune
model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False  # Freeze all layers

# Replace the final layer for binary classification (real/fake)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)  # 2 classes (real/fake)

# Hook functions to capture low, mid, and high-level features
low_level_features, mid_level_features, high_level_features = [], [], []

def hook_fn(module, input, output, storage_list):
    storage_list.append(output.clone().detach())

# Register forward hooks to capture intermediate features
model.layer1[0].register_forward_hook(lambda m, i, o: hook_fn(m, i, o, low_level_features))  # Low-level features
model.layer3[0].register_forward_hook(lambda m, i, o: hook_fn(m, i, o, mid_level_features))  # Mid-level features
model.layer4[0].register_forward_hook(lambda m, i, o: hook_fn(m, i, o, high_level_features))  # High-level features

# Set up loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

# Mixed precision training with GradScaler
scaler = GradScaler()

# Training with hooks and early stopping
def train_model_with_early_stopping(
    model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=10, patience=3, accumulation_steps=4
):
    train_loss_history, val_loss_history = [], []
    train_acc_history, val_acc_history = [], []

    best_val_loss = float('inf')  # Initialize best validation loss
    epochs_without_improvement = 0  # Track epochs with no improvement

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        running_loss, correct, total = 0.0, 0, 0

        # Training loop with progress bar
        train_loader_iter = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Training]")
        optimizer.zero_grad()  # Zero gradients at the start
        for batch_idx, (inputs, labels) in enumerate(train_loader_iter):
            inputs, labels = inputs.to(device), labels.to(device)

            with autocast():  # Enable mixed precision training
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss = loss / accumulation_steps  # Normalize loss for gradient accumulation

            scaler.scale(loss).backward()  # Scale the loss for stability

            if (batch_idx + 1) % accumulation_steps == 0 or (batch_idx + 1) == len(train_loader):
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()  # Zero gradients after the step

            # Update running loss and accuracy
            running_loss += loss.item() * inputs.size(0) * accumulation_steps  # Undo normalization
            _, preds = torch.max(outputs, 1)
            correct += torch.sum(preds == labels.data)
            total += labels.size(0)

            del inputs, labels, outputs, loss  # Free memory
            torch.cuda.empty_cache()  # Clear unused memory

        # Calculate training loss and accuracy
        train_loss = running_loss / total
        train_acc = correct.double() / total
        train_loss_history.append(train_loss)
        train_acc_history.append(train_acc)

        # Validation loop
        model.eval()  # Set model to evaluation mode
        val_loss, correct_val, total_val = 0.0, 0, 0
        val_loader_iter = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Validation]")
        with torch.no_grad():  # Disable gradient computation
            for inputs, labels in val_loader_iter:
                inputs, labels = inputs.to(device), labels.to(device)
                with autocast():  # Mixed precision in validation
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                correct_val += torch.sum(preds == labels.data)
                total_val += labels.size(0)

                del inputs, labels, outputs, loss  # Free memory
                torch.cuda.empty_cache()  # Clear unused memory

        # Calculate validation loss and accuracy
        if total_val > 0:
            val_loss = val_loss / total_val
            val_acc = correct_val.double() / total_val
            val_loss_history.append(val_loss)
            val_acc_history.append(val_acc)
        else:
            print("Validation dataset is empty or no samples were processed.")
            val_loss = float('inf')  # Assign a high loss for empty validation
            val_acc = 0.0  # No accuracy for empty validation
            val_loss_history.append(val_loss)
            val_acc_history.append(val_acc)

        # Early stopping logic: Check if validation loss improved
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model_resnet50_e4e.pth')  # Save the best model
            epochs_without_improvement = 0  # Reset the counter
        else:
            epochs_without_improvement += 1
            print(f"Epochs without improvement: {epochs_without_improvement}")
        
        # Trigger early stopping if no improvement for 'patience' epochs
        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs.")
            break

        # Step the learning rate scheduler
        scheduler.step()

        # Print epoch results
        print(f'Epoch {epoch+1}/{num_epochs}, '
              f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, '
              f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

    # Return history for plotting
    return train_loss_history, val_loss_history, train_acc_history, val_acc_history

# Fine-tune the model with early stopping
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Train with early stopping: Patience set to 3 (meaning stop if no improvement after 3 epochs)
train_loss, val_loss, train_acc, val_acc = train_model_with_early_stopping(
    model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=11, patience=2
)

# Save the final trained model (this may not be the best model if early stopping occurred)
torch.save(model.state_dict(), 'final_resnet50_finetuned_e4e.pth')

# Plot loss and accuracy curves
# Plot loss and accuracy curves
def plot_curves(train_loss, val_loss, train_acc, val_acc):
    epochs = range(1, len(train_loss) + 1)

    plt.figure(figsize=(12, 4))

    # Plot loss curves
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_loss, label='Train Loss')
    plt.plot(epochs, val_loss, label='Val Loss')
    plt.title('Loss Curve')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Convert accuracy tensors to CPU and NumPy
    train_acc = [acc.cpu().item() for acc in train_acc]
    val_acc = [acc.cpu().item() for acc in val_acc]

    # Plot accuracy curves
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_acc, label='Train Acc')
    plt.plot(epochs, val_acc, label='Val Acc')
    plt.title('Accuracy Curve')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # Save the plot
    plt.savefig('training_curves_e4e.png')
    plt.show()

# Call the plot_curves function with your training and validation metrics
plot_curves(train_loss, val_loss, train_acc, val_acc)



  scaler = GradScaler()
  with autocast():  # Enable mixed precision training
Epoch 1/11 [Training]: 100%|██████████| 3646/3646 [00:53<00:00, 68.08it/s]
Epoch 1/11 [Validation]: 0it [00:00, ?it/s]


Validation dataset is empty or no samples were processed.
Epochs without improvement: 1
Epoch 1/11, Train Loss: 0.0053, Train Acc: 0.9999, Val Loss: inf, Val Acc: 0.0000


Epoch 2/11 [Training]:  24%|██▍       | 870/3646 [00:13<00:43, 63.51it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 39.39 GiB of which 12.25 MiB is free. Including non-PyTorch memory, this process has 39.35 GiB memory in use. Of the allocated memory 38.82 GiB is allocated by PyTorch, and 18.04 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [26]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from tqdm import tqdm
import os

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data transformations (ResNet-style transformations)
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Custom dataset class
# Custom dataset class
class CustomDatasetNew(Dataset):
    def __init__(self, root_dir, transform=None, limit=None):
        self.root_dir = root_dir
        self.transform = transform

        self.image_files = []
        self.labels = []
        for label_folder in tqdm(['real', 'fake'], desc="Loading dataset"):
            full_path = os.path.join(root_dir, label_folder)
            for idx, file_name in enumerate(os.listdir(full_path)):
                if limit and idx >= limit:
                    break  # Limit the number of files loaded
                if file_name.endswith(('.jpg', '.png', '.jpeg')):  # Ensure image files
                    self.image_files.append(os.path.join(full_path, file_name))
                    if 'real' in label_folder:
                        self.labels.append(0)  # Label 0 for real images
                    else:
                        self.labels.append(1)  # Label 1 for fake images

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]  # This is a string path
        image = Image.open(img_path).convert("RGB")  # Ensure image is 3 channels
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, img_path  # Ensure that the path returned is a string (not a tensor)


# Load ResNet model and capture features
def load_saved_resnet_model(model_path):
    model = torchvision.models.resnet50(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False  # Freeze all layers

    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 2)  # Binary classification (real/fake)

    model.load_state_dict(torch.load(model_path))  # Load the saved model
    model = model.to(device)

    # Hook functions to capture low, mid, and high-level features
    model.layer1[0].register_forward_hook(lambda m, i, o: hook_fn(m, i, o, low_level_features))
    model.layer3[0].register_forward_hook(lambda m, i, o: hook_fn(m, i, o, mid_level_features))
    model.layer4[0].register_forward_hook(lambda m, i, o: hook_fn(m, i, o, high_level_features))

    model.eval()
    return model

# Hook functions to capture ResNet features
low_level_features, mid_level_features, high_level_features = [], [], []

def hook_fn(module, input, output, storage_list):
    storage_list.append(output.clone().detach())

# Define linear layers to convert ResNet features to 768 dimensions
# Define linear layers to convert ResNet features to 768 dimensions
low_to_768 = nn.Linear(256, 768).to(device)   # For low-level features
mid_to_768 = nn.Linear(1024, 768).to(device)  # For mid-level features
high_to_768 = nn.Linear(2048, 768).to(device) # For high-level features

def extract_resnet_features(model, image):
    low_level_features.clear()
    mid_level_features.clear()
    high_level_features.clear()

    with torch.no_grad():
        image = image.unsqueeze(0).to(device)  # Add batch dimension and move to device
        model(image)

    # Pool ResNet features and map to 768 dimensions
    low_pooled = F.adaptive_avg_pool2d(low_level_features[-1].to(device), (1, 1)).squeeze()
    mid_pooled = F.adaptive_avg_pool2d(mid_level_features[-1].to(device), (1, 1)).squeeze()
    high_pooled = F.adaptive_avg_pool2d(high_level_features[-1].to(device), (1, 1)).squeeze()

    low_768 = low_to_768(low_pooled)   # Shape [1, 768]
    mid_768 = mid_to_768(mid_pooled)   # Shape [1, 768]
    high_768 = high_to_768(high_pooled) # Shape [1, 768]

    return low_768, mid_768, high_768


# Function to preprocess the image using ViT's transforms
def pipeline_preprocessor():
    vit_weights = torchvision.models.ViT_B_16_Weights.DEFAULT
    return vit_weights.transforms()

# Function to extract ViT embeddings
def get_vit_embedding(vit_model, image_path):
    preprocessing = pipeline_preprocessor()  # Preprocessing from ViT
    img = Image.open(image_path).convert("RGB")  # Ensure we load image by path (string)
    img = preprocessing(img).unsqueeze(0).to(device)  # Add batch dimension

    with torch.no_grad():
        feats = vit_model._process_input(img)
        batch_class_token = vit_model.class_token.expand(img.shape[0], -1, -1)
        feats = torch.cat([batch_class_token, feats], dim=1)
        feats = vit_model.encoder(feats)
        vit_hidden = feats[:, 0]  # CLS token
    return vit_hidden

# Load ViT model
def load_vit_model(pretrained_weights_path):
    vit_model = torchvision.models.vit_b_16(pretrained=False).to(device)
    pretrained_vit_weights = torch.load(pretrained_weights_path, map_location=device)
    vit_model.load_state_dict(pretrained_vit_weights, strict=False)
    vit_model.eval()  # Set to evaluation mode
    return vit_model

# Add a sequence dimension (if missing) before applying attention
def ensure_correct_shape(tensor):
    if len(tensor.shape) == 2:  # If shape is [batch_size, embedding_dim]
        tensor = tensor.unsqueeze(1)  # Add a sequence dimension: [batch_size, 1, embedding_dim]
    elif len(tensor.shape) == 1:  # If shape is [embedding_dim]
        tensor = tensor.unsqueeze(0).unsqueeze(1)  # Add batch and sequence dimensions: [1, 1, embedding_dim]
    return tensor


# Scaled dot product attention function
def scaled_dot_product_attention(Q, K, V):
    # Ensure Q, K, and V have the correct shapes
    Q = ensure_correct_shape(Q)  # Should be [batch_size, 1, embedding_dim]
    K = ensure_correct_shape(K)  # Should be [batch_size, 1, embedding_dim]
    V = ensure_correct_shape(V)  # Should be [batch_size, 1, embedding_dim]

#     print(f"Q shape after unsqueeze: {Q.shape}, K shape after unsqueeze: {K.shape}, V shape after unsqueeze: {V.shape}")  # Debugging
    d_k = Q.size(-1)
    scores = torch.matmul(Q, K.transpose(-2, -1)) / torch.sqrt(torch.tensor(d_k, dtype=torch.float32).to(Q.device))
    attn_weights = F.softmax(scores, dim=-1)
    output = torch.matmul(attn_weights, V)
    return output

# Save features for each dataset (train/val/test)
import csv

# Save features for each dataset (train/val/test) as CSV
def save_features_to_csv(model, vit_model, data_loader, save_path):
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    with open(save_path, mode="w", newline="") as file:
        writer = csv.writer(file)
        # Write the CSV header
        writer.writerow(["image_name", "features", "label"])

        for images, img_paths in tqdm(data_loader, desc="Extracting features"):
            for i in range(len(images)):
                image = images[i].to(device)  # Move image to the correct device
                img_path = img_paths[i]  # Image path

                # Ensure img_path is a string
                if isinstance(img_path, torch.Tensor):
                    img_path = img_path.item() if img_path.dim() == 0 else str(img_path)

                # Extract ResNet features
                try:
                    low_768, mid_768, high_768 = extract_resnet_features(model, image)
                except Exception as e:
                    print(f"Error extracting ResNet features for {img_path}: {e}")
                    continue

                # Extract ViT features
                try:
                    vit_hidden = get_vit_embedding(vit_model, img_path)  # img_path should be a string
                except Exception as e:
                    print(f"Error extracting ViT features for {img_path}: {e}")
                    continue

                # Apply attention between ResNet and ViT features
                try:
                    output_1 = scaled_dot_product_attention(vit_hidden, low_768, low_768)
                    output_2 = scaled_dot_product_attention(output_1, mid_768, mid_768)
                    final_output = scaled_dot_product_attention(output_2, high_768, high_768)
                except Exception as e:
                    print(f"Error applying attention for {img_path}: {e}")
                    continue

                # Convert features to a flattened list
                features = final_output.detach().cpu().numpy().flatten().tolist()


                # Extract label from the image path
                label = 0 if "real" in img_path else 1

                # Write the row to the CSV
                writer.writerow([os.path.basename(img_path), features, label])

    print(f"Features saved to {save_path}")


# Load models
resnet_model = load_saved_resnet_model('best_model_resnet50_collabdiff.pth')
vit_model = load_vit_model('collabdiff_vit_state_dict.pth')

train_dir = "CollabDiff/train"
val_dir = "CollabDiff/val"
test_dir="CollabDiff/test"

train_dataset = CustomDatasetNew(root_dir=train_dir, transform=data_transforms)
val_dataset = CustomDatasetNew(root_dir=val_dir, transform=data_transforms)
test_dataset = CustomDatasetNew(root_dir=test_dir, transform=data_transforms)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

print("Processing Train Dataset:")
save_features_to_csv(resnet_model, vit_model, train_loader, save_path="features_collabDiff/train_features.csv")

print("Processing Validation Dataset:")
save_features_to_csv(resnet_model, vit_model, val_loader, save_path="features_collabDiff/val_features.csv")

print("Processing Test Dataset:")
save_features_to_csv(resnet_model, vit_model, test_loader, save_path="features_collabDiff/test_features.csv")

  model.load_state_dict(torch.load(model_path))  # Load the saved model
  pretrained_vit_weights = torch.load(pretrained_weights_path, map_location=device)
Loading dataset: 100%|██████████| 2/2 [00:00<00:00, 564.40it/s]
Loading dataset: 100%|██████████| 2/2 [00:00<00:00, 1927.97it/s]
Loading dataset: 100%|██████████| 2/2 [00:00<00:00, 726.48it/s]


Processing Train Dataset:


Extracting features: 100%|██████████| 1400/1400 [00:46<00:00, 29.97it/s]


Features saved to features_collabDiff/train_features.csv
Processing Validation Dataset:


Extracting features: 100%|██████████| 200/200 [00:06<00:00, 30.09it/s]


Features saved to features_collabDiff/val_features.csv
Processing Test Dataset:


Extracting features: 100%|██████████| 400/400 [00:39<00:00, 10.22it/s]

Features saved to features_collabDiff/test_features.csv





In [None]:
# Now handling test directory structure with subfolders (twitter, facebook, reddit)
test_dir = "WildRF/test"
for test_subdir in ['twitter', 'facebook', 'reddit']:
    test_dataset = CustomDatasetNew(root_dir=os.path.join(test_dir, test_subdir), transform=data_transforms)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    
    print(f"Processing Test Dataset: {test_subdir}")
    save_features(resnet_model, vit_model, test_loader, save_dir=f"features/test/{test_subdir}")

In [None]:
import shutil
import os

def folder_to_zip(folder_path, zip_name):
    # Check if the folder exists
    if not os.path.isdir(folder_path):
        print("The folder does not exist.")
        return

    # Create a zip file from the folder
    shutil.make_archive(zip_name, 'zip', folder_path)
    print(f"Folder '{folder_path}' has been successfully zipped to '{zip_name}.zip'.")

# Example usage
folder_to_zip('/kaggle/working/features', 'features')
