In [3]:
%pip install openai-clip

Defaulting to user installation because normal site-packages is not writeable
Collecting openai-clip
  Downloading openai-clip-1.0.1.tar.gz (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 KB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: openai-clip
  Building wheel for openai-clip (setup.py) ... [?25ldone
[?25h  Created wheel for openai-clip: filename=openai_clip-1.0.1-py3-none-any.whl size=1368647 sha256=da59b05af418fc1585ea5028fd279bd8c91f534ea201715a32c7aab6f1bb5161
  Stored in directory: /home/aditya_sridhar/.cache/pip/wheels/08/77/8e/8d2f862df6bf7fb4e2007062d2cbaeae49862ec7b56d041229
Successfully built openai-clip
Installing collected packages: ftfy, o

In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from PIL import Image
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import csv
import clip
from torchvision import transforms


# Path to the CUB dataset root directory
CUB_ROOT = './data/CUB_200_2011/CUB_200_2011/'

# CSV File Setup for Logging Results
RESULTS_CSV_PATH = 'hyperparameter_tuning_complex_cnn_results.csv'

# Ensure the results CSV file has headers
if not os.path.exists(RESULTS_CSV_PATH):
    with open(RESULTS_CSV_PATH, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([
            "Model_Architecture_Index", "Learning_Rate", 
            "Classification_Weight", "Attribute_Weight", 
            "Training_Loss", "Training_Accuracy", 
            "Attribute_Training_Loss", "Classification_Test_Accuracy", 
            "Test_Loss"
        ])


# Hyperparameters
BATCH_SIZE = 32
LABEL_VECTOR_SIZE = 15  # Labels to include in training/testing
IMAGE_SIZE = (150, 150)  # Resizing dimensions for images

# Image transformations
transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
])



In [None]:

# Dataset Classes
class CUBAttributeDataset(Dataset):
    """
    Custom dataset for loading CUB-200-2011 images, attributes, and labels.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        # Load metadata
        self.images_df = pd.read_csv(os.path.join(root_dir, 'images.txt'), sep=' ', header=None, names=['image_id', 'image_path'])
        self.labels_df = pd.read_csv(os.path.join(root_dir, 'image_class_labels.txt'), sep=' ', header=None, names=['image_id', 'class_id'])
        attributes_path = os.path.join(root_dir, 'attributes/image_attribute_labels.txt')
        self.attributes_df = self._load_attributes(attributes_path)

        # Process attributes into vectors
        self.attribute_vectors = self._process_attributes()

    def _load_attributes(self, filepath):
        # Read the file and filter rows with exactly 5 columns
        attributes_data = []
        with open(filepath, 'r') as file:
            for line in file:
                row = line.split()
                if len(row) == 5:
                    attributes_data.append(row)

        return pd.DataFrame(attributes_data, columns=['image_id', 'attribute_id', 'is_present', 'certainty', 'time']).astype({
            'image_id': int, 'attribute_id': int, 'is_present': int, 'certainty': float, 'time': float
        })

    def _process_attributes(self):
        # Create a dictionary mapping image IDs to attribute vectors
        attribute_vectors = {}
        for image_id in tqdm(self.images_df['image_id'], desc="Processing Attributes"):
            attributes = self.attributes_df[self.attributes_df['image_id'] == image_id]
            attribute_vector = torch.zeros(312)  # 312 attributes
            attribute_ids = attributes[attributes['is_present'] == 1]['attribute_id']
            attribute_vector[attribute_ids.values - 1] = 1
            attribute_vectors[image_id] = attribute_vector
        return attribute_vectors

    def __len__(self):
        return len(self.images_df)

    def __getitem__(self, idx):
        # Load image
        image_path = os.path.join(self.root_dir, 'images', self.images_df.iloc[idx, 1])
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)

        # Get attribute vector and label
        image_id = self.images_df.iloc[idx, 0]
        attribute_vector = self.attribute_vectors[image_id]
        label = self.labels_df[self.labels_df['image_id'] == image_id]['class_id'].values[0] - 1  # Zero-indexed

        return image, attribute_vector, label


# Models
class CLIPWithComplexCNNClassifier(nn.Module):
    def __init__(self, clip_model, attribute_vector_size=312, num_classes=200):
        super(CLIPWithComplexCNNClassifier, self).__init__()
        
        self.clip_model = clip_model
        self.fc_attributes = nn.Linear(512, attribute_vector_size)  # Assuming the output features from CLIP are 512-dimensional
        self.fc_classifier = nn.Linear(attribute_vector_size, num_classes)

    def forward(self, x):
        # Extract features using the CLIP model
        image_features = self.clip_model.encode_image(x)  # CLIP's image encoder
        # Pass through the attribute classifier
        attributes = self.fc_attributes(image_features)
        # Pass through the final classification layer
        class_logits = self.fc_classifier(attributes)
        return attributes, class_logits


# Loss Function
def calculate_losses(attribute_vector, class_logits, true_attribute, true_label, classification_weight=0.5, attribute_weight=0.5):
    """
    Calculate combined losses for attributes and classification.
    """
    attribute_loss = F.binary_cross_entropy_with_logits(attribute_vector, true_attribute)
    classification_loss = F.cross_entropy(class_logits, true_label)
    total_loss = classification_weight * classification_loss + attribute_weight * attribute_loss
    return total_loss, classification_loss, attribute_loss


# Training and Evaluation Functions
def train_epoch(model, train_loader, optimizer, device, classification_weight=0.5, attribute_weight=0.5):
    """
    Train model for one epoch.
    """
    model.train()
    total_loss = total_class_loss = total_attribute_loss = 0

    for images, attributes, labels in tqdm(train_loader, desc="Training"):
        images, attributes, labels = images.to(device), attributes.to(device), labels.to(device)

        optimizer.zero_grad()
        print(images.shape)
        predicted_attributes, class_logits = model(images)
        loss, class_loss, attr_loss = calculate_losses(predicted_attributes, class_logits, attributes, labels, classification_weight, attribute_weight)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_class_loss += class_loss.item()
        total_attribute_loss += attr_loss.item()

    return total_loss / len(train_loader), total_class_loss / len(train_loader), total_attribute_loss / len(train_loader)


def evaluate_epoch(model, test_loader, device, classification_weight=0.5, attribute_weight=0.5):
    """
    Evaluate model on test set.
    """
    model.eval()
    total_loss = total_class_loss = total_attribute_loss = 0

    with torch.no_grad():
        for images, attributes, labels in tqdm(test_loader, desc="Evaluating"):
            images, attributes, labels = images.to(device), attributes.to(device), labels.to(device)
            predicted_attributes, class_logits = model(images)
            loss, class_loss, attr_loss = calculate_losses(predicted_attributes, class_logits, attributes, labels, classification_weight, attribute_weight)
            total_loss += loss.item()
            total_class_loss += class_loss.item()
            total_attribute_loss += attr_loss.item()

    return total_loss / len(test_loader), total_class_loss / len(test_loader), total_attribute_loss / len(test_loader)

def evaluate_accuracy(model, test_loader, device):
    """
    Evaluate the classification accuracy on the test dataset.
    """
    model.eval()
    correct = total = 0

    with torch.no_grad():
        for images, _, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            _, class_logits = model(images)
            predicted_labels = class_logits.argmax(dim=1)
            correct += (predicted_labels == labels).sum().item()
            total += labels.size(0)

    return correct / total


def train_and_evaluate_clip_model(train_loader, test_loader, device, num_epochs=10):
    """
    Hyperparameter tuning for the CLIP model with a Complex CNN classifier.
    """
    hyperparameter_grid = [
        {"model_index": 4, "learning_rate": 1e-3, "classification_weight": 0.3, "attribute_weight": 0.7},
        {"model_index": 4, "learning_rate": 1e-3, "classification_weight": 0.5, "attribute_weight": 0.5},
        {"model_index": 4, "learning_rate": 1e-3, "classification_weight": 0.7, "attribute_weight": 0.3},
        {"model_index": 4, "learning_rate": 5e-4, "classification_weight": 0.3, "attribute_weight": 0.7},
        {"model_index": 4, "learning_rate": 5e-4, "classification_weight": 0.5, "attribute_weight": 0.5},
        {"model_index": 4, "learning_rate": 5e-4, "classification_weight": 0.7, "attribute_weight": 0.3},
    ]

    # Load pretrained CLIP model
    clip_model, _ = clip.load("ViT-B/32", device=device)

    for config in hyperparameter_grid:
        print(f"Tuning with config: {config}")
        model = CLIPWithComplexCNNClassifier(clip_model, attribute_vector_size=312, num_classes=200)
        optimizer = torch.optim.Adam(model.parameters(), lr=config["learning_rate"])
        model.to(device)

        for epoch in range(num_epochs):
            print(f"Epoch {epoch + 1}/{num_epochs}")
            train_loss, train_class_loss, train_attr_loss = train_epoch(
                model, train_loader, optimizer, device,
                classification_weight=config["classification_weight"],
                attribute_weight=config["attribute_weight"]
            )
            eval_loss, eval_class_loss, eval_attr_loss = evaluate_epoch(
                model, test_loader, device,
                classification_weight=config["classification_weight"],
                attribute_weight=config["attribute_weight"]
            )
            print(f"Train Loss: {train_loss:.4f}, Eval Loss: {eval_loss:.4f}")
            print("-" * 50)

        classification_test_accuracy = evaluate_accuracy(model, test_loader, device)

        # Log results to CSV
        with open(RESULTS_CSV_PATH, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([
                config["model_index"], config["learning_rate"], 
                config["classification_weight"], config["attribute_weight"], 
                train_loss, classification_test_accuracy, train_attr_loss, 
                eval_loss, eval_class_loss
            ])



    # Load dataset
cub_dataset = CUBAttributeDataset(root_dir=CUB_ROOT, transform=transform)

# Filter dataset by labels4
INCLUDE_LABELS = list(range(LABEL_VECTOR_SIZE))
filtered_indices = [idx for idx, (_, _, label) in enumerate(cub_dataset) if label in INCLUDE_LABELS]
filtered_dataset = torch.utils.data.Subset(cub_dataset, filtered_indices)

# Split into train/test datasets
train_size = int(0.8 * len(filtered_dataset))
test_size = len(filtered_dataset) - train_size
train_dataset, test_dataset = random_split(filtered_dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_and_evaluate_clip_model(train_loader, test_loader, device, num_epochs=10)



Processing Attributes: 100%|██████████| 11788/11788 [00:53<00:00, 220.62it/s]


Tuning with config: {'model_index': 4, 'learning_rate': 0.001, 'classification_weight': 0.3, 'attribute_weight': 0.7}
Epoch 1/10


Training:   0%|          | 0/21 [00:00<?, ?it/s]

torch.Size([32, 3, 150, 150])





RuntimeError: The size of tensor a (17) must match the size of tensor b (50) at non-singleton dimension 1

In [8]:
!pip install statsmodels

Defaulting to user installation because normal site-packages is not writeable
Collecting statsmodels
  Downloading statsmodels-0.14.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting patsy>=0.5.6
  Downloading patsy-1.0.1-py2.py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.9/232.9 KB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: patsy, statsmodels
Successfully installed patsy-1.0.1 statsmodels-0.14.4
