In [1]:
!pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-jf6v0iba
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-jf6v0iba
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting ftfy (from clip==1.0)
  Downloading ftfy-6.2.3-py3-none-any.whl.metadata (7.8 kB)
Downloading ftfy-6.2.3-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.0/43.0 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25ldone
[?25h  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369490 sha256=9768d99a332a39a33284404c04438d7e5097cf4db4922f606e9ca565c13e8763
  Stored in directory: /tmp/pip-ephem-wheel-cache-0r01yszx/wheels/da/2b/4c/d6691fa9597aac8bb

In [2]:
import clip
import torch
import os
from PIL import Image
from torchvision import transforms

# Load the model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

original_dir = '/kaggle/input/task5-final/task5_original_images'
noisy_dir = '/kaggle/input/task5-final/task5_noisy_images'
scrambled_dir = '/kaggle/input/task5-final/task5_scrambled_images'
stylized_dir = '/kaggle/input/task5-final/task5_stylized_images'

# these are subsets of cifar10
# in the stylized directory class name folders have a '_stylized' suffix attached

100%|████████████████████████████████████████| 338M/338M [00:02<00:00, 176MiB/s]


In [3]:
def evaluate_clip_on_dataset(dataset_dir, class_names):
    correct = 0
    total = 0

    for class_name in class_names:
        class_path = os.path.join(dataset_dir, class_name)
        if not os.path.exists(class_path):
            continue

        for image_file in os.listdir(class_path):
            image_path = os.path.join(class_path, image_file)

            # Load and preprocess the image
            image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)

            # Generate text prompts for each class
            text_inputs = torch.cat([clip.tokenize(f"a photo of a {c}") for c in class_names]).to(device)

            # Get image and text features
            with torch.no_grad():
                image_features = model.encode_image(image)
                text_features = model.encode_text(text_inputs)

                # Calculate similarity and get the prediction
                similarity = (image_features @ text_features.T).softmax(dim=-1)
                prediction = torch.argmax(similarity).item()

                # Check if the prediction is correct
                if class_names[prediction] == class_name:
                    correct += 1

                total += 1

    accuracy = correct / total if total > 0 else 0
    print(f"Accuracy for {dataset_dir}: {accuracy:.4f}")
    return accuracy

In [4]:
# Define class names based on CIFAR-10 (adjust as per your dataset)
cifar10_classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 
                   'dog', 'frog', 'horse', 'ship', 'truck']

In [5]:
# Evaluate on original dataset
original_accuracy = evaluate_clip_on_dataset(original_dir, cifar10_classes)
print(f"Original Accuracy: {original_accuracy:.4f}")
# Evaluate on noisy dataset
noisy_accuracy = evaluate_clip_on_dataset(noisy_dir, cifar10_classes)
print(f"Noisy Accuracy: {noisy_accuracy:.4f}")
# Evaluate on scrambled dataset
scrambled_accuracy = evaluate_clip_on_dataset(scrambled_dir, cifar10_classes)
print(f"Scrambled Accuracy: {scrambled_accuracy:.4f}")
# Adjust class names for stylized dataset, if necessary
stylized_classes = [c + '_stylized' for c in cifar10_classes]
stylized_accuracy = evaluate_clip_on_dataset(stylized_dir, stylized_classes)
print(f"Stylized Accuracy: {stylized_accuracy:.4f}")

Accuracy for /kaggle/input/task5-final/task5_original_images: 0.7920
Original Accuracy: 0.7920
Accuracy for /kaggle/input/task5-final/task5_noisy_images: 0.6597
Noisy Accuracy: 0.6597
Accuracy for /kaggle/input/task5-final/task5_scrambled_images: 0.3380
Scrambled Accuracy: 0.3380
Accuracy for /kaggle/input/task5-final/task5_stylized_images: 0.2949
Stylized Accuracy: 0.2949


In [6]:
print("Noise accuracy ratio: ", noisy_accuracy/original_accuracy)
print("Scrambled accuracy ratio: ", scrambled_accuracy/original_accuracy)
print("Stylized accuracy ratio: ", stylized_accuracy/original_accuracy)

Noise accuracy ratio:  0.832921287466742
Scrambled accuracy ratio:  0.4267676767676768
Stylized accuracy ratio:  0.3723761047979798
