## Task 1 - Zero-Shot Evaluation

In [1]:
#@title GPU / Python / Torch sanity
import os, sys, subprocess, json, platform, torch
print("Python :", sys.version)
print("CUDA   :", torch.version.cuda)
print("Torch  :", torch.__version__)
print("Device :", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")
!nvidia-smi || true

Python : 3.10.19 | packaged by conda-forge | (main, Oct 22 2025, 22:29:10) [GCC 14.3.0]
CUDA   : 12.1
Torch  : 2.3.1+cu121
Device : NVIDIA GeForce RTX 4090
Fri Nov  7 15:22:39 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.144.03             Driver Version: 550.144.03     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        Off |   00000000:01:00.0 Off |                  Off |
|  0%   46C    P8             19W /  450W |    4062MiB /  24564MiB |      0%      Default |
|                                         |                        |        

In [2]:
# some imports
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from transformers import CLIPProcessor, CLIPModel, CLIPVisionModel, logging
from peft import LoraConfig, get_peft_model, TaskType
from torchinfo import summary
from tqdm.autonotebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import json
import warnings

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# --- Runtime configuration ---
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE}")

# Pre-trained CLIP checkpoint (ViT-L/14).
MODEL_ID = "openai/clip-vit-large-patch14"

# Tune both of these to fit your GPU memory.
BATCH_SIZE = 256
gradient_accumulation_steps = 1

# Number of epochs for the Linear Probe & LoRA sections.
NUM_EPOCHS = 200

# Folder that stores the datasets; created if it does not exist yet.
DATA_FOLDER = "./data"
os.makedirs(DATA_FOLDER, exist_ok=True)

Using device: cuda


In [5]:
# CLIP settings
# --- Load CLIP Processor ---
# Build the processor for the checkpoint named by MODEL_ID. The same object is
# handed to the CLIPTransform wrapper defined below.
processor = CLIPProcessor.from_pretrained(MODEL_ID)
# --- Define a transform to process images for CLIP ---
class CLIPTransform:
    """Callable adapter that turns one image into a CLIP-ready pixel tensor.

    Wraps a processor so it can be used wherever a plain ``transform(image)``
    callable is expected.
    """

    def __init__(self, processor):
        # Keep a reference to the processor; it is invoked on every call.
        self.processor = processor

    def __call__(self, image):
        """Run the wrapped processor on ``image`` and return its ``pixel_values``.

        The processor is asked for PyTorch tensors and returns a mapping; the
        ``pixel_values`` entry is extracted and ``squeeze(0)`` drops the leading
        batch axis the processor adds.
        """
        encoded = self.processor(images=image, return_tensors="pt")
        pixel_values = encoded["pixel_values"]
        return pixel_values.squeeze(0)

# Ready-to-use transform instance built on the processor loaded above.
clip_transform = CLIPTransform(processor)



In [6]:
# dataset related imports
from torchvision.datasets import Flowers102 
from datasets import load_dataset

# --- Flowers102 ---
# Test split of Flowers102 (evaluation is done on this set).
# `clip_transform` turns each image into a CLIP pixel tensor so the DataLoader
# can stack samples into a batch. The previous `transform=object` raised a
# TypeError on the first sample, because `object(img)` accepts no arguments.
flowers102_test_dts = Flowers102(root=DATA_FOLDER, split="test", transform=clip_transform, download=True)
print(f"Total test samples: {len(flowers102_test_dts)}") # should be 6149

# Class names for Flowers102, read from a JSON file kept inside DATA_FOLDER.
# This file is not among the files fetched by the Flowers102 download, so it
# has to be present already.
# NOTE(review): presumably maps category ids to flower names — confirm the key
# scheme against the dataset's integer labels.
with open(os.path.join(DATA_FOLDER, "cat_to_name.json"), "r") as f:
    flowers102_class_names = json.load(f)

# --- CUB-200-2011 ---
birds_200 = load_dataset("bentrevett/caltech-ucsd-birds-200-2011", cache_dir=DATA_FOLDER, download_mode="reuse_dataset_if_exists")


def _cub_to_clip_batch(examples):
    """Format transform for the CUB split: images -> CLIP pixel tensors.

    Receives a batch as a dict of lists and returns only ``pixel_values`` and
    ``label`` so that every field of a sample can be stacked by the default
    DataLoader collate function.
    """
    # assumes the images live in an "image" column — TODO confirm against the dataset card
    return {
        "pixel_values": [clip_transform(image) for image in examples["image"]],
        "label": examples["label"],
    }


# Apply the transform lazily, at item-access time, instead of materialising tensors.
cub_bird_test_dts = birds_200["test"].with_transform(_cub_to_clip_batch)
print(f"Total test samples: {len(cub_bird_test_dts)}") # should be 5794

# prepare class names for CUB-200-2011
cub_bird_class_names = cub_bird_test_dts.features["label"].names

# === Create DataLoaders ===
# Flowers102 batches are (pixel_values, labels) tuples; CUB batches are dicts
# with "pixel_values" and "label" entries.
flowers102_test_loader = DataLoader(
    flowers102_test_dts, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True
)
cub_bird_test_loader = DataLoader(
    cub_bird_test_dts, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True
)


Downloading https://thor.robots.ox.ac.uk/flowers/102/102flowers.tgz to data/flowers-102/102flowers.tgz


100%|██████████| 344862509/344862509 [00:35<00:00, 9704779.01it/s] 


Extracting data/flowers-102/102flowers.tgz to data/flowers-102
Downloading https://thor.robots.ox.ac.uk/flowers/102/imagelabels.mat to data/flowers-102/imagelabels.mat


100%|██████████| 502/502 [00:00<00:00, 380542.31it/s]


Downloading https://thor.robots.ox.ac.uk/flowers/102/setid.mat to data/flowers-102/setid.mat


100%|██████████| 14989/14989 [00:00<00:00, 7317940.01it/s]


Total test samples: 6149


Generating train split: 100%|██████████| 5994/5994 [00:06<00:00, 996.13 examples/s] 
Generating test split: 100%|██████████| 5794/5794 [00:05<00:00, 1030.08 examples/s]

Total test samples: 5794





In [None]:
print("--- Starting Method 1: Zero-Shot Classification ---")

# Handcrafted prompt templates; "{}" is replaced by the class name.
FLOWERS102_PROMPT = "a photo of a {}, a type of flower."
CUB_BIRD_PROMPT = "a photo of a {}, a type of bird."


def _ordered_class_names(class_names):
    """Return class names as a list whose position is the integer class label.

    A sequence is copied into a list as-is. A dict is ordered by the integer
    value of its keys.
    """
    if isinstance(class_names, dict):
        # assumes keys are consecutive category ids (e.g. "1".."102") whose sorted
        # order matches the dataset's 0-based labels — TODO confirm
        return [class_names[key] for key in sorted(class_names, key=int)]
    return list(class_names)


def _readable_name(raw_name):
    """Strip an optional ``"<digits>."`` prefix and turn underscores into spaces."""
    prefix, dot, rest = raw_name.partition(".")
    if dot and prefix.isdigit():
        raw_name = rest
    return raw_name.replace("_", " ").strip()


def _encode_prompts(model, processor, class_names, template):
    """Return L2-normalised text embeddings, one row per class name."""
    prompts = [template.format(name) for name in class_names]
    text_inputs = processor(
        text=prompts, padding=True, truncation=True, return_tensors="pt"
    ).to(DEVICE)
    with torch.no_grad():
        text_features = model.get_text_features(**text_inputs)
    return text_features / text_features.norm(dim=-1, keepdim=True)


def _split_batch(batch):
    """Return ``(pixel_values, labels)`` from a tuple-style or dict-style batch."""
    if isinstance(batch, dict):
        images = batch["pixel_values"] if "pixel_values" in batch else batch["image"]
        return images, batch["label"]
    images, labels = batch
    return images, labels


def evaluate_zero_shot(model, loader, text_features, desc="Zero-Shot Evaluation"):
    """Classify every image in ``loader`` by similarity to ``text_features``.

    Args:
        model: CLIP model providing ``get_image_features``.
        loader: iterable of batches, either ``(pixel_values, labels)`` tuples or
            dicts with ``"pixel_values"`` (or ``"image"``) and ``"label"`` tensors.
        text_features: L2-normalised class embeddings, one row per class.
        desc: label shown on the progress bar.

    Returns:
        ``(accuracy, y_true, y_pred)`` where accuracy is a float in [0, 1] and
        the other two are NumPy arrays of integer labels.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    predictions, targets = [], []
    model.eval()
    with torch.no_grad():
        for batch in tqdm(loader, desc=desc):
            images, labels = _split_batch(batch)
            image_features = model.get_image_features(pixel_values=images.to(DEVICE))
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
            # Both sides are unit-norm, so the matrix product is cosine similarity.
            # CLIP's logit scale is a positive factor and cannot change the argmax.
            similarity = image_features @ text_features.T
            predictions.append(similarity.argmax(dim=-1).cpu())
            targets.append(torch.as_tensor(labels).cpu())
    if not predictions:
        raise ValueError("loader produced no batches")
    y_pred = torch.cat(predictions).numpy()
    y_true = torch.cat(targets).numpy()
    accuracy = float((y_pred == y_true).mean())
    return accuracy, y_true, y_pred


# === 1. Load the full CLIP model ===
model = CLIPModel.from_pretrained(MODEL_ID).to(DEVICE)
model.eval()

# === 2. Create and encode text prompts ===
# handcrafted prompts and custom prompts
flowers102_prompt_names = _ordered_class_names(flowers102_class_names)
cub_bird_prompt_names = [_readable_name(name) for name in cub_bird_class_names]

flowers102_text_features = _encode_prompts(
    model, processor, flowers102_prompt_names, FLOWERS102_PROMPT
)
cub_bird_text_features = _encode_prompts(
    model, processor, cub_bird_prompt_names, CUB_BIRD_PROMPT
)

# === 3. Evaluate on the test set ===
flowers102_accuracy, flowers102_y_true, flowers102_y_pred = evaluate_zero_shot(
    model, flowers102_test_loader, flowers102_text_features
)
cub_bird_accuracy, cub_bird_y_true, cub_bird_y_pred = evaluate_zero_shot(
    model, cub_bird_test_loader, cub_bird_text_features
)

# === 4. Result Analysis ===
# First line is Flowers102, second line is CUB-200-2011.
print(f"\nZero-Shot Test Accuracy: {flowers102_accuracy * 100:.2f}%")

print(f"\nZero-Shot Test Accuracy: {cub_bird_accuracy * 100:.2f}%")

# also can do the "classification_report" and "confusion_matrix" here,
# e.g. classification_report(flowers102_y_true, flowers102_y_pred)

# === 5. Visualization ===
# use plt to visualize some predictions