In [28]:
from transformers import CLIPProcessor, CLIPModel
import torch
from PIL import Image
import pandas as pd
# Load the CLIP weights and the matching pre-processor from one checkpoint id,
# so the model and its tokenizer/image transforms cannot drift apart.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)


In [None]:
# Ground-truth table: one row per image.
df = pd.read_csv("./file_labels.csv")  # columns: filename,label

# Parallel lists: image_paths[i] is the file whose expected class is true_labels[i].
image_paths = df.loc[:, "filename"].to_list()
true_labels = df.loc[:, "label"].to_list()

# Candidate classes are the distinct labels present in the CSV, in sorted order
# so that a class index always maps to the same name.
distinct_labels = set(true_labels)
class_names = sorted(distinct_labels)


def compute_text_embeddings(class_names, processor, model):
    """Embed the class names as unit-length text feature vectors.

    Args:
        class_names: Texts to embed; passed to ``processor`` as ``text=``.
        processor: Callable that tokenizes the texts into PyTorch tensors
            (invoked with ``return_tensors="pt"``, padding and truncation on).
        model: Object exposing ``get_text_features(**inputs)``.

    Returns:
        The text features divided by their L2 norm along the last dimension.
        Computed under ``torch.no_grad()``, so no autograd graph is recorded.
    """
    tokenized = processor(
        text=class_names,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    with torch.no_grad():
        raw_features = model.get_text_features(**tokenized)
        lengths = raw_features.norm(p=2, dim=-1, keepdim=True)
        unit_features = raw_features / lengths
    return unit_features
    

# Embed every class name once up front; the per-image loop below reuses this
# tensor instead of re-encoding the text for each image.
text_features = compute_text_embeddings(class_names, processor, model)


In [None]:
# Zero-shot classification: embed each image with CLIP, score it against the
# pre-computed class-name embeddings, and record the best and the k best labels.
top1_preds = []
top5_preds = []
unreadable_images = []  # (path, error) for every image that could not be loaded

# topk raises if asked for more entries than there are classes, so cap k.
top_k = min(5, len(class_names))

# CLIP scales cosine similarities by exp(logit_scale) before the softmax.
# Without this temperature the similarities are so close together that the
# softmax is almost uniform and the "confidence" values are meaningless.
with torch.no_grad():
    logit_scale = model.logit_scale.exp()

for img_path in image_paths:
    try:
        # The context manager closes the underlying file; convert() returns an
        # independent in-memory copy that stays valid afterwards.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
    except Exception as exc:
        # Best effort: keep going on unreadable files, but remember why they
        # failed. Unlike a bare `except:`, this lets KeyboardInterrupt through.
        unreadable_images.append((img_path, repr(exc)))
        top1_preds.append("INVALID")
        top5_preds.append(["INVALID"] * 5)
        continue

    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        image_features = model.get_image_features(**inputs)
        image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)

        # Both sides are unit-normalized, so the matrix product is cosine similarity.
        similarity = image_features @ text_features.T
        probs = (logit_scale * similarity).softmax(dim=1)

        top1_idx = probs.argmax(dim=1).item()
        top5_idx = probs.topk(top_k, dim=1).indices[0].tolist()

    top1_preds.append(class_names[top1_idx])
    top5_preds.append([class_names[i] for i in top5_idx])

if unreadable_images:
    first_path, first_error = unreadable_images[0]
    print(
        f"Skipped {len(unreadable_images)} unreadable image(s); "
        f"first failure: {first_path!r} ({first_error})"
    )


In [None]:
# Keep only the rows whose image loaded successfully; failed images were
# recorded with the "INVALID" sentinel as their top-1 prediction.
valid_rows = []
for row_idx, top1_label in enumerate(top1_preds):
    if top1_label == "INVALID":
        continue
    valid_rows.append(row_idx)

# Project each parallel list onto the surviving row indices, preserving order.
filtered_true = list(map(true_labels.__getitem__, valid_rows))
filtered_top1 = list(map(top1_preds.__getitem__, valid_rows))
filtered_top5 = list(map(top5_preds.__getitem__, valid_rows))



In [None]:
from sklearn.metrics import accuracy_score, top_k_accuracy_score
# Top-1: fraction of images whose single best prediction equals the true label.
top1_acc = accuracy_score(filtered_true, filtered_top1)

# Top-5: fraction of images whose true label appears among the predicted labels.
# sklearn's top_k_accuracy_score cannot be used here: it expects a numeric
# (n_samples, n_classes) score matrix, whereas filtered_top5 holds label names.
top5_hits = sum(
    truth in candidates
    for truth, candidates in zip(filtered_true, filtered_top5)
)
# Guard against division by zero when no image could be evaluated.
top5_acc = top5_hits / len(filtered_true) if filtered_true else float("nan")

print(f"Top-1 Accuracy: {top1_acc:.4f}")
print(f"Top-5 Accuracy: {top5_acc:.4f}")


Predicted class: This is a Yorkshire Terrier (confidence: 0.03)
