In [None]:
import pandas as pd

# Load the spreadsheet; the columns used below are 'Image_Path' and 'Label'.
df = pd.read_excel('../data/dataset.xlsx')

# Prefix every path from the sheet with the data directory.
paths = ['../data/' + rel_path for rel_path in df['Image_Path'].tolist()]

# Binary classification target: 'wt' becomes 0, any other label becomes 1.
labels = [1 if raw_label != 'wt' else 0 for raw_label in df['Label'].tolist()]

In [None]:
# Zero-shot prediction with CLIP
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import pandas as pd
from tqdm import tqdm

# ---- Load CLIP ----
# Use the MPS backend when torch reports it as available; otherwise use the CPU.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
model = model.to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# ---- Define your classes ----
# Prompt order matters: the classification loop takes an argmax over these
# prompts, so index 0 means "normal" and index 1 means "deformed".
class_names = [
    "a photo of a normal fly wing",
    "a photo of a deformed fly wing",
]

# ---- Encode text prompts ----
text_inputs = processor(text=class_names, return_tensors="pt", padding=True)
text_inputs = text_inputs.to(device)
with torch.no_grad():
    text_features = model.get_text_features(**text_inputs)
    # Divide each prompt embedding by its L2 norm so it has unit length.
    prompt_norms = text_features.norm(p=2, dim=-1, keepdim=True)
    text_features = text_features / prompt_norms

# ---- Classify images ----
# One image per iteration: embed it, score it against every prompt embedding,
# and record the index of the best-scoring prompt.
preds = []
for path in tqdm(paths):
    rgb_image = Image.open(path).convert("RGB")
    pixel_batch = processor(images=rgb_image, return_tensors="pt").to(device)

    with torch.no_grad():
        image_features = model.get_image_features(**pixel_batch)

        # Scale to unit L2 length; text_features is normalised the same way,
        # so the product below is a cosine similarity per prompt.
        image_norm = image_features.norm(p=2, dim=-1, keepdim=True)
        image_features = image_features / image_norm

        # A single image is processed per step, so row 0 holds all its scores.
        scores = (image_features @ text_features.T)[0]
        preds.append(int(scores.argmax()))

# ---- Evaluate ----
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Overall accuracy first, then the per-class report.
print("Accuracy:", accuracy_score(labels, preds))
print(classification_report(labels, preds))

# Confusion matrix drawn as an annotated heatmap, with the prompt strings as
# tick labels (true class on the y-axis, predicted class on the x-axis).
cm = confusion_matrix(labels, preds)
ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()


In [None]:
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import numpy as np
from tqdm import tqdm
from sklearn import svm
from sklearn.model_selection import cross_val_predict, StratifiedKFold


# ---- Load CLIP image encoder ----
# Use the MPS backend when torch reports it as available; otherwise use the CPU.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()

# ---- Extract embeddings ----
# One forward pass per image. The features are stored as returned by the
# model: no L2 normalisation is applied in this cell.
embeddings = []
for path in tqdm(paths):
    rgb_image = Image.open(path).convert("RGB")
    pixel_batch = processor(images=rgb_image, return_tensors="pt").to(device)

    with torch.no_grad():
        feature_tensor = model.get_image_features(**pixel_batch)

    # Bring the features back to host memory as a flat 1-D NumPy vector.
    embeddings.append(feature_tensor.cpu().numpy().reshape(-1))

# Stack the per-image vectors into a feature matrix, plus the label vector.
X = np.array(embeddings)
y = np.array(labels)

# Linear-kernel SVM evaluated with shuffled 10-fold stratified splits; the
# fixed random_state keeps the fold assignment the same on every run.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
clf = svm.SVC(kernel="linear", C=1)

# ---- Get cross-validated predictions ----
# X holds the image embeddings and y the binary labels built above.
preds = cross_val_predict(estimator=clf, X=X, y=y, cv=cv)

# ---- Evaluate ----
# Import the metric and plotting helpers here so this cell runs on its own.
# The import header of this cell does not bring them in, so without these
# lines the code below only works when the zero-shot cell (which imports the
# same names) has already been executed in the current kernel.
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

print("Accuracy:", accuracy_score(y, preds))
print("\nClassification report:\n", classification_report(y, preds))

# ---- Confusion matrix ----
cm = confusion_matrix(y, preds)
# Tick labels for the two encoded classes: 0 is the 'wt' label and 1 is every
# other label. NOTE: this rebinds `class_names`, which the zero-shot cell uses
# for its text prompts; that cell redefines it before use.
class_names = ["class 0", "class 1"]  # adjust to your labels
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()