In [1]:
import os
import clip
import torch

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, precision_score, recall_score
from torch.utils.data import DataLoader
from tqdm import tqdm
from torchvision.datasets import ImageFolder

In [2]:
# Choose the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the CLIP ViT-B/32 model onto that device; `preprocess` is the image
# transform returned alongside it and is handed to the datasets below.
model, preprocess = clip.load('ViT-B/32', device=device)

In [11]:
# Tokenise one natural-language prompt per class. The position of a prompt in
# this list is the index the zero-shot argmax reports for it
# (0 = early blight, 1 = late blight, 2 = healthy).
# NOTE(review): the ground-truth labels come from ImageFolder, which numbers
# classes by its own folder ordering — confirm that ordering matches the
# prompt order used here, otherwise predictions and labels are misaligned.
labels = clip.tokenize(
    [
        "a photo of a potato plant leaf infected with early blight",
        "a photo of a potato plant leaf infected with late blight",
        "a photo of a healthy potato plant leaf",
    ]
)
labels = labels.to(device)

In [9]:
# Load the dataset
root = os.path.expanduser("~/.cache")  # not referenced in this cell; kept in case other cells use it

# Single place that names the dataset location. Set the PLD_DATA_DIR
# environment variable to point at another copy; the default is the
# original hard-coded location.
data_dir = os.environ.get("PLD_DATA_DIR", "/home/jmunoz/CLIP/PLD_3_Classes_256")

# Each split is a folder-per-class image tree; every image goes through the
# CLIP `preprocess` transform when it is read.
train = ImageFolder(os.path.join(data_dir, "Training"), transform=preprocess)

test = ImageFolder(os.path.join(data_dir, "Testing"), transform=preprocess)

validation = ImageFolder(os.path.join(data_dir, "Validation"), transform=preprocess)

# Fail fast if the splits do not share the same class-name -> index mapping:
# integer labels from different splits would otherwise not be comparable.
assert train.class_to_idx == test.class_to_idx == validation.class_to_idx, (
    "Training/Testing/Validation folders define different class mappings: "
    f"{train.class_to_idx} / {test.class_to_idx} / {validation.class_to_idx}"
)




def get_features(dataset, batch_size=100):
    """Encode every image of ``dataset`` with the module-level CLIP ``model``.

    The dataset is iterated in order (no shuffling) in mini-batches, each
    batch is moved to the module-level ``device`` and passed through
    ``model.encode_image`` with gradient tracking disabled.

    Args:
        dataset: Dataset yielding ``(image, label)`` pairs that
            ``DataLoader`` can batch into tensors.
        batch_size: Number of images encoded per forward pass. Defaults to
            100, the value previously hard-coded here.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is a single torch
        tensor made by concatenating the per-batch encoder outputs (left
        as returned by the encoder, i.e. not moved to the CPU) and
        ``labels`` is a NumPy array of the concatenated batch labels.
    """
    feature_batches = []
    label_batches = []

    # Inference only: no autograd graph is needed for feature extraction.
    with torch.no_grad():
        # `targets` rather than `labels`, so the module-level `labels`
        # (the tokenised text prompts) is not shadowed inside this function.
        for images, targets in tqdm(DataLoader(dataset, batch_size=batch_size)):
            feature_batches.append(model.encode_image(images.to(device)))
            label_batches.append(targets)

    return torch.cat(feature_batches), torch.cat(label_batches).cpu().numpy()


# Encode the three splits in turn (training, testing, validation) and unpack
# each (features, labels) pair into its own pair of names.
(
    (train_features, train_labels),
    (test_features, test_labels),
    (val_features, val_labels),
) = (get_features(split) for split in (train, test, validation))

  0%|          | 0/33 [00:00<?, ?it/s]

100%|██████████| 33/33 [00:08<00:00,  3.98it/s]
100%|██████████| 5/5 [00:01<00:00,  4.93it/s]
100%|██████████| 5/5 [00:01<00:00,  4.84it/s]


In [43]:
# Zero-shot classification of the training images: compare every image
# embedding with the embedding of each text prompt and pick the best match.
# Everything here is inference, so gradient tracking is switched off — the
# same way get_features does for the image encoder — instead of building an
# autograd graph for the text encoder that is never used.
with torch.no_grad():
    text_features = model.encode_text(labels)

    # Scale both sets of embeddings to unit length so the matrix product
    # below is a cosine similarity. This is done in place, so
    # `train_features` remains normalised for any cell that runs afterwards.
    train_features /= train_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # Scaled cosine similarities turned into a per-image distribution over
    # the prompts (one row per image, one column per prompt).
    similarity = (100.0 * train_features @ text_features.T).softmax(dim=-1)

# Index of the highest-scoring prompt for each image, as a NumPy array.
# NOTE(review): this index follows the order of the prompts in `labels`,
# while `train_labels` follows ImageFolder's class indexing (documented by
# torchvision as based on sorted folder names) — confirm the two orders
# agree (e.g. inspect `train.class_to_idx`) before trusting the metrics.
predicted_labels = similarity.argmax(dim=-1).cpu().numpy()

In [45]:
# Share of training images whose predicted index equals the ground-truth
# index, expressed as a percentage.
accuracy = np.equal(train_labels, predicted_labels).mean() * 100.0
print(f"Accuracy = {accuracy:.3f}")

Accuracy = 29.191


In [46]:
# Macro-averaged F1, precision and recall of the zero-shot predictions on the
# training split: each metric is computed per class and then averaged with
# equal weight per class.
f1, precision, recall = (
    scorer(train_labels, predicted_labels, average='macro')
    for scorer in (f1_score, precision_score, recall_score)
)

print(
    f"F1 = {f1:.3f}",
    f"Precision = {precision:.3f}",
    f"Recall = {recall:.3f}",
    sep="\n",
)

F1 = 0.167
Precision = 0.130
Recall = 0.246
