In [22]:
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score
import torch
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm


### Testing Zero-Shot on the SeaLake class

In [34]:
# Load CLIP model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Define class names (categories for classification)
class_names = ["AnnualCrop", "Forest", "Herbaceous Vegetation", "Highway", "Industrial",
               "Pasture", "PermanentCrop", "Residential", "River", "SeaLake"]

# Convert class names into CLIP-compatible text prompts
text_inputs = [f"satellite image of {c.lower()}" for c in class_names]

all_preds = []
all_labels = [9 for _ in range(3000)]
all_probs = np.zeros((10))

# Run inference on batches
for i in tqdm(range(100), desc="Processing Batches"):
    images = [Image.open(f"./2750/SeaLake/SeaLake_{1 + id + 30*i}.jpg") for id in range(30)]
    inputs = processor(text=class_names, images=images, return_tensors="pt", padding=True)

    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image  # this is the image-text similarity score
    probs = logits_per_image.softmax(dim=1)  # we can take the softmax to get the label probabilities
    predictions = probs.argmax(dim=1).tolist()

    all_preds.extend(predictions)

accuracy = accuracy_score(all_labels, all_preds)
print(f"Overall Accuracy: {accuracy * 100:.2f}%")



Overall Accuracy: 54.70%
