In [1]:
from PIL import Image
import requests
import os
import torch

from transformers import CLIPProcessor, CLIPModel

device = "cuda" if torch.cuda.is_available() else "cpu"
DATA_PATH = './data/Animals'

model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

features = []
image_files = [f for f in os.listdir(DATA_PATH) if f.lower().endswith(('.jpg', '.jpeg'))]
for filename in image_files:
    filepath = os.path.join(DATA_PATH, filename)
    image = Image.open(filepath).convert("RGB")
    inputs = processor(images=image, return_tensors="pt", padding=True)
    with torch.no_grad():
        image_features = model.get_image_features(pixel_values=inputs.pixel_values.to(device))

    features.append(image_features.cpu())

  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [2]:
import numpy as np
from scipy.spatial.distance import cosine


feature_img1 = np.array(features[0]).squeeze()
feature_img2 = np.array(features[1]).squeeze()
feature_img3 = np.array(features[2]).squeeze()

distance_1_2 = cosine(feature_img1, feature_img2)
distance_1_3 = cosine(feature_img1, feature_img3)

print(f"Cosine Distance between 1 and 2: {distance_1_2}")
print(f"Cosine Distance between 1 and 3:{distance_1_3}")


if distance_1_2 < distance_1_3:
    print("conclusion: image 1 and 2 are more similar")
else:
    print("conclusion: image 1 and 3 are more similar")

Cosine Distance between 1 and 2: 0.3358575701713562
Cosine Distance between 1 and 3:0.20379269123077393
conclusion: image 1 and 3 are more similar


  feature_img1 = np.array(features[0]).squeeze()
  feature_img2 = np.array(features[1]).squeeze()
  feature_img3 = np.array(features[2]).squeeze()
