In [5]:
from transformers import CLIPProcessor, CLIPModel
import torch
from PIL import Image
import numpy as np
import os
from sklearn.metrics.pairwise import cosine_similarity

# Load the CLIP model and its processor from one shared checkpoint id, so the
# two can never be loaded from different checkpoints by accident.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)

# Function to get image embeddings
def get_image_embeddings(image_list):
    """Encode a batch of images with the module-level CLIP model.

    Args:
        image_list: Images handed to the module-level ``processor`` through
            its ``images`` argument (this script passes lists of PIL images).

    Returns:
        The result of ``model.get_image_features`` for the processed batch.
        The call runs under ``torch.no_grad()``, so no gradients are tracked.
    """
    # return_tensors="pt" asks the processor for PyTorch tensors.
    batch = processor(images=image_list, return_tensors="pt")
    with torch.no_grad():
        return model.get_image_features(**batch)

# Load images from directory
image_dir = 'data'
image_list = []
image_filenames = []

# Lower-case, dot-prefixed suffixes: "PHOTO.JPG" is accepted, while a name that
# merely ends in the letters "png" without a dot is not.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

# os.listdir() returns entries in arbitrary order; sorting keeps the order of
# image_filenames (and therefore of the similarity scores) reproducible.
for filename in sorted(os.listdir(image_dir)):
    if not filename.lower().endswith(image_extensions):
        continue
    image_path = os.path.join(image_dir, filename)
    # Skip sub-directories whose name happens to end in an image suffix.
    if not os.path.isfile(image_path):
        continue
    # Image.open() is lazy and keeps the file open. convert() reads the pixel
    # data into a new in-memory image, so the file handle can be released as
    # soon as the with-block exits instead of leaking one handle per image.
    with Image.open(image_path) as img:
        image_list.append(img.convert("RGB"))
    image_filenames.append(filename)

# Get embeddings for all images
if not image_list:
    # Fail early with a clear message instead of an opaque error raised from
    # inside the processor when it is handed an empty batch.
    raise ValueError(f"No images found in {image_dir!r}")
image_embeddings = get_image_embeddings(image_list)

# Normalize embeddings (unit L2 norm along the last dimension)
image_embeddings = image_embeddings / image_embeddings.norm(dim=-1, keepdim=True)

# Load and encode the query image
query_image_path = "./data/input/car.jpeg"
# Image.open() is lazy and keeps the file open; convert() reads the pixels
# into a new in-memory image so the file handle is released right away while
# query_image stays usable afterwards.
with Image.open(query_image_path) as opened_query:
    query_image = opened_query.convert("RGB")
query_embedding = get_image_embeddings([query_image])
query_embedding = query_embedding / query_embedding.norm(dim=-1, keepdim=True)

# Compute cosine similarity between query and all images.
# The result has one row (the single query) and one column per gallery image.
similarity_scores = cosine_similarity(query_embedding, image_embeddings)

print(similarity_scores)
# Find the best match: take the argmax over the query's own row rather than
# relying on np.argmax flattening the 2-D array.
best_match_index = np.argmax(similarity_scores[0])
best_match_filename = image_filenames[best_match_index]

print(f"Best match: {best_match_filename} with similarity score: {similarity_scores[0][best_match_index]:.4f}")

[[0.6502657  0.46062407 0.27988875 0.3605373  0.34908727 0.43779412
  0.2977665  0.29149652]]
Best match: car.jpeg with similarity score: 0.6503
