# In [5]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Siamese comparison head that works on precomputed embeddings
class SiameseNetwork(nn.Module):
    """Score a pair of embeddings with an MLP over their absolute difference.

    The network is a three-layer perceptron (embedding_dim -> 256 -> 128 -> 1)
    with ReLU between the linear layers. The last layer has no activation, so
    the returned score is an unbounded raw value.
    """

    def __init__(self, embedding_dim=512):
        """Build the scoring MLP.

        Args:
            embedding_dim: Size of the last dimension of the embeddings that
                will be passed to ``forward``.
        """
        super().__init__()
        layers = [
            nn.Linear(embedding_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),  # one raw score per embedding pair
        ]
        # Keep the attribute name and positional indices so state_dict keys
        # stay "fc.0.*", "fc.2.*", "fc.4.*".
        self.fc = nn.Sequential(*layers)

    def forward(self, emb1, emb2):
        """Return the score for ``emb1`` versus ``emb2``.

        The inputs are combined as ``|emb1 - emb2|`` (element-wise, with the
        usual tensor broadcasting), so swapping the arguments does not change
        the result. The output has the same leading dimensions as the
        broadcast difference and a trailing dimension of size 1.
        """
        return self.fc((emb1 - emb2).abs())

# Initialize Siamese Network
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): no trained weights are loaded into siamese_model in this cell.
# Unless they are restored elsewhere, the scores computed below come from
# randomly initialised layers and the reported "best match" is not meaningful.
siamese_model = SiameseNetwork().to(device)

# Load CLIP model for image embeddings
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Load stored embeddings: every "<name>_embedding.txt" file in embedding_dir
# holds one vector readable by np.loadtxt.
embedding_dir = 'db'
image_dir = 'db'
embedding_suffix = '_embedding.txt'
embeddings_list = []
image_filenames = []

# os.listdir() returns entries in arbitrary order; sorting keeps the mapping
# between row i of embeddings_tensor and image_filenames[i] (and the winner of
# any tie in the scores) identical from run to run.
for filename in sorted(os.listdir(embedding_dir)):
    if not filename.endswith(embedding_suffix):
        continue

    embedding_path = os.path.join(embedding_dir, filename)
    embedding = np.loadtxt(embedding_path)
    embeddings_list.append(torch.tensor(embedding, dtype=torch.float32).to(device))

    # Strip only the trailing suffix; str.replace would also remove any
    # earlier occurrence inside the name.  Adjust extension if needed.
    image_filename = filename[:-len(embedding_suffix)]
    image_filenames.append(image_filename)

# Fail with an actionable message instead of the opaque error torch.stack
# raises on an empty list.
if not embeddings_list:
    raise FileNotFoundError(
        f"No '*{embedding_suffix}' files found in directory '{embedding_dir}'"
    )

# Convert list to tensor (one row per stored embedding)
embeddings_tensor = torch.stack(embeddings_list)

print("Loaded embeddings size:", embeddings_tensor.shape)

# Load and encode the query image
image_path = "./data/input/product-img-4.jpg"
# Image.open is lazy and keeps the file handle open; convert() reads the pixel
# data into a new image so the handle can be released by the context manager.
# Converting to RGB hands the processor a 3-channel image whatever the source
# mode (palette, greyscale, RGBA, ...).
with Image.open(image_path) as image_file:
    query_image = image_file.convert("RGB")

# Pure inference: disable autograd so no graph is built for either the CLIP
# forward pass or the Siamese scoring (this also makes .detach() unnecessary).
with torch.no_grad():
    inputs = processor(images=query_image, return_tensors="pt").to(device)
    query_image_vector = clip_model.get_image_features(**inputs)

    # Compute similarity scores using the Siamese Network
    query_image_vector = query_image_vector.squeeze(0)  # Remove batch dimension
    # The query vector broadcasts against every stored row, so a single
    # forward pass scores all embeddings at once; result is one score per row.
    scores = siamese_model(query_image_vector, embeddings_tensor)

# Plain Python floats, one per stored embedding, in image_filenames order.
similarity_scores = scores.squeeze(-1).tolist()

# Find the best match (highest score; first index wins on ties)
best_match_index = np.argmax(similarity_scores)
best_match_filename = image_filenames[best_match_index]

print(f"Best match: {best_match_filename} with similarity score: {similarity_scores[best_match_index]:.4f}")


# Output:
# Loaded embeddings size: torch.Size([8, 512])
# Best match: book1.pdf with similarity score: -0.0418
