In [116]:
!pip install opencv-python
!pip install ultralytics
!pip install pillow
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp312-cp312-win_amd64.whl.metadata (4.5 kB)
Downloading faiss_cpu-1.10.0-cp312-cp312-win_amd64.whl (13.7 MB)
   ---------------------------------------- 0.0/13.7 MB ? eta -:--:--
   ------------ --------------------------- 4.2/13.7 MB 27.9 MB/s eta 0:00:01
   ----------------------------- ---------- 10.2/13.7 MB 29.0 MB/s eta 0:00:01
   ---------------------------------------- 13.7/13.7 MB 29.6 MB/s eta 0:00:00
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [117]:
import cv2
import numpy as np
import torch
import torchvision
from PIL import Image
from torch import nn
from torchvision import transforms as tr
from torchvision.models import vit_h_14
from ultralytics import YOLO
import faiss

In [78]:
# This class is modified from this link:
# https://onyekaokonji.medium.com/cosine-similarity-measuring-similarity-between-multiple-images-f289aaf40c2b
class CosineSimilarity:
    """Class for comparing similarity between detected image regions."""
    
    def __init__(self, device=None):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = self.load_model()
        self.index = None
    
    def load_model(self):
        """Loads a pre-trained ResNet50 model for feature extraction."""
        model = torchvision.models.resnet50(pretrained=True)
        model.fc = nn.Identity()  # Remove the classification head
        return model.to(self.device)

    def process_image(self, img):
        """Preprocesses an image for embedding extraction."""
        transformations = tr.Compose([
            tr.ToTensor(),
            tr.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            tr.Resize((224, 224))
        ])
        img = transformations(img).float().unsqueeze(0).to(self.device)
        return img

    def get_embedding(self, image):
        """Computes the embedding for a given image."""
        img_tensor = self.process_image(image)
        with torch.no_grad():
            embedding = self.model(img_tensor).cpu().numpy()
        return embedding

    def create_index(self, embeddings):
        """Create a FAISS index for fast similarity search."""
        embedding_size = embeddings.shape[1]  # Assuming 1D embeddings
        self.index = faiss.IndexFlatL2(embedding_size)
        self.index.add(embeddings)  # Add embeddings to the index

    def compute_similarity(self, emb1, emb2):
        """Computes cosine similarity between two embeddings."""
        return torch.nn.functional.cosine_similarity(torch.tensor(emb1), torch.tensor(emb2)).item()
    
    def search_similar(self, query_embedding, k=5):
        """Search for the k most similar embeddings using FAISS."""
        D, I = self.index.search(query_embedding, k)  # D is the distances, I is indices
        return I, D

In [80]:
model = YOLO("488_back/best.pt")
similarity_checker = CosineSimilarity()

In [108]:
def extract_logo_regions(image_path):
    """Runs YOLO on an image and extracts detected logo regions."""
    img = cv2.imread(image_path)
    results = model(img)
    logo_regions = []
    bounding_boxes = []
    
    for box in results[0].boxes:
        xyxy = box.xyxy[0].tolist()
        x1, y1, x2, y2 = map(int, xyxy)
        cropped_logo = img[y1:y2, x1:x2]  # Extract detected region
        if cropped_logo.size > 0:
            logo_regions.append(cropped_logo)
            bounding_boxes.append((x1, y1, x2, y2))

    return logo_regions, bounding_boxes

def process_image(input_path, output_path, reference_image_path, confidence_threshold=0.25, similarity_threshold=0.45):
    """Processes an image, detects logos, compares them to reference logos, and saves output if similarity is high enough."""
    
    # Extract detected logo regions from both input and reference images
    input_logos, input_bboxes = extract_logo_regions(input_path)
    reference_logos, _ = extract_logo_regions(reference_image_path)
    
    if not input_logos or not reference_logos:
        print("No logos detected in one or both images.")
        return
    
    img = cv2.imread(input_path)
    save_image = False
    
    # Compute similarity for each detected logo region
    for idx, input_logo in enumerate(input_logos):
        input_embedding = similarity_checker.get_embedding(Image.fromarray(input_logo))
        
        for ref_logo in reference_logos:
            ref_embedding = similarity_checker.get_embedding(Image.fromarray(ref_logo))
            similarity_score = similarity_checker.compute_similarity(input_embedding, ref_embedding)
            print(f"Similarity score: {similarity_score}")
            
            if similarity_score >= similarity_threshold:
                x1, y1, x2, y2 = input_bboxes[idx]
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 255), 2)
                save_image = True
                break  # Stop checking once a match is found
    
    # Save image if at least one bounding box was drawn
    if save_image:
        cv2.imwrite(output_path, img)
        print(f"Processed image saved as {output_path}")
    else:
        print("Image not saved, no logos met the similarity threshold.")

In [112]:
input_image_path = "starbucks.jpg"  # Change this to your image file
output_image_path = "output.jpg"
reference_image_path = "mcdonalds_referencepng.png"  # Reference image for similarity comparison
process_image(input_image_path, output_image_path, reference_image_path)


0: 448x640 3 logos, 28.6ms
Speed: 1.5ms preprocess, 28.6ms inference, 0.6ms postprocess per image at shape (1, 3, 448, 640)

0: 480x640 1 logo, 27.2ms
Speed: 0.8ms preprocess, 27.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)
0.2509891092777252
0.26875507831573486
0.2514099180698395
Image not saved, no logos met the similarity threshold.
