# Product Linking and Similarity Search

This notebook demonstrates the product linking pipeline using vector embeddings. It visualizes similarity search results between product localizations and finds related products using CLIP embeddings stored in Qdrant.

## Environment Setup
Configure environment variables and import required libraries.

In [None]:
import os

# Must be set before the libraries imported in the following cells are loaded.
# NOTE(review): presumably works around the "duplicate OpenMP runtime" abort
# that can occur when several native libraries each bundle OpenMP — confirm
# it is still needed in this environment.
os.environ.update(KMP_DUPLICATE_LIB_OK="TRUE")

In [None]:
import sys

# Make the local `iris` checkout importable for the imports below.
# NOTE(review): this is a machine-specific absolute path; adjust it (or
# install the package) when running the notebook on another machine.
_IRIS_ROOT = r'c:\Users\ice\projects\iris'

# Guard the append so that re-running this cell does not stack duplicate
# entries on sys.path.
if _IRIS_ROOT not in sys.path:
    sys.path.append(_IRIS_ROOT)

from tqdm.notebook import tqdm

from iris.config.data_pipeline_config_manager import DataPipelineConfigManager
from iris.data_pipeline.mongodb_manager import MongoDBManager
from iris.data_pipeline.image_store_manager import ImageStoreManager
from iris.config.embedding_pipeline_config_manager import EmbeddingPipelineConfigManager
from iris.embedding_pipeline.embedder import Embedder
from iris.data_pipeline.qdrant_manager import QdrantManager
from iris.embedding_pipeline.embedding_handler import EmbeddingHandler
from iris.models.localization import Localization

from iris.models.document import Document
from iris.protocols.context_protocols import HasMongoDBContext, HasImageContext
from pathlib import Path
from PIL import Image as PILImage

## Initialize Pipeline Components

Set up the required pipeline components:
1. Data pipeline configuration
2. MongoDB connection for document storage
3. Image store for accessing image data
4. CLIP model for embeddings
5. Qdrant vector database for similarity search

In [None]:
# Data-pipeline settings: source of the MongoDB, image-store and Qdrant configs.
config_manager = DataPipelineConfigManager()

# Document storage (MongoDB).
mongodb_config = config_manager.mongodb_config
mongodb_manager = MongoDBManager(mongodb_config)

# Image storage.
image_store_config = config_manager.image_store_config
image_store_manager = ImageStoreManager(image_store_config)

# Embedder, built from the embedding pipeline's CLIP settings.
embedding_config_manager = EmbeddingPipelineConfigManager()
clip_config = embedding_config_manager.clip_config
embedder = Embedder(clip_config)

# Vector database (Qdrant), configured from the data-pipeline settings.
qdrant_config = config_manager.qdrant_config
qdrant_manager = QdrantManager(qdrant_config)

# Handler that pairs the embedder with the vector store.
embedding_handler = EmbeddingHandler(qdrant_manager=qdrant_manager, embedder=embedder)

## Context Setup

Create a unified context that provides access to both MongoDB and image storage.
This allows seamless access to documents and their associated images.

In [None]:
class FullContext:
    """Single context object combining a MongoDB context and an image context.

    Calls are forwarded unchanged to the wrapped contexts, and the MongoDB
    context's ``config`` is re-exposed as ``config``.
    """

    def __init__(self, mongodb_context: HasMongoDBContext, image_context: HasImageContext):
        """Store both wrapped contexts and mirror the MongoDB context's config."""
        self.image_store_context = image_context
        self.mongodb_context = mongodb_context
        self.config = self.mongodb_context.config

    def find_all(self, collection: str, document_hashes: list[str]) -> list["Document"]:
        """Delegate to the MongoDB context's ``find_all`` with the same arguments."""
        documents = self.mongodb_context.find_all(collection, document_hashes)
        return documents

    def get_pil_image(
        self,
        image_id: str | None = None,
        path: Path | None = None,
        url: str | None = None,
    ) -> tuple[PILImage.Image, Path]:
        """Delegate to the image context's ``get_pil_image``.

        All three selectors are passed through by keyword, including those
        left at ``None``.
        """
        selectors = {"image_id": image_id, "path": path, "url": url}
        return self.image_store_context.get_pil_image(**selectors)
    
# Shared context passed to the embedding handler below: documents come from
# the MongoDB manager, images from the image store manager.
full_context = FullContext(mongodb_manager, image_store_manager)

## Find the closest products for each localization

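1. For each localization in MongoDB, fetch its embedding and search the Qdrant product collection for its nearest neighbors.
2. Store the neighbors' hashes and scores on the localization as `product_predictions` and upsert it back into MongoDB.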

In [None]:
# Number of nearest product neighbors stored per localization.
NEIGHBOR_LIMIT = 10

with mongodb_manager as mongodb, embedding_handler as emb_handl:
    # Resolve loop-invariant handles and collection names once, always through
    # the handles bound by the `with` statement.
    qdrant = emb_handl.qdrant_manager
    localization_vectors = qdrant.qdrant_config.localization_collection
    product_vectors = qdrant.qdrant_config.product_collection
    localization_docs = mongodb.config.localization_collection

    localizations = mongodb.find_all(localization_docs)
    failed_upserts = 0

    for localization in tqdm(localizations, desc="Linking localizations to products"):
        localization: Localization

        # Embedding of this localization, looked up via the localization
        # vector collection.
        localization_embedding = emb_handl.get_embedding(
            localization,
            full_context,
            localization_vectors,
        )
        # The return value is not used here.
        # NOTE(review): presumably called for its side effect on the
        # localization object — confirm it is required before the upsert.
        localization.calculate_point()

        # Get nearest product neighbors from Qdrant
        neighbors = qdrant.search_points(
            product_vectors,
            query_vector=localization_embedding,
            limit=NEIGHBOR_LIMIT,
        )
        # Map each neighboring product's hash to its search score.
        product_predictions = {
            product.payload['hash']: product.score for product in neighbors
        }
        localization.product_predictions = product_predictions

        # Write the updated localization back to the collection it was read from.
        success = mongodb.upsert(localization_docs, localization)
        # Assumes `upsert` returns a truthy value on success, as the name
        # `success` suggests — TODO confirm against MongoDBManager.upsert.
        if not success:
            failed_upserts += 1

    # Surface write failures instead of silently discarding the upsert result.
    if failed_upserts:
        print(f"{failed_upserts} localization upsert(s) did not report success")