# Product Linking and Similarity Search

This notebook demonstrates the product linking pipeline using vector embeddings. It embeds a product localization with CLIP, searches the product embeddings stored in Qdrant for the most similar products, and visualizes the resulting matches.

## Environment Setup
Configure environment variables and import required libraries.

In [None]:
import os

# Set before any other import in the notebook.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort that occurs when several native libraries each bundle their own
# OpenMP — confirm it is still needed in this environment.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

In [None]:
import sys
sys.path.append(r'c:\Users\ice\projects\iris')

from iris.config.data_pipeline_config_manager import DataPipelineConfigManager
from iris.data_pipeline.mongodb_manager import MongoDBManager
from iris.data_pipeline.image_store_manager import ImageStoreManager
from iris.config.embedding_pipeline_config_manager import EmbeddingPipelineConfigManager
from iris.embedding_pipeline.embedder import Embedder
from iris.data_pipeline.qdrant_manager import QdrantManager
from iris.embedding_pipeline.embedding_handler import EmbeddingHandler
from iris.models.product import Product
from iris.models.image import Image
from iris.models.localization import Localization
from qdrant_client.models import ScoredPoint

from iris.models.document import Document
from iris.protocols.context_protocols import HasMongoDBContext, HasImageContext
from pathlib import Path
from PIL import Image as PILImage

## Initialize Pipeline Components

Set up the required pipeline components:
1. Data pipeline configuration
2. MongoDB connection for document storage
3. Image store for accessing image data
4. CLIP model for embeddings
5. Qdrant vector database for similarity search

In [None]:
# Initialize configuration managers
# The data-pipeline config manager supplies the MongoDB, image-store and
# Qdrant settings read below.
config_manager = DataPipelineConfigManager()

# Initialize MongoDB manager
mongodb_config = config_manager.mongodb_config
mongodb_manager = MongoDBManager(mongodb_config)

# Initialize image store manager
image_store_config = config_manager.image_store_config
image_store_manager = ImageStoreManager(image_store_config)

# Initialize embedder
# The embedder is built from the CLIP settings of the embedding-pipeline
# config manager; the Qdrant manager takes its settings from the
# data-pipeline config manager instead.
embedding_config_manager = EmbeddingPipelineConfigManager()
embedder = Embedder(embedding_config_manager.clip_config)
qdrant_manager = QdrantManager(config_manager.qdrant_config)
# The handler is given both the embedder and the Qdrant manager; later cells
# use it as a context manager and reach Qdrant through its `qdrant_manager`.
embedding_handler = EmbeddingHandler(embedder=embedder, qdrant_manager=qdrant_manager)

## Context Setup

Create a unified context that provides access to both MongoDB and image storage.
This allows seamless access to documents and their associated images.

In [None]:
class FullContext:
    """Single context object combining MongoDB access and image access.

    Document lookups are forwarded to the wrapped MongoDB context and image
    loading is forwarded to the wrapped image context, so one object can be
    passed wherever both capabilities are required.
    """

    def __init__(self, mongodb_context: HasMongoDBContext, image_context: HasImageContext):
        """Store both wrapped contexts and expose the MongoDB context's config.

        Args:
            mongodb_context: Object that provides ``find_all`` and ``config``.
            image_context: Object that provides ``get_pil_image``.
        """
        self.image_store_context = image_context
        self.mongodb_context = mongodb_context
        # Re-exported so callers can read `context.config` directly.
        self.config = self.mongodb_context.config

    def find_all(self, collection: str, document_hashes: list[str]) -> list["Document"]:
        """Forward a multi-document lookup to the MongoDB context.

        Args:
            collection: Name of the collection to query.
            document_hashes: Hashes of the documents to fetch.

        Returns:
            Whatever the wrapped MongoDB context's ``find_all`` returns.
        """
        delegate = self.mongodb_context.find_all
        return delegate(collection, document_hashes)

    def get_pil_image(
        self,
        image_id: str | None = None,
        path: Path | None = None,
        url: str | None = None
    ) -> tuple[PILImage.Image, Path]:
        """Forward an image request to the image context.

        All three selectors are passed through unchanged as keyword
        arguments; which of them is honoured is decided by the wrapped
        image context.

        Returns:
            Whatever the wrapped image context's ``get_pil_image`` returns.
        """
        selectors = {"image_id": image_id, "path": path, "url": url}
        return self.image_store_context.get_pil_image(**selectors)
    
# One shared context backed by the MongoDB manager (documents) and the
# image store manager (images).
full_context = FullContext(image_context=image_store_manager, mongodb_context=mongodb_manager)

## Visualization Utilities

Helper functions to visualize search results:
1. Convert images to base64 for HTML display
2. Generate thumbnail previews

In [None]:
import base64
from io import BytesIO

def get_image_html(result: ScoredPoint, size: tuple[int, int] = (100, 140)) -> str:
    """Create an HTML ``<img>`` tag for a search result.

    Looks up the product whose hash is stored in ``result.payload['hash']``,
    renders the image referenced by the product's first image hash, shrinks
    it to fit within ``size`` and inlines it as a base64-encoded JPEG.

    Args:
        result: Search hit whose payload contains the product ``hash``.
        size: Maximum ``(width, height)`` of the thumbnail, in pixels.

    Returns:
        An ``<img>`` tag whose ``src`` is a ``data:image/jpeg;base64,...`` URI.
    """
    with mongodb_manager as db:
        product: Product = db.find_one(
            db.config.product_collection,
            {'hash': result.payload['hash']}
        )
        image: Image = db.find_one(
            db.config.image_metadata_collection,
            {'hash': product.image_hashes[0]}
        )
        img = image.render(image_store_manager)

        # JPEG cannot store alpha or palette images: saving e.g. an RGBA or
        # P-mode picture raises an error. Normalise to RGB first (any
        # transparency is dropped, which is acceptable for a preview).
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")

        # thumbnail() resizes in place and preserves the aspect ratio.
        img.thumbnail(size)
        buffered = BytesIO()
        img.save(buffered, format='JPEG', quality=70)
        img_b64 = base64.b64encode(buffered.getvalue()).decode()
        return f'<img src="data:image/jpeg;base64,{img_b64}" style="max-width:none">'

## Similarity Search Demo

Demonstrate the similarity search pipeline:
1. Select a sample localization
2. Generate its embedding
3. Find similar products using vector similarity
4. Visualize results with interactive plot and image grid

In [None]:
import plotly.graph_objects as go
from IPython.display import display, HTML
from random import choice

# Keep both the MongoDB manager and the embedding handler open while the
# query localization is loaded, embedded and searched.
with mongodb_manager as mongodb, embedding_handler as emb_handl:
    # Load the query localization by its fixed hash.
    localization: Localization = mongodb.find_one(
        collection_name=mongodb.config.localization_collection,
        query={'hash': "4c1e3095a306b84894c8d844454160f3"},
    )

    print(f"Selected localization: {localization.hash}")
    display(localization.render(image_store_manager))

    qdrant = emb_handl.qdrant_manager
    qdrant_config = qdrant.qdrant_config

    # Embedding of the localization, requested against the localization collection.
    localization_embedding = emb_handl.get_embedding(
        localization,
        full_context,
        qdrant_config.localization_collection,
    )

    # Ten closest entries from the product collection.
    neighbors = qdrant.search_points(
        qdrant_config.product_collection,
        query_vector=localization_embedding,
        limit=10,
    )

# Plot data: result rank on the x-axis, distance (1 - similarity score) on the y-axis
distances = [1 - hit.score for hit in neighbors]
indices = list(range(len(distances)))

# One hover label per result, reusing the distances computed above
hover_text = [
    f'Index: {rank}<br>ID: {hit.payload["hash"]}<br>Distance: {dist:.4f}<br>Type: {hit.payload["type"]}'
    for rank, (hit, dist) in enumerate(zip(neighbors, distances))
]

# Line-and-marker trace that shows only the custom hover label
trace = go.Scatter(
    x=indices,
    y=distances,
    mode='lines+markers',
    marker={'size': 8},
    text=hover_text,
    hovertemplate='%{text}<extra></extra>',
)
fig = go.Figure(data=trace)

fig.update_layout(
    title='Image Similarity Plot',
    xaxis_title='Index',
    yaxis_title='Distance',
    width=800,
    height=400,
    showlegend=False,
)

# Render the figure in the notebook output
fig.show()

# Build one HTML card per result, then wrap all cards in a flex container
thumbnail_size = (150, 210)
cards = []

for rank, hit in enumerate(neighbors):
    thumbnail = get_image_html(hit, size=thumbnail_size)
    cards.append(f"""
    <div style='text-align: center; border: 1px solid #ddd; padding: 5px;'>
        {thumbnail}
        <br>
        <small>Index: {rank}</small><br>
        <small>Distance: {1-hit.score:.4f}</small><br>
        <small>ID: {hit.payload['hash']}</small><br>
    </div>
    """)

html = ['<div style="display: flex; flex-wrap: wrap; gap: 10px;">', *cards, '</div>']
display(HTML(''.join(html)))