# Granularly Test the Graph Implementation
Use this notebook to stress-test the graph implementation step by step and identify where the bugs are.

In [6]:
import nest_asyncio
# NOTE(review): presumably required because the notebook kernel already runs an
# event loop and the workflow drives asyncio code — confirm against the graph module.
nest_asyncio.apply()

In [7]:
from conv_ai_ecommerce.data_ingestion.chroma_loader import load_chroma_index, load_dual_collections
from conv_ai_ecommerce.vlrag_framework.graph import create_enhanced_workflow
from conv_ai_ecommerce.vlrag_framework.prompts import create_response_chain
import clip

def load_data(index_path="../data/chroma_index", clip_model_name="ViT-B/32", device="cpu"):
    """Load the Chroma collections, the CLIP model, the workflow and the response chain.

    The dual (multimodal + text-only) collections are tried first. If loading
    them raises, only the multimodal index is loaded through
    ``load_chroma_index`` and both text-only entries are returned as ``None``.

    Args:
        index_path: Location handed to ``load_dual_collections`` /
            ``load_chroma_index``.
        clip_model_name: Model name handed to ``clip.load``.
        device: Device handed to ``clip.load``.

    Returns:
        A 7-tuple ``(multimodal_collection, multimodal_metadata,
        text_only_collection, text_only_metadata, clip_model, workflow,
        response_chain)``.
    """
    # Try to load both collections. Only the loading itself is guarded, so an
    # unrelated failure cannot silently trigger the fallback.
    try:
        collections = load_dual_collections(index_path)
        multimodal_collection = collections['multimodal']['collection']
        multimodal_metadata = collections['multimodal']['metadata']
        text_only_collection = collections['text_only']['collection']
        text_only_metadata = collections['text_only']['metadata']
    except Exception as e:
        # Deliberate best-effort fallback to regular (multimodal-only) loading.
        print(f"Could not load dual collections, using multimodal only: {e}")
        multimodal_collection, multimodal_metadata = load_chroma_index(index_path)
        text_only_collection, text_only_metadata = None, None
    else:
        if text_only_collection is not None:
            print("💡 Text-only collection loaded - will use optimized text search when no image is provided")

    # clip.load returns a pair; only the first element (the model) is kept.
    clip_model, _ = clip.load(clip_model_name, device=device)
    workflow = create_enhanced_workflow()
    response_chain = create_response_chain()

    return (multimodal_collection, multimodal_metadata,
            text_only_collection, text_only_metadata,
            clip_model, workflow, response_chain)

# Load every shared resource once; the cells below read these module-level names.
[
    collection,
    meta_df,
    text_only_collection,
    text_only_metadata,
    clip_model,
    workflow,
    response_chain,
] = load_data()

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given


💡 Text-only collection loaded - will use optimized text search when no image is provided


In [8]:
import torch
import os
import numpy as np

# Query under test; no image is attached for this run.
user_input = "What funko pops are you aware of?"
image_file = None

# Tokenise and encode the query text with CLIP (no gradients needed).
with torch.no_grad():
    text_input = clip.tokenize([user_input], truncate=True)
    query_emb = clip_model.encode_text(text_input).cpu().numpy()

# All-zero stand-in with the same shape and dtype as the text embedding.
image_emb = np.zeros_like(query_emb)

# Input state for the graph: query, embeddings, both collections and the chain.
state = dict(
    user_input=user_input,
    user_embedding=query_emb,
    image_embedding=image_emb,
    has_image=image_file is not None,  # True only when an image was provided
    vector_index=collection,
    metadata_df=meta_df,
    text_only_collection=text_only_collection,
    text_only_metadata=text_only_metadata,
    response_chain=response_chain,
)



In [4]:
# Retrieval step, run by hand against the `state` dict built in the previous cell.
N_RESULTS = 3  # number of nearest neighbours to request

# Take both embeddings from `state` so every branch below only uses names that
# are actually defined in this notebook.
user_embedding = state['user_embedding']
image_embedding = state['image_embedding']

# `search_collection` is deliberately a separate name: rebinding the global
# `collection` here would make a re-run of the state-building cell store the
# text-only collection under state['vector_index'].
if state['has_image']:
    search_collection = state['vector_index']
    metadata_df = state['metadata_df']
    # Squeeze to 1D and then combine text and image parts into one query vector.
    text_emb_1d = user_embedding.squeeze()
    image_emb_1d = image_embedding.squeeze()
    search_emb = np.concatenate([text_emb_1d, image_emb_1d]).astype("float32")
elif state.get('text_only_collection') is not None:
    search_collection = state['text_only_collection']
    metadata_df = state['text_only_metadata']
    search_emb = user_embedding.squeeze().astype("float32")
else:
    # Fallback to multimodal if text-only is not available, but use only the text embedding.
    search_collection = state['vector_index']
    metadata_df = state['metadata_df']
    # The query must match the multimodal embedding structure, so the image
    # part is filled with a zero vector.
    text_emb_1d = user_embedding.squeeze()
    # assumes the image embedding has the same dimension as the text embedding — TODO confirm
    image_emb_placeholder = np.zeros_like(text_emb_1d)
    search_emb = np.concatenate([text_emb_1d, image_emb_placeholder]).astype("float32")

# Retrieve relevant documents
results = search_collection.query(
    query_embeddings=[search_emb.tolist()],
    n_results=N_RESULTS
)

# Map the returned ids back to metadata rows.
# NOTE(review): assumes each id is the stringified positional row number of
# `metadata_df` — confirm against the ingestion code.
if results['ids'] and len(results['ids'][0]) > 0:
    indices = [int(doc_id) for doc_id in results['ids'][0]]
    source_data = metadata_df.iloc[indices]
else:
    # Fallback to an empty dataframe (same columns) if there are no results.
    source_data = metadata_df.iloc[:0].copy()

state['source_data'] = source_data

Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


In [5]:
# Inspect the raw query payload returned by the collection.
# NOTE(review): in the saved run the hits for the Funko query look unrelated
# (an item named '-' and a bingo game) — worth checking retrieval quality.
results

{'ids': [['2645', '7741', '8822']],
 'embeddings': None,
 'documents': [[None, None, None]],
 'uris': None,
 'data': None,
 'metadatas': [[{'description': '- -: - -" | - -: - -. -. - - | - - -, - - - - - - - - | - - - - -.',
    'image_url': 'https://images-na.ssl-images-amazon.com/images/I/31An4Vzy6eL.jpg|https://images-na.ssl-images-amazon.com/images/I/315AP21SrbL.jpg|https://images-na.ssl-images-amazon.com/images/I/41zUf%2B2OHtL.jpg|https://images-na.ssl-images-amazon.com/images/I/410Pbq%2BxgsL.jpg|https://images-na.ssl-images-amazon.com/images/I/41locO2ICcL.jpg|https://images-na.ssl-images-amazon.com/images/G/01/x-locale/common/transparent-pixel.jpg',
    'name': '-',
    'uniq_id': '4b6ee3211515a424ab63b0adea22fce3'},
   {'description': 'Make sure this fits by entering your model number. | Huntar | bingo | bingo gaame | party bingo',
    'image_url': 'https://images-na.ssl-images-amazon.com/images/I/41JQmaA98QL.jpg|https://images-na.ssl-images-amazon.com/images/I/41bDwSiyc0L.jpg|h

In [16]:
# NOTE(review): `response` is not assigned in any cell visible here (execution
# counts jump from 5 to 16) — presumably the result of invoking `workflow` on
# `state`; confirm and restore that cell so Restart & Run All works.
print(response['response'])

 I'm glad to help with that! Funko Pops are not included in the provided context, but they are indeed popular collectible figures available on Amazon. Here are a few examples:

1. Funko Pop! Marvel: Avengers - Iron Man (Mark 50) - [Image](https://images-na.ssl-images-amazon.com/images/I/71z%2BqT6KwLL._AC_SL1500_.jpg)
2. Funko Pop! DC Comics - Batman (Blackest Night) - [Image](https://images-na.ssl-images-amazon.com/images/I/81Zz%2B7JXbPL._AC_SL1500_.jpg)
3. Funko Pop! Star Wars - Darth Vader (Emperor's Hand) - [Image](https://images-na.ssl-images-amazon.com/images/I/81z%2BqT6KwLL._AC_SL1500_.jpg)
4. Funko Pop! Disney - Mickey Mouse (Marvel's Guardians of the Galaxy Vol. 2) - [Image](https://images-na.ssl-images-amazon.com/images/I/71z%2BqT6KwLL._AC_SL1500_.jpg)

You can find more Funko Pops by visiting the dedicated section on Amazon: [Funko Pop! Vinyl Figures](https://www.amazon.com/s?k=funko+pop&ref=nb_sb_noss_2). Enjoy your shopping!


In [17]:
# Show which keys the returned state dict carries.
print(response.keys())

dict_keys(['user_input', 'user_embedding', 'image_embedding', 'has_image', 'vector_index', 'metadata_df', 'text_only_collection', 'text_only_metadata', 'response_chain', 'source_data', 'response'])
