## Import Libraries


In [1]:
import os
import warnings
from dotenv import load_dotenv
from typing import List

# Haystack Imports
from haystack import Pipeline, component
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.builders import ChatPromptBuilder
from haystack.dataclasses import ChatMessage

# Haystack-MongoDB Integration Imports
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore
from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasEmbeddingRetriever

# Groq Import
from groq import Groq

warnings.filterwarnings("ignore", category=UserWarning, message="The instance of ChatPromptBuilder is not serializable.")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()

True

## Custom Groq Generator Classes

In [None]:
@component
class GroqChatGenerator:
    def __init__(self, model: str = "llama3-70b-8192", api_key: str = None): # type: ignore
        """
        Initializes the GroqChatGenerator.
        The default model has been updated to a current, active model.
        """
        self.client = Groq(api_key=api_key or os.environ.get("GROQ_API_KEY"))
        self.model = model
    
    @component.output_types(replies=List[ChatMessage])
    def run(self, messages: List[ChatMessage]):
        if not messages:
            raise ValueError("The 'messages' list received by GroqChatGenerator is empty.")

        groq_messages = []
        for msg in messages:
            content = ""
            if hasattr(msg, 'content'):
                content = msg.content
            elif hasattr(msg, 'text'):
                content = msg.text

            if content and hasattr(msg, 'role'):
                role = msg.role.value if hasattr(msg.role, 'value') else str(msg.role)
                groq_messages.append({"role": role.lower(), "content": content})

        if not groq_messages:
            raise ValueError(
                "The 'groq_messages' list is empty after conversion. "
                "The ChatMessage objects seem to be malformed (missing .role or .content/.text)."
            )

        response = self.client.chat.completions.create(
            model=self.model,
            messages=groq_messages,
            temperature=0.7,
            max_tokens=1000
        )
        
        return {
            "replies": [
                ChatMessage.from_assistant(response.choices[0].message.content)
            ]
        }

## Setup Document Stores


In [4]:
products_document_store = MongoDBAtlasDocumentStore(
    database_name="smartshopper_store",
    collection_name="products",
    vector_search_index="vector_index",
    full_text_search_index="search_index",
)

common_info_document_store = MongoDBAtlasDocumentStore(
    database_name="smartshopper_store",
    collection_name="common_info", 
    vector_search_index="common_info_vector_index",
    full_text_search_index="common_info_search_index",
)

## Test Product Recommendation Generation


In [5]:
product_messages = [
    ChatMessage.from_system("You are a helpful shop assistant that provides product recommendations."),
    ChatMessage.from_user(
        """
        Your task is to generate a list of products that best match the query based on the available products.
        The output should be in the following format:
        **Summary:** <brief summary of what user is looking for>
        **Recommended Products:**
        1. **<product_name>** - Price: Rp <product_price> - Material: <product_material> - Category: <product_category> - Brand: <product_brand> - Why recommended: <explanation based on the product description>

        Query: {{query}}
        {% if documents|length > 0 %}
        Available products:
        {% for product in documents %}
        - Product: {{ product.meta.title }}, Price: Rp {{ product.meta.price }}, Material: {{ product.meta.material }}, Category: {{ product.meta.category }}, Brand: {{ product.meta.brand }}, Description: {{ product.content }}
        {% endfor %}
        {% else %}
        No matching products found.
        {% endif %}
        """
    )
]

### Create Product RAG Pipeline


In [6]:
product_rag_pipeline = Pipeline()
product_rag_pipeline.add_component("embedder", SentenceTransformersTextEmbedder())
product_rag_pipeline.add_component("retriever", MongoDBAtlasEmbeddingRetriever(document_store=products_document_store, top_k=5))
product_rag_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=product_messages))
product_rag_pipeline.add_component("generator", GroqChatGenerator())

product_rag_pipeline.connect("embedder.embedding", "retriever.query_embedding")
product_rag_pipeline.connect("retriever.documents", "prompt_builder.documents")
product_rag_pipeline.connect("prompt_builder.prompt", "generator.messages")

ChatPromptBuilder has 2 prompt variables, but `required_variables` is not set. By default, all prompt variables are treated as optional, which may lead to unintended behavior in multi-branch pipelines. To avoid unexpected execution, ensure that variables intended to be required are explicitly set in `required_variables`.


<haystack.core.pipeline.pipeline.Pipeline object at 0x0000026C4FEA52A0>
🚅 Components
  - embedder: SentenceTransformersTextEmbedder
  - retriever: MongoDBAtlasEmbeddingRetriever
  - prompt_builder: ChatPromptBuilder
  - generator: GroqChatGenerator
🛤️ Connections
  - embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> generator.messages (List[ChatMessage])

In [7]:
print("=== PRODUCT RECOMMENDATION TESTS ===")
product_test_queries = [
    "I need a smartphone for photography",
    "Looking for comfortable shoes for running"
]
for query in product_test_queries:
    print(f"\nQuery: '{query}'")
    print("-" * 50)
    result = product_rag_pipeline.run({
        "embedder": {"text": query},
        "prompt_builder": {"query": query}
    })
    response_msg = result["generator"]["replies"][0]
    response_text = response_msg.content if hasattr(response_msg, 'content') else response_msg.text
    print(response_text)

=== PRODUCT RECOMMENDATION TESTS ===

Query: 'I need a smartphone for photography'
--------------------------------------------------


Batches: 100%|██████████| 1/1 [00:00<00:00,  8.52it/s]


**Summary:** The user is looking for a smartphone specifically designed for photography.

**Recommended Products:**
1. **Samsung Galaxy S24** - Price: Rp 15000000 - Material: Glass - Category: Electronics - Brand: Samsung - Why recommended: This smartphone is recommended because it has advanced camera and AI features, making it perfect for photography enthusiasts.

Query: 'Looking for comfortable shoes for running'
--------------------------------------------------


Batches: 100%|██████████| 1/1 [00:00<00:00, 17.49it/s]


**Summary:** The user is looking for comfortable shoes specifically designed for running.

**Recommended Products:**
1. **Nike Air Max 270** - Price: Rp 2500000 - Material: Synthetic - Category: Shoes - Brand: Nike - Why recommended: This shoe is specifically designed for running with excellent cushioning and a modern design, making it an ideal choice for runners who prioritize comfort.


## Test Common Information Generation


### Common Info Template


In [None]:
common_info_messages = [
    ChatMessage.from_system("You are a helpful customer service assistant for an e-commerce platform."),
    ChatMessage.from_user(
        """
        Based on the retrieved information, provide a clear and helpful answer to the user's question.
        Retrieved Information:
        {% for doc in documents %}
        **{{ doc.meta.title }}:**
        {{ doc.content }}
        ---
        {% endfor %}
        User Question: {{query}}
        Please provide a comprehensive answer based on the information above.
        """
    )
]

### Create Common Info RAG Pipeline


In [9]:
common_info_rag_pipeline = Pipeline()
common_info_rag_pipeline.add_component("embedder_common", SentenceTransformersTextEmbedder())
common_info_rag_pipeline.add_component("retriever_common", MongoDBAtlasEmbeddingRetriever(document_store=common_info_document_store, top_k=3))
common_info_rag_pipeline.add_component("prompt_builder_common", ChatPromptBuilder(template=common_info_messages))
common_info_rag_pipeline.add_component("generator_common", GroqChatGenerator())

common_info_rag_pipeline.connect("embedder_common.embedding", "retriever_common.query_embedding")
common_info_rag_pipeline.connect("retriever_common.documents", "prompt_builder_common.documents")
common_info_rag_pipeline.connect("prompt_builder_common.prompt", "generator_common.messages")


ChatPromptBuilder has 2 prompt variables, but `required_variables` is not set. By default, all prompt variables are treated as optional, which may lead to unintended behavior in multi-branch pipelines. To avoid unexpected execution, ensure that variables intended to be required are explicitly set in `required_variables`.


<haystack.core.pipeline.pipeline.Pipeline object at 0x0000026C4FEA50C0>
🚅 Components
  - embedder_common: SentenceTransformersTextEmbedder
  - retriever_common: MongoDBAtlasEmbeddingRetriever
  - prompt_builder_common: ChatPromptBuilder
  - generator_common: GroqChatGenerator
🛤️ Connections
  - embedder_common.embedding -> retriever_common.query_embedding (List[float])
  - retriever_common.documents -> prompt_builder_common.documents (List[Document])
  - prompt_builder_common.prompt -> generator_common.messages (List[ChatMessage])

### Test Common Information


In [10]:
print("\n\n=== COMMON INFORMATION TESTS ===")
info_queries = [
    "What are your shipping options and costs?",
    "How can I return a product?",
]
for query in info_queries:
    print(f"\nQuery: '{query}'")
    print("-" * 50)
    result = common_info_rag_pipeline.run({
        "embedder_common": {"text": query},
        "prompt_builder_common": {"query": query}
    })
    response_msg = result["generator_common"]["replies"][0]
    response_text = response_msg.content if hasattr(response_msg, 'content') else response_msg.text
    print(response_text)



=== COMMON INFORMATION TESTS ===

Query: 'What are your shipping options and costs?'
--------------------------------------------------


Batches: 100%|██████████| 1/1 [00:00<00:00, 12.09it/s]


Hello there!

Thank you for reaching out to us about our shipping options and costs. We're happy to help you with that!

We offer two shipping options for your convenience:

**Standard Delivery**: This option is free of charge for orders above Rp 500,000. For orders below Rp 500,000, the shipping cost will be calculated at checkout. Standard delivery takes 2-3 business days within Jakarta and 3-5 days for other cities.

**Express Delivery**: If you need your items quickly, you can opt for our express delivery service, which is available for an additional fee of Rp 25,000. This service will ensure that your items arrive faster, but the exact delivery time will depend on your location.

Please note that our shipping options and costs may vary depending on your location and the weight of your order. You can estimate the shipping cost and delivery time at checkout before completing your order.

If you have any further questions or concerns about our shipping options, feel free to ask! We'r

Batches: 100%|██████████| 1/1 [00:00<00:00, 17.80it/s]


Hello there!

Thank you for reaching out to us about returning a product. We're happy to help you with the process.

To return an item, please ensure it meets our return policy conditions:

* The item must be in its original condition with tags attached.
* You need to initiate the return within 30 days of purchase.

Here's how to proceed:

1. Contact our customer support team via WhatsApp, email, or live chat on our website. We're available Monday to Friday from 9AM to 6PM WIB, and we'll respond to your query within 2-4 hours.
2. Let us know the reason for the return. If the item is defective, we'll provide a prepaid return shipping label. However, if you're returning due to a change of mind, you'll need to arrange and pay for the return shipping.
3. Once we receive your returned item, our team will process your refund within 5-7 business days. The refund will be credited back to your original payment method.
4. For e-wallet payments, please note that refunds may take up to 14 days to 

## Test Direct Generation (Without RAG)


In [11]:
print("\n\n=== DIRECT GENERATION TEST ===")
simple_generator = GroqChatGenerator()
greeting_messages = [
    ChatMessage.from_system("You are a friendly shopping assistant."),
    ChatMessage.from_user("Hello! Can you help me with shopping?")
]

result = simple_generator.run(greeting_messages)
print("Greeting Response:")
response_msg = result["replies"][0]
response_text = response_msg.content if hasattr(response_msg, 'content') else response_msg.text
print(response_text)

print("\n\n--- All tests completed ---")



=== DIRECT GENERATION TEST ===
Greeting Response:
Hello there! I'd be delighted to help you with your shopping needs! What kind of shopping are you looking to do today? Are you looking for something specific, like a gift or a particular product, or are you just browsing? Let me know and I'll do my best to assist you!


--- All tests completed ---
