### Imports

In [10]:
import openai
import instructor
from qdrant_client import QdrantClient

from pydantic import BaseModel, Field

### RAG Pipeline

In [11]:
# Shared Qdrant client pointed at a local instance (http://localhost:6333);
# it is passed explicitly into rag_pipeline / retrieve_data rather than read as a global.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [12]:
class RAGGenerationResponse(BaseModel):
    # Structured-output schema passed as `response_model` in generate_answer.
    # Documented with `#` comments on purpose: a class docstring would be picked up
    # as the schema description and change what is sent to the model.
    answer: str = Field(
        description="Answer to the question.",
    )

In [13]:
def get_embedding(text, model="text-embedding-3-small"):
    """Embed ``text`` through the OpenAI embeddings endpoint.

    Args:
        text: Value forwarded as ``input`` to ``openai.embeddings.create``.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` attribute of the first entry in the response's ``data``.
    """
    embedding_response = openai.embeddings.create(input=text, model=model)
    first_entry = embedding_response.data[0]
    return first_entry.embedding


def retrieve_data(query, qdrant_client, k=5):
    """Fetch the ``k`` nearest items for ``query`` from the Qdrant collection.

    Args:
        query: Text to embed (via ``get_embedding``) and search with.
        qdrant_client: Client object exposing ``query_points``.
        k: Maximum number of points to request.

    Returns:
        A dict of four parallel lists: ``retrieved_context_ids`` (payload
        ``parent_asin``), ``retrieved_context`` (payload ``description``),
        ``retrieved_context_ratings`` (payload ``average_rating``) and
        ``similarity_scores`` (each point's ``score``).
    """
    query_vector = get_embedding(query)

    hits = qdrant_client.query_points(
        collection_name="Amazon-items-collection-00",
        query=query_vector,
        limit=k,
    )

    collected = {
        "retrieved_context_ids": [],
        "retrieved_context": [],
        "retrieved_context_ratings": [],
        "similarity_scores": [],
    }

    # Single pass over the hits so the four lists stay index-aligned.
    for point in hits.points:
        collected["retrieved_context_ids"].append(point.payload["parent_asin"])
        collected["retrieved_context"].append(point.payload["description"])
        collected["retrieved_context_ratings"].append(point.payload["average_rating"])
        collected["similarity_scores"].append(point.score)

    return collected


def process_context(context):
    """Render the retrieved items as a text block with one bullet line per item.

    Args:
        context: Mapping holding parallel sequences under the keys
            ``retrieved_context_ids``, ``retrieved_context`` and
            ``retrieved_context_ratings``.

    Returns:
        A string containing one ``- ID: ..., rating: ..., description: ...`` line
        per item, each terminated by a newline. Returns ``""`` when there are no
        items. If the sequences differ in length, ``zip`` stops at the shortest.
    """
    rows = zip(
        context["retrieved_context_ids"],
        context["retrieved_context"],
        context["retrieved_context_ratings"],
    )

    # join() replaces repeated `+=` concatenation; `item_id` avoids shadowing the builtin `id`.
    return "".join(
        f"- ID: {item_id}, rating: {rating}, description: {chunk}\n"
        for item_id, chunk, rating in rows
    )


def build_prompt(preprocessed_context, question):
    """Assemble the shopping-assistant prompt from the formatted context and the question.

    Args:
        preprocessed_context: Formatted product list (the string produced by
            ``process_context``), inserted under the ``Context:`` header.
        question: The user's question, inserted verbatim under ``Question:``.

    Returns:
        The complete prompt string.
    """
    # The section header previously read "Instructtions:"; the typo is fixed so the
    # model receives a correctly spelled label.
    prompt = f"""
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.

- The answer to the question should contain detailed information about the product and returned with detailed specification in bullet points.

Context:
{preprocessed_context}

Question:
{question}
"""

    return prompt

def generate_answer(prompt, model="gpt-4.1-mini", temperature=0.5):
    """Send ``prompt`` to the chat model and return the parsed structured answer.

    Args:
        prompt: Full prompt text; sent as a single ``system`` message.
        model: Chat model name (defaults to the previously hard-coded value).
        temperature: Sampling temperature (defaults to the previously hard-coded value).

    Returns:
        A ``RAGGenerationResponse`` instance parsed by instructor.
    """
    client = instructor.from_openai(openai.OpenAI())

    # create_with_completion yields (parsed_model, raw_completion); the raw
    # completion is not needed here, so it is discarded instead of bound to an unused name.
    response, _ = client.chat.completions.create_with_completion(
        model=model,
        messages=[{"role": "system", "content": prompt}],
        temperature=temperature,
        response_model=RAGGenerationResponse,
    )

    return response


def rag_pipeline(question, qdrant_client, top_k=5):
    """Run retrieval, prompt construction and generation for one question.

    Args:
        question: The user's question.
        qdrant_client: Client forwarded to ``retrieve_data``.
        top_k: Number of items to retrieve.

    Returns:
        A dict with the generated ``answer``, the original ``question``, and the
        retrieval's ``retrieved_context_ids``, ``retrieved_context`` and
        ``similarity_scores``.
    """
    retrieval = retrieve_data(question, qdrant_client, top_k)
    generation = generate_answer(
        build_prompt(process_context(retrieval), question)
    )

    return {
        "answer": generation.answer,
        "question": question,
        "retrieved_context_ids": retrieval["retrieved_context_ids"],
        "retrieved_context": retrieval["retrieved_context"],
        "similarity_scores": retrieval["similarity_scores"],
    }

In [14]:
# Smoke-test the pipeline end to end on a sample question, retrieving 10 items.
result = rag_pipeline("Can I get some earphones?", qdrant_client, top_k=10)

In [15]:
# Display the whole result dict (answer, question, retrieved IDs/context, similarity scores).
result

{'answer': 'Yes, there are several earphones available:\n\n1. Wireless Earbuds, Bluetooth 5.3 Headphones (ID: B0B9FTVL58)\n- Bluetooth 5.3\n- 37H Playback\n- LED Power Display\n- In-Ear Headphones with Deep Bass\n- IPX7 Waterproof\n- Ultra-Light with Charging Case\n- Smart Touch Controls\n- Suitable for Sport\n\n2. TELSOR Wireless Earbuds for iPhone (ID: B0C6K1GQCF)\n- Bluetooth 5.1\n- Touch Control Stereo Sound\n- Noise Cancelling Mic for Calls\n- 30H Playtime (6H per charge + charging case)\n- IPX7 Waterproof\n- Ergonomic Design\n- USB-C Charging\n\n3. Open Ear Headphones Bluetooth 5.3 Earbuds (ID: B0CBMPG524)\n- True Wireless Open Ear Design\n- 60H Playtime (11H per charge + charging case)\n- IPX7 Waterproof\n- Immersive Premium Sound\n- Earhooks for Secure Fit\n- Suitable for Running, Walking, Workouts\n\n4. Siniffo Upgraded Bone Conduction Headphones (ID: B0BNHVLF7G)\n- Wireless Bluetooth 5.3\n- Open Ear Sports Earphones\n- Sweat Resistant\n- Noise Canceling Mic\n- 8-Hour Battery 

In [16]:
# print() renders the answer's newlines instead of showing them escaped as in the dict repr.
print(result["answer"])

Yes, there are several earphones available:

1. Wireless Earbuds, Bluetooth 5.3 Headphones (ID: B0B9FTVL58)
- Bluetooth 5.3
- 37H Playback
- LED Power Display
- In-Ear Headphones with Deep Bass
- IPX7 Waterproof
- Ultra-Light with Charging Case
- Smart Touch Controls
- Suitable for Sport

2. TELSOR Wireless Earbuds for iPhone (ID: B0C6K1GQCF)
- Bluetooth 5.1
- Touch Control Stereo Sound
- Noise Cancelling Mic for Calls
- 30H Playtime (6H per charge + charging case)
- IPX7 Waterproof
- Ergonomic Design
- USB-C Charging

3. Open Ear Headphones Bluetooth 5.3 Earbuds (ID: B0CBMPG524)
- True Wireless Open Ear Design
- 60H Playtime (11H per charge + charging case)
- IPX7 Waterproof
- Immersive Premium Sound
- Earhooks for Secure Fit
- Suitable for Running, Walking, Workouts

4. Siniffo Upgraded Bone Conduction Headphones (ID: B0BNHVLF7G)
- Wireless Bluetooth 5.3
- Open Ear Sports Earphones
- Sweat Resistant
- Noise Canceling Mic
- 8-Hour Battery Life
- Type-C Quick Charge
- IP56 Waterproof
-

### RAG Pipeline with Grounding Context

In [17]:
class RAGUsedContext(BaseModel):
    # One referenced item: its ID plus a description of it.
    # Documented with `#` comments on purpose: a class docstring would be picked up
    # as the schema description and change what is sent to the model.
    id: str = Field(
        description="ID of the item used to answer the question.",
    )
    description: str = Field(
        description="Description of the item used to answer the question.",
    )

class RAGGenerationResponseWithReferences(BaseModel):
    # Structured-output schema passed as `response_model` in generate_answer:
    # the answer text plus the items it was grounded on.
    # `#` comments are used instead of a docstring so the generated schema is unchanged.
    answer: str = Field(
        description="Answer to the question.",
    )
    references: list[RAGUsedContext] = Field(
        description="List of items used to answer the question.",
    )

def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding for ``text`` from the OpenAI embeddings endpoint.

    Note: this repeats the definition from the earlier "RAG Pipeline" cell and
    rebinds the same name when this cell runs.

    Args:
        text: Value forwarded as ``input`` to ``openai.embeddings.create``.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` attribute of the first entry in the response's ``data``.
    """
    api_result = openai.embeddings.create(input=text, model=model)
    return api_result.data[0].embedding


def retrieve_data(query, qdrant_client, k=5):
    """Search the Qdrant collection for the ``k`` items closest to ``query``.

    Note: this repeats the definition from the earlier "RAG Pipeline" cell and
    rebinds the same name when this cell runs.

    Args:
        query: Text to embed (via ``get_embedding``) and search with.
        qdrant_client: Client object exposing ``query_points``.
        k: Maximum number of points to request.

    Returns:
        A dict of four parallel lists: ``retrieved_context_ids`` (payload
        ``parent_asin``), ``retrieved_context`` (payload ``description``),
        ``retrieved_context_ratings`` (payload ``average_rating``) and
        ``similarity_scores`` (each point's ``score``).
    """
    query_vector = get_embedding(query)

    search_response = qdrant_client.query_points(
        collection_name="Amazon-items-collection-00",
        query=query_vector,
        limit=k,
    )

    item_ids, descriptions, ratings, scores = [], [], [], []

    # One pass over the hits keeps the four lists index-aligned.
    for hit in search_response.points:
        item_ids.append(hit.payload["parent_asin"])
        descriptions.append(hit.payload["description"])
        ratings.append(hit.payload["average_rating"])
        scores.append(hit.score)

    return dict(
        retrieved_context_ids=item_ids,
        retrieved_context=descriptions,
        retrieved_context_ratings=ratings,
        similarity_scores=scores,
    )


def process_context(context):
    """Format the retrieved items as text, one bullet line per item.

    Note: this repeats the definition from the earlier "RAG Pipeline" cell and
    rebinds the same name when this cell runs.

    Args:
        context: Mapping holding parallel sequences under the keys
            ``retrieved_context_ids``, ``retrieved_context`` and
            ``retrieved_context_ratings``.

    Returns:
        A string containing one ``- ID: ..., rating: ..., description: ...`` line
        per item, each terminated by a newline. Returns ``""`` when there are no
        items. If the sequences differ in length, ``zip`` stops at the shortest.
    """
    # Build all lines, then join once (no repeated `+=`); `item_id` avoids
    # shadowing the builtin `id`.
    lines = [
        f"- ID: {item_id}, rating: {rating}, description: {chunk}\n"
        for item_id, chunk, rating in zip(
            context["retrieved_context_ids"],
            context["retrieved_context"],
            context["retrieved_context_ratings"],
        )
    ]
    return "".join(lines)


def build_prompt(preprocessed_context, question):
    """Assemble the prompt that asks for an answer plus the references used.

    Args:
        preprocessed_context: Formatted product list (the string produced by
            ``process_context``), inserted under the ``Context:`` header.
        question: The user's question, inserted verbatim under ``Question:``.

    Returns:
        The complete prompt string.
    """
    # The section header previously read "Instructtions:"; the typo is fixed so the
    # model receives a correctly spelled label.
    prompt = f"""
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.
- As an output, you need to provide:

* The answer to the question based on the provided context.
* The list of the IDs of the chunks that were used to answer the question. Only return the ones that are used in the answer.
* Short description (1-2 sentences) of the item based on the description provided in the context.

- The short description should have the name of the item.
- The answer to the question should contain detailed information about the product and returned with detailed specification in bullet points.

Context:
{preprocessed_context}

Question:
{question}
"""

    return prompt

def generate_answer(prompt, model="gpt-4.1-mini", temperature=0.5):
    """Send ``prompt`` to the chat model and return the answer with its references.

    Args:
        prompt: Full prompt text; sent as a single ``system`` message.
        model: Chat model name (defaults to the previously hard-coded value).
        temperature: Sampling temperature (defaults to the previously hard-coded value).

    Returns:
        A ``RAGGenerationResponseWithReferences`` instance parsed by instructor.
    """
    client = instructor.from_openai(openai.OpenAI())

    # create_with_completion yields (parsed_model, raw_completion); the raw
    # completion is not needed here, so it is discarded instead of bound to an unused name.
    response, _ = client.chat.completions.create_with_completion(
        model=model,
        messages=[{"role": "system", "content": prompt}],
        temperature=temperature,
        response_model=RAGGenerationResponseWithReferences,
    )

    return response


def rag_pipeline(question, qdrant_client, top_k=5):
    """Run retrieval, prompt construction and grounded generation for one question.

    Args:
        question: The user's question.
        qdrant_client: Client forwarded to ``retrieve_data``.
        top_k: Number of items to retrieve.

    Returns:
        A dict with the generated ``answer`` and its ``references``, the original
        ``question``, and the retrieval's ``retrieved_context_ids``,
        ``retrieved_context`` and ``similarity_scores``.
    """
    retrieval = retrieve_data(question, qdrant_client, top_k)
    context_text = process_context(retrieval)
    generation = generate_answer(build_prompt(context_text, question))

    return {
        "answer": generation.answer,
        "references": generation.references,
        "question": question,
        "retrieved_context_ids": retrieval["retrieved_context_ids"],
        "retrieved_context": retrieval["retrieved_context"],
        "similarity_scores": retrieval["similarity_scores"],
    }

In [18]:
# Re-run the same sample question through the pipeline variant that also returns references.
# Note: this rebinds `result`, replacing the value from the first pipeline run.
result = rag_pipeline("Can I get some earphones?", qdrant_client, top_k=10)
result

{'answer': "Yes, you can get several types of earphones from the available products:\n\n1. TUNEAKE Kids Headphones (ID: B0C142QS8X)\n- Over-ear design specifically for kids with volume limited to 94dB for hearing protection.\n- Foldable and adjustable headband for comfort and portability.\n- Compatible with all devices having a 3.5mm audio jack.\n- No microphone included, but you can still use the device's mic.\n\n2. Wireless Earbuds, Bluetooth 5.3 Headphones (ID: B0B9FTVL58)\n- Bluetooth 5.3 for stable wireless connection.\n- 37 hours playback with LED power display.\n- In-ear design with deep bass and IPX7 waterproof rating.\n- Includes microphone and smart touch controls.\n\n3. TELSOR Wireless Earbuds (ID: B0C6K1GQCF)\n- Bluetooth 5.1 with AVRCP, HCP, HSP, and A2DP profiles.\n- Noise-cancelling mic with 10mm speakers for clear calls and deep bass music.\n- 6 hours playtime per charge, 30 hours with charging case.\n- IPX7 waterproof and ergonomic design.\n- Smart touch controls for v

In [19]:
# print() renders the answer's newlines instead of showing them escaped as in the dict repr.
print(result["answer"])

Yes, you can get several types of earphones from the available products:

1. TUNEAKE Kids Headphones (ID: B0C142QS8X)
- Over-ear design specifically for kids with volume limited to 94dB for hearing protection.
- Foldable and adjustable headband for comfort and portability.
- Compatible with all devices having a 3.5mm audio jack.
- No microphone included, but you can still use the device's mic.

2. Wireless Earbuds, Bluetooth 5.3 Headphones (ID: B0B9FTVL58)
- Bluetooth 5.3 for stable wireless connection.
- 37 hours playback with LED power display.
- In-ear design with deep bass and IPX7 waterproof rating.
- Includes microphone and smart touch controls.

3. TELSOR Wireless Earbuds (ID: B0C6K1GQCF)
- Bluetooth 5.1 with AVRCP, HCP, HSP, and A2DP profiles.
- Noise-cancelling mic with 10mm speakers for clear calls and deep bass music.
- 6 hours playtime per charge, 30 hours with charging case.
- IPX7 waterproof and ergonomic design.
- Smart touch controls for volume, playback, calls, and voi