### Import dependencies

In [18]:
import openai
import instructor
from qdrant_client import QdrantClient

from pydantic import BaseModel, Field

### Mock Example

In [19]:
# Minimal prompt for the mock example. The cells below send it as the single
# system message, first to the plain OpenAI client and then to the instructor one.
prompt = """
You are a helpful assistant.
Return an answer to the question.
Question: What is your name?
"""

In [20]:
# Baseline: plain chat completion through the module-level `openai` client,
# with no structured output. The whole prompt goes in as a system message.
response = openai.chat.completions.create(
    model="gpt-4.1-mini",
    messages=[
        {"role": "system", "content": prompt}
    ]
)

# The generated text is read from the first choice's message content.
print(response.choices[0].message.content)

I am ChatGPT, your AI assistant. How can I help you today?


#### Add instructor (structured outputs)

In [21]:
# Wrap an OpenAI client with instructor; the wrapped client is the one that is
# called with `response_model=` in the cells below.
client = instructor.from_openai(openai.OpenAI())

In [22]:
# Structured-output schema passed as `response_model` to the instructor client.
# NOTE(review): documented with `#` comments instead of a class docstring on
# purpose. Pydantic copies a class docstring into the generated JSON schema, so
# adding one could change what is sent to the model. Confirm before converting.
class RAGGenerationResponse(BaseModel):
    # Free-text answer; the only field the model is asked to fill in.
    answer: str = Field(description="Answer to the question.")

In [23]:
# Same prompt as the baseline call, but through the instructor client.
# `create_with_completion` is unpacked into the parsed `response_model` instance
# and the raw completion; both are displayed in the next two cells.
# Note: this rebinds `response`, which previously held the plain completion.
response, raw_response = client.chat.completions.create_with_completion(
    model="gpt-4.1-mini",
    messages=[
        {"role": "system", "content": prompt}
    ],
    response_model=RAGGenerationResponse
)

In [24]:
# Display the parsed result (a RAGGenerationResponse instance).
response

RAGGenerationResponse(answer='I am ChatGPT, an AI language model created by OpenAI. How can I assist you today?')

In [25]:
# Display the raw completion object returned alongside the parsed result
# (the recorded output shows a ChatCompletion with a tool call and token usage).
raw_response

ChatCompletion(id='chatcmpl-CQw666z19orRR2NHGbZZlOlJoYNcA', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=[ChatCompletionMessageFunctionToolCall(id='call_OqKc8cOgzwUyMHRqNhDscY1B', function=Function(arguments='{"answer":"I am ChatGPT, an AI language model created by OpenAI. How can I assist you today?"}', name='RAGGenerationResponse'), type='function')]))], created=1760535890, model='gpt-4.1-mini-2025-04-14', object='chat.completion', service_tier='default', system_fingerprint='fp_c064fdde7c', usage=CompletionUsage(completion_tokens=25, prompt_tokens=94, total_tokens=119, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

### RAG Pipeline

In [26]:
def get_embedding(text, model="text-embedding-3-small"):
    """Embed ``text`` with the OpenAI embeddings endpoint.

    Args:
        text: Value forwarded as ``input`` to ``openai.embeddings.create``.
        model: Embedding model name; defaults to ``"text-embedding-3-small"``.

    Returns:
        The ``embedding`` attribute of the first entry in the response's ``data``.
    """
    embedding_items = openai.embeddings.create(model=model, input=text).data
    first_item = embedding_items[0]
    return first_item.embedding


def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-00"):
    """Retrieve the ``k`` nearest items for ``query`` from a Qdrant collection.

    The query is embedded with ``get_embedding`` and the resulting vector is
    passed to ``qdrant_client.query_points``.

    Args:
        query: Text to embed and search with.
        qdrant_client: Object exposing ``query_points(collection_name=, query=, limit=)``
            whose result has a ``points`` iterable.
        k: Maximum number of points requested (passed as ``limit``).
        collection_name: Collection to search. Defaults to the collection this
            notebook was written against, so existing calls behave as before.

    Returns:
        A dict of four parallel lists, one entry per returned point:
        ``retrieved_context_ids`` (payload ``parent_asin``),
        ``retrieved_context`` (payload ``description``),
        ``retrieved_context_ratings`` (payload ``average_rating``) and
        ``similarity_scores`` (the point's ``score``).

    Raises:
        KeyError: If a point's payload lacks one of the three keys above
            (assuming the payload is a dict).
    """
    query_embedding = get_embedding(query)

    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )

    retrieved_context_ids = []
    retrieved_context = []
    retrieved_context_ratings = []
    similarity_scores = []

    for point in results.points:
        payload = point.payload
        retrieved_context_ids.append(payload["parent_asin"])
        retrieved_context.append(payload["description"])
        retrieved_context_ratings.append(payload["average_rating"])
        similarity_scores.append(point.score)

    return {
        "retrieved_context_ids": retrieved_context_ids,
        "retrieved_context": retrieved_context,
        "retrieved_context_ratings": retrieved_context_ratings,
        "similarity_scores": similarity_scores,
    }


def process_context(context):
    """Render retrieved items as a bulleted text block, one line per item.

    Args:
        context: Mapping with the parallel sequences ``retrieved_context_ids``,
            ``retrieved_context`` and ``retrieved_context_ratings``.

    Returns:
        A string with one ``"- ID: ..., rating: ..., description: ...\\n"`` line
        per item, or ``""`` when the sequences are empty. The sequences are
        zipped, so output stops at the shortest one.
    """
    rows = zip(
        context["retrieved_context_ids"],
        context["retrieved_context"],
        context["retrieved_context_ratings"],
    )
    # `item_id` avoids shadowing the builtin `id`; join avoids repeated `+=`.
    return "".join(
        f"- ID: {item_id}, rating: {rating}, description: {chunk}\n"
        for item_id, chunk, rating in rows
    )


def build_prompt(preprocessed_context, question):
    """Build the shopping-assistant prompt from formatted context and a question.

    Args:
        preprocessed_context: Text inserted under the ``Context:`` heading.
        question: Text inserted under the ``Question:`` heading.

    Returns:
        The complete prompt string; it starts and ends with a newline.
    """
    # The section heading used to read "Instructtions:"; the typo is fixed
    # because this text is sent to the model verbatim.
    prompt = f"""
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.

Context:
{preprocessed_context}

Question:
{question}
"""

    return prompt

def generate_answer(prompt, client=None):
    """Ask the model for a structured answer to ``prompt``.

    Args:
        prompt: Text sent as the single system message.
        client: Optional instructor-wrapped client to reuse. When omitted, a new
            one is built with ``instructor.from_openai(openai.OpenAI())``, which
            matches the previous behaviour. Passing a shared client avoids
            constructing a fresh client on every call.

    Returns:
        The parsed ``RAGGenerationResponse`` produced by the call.
    """
    if client is None:
        client = instructor.from_openai(openai.OpenAI())

    # create_with_completion yields (parsed model, raw completion); only the
    # parsed model is returned, so the raw completion is discarded.
    response, _ = client.chat.completions.create_with_completion(
        model="gpt-4.1-mini",
        messages=[{"role": "system", "content": prompt}],
        temperature=0.5,
        response_model=RAGGenerationResponse,
    )

    return response


def rag_pipeline(question, qdrant_client, top_k=5):
    """Run retrieval, prompt construction and generation for one question.

    Args:
        question: The user's question.
        qdrant_client: Client handed to ``retrieve_data``.
        top_k: Number of items to retrieve; passed through to ``retrieve_data``.

    Returns:
        A dict with ``answer``, ``question``, ``retrieved_context_ids``,
        ``retrieved_context`` and ``similarity_scores`` (in that key order).
    """
    hits = retrieve_data(question, qdrant_client, top_k)
    generation = generate_answer(build_prompt(process_context(hits), question))

    result = {"answer": generation.answer, "question": question}
    # Copy over only the retrieval fields that are part of the result.
    for key in ("retrieved_context_ids", "retrieved_context", "similarity_scores"):
        result[key] = hits[key]

    return result

In [27]:
# Client for the Qdrant server expected at http://localhost:6333; it is passed
# into rag_pipeline below.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [28]:
# Run the full pipeline for a sample question, using the default top_k of 5.
output = rag_pipeline("Can I get a tablet?", qdrant_client)

In [29]:
# Display the pipeline result: the answer plus the retrieved IDs, descriptions
# and similarity scores it was based on.
output

{'answer': 'Based on the available products, there is no tablet listed in stock. However, there are accessories compatible with tablets such as iPads (like the KEEPRO Pencil 2nd Generation and ESR case for iPad Air), but no actual tablet device is available for purchase.',
 'question': 'Can I get a tablet?',
 'retrieved_context_ids': ['B0BF18F6R7',
  'B08BX2L8F2',
  'B09QGNB537',
  'B0C9XFF3CT',
  'B0BG5L2YLC'],
 'retrieved_context': ['KEEPRO Pencil 2nd Generation for iPad, Magnetic Wireless Charge Tilt Sensitivity Palm Rejection Active Pen for Apple iPad Pro 11" 4/3/2/1, iPad Pro 12.9" 6/5/4/3, iPad Air 4/5, iPad Mini 6 [Compatibility]- ONLY compatible with iPad mini (6th generation), iPad Air (4th and 5th generation), iPad Pro 12.9-inch (3rd, 4th, 5th and 6th generation), iPad Pro 11-inch (1st, 2nd, 3rd and 4th generation), check and confirm your device before place the order (Note: If the pen doesn\'t charge, fully charge your iPad first then try charging the pen again)[Charging and