In [1]:
import json
import re

import openai
import pandas as pd
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

#### Read the sampled dataset with Amazon inventory metadata

In [2]:
# The sample is a JSON Lines file (one record per line), hence lines=True.
df_items = pd.read_json(
    path_or_buf="../data/meta_Electronics_2022_2023_with_category_ratings_100_sample_1000.jsonl",
    lines=True,
)

#### Concatenate title and features

In [3]:
def preprocess_data(row):
    """Build the text for one item: its title followed by its features.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) holding a ``title``
            value and an iterable of feature strings under ``features``.

    Returns:
        The title, one space, then the features joined by single spaces.
    """
    title = row["title"]
    joined_features = " ".join(row["features"])
    return "{} {}".format(title, joined_features)

In [4]:
# Row-wise apply: store the combined title + features text in a new column.
df_items["preprocessed_data"] = df_items.apply(preprocess_data, axis="columns")

#### Initialize Qdrant client

In [None]:
# docker pull qdrant/qdrant

# docker run -p 6333:6333 -p 6334:6334 \
#     -v "$(pwd)/qdrant_storage:/qdrant/storage:z" \
#     qdrant/qdrant

# Access web UI at http://localhost:6333/dashboard

In [None]:
# Connect to the local Qdrant instance (see the docker command above).
qdrant_client = QdrantClient(url="http://localhost:6333")

# create_collection fails when the collection already exists, which made this
# cell break on every re-run because the docker volume persists the data.
# Create it only when it is missing. To start from a clean slate, first run:
#   qdrant_client.delete_collection(collection_name="Amazon-items-collection-01")
if not qdrant_client.collection_exists(collection_name="Amazon-items-collection-01"):
    qdrant_client.create_collection(
        collection_name="Amazon-items-collection-01",
        # size must equal the length of the vectors returned by get_embedding
        # (1536 for text-embedding-3-small).
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    )

True

#### Sample 50 items from the dataset

In [6]:
# A fixed random_state makes the 50-item draw repeatable across runs.
df_sample = df_items.sample(random_state=25, n=50)

#### Define the embeddings function

In [7]:
def get_embedding(text, model="text-embedding-3-small"):
    """Request an embedding for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: The string to embed; it is sent as a one-element input list.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` of the first (and only) item in the response data.
    """
    result = openai.embeddings.create(model=model, input=[text])
    first_item = result.data[0]
    return first_item.embedding

#### Embed data

In [8]:
data_to_embed = df_sample["preprocessed_data"].tolist()

# One point per sampled item: the id is the item's position in data_to_embed,
# and the embedded text is kept in the payload so it can be read back later.
pointstructs = [
    PointStruct(id=position, vector=get_embedding(text), payload={"text": text})
    for position, text in enumerate(data_to_embed)
]

#### Write embedded data to Qdrant

In [9]:
# Write all embedded points in a single request; wait=True asks Qdrant to
# confirm the write before the call returns.
qdrant_client.upsert(
    points=pointstructs,
    wait=True,
    collection_name="Amazon-items-collection-01",
    #collection_name="Amazon-items-collection-02",
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

#### Render a prompt to generate a synthetic eval reference dataset

In [10]:
# JSON Schema for the reply we ask the model to produce: a list of objects,
# each holding a question, the ids of the chunks that ground it, an example
# answer, and the reasoning behind the chunk choice.
output_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "Suggested question.",
            },
            "chunk_ids": {
                "type": "array",
                "items": {
                    "type": "integer",
                    "description": "Index of the chunk that could be used to answer the question.",
                },
            },
            "answer_example": {
                "type": "string",
                "description": "Suggested answer grounded in the context.",
            },
            "reasoning": {
                "type": "string",
                "description": "Reasoning why the question could be answered with the chunks.",
            },
        },
    },
}


# The chunk count is taken from data_to_embed instead of being hard-coded, so
# the prompt stays truthful if the sample size changes.
SYSTEM_PROMPT = f"""
I am building a RAG application. I have a collection of {len(data_to_embed)} chunks of text.
The RAG application will act as a shopping assistant that can answer questions about the stock of the products we have available.
I will provide all of the available products to you with indexes of each chunk.
I want you to come up with 30 questions to which the answers could be grounded in the chunk context.
As an output I need you to provide me the list of questions and the indexes of the chunks that could be used to answer them.
Also, provide an example answer to the question given the context of the chunks.
Also, provide the reason why you chose the chunks to answer the questions.
Try to have a mix of questions that could use multiple chunks and questions that could use single chunk.
Also, include 5 questions that can't be answered with the available chunks.

<OUTPUT JSON SCHEMA>
{json.dumps(output_schema, indent=2)}
</OUTPUT JSON SCHEMA>

I need to be able to parse the json output.
"""

# The ids given to the model are positions in data_to_embed, i.e. the same
# values that were used as point ids when building the PointStructs.
USER_PROMPT = f"""
Here is the list of chunks, each list element is a dictionary with id and text:
{[{"id": i, "text": data} for i, data in enumerate(data_to_embed)]}
"""

In [11]:
# Show the rendered system prompt, including the embedded JSON schema.
print(SYSTEM_PROMPT)


I am building a RAG application. I have a collection of 50 chunks of text.
The RAG application will act as a shopping assistant that can answer questions about the stock of the products we have available.
I will provide all of the available products to you with indexes of each chunk.
I want you to come up with 30 questions to which the answers could be grounded in the chunk context.
As an output I need you to provide me the list of questions and the indexes of the chunks that could be used to answer them.
Also, provide an example answer to the question given the context of the chunks.
Also, provide the reason why you chose the chunks to answer the questions.
Try to have a mix of questions that could use multipple chunks and questions that could use single chunk.
Also, include 5 questions that can't be answered with the available chunks.

<OUTPUT JSON SCHEMA>
{
  "type": "array",
  "items": {
    "type": "object",
    "properties": {
      "question": {
        "type": "string",
      

In [None]:
# User message: an intro sentence followed by every chunk with its id.
print(USER_PROMPT)


Here is the list of chunks, each list element is a dictionary with id and text:
[{'id': 0, 'text': 'Bluetooth Car Adapter, LDNIO Bluetooth FM Transmitter for Car, 43W PD&QC 3.0 Three USB Port Car Bluetooth Adapter with LED Display, Hands-Free Calling, and AUX Input for All Smartphones Audio Player Fast Charging Type C Multi Ports: USB Type C Durable Fast Connect & Clear Bluetooth Sound 3 in 1 Value'}, {'id': 1, 'text': 'Bluetooth Multi-Device Keyboard, Dual Channel Universal Rechargeable Wireless Keyboard with Integrated Stand for iPad Smartphone Tablet MacBook iOS Windows Android Devices - Pink „ÄêEasy-Switch to 2 Devices„Äë Simply press the FN+BT1/BT2 to switch typing between 2 connected Bluetooth devices, work with your smartphone and tablet on the slot stablely. „ÄêType Anywhere in Comfort„Äë0.46in thick and 11.34in long, the compact Bluetooth keyboard is small enough to tuck into your briefcase and light enough to hold in hand. Minimalist layout lets you multitask at home or on t

#### Generate synthetic eval reference data

In [13]:
# Instructions go in the system message, the chunk listing in the user message.
chat_messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": USER_PROMPT},
]
response = openai.chat.completions.create(messages=chat_messages, model="gpt-4.1")

# Raw model reply; it is parsed into Python objects in a later cell.
print(response.choices[0].message.content)

```json
[
  {
    "question": "Which Bluetooth speakers are available for purchase?",
    "chunk_ids": [9, 23, 47],
    "answer_example": "We have several Bluetooth speakers available: the maio Portable Wireless Speaker (30W, IPX67 waterproof, 18-hour playtime), the co2CREA Soft Silicone Case for the Bose SoundLink Flex Bluetooth Portable Speaker, and the Tiksounds Wireless Earbuds with Bluetooth 5.3 and up to 60 hours of playtime.",
    "reasoning": "Chunks 9, 23, and 47 all describe Bluetooth audio speaker products, some portable with waterproof or sports specifications."
  },
  {
    "question": "Do you have any products for organizing cables?",
    "chunk_ids": [4, 17, 43],
    "answer_example": "Yes, we offer a variety of cable organizing products including tifanso adhesive cable clips (6pcs), 40 pcs silicone cable ties in assorted colors, and ApexOne hook-and-loop reusable cable ties with metal buckles that you can cut to length.",
    "reasoning": "Each of these chunks provides 

#### Clean up the output and parse it as JSON

In [14]:
def parse_json_array(raw_text):
    """Extract and parse the JSON array contained in a model reply.

    The reply may wrap the array in a Markdown code fence, surround it with
    prose, or sprinkle ``//`` comments into it. Everything outside the
    outermost ``[...]`` is discarded and ``//`` comments are removed before
    the text is handed to ``json.loads``.

    Args:
        raw_text: The raw text returned by the model.

    Returns:
        The parsed JSON array as a Python list.

    Raises:
        ValueError: If ``raw_text`` contains no ``[...]`` span, or if the
            cleaned text is still not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass).
    """
    start = raw_text.find("[")
    end = raw_text.rfind("]")
    if start == -1 or end < start:
        raise ValueError("No JSON array found in the model response")
    array_text = raw_text[start : end + 1]

    # String literals are matched first and put back unchanged, so a "//"
    # inside a value (for example a URL) is never mistaken for a comment.
    array_text = re.sub(
        r'"(?:\\.|[^"\\])*"|//[^\n]*',
        lambda match: match.group(0) if match.group(0).startswith('"') else "",
        array_text,
    )
    return json.loads(array_text)


json_output = parse_json_array(response.choices[0].message.content)

In [15]:
# Display the parsed list of generated questions for a visual check.
json_output

[{'question': 'Which Bluetooth speakers are available for purchase?',
  'chunk_ids': [9, 23, 47],
  'answer_example': 'We have several Bluetooth speakers available: the maio Portable Wireless Speaker (30W, IPX67 waterproof, 18-hour playtime), the co2CREA Soft Silicone Case for the Bose SoundLink Flex Bluetooth Portable Speaker, and the Tiksounds Wireless Earbuds with Bluetooth 5.3 and up to 60 hours of playtime.',
  'reasoning': 'Chunks 9, 23, and 47 all describe Bluetooth audio speaker products, some portable with waterproof or sports specifications.'},
 {'question': 'Do you have any products for organizing cables?',
  'chunk_ids': [4, 17, 43],
  'answer_example': 'Yes, we offer a variety of cable organizing products including tifanso adhesive cable clips (6pcs), 40 pcs silicone cable ties in assorted colors, and ApexOne hook-and-loop reusable cable ties with metal buckles that you can cut to length.',
  'reasoning': 'Each of these chunks provides details about cable management items,

#### Upload the dataset to LangSmith

In [16]:
import os

from langsmith import Client

# Name under which the evaluation set is created in LangSmith.
dataset_name = "amazonbe-ai-agent-evaluation-dataset"

# The API key is read from the environment so it never appears in the notebook.
client = Client(api_key=os.environ["LANGSMITH_API_KEY"])

dataset = client.create_dataset(
    description="Dataset for evaluating RAG pipeline",
    dataset_name=dataset_name,
)

In [17]:
# Upload one LangSmith example per generated question, attaching the text of
# every chunk the model cited as grounding for it.
for item in json_output:
    chunk_ids = item["chunk_ids"]

    # Fetch all cited chunks in a single request instead of one per id.
    # Questions with no cited chunks (presumably the unanswerable ones the
    # prompt asks for) need no lookup at all.
    points = (
        qdrant_client.retrieve(
            collection_name="Amazon-items-collection-01",
            ids=chunk_ids,
            with_payload=True,
        )
        if chunk_ids
        else []
    )
    text_by_id = {point.id: point.payload["text"] for point in points}

    # The model can cite an id that was never stored; fail with a message that
    # names the offending question rather than an opaque IndexError.
    unknown_ids = [chunk_id for chunk_id in chunk_ids if chunk_id not in text_by_id]
    if unknown_ids:
        raise ValueError(
            f"Question {item['question']!r} references chunk ids that are not in Qdrant: {unknown_ids}"
        )

    client.create_example(
        dataset_id=dataset.id,
        inputs={"question": item["question"]},
        outputs={
            "ground_truth": item["answer_example"],
            "context_ids": chunk_ids,
            # Built from chunk_ids so contexts stay aligned with context_ids.
            "contexts": [text_by_id[chunk_id] for chunk_id in chunk_ids],
        },
    )