In [34]:
import openai
import os
import json

from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

from langsmith import Client

In [35]:
# Connect to Qdrant. The endpoint can be overridden with the QDRANT_URL
# environment variable; when it is unset the local instance is used.
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
qdrant_client = QdrantClient(url=QDRANT_URL)

### Download all data from Qdrant

In [36]:
# Request a single page of at most 100 points, payloads only (no vectors).
# Later cells index the result with [0] to get at the list of points.
first_page_request = dict(
    collection_name="Amazon-items-collection-00",
    limit=100,
    offset=None,
    with_payload=True,
    with_vectors=False,
)
all_points = qdrant_client.scroll(**first_page_request)

In [37]:
# Peek at the payload of the first retrieved point to see which fields it carries.
all_points[0][0].payload

{'description': "Light up Headphone Controller Holder, 16 Colors 3D LED Lights Gamepad Headset Stand, Game Controller Hanger for All Universal Gaming PC Accessories üëçNOT ONLY a STAND: Multi advanced all-round stand: controller holder+headset stand+...---all universal models can be perfectly stored or displayed.Excellent stable and flexible, compatibility with your stylish interiors and digital home appliances , so it not only a stand but also a night light. üëçCreative 3D Visual Effect: This is a 3D illusion lamp that can show up the gamepad when it lights up, with an optical acrylic flat board with laser engraving can give you amazing 3D visual effect. 3D is just visual, the light itself is flat. It's bound to amaze and impress your kids and guests. It is a unique birthday, Christmas, or Easter gift for kids or friends, controller decorations for home. üëçConvenient and Safe: The 3D gamepad night light could use USB to connect or 3 x AAA batteries (not included) to power. The lig

In [38]:
# Keep only what the prompt needs from each point: the product identifier
# (parent_asin) as "id" and the product description as "text".
all_context = [
    {
        "id": point.payload["parent_asin"],
        "text": point.payload["description"],
    }
    for point in all_points[0]
]

In [39]:
# Display the id/text records that will be embedded into the user prompt.
all_context

[{'id': 'B09V7G7YY4',
  'text': "Light up Headphone Controller Holder, 16 Colors 3D LED Lights Gamepad Headset Stand, Game Controller Hanger for All Universal Gaming PC Accessories üëçNOT ONLY a STAND: Multi advanced all-round stand: controller holder+headset stand+...---all universal models can be perfectly stored or displayed.Excellent stable and flexible, compatibility with your stylish interiors and digital home appliances , so it not only a stand but also a night light. üëçCreative 3D Visual Effect: This is a 3D illusion lamp that can show up the gamepad when it lights up, with an optical acrylic flat board with laser engraving can give you amazing 3D visual effect. 3D is just visual, the light itself is flat. It's bound to amaze and impress your kids and guests. It is a unique birthday, Christmas, or Easter gift for kids or friends, controller decorations for home. üëçConvenient and Safe: The 3D gamepad night light could use USB to connect or 3 x AAA batteries (not included) t

### Render a prompt to generate synthetic Eval reference dataset

In [40]:
# JSON schema describing the expected model output: a list of objects, one per
# generated question. It is embedded verbatim into SYSTEM_PROMPT.
output_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "Suggested question.",
            },
            "chunk_ids": {
                "type": "array",
                "items": {
                    "type": "string",
                    "description": "ID of the chunk that could be used to answer the question.",
                },
            },
            "answer_example": {
                "type": "string",
                "description": "Suggested answer grounded in the context.",
            },
            "reasoning": {
                "type": "string",
                "description": "Reasoning why the question could be answered with the chunks.",
            },
        },
        # The upload cell indexes these keys unconditionally, so tell the model
        # that none of them may be omitted.
        "required": ["question", "chunk_ids", "answer_example", "reasoning"],
    },
}


# System prompt for the synthetic eval-set generation.
# The chunk count is taken from all_context instead of being hard-coded, so the
# prompt stays truthful whatever number of points the scroll returned.
# The requested mix is 10 multi-chunk + 15 single-chunk + 5 unanswerable = 30.
SYSTEM_PROMPT = f"""
I am building a RAG application. I have a collection of {len(all_context)} chunks of text.
The RAG application will act as a shopping assistant that can answer questions about the stock of the products we have available.
I will provide all of the available products to you with IDs of each chunk.
I want you to come up with 30 questions to which the answers could be grounded in the chunk context.
The questions should imitate a potential real user of this RAG system.
As an output I need you to provide me the list of questions and the IDs of the chunks that could be used to answer them.
Also, provide an example answer to the question given the context of the chunks.
Also, provide the reason why you chose the chunks to answer the questions.
Construct 10 questions that could use multiple chunks in the answer.
Construct 15 questions that could use single chunk in the answer.
Construct 5 questions that can't be answered with the available chunks.

<OUTPUT JSON SCHEMA>
{json.dumps(output_schema, indent=2)}
</OUTPUT JSON SCHEMA>

I need to be able to parse the json output.
"""

# User message: a one-line introduction followed by the id/text records,
# rendered with Python's default list formatting.
USER_PROMPT = (
    "\n"
    "Here is the list of chunks, each list element is a dictionary with id and text:\n"
    f"{all_context}\n"
)

In [41]:
# Inspect the fully rendered system prompt, including the embedded JSON schema.
print(SYSTEM_PROMPT)


I am building a RAG application. I have a collection of 50 chunks of text.
The RAG application will act as a shopping assistant that can answer questions about the stock of the products we have available.
I will provide all of the available products to you with IDs of each chunk.
I want you to come up with 30 questions to which the answers could be grounded in the chunk context.
The questions should imitate a potential real user of this RAG system.
As an output I need you to provide me the list of questions and the IDs of the chunks that could be used to answer them.
Also, provide an example answer to the question given the context of the chunks.
Also, provide the reason why you chose the chunks to answer the questions.
Construct 10 questions that could use multipple chunks in the answer.
Construct 15 questions that could use single chunk in the answer.
Construct 5 questions that can't be answered with the available chunks.

<OUTPUT JSON SCHEMA>
{
  "type": "array",
  "items": {
    "

In [42]:
# Inspect the rendered user prompt with the chunk records filled in.
print(USER_PROMPT)


Here is the list of chunks, each list element is a dictionary with id and text:
[{'id': 'B09V7G7YY4', 'text': "Light up Headphone Controller Holder, 16 Colors 3D LED Lights Gamepad Headset Stand, Game Controller Hanger for All Universal Gaming PC Accessories üëçNOT ONLY a STAND: Multi advanced all-round stand: controller holder+headset stand+...---all universal models can be perfectly stored or displayed.Excellent stable and flexible, compatibility with your stylish interiors and digital home appliances , so it not only a stand but also a night light. üëçCreative 3D Visual Effect: This is a 3D illusion lamp that can show up the gamepad when it lights up, with an optical acrylic flat board with laser engraving can give you amazing 3D visual effect. 3D is just visual, the light itself is flat. It's bound to amaze and impress your kids and guests. It is a unique birthday, Christmas, or Easter gift for kids or friends, controller decorations for home. üëçConvenient and Safe: The 3D gam

In [43]:
# Send the system and user prompts to the model and show the raw text of the
# first choice; the next cell parses that text as JSON.
chat_messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": USER_PROMPT},
]

response = openai.chat.completions.create(
    model="gpt-5-mini",
    messages=chat_messages,
    reasoning_effort="minimal",
)

print(response.choices[0].message.content)

[
  {
    "question": "Do you have a charger that can charge two PS5 DualSense controllers at once?",
    "chunk_ids": [
      "B0BQDQ5FQM"
    ],
    "answer_example": "Yes. The KICKDOT PS5 Stand (B0BQDQ5FQM) includes a dual fast PS5 controller charging station that can fully charge 2 DualSense controllers simultaneously (about 3 hours) with LED indicators (red charging, green fully charged).",
    "reasoning": "This chunk describes a PS5 stand with a dual fast controller charging station and specifies charging time and LED indicators, which directly answers the question."
  },
  {
    "question": "Which product can expand storage for PS5/PS4 games via USB?",
    "chunk_ids": [
      "B09RPKFJH3"
    ],
    "answer_example": "The Avolusion PRO-X 8TB USB 3.0 external gaming hard drive (B09RPKFJH3) is compatible with PS5/PS4. You can play PS4 games directly from the USB drive and store PS5 games there (but PS5 games must be moved back to internal SSD to play).",
    "reasoning": "The ch

In [44]:
# Parse the model's reply into Python objects. `json` is already imported in the
# first cell, and the raw text gets its own name so that `json_output` is only
# ever bound to the parsed result.
raw_json_output = response.choices[0].message.content
json_output = json.loads(raw_json_output)

In [45]:
# Display the parsed question/answer records.
json_output

[{'question': 'Do you have a charger that can charge two PS5 DualSense controllers at once?',
  'chunk_ids': ['B0BQDQ5FQM'],
  'answer_example': 'Yes. The KICKDOT PS5 Stand (B0BQDQ5FQM) includes a dual fast PS5 controller charging station that can fully charge 2 DualSense controllers simultaneously (about 3 hours) with LED indicators (red charging, green fully charged).',
  'reasoning': 'This chunk describes a PS5 stand with a dual fast controller charging station and specifies charging time and LED indicators, which directly answers the question.'},
 {'question': 'Which product can expand storage for PS5/PS4 games via USB?',
  'chunk_ids': ['B09RPKFJH3'],
  'answer_example': 'The Avolusion PRO-X 8TB USB 3.0 external gaming hard drive (B09RPKFJH3) is compatible with PS5/PS4. You can play PS4 games directly from the USB drive and store PS5 games there (but PS5 games must be moved back to internal SSD to play).',
  'reasoning': 'The chunk explicitly describes an 8TB USB 3.0 external driv

In [46]:
# Number of generated records.
# NOTE(review): SYSTEM_PROMPT asks for 30 questions, but the recorded output of
# this cell is 35 - the model does not follow the requested count exactly.
len(json_output)

35

In [47]:
# Look up a single product by its parent_asin payload field; the value is one of
# the chunk IDs that appears in the generated questions.
asin_condition = FieldCondition(
    key="parent_asin",
    match=MatchValue(value="B0BQDQ5FQM"),
)
asin_filter = Filter(must=[asin_condition])

scroll_result = qdrant_client.scroll(
    collection_name="Amazon-items-collection-00",
    scroll_filter=asin_filter,
    limit=100,
    with_payload=True,
    with_vectors=False,
)
# Element 0 of the scroll result holds the matching points.
points = scroll_result[0]

In [48]:
# Show the payload of the first point matching the parent_asin filter.
points[0].payload

{'description': "KICKDOT PS5 Stand with 3 Adjustable Cooling Fan, Dual Fast PS5 Controller Charging Station with 3 USB Hubs, Built-in 13 Game Storage PS5 Accessories PS5 Cooling Station for PS5 Digital/Disc Console üåÄ „ÄêNEWEST COOLING STATION FOR PS5 CONSOLE„Äë The high speed PS5 cooling fan has 3 adjustable gears: Strong/Medium/Normal. Playstation 5 accessories PS5 fan pulls out hot air from console bottom and efficiently dissipate heat from right side, so PS5 fan cooling fan will keep your PS5 console cooling without noise, enhance your gaming experience. No need to worry about console overheating from playing games all day and night! PS5 cooling station will maximize the life of your game console. üåÄ „ÄêYOUR IDEAL PS5 ACCESSORIES„Äë PS5 assecories ps5 station is perfectly compatible with Playstation 5 Dualsense controller, Digital and Disc Edition console. The 3 extra USB hubs allow you to charge other devices, like mouse, keyboard, headset etc. With a sleek and tidy design, PS

In [49]:
def get_description(
    parent_asin: str,
    collection_name: str = "Amazon-items-collection-00",
) -> str:
    """Return the ``description`` payload field of the item with the given ASIN.

    Args:
        parent_asin: Value of the ``parent_asin`` payload field to look up.
        collection_name: Qdrant collection to search. Defaults to the
            collection used throughout this notebook.

    Returns:
        The ``description`` of the first point whose ``parent_asin`` matches.

    Raises:
        IndexError: If no point in the collection has that ``parent_asin``
            (for example, an ID invented by the model).
    """
    points = qdrant_client.scroll(
        collection_name=collection_name,
        scroll_filter=Filter(
            must=[
                FieldCondition(
                    key="parent_asin",
                    match=MatchValue(value=parent_asin)
                )
            ]
        ),
        # Only the first match is used, so there is no need to fetch more.
        limit=1,
        with_payload=True,
        with_vectors=False
    )[0]

    if not points:
        # Same exception type as indexing the empty list would raise, but with
        # a message that says which ID was missing and where.
        raise IndexError(
            f"No point with parent_asin={parent_asin!r} found in collection {collection_name!r}"
        )

    return points[0].payload["description"]

In [50]:
# Quick check of the helper with the same parent_asin that was looked up manually.
get_description("B0BQDQ5FQM")

"KICKDOT PS5 Stand with 3 Adjustable Cooling Fan, Dual Fast PS5 Controller Charging Station with 3 USB Hubs, Built-in 13 Game Storage PS5 Accessories PS5 Cooling Station for PS5 Digital/Disc Console üåÄ „ÄêNEWEST COOLING STATION FOR PS5 CONSOLE„Äë The high speed PS5 cooling fan has 3 adjustable gears: Strong/Medium/Normal. Playstation 5 accessories PS5 fan pulls out hot air from console bottom and efficiently dissipate heat from right side, so PS5 fan cooling fan will keep your PS5 console cooling without noise, enhance your gaming experience. No need to worry about console overheating from playing games all day and night! PS5 cooling station will maximize the life of your game console. üåÄ „ÄêYOUR IDEAL PS5 ACCESSORIES„Äë PS5 assecories ps5 station is perfectly compatible with Playstation 5 Dualsense controller, Digital and Disc Edition console. The 3 extra USB hubs allow you to charge other devices, like mouse, keyboard, headset etc. With a sleek and tidy design, PS5 charging stati

### Create Eval dataset in Langsmith

In [51]:
# Build the LangSmith client with the API key read from the environment;
# a missing LANGSMITH_API_KEY variable fails here with a KeyError.
langsmith_api_key = os.environ["LANGSMITH_API_KEY"]
client = Client(api_key=langsmith_api_key)

In [54]:
# `Client` is imported in the first cell and `client` is created in the previous
# cell, so neither is repeated here.
dataset_name = "rag-evaluation-dataset"

# Reuse the dataset when one with this name already exists; otherwise create it.
existing = list(client.list_datasets(dataset_name=dataset_name))

if existing:
    dataset = existing[0]
else:
    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="Dataset for evaluating RAG pipeline",
    )

In [56]:
# Resolve every reference description BEFORE writing anything, so that an
# unknown chunk ID aborts the cell without leaving a half-uploaded dataset.
examples_to_upload = [
    {
        "inputs": {"question": item["question"]},
        "outputs": {
            "ground_truth": item["answer_example"],
            "reference_context_ids": item["chunk_ids"],
            "reference_descriptions": [
                get_description(chunk_id) for chunk_id in item["chunk_ids"]
            ],
        },
    }
    for item in json_output
]

# The dataset is reused when it already exists, so skip questions that are
# already stored; re-running this cell then does not create duplicates.
existing_questions = {
    (stored.inputs or {}).get("question")
    for stored in client.list_examples(dataset_id=dataset.id)
}

for example in examples_to_upload:
    question = example["inputs"]["question"]
    if question in existing_questions:
        continue
    client.create_example(
        dataset_id=dataset.id,
        inputs=example["inputs"],
        outputs=example["outputs"],
    )
    existing_questions.add(question)