## Evaluation dataset

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

import openai
import pandas as pd

#### Read the sampled dataset with Amazon inventory metadata

In [None]:
# Load the sampled item metadata; the file is JSON Lines (one record per line).
df_items = pd.read_json(
    "../data/meta_Electronics_2022_2023_with_category_ratings_100_sample_1000.jsonl",
    lines=True,
)

#### Concatenate title and features

In [None]:
def preprocess_data(row):
    """Build the text that gets embedded for a single item.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing a ``title``
            value and a ``features`` iterable of strings.

    Returns:
        The title, a single space, then all features joined by single spaces.
    """
    joined_features = " ".join(row["features"])
    return "{} {}".format(row["title"], joined_features)

In [None]:
# Row-wise apply: every item gets its title and features concatenated into one string.
df_items["preprocessed_data"] = df_items.apply(preprocess_data, axis="columns")

#### Initialize the Qdrant client and create the collection

In [None]:
# docker pull qdrant/qdrant

# docker run -p 6333:6333 -p 6334:6334 \
#     -v "$(pwd)/qdrant_storage:/qdrant/storage:z" \
#     qdrant/qdrant

# Access web UI at http://localhost:6333/dashboard

In [None]:
# Connect to the local Qdrant instance (see the docker commands in the cell above).
qdrant_client = QdrantClient(url="http://localhost:6333")

# Create the collection only when it is missing, so the cell can be re-run
# (Restart & Run All) without failing on an already existing collection.
# To start from an empty collection instead, drop it first:
# qdrant_client.delete_collection(collection_name="Amazon-items-collection-01")
if not qdrant_client.collection_exists(collection_name="Amazon-items-collection-01"):
    qdrant_client.create_collection(
        collection_name="Amazon-items-collection-01",
        #collection_name="Amazon-items-collection-02",
        # The vector size must match the length of the vectors returned by get_embedding.
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    )

#### Sample 50 items from the dataset

In [None]:
# Fixed random_state keeps the 50-item sample identical across runs.
df_sample = df_items.sample(
    random_state=25,
    n=50,
)

#### Define the embeddings function

In [None]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector of ``text`` from the OpenAI embeddings API.

    Args:
        text: String to embed; it is sent as a single-element input list.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` of the first (and only requested) item in the response.
    """
    api_result = openai.embeddings.create(model=model, input=[text])
    first_item = api_result.data[0]
    return first_item.embedding

#### Embed data

In [None]:
# Texts to embed, in sample order; a text's position in this list is used as its point id.
data_to_embed = df_sample["preprocessed_data"].tolist()

# One embedding request per text; each point stores the original text as its payload.
pointstructs = [
    PointStruct(
        id=position,
        vector=get_embedding(text),
        payload={"text": text},
    )
    for position, text in enumerate(data_to_embed)
]

#### Write embedded data to Qdrant

In [None]:
# Write all embedded points to the collection in a single upsert call.
qdrant_client.upsert(
    points=pointstructs,
    wait=True,
    collection_name="Amazon-items-collection-01",
    #collection_name="Amazon-items-collection-02",
)

#### Render the prompts used to generate the synthetic eval reference dataset

In [None]:
import json

# JSON Schema of the expected model output: a list of eval records, each with a
# question, the ids of the supporting chunks, an example answer and the reasoning.
output_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "Suggested question.",
            },
            "chunk_ids": {
                "type": "array",
                "items": {
                    "type": "integer",
                    "description": "Index of the chunk that could be used to answer the question.",
                },
            },
            "answer_example": {
                "type": "string",
                "description": "Suggested answer grounded in the context.",
            },
            "reasoning": {
                "type": "string",
                "description": "Reasoning why the question could be answered with the chunks.",
            },
        },
    },
}

# Take the chunk count from the data so the prompt stays correct if the sample size changes.
num_chunks = len(data_to_embed)

# Chunk ids are positions in data_to_embed, the same ids used for the Qdrant points.
chunks = [{"id": position, "text": text} for position, text in enumerate(data_to_embed)]

SYSTEM_PROMPT = f"""
I am building a RAG application. I have a collection of {num_chunks} chunks of text.
The RAG application will act as a shopping assistant that can answer questions about the stock of the products we have available.
I will provide all of the available products to you with indexes of each chunk.
I want you to come up with 30 questions to which the answers could be grounded in the chunk context.
As an output I need you to provide me the list of questions and the indexes of the chunks that could be used to answer them.
Also, provide an example answer to the question given the context of the chunks.
Also, provide the reason why you chose the chunks to answer the questions.
Try to have a mix of questions that could use multiple chunks and questions that could use a single chunk.
Also, include 5 questions that can't be answered with the available chunks. For those questions, return an empty list of chunk indexes.

<OUTPUT JSON SCHEMA>
{json.dumps(output_schema, indent=2)}
</OUTPUT JSON SCHEMA>

I need to be able to parse the json output.
Return only the JSON array: no markdown code fences, no comments and no text before or after it.
"""

# Serialize the chunks as JSON (not a Python repr) so quotes inside product texts
# are escaped consistently and the model sees the same format it must answer in.
USER_PROMPT = f"""
Here is the list of chunks, each list element is a dictionary with id and text:
{json.dumps(chunks, ensure_ascii=False, indent=2)}
"""

In [None]:
# Inspect the rendered system prompt, including the embedded output JSON schema.
print(SYSTEM_PROMPT)

In [None]:
# Inspect the rendered user prompt: the list of all product chunks with their ids.
print(USER_PROMPT)

#### Generate synthetic eval reference data

In [None]:
# Instructions go in the system message; the chunk list goes in the user message.
chat_messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": USER_PROMPT},
]
response = openai.chat.completions.create(messages=chat_messages, model="gpt-4.1")

# Show the raw model output before any cleanup.
print(response.choices[0].message.content)

#### Clean up the output and parse it as JSON

In [None]:
import json
import re


def _parse_llm_json(raw_text):
    """Strip non-JSON decoration from a model response and parse it.

    Removes markdown code-fence markers (``` with an optional ``json`` tag) and
    whole-line ``//`` comments, which are not valid JSON, then parses the rest.

    Args:
        raw_text: The text content returned by the model.

    Returns:
        The parsed JSON value.

    Raises:
        json.JSONDecodeError: If the cleaned text is still not valid JSON.
    """
    cleaned = re.sub(r"```(?:json)?", "", raw_text, flags=re.IGNORECASE)
    # Kept for backward compatibility: this marker is removed wherever it appears,
    # even when it trails other content on the same line.
    cleaned = cleaned.replace("// BEGIN UNANSWERABLE QUESTIONS SECTION (5)", "")
    # Drop any other whole-line `//` comment. JSON strings cannot contain raw
    # newlines, so a line that starts with `//` is never part of a string value.
    cleaned = re.sub(r"^[ \t]*//[^\n]*$", "", cleaned, flags=re.MULTILINE)
    return json.loads(cleaned)


json_output = _parse_llm_json(response.choices[0].message.content)

In [None]:
# Display the parsed model output (the bare last expression of a cell is rendered by the notebook).
json_output

In [None]:
# Frozen snapshot of the generated eval records, kept in the notebook so the saved
# dataset does not change when the model is called again. Each record has the keys
# 'question', 'chunk_ids', 'answer_example' and 'reasoning'; records with an empty
# 'chunk_ids' list are the questions that cannot be answered from the chunks.
output_json = [{'question': 'Which Bluetooth speakers are available for purchase?',
  'chunk_ids': [9, 23, 47],
  'answer_example': 'We have several Bluetooth speakers available: the maio Portable Wireless Speaker (30W, IPX67 waterproof, 18-hour playtime), the co2CREA Soft Silicone Case for the Bose SoundLink Flex Bluetooth Portable Speaker, and the Tiksounds Wireless Earbuds with Bluetooth 5.3 and up to 60 hours of playtime.',
  'reasoning': 'Chunks 9, 23, and 47 all describe Bluetooth audio speaker products, some portable with waterproof or sports specifications.'},
 {'question': 'Do you have any products for organizing cables?',
  'chunk_ids': [4, 17, 43],
  'answer_example': 'Yes, we offer a variety of cable organizing products including tifanso adhesive cable clips (6pcs), 40 pcs silicone cable ties in assorted colors, and ApexOne hook-and-loop reusable cable ties with metal buckles that you can cut to length.',
  'reasoning': 'Each of these chunks provides details about cable management items, suitable for office, home, or general organizing.'},
 {'question': 'Are there any smartwatches or fitness trackers in stock?',
  'chunk_ids': [15, 8, 18],
  'answer_example': 'We have smartwatches and replacement bands in stock. The CHOKOVIE Smart Watch supports call receiving/dialing, heart rate, and activity tracking. We also offer YAXIN silicone sport bands for Fitbit Versa (various models) and NotoCity silicone bands compatible with Garmin Fenix and other smartwatches.',
  'reasoning': 'Chunk 15 describes a smart watch, while 8 and 18 list compatible bands—which may interest customers seeking fitness devices or accessories.'},
 {'question': 'Is there a digital microscope available?',
  'chunk_ids': [19],
  'answer_example': 'Yes, we offer the SKYBASIC Wireless Digital Microscope, supporting 50X-1000X magnification, compatible with phones and computers, and perfect for various learning or exploration uses.',
  'reasoning': 'Chunk 19 exclusively details a wireless digital microscope, including features and compatibility.'},
 {'question': 'Can you recommend headphones or earbuds for noise reduction?',
  'chunk_ids': [2, 11],
  'answer_example': 'We have the Active Noise Cancelling Wireless Earbuds with deep bass and LED display, as well as Hearprotek Sleep Earbuds, specifically designed for comfort and effective noise reduction while sleeping.',
  'reasoning': 'Both products in these chunks offer earbuds with noise cancelling or reduction features.'},
 {'question': 'Are there any Apple MFi certified charging cables or adapters?',
  'chunk_ids': [13, 26, 39, 44],
  'answer_example': 'Yes, you can find Apple MFi certified products such as Car Apple Carplay Cable (USB A to Lightning), Amaitree Lightning Cable (6ft, nylon braided), a Lightning to 3.5mm Headphone Jack Adapter, and a 1TB USB flash drive compatible with iPhone and iPad.',
  'reasoning': 'These products support Apple devices and specifically mention MFi certification or compatibility.'},
 {'question': 'Are there wireless keyboards or keyboard-mouse combos in stock?',
  'chunk_ids': [1, 25, 28],
  'answer_example': 'Yes, we have multiple options including the Bluetooth Multi-Device Keyboard (dual channel, rechargeable), the Nasuque Bluetooth Keyboard for Mac OS (supports three devices), and the Samsers Ultra Slim Wireless Keyboard and Mouse Combo, supporting Bluetooth and 2.4G connections.',
  'reasoning': 'All listed chunks describe wireless or Bluetooth keyboards, with chunk 28 also including a mouse.'},
 {'question': 'Do you sell a wireless mouse for gaming?',
  'chunk_ids': [5],
  'answer_example': 'We offer the Glorious Model D Wireless Gaming Mouse, a superlight ergonomic mouse with RGB lighting, wireless/wired options, and up to 71 hours of playtime.',
  'reasoning': 'Chunk 5 describes a gaming wireless mouse, specifically designed for low latency and performance.'},
 {'question': 'What types of camera accessories or security cameras are available?',
  'chunk_ids': [3, 14, 30, 31],
  'answer_example': 'Our selection includes the Noonkey 2K/3MP Light Bulb Security Camera (pack of 2), a NEEWER ND Filter Set for DJI Mini 3 drones, the VAIMEST 2K Solar Powered Wireless Outdoor Security Camera, and the Esky 720P HD License Plate Backup Camera.',
  'reasoning': 'These chunks contain details about security cameras, backup cameras, and camera accessories.'},
 {'question': 'Is there a portable storage device or USB flash drive available?',
  'chunk_ids': [6, 44],
  'answer_example': 'Yes, we have the LinkMore NR34 128GB USB 3.1 Flash Drive with fast read/write speeds, and the ALLBYT iPhone Flash Drive, a 1TB MFi-certified external storage device compatible with iPhone, iPad, and Android.',
  'reasoning': 'Both chunks describe portable flash drives with storage capacities and relevant interfaces.'},
 {'question': 'Are there any products related to car electronics or car audio?',
  'chunk_ids': [0, 12, 13, 29, 31],
  'answer_example': 'We have multiple car electronics including: LDNIO Bluetooth FM Transmitter (with fast charging and hands-free calls), a Double Din Car Stereo with wireless Carplay/Android Auto, Carplay/charging cables, a Wireless CarPlay Adapter, and the Esky 720P License Plate Backup Camera.',
  'reasoning': 'These chunks collectively describe car audio, dashboard electronics, adapters, and camera accessories.'},
 {'question': 'What devices do you have that are waterproof?',
  'chunk_ids': [2, 9, 15, 30, 47, 31],
  'answer_example': 'We offer several waterproof devices: Active Noise Cancelling Earbuds (IPX6), maio Bluetooth Speaker (IPX67), CHOKOVIE Smartwatch (IP68), VAIMEST Security Cameras (IP65), Tiksounds Wireless Earbuds (IPX7), and the Esky 720P License Plate Backup Camera (IP67).',
  'reasoning': 'Each product in these chunks lists a waterproof rating as a major feature.'},
 {'question': 'Do you sell any cases or bags for computers, tablets, or laptops?',
  'chunk_ids': [20, 24, 35, 23, 41],
  'answer_example': 'Yes, our offerings include the YIMIKOL Kids iPad Case (shockproof), ECOSUSI Briefcase for Women (laptop bag), NISHEL Laptop Sleeve (for 13” laptops), co2CREA Soft Silicone Case for Bose SoundLink Flex Speaker, and the Fullant Kids Tablet with a shockproof silicone case.',
  'reasoning': 'All products in these chunks are related to protective bags or cases for tech items.'},
 {'question': 'What kinds of headphones or earphones are available for iPhone?',
  'chunk_ids': [2, 22, 39, 47],
  'answer_example': 'We offer: Active Noise Cancelling Wireless Earbuds (Bluetooth 5.3, compatible with iPhone), Apple Earbuds Headphones with Lightning Connector, a Lightning to 3.5mm Adapter for using standard headphones with iPhone, and Tiksounds Wireless Earbuds compatible with iPhone.',
  'reasoning': 'These chunks include headphone/earbud products explicitly compatible with iPhone, either natively or via adapter.'},
 {'question': 'Are there any products for network connectivity, such as splitters or hubs?',
  'chunk_ids': [16, 36, 37],
  'answer_example': 'We have the NOBVEQ RJ45 Ethernet Splitter (1 male to 3 female), a barsone 4-Port USB 3.0 Hub, and Wansurs USB SD Card Readers (with dual slots, 3-pack).',
  'reasoning': 'Each of these chunks lists a product designed to connect or expand computer/device ports.'},
 {'question': 'Which products are geared towards children?',
  'chunk_ids': [20, 41],
  'answer_example': 'For children, we offer the YIMIKOL shockproof iPad case and the Fullant Kids Tablet (7 inch, kid-proof case, parental controls, educational apps).',
  'reasoning': '20 is a kids-proof iPad case, while 41 is a kids tablet with parental control and education features.'},
 {'question': 'Do you offer any GPS or tracking devices?',
  'chunk_ids': [38, 48],
  'answer_example': 'Yes, we have the Garmin Edge Explore 2 GPS Cycling Navigator for bikes and also an 8-pack GPS tracker for dogs, kids, and valuables.',
  'reasoning': 'These chunks are about GPS navigation (cycling) and small Bluetooth or GPS trackers.'},
 {'question': 'Are there accessories for drones or aerial photography?',
  'chunk_ids': [14],
  'answer_example': 'Yes, the NEEWER ND Filter Set is available, compatible with DJI Mini 3/Mini 3 Pro drones, for various lighting and color correction needs.',
  'reasoning': 'Chunk 14 is specific to DJI drone lens accessories.'},
 {'question': 'Are there any products specifically for music lovers or audiophiles?',
  'chunk_ids': [46, 49, 33],
  'answer_example': 'For music lovers, we offer a Vinyl Record Player Turntable with speakers, the Razer Leviathan V2 X PC soundbar with Chroma RGB, and RGB Gaming Desktop Speakers.',
  'reasoning': 'These products are all focused on delivering high-quality music/audio listening experiences.'},
 {'question': 'Do you have any PC accessories such as monitors or speakers?',
  'chunk_ids': [42, 33, 49, 36, 35],
  'answer_example': 'Certainly! We offer the LG UltraWide 26-inch monitor with HDR and AMD FreeSync, RGB Gaming Desktop Speakers, Razer Leviathan V2 X PC Soundbar, a barsone 4-Port USB 3.0 Hub, and NISHEL Laptop Sleeve Case.',
  'reasoning': 'Monitor and speakers are direct accessories, and the hub and sleeve offer necessary support for a PC setup.'},
 {'question': 'Can I buy a replacement band for my smartwatch?',
  'chunk_ids': [8, 18],
  'answer_example': 'Yes, we carry the YAXIN Slim Sport Bands (Fitbit Versa models) and the NotoCity 22mm band for various Garmin Fenix and Forerunner models.',
  'reasoning': 'These chunks are dedicated to replacement watch bands for popular smartwatch lines.'},
 {'question': 'Which products support wireless charging or fast charging?',
  'chunk_ids': [0, 2],
  'answer_example': 'The Bluetooth Car Adapter from LDNIO supports 43W PD&QC 3.0 fast charging, including USB Type C ports. Our Wireless Earbuds (Bluetooth 5.3) support USB-C fast charging for quick power-ups.',
  'reasoning': 'These products explicitly mention fast or USB-C charging capabilities.'},
 {'question': 'Are there any products for home surveillance or baby monitoring?',
  'chunk_ids': [3, 30],
  'answer_example': 'Our selection includes Noonkey 2K/3MP Light Bulb Security Cameras (with AI human tracking and color night vision) and the VAIMEST Solar Powered PTZ WiFi Outdoor Camera (pan, tilt, zoom, night vision, two-way audio).',
  'reasoning': 'Both are focused on home security with apps and outdoor/indoor options.'},
 {'question': 'Is there a product that helps in creating or learning about electronics?',
  'chunk_ids': [34],
  'answer_example': 'Yes, we stock the SunFounder Ultimate Starter Kit (compatible with Arduino UNO), which includes over 87 projects and components for learning IoT, robotics, and programming.',
  'reasoning': 'This chunk is specifically an educational electronics kit.'},
 {'question': 'Are there stands or holders for smart speakers?',
  'chunk_ids': [7],
  'answer_example': 'We have an LDYAN Owl Holder Stand, suitable for various Echo Dot generations and Google Home Mini/Nest Mini, featuring a stylish design and better sound experience.',
  'reasoning': 'This product is dedicated to holding smart speakers.'},
 {'question': 'Can you suggest a product suitable for video calls or streaming?',
  'chunk_ids': [40, 42],
  'answer_example': "Great options include the YINGNUOST 10'' Ring Light with tripod and RGB app-control lighting, and the LG UltraWide 26-inch Monitor, which provides extra screen real estate for calls or streaming.",
  'reasoning': 'Ring lights improve video call/stream visuals, and a wide monitor is useful for multitasking.'},
 {'question': 'Are there any products for sewing or crafts organization?',
  'chunk_ids': [43, 17],
  'answer_example': 'We offer ApexOne hook-and-loop straps with metal buckles (cut to length, suitable for sewing and crafts) and 40 pcs silicone cable ties, which can also be used in sewing and DIY projects.',
  'reasoning': 'Both cable tie products mention use in crafting and sewing applications.'},
 {'question': 'Do you sell binoculars or products for concerts and events?',
  'chunk_ids': [21],
  'answer_example': 'Yes, we carry the Aroncent Portable Binoculars (3x2.5 magnification) designed for theatre, sports, and concerts, with a stylish vintage look.',
  'reasoning': 'Chunk 21 is dedicated to compact opera/theatre binoculars.'},
 {'question': 'Which items support Mac computers or Apple devices?',
  'chunk_ids': [1, 25, 26, 35, 39, 44, 22],
  'answer_example': 'A wide selection is available: Bluetooth Multi-Device Keyboard, Nasuque Bluetooth Keyboard for Mac OS, Amaitree Lightning Cable, NISHEL Laptop Sleeve for MacBook Air/Pro, Lightning to 3.5mm Adapter, ALLBYT iPhone 1TB Flash Drive, and Apple Earbuds with Lightning Connector.',
  'reasoning': 'All these products mention explicit compatibility with Mac or Apple mobile devices.'},
 # The generated question for this record was a leaked meta-question ("... (Unanswerable)");
 # it is replaced by the question its answer and reasoning actually address.
 {'question': 'Do you have any home coffee makers available?',
  'chunk_ids': [],
  'answer_example': 'Sorry, we do not have any home coffee makers available.',
  'reasoning': 'There are no coffee makers described in the product collection.'},
 {'question': 'Are there any shoes or shoe accessories for sale?',
  'chunk_ids': [],
  'answer_example': 'We currently do not have any shoes or shoe accessories in our inventory.',
  'reasoning': 'None of the provided chunks are about shoes or footwear.'},
 {'question': 'Do you have any home appliances like vacuum cleaners or blenders?',
  'chunk_ids': [],
  'answer_example': 'There are no home appliances such as vacuum cleaners or blenders in our product selection.',
  'reasoning': 'No chunks refer to kitchen/home appliances like those listed.'},
 {'question': 'Are there any pet food or pet grooming products?',
  'chunk_ids': [],
  'answer_example': 'We do not have pet food or pet grooming items, but we do carry pet trackers for safety.',
  'reasoning': 'No chunks describe pet food or grooming products; the GPS tracker is for pets but not food/grooming.'},
 {'question': 'Can I purchase eyeglasses or sunglasses?',
  'chunk_ids': [],
  'answer_example': 'Currently, we do not offer eyeglasses or sunglasses.',
  'reasoning': 'No eyewear is mentioned in any product chunk.'}]

In [None]:
# Save the dataset
import json
import os

output_path = '../data/synth_eval_dataset/eval_dataset.json'
# open(..., 'w') does not create missing parent directories, so create them first.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Write UTF-8 explicitly and keep non-ASCII characters (e.g. typographic quotes in
# product texts) readable in the file instead of \u escapes; indent for easier diffs.
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(output_json, f, ensure_ascii=False, indent=2)

#### Upload the dataset to LangSmith

In [None]:
import os

from langsmith import Client

# The API key is read from the environment; a missing variable raises KeyError here.
langsmith_api_key = os.environ["LANGSMITH_API_KEY"]
client = Client(api_key=langsmith_api_key)

# Create the LangSmith dataset that will hold the eval examples.
dataset_name = "amazonbe-ai-agent-evaluation-dataset"
dataset = client.create_dataset(
    description="Dataset for evaluating RAG pipeline",
    dataset_name=dataset_name,
)

In [None]:
# Upload the frozen snapshot (output_json), i.e. the same records that are saved to
# disk, rather than the live model response, which can differ between runs.
for item in output_json:
    chunk_ids = item["chunk_ids"]
    # One retrieve call per question instead of one per chunk; questions with no
    # supporting chunks (the unanswerable ones) need no lookup at all.
    points = (
        qdrant_client.retrieve(
            collection_name="Amazon-items-collection-01",
            ids=chunk_ids,
            with_payload=True,
        )
        if chunk_ids
        else []
    )
    # Index by point id so contexts follow the order of chunk_ids, whatever order
    # the points come back in. A chunk id missing from the collection raises KeyError.
    text_by_id = {point.id: point.payload["text"] for point in points}
    client.create_example(
        dataset_id=dataset.id,
        inputs={"question": item["question"]},
        outputs={
            "ground_truth": item["answer_example"],
            "context_ids": chunk_ids,
            "contexts": [text_by_id[chunk_id] for chunk_id in chunk_ids],
        },
    )