In [1]:

import openai
from qdrant_client import QdrantClient

from langsmith import Client
from qdrant_client import QdrantClient

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

  from .autonotebook import tqdm as notebook_tqdm


### Download an example reference data point from LangSmith

In [2]:

# LangSmith client; used below to read the evaluation dataset and list its examples.
client = Client()

In [3]:
# Look up the evaluation dataset by its name through the LangSmith client.
dataset = client.read_dataset(dataset_name="rag-evaluation-dataset")

In [4]:
dataset

Dataset(name='rag-evaluation-dataset', description='Dataset for evaluating RAG pipeline', data_type=<DataType.kv: 'kv'>, id=UUID('86c91f54-22b8-43c3-be98-84758638e856'), created_at=datetime.datetime(2026, 1, 20, 3, 32, 25, 277061, tzinfo=TzInfo(0)), modified_at=datetime.datetime(2026, 1, 20, 3, 32, 25, 277061, tzinfo=TzInfo(0)), example_count=53, session_count=0, last_session_start_time=None, inputs_schema=None, outputs_schema=None, transformations=None, metadata={'runtime': {'sdk': 'langsmith-py', 'library': 'langsmith', 'runtime': 'python', 'platform': 'macOS-15.3.2-arm64-arm-64bit', 'sdk_version': '0.6.4', 'runtime_version': '3.12.12', 'langchain_version': None, 'py_implementation': 'CPython', 'langchain_core_version': None}})

In [27]:
list(client.list_examples(dataset_id=dataset.id, limit=10))[1].outputs

{'ground_truth': "Examples: The SKYBASIC wireless microscope (B0BG5L2YLC not listed here?) and other wireless devices reference companion apps; XVIM bulb camera (B0C4DBSWGW) uses the 'HoMeYe Pro' app for setup and remote viewing. The camera clock (B0B2JJJFCD) references 'Lookcam' app for iPhone/Android. (Note: The included Fintie Garmin protector (B0B3MMP22L) doesn't require an app but the monocular/camera devices do.)",
 'reference_context_ids': ['B0BGLRMPQD',
  'B0B3MMP22L',
  'B0C4DBSWGW',
  'B0B2JJJFCD'],
 'reference_descriptions': ["Monocular Telescope, 10x42 Monoculars for Adults, Usogood Compact Portable Waterproof Monocular with Hand Strap, Lightweight Handheld Pocket Telescope for Bird Watching 【10x42 High Definition and Comfortable Viewing】This monoculars for adults with a 42mm objective lens provide 10x magnification, which ensures that you can easily magnify the object with a stable view when observing handheld. With a large field of view of 360ft/1000yards, you can clearly

In [28]:
list(client.list_examples(dataset_id=dataset.id, limit=10))[1].inputs

{'question': 'Which products mention having a companion app for setup or control?'}

In [29]:
# Fetch the example list once and take both fields from the same object.
# Issuing the same request twice (as before) doubles the network round-trips
# and relies on both listings coming back in the same order.
reference_example = list(client.list_examples(dataset_id=dataset.id, limit=10))[1]
reference_input = reference_example.inputs
reference_output = reference_example.outputs

### RAG Pipeline

In [32]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: Value passed as ``input`` to ``openai.embeddings.create``.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` attribute of the first entry in the response's ``data``.
    """
    embeddings_response = openai.embeddings.create(model=model, input=text)
    first_item = embeddings_response.data[0]
    return first_item.embedding


def retrieve_data(query, qdrant_client, k=5):
    """Embed ``query`` and fetch up to ``k`` matching points from Qdrant.

    Args:
        query: Text that is embedded with ``get_embedding`` and used as the search vector.
        qdrant_client: Object exposing ``query_points`` (``rag_pipeline`` passes a ``QdrantClient``).
        k: Passed to ``query_points`` as ``limit``.

    Returns:
        dict of four parallel lists, one entry per returned point:
        ``retrieved_context_ids`` (payload ``parent_asin``),
        ``retrieved_context`` (payload ``description``),
        ``retrieved_context_ratings`` (payload ``average_rating``) and
        ``similarity_scores`` (the point's ``score``).
    """
    query_vector = get_embedding(query)

    search_response = qdrant_client.query_points(
        collection_name="Amazon-items-collection-00",
        query=query_vector,
        limit=k,
    )

    # Single pass over the hits; every row holds the fields in the order they are read.
    rows = [
        (
            point.payload["parent_asin"],
            point.payload["description"],
            point.payload["average_rating"],
            point.score,
        )
        for point in search_response.points
    ]

    return {
        "retrieved_context_ids": [row[0] for row in rows],
        "retrieved_context": [row[1] for row in rows],
        "retrieved_context_ratings": [row[2] for row in rows],
        "similarity_scores": [row[3] for row in rows],
    }


def process_context(context):
    """Render the retrieved items as a bulleted text block for the prompt.

    Args:
        context: Mapping holding parallel sequences under the keys
            ``retrieved_context_ids``, ``retrieved_context`` and
            ``retrieved_context_ratings``.

    Returns:
        A string with one newline-terminated line per item, formatted as
        ``- ID: <id>, rating: <rating>, description: <description>``.
        Empty string when there are no items.
    """
    items = zip(
        context["retrieved_context_ids"],
        context["retrieved_context"],
        context["retrieved_context_ratings"],
    )
    return "".join(
        f"- ID: {item_id}, rating: {rating}, description: {description}\n"
        for item_id, description, rating in items
    )


def build_prompt(preprocessed_context, question):
    """Build the instruction prompt sent to the chat model.

    Args:
        preprocessed_context: Text block describing the available products
            (``rag_pipeline`` passes the output of ``process_context``).
        question: The user's question, inserted verbatim.

    Returns:
        The full prompt string, with the context under ``Context:`` and the
        question under ``Question:``.
    """
    # The section heading previously read "Instructtions:" (typo); corrected so the
    # model receives a properly spelled heading.
    prompt = f"""
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.

Context:
{preprocessed_context}

Question:
{question}
"""

    return prompt


def generate_answer(prompt):
    """Send ``prompt`` to the OpenAI chat completions API and return the reply text.

    Args:
        prompt: Full prompt text; sent as the single ``system`` message.

    Returns:
        The ``content`` of the first choice's message.
    """
    chat_messages = [{"role": "system", "content": prompt}]

    completion = openai.chat.completions.create(
        model="gpt-5-nano",
        messages=chat_messages,
        reasoning_effort="minimal",
    )

    first_choice = completion.choices[0]
    return first_choice.message.content


def rag_pipeline(question, top_k=5):
    """Answer ``question`` by chaining retrieval, formatting, prompting and generation.

    Args:
        question: The user's question.
        top_k: Number of points requested from Qdrant (forwarded to ``retrieve_data``).

    Returns:
        dict with keys ``answer``, ``question``, ``retrieved_context_ids``,
        ``retrieved_context`` and ``similarity_scores``. The ratings returned
        by ``retrieve_data`` are used for the prompt but not included here.
    """
    # A new client for the local Qdrant instance is created on every call.
    vector_store = QdrantClient(url="http://localhost:6333")

    retrieval = retrieve_data(question, vector_store, top_k)
    context_block = process_context(retrieval)
    answer_text = generate_answer(build_prompt(context_block, question))

    return {
        "answer": answer_text,
        "question": question,
        "retrieved_context_ids": retrieval["retrieved_context_ids"],
        "retrieved_context": retrieval["retrieved_context"],
        "similarity_scores": retrieval["similarity_scores"],
    }

In [33]:

rag_pipeline("Can adult friendly electronics do you sell?", top_k=5)

{'answer': 'Yes, we do sell adult-friendly electronics. From the available products, examples include:\n\n- Wireless Keyboard and Mouse Combo (2.4G, retro round keys)\n- Wireless Digital Microscope (SKYBASIC, WiFi/USB, 50X-1000X)\n- 1/4 TRS to Dual 1/4 TS Y-Splitter Cable (audio accessory)\n\nIf you’re looking for specific categories or more options, tell me what you have in mind and I’ll filter the available products.',
 'question': 'Can adult friendly electronics do you sell?',
 'retrieved_context_ids': ['B0B7495RL6',
  'B0B96LV4C5',
  'B09PYFMTBF',
  'B09Y39DSWR',
  'B0BG5L2YLC'],
 'retrieved_context': ['Cleaner Kit for AirPods Pro, 4 in 1 Earbuds Cleaning Pen, Bluetooth Headphone Cleaning Pen for Airpods, Airpods Pro 1 2 3 and Other Earphones, Keyboard, Mouse, Cellphones, Laptop, Camera (White) 【4-IN-1 DESIGN】The airpod cleaning kit is divided into 4 parts - flocking sponge, high-density brush, long-bristle brush and metal tip, which can deeply clean the earbuds and earphone chargi

### RAG Metrics

In [35]:
from ragas.dataset_schema import SingleTurnSample 
from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy

  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy


In [36]:
# Wrap the LangChain OpenAI chat model and embedding model in the ragas adapter
# classes; the metric functions below pass these to the ragas scorers as
# ``llm=`` and ``embeddings=``.
ragas_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))

  ragas_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
  ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))


In [37]:
reference_input

{'question': 'Which products mention having a companion app for setup or control?'}

In [38]:
reference_output

{'ground_truth': "Examples: The SKYBASIC wireless microscope (B0BG5L2YLC not listed here?) and other wireless devices reference companion apps; XVIM bulb camera (B0C4DBSWGW) uses the 'HoMeYe Pro' app for setup and remote viewing. The camera clock (B0B2JJJFCD) references 'Lookcam' app for iPhone/Android. (Note: The included Fintie Garmin protector (B0B3MMP22L) doesn't require an app but the monocular/camera devices do.)",
 'reference_context_ids': ['B0BGLRMPQD',
  'B0B3MMP22L',
  'B0C4DBSWGW',
  'B0B2JJJFCD'],
 'reference_descriptions': ["Monocular Telescope, 10x42 Monoculars for Adults, Usogood Compact Portable Waterproof Monocular with Hand Strap, Lightweight Handheld Pocket Telescope for Bird Watching 【10x42 High Definition and Comfortable Viewing】This monoculars for adults with a 42mm objective lens provide 10x magnification, which ensures that you can easily magnify the object with a stable view when observing handheld. With a large field of view of 360ft/1000yards, you can clearly

In [39]:
# Run the pipeline on the reference question; `result` is the input to the metric calls below.
result = rag_pipeline(reference_input["question"])

In [40]:
result

{'answer': 'None of the listed products mention having a companion app for setup or control.',
 'question': 'Which products mention having a companion app for setup or control?',
 'retrieved_context_ids': ['B0B3MMP22L',
  'B09ZPV8WBV',
  'B0B96LV4C5',
  'B09PYFMTBF',
  'B0B7495RL6'],
 'retrieved_context': ["Fintie 3 Packs Screen Protector Case Compatible with Garmin Venu Sq 2 / Sq 2 Music/Venu Sq/Venu Sq Music, Soft TPU Plated Bumper Full Cover Protective Cases [Scratch-Proof], Black/Clear/Rose Gold Perfect protective case with screen protector only compatible with Garmin Venu Sq 2 / Sq 2 Music / Venu Sq / Venu Sq Music Watch. 360 degree full protection, keep your watch away from scratches. 99% high transparency, provides a better viewing experience. Fintie screen protector case for Garmin Venu Sq / Venu Sq Music is easy to install and take off, you don't need to remove band when installing the case, and also don't need to take off the cover while charging. The case made from high qual

In [41]:
async def ragas_faithfulness(run, example):
    """Run the ragas ``Faithfulness`` metric on one pipeline result.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context`` keys.
        example: Not used by this metric.

    Returns:
        Whatever ``Faithfulness.single_turn_ascore`` returns for the sample.
    """
    turn = SingleTurnSample(
        user_input=run["question"],
        response=run["answer"],
        retrieved_contexts=run["retrieved_context"],
    )
    return await Faithfulness(llm=ragas_llm).single_turn_ascore(turn)

In [42]:
await ragas_faithfulness(result, "")

1.0

In [43]:
async def ragas_responce_relevancy(run, example):
    """Run the ragas ``ResponseRelevancy`` metric on one pipeline result.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context`` keys.
        example: Not used by this metric.

    Returns:
        Whatever ``ResponseRelevancy.single_turn_ascore`` returns for the sample.
    """
    turn = SingleTurnSample(
        user_input=run["question"],
        response=run["answer"],
        retrieved_contexts=run["retrieved_context"],
    )
    metric = ResponseRelevancy(llm=ragas_llm, embeddings=ragas_embeddings)
    score = await metric.single_turn_ascore(turn)
    return score

In [44]:
await ragas_responce_relevancy(result, "")

np.float64(0.9087996368200786)

In [46]:
async def ragas_context_precision_id_based(run, example):
    """Run the ragas ``IDBasedContextPrecision`` metric on one pipeline result.

    Args:
        run: Mapping with a ``retrieved_context_ids`` key.
        example: Mapping with a ``reference_context_ids`` key.

    Returns:
        Whatever ``IDBasedContextPrecision.single_turn_ascore`` returns for the sample.
    """
    turn = SingleTurnSample(
        retrieved_context_ids=run["retrieved_context_ids"],
        reference_context_ids=example["reference_context_ids"],
    )
    return await IDBasedContextPrecision().single_turn_ascore(turn)

In [47]:
await ragas_context_precision_id_based(result, reference_output)

0.2

In [48]:
async def ragas_context_recall_id_based(run, example):
    """Run the ragas ``IDBasedContextRecall`` metric on one pipeline result.

    Args:
        run: Mapping with a ``retrieved_context_ids`` key.
        example: Mapping with a ``reference_context_ids`` key.

    Returns:
        Whatever ``IDBasedContextRecall.single_turn_ascore`` returns for the sample.
    """
    turn = SingleTurnSample(
        retrieved_context_ids=run["retrieved_context_ids"],
        reference_context_ids=example["reference_context_ids"],
    )
    return await IDBasedContextRecall().single_turn_ascore(turn)

In [49]:
await ragas_context_recall_id_based(result, reference_output)

0.25