In [2]:
import openai
from qdrant_client import QdrantClient

from langsmith import Client
from qdrant_client import QdrantClient

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

  from .autonotebook import tqdm as notebook_tqdm


### Download an example reference data point from LangSmith

In [3]:
# LangSmith client used below to read the evaluation dataset and its examples.
client = Client()

In [4]:
# Look up the evaluation dataset by name; its id is used to list examples.
dataset = client.read_dataset(dataset_name="rag-evaluation-dataset")

In [5]:
dataset

Dataset(name='rag-evaluation-dataset', description='Dataset for evaluating RAG pipeline', data_type=<DataType.kv: 'kv'>, id=UUID('ec8fa4b1-55fc-47db-b780-917c43f0f74f'), created_at=datetime.datetime(2026, 2, 4, 4, 26, 24, 831529, tzinfo=TzInfo(0)), modified_at=datetime.datetime(2026, 2, 4, 4, 26, 24, 831529, tzinfo=TzInfo(0)), example_count=38, session_count=0, last_session_start_time=None, inputs_schema=None, outputs_schema=None, transformations=None, metadata={'runtime': {'sdk': 'langsmith-py', 'library': 'langsmith', 'runtime': 'python', 'platform': 'macOS-15.7.3-arm64-arm-64bit', 'sdk_version': '0.6.2', 'runtime_version': '3.12.12', 'langchain_version': '1.2.3', 'py_implementation': 'CPython', 'langchain_core_version': '1.2.7'}})

In [6]:
# Only the first example is displayed, so request a single example
# instead of downloading ten and discarding nine.
list(client.list_examples(dataset_id=dataset.id, limit=1))[0].outputs

{'ground_truth': 'Warranty statements in the chunks are generic (some mention 12 months, lifetime support etc.), but there is no information about extended warranty plans by region. Please consult the seller or warranty policy for region-specific extended plans.',
 'reference_context_ids': [],
 'reference_descriptions': []}

In [7]:
# Only the first example is displayed, so request a single example
# instead of downloading ten and discarding nine.
list(client.list_examples(dataset_id=dataset.id, limit=1))[0].inputs

{'question': 'Questions I cannot answer from available chunks: Are any of these products covered by extended warranty plans available in my region?'}

In [8]:
# Fetch the reference example once and read both fields from the same record.
# Listing the dataset twice costs an extra network round trip and does not
# guarantee that inputs and outputs come from the same example.
reference_examples = list(client.list_examples(dataset_id=dataset.id, limit=1))
if not reference_examples:
    raise ValueError(f"Dataset {dataset.id} contains no examples.")

reference_input = reference_examples[0].inputs
reference_output = reference_examples[0].outputs

### RAG Pipeline

In [11]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector for ``text``.

    Args:
        text: Input passed to the OpenAI embeddings endpoint.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    embedding_response = openai.embeddings.create(model=model, input=text)
    first_item = embedding_response.data[0]
    return first_item.embedding


def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-01"):
    """Embed ``query`` and fetch the ``k`` nearest points from Qdrant.

    Args:
        query: Text to embed and search for.
        qdrant_client: Client object exposing ``query_points``.
        k: Maximum number of points to request.
        collection_name: Qdrant collection to search. Defaults to the
            collection this notebook was written against.

    Returns:
        A dict of four parallel lists, one entry per returned point:
        ``retrieved_context_ids`` (payload ``parent_asin``),
        ``retrieved_context`` (payload ``description``),
        ``retrieved_context_ratings`` (payload ``average_rating``) and
        ``similarity_scores`` (point ``score``).

    Raises:
        KeyError: If a returned payload lacks one of the expected keys.
    """
    query_embedding = get_embedding(query)

    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )

    points = results.points
    payloads = [point.payload for point in points]

    return {
        "retrieved_context_ids": [payload["parent_asin"] for payload in payloads],
        "retrieved_context": [payload["description"] for payload in payloads],
        "retrieved_context_ratings": [payload["average_rating"] for payload in payloads],
        "similarity_scores": [point.score for point in points],
    }


def process_context(context):
    """Render retrieved items as one bullet line per item.

    Args:
        context: Mapping with the parallel lists ``retrieved_context_ids``,
            ``retrieved_context`` and ``retrieved_context_ratings``.

    Returns:
        A string with one newline-terminated ``- ID: ..., rating: ...,
        description: ...`` line per item, or an empty string when there
        are no items.
    """
    rows = zip(
        context["retrieved_context_ids"],
        context["retrieved_context"],
        context["retrieved_context_ratings"],
    )
    return "".join(
        f"- ID: {item_id}, rating: {item_rating}, description: {description}\n"
        for item_id, description, item_rating in rows
    )


def build_prompt(preprocessed_context, question):
    """Build the shopping-assistant prompt for one question.

    Args:
        preprocessed_context: Pre-formatted text listing the available
            products, inserted under the ``Context:`` heading.
        question: The user's question, inserted under ``Question:``.

    Returns:
        The full prompt string, starting and ending with a newline.
    """
    prompt = f"""
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use the word "context"; refer to it as the available products instead.

Context:
{preprocessed_context}

Question:
{question}
"""

    return prompt


def generate_answer(prompt):
    """Send ``prompt`` to the chat completions endpoint and return the reply.

    The prompt is sent as a single system message to the ``gpt-5-nano``
    model with ``reasoning_effort="minimal"``.

    Args:
        prompt: Full prompt text.

    Returns:
        The message content of the first returned choice.
    """
    messages = [{"role": "system", "content": prompt}]
    completion = openai.chat.completions.create(
        model="gpt-5-nano",
        messages=messages,
        reasoning_effort="minimal",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def rag_pipeline(question, top_k=5, qdrant_client=None):
    """Answer ``question`` with retrieval-augmented generation.

    Retrieves the ``top_k`` nearest items, formats them, builds the prompt
    and generates the answer.

    Args:
        question: The user's question.
        top_k: Number of items to retrieve.
        qdrant_client: Optional client to reuse across calls. When omitted,
            a client for ``http://localhost:6333`` is created for this call
            and closed once retrieval has finished.

    Returns:
        A dict with ``answer``, ``question``, ``retrieved_context_ids``,
        ``retrieved_context`` and ``similarity_scores``. The retrieved
        ratings are used in the prompt but not included in the result.
    """
    # Only close a client this function created; a caller-supplied client
    # stays open so it can be reused.
    owns_client = qdrant_client is None
    if owns_client:
        qdrant_client = QdrantClient(url="http://localhost:6333")

    try:
        retrieved_context = retrieve_data(question, qdrant_client, top_k)
    finally:
        if owns_client:
            qdrant_client.close()

    preprocessed_context = process_context(retrieved_context)
    prompt = build_prompt(preprocessed_context, question)
    answer = generate_answer(prompt)

    final_result = {
        "answer": answer,
        "question": question,
        "retrieved_context_ids": retrieved_context["retrieved_context_ids"],
        "retrieved_context": retrieved_context["retrieved_context"],
        "similarity_scores": retrieved_context["similarity_scores"],
    }

    return final_result

In [12]:
# Smoke-test the pipeline end to end with an ad-hoc question.
rag_pipeline("Can I get some charger?", top_k=5)

{'answer': 'Yes. Here are charger options from the available products:\n- B0BYYLJRHT: 3-pack of MFi certified iPhone Lightning cables, 3 ft each (Black). Compatible with iPhone models including iPhone 13/12/11, etc.\n- B0BFPZGYLD: 5 in 1 USB C to Multi Charging Cable (10 ft/3 m). Includes Lightning, USB C, and Micro USB connectors; supports multiple devices but not for iPad with USB C PD on some ports.\n- B09TNXY54Y: MUXA 6 Pack of colorful Nylon Lightning Cables, various lengths (3/3/6/6/10/10 ft) for iPhone models listed; MFi certified.\n- B0BV6PWVCG: GREPHONE 2 Pack USB C to Lightning Cables, 6 ft each; MFi certified and supports fast charging.\n- B0BGDQLZD2: Mixblu Charger Cable Replacement for Fitbit Inspire 3 (2 Pack, 3.3 ft).\n\nTell me which type you need (iPhone Lightning cables, USB-C to Lightning, or multi-charger) and preferred length, quantity, and budget, and I’ll help you pick.',
 'question': 'Can I get some charger?',
 'retrieved_context_ids': ['B0BYYLJRHT',
  'B0BFPZGY

### RAGAS metrics

In [13]:
from ragas.dataset_schema import SingleTurnSample 
from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy

  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy


In [14]:
# LLM and embedding model passed to the RAGAS metrics, each wrapped in the
# corresponding RAGAS LangChain adapter.
ragas_llm = LangchainLLMWrapper(
    ChatOpenAI(model="gpt-4.1-mini"),
)
ragas_embeddings = LangchainEmbeddingsWrapper(
    OpenAIEmbeddings(model="text-embedding-3-small"),
)

  ragas_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
  ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))


In [15]:
reference_input

{'question': 'Questions I cannot answer from available chunks: Are any of these products covered by extended warranty plans available in my region?'}

In [16]:
reference_output

{'ground_truth': 'Warranty statements in the chunks are generic (some mention 12 months, lifetime support etc.), but there is no information about extended warranty plans by region. Please consult the seller or warranty policy for region-specific extended plans.',
 'reference_context_ids': [],
 'reference_descriptions': []}

In [17]:
# Run the pipeline on the reference question so the metrics below can score it.
result = rag_pipeline(reference_input["question"])

In [18]:
result

{'answer': 'I don’t have information in the available products about extended warranty plans or regional coverage. If you’d like, I can check if any of these items mention warranties in their descriptions or help you compare general warranty options.',
 'question': 'Questions I cannot answer from available chunks: Are any of these products covered by extended warranty plans available in my region?',
 'retrieved_context_ids': ['B08BX2L8F2',
  'B09Y39DSWR',
  'B08BR9X387',
  'B0B3MMP22L',
  'B0BZ5R7CVP'],
 'retrieved_context': ['Garmin 890 8-inch RV GPS Navigator Bundle with Car Charger Expander and Hard Shell EVA Case for Tablets/GPS (010-02425-00) Built in Wi-Fi connectivity makes updating your maps of North America a breeze. This large 8" GPS navigator features a bright, high-resolution edge-to-edge touchscreen display so you can easily see important information Built-in Wi-Fi connectivity makes it easy to keep your maps and software up to date without using a computer IN THE BOX: RV 

In [19]:
async def ragas_faithfulness(run, example):
    """Run the RAGAS ``Faithfulness`` metric on one pipeline result.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context``.
        example: Not read by this metric.

    Returns:
        The value awaited from the scorer's ``single_turn_ascore``.
    """
    turn = SingleTurnSample(
        user_input=run["question"],
        response=run["answer"],
        retrieved_contexts=run["retrieved_context"],
    )
    metric = Faithfulness(llm=ragas_llm)
    score = await metric.single_turn_ascore(turn)
    return score

In [20]:
# ragas_faithfulness does not read its example argument, so a placeholder is passed.
await ragas_faithfulness(result, "")

0.75

In [21]:
async def ragas_responce_relevancy(run, example):
    """Run the RAGAS ``ResponseRelevancy`` metric on one pipeline result.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context``.
        example: Not read by this metric.

    Returns:
        The value awaited from the scorer's ``single_turn_ascore``.
    """
    turn = SingleTurnSample(
        user_input=run["question"],
        response=run["answer"],
        retrieved_contexts=run["retrieved_context"],
    )
    metric = ResponseRelevancy(llm=ragas_llm, embeddings=ragas_embeddings)
    score = await metric.single_turn_ascore(turn)
    return score

In [22]:
# ragas_responce_relevancy does not read its example argument, so a placeholder is passed.
await ragas_responce_relevancy(result, "")

np.float64(0.0)

In [23]:
async def ragas_context_precision_id_based(run, example):
    """Run the RAGAS ``IDBasedContextPrecision`` metric on one result.

    Args:
        run: Mapping with ``retrieved_context_ids``.
        example: Mapping with ``reference_context_ids``.

    Returns:
        The value awaited from the scorer's ``single_turn_ascore``.
    """
    turn = SingleTurnSample(
        retrieved_context_ids=run["retrieved_context_ids"],
        reference_context_ids=example["reference_context_ids"],
    )
    metric = IDBasedContextPrecision()
    score = await metric.single_turn_ascore(turn)
    return score

In [24]:
# reference_output has an empty "reference_context_ids" list for this example.
await ragas_context_precision_id_based(result, reference_output)

0.0

In [25]:
async def ragas_context_recall_id_based(run, example):
    """Run the RAGAS ``IDBasedContextRecall`` metric on one result.

    Args:
        run: Mapping with ``retrieved_context_ids``.
        example: Mapping with ``reference_context_ids``.

    Returns:
        The value awaited from the scorer's ``single_turn_ascore``.
    """
    turn = SingleTurnSample(
        retrieved_context_ids=run["retrieved_context_ids"],
        reference_context_ids=example["reference_context_ids"],
    )
    metric = IDBasedContextRecall()
    score = await metric.single_turn_ascore(turn)
    return score

In [26]:
# This example has no reference context IDs, so recall cannot be computed and the result is nan.
await ragas_context_recall_id_based(result, reference_output)

No reference context IDs provided, cannot calculate recall.


nan