### RAG Setup

In [None]:
# %pip install sentence_transformers

In [1]:

# Step 1: Define Sample Documents
documents = [
    {"section": "Employee Info", "content": "John's pay is processed on the 1st of every month."},
    {"section": "Employee Info", "content": "Mark is on a leave of absence until next Monday."},
    {"section": "Employee Info", "content": "Julie is a software engineer."},
    {"section": "Employee Info", "content": "Julie's pay is processed on the 1st of every month."},
    {"section": "Employee Info", "content": "Mark is a product manager."},
    {"section": "Employee Info", "content": "John is an AI architect and has salary of 500K USD."},
]

# Step 2: Get Content Texts
content_corpus = [doc["content"] for doc in documents]
content_corpus

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")
doc_vectors = model.encode(content_corpus)

doc_vectors
print(doc_vectors.shape)


  from .autonotebook import tqdm as notebook_tqdm
Loading weights: 100%|██████████| 103/103 [00:00<00:00, 1892.41it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


(6, 384)


In [2]:
import os
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv(override=True, dotenv_path="../.env.local")
my_api_key = os.getenv("OPEN_AI_API_KEY")

my_client = OpenAI(api_key=my_api_key)
# my_client


# Define your target function that performs retrieval per-question
def ask_question_open_ai(prompt, context=""):
    """Call the LLM with the provided prompt and context.

    IMPORTANT: use the passed-in prompt (not a global variable) so each
    evaluation example can be answered correctly.
    """
    llm_response = my_client.chat.completions.create(
        model="gpt-5-nano",
        messages=[
            {"role": "system", "content": '''
             You are an AI assistant who answers only based on the given context.
             '''},
            {"role": "user", "content": f"Context: {context}\n\nUser Question: {prompt}"}
        ]

    )
    print (llm_response)
    return llm_response.choices[0].message.content

In [3]:
ask_question_open_ai("When is Summer solstice in 2026?")

ChatCompletion(id='chatcmpl-DAoeCODndgDnbzAKi0mpe44eLUFy1', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Not provided in the given context. If you’d like, I can help look it up or you can share more details (e.g., location) for precision.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1771470340, model='gpt-5-nano-2025-08-07', object='chat.completion', service_tier='default', system_fingerprint=None, usage=CompletionUsage(completion_tokens=683, prompt_tokens=43, total_tokens=726, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=640, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))


'Not provided in the given context. If you’d like, I can help look it up or you can share more details (e.g., location) for precision.'

In [None]:
from langsmith import traceable

@traceable
def ask_question(inputs):
    question = inputs["question"]

    # Embed question
    query_vec = model.encode([question])[0]

    import numpy as np
    similarities = model.similarity(query_vec, doc_vectors)
    similarities = np.asarray(similarities).squeeze()

    # Top 3 retrieval
    top_3_indices = np.argsort(similarities)[::-1][:3]
    top_docs = [content_corpus[i] for i in top_3_indices]

    context = "\n---\n".join(top_docs)

    # Call LLM
    answer = ask_question_open_ai(question, context)

    return {
        "answer": answer,
        "contexts": top_docs  # must be list[str]
    }


### Evaluation

In [None]:
# %pip install --upgrade langsmith


In [None]:
import langsmith
print(langsmith.__version__)

In [None]:
reference_data = [
    {
        "inputs": {"question": "When is John's pay processed?"},
        "outputs": {"answer": "John's pay is processed on the 1st of every month."}
    },
    {
        "inputs": {"question": "What is Julie's job title?"},
        "outputs": {"answer": "Julie is a software engineer."}
    },
    {
        "inputs": {"question": "What is John's salary?"},
        "outputs": {"answer": "John has a salary of 500K USD."}
    },
    {
        "inputs": {"question": "What is Mark's current work status?"},
        "outputs": {"answer": "Mark is on a leave of absence until next Monday."}
    },
]



In [None]:
def evaluate_context_recall(contexts, reference_answer):
    #context: John
    context_text = " ".join(contexts)
    return int(reference_answer.lower() in context_text.lower())


def evaluate_context_precision(contexts, reference_answer):
    relevant = sum(reference_answer.lower() in c.lower() for c in contexts)
    return relevant / len(contexts)


def evaluate_faithfulness(answer, contexts):
    context_text = " ".join(contexts)
    return int(answer.lower() in context_text.lower())


def evaluate_answer_correctness(answer, reference_answer):
    return int(answer.lower().strip() == reference_answer.lower().strip())


In [None]:
for data in reference_data:
    question = data["inputs"]["question"]
    reference = data["outputs"]["answer"]

    result = ask_question({"question": question})
    answer = result["answer"]
    contexts = result["contexts"]

    recall = evaluate_context_recall(contexts, reference)
    precision = evaluate_context_precision(contexts, reference)
    faith = evaluate_faithfulness(answer, contexts)
    correctness = evaluate_answer_correctness(answer, reference)

    print("\n==============================")
    print("Question:", question)
    print("Answer:", answer)
    print("Reference:", reference)

    print("\nMetrics:")
    print("Context Recall:", recall)
    print("Context Precision:", round(precision, 2))
    print("Faithfulness:", faith)
    print("Answer Correctness:", correctness)
