# Demonstration RAG-Eval


Quick test: check that the evaluation pipeline works as planned.

In [3]:
from rag_eval.protocol import LLMClient
from rag_eval.client import AzureClient
from rag_eval.components import ClaimExtractor, EntailmentJudge
from rag_eval.data_models import EvalContainer, ClaimVerdict
from rag_eval.evaluators import AnswerPrecisionEvaluator, RetrievalPrecisionEvaluator
from rag_eval.coordinator import RAGEvaluator
from rag_eval.helpers import _safe_json
import json
from data.rag_eval_samples import test_examples
from openai import AzureOpenAI
from dotenv import load_dotenv
import os

In [4]:

# Load variables from a local .env file into the process environment.
# load_dotenv is imported at the top of the notebook but was never called, so
# os.getenv() only saw variables that were already exported in the shell.
# By default load_dotenv() does not override variables that are already set.
load_dotenv()

# Connection settings for the LLM client, read from the environment.
api_key = os.getenv("OPENAI_API_KEY")
api_version = os.getenv("OPENAI_API_VERSION")
base_url = os.getenv("OPENAI_CHATCOMPLETIONS_URL")
model = os.getenv("OPENAI_DEPLOYMENT")
password = os.getenv("CORRECT_PASSWORD")  # not used by the client below

# Single client instance that the evaluation cells further down reuse.
llm = AzureClient(
    api_key=api_key,
    api_version=api_version,
    base_url=base_url,
    model=model
)


In [None]:
# The same client fills both the extractor and the judge role.
coord = RAGEvaluator(llm_extractor=llm, llm_judge=llm)

# Keep the smoke test small: only the first two bundled examples.
data = test_examples[:2]

for i, ex in enumerate(data, start=1):
    # Header first, so the output shows which example is being evaluated.
    print("EXAMPLE", i)
    results = coord.evaluate_all(ex)
    pretty = json.dumps(results, indent=2)
    print(pretty)

EXAMPLE 1


KeyboardInterrupt: 

# Test Agent

Check if agent module actually works.

In [42]:
# Re-import the `agent.main` module so that edits to it are picked up
# without restarting the kernel.
import importlib
import inspect

import agent.main as main

main = importlib.reload(main)

# Debugging aids, kept commented out:
# print("Module file:", main.__file__)          # which file was loaded
# print(inspect.getsource(main.food_agent))     # exact source Python is executing

In [43]:
# load agent and pipeline for indexing docs
import agent.main as main
from rag.indexing import pdf_files, index_pipeline

In [44]:
# Run the indexing pipeline, handing `pdf_files` to its "converter" component
# as the `sources` input.
index_pipeline.run({"converter": {"sources": pdf_files}})

incorrect startxref pointer(1)
parsing for Object Streams
incorrect startxref pointer(1)
parsing for Object Streams
incorrect startxref pointer(1)
parsing for Object Streams
incorrect startxref pointer(1)
parsing for Object Streams
incorrect startxref pointer(1)
parsing for Object Streams
incorrect startxref pointer(1)
parsing for Object Streams
incorrect startxref pointer(1)
parsing for Object Streams
Batches: 100%|██████████| 2/2 [00:00<00:00,  2.54it/s]
ID '6813de646a27f38897ceead174c0e733a49594824454166a61a9058a4eb27340' already exists
ID '55ea35af218d8f635cc1f164380ae56bd9ab934880719bd83532052a312b01d8' already exists
ID 'fa907c2693059c9626593992e357cafda724bc44afba9d0d7b86565380415d76' already exists
ID '8db7c025cfc37b01c412b348b9371a6a3259c15b4fe7b957fff2e749af64d3f1' already exists
ID '17f5daa9f6e8c9fbca59050f0cc03b48124ae7aca7bfe5dc101b3e92ef5fafc7' already exists
ID '9d17892efc6f60f289cc10133c069fcc3d75225288d139060930cbf5a20d7b0b' already exists
ID 'cc3053f925e71ae06c2e20569

{'writer': {'documents_written': 0}}

In [37]:
# Smoke test: send a single greeting to the agent.
main.food_agent(user_input="hi")


▶️ Step 1: thinking…
🍕 Fooder: Hello! I’m Fooder, your food ordering agent. Are you hungry or thirsty today? Let me know what you’d like to order, or if you want to see which meals and drinks are available right now!


## RAG-Loop
A loop that passes the questions to the agent and extracts the generated answers alongside the retrieved contexts (plain text). It combines them with the ground truth and the query to form an `EvalContainer` object.

In [38]:
# Smoke test for query_gt_extractor: load the question battery untokenized
# and print the ground-truth entries.
from src.rag_eval.helpers import query_gt_extractor

queries, gt = query_gt_extractor(
    path="data/agent_eval_questions.json",
    tokenize=False,
)
print(gt)


['2010', 'La cucina povera', 'Auguste\u202fEscoffier', 'Jerry\u202fThomas', '1920\u202f–\u202f1933', 'Orecchiette', 'Tarte\u202fTatin', '8,50\u202f€', 'Horchata', 'Secondo', 'Béchamel', 'Reis', 'Mezcal', 'Churros', '2,00\u202f€', 'Risotto\u202fal\u202fTartufo\u202fNero\u202fdel\u202fMastino', 'Mole', 'Camembert', 'Tiramisù\u202fClassico\u202f&\u202fCoda\u202fdi\u202fVolpe', 'Rindfleisch', 'Madrid', 'R', '1969', '7', 'Ernest\xa0Hemingway', 'Mais, Bohnen, Chili, Kürbis, Kakao; UNESCO‑Jahr\xa02010', 'Mexikanische Küche\xa02010; Gastronomisches Mahl der Franzosen\xa02010', 'Parmigiano\u202fReggiano, Mozzarella\u202fdi\u202fBufala; Camembert, Roquefort', 'Reis (Risotto) für Norditalien; Olivenöl für die Provence', 'Prosecco Extra Dry del Cucciolo\u202f–\u202f50,00\u202f€; Vin Rouge Français\u202f(0,75\u202fl)\u202f–\u202f18,00\u202f€', 'Martini, Manhattan, Daiquiri; Negroni', 'Elote – gegrillter Mais mit Limette und Käse; Caprese (Mozzarella, Tomate, Basilikum)', 'Béchamel; Filetto\u202fdi\

This code cell creates a RAG-loop for the agent. It:  
- feeds the questions from the test battery to the agent  
- extracts the generated answer and retrieved context from the RAG-call
- returns a list of `EvalContainer`-objects storing question, ground truth, generated answer and retrieved context

It thus lays the foundation for the `RAGEvaluator`.

In [45]:
from src.rag_eval.helpers import query_gt_extractor
import agent.main as main
from rag_eval.data_models import EvalContainer

def rag_loop_agent() -> list:
    """Feed every question of the evaluation battery to the agent.

    Questions and ground truths are read from
    ``data/agent_eval_questions.json`` via ``query_gt_extractor``. For each
    question ``main.food_agent`` is called once with the question and once
    with ``"quit"``.

    Returns:
        A list with one entry per question, in battery order, holding
        whatever ``main.food_agent`` returned for that question. The list is
        empty when the battery contains no questions.

    Note:
        The entries are the raw agent return values, not yet
        ``EvalContainer`` objects (see the TODO in the loop). Previously only
        the result of the last question was returned, and an empty battery
        raised ``UnboundLocalError``.
    """
    # get questions and ground truth
    queries, gts = query_gt_extractor(path="data/agent_eval_questions.json", tokenize=False)

    results = []
    # `gt` is not used yet; it is kept in the loop for the TODO below.
    for query, gt in zip(queries, gts):
        results.append(main.food_agent(user_input=query))
        # The agent prints "Goodbye" on this input; presumably it ends the
        # session so the next question starts fresh (TODO confirm).
        main.food_agent(user_input="quit")

        # TODO: extract generated answers, gt, question, context and store in EvalContainer
    return results




In [46]:
# Run the agent over the whole question battery and keep what rag_loop_agent returns.
result = rag_loop_agent()


▶️ Step 1: thinking…
🍕 Fooder: Die mexikanische Küche wurde im Jahr 2010 von der UNESCO als immaterielles Kulturerbe der Menschheit anerkannt.
Goodbye 🍕

▶️ Step 1: thinking…
🍕 Fooder: Der italienische Begriff für die 'Küche der armen Leute' ist **Cucina povera**.  
Diese traditionelle Kochweise entstand aus einfachen Zutaten und hat viele berühmte, klassische Gerichte hervorgebracht.
Goodbye 🍕

▶️ Step 1: thinking…
🍕 Fooder: Der französische Koch, der im 19. Jahrhundert die Küchenorganisation revolutionierte und das berühmte Werk «Le Guide Culinaire» schrieb, ist Auguste Escoffier.
Goodbye 🍕

▶️ Step 1: thinking…

▶️ Step 2: thinking…
🍕 Fooder: Entschuldigung, beim Zugriff auf die Dokumente ist ein Fehler aufgetreten. Möchten Sie, dass ich noch einmal nachschaue, wer laut Cocktail-Dokument als Vater der modernen Barkultur gilt? Oder möchten Sie etwas anderes bestellen oder wissen?
Goodbye 🍕

▶️ Step 1: thinking…

▶️ Step 2: thinking…
🍕 Fooder: Entschuldigung, ich konnte das Cocktail-

## Eval Loop
Extract the query from an `EvalContainer` object like so:
```
container = test_examples[1]
print(container.query)
print(container)
```