# Evaluating Your RAG pipeline

In [46]:
from llama_index.core import Document, VectorStoreIndex
from llama_index.core.evaluation import generate_question_context_pairs, RetrieverEvaluator
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.schema import BaseNode, TextNode
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.chroma import ChromaVectorStore

import chromadb
import csv
import hashlib
import nest_asyncio
import os
import pandas as pd

nest_asyncio.apply()

## Download and prepare the data

In [2]:
!wget https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv

--2025-03-01 18:32:23--  https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
connected. to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... 
HTTP request sent, awaiting response... 200 OK
Length: 173646 (170K) [text/plain]
Saving to: ‘mini-llama-articles.csv’


2025-03-01 18:32:27 (116 KB/s) - ‘mini-llama-articles.csv’ saved [173646/173646]



In [5]:
chromaClient= chromadb.PersistentClient(path="./mini-llama-articles")
chromaCollection= chromaClient.get_or_create_collection("mini-llama-articles")

In [6]:
vectorStore= ChromaVectorStore(chroma_collection=chromaCollection)

In [8]:
rows= []
with open("./mini-llama-articles.csv", mode="r", encoding="utf-8") as file:
    csvReader = csv.reader(file)
    for index, row in enumerate(csvReader):
        if index == 0:
            continue
        rows.append(row)

len(rows)

14

In [15]:
documents = [ Document( text=row[1], metadata={"title": row[0], "url": row[2], "source_name": row[3]}, ) for row in rows ]
for index, doc in enumerate(documents):
    doc.id_ = f"doc_{index}"

In [21]:
def deterministicIdFunct(i: int, doc: BaseNode) -> str:
    """ Deterministic ID function for the text splitter.
        This will be used to generate a unique repeatable identifier for each node.
    """
    unique_identifier = doc.id_ + str(i)
    hasher = hashlib.sha256()
    hasher.update(unique_identifier.encode('utf-8')) 
    result= hasher.hexdigest()
    print(f'id:{doc.id_} digest:{result}')
    return result

In [22]:
textSplitter = TokenTextSplitter(separator=" ", chunk_size=512, chunk_overlap=128, id_func=deterministicIdFunct)

In [26]:
pipeline = IngestionPipeline(
    transformations=[
        textSplitter,
        OpenAIEmbedding(model = 'text-embedding-3-small'),
    ],
    vector_store= vectorStore
)

nodes = pipeline.run(documents=documents, show_progress=True)

Parsing nodes:   0%|          | 0/14 [00:00<?, ?it/s]

id:doc_0 digest:4ab5bd897f01474fc9b0049f95e31edae3ccd9e74d0f0acd3932b50a74d608b6
id:doc_0 digest:e470fa0d001e50b3ec3088022462a94ea7c87dd80106411b7d120f90b379e977
id:doc_0 digest:4b3a13a10f7ea2464249fb6aa64e9f403f8151daf24133dbcffbfa0e01fa0d74
id:doc_0 digest:98e9cbb20d5a2f5ab9d5d9712f9e66ef7123b584e1e1985cebef6bd4f41c0858
id:doc_0 digest:df6183049976174f912d271a7d08fda25e3086030c160fdc603face8a6000e00
id:doc_1 digest:de49ab9024a434ca1cd1efba258fbaa9a3e2d9a1bca3ab4a0349220cc1e2754f
id:doc_1 digest:15268fd9c2a45644a0c49ca1b4897b4fabfe3005fccee48af0acc7eea7dd0e9c
id:doc_1 digest:6d646836e0c2e6830a4c6d3147c3b1d28d3e92351cf0be1d27f5f3a18c520e3d
id:doc_1 digest:b7eaf40d5ed90dbefc226732645cf49e5f98fb471a1b56a4151f646b60891738
id:doc_1 digest:8bd2dacc5eca082fcea46f2e3aace5c8c3817dd817cffa9f1ab3800bd476a3d3
id:doc_1 digest:7d7e3d805418e033c4aa24a972a8358d33d94a60fef7af58a318efe9232be19b
id:doc_2 digest:567b14c826413d4ff28ecb510609350966136f2d0914c2d28eda5d8b3e646e82
id:doc_2 digest:2652e0efd386

Generating embeddings:   0%|          | 0/108 [00:00<?, ?it/s]

Add of existing embedding ID: 4ab5bd897f01474fc9b0049f95e31edae3ccd9e74d0f0acd3932b50a74d608b6
Add of existing embedding ID: e470fa0d001e50b3ec3088022462a94ea7c87dd80106411b7d120f90b379e977
Add of existing embedding ID: 4b3a13a10f7ea2464249fb6aa64e9f403f8151daf24133dbcffbfa0e01fa0d74
Add of existing embedding ID: 98e9cbb20d5a2f5ab9d5d9712f9e66ef7123b584e1e1985cebef6bd4f41c0858
Add of existing embedding ID: df6183049976174f912d271a7d08fda25e3086030c160fdc603face8a6000e00
Add of existing embedding ID: de49ab9024a434ca1cd1efba258fbaa9a3e2d9a1bca3ab4a0349220cc1e2754f
Add of existing embedding ID: 15268fd9c2a45644a0c49ca1b4897b4fabfe3005fccee48af0acc7eea7dd0e9c
Add of existing embedding ID: 6d646836e0c2e6830a4c6d3147c3b1d28d3e92351cf0be1d27f5f3a18c520e3d
Add of existing embedding ID: b7eaf40d5ed90dbefc226732645cf49e5f98fb471a1b56a4151f646b60891738
Add of existing embedding ID: 8bd2dacc5eca082fcea46f2e3aace5c8c3817dd817cffa9f1ab3800bd476a3d3
Add of existing embedding ID: 7d7e3d805418e033c4aa

## Query the Engine

In [34]:
index= VectorStoreIndex.from_vector_store(vectorStore)

In [31]:
llm = Gemini(model="models/gemini-2.0-flash", temperature=0, max_tokens=512)
queryEngine = index.as_query_engine(llm=llm, similarity_top_k=5)

res = queryEngine.query("How many parameters LLaMA 2 model has?")

print(res.response)

The LLaMA 2 model has 7 billion and 13 billion parameters.



## Evaluation
LlamaIndex is able to use an LLM to generate a number of questions for each node.<br>
<b>This way, we already know the node that must be selected to answer to a question.</b>

In [42]:
ragEvalDatasetFilename= "./rag_eval_dataset.json" 
if not os.path.exists( ragEvalDatasetFilename):    
    llmLite = OpenAI(model="gpt-4o-mini", temperature=0, max_tokens=512)
    ragEvaluationDataset = generate_question_context_pairs(nodes, llm=llmLite, num_questions_per_chunk=1)
    ragEvaluationDataset.save_json(ragEvalDatasetFilename)
    print('Generates the dataset')
else:
    print(f'The dataset file {ragEvalDatasetFilename} already exists')

The dataset file ./rag_eval_dataset.json already exists


In [47]:
def displayResultsRetriever(name, evalResults):
    """Display results from evaluate."""

    metric_dicts = []
    for evalResult in evalResults:
        metric_dict = evalResult.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    hit_rate = full_df["hit_rate"].mean()
    mrr = full_df["mrr"].mean()

    metric_df = pd.DataFrame(
        {"Retriever Name": [name], "Hit Rate": [hit_rate], "MRR": [mrr]}
    )

    return metric_df

In [50]:
for k in [2, 4, 6, 8, 10]:
    retriever = index.as_retriever(similarity_top_k=k)
    retriever_evaluator = RetrieverEvaluator.from_metric_names(["mrr", "hit_rate"], retriever=retriever)
    eval_results = await retriever_evaluator.aevaluate_dataset( ragEvaluationDataset, workers=32)
    print(displayResultsRetriever(f"Retriever top_{k}", eval_results))

    Retriever Name  Hit Rate       MRR
0  Retriever top_2  0.027778  0.023148
    Retriever Name  Hit Rate       MRR
0  Retriever top_4  0.037037  0.025463
    Retriever Name  Hit Rate      MRR
0  Retriever top_6  0.064815  0.03071
    Retriever Name  Hit Rate       MRR
0  Retriever top_8  0.111111  0.036828
     Retriever Name  Hit Rate       MRR
0  Retriever top_10   0.12037  0.037856
