## Loading Datasets


In [1]:
from datasets import load_dataset

hf_dataset = load_dataset(
    "google-research-datasets/nq_open", 
    split="validation",
    cache_dir="/mnt/d/datasets/nq_open"
)

hf_dataset = hf_dataset.select(range(10))

In [2]:
hf_dataset

Dataset({
    features: ['question', 'answer'],
    num_rows: 10
})

In [3]:
sample_docs = [
    "Albert Einstein proposed the theory of relativity, which transformed our understanding of time, space, and gravity.",
    "Marie Curie was a physicist and chemist who conducted pioneering research on radioactivity and won two Nobel Prizes.",
    "Isaac Newton formulated the laws of motion and universal gravitation, laying the foundation for classical mechanics.",
    "Charles Darwin introduced the theory of evolution by natural selection in his book 'On the Origin of Species'.",
    "Ada Lovelace is regarded as the first computer programmer for her work on Charles Babbage's early mechanical computer, the Analytical Engine."
]

## Evaluating the `QueryEngine`


### Preparing dataset

In [4]:
from langchain_ollama import ChatOllama
from langchain_huggingface import HuggingFaceEmbeddings

evaluator_llm = ChatOllama(
    model="mistral:7b",
    temperature=0.1,
)

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

    Found GPU0 NVIDIA GeForce GTX 1070 Ti which is of cuda capability 6.1.
    Minimum and Maximum cuda capability supported by this version of PyTorch is
    (7.0) - (12.0)
    
    Please install PyTorch with a following CUDA
    configurations:  12.6 following instructions at
    https://pytorch.org/get-started/locally/
    
NVIDIA GeForce GTX 1070 Ti with CUDA capability sm_61 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_70 sm_75 sm_80 sm_86 sm_90 sm_100 sm_120.
If you want to use the NVIDIA GeForce GTX 1070 Ti GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



In [None]:
from src.simple_rag import RAG



rag = RAG()
ragas_dataset = []

for i, item in enumerate(hf_dataset):
    print(f"{i+1} iteration")
    question = item["question"]
    answer = item["answer"]

    relevant_docs = rag.get_most_relevant_docs(question)
    response = rag.generate_answer(question, relevant_docs)
    ragas_dataset.append(
        {
            "user_input":question,
            "retrieved_contexts":relevant_docs,
            "response":response,
            "reference":answer
        }
    )

1 iteration


ValueError: Documents and their embeddings are not loaded.

### Evaluating dataset

In [None]:
# import metrics
from ragas.metrics import (
    ContextPrecision,
    ContextRecall,
    Faithfulness,
    AnswerRelevancy,
    AnswerCorrectness
)

# init metrics with evaluator LLM
from ragas.llms import LangchainLLMWrapper

evaluator_llm = LangchainLLMWrapper(evaluator_llm)
metrics = [
    Faithfulness(llm=evaluator_llm),
    AnswerRelevancy(llm=evaluator_llm),
    ContextPrecision(llm=evaluator_llm),
    ContextRecall(llm=evaluator_llm),
    AnswerCorrectness(llm=evaluator_llm)
]

In [None]:
ragas_dataset

EvaluationDataset(features=['user_input', 'reference_contexts', 'reference'], len=6)

In [None]:
from ragas.integrations.llama_index import evaluate

result = evaluate(
    metrics=metrics,
    dataset=ragas_dataset,
)

Running Query Engine:   0%|          | 0/6 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/30 [00:00<?, ?it/s]

In [None]:
from pprint import pprint
pprint(result, indent=4)

{'faithfulness': 0.8778, 'answer_relevancy': 0.9574, 'context_precision': 1.0000, 'context_recall': 1.0000, 'answer_correctness': 0.5576}


In [None]:
result.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,reference_contexts,response,reference,faithfulness,answer_relevancy,context_precision,context_recall,answer_correctness
0,What GitLab say about being ally and how it re...,"[---\ntitle: ""The Ally Lab""\ndescription: Lear...","[--- title: ""The Ally Lab"" description: Learn ...",Being an ally at GitLab involves taking proact...,"At GitLab, it is required to be inclusive, but...",1.0,0.91789,1.0,1.0,0.560615
1,How can Zoom be utilized to promote allyship i...,"[Teach people how to disagree, set the expecta...",[Skills and Behaviors of allies To be an effec...,Zoom can be utilized to promote allyship in di...,Zoom can be utilized to promote allyship by pr...,1.0,1.0,1.0,1.0,0.527588
2,How can company engagement surveys be utilized...,"[---\ntitle: ""Building an Inclusive Remote Cul...","[<1-hop>\n\n--- title: ""Building an Inclusive ...",Company engagement surveys can be utilized to ...,Company engagement surveys can be utilized to ...,0.933333,0.952433,1.0,1.0,0.54058
3,What are the goals of the Privilege for Sale a...,[A DIB Team Member will set up a time to discu...,"[<1-hop>\n\n--- title: ""Roundtables"" descripti...",The goals of the Privilege for Sale activity i...,The goals of the Privilege for Sale activity i...,1.0,0.982884,1.0,1.0,0.748442
4,What role does Marina Brownrigg play in the DI...,[---\ntitle: Diversity Inclusion & Belonging C...,[<1-hop>\n\nDIB Monthly Initiatives Call We ho...,Marina Brownrigg serves as the DRI (Directly R...,Marina Brownrigg serves as the Directly Respon...,0.333333,0.891996,1.0,1.0,0.510638
5,What are some essential skills and strategies ...,[--- One of the mistakes that often happens he...,[<1-hop>\n\nWhat it means to be an ally - Take...,Some essential skills and strategies for being...,"To be an effective ally, it is essential to id...",1.0,0.998924,1.0,1.0,0.457451


## Questions: SingleHop vs MultiHop


In [None]:
from ragas.testset.synthesizers.single_hop.specific import SingleHopSpecificQuerySynthesizer
from ragas.testset.synthesizers.multi_hop.specific import MultiHopSpecificQuerySynthesizer
from ragas.testset.synthesizers.multi_hop.abstract import MultiHopAbstractQuerySynthesizer

In [None]:
single_hop_testset = generator.generate_with_llamaindex_docs(
    documents=documents,
    testset_size=30,
    query_distribution=[(SingleHopSpecificQuerySynthesizer(name="single_hop_specific"), 1.0)]
)
multi_hop_specific_testset = generator.generate_with_llamaindex_docs(
    documents=documents,
    testset_size=30,
    query_distribution=[(MultiHopSpecificQuerySynthesizer(name="multi_hop_specific"), 1.0)]
)
multi_hop_abstract_testset = generator.generate_with_llamaindex_docs(
    documents=documents,
    testset_size=30,
    query_distribution=[(MultiHopAbstractQuerySynthesizer(name="multi_hop_abstract"), 1.0)]
)


Applying HeadlinesExtractor:   0%|          | 0/8 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/12 [00:00<?, ?it/s]

unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node


Applying SummaryExtractor:   0%|          | 0/11 [00:00<?, ?it/s]

Property 'summary' already exists in node '59c3f4'. Skipping!
Property 'summary' already exists in node 'bc5355'. Skipping!
Property 'summary' already exists in node 'b0f265'. Skipping!


Applying CustomNodeFilter:   0%|          | 0/16 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/43 [00:00<?, ?it/s]

Property 'summary_embedding' already exists in node 'bc5355'. Skipping!
Property 'summary_embedding' already exists in node 'b0f265'. Skipping!
Property 'summary_embedding' already exists in node '59c3f4'. Skipping!


Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/1 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/30 [00:00<?, ?it/s]

Applying HeadlinesExtractor:   0%|          | 0/8 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/12 [00:00<?, ?it/s]

unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node


Applying SummaryExtractor:   0%|          | 0/11 [00:00<?, ?it/s]

Property 'summary' already exists in node 'a047b3'. Skipping!
Property 'summary' already exists in node '59f5dd'. Skipping!
Property 'summary' already exists in node '09439b'. Skipping!


Applying CustomNodeFilter:   0%|          | 0/16 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/43 [00:00<?, ?it/s]

Property 'summary_embedding' already exists in node 'a047b3'. Skipping!
Property 'summary_embedding' already exists in node '09439b'. Skipping!
Property 'summary_embedding' already exists in node '59f5dd'. Skipping!


Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/1 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/30 [00:00<?, ?it/s]

Applying HeadlinesExtractor:   0%|          | 0/8 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/12 [00:00<?, ?it/s]

unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node


Applying SummaryExtractor:   0%|          | 0/11 [00:00<?, ?it/s]

Property 'summary' already exists in node 'b15a3d'. Skipping!
Property 'summary' already exists in node 'e031ab'. Skipping!
Property 'summary' already exists in node '232844'. Skipping!


Applying CustomNodeFilter:   0%|          | 0/16 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/43 [00:00<?, ?it/s]

Property 'summary_embedding' already exists in node '232844'. Skipping!
Property 'summary_embedding' already exists in node 'e031ab'. Skipping!
Property 'summary_embedding' already exists in node 'b15a3d'. Skipping!


Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/1 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/30 [00:00<?, ?it/s]

In [None]:
type(single_hop_testset)

ragas.testset.synthesizers.testset_schema.Testset

In [None]:
result = evaluate(
    query_engine=query_engine,
    metrics=metrics,
    dataset=single_hop_testset.to_evaluation_dataset(),
)
pprint(result, indent=4)

Running Query Engine:   0%|          | 0/30 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/150 [00:00<?, ?it/s]

{'faithfulness': 0.8113, 'answer_relevancy': 0.9375, 'context_precision': 0.8167, 'context_recall': 0.7389, 'answer_correctness': 0.4814}


In [None]:
result = evaluate(
    query_engine=query_engine,
    metrics=metrics,
    dataset=multi_hop_specific_testset.to_evaluation_dataset(),
)
pprint(result, indent=4)

Running Query Engine:   0%|          | 0/30 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/150 [00:00<?, ?it/s]

{'faithfulness': 0.8234, 'answer_relevancy': 0.9598, 'context_precision': 0.8500, 'context_recall': 0.7911, 'answer_correctness': 0.5685}


In [None]:
result = evaluate(
    query_engine=query_engine,
    metrics=metrics,
    dataset=multi_hop_abstract_testset.to_evaluation_dataset(),
)
pprint(result, indent=4)

Running Query Engine:   0%|          | 0/30 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/150 [00:00<?, ?it/s]

{'faithfulness': 0.7602, 'answer_relevancy': 0.9717, 'context_precision': 0.9833, 'context_recall': 0.8500, 'answer_correctness': 0.5806}
