In [1]:
import os

from datasets import load_dataset
from rag_eval.rag.rag import *
from rag_eval.utils.client import llm
from rag_eval.metrics.retrieval_metrics import contextual_precision_llm, evaluate_output


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the "train" split of the "hotpotqa" configuration of rungalileo/ragbench.
ragbench_hotpotqa = load_dataset(
    path="rungalileo/ragbench",
    name="hotpotqa",
    split="train",
)


In [3]:
def prepare_data_point(ragbench_dict):
    """Convert one RAGBench record into the dictionary used by the evaluation cells.

    Args:
        ragbench_dict: Mapping with the keys ``"documents_sentences"``,
            ``"all_relevant_sentence_keys"``, ``"response"`` and ``"question"``.
            ``"documents_sentences"`` is iterated as a sequence of documents,
            each a sequence of entries whose element 0 is the sentence key and
            element 1 the sentence text.

    Returns:
        dict with the keys
            ``"question"``: the record's question,
            ``"context"``: list of all sentence texts, in the insertion order
                of ``"context_dict"``,
            ``"reference_context"``: texts of the sentences named in
                ``"all_relevant_sentence_keys"``, in that order,
            ``"reference"``: the record's ``"response"``,
            ``"context_dict"``: mapping of sentence key to sentence text.

    Raises:
        KeyError: if a relevant sentence key does not occur in any document.
    """
    # Flatten every document into a single key -> text lookup.
    context_dict = {
        sentence[0]: sentence[1]
        for document in ragbench_dict["documents_sentences"]
        for sentence in document
    }

    reference_sentences = [
        context_dict[key] for key in ragbench_dict["all_relevant_sentence_keys"]
    ]
    reference = ragbench_dict["response"]

    return {
        "question": ragbench_dict["question"],
        # Materialise a real list: a dict view never compares equal to a list,
        # cannot be indexed, and would silently follow later changes to
        # context_dict.
        "context": list(context_dict.values()),
        "reference_context": reference_sentences,
        "reference": reference,
        "context_dict": context_dict,
    }
    


In [4]:
# Convert the first ten dataset records into evaluation data points.
test_set = [prepare_data_point(ragbench_hotpotqa[idx]) for idx in range(10)]



In [None]:
def evaluate_instance(test_instance):
    """Run the RAG pipeline on one prepared data point and score the result.

    A fresh ``rag`` model is built for the instance, its context sentences are
    indexed, documents are retrieved for the question, an answer is generated,
    and everything is handed to ``evaluate_output``.

    Args:
        test_instance: dict providing the keys ``"context"``, ``"question"``,
            ``"reference"`` and ``"reference_context"`` (the shape returned by
            ``prepare_data_point``).

    Returns:
        The value returned by ``evaluate_output`` for this instance.
    """
    # Read the key from the environment rather than hard-coding it in the
    # notebook; when the variable is unset this falls back to the empty string
    # that was passed before.
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

    rag_model = rag(OPENAI_API_KEY)
    rag_model.process_docs_to_vectorstore(list(test_instance["context"]))
    retrieved = rag_model.retrieve_docs(test_instance["question"])
    # Element 0 of each retrieved entry carries the text in ``page_content``.
    docs = [hit[0].page_content for hit in retrieved]
    response = rag_model.query(test_instance["question"])

    return evaluate_output(
        query=test_instance["question"],
        response=response,
        retrieval_list=docs,
        ground_truth=test_instance["reference"],
        reference_list=test_instance["reference_context"],
    )

In [6]:
# Score every prepared data point; results keep the order of test_set.
outputs = list(map(evaluate_instance, test_set))


In [10]:
# Bind the evaluation results to a second name and display them.
output_a = outputs
output_a

[{'contextual_precision': (1.0,
   {'classification': [0.5, 1, 0.5, 0, 0],
    'explanation': ["The first chunk confirms that the school is named after John Rankin Rogers, which is relevant to the school’s name as implied in the query, but does not mention the school district. So, it's partially relevant.",
     'The second chunk directly states that Governor John R. Rogers High School is in the Puyallup School District of Washington, which fully supports the answer.',
     "The third chunk provides biographical information about John Rankin Rogers, connecting to the namesake context, but does not mention anything about the school district or the school's location, so it’s only partially relevant due to the name linkage.",
     "The fourth chunk refers to a Rogers High School in Texas, not related to the school in the query or answer, so it's not relevant.",
     "The fifth chunk expands on the Texas Rogers High School and its district, not concerning the Washington school or its distr

### ARES context relevance scoring

In [None]:
from rag_eval.utils.client import llm

# For every test question: retrieve chunks with a fresh RAG model, ask the
# judge LLM whether each chunk is relevant, and record
#   output        - share of retrieved chunks judged relevant (0 if none retrieved)
#   output_by_doc - the raw judge reply for each chunk
#   docs_by_docs  - the retrieved chunk texts
output = []
output_by_doc = []
docs_by_docs = []
for item in test_set:
    # Read the key from the environment rather than hard-coding it; falls back
    # to the empty string that was passed before when the variable is unset.
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

    rag_model = rag(OPENAI_API_KEY)
    rag_model.process_docs_to_vectorstore(list(item["context"]))
    retrieved = rag_model.retrieve_docs(item["question"])
    # Element 0 of each retrieved entry carries the text in ``page_content``.
    docs = [hit[0].page_content for hit in retrieved]
    # No answer is generated here: this metric only judges the retrieved
    # chunks, so the previously unused rag_model.query(...) call was dropped.
    score = 0
    doc_outputs = []
    for chunk in docs:

        # NOTE(review): the prompt text contains stray quote characters
        # (before "Output" and "Do not"); left untouched so judge behaviour is
        # unchanged — confirm whether they should be removed.
        prompt = (
        f"""You are an expert dialogue agent. 
        Your task is to analyze the provided document and determine whether it is relevant for responding to the dialogue. 
        In your evaluation, you should consider the content of the document and how it relates to the provided dialogue. 
        'Output your final verdict by strictly following this format: [[Yes]]" if the document is relevant and "[[No]]" if the document provided is not relevant. 
        "Do not provide any additional explanation for your decision.\n\n
        
        Question: {item['question']}
        Document: {chunk}
        """
        )
        model = llm()
        res = model.query(prompt)

        # Match the exact verdict token requested by the prompt, so a stray
        # "Yes" elsewhere in the reply is not counted as a positive verdict.
        if "[[Yes]]" in res:
            score += 1
        doc_outputs.append(res)

    output_by_doc.append(doc_outputs)
    docs_by_docs.append(docs)
    # Guard the division so an empty retrieval yields 0 instead of raising.
    output.append(score / len(docs) if docs else 0)

    

In [12]:
# Show the retrieved chunk texts gathered for each test_set entry.
docs_by_docs

[['Commonly referred to as "Rogers" or "RHS," the high school is named after former Washington State governor John Rankin Rogers.',
  'Governor John R. Rogers High School is a high school in the Puyallup School District of Washington, United States.',
  'John Rankin Rogers (September 4, 1838 – December 26, 1901) was the third Governor of the state of Washington.',
  'Rogers High School is a 3A public high school located in Rogers, Texas (USA).',
  'It is part of the Rogers Independent School District located in southeastern Bell County.'],
 ['The 44-lap race was won by Daniel Ricciardo for the Red Bull Racing team, after starting from fifth position.',
  'After Mark Webber announced his retirement from Formula One, Ricciardo was confirmed as his replacement at Red Bull Racing for 2014.',
  'Daniel Joseph Ricciardo ( ; born 1 July 1989) is an Australian racing driver who is currently competing in Formula One for Red Bull Racing.',
  'In 2005 he switched to the Formula Renault 3.5 with S

In [13]:
# Show the raw judge replies recorded for each retrieved chunk.
output_by_doc

[['[[No]]', '[[Yes]]', '[[No]]', '[[No]]', '[[No]]'],
 ['[[Yes]]', '[[No]]', '[[Yes]]', '[[No]]', '[[No]]'],
 ['[[No]]', '[[No]]', '[[Yes]]', '[[No]]', '[[No]]'],
 ['[[No]]', '[[Yes]]', '[[No]]', '[[No]]', '[[No]]'],
 ['[[No]]', '[[No]]', '[[No]]', '[[No]]', '[[No]]'],
 ['[[No]]', '[[No]]', '[[No]]', '[[No]]', '[[No]]'],
 ['[[No]]', '[[No]]', '[[No]]', '[[No]]', '[[Yes]]'],
 ['[[No]]', '[[No]]', '[[No]]', '[[No]]', '[[No]]'],
 ['[[Yes]]', '[[No]]', '[[Yes]]', '[[No]]', '[[No]]'],
 ['[[No]]', '[[No]]', '[[No]]', '[[No]]', '[[No]]']]

### Ragas contextual precision and recall

In [None]:
from rag_eval.metrics.retrieval_metrics import ragas_contextual_precision_llm, ragas_contextual_recall_llm

# For every test question: retrieve chunks and generate an answer with a fresh
# RAG model, then collect the two Ragas metric values in parallel lists.
output_precision = []
output_recall = []
for item in test_set:
    # Read the key from the environment rather than hard-coding it; falls back
    # to the empty string that was passed before when the variable is unset.
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
    rag_model = rag(OPENAI_API_KEY)
    rag_model.process_docs_to_vectorstore(list(item["context"]))
    retrieved = rag_model.retrieve_docs(item["question"])
    # Element 0 of each retrieved entry carries the text in ``page_content``.
    docs = [hit[0].page_content for hit in retrieved]
    response = rag_model.query(item["question"])

    output_precision.append(
        ragas_contextual_precision_llm(
            question=item["question"],
            retrieved_context=docs,
            reference=item["reference"],
        )
    )
    output_recall.append(
        ragas_contextual_recall_llm(
            question=item["question"],
            response=response,
            reference=item["reference"],
            retrieved_context=docs,
        )
    )
    

In [15]:
# Show the Ragas contextual precision value collected for each test_set entry.
output_precision

[0.49999999995,
 0.8333333332916666,
 0.3333333333,
 0.0,
 0.0,
 0.0,
 0.36666666664833336,
 0.0,
 0.0,
 0.0]

In [16]:
# Show the Ragas contextual recall value collected for each test_set entry.
output_recall

[1.0, 1.0, 1.0, 1.0, 0.0, 0.3333333333333333, 1.0, 0.0, 1.0, 0.5]

In [22]:
import pandas as pd

# One row per entry of output_a, extended with the ARES chunk texts and judge
# replies and the two Ragas metric columns.
output_df = pd.DataFrame(output_a).assign(
    ares_contextual_relevance_docs=docs_by_docs,
    ares_contextual_relevance_scores=output_by_doc,
    ragas_contextual_precision=output_precision,
    ragas_contextual_recall=output_recall,
)

In [23]:
# Display the combined results table.
output_df 

Unnamed: 0,contextual_precision,contextual_recall,contextual_relevancy,mmr,ares_contextual_relevance_docs,ares_contextual_relevance_scores,ragas_contextual_precision,ragas_contextual_recall
0,"(1.0, {'classification': [0.5, 1, 0.5, 0, 0], ...","(1.0, {'claim': ['Governor John R. Rogers High...",0.636616,0.5,"[Commonly referred to as ""Rogers"" or ""RHS,"" th...","[[[No]], [[Yes]], [[No]], [[No]], [[No]]]",0.5,1.0
1,"(1.0, {'classification': [1, 0.5, 1, 0, 0], 'e...","(1.0, {'claim': ['Daniel Ricciardo won the 44-...",0.55112,1.0,[The 44-lap race was won by Daniel Ricciardo f...,"[[[Yes]], [[No]], [[Yes]], [[No]], [[No]]]",0.833333,1.0
2,"(1.0, {'classification': [1, 0.5, 1, 0, 0], 'e...","(1.0, {'claim': ['Nick Offerman appeared in th...",0.406429,1.0,"[Nicholas ""Nick"" Offerman (born June 26, 1970)...","[[[No]], [[No]], [[Yes]], [[No]], [[No]]]",0.333333,1.0
3,"(1.0, {'classification': [0.5, 1, 1, 0, 0], 'e...","(1.0, {'claim': ['Crocosmia is found in an env...",0.528149,0.0,"[Crocosmia ( ; J. E. Planchon, 1851) (montbret...","[[[No]], [[Yes]], [[No]], [[No]], [[No]]]",0.0,1.0
4,"(0.3333333333333333, {'classification': [0, 0,...","(0.5, {'claim': ['Dennis Bruce Allen was the m...",0.449609,0.0,[The Dingoes is an Australian country rock ban...,"[[[No]], [[No]], [[No]], [[No]], [[No]]]",0.0,0.0
5,"(1.0, {'classification': [1, 1, 1, 0.5, 0.5], ...","(0.3333333333333333, {'claim': ['Both American...",0.46068,1.0,[The Four Seasons is an American rock and pop ...,"[[[No]], [[No]], [[No]], [[No]], [[No]]]",0.0,0.333333
6,"(0.8666666666666667, {'classification': [0.5, ...","(0.5, {'claim': ['The king deposed in the Glor...",0.580098,0.0,[He was the great-grandson of the Honourable R...,"[[[No]], [[No]], [[No]], [[No]], [[Yes]]]",0.366667,1.0
7,"(0.4777777777777777, {'classification': [0, 0,...","(0.0, {'claim': ['No. 3 Elementary Flying Trai...",0.584893,0.0,[It was one of twelve elementary flying traini...,"[[[No]], [[No]], [[No]], [[No]], [[No]]]",0.0,0.0
8,"(1.0, {'classification': [1, 0.5, 1, 0, 0], 'e...","(1.0, {'claim': ['Catcha Lake is located in th...",0.437898,1.0,[The Hedley Mascot Mine was a gold mine in Hed...,"[[[Yes]], [[No]], [[Yes]], [[No]], [[No]]]",0.0,1.0
9,"(0.95, {'classification': [0.5, 1, 1, 0, 1], '...","(0.8333333333333334, {'claim': ['Ulli Lommel h...",0.474213,0.0,[Ulli Lommel (born 21 December 1944) is a Germ...,"[[[No]], [[No]], [[No]], [[No]], [[No]]]",0.0,0.5
