In [1]:
from core.agent import agent_executor, agent_llm
from core.CustomTools import fetch_and_rerank

from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
    AgentTokenBufferMemory,
)


from datasets import Dataset

from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
)

import regex as re
import glob

from core.ragas import RagasCallbackHandler

# ---

#agent_memory = AgentTokenBufferMemory(llm=agent_llm)
callback_handler = RagasCallbackHandler()
def query_agent(prompt):
   response = agent_executor(
            {"input": prompt, "history": []},                        
            callbacks=[callback_handler],
            include_run_info=True,
        )
   return response



In [2]:
# source data

filenames = []
for _file in glob.glob("ragas_data/*.txt"):
    filenames.append(_file)

questions = []
ground_truths = []

DELIM = "__###__"
for name in filenames:
    file_content = None
    with open(name) as f:                
        file_content = f.read()

    content = file_content.split(DELIM)
    questions.append(content[0])
    truths = []
    truths.append(content[1])
    ground_truths.append(truths)

print("Questions:", len(questions))
print("Grounded Truths:", len(ground_truths))
    

Questions: 6
Grounded Truths: 6


In [3]:
answers = []
contexts = []

# Inference
for query in questions:
  
  # retrieve an answer
  response = query_agent(query)
  answers.append(response["output"])

  inputs, outputs = callback_handler.clear_collected_outputs()
  print("Input: ", len(inputs))
  print("Outputs: ", len(outputs))

  results = []  
  [results.append(doc) for doc in outputs]          
  contexts.append(results)  

print("Answer len:", len(contexts))
print("Context len:", len(contexts))





[1m> Entering new AgentExecutor chain...[0m
starting tool ...  global error handler RouteBuilder
Embedding ms:  0.34453701972961426
0.802 :  _manual_error-handler.html.txt_1
0.786 :  _next_lpr-component.html.txt_3
0.799 :  _eips_message-history.html.txt_2
0.8 :  _manual_exception-clause.html.txt_12
0.801 :  _manual_error-handler.html.txt_2
ending tool ... 

[1m> Finished chain.[0m
Input:  1
Outputs:  1


[1m> Entering new AgentExecutor chain...[0m
starting tool ...  jBang
ending tool ... 

[1m> Finished chain.[0m
Input:  1
Outputs:  1


[1m> Entering new AgentExecutor chain...[0m
starting tool ...  Rest DSL
Embedding ms:  0.3432190418243408
0.825 :  _manual_rest-dsl.html.txt_2
0.819 :  _manual_rest-dsl.html.txt_1
0.811 :  _manual_rest-dsl.html.txt_0
0.81 :  _manual_rest-dsl.html.txt_7
0.805 :  _next_rest-component.html.txt_0
ending tool ... 

[1m> Finished chain.[0m
Input:  1
Outputs:  1


[1m> Entering new AgentExecutor chain...[0m
starting tool ...  file idempotent
Em

In [5]:
# To dict
data = {
    "question": questions,
    "answer": answers,
    "contexts": contexts,
    "ground_truths": ground_truths
}

# Convert dict to dataset
dataset = Dataset.from_dict(data)

result = evaluate(
    dataset = dataset, 
    metrics=[
        context_precision,
        context_recall,
        faithfulness,
        answer_relevancy,
    ],
)

df = result.to_pandas()

df.head()

evaluating with [context_precision]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.74s/it]


evaluating with [context_recall]


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [03:43<00:00, 223.19s/it]


evaluating with [faithfulness]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:58<00:00, 58.98s/it]


evaluating with [answer_relevancy]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.72s/it]


Unnamed: 0,question,answer,contexts,ground_truths,context_precision,context_recall,faithfulness,answer_relevancy
0,how you can I register a global error handler ...,To register a global error handler for the Rou...,"[page_content='""errorHandler(noErrorHandler())...",[\nTo register a global error handler for the ...,1.0,1.0,0.25,0.940842
1,How can I test a Camel route using jBang?\n,"To test a Camel route using jBang, you can use...","[[Document(page_content='""CHAPTER 4. USING CAM...","[\nTo test a Camel route using jBang, you can ...",0.0,1.0,1.0,0.989555
2,Can you show an example how to use the Rest DS...,To use the Rest DSL in Java to build an endpoi...,"[page_content='""Inline Rest DSL as a single ro...",[\nTo create a REST endpoint that responds to ...,0.0,1.0,0.7,0.813892
3,"This is my route\n\n from(""sftp://userName:...",To avoid processing the same files in case of ...,"[page_content='""Using a file based idempotent ...",[\nTo avoid processing the same files in case ...,0.0,1.0,0.875,0.763041
4,I am having an issue while working on the requ...,To listen to events generated by one route and...,"[page_content='""Article source: https://rhaeto...",[\nTo listen to the events of one route by ano...,1.0,1.0,1.0,0.791901
