In [1]:
from dotenv import load_dotenv

from camel.retrievers import AutoRetriever
from camel.agents import ChatAgent
from datasets import load_dataset
from camel.retrievers.evaluation import annotate_dataset, \
    ragas_evaluate_dataset, ragas_calculate_metrics, ragas_benchmark

load_dotenv()
auto_retriever = AutoRetriever()


def context_call(example):
    retrieved_info = auto_retriever.run_vector_retriever(
        query=example['question'],
        contents=example['documents'],
        top_k=1,
        return_detailed_info=True,
        similarity_threshold=0.5,
    )
    return [c['text'] for c in retrieved_info['Retrieved Context']]


def answer_call(example):
    user_msg = str(example)
    assistant_sys_msg = """You are a helpful assistant to answer question,
         I will give you the Original Query and Retrieved Context,
        answer the Original Query based on the Retrieved Context,
        if you can't answer the question just say I don't know."""
    agent = ChatAgent(assistant_sys_msg)
    assistant_response = agent.step(user_msg)
    return assistant_response.msg.content


# Refer to https://huggingface.co/datasets/rungalileo/ragbench for a complete list of dataset
ragbench_hotpotqa = load_dataset("rungalileo/ragbench", "hotpotqa", split="test")

# Select a subset of the dataset for demonstration purposes
ds = ragbench_hotpotqa.select(range(94, 98))

# Annotate the dataset
annotated_ds = annotate_dataset(ds, context_call, answer_call)

# Evaluate the dataset
evaluated_ds = ragas_evaluate_dataset(
    annotated_ds,
    contexts_field_name="contexts",
    answer_field_name="answer",
    metrics_to_evaluate=["context_relevancy", "faithfulness"]
)

# Calculate metrics
# See https://arxiv.org/abs/2407.11005 for more details on the metrics, right now only context_relevancy and faithfulness are supported
calculated_metrics = ragas_calculate_metrics(
    evaluated_ds,
    pred_context_relevance_field="context_relevancy",
    pred_faithfulness_field="faithfulness",
)

# Or simply call
ds_two = ragbench_hotpotqa.select(range(94, 98))
calculated_metrics_two = ragas_benchmark(ds_two, context_call, answer_call)




2024-12-07 20:56:03,952 - camel - INFO - Camel library logging has been configured.
2024-12-07 20:56:04,884 - datasets - INFO - PyTorch version 2.5.1 available.


  from .autonotebook import tqdm as notebook_tqdm

For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from ragas.metrics._answer_correctness import AnswerCorrectness, answer_correctness

For example, replace imports like: `from langchain.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from ragas.metrics._context_entities_recall import (




Map:   0%|          | 0/4 [00:00<?, ? examples/s]

2024-12-07 20:56:09,454 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:10,307 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:10,479 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:10,691 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:10,854 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:11,036 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:11,157 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:11,496 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:12,511 - httpx - INFO - HTTP Request: POST https://api.openai.c

Map:  25%|██▌       | 1/4 [00:03<00:11,  3.79s/ examples]

2024-12-07 20:56:12,649 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:12,814 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:13,036 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:13,423 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:13,595 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:13,773 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:13,938 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:14,109 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:14,855 - httpx - INFO - HTTP Request: POST https://api.openai.c

Map:  50%|█████     | 2/4 [00:06<00:05,  2.94s/ examples]

2024-12-07 20:56:15,033 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:15,240 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:15,500 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:15,668 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:15,909 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:16,089 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:16,200 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:16,600 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:17,004 - httpx - INFO - HTTP Request: POST https://api.openai.c

Map:  75%|███████▌  | 3/4 [00:08<00:02,  2.58s/ examples]

2024-12-07 20:56:17,416 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:17,599 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:17,792 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:17,942 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:18,235 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:18,418 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:18,850 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:19,045 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:19,696 - httpx - INFO - HTTP Request: POST https://api.openai.c

Map: 100%|██████████| 4/4 [00:10<00:00,  2.75s/ examples]
Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-07 20:56:20,136 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating:  12%|█▎        | 1/8 [00:00<00:02,  2.88it/s]

2024-12-07 20:56:20,165 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-07 20:56:20,287 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating:  38%|███▊      | 3/8 [00:00<00:00,  7.16it/s]

2024-12-07 20:56:20,441 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-07 20:56:20,780 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-07 20:56:20,801 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating:  50%|█████     | 4/8 [00:00<00:01,  3.72it/s]

2024-12-07 20:56:20,939 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-07 20:56:21,414 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-07 20:56:22,146 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating:  62%|██████▎   | 5/8 [00:02<00:01,  1.60it/s]

2024-12-07 20:56:22,188 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-07 20:56:23,252 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating:  88%|████████▊ | 7/8 [00:03<00:00,  1.70it/s]

2024-12-07 20:56:23,546 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.14it/s]


Annotated Dataset:
Dataset({
    features: ['id', 'question', 'documents', 'response', 'generation_model_name', 'annotating_model_name', 'dataset_name', 'documents_sentences', 'response_sentences', 'sentence_support_information', 'unsupported_response_sentence_keys', 'adherence_score', 'overall_supported_explanation', 'relevance_explanation', 'all_relevant_sentence_keys', 'all_utilized_sentence_keys', 'trulens_groundedness', 'trulens_context_relevance', 'ragas_faithfulness', 'ragas_context_relevance', 'gpt3_adherence', 'gpt3_context_relevance', 'gpt35_utilization', 'relevance_score', 'utilization_score', 'completeness_score', 'contexts', 'answer'],
    num_rows: 4
})
Evaluated Dataset:
Dataset({
    features: ['id', 'question', 'documents', 'response', 'generation_model_name', 'annotating_model_name', 'dataset_name', 'documents_sentences', 'response_sentences', 'sentence_support_information', 'unsupported_response_sentence_keys', 'adherence_score', 'overall_supported_explanation', 'rel

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

2024-12-07 20:56:23,975 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:24,095 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:24,232 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:24,356 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:24,542 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:24,663 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:24,798 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:24,926 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:27,573 - httpx - INFO - HTTP Request: POST https://api.openai.c

Map:  25%|██▌       | 1/4 [00:03<00:11,  3.80s/ examples]

2024-12-07 20:56:27,797 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:27,949 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:28,269 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:28,394 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:28,640 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:28,767 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:28,943 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:29,191 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:29,906 - httpx - INFO - HTTP Request: POST https://api.openai.c

Map:  50%|█████     | 2/4 [00:06<00:05,  2.94s/ examples]

2024-12-07 20:56:30,316 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:30,473 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:30,832 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:30,980 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:31,191 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:31,415 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:31,675 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:31,993 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:32,423 - httpx - INFO - HTTP Request: POST https://api.openai.c

Map:  75%|███████▌  | 3/4 [00:08<00:02,  2.75s/ examples]

2024-12-07 20:56:32,878 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:33,069 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:33,388 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:33,655 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:34,003 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:34,219 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:34,588 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:34,718 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-07 20:56:35,045 - httpx - INFO - HTTP Request: POST https://api.openai.c

Map: 100%|██████████| 4/4 [00:11<00:00,  2.82s/ examples]
Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-07 20:56:35,502 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-07 20:56:35,507 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating:  12%|█▎        | 1/8 [00:00<00:02,  2.84it/s]

2024-12-07 20:56:35,846 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-07 20:56:35,875 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating:  25%|██▌       | 2/8 [00:00<00:02,  2.77it/s]

2024-12-07 20:56:36,256 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-07 20:56:36,256 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating:  38%|███▊      | 3/8 [00:01<00:01,  2.69it/s]

2024-12-07 20:56:36,478 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-07 20:56:36,559 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating:  50%|█████     | 4/8 [00:01<00:01,  2.91it/s]

2024-12-07 20:56:36,619 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-07 20:56:37,179 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating:  75%|███████▌  | 6/8 [00:02<00:00,  3.07it/s]

2024-12-07 20:56:37,998 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating:  88%|████████▊ | 7/8 [00:02<00:00,  2.17it/s]

2024-12-07 20:56:38,651 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.29it/s]

Calculated Metrics 2:
{'relevance_rmse': 0.21071692178021015, 'hallucination_auroc': 1.0}





In [3]:
# Print the results
print("Calculated Metrics:")
print(calculated_metrics)

print("Calculated Metrics 2:")
print(calculated_metrics_two)


Calculated Metrics:
{'relevance_rmse': 0.21071692178021015, 'hallucination_auroc': 0.8333333333333333}
Calculated Metrics 2:
{'relevance_rmse': 0.21071692178021015, 'hallucination_auroc': 1.0}
