# `DocumentMAPEvaluator`

In [1]:
from haystack import Document
from haystack.components.evaluators import DocumentMAPEvaluator

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Instantiate the MAP evaluator with its default settings (no constructor arguments).
evaluator = DocumentMAPEvaluator()

In [3]:
# Ground truth: one inner list per query, holding the documents expected for it.
expected_docs_per_query = [
    [Document(content="France")],
    [Document(content="9th century"), Document(content="9th")],
]

# Retrieved documents: one inner list per query, aligned by position with the
# ground-truth list above. The second query contains an extra hit
# ("10th century") placed between the two expected documents.
retrieved_docs_per_query = [
    [Document(content="France")],
    [
        Document(content=text)
        for text in ("9th century", "10th century", "9th")
    ],
]

# Score the retrieved lists against the ground truth.
result = evaluator.run(
    ground_truth_documents=expected_docs_per_query,
    retrieved_documents=retrieved_docs_per_query,
)

In [4]:
# Show what `evaluator.run` returned: the aggregate `score` and the per-query `individual_scores`.
print(result)

{'score': 0.9166666666666666, 'individual_scores': [1.0, 0.8333333333333333]}


# `RagasEvaluator`

In [7]:
from haystack import Pipeline
from haystack_integrations.components.evaluators.ragas import (
    RagasEvaluator,
    RagasMetric,
)

In [8]:
# Settings for the custom aspect-critique metric, kept in one place so they
# are easy to tweak.
aspect_critique_params = {
    "name": "custom",
    "definition": "Is this answer problematic for children?",
    "strictness": 3,
}

# Two independent Ragas evaluators, one per metric.
evaluator_context = RagasEvaluator(metric=RagasMetric.CONTEXT_PRECISION)
evaluator_aspect = RagasEvaluator(
    metric=RagasMetric.ASPECT_CRITIQUE,
    metric_params=aspect_critique_params,
)

# Empty pipeline that will host the evaluators.
evaluation_pipeline = Pipeline()

In [9]:
# Register each evaluator in the pipeline under its own component name.
for component_name, component in (
    ("evaluator_context", evaluator_context),
    ("evaluator_aspect", evaluator_aspect),
):
    evaluation_pipeline.add_component(component_name, component)

In [3]:
from transformers import pipeline
from datasets import load_dataset
import evaluate

# Text2text-generation pipeline backed by the FLAN-T5 large checkpoint.
generator = pipeline("text2text-generation", model="google/flan-t5-large")

# First 100 rows of the SQuAD validation split.
dataset = load_dataset("squad", split="validation[:100]")

# ROUGE metric loaded through the `evaluate` library.
rouge = evaluate.load("rouge")

# Build the reference/prediction pairs row by row.
# NOTE(review): the model is prompted with the question text only; if the
# dataset rows also carry a supporting passage, it is not passed to the model.
# Presumably this is an intentional closed-book setup -- confirm, since it
# would strongly affect the ROUGE scores.
# NOTE(review): only the first listed answer of each row is used as reference.
references = []
predictions = []
for example in dataset:
    references.append(example["answers"]["text"][0])
    generated = generator(example["question"])
    predictions.append(generated[0]["generated_text"])

# Score the predictions against the references and show the metrics.
results = rouge.compute(predictions=predictions, references=references)
print(results)

Device set to use cpu


{'rouge1': 0.07433333333333333, 'rouge2': 0.02, 'rougeL': 0.07544444444444445, 'rougeLsum': 0.07388888888888889}
