2 changes: 2 additions & 0 deletions src/ragas/metrics/__init__.py
@@ -61,6 +61,7 @@
from ragas.metrics._topic_adherence import TopicAdherenceScore
from ragas.metrics.base import (
Metric,
MetricOutputType,
MetricType,
MetricWithEmbeddings,
MetricWithLLM,
@@ -76,6 +77,7 @@
"MetricWithLLM",
"SingleTurnMetric",
"MultiTurnMetric",
"MetricOutputType",
# specific metrics
"AnswerCorrectness",
"answer_correctness",
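With `MetricOutputType` now re-exported from `ragas.metrics`, downstream code can introspect what kind of score a metric declares. A minimal sketch, assuming the enum defined in `ragas.metrics.base` exposes the `CONTINUOUS`, `BINARY`, and `DISCRETE` members assigned throughout this PR:

```python
# Sketch only, not part of this PR: reading a metric's declared output type.
# Assumes MetricOutputType is an enum in ragas.metrics.base with at least
# CONTINUOUS, BINARY and DISCRETE members, as the assignments in this diff suggest.
from ragas.metrics import Faithfulness, MetricOutputType

metric = Faithfulness()
if metric.output_type == MetricOutputType.CONTINUOUS:
    print(f"{metric.name} returns a continuous score")
```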
2 changes: 2 additions & 0 deletions src/ragas/metrics/_answer_correctness.py
@@ -15,6 +15,7 @@
LongFormAnswerPrompt,
)
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithEmbeddings,
MetricWithLLM,
@@ -163,6 +164,7 @@ class AnswerCorrectness(MetricWithLLM, MetricWithEmbeddings, SingleTurnMetric):
MetricType.SINGLE_TURN: {"user_input", "response", "reference"}
}
)
output_type = MetricOutputType.CONTINUOUS
correctness_prompt: PydanticPrompt = field(default_factory=CorrectnessClassifier)
long_form_answer_prompt: PydanticPrompt = field(
default_factory=LongFormAnswerPrompt
3 changes: 3 additions & 0 deletions src/ragas/metrics/_answer_relevance.py
@@ -9,6 +9,7 @@

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithEmbeddings,
MetricWithLLM,
@@ -87,6 +88,8 @@ class ResponseRelevancy(MetricWithLLM, MetricWithEmbeddings, SingleTurnMetric):
}
}
)
output_type = MetricOutputType.CONTINUOUS

question_generation: PydanticPrompt = ResponseRelevancePrompt()
strictness: int = 3

8 changes: 7 additions & 1 deletion src/ragas/metrics/_answer_similarity.py
@@ -8,7 +8,12 @@

from ragas.dataset_schema import SingleTurnSample
from ragas.embeddings.base import HuggingfaceEmbeddings
from ragas.metrics.base import MetricType, MetricWithEmbeddings, SingleTurnMetric
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithEmbeddings,
SingleTurnMetric,
)

if t.TYPE_CHECKING:
from langchain_core.callbacks.base import Callbacks
@@ -41,6 +46,7 @@ class SemanticSimilarity(MetricWithEmbeddings, SingleTurnMetric):
_required_columns: t.Dict[MetricType, t.Set[str]] = field(
default_factory=lambda: {MetricType.SINGLE_TURN: {"reference", "response"}}
)
output_type = MetricOutputType.CONTINUOUS
is_cross_encoder: bool = False
threshold: t.Optional[float] = None

3 changes: 3 additions & 0 deletions src/ragas/metrics/_aspect_critic.py
@@ -8,6 +8,7 @@

from ragas.dataset_schema import MultiTurnSample, SingleTurnSample
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
MultiTurnMetric,
@@ -94,6 +95,7 @@ def __init__(
definition: str,
llm: t.Optional[BaseRagasLLM] = None,
required_columns: t.Optional[t.Dict[MetricType, t.Set[str]]] = None,
output_type: t.Optional[MetricOutputType] = MetricOutputType.BINARY,
single_turn_prompt: t.Optional[PydanticPrompt] = None,
multi_turn_prompt: t.Optional[PydanticPrompt] = None,
strictness: int = 1,
@@ -116,6 +118,7 @@
name=name,
_required_columns=self._required_columns,
llm=llm,
output_type=output_type,
)

self._definition = definition
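Unlike the dataclass-style metrics, `AspectCritic` takes `output_type` as a constructor keyword (defaulting to `BINARY`) and forwards it to `super().__init__()`. A hedged usage sketch; the `definition` string below is illustrative only:

```python
# Sketch only, not part of this PR: setting the output type explicitly when
# constructing an AspectCritic. BINARY is already the default per this diff.
from ragas.metrics import AspectCritic, MetricOutputType

harmfulness = AspectCritic(
    name="harmfulness",
    definition="Does the response cause or risk harm to the user?",
    output_type=MetricOutputType.BINARY,  # explicit here for illustration
)
assert harmfulness.output_type == MetricOutputType.BINARY
```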
8 changes: 7 additions & 1 deletion src/ragas/metrics/_context_entities_recall.py
@@ -8,7 +8,12 @@
from pydantic import BaseModel

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics.base import MetricType, MetricWithLLM, SingleTurnMetric
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
SingleTurnMetric,
)
from ragas.prompt import PydanticPrompt, StringIO

if t.TYPE_CHECKING:
@@ -113,6 +118,7 @@ class ContextEntityRecall(MetricWithLLM, SingleTurnMetric):
MetricType.SINGLE_TURN: {"reference", "retrieved_contexts"}
}
)
output_type = MetricOutputType.CONTINUOUS
context_entity_recall_prompt: PydanticPrompt = field(
default_factory=ExtractEntitiesPrompt
)
9 changes: 8 additions & 1 deletion src/ragas/metrics/_context_precision.py
@@ -9,7 +9,13 @@

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics._string import NonLLMStringSimilarity
from ragas.metrics.base import MetricType, MetricWithLLM, SingleTurnMetric, ensembler
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
SingleTurnMetric,
ensembler,
)
from ragas.prompt import PydanticPrompt
from ragas.run_config import RunConfig
from ragas.utils import deprecated
@@ -98,6 +104,7 @@ class LLMContextPrecisionWithReference(MetricWithLLM, SingleTurnMetric):
}
}
)
output_type = MetricOutputType.CONTINUOUS
context_precision_prompt: PydanticPrompt = field(
default_factory=ContextPrecisionPrompt
)
10 changes: 9 additions & 1 deletion src/ragas/metrics/_context_recall.py
@@ -9,7 +9,13 @@

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics._string import NonLLMStringSimilarity
from ragas.metrics.base import MetricType, MetricWithLLM, SingleTurnMetric, ensembler
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
SingleTurnMetric,
ensembler,
)
from ragas.prompt import PydanticPrompt
from ragas.run_config import RunConfig
from ragas.utils import deprecated
@@ -102,6 +108,7 @@ class LLMContextRecall(MetricWithLLM, SingleTurnMetric):
}
}
)
output_type: t.Optional[MetricOutputType] = MetricOutputType.CONTINUOUS
context_recall_prompt: PydanticPrompt = field(
default_factory=ContextRecallClassificationPrompt
)
@@ -202,6 +209,7 @@ class NonLLMContextRecall(SingleTurnMetric):
}
}
)
output_type: MetricOutputType = MetricOutputType.CONTINUOUS
distance_measure: SingleTurnMetric = field(
default_factory=lambda: NonLLMStringSimilarity()
)
9 changes: 8 additions & 1 deletion src/ragas/metrics/_domain_specific_rubrics.py
@@ -7,6 +7,7 @@

from ragas.dataset_schema import MultiTurnSample, SingleTurnSample
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
MultiTurnMetric,
@@ -88,6 +89,7 @@ def __init__(
rubrics: t.Dict[str, str] = DEFAULT_REFERENCE_FREE_RUBRICS,
llm: t.Optional[BaseRagasLLM] = None,
required_columns: t.Optional[t.Dict[MetricType, t.Set[str]]] = None,
output_type: t.Optional[MetricOutputType] = MetricOutputType.DISCRETE,
single_turn_prompt: t.Optional[PydanticPrompt] = None,
multi_turn_prompt: t.Optional[PydanticPrompt] = None,
max_retries: int = 1,
@@ -109,7 +111,12 @@
"reference:optional",
},
}
super().__init__(name=name, llm=llm, _required_columns=self._required_columns)
super().__init__(
name=name,
llm=llm,
_required_columns=self._required_columns,
output_type=output_type,
)

def __repr__(self) -> str:
return f"{self.name}(required_columns={self.required_columns}, llm={self.llm}), rubrics={self.rubrics}"
2 changes: 2 additions & 0 deletions src/ragas/metrics/_factual_correctness.py
@@ -15,6 +15,7 @@
NLIStatementPrompt,
)
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
SingleTurnMetric,
@@ -210,6 +211,7 @@ class FactualCorrectness(MetricWithLLM, SingleTurnMetric):
_required_columns: t.Dict[MetricType, t.Set[str]] = field(
default_factory=lambda: {MetricType.SINGLE_TURN: {"response", "reference"}}
)
output_type: t.Optional[MetricOutputType] = MetricOutputType.CONTINUOUS
mode: t.Literal["precision", "recall", "f1"] = "f1"
beta: float = 1.0
atomicity: t.Literal["low", "high"] = "low"
2 changes: 2 additions & 0 deletions src/ragas/metrics/_faithfulness.py
@@ -9,6 +9,7 @@

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
SingleTurnMetric,
@@ -172,6 +173,7 @@ class Faithfulness(MetricWithLLM, SingleTurnMetric):
}
}
)
output_type: t.Optional[MetricOutputType] = MetricOutputType.CONTINUOUS
nli_statements_message: PydanticPrompt = field(default_factory=NLIStatementPrompt)
statement_prompt: PydanticPrompt = field(default_factory=LongFormAnswerPrompt)
sentence_segmenter: t.Optional[HasSegmentMethod] = None
8 changes: 7 additions & 1 deletion src/ragas/metrics/_goal_accuracy.py
@@ -6,7 +6,12 @@
from pydantic import BaseModel, Field

from ragas.dataset_schema import MultiTurnSample
from ragas.metrics.base import MetricType, MetricWithLLM, MultiTurnMetric
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
MultiTurnMetric,
)
from ragas.prompt import PydanticPrompt

if t.TYPE_CHECKING:
@@ -106,6 +111,7 @@ class AgentGoalAccuracyWithReference(MetricWithLLM, MultiTurnMetric):
}
}
)
output_type: t.Optional[MetricOutputType] = MetricOutputType.BINARY
workflow_prompt: PydanticPrompt = field(
default_factory=lambda: InferGoalOutcomePrompt()
)
3 changes: 3 additions & 0 deletions src/ragas/metrics/_instance_specific_rubrics.py
@@ -11,6 +11,7 @@
SingleTurnInputWithoutRubric,
)
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
MultiTurnMetric,
@@ -54,6 +55,7 @@ def __init__(
name: str = "instance_rubrics",
llm: t.Optional[BaseRagasLLM] = None,
required_columns: t.Optional[t.Dict[MetricType, t.Set[str]]] = None,
output_type: t.Optional[MetricOutputType] = MetricOutputType.DISCRETE,
single_turn_prompt: t.Optional[PydanticPrompt] = None,
multi_turn_prompt: t.Optional[PydanticPrompt] = None,
max_retries: int = 1,
@@ -73,6 +75,7 @@
"reference:optional",
},
}
self.output_type = output_type
super().__init__(name=name, llm=llm, _required_columns=self._required_columns)

self.single_turn_prompt = single_turn_prompt or SingleTurnPrompt()
8 changes: 7 additions & 1 deletion src/ragas/metrics/_multi_modal_faithfulness.py
@@ -7,7 +7,12 @@
from pydantic import BaseModel, Field

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics.base import MetricType, MetricWithLLM, SingleTurnMetric
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
SingleTurnMetric,
)
from ragas.prompt import ImageTextPrompt

if t.TYPE_CHECKING:
@@ -74,6 +79,7 @@ class MultiModalFaithfulness(MetricWithLLM, SingleTurnMetric):
}
}
)
output_type: t.Optional[MetricOutputType] = MetricOutputType.CONTINUOUS
faithfulness_prompt: ImageTextPrompt = MultiModalFaithfulnessPrompt()

async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
9 changes: 8 additions & 1 deletion src/ragas/metrics/_multi_modal_relevance.py
@@ -7,7 +7,12 @@
from pydantic import BaseModel, Field

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics.base import MetricType, MetricWithLLM, SingleTurnMetric
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
SingleTurnMetric,
)
from ragas.prompt import ImageTextPrompt

if t.TYPE_CHECKING:
@@ -80,6 +85,8 @@ class MultiModalRelevance(MetricWithLLM, SingleTurnMetric):
}
}
)
output_type: t.Optional[MetricOutputType] = MetricOutputType.CONTINUOUS

relevance_prompt: ImageTextPrompt = MultiModalRelevancePrompt()

async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
2 changes: 2 additions & 0 deletions src/ragas/metrics/_noise_sensitivity.py
@@ -15,6 +15,7 @@
NLIStatementPrompt,
)
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
SingleTurnMetric,
@@ -43,6 +44,7 @@ class NoiseSensitivity(MetricWithLLM, SingleTurnMetric):
}
}
)
output_type: t.Optional[MetricOutputType] = MetricOutputType.CONTINUOUS
nli_statements_message: PydanticPrompt = field(default_factory=NLIStatementPrompt)
statement_prompt: PydanticPrompt = field(default_factory=LongFormAnswerPrompt)
sentence_segmenter: t.Optional[HasSegmentMethod] = None
3 changes: 3 additions & 0 deletions src/ragas/metrics/_simple_criteria.py
@@ -8,6 +8,7 @@

from ragas.dataset_schema import MultiTurnSample, SingleTurnSample
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
MultiTurnMetric,
@@ -94,6 +95,7 @@ def __init__(
definition: str,
llm: t.Optional[BaseRagasLLM] = None,
required_columns: t.Optional[t.Dict[MetricType, t.Set[str]]] = None,
output_type: t.Optional[MetricOutputType] = MetricOutputType.DISCRETE,
single_turn_prompt: t.Optional[PydanticPrompt] = None,
multi_turn_prompt: t.Optional[PydanticPrompt] = None,
strictness: int = 1,
@@ -116,6 +118,7 @@
name=name,
llm=llm,
_required_columns=required_columns,
output_type=output_type,
)

self._definition = definition
8 changes: 7 additions & 1 deletion src/ragas/metrics/_sql_semantic_equivalence.py
@@ -7,7 +7,12 @@
from pydantic import BaseModel, Field

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics.base import MetricType, MetricWithLLM, SingleTurnMetric
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
SingleTurnMetric,
)
from ragas.prompt import PydanticPrompt

if t.TYPE_CHECKING:
@@ -70,6 +75,7 @@ class LLMSQLEquivalence(MetricWithLLM, SingleTurnMetric):
MetricType.SINGLE_TURN: {"response", "reference", "reference_contexts"}
}
)
output_type: t.Optional[MetricOutputType] = MetricOutputType.BINARY
equivalence_prompt: PydanticPrompt = EquivalencePrompt()

async def _single_turn_ascore(