Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from pydantic import ConfigDict, Field

from .._helpers.output_path import resolve_output_path
from ..models import EvaluationResult
from ..models.models import (
AgentExecution,
Expand Down Expand Up @@ -85,6 +86,19 @@ class BaseLegacyEvaluator(

# Note: __init_subclass__ is inherited from BaseEvaluator and handles metrics tracking

def get_targeted_field(self, obj: Any) -> Any:
    """Extract the value addressed by ``target_output_key`` from ``obj``.

    When ``target_output_key`` is unset, empty, or the wildcard ``"*"``,
    ``obj`` is returned untouched. When dot-notation path resolution
    fails, the original object is returned as a safe fallback.
    """
    key = self.target_output_key
    # A missing key or the wildcard means "use the whole object".
    if not key or key == "*":
        return obj
    try:
        return resolve_output_path(obj, key)
    except (KeyError, IndexError, TypeError):
        # Path could not be resolved -- fall back to the full object.
        return obj

def model_post_init(self, __context: Any):
"""Post-initialization hook for Pydantic models."""
# Ensure config is set up for legacy evaluators
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from uipath.eval.models import BooleanEvaluationResult, EvaluationResult

from .._helpers.output_path import resolve_output_path
from ..models.models import AgentExecution
from .base_legacy_evaluator import LegacyEvaluationCriteria, LegacyEvaluatorConfig
from .legacy_deterministic_evaluator_base import BaseLegacyDeterministicEvaluator
Expand Down Expand Up @@ -41,30 +40,8 @@ async def evaluate(
Returns:
EvaluationResult: Boolean result indicating exact match (True/False)
"""
actual_output = agent_execution.agent_output
expected_output = evaluation_criteria.expected_output

if self.target_output_key and self.target_output_key != "*":
if isinstance(actual_output, dict) and isinstance(expected_output, dict):
actual_resolved = True
expected_resolved = True

try:
actual_output = resolve_output_path(
actual_output, self.target_output_key
)
except (KeyError, IndexError, TypeError):
actual_resolved = False

try:
expected_output = resolve_output_path(
expected_output, self.target_output_key
)
except (KeyError, IndexError, TypeError):
expected_resolved = False

if not actual_resolved or not expected_resolved:
actual_output = expected_output = {}
actual_output = self.get_targeted_field(agent_execution.agent_output)
expected_output = self.get_targeted_field(evaluation_criteria.expected_output)

return BooleanEvaluationResult(
score=self._canonical_json(actual_output)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import math
from typing import Any, Tuple, TypeVar

from .._helpers.output_path import resolve_output_path
from ..models import EvaluationResult, NumericEvaluationResult
from ..models.models import AgentExecution
from .base_legacy_evaluator import LegacyEvaluationCriteria, LegacyEvaluatorConfig
Expand Down Expand Up @@ -47,23 +46,8 @@ async def evaluate(
Returns:
EvaluationResult: Numerical score between 0-100 indicating similarity
"""
actual_output = agent_execution.agent_output
expected_output = evaluation_criteria.expected_output

if self.target_output_key and self.target_output_key != "*":
try:
actual_output = resolve_output_path(
actual_output, self.target_output_key
)
except (KeyError, IndexError, TypeError):
actual_output = {}

try:
expected_output = resolve_output_path(
expected_output, self.target_output_key
)
except (KeyError, IndexError, TypeError):
expected_output = {}
actual_output = self.get_targeted_field(agent_execution.agent_output)
expected_output = self.get_targeted_field(evaluation_criteria.expected_output)

return NumericEvaluationResult(
score=self._compare_json(expected_output, actual_output)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from ..._utils.constants import COMMUNITY_agents_SUFFIX
from .._execution_context import eval_set_run_id_context
from .._helpers.helpers import is_empty_value
from .._helpers.output_path import resolve_output_path
from ..models import NumericEvaluationResult
from ..models.models import (
AgentExecution,
Expand Down Expand Up @@ -125,23 +124,8 @@ async def evaluate(
if self.llm is None:
self._initialize_llm()

actual_output = agent_execution.agent_output
expected_output = evaluation_criteria.expected_output

if self.target_output_key and self.target_output_key != "*":
try:
actual_output = resolve_output_path(
actual_output, self.target_output_key
)
except (KeyError, IndexError, TypeError):
pass

try:
expected_output = resolve_output_path(
expected_output, self.target_output_key
)
except (KeyError, IndexError, TypeError):
pass
actual_output = self.get_targeted_field(agent_execution.agent_output)
expected_output = self.get_targeted_field(evaluation_criteria.expected_output)

# Create the evaluation prompt
evaluation_prompt = self._create_evaluation_prompt(
Expand Down
Loading