From b42717b9ebeb98857b59ac191f8ddd009771b771 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Sun, 22 Mar 2026 23:52:45 -0700 Subject: [PATCH] respect dataset adapter --- eval_protocol/pytest/evaluation_test.py | 2 +- eval_protocol/pytest/evaluation_test_utils.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index d2a44fde..a1703cb8 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -211,7 +211,7 @@ def evaluation_test( completion_params = parse_ep_completion_params_overwrite(completion_params) original_completion_params = completion_params passed_threshold = parse_ep_passed_threshold(passed_threshold) - data_loaders = parse_ep_dataloaders(data_loaders) + data_loaders = parse_ep_dataloaders(data_loaders, dataset_adapter=dataset_adapter) custom_invocation_id = os.environ.get("EP_INVOCATION_ID", None) # ignore other data input params when dataloader is provided diff --git a/eval_protocol/pytest/evaluation_test_utils.py b/eval_protocol/pytest/evaluation_test_utils.py index fafce6b5..cf8b8337 100644 --- a/eval_protocol/pytest/evaluation_test_utils.py +++ b/eval_protocol/pytest/evaluation_test_utils.py @@ -21,6 +21,7 @@ EvaluationThresholdDict, Status, ) +from eval_protocol.common_utils import load_jsonl from eval_protocol.data_loader import DynamicDataLoader from eval_protocol.data_loader.models import EvaluationDataLoader from eval_protocol.pytest.rollout_processor import RolloutProcessor @@ -288,10 +289,21 @@ def _rows_from_jsonl(path: str) -> list[EvaluationRow]: def parse_ep_dataloaders( dataloaders: Sequence[EvaluationDataLoader] | EvaluationDataLoader | None, + *, + dataset_adapter: Callable[[list[dict[str, Any]]], list[EvaluationRow]] | None = None, ) -> Sequence[EvaluationDataLoader] | EvaluationDataLoader | None: + """When ``EP_JSONL_PATH`` is set, load JSONL as raw dicts and run ``dataset_adapter`` if provided. + + Without ``dataset_adapter``, rows are built with ``EvaluationRow(**dict)`` (legacy behavior), + which skips custom label fields that adapters normally attach. + """ try: load_from_jsonl_path = os.getenv("EP_JSONL_PATH") if load_from_jsonl_path: + if dataset_adapter is not None: + return DynamicDataLoader( + generators=[lambda path=load_from_jsonl_path, da=dataset_adapter: da(load_jsonl(path))] + ) return DynamicDataLoader(generators=[lambda path=load_from_jsonl_path: _rows_from_jsonl(path)]) except Exception: pass