From e3013914f6c842bb80b92435f9f955f3834d2871 Mon Sep 17 00:00:00 2001
From: Jesser Hamdaoui
Date: Mon, 20 Apr 2026 18:25:37 +0100
Subject: [PATCH 1/2] fix(optimization): handle None metric scores in
 LocalEvalSampler

---
 src/google/adk/optimization/local_eval_sampler.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/google/adk/optimization/local_eval_sampler.py b/src/google/adk/optimization/local_eval_sampler.py
index b00c34280f..f27d5a15fe 100644
--- a/src/google/adk/optimization/local_eval_sampler.py
+++ b/src/google/adk/optimization/local_eval_sampler.py
@@ -289,7 +289,11 @@ def _extract_eval_data(
       for eval_metric_result in per_invocation_result.eval_metric_results:
         eval_metric_results.append({
             "metric_name": eval_metric_result.metric_name,
-            "score": round(eval_metric_result.score, 2),  # accurate enough
+            "score": (
+                round(eval_metric_result.score, 2)
+                if eval_metric_result.score is not None
+                else None
+            ),  # accurate enough
             "eval_status": eval_metric_result.eval_status.name,
         })
       per_invocation_result_dict = {

From 6a8c12522d845c8286a2c94049f260e7bc9dcdd5 Mon Sep 17 00:00:00 2001
From: Jesser Hamdaoui
Date: Mon, 20 Apr 2026 18:34:39 +0100
Subject: [PATCH 2/2] test(optimization): add regression test for None metric
 score extraction

---
 .../optimization/local_eval_sampler_test.py | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/tests/unittests/optimization/local_eval_sampler_test.py b/tests/unittests/optimization/local_eval_sampler_test.py
index 6ebd99cb58..e685256e09 100644
--- a/tests/unittests/optimization/local_eval_sampler_test.py
+++ b/tests/unittests/optimization/local_eval_sampler_test.py
@@ -338,6 +338,48 @@ async def test_extract_eval_data(mocker):
   ]


+def test_extract_eval_data_preserves_none_metric_score(mocker):
+  mock_eval_sets_manager = mocker.MagicMock(spec=EvalSetsManager)
+  mock_eval_case = mocker.MagicMock()
+  mock_eval_case.conversation_scenario = "test_scenario"
+  mock_eval_sets_manager.get_eval_case.return_value = mock_eval_case
+
+  mock_metric_result = mocker.MagicMock(spec=EvalMetricResult)
+  mock_metric_result.metric_name = "test_metric"
+  mock_metric_result.score = None
+  mock_metric_result.eval_status = EvalStatus.NOT_EVALUATED
+
+  mock_per_inv_result = mocker.MagicMock(spec=EvalMetricResultPerInvocation)
+  mock_per_inv_result.actual_invocation = mocker.MagicMock(spec=Invocation)
+  mock_per_inv_result.expected_invocation = mocker.MagicMock(spec=Invocation)
+  mock_per_inv_result.eval_metric_results = [mock_metric_result]
+
+  mock_eval_result = mocker.MagicMock(spec=EvalCaseResult)
+  mock_eval_result.eval_id = "t1"
+  mock_eval_result.eval_metric_result_per_invocation = [mock_per_inv_result]
+
+  mocker.patch(
+      "google.adk.optimization.local_eval_sampler.extract_single_invocation_info",
+      side_effect=[{"info": "actual"}, {"info": "expected"}],
+  )
+
+  config = LocalEvalSamplerConfig(
+      eval_config=EvalConfig(),
+      app_name="test_app",
+      train_eval_set="train_set",
+      train_eval_case_ids=["t1"],
+  )
+  interface = LocalEvalSampler(config, mock_eval_sets_manager)
+
+  eval_data = interface._extract_eval_data("train_set", [mock_eval_result])
+
+  assert eval_data["t1"]["invocations"][0]["eval_metric_results"] == [{
+      "metric_name": "test_metric",
+      "score": None,
+      "eval_status": "NOT_EVALUATED",
+  }]
+
+
 @pytest.mark.asyncio
 async def test_sample_and_score(mocker):
   # Mock results