From a52198acfe759267a1079e138d8f678bb0e62f5e Mon Sep 17 00:00:00 2001
From: Sara Robinson
Date: Thu, 17 Jul 2025 07:32:31 -0700
Subject: [PATCH] feat: GenAI SDK client(evals) - Add async evaluate_instances
 method

PiperOrigin-RevId: 784173496
---
 .../genai/replays/test_batch_evaluate.py      | 23 ++++++++
 .../genai/replays/test_evaluate_instances.py  | 55 ++++++++++++++++---
 vertexai/_genai/evals.py                      | 18 ++++++
 vertexai/_genai/mypy.ini                      | 22 ++++++++
 4 files changed, 110 insertions(+), 8 deletions(-)
 create mode 100644 vertexai/_genai/mypy.ini

diff --git a/tests/unit/vertexai/genai/replays/test_batch_evaluate.py b/tests/unit/vertexai/genai/replays/test_batch_evaluate.py
index ad6983ffb7..3b89e6a371 100644
--- a/tests/unit/vertexai/genai/replays/test_batch_evaluate.py
+++ b/tests/unit/vertexai/genai/replays/test_batch_evaluate.py
@@ -14,6 +14,8 @@
 #
 # pylint: disable=protected-access,bad-continuation,missing-function-docstring
 
+import pytest
+
 from tests.unit.vertexai.genai.replays import pytest_helper
 from vertexai._genai import types
 
@@ -43,3 +45,24 @@ def test_batch_eval(client):
     globals_for_file=globals(),
     test_method="evals.batch_evaluate",
 )
+
+pytest_plugins = ("pytest_asyncio",)
+
+
+@pytest.mark.asyncio
+async def test_batch_eval_async(client):
+    eval_dataset = types.EvaluationDataset(
+        gcs_source=types.GcsSource(
+            uris=["gs://genai-eval-sdk-replay-test/test_data/inference_results.jsonl"]
+        )
+    )
+
+    response = await client.aio.evals.batch_evaluate(
+        dataset=eval_dataset,
+        metrics=[
+            types.PrebuiltMetric.TEXT_QUALITY,
+        ],
+        dest="gs://genai-eval-sdk-replay-test/test_data/batch_eval_output",
+    )
+    assert "operations" in response.name
+    assert "EvaluateDatasetOperationMetadata" in response.metadata.get("@type")
diff --git a/tests/unit/vertexai/genai/replays/test_evaluate_instances.py b/tests/unit/vertexai/genai/replays/test_evaluate_instances.py
index c3a78654a3..786dcfa003 100644
--- a/tests/unit/vertexai/genai/replays/test_evaluate_instances.py
+++ b/tests/unit/vertexai/genai/replays/test_evaluate_instances.py
@@ -14,11 +14,12 @@
 #
 # pylint: disable=protected-access,bad-continuation,missing-function-docstring
 
+import json
 from tests.unit.vertexai.genai.replays import pytest_helper
 from vertexai._genai import types
 
 import pandas as pd
-import json
+import pytest
 
 
 def test_bleu_metric(client):
@@ -31,7 +32,11 @@ def test_bleu_metric(client):
         ],
         metric_spec=types.BleuSpec(),
     )
-    response = client.evals._evaluate_instances(bleu_input=test_bleu_input)
+    response = client.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            bleu_input=test_bleu_input
+        )
+    )
     assert len(response.bleu_results.bleu_metric_values) == 1
 
 
@@ -46,8 +51,10 @@ def test_exact_match_metric(client):
         ],
         metric_spec=types.ExactMatchSpec(),
     )
-    response = client.evals._evaluate_instances(
-        exact_match_input=test_exact_match_input
+    response = client.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            exact_match_input=test_exact_match_input
+        )
     )
     assert len(response.exact_match_results.exact_match_metric_values) == 1
 
@@ -63,7 +70,11 @@ def test_rouge_metric(client):
         ],
         metric_spec=types.RougeSpec(rouge_type="rougeL"),
     )
-    response = client.evals._evaluate_instances(rouge_input=test_rouge_input)
+    response = client.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            rouge_input=test_rouge_input
+        )
+    )
     assert len(response.rouge_results.rouge_metric_values) == 1
 
 
@@ -78,7 +89,11 @@ def test_pointwise_metric(client):
             metric_prompt_template="Evaluate if the response '{response}' correctly answers the prompt '{prompt}'."
         ),
     )
-    response = client.evals._evaluate_instances(pointwise_metric_input=test_input)
+    response = client.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            pointwise_metric_input=test_input
+        )
+    )
     assert response.pointwise_metric_result is not None
     assert response.pointwise_metric_result.score is not None
 
@@ -100,8 +115,10 @@ def test_pairwise_metric_with_autorater(client):
     )
     autorater_config = types.AutoraterConfig(sampling_count=2)
 
-    response = client.evals._evaluate_instances(
-        pairwise_metric_input=test_input, autorater_config=autorater_config
+    response = client.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            pairwise_metric_input=test_input, autorater_config=autorater_config
+        )
     )
     assert response.pairwise_metric_result is not None
     assert response.pairwise_metric_result.pairwise_choice is not None
@@ -147,3 +164,25 @@ def test_inference_with_prompt_template(client):
     globals_for_file=globals(),
     test_method="evals.evaluate",
 )
+
+
+pytest_plugins = ("pytest_asyncio",)
+
+
+@pytest.mark.asyncio
+async def test_bleu_metric_async(client):
+    test_bleu_input = types.BleuInput(
+        instances=[
+            types.BleuInstance(
+                reference="The quick brown fox jumps over the lazy dog.",
+                prediction="A fast brown fox leaps over a lazy dog.",
+            )
+        ],
+        metric_spec=types.BleuSpec(),
+    )
+    response = await client.aio.evals.evaluate_instances(
+        metric_config=types._EvaluateInstancesRequestParameters(
+            bleu_input=test_bleu_input
+        )
+    )
+    assert len(response.bleu_results.bleu_metric_values) == 1
diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py
index 5c5940b16b..2a6d8fe797 100644
--- a/vertexai/_genai/evals.py
+++ b/vertexai/_genai/evals.py
@@ -1522,3 +1522,21 @@ async def batch_evaluate(
 
         self._api_client._verify_response(return_value)
         return return_value
+
+    async def evaluate_instances(
+        self,
+        *,
+        metric_config: types._EvaluateInstancesRequestParameters,
+    ) -> types.EvaluateInstancesResponse:
+        """Evaluates instances against the given metric configuration."""
+
+        if isinstance(metric_config, types._EvaluateInstancesRequestParameters):
+            metric_config = metric_config.model_dump()
+        else:
+            metric_config = dict(metric_config)
+
+        result = await self._evaluate_instances(
+            **metric_config,
+        )
+
+        return result
diff --git a/vertexai/_genai/mypy.ini b/vertexai/_genai/mypy.ini
new file mode 100644
index 0000000000..ae490bc32d
--- /dev/null
+++ b/vertexai/_genai/mypy.ini
@@ -0,0 +1,22 @@
+[mypy]
+# TODO(b/422425982): Fix arg-type errors
+disable_error_code = import-not-found, import-untyped, arg-type
+
+# We only want to run mypy on the _genai dir; ignore dependent modules
+[mypy-vertexai.agent_engines.*]
+ignore_errors = True
+
+[mypy-vertexai.preview.*]
+ignore_errors = True
+
+[mypy-vertexai.generative_models.*]
+ignore_errors = True
+
+[mypy-vertexai.prompts.*]
+ignore_errors = True
+
+[mypy-vertexai.tuning.*]
+ignore_errors = True
+
+[mypy-vertexai.caching.*]
+ignore_errors = True
\ No newline at end of file
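
Usage note (not part of the patch): a minimal sketch of calling the new async
evaluate_instances method added to vertexai/_genai/evals.py, mirroring the
replay tests above. The vertexai.Client(...) constructor and the project and
location values are assumptions for illustration (the tests receive their
client from a fixture), and _EvaluateInstancesRequestParameters is a private
type, used here only because the tests use it.

    import asyncio

    import vertexai
    from vertexai._genai import types


    async def main() -> None:
        # Assumed client construction; substitute however your environment
        # builds a GenAI SDK client.
        client = vertexai.Client(project="my-project", location="us-central1")

        # Build a BLEU evaluation request, as in test_bleu_metric_async above.
        bleu_input = types.BleuInput(
            instances=[
                types.BleuInstance(
                    reference="The quick brown fox jumps over the lazy dog.",
                    prediction="A fast brown fox leaps over a lazy dog.",
                )
            ],
            metric_spec=types.BleuSpec(),
        )

        # The new method wraps the private _evaluate_instances call and takes
        # the request parameters as a single metric_config object.
        response = await client.aio.evals.evaluate_instances(
            metric_config=types._EvaluateInstancesRequestParameters(
                bleu_input=bleu_input
            )
        )
        print(response.bleu_results.bleu_metric_values)


    asyncio.run(main())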