
Commit bbbadbc

vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals) - Add get_evaluation_run method to Vertex AI GenAI SDK evals
PiperOrigin-RevId: 812865627
1 parent 36a5bbc commit bbbadbc
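
For context, the method added here is exercised by the tests below as client.evals.get_evaluation_run(name=...). A minimal usage sketch, assuming the client is constructed via vertexai.Client (the project, location, and run ID below are placeholders, not values from this commit):

import vertexai
from vertexai import types

# Assumed client construction; the tests instead receive a preconfigured
# `client` fixture from the replay test harness.
client = vertexai.Client(project="my-project", location="us-central1")

# Look up an evaluation run by its full resource name.
run = client.evals.get_evaluation_run(
    name="projects/my-project/locations/us-central1/evaluationRuns/12345"
)
assert isinstance(run, types.EvaluationRun)
print(run.display_name, run.state)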

File tree

3 files changed: +620 -0 lines changed
Lines changed: 129 additions & 0 deletions
@@ -0,0 +1,129 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pylint: disable=protected-access,bad-continuation,missing-function-docstring

from tests.unit.vertexai.genai.replays import pytest_helper
from vertexai import types
import datetime
import pytest


def test_get_eval_run(client):
    """Tests that get_evaluation_run() returns a correctly structured EvaluationRun."""
    evaluation_run_name = (
        "projects/503583131166/locations/us-central1/evaluationRuns/1957799200510967808"
    )
    evaluation_run = client.evals.get_evaluation_run(name=evaluation_run_name)
    assert isinstance(evaluation_run, types.EvaluationRun)
    assert evaluation_run.name == evaluation_run_name
    assert evaluation_run.display_name == "test2"
    assert evaluation_run.metadata == {"pipeline_id": "4460531348888616960"}
    assert evaluation_run.create_time == datetime.datetime(
        2025, 9, 8, 20, 55, 41, 833176, tzinfo=datetime.timezone.utc
    )
    assert evaluation_run.completion_time == datetime.datetime(
        2025, 9, 8, 20, 56, 13, 492971, tzinfo=datetime.timezone.utc
    )
    assert evaluation_run.state == types.EvaluationRunState.SUCCEEDED
    assert evaluation_run.evaluation_set_snapshot == (
        "projects/503583131166/locations/us-central1/evaluationSets/8069535738573619200"
    )
    assert evaluation_run.data_source.bigquery_request_set == types.BigQueryRequestSet(
        uri="bq://lakeyk-test-limited.inference_batch_prediction_input.1317387725199900672_1b",
        prompt_column="request",
        candidate_response_columns={
            "baseline_model_response": "baseline_model_response",
            "checkpoint_1": "checkpoint_1",
            "checkpoint_2": "checkpoint_2",
        },
    )
    assert evaluation_run.error is None


def test_get_eval_run_bq_source(client):
    """Tests that get_evaluation_run() returns a correctly structured EvaluationRun."""
    evaluation_run_name = (
        "projects/503583131166/locations/us-central1/evaluationRuns/1968424880881795072"
    )
    evaluation_run = client.evals.get_evaluation_run(name=evaluation_run_name)
    assert isinstance(evaluation_run, types.EvaluationRun)
    assert evaluation_run.name == evaluation_run_name
    assert evaluation_run.display_name == "test1"
    assert evaluation_run.data_source.bigquery_request_set == types.BigQueryRequestSet(
        uri="bq://lakeyk-test-limited.inference_batch_prediction_input.1317387725199900672_1b",
        prompt_column="request",
        rubrics_column="rubric",
        candidate_response_columns={
            "baseline_model_response": "baseline_model_response",
            "checkpoint_1": "checkpoint_1",
            "checkpoint_2": "checkpoint_2",
        },
        sampling_config=types.SamplingConfig(
            sampling_count=100,
            sampling_method=types.SamplingMethod.RANDOM,
            sampling_duration="60s",
        ),
    )


def test_get_eval_run_eval_set_source(client):
    """Tests that get_evaluation_run() returns a correctly structured EvaluationRun."""
    evaluation_run_name = (
        "projects/503583131166/locations/us-central1/evaluationRuns/6903525647549726720"
    )
    evaluation_run = client.evals.get_evaluation_run(name=evaluation_run_name)
    assert isinstance(evaluation_run, types.EvaluationRun)
    assert evaluation_run.name == evaluation_run_name
    assert evaluation_run.display_name == "test3"
    assert evaluation_run.data_source.evaluation_set == (
        "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
    )
    assert evaluation_run.state == types.EvaluationRunState.FAILED
    assert evaluation_run.error.message == (
        "code=INVALID_ARGUMENT, message=EvaluationRun 6903525647549726720 has no "
        "items, cause=null"
    )


pytest_plugins = ("pytest_asyncio",)


@pytest.mark.asyncio
async def test_get_eval_run_async(client):
    """Tests that get_evaluation_run() returns a correctly structured EvaluationRun."""
    eval_run_id = "1957799200510967808"
    eval_run_name = (
        f"projects/503583131166/locations/us-central1/evaluationRuns/{eval_run_id}"
    )
    evaluation_run = await client.aio.evals.get_evaluation_run(name=eval_run_id)
    assert isinstance(evaluation_run, types.EvaluationRun)
    assert evaluation_run.name == eval_run_name
    assert evaluation_run.display_name == "test2"
    assert evaluation_run.data_source.bigquery_request_set == types.BigQueryRequestSet(
        uri="bq://lakeyk-test-limited.inference_batch_prediction_input.1317387725199900672_1b",
        prompt_column="request",
        candidate_response_columns={
            "baseline_model_response": "baseline_model_response",
            "checkpoint_1": "checkpoint_1",
            "checkpoint_2": "checkpoint_2",
        },
    )


pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
    test_method="evals.get_evaluation_run",
)
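
The same lookup is exposed on the async surface as client.aio.evals.get_evaluation_run, and the async test above passes a bare run ID rather than a full resource name. A minimal sketch under the same assumed client construction as the earlier example:

import asyncio

import vertexai


async def main() -> None:
    # Assumed client construction, as in the sketch above.
    client = vertexai.Client(project="my-project", location="us-central1")
    # A bare run ID is accepted here; the returned run still carries
    # the full resource name (run ID taken from the async test).
    run = await client.aio.evals.get_evaluation_run(name="1957799200510967808")
    print(run.name, run.state)


asyncio.run(main())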
