Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 46 additions & 54 deletions tests/unit/vertexai/genai/replays/test_generate_user_scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,27 @@
def test_gen_user_scenarios(client):
"""Tests that generate_user_scenarios() correctly calls the API and parses the response."""
eval_dataset = client.evals.generate_user_scenarios(
agents={
"booking-agent": types.evals.AgentConfig(
agent_id="booking-agent",
agent_type="service_agent",
description="An agent capable of booking flights and hotels.",
instruction="You are a helpful travel assistant. Use tools to find flights.",
tools=[
{
"function_declarations": [
{
"name": "search_flights",
"description": "Search for available flights.",
}
]
}
],
)
},
agent_info=types.evals.AgentInfo(
agents={
"booking-agent": types.evals.AgentConfig(
agent_id="booking-agent",
agent_type="service_agent",
description="An agent capable of booking flights and hotels.",
instruction="You are a helpful travel assistant. Use tools to find flights.",
tools=[
{
"function_declarations": [
{
"name": "search_flights",
"description": "Search for available flights.",
}
]
}
],
)
},
root_agent_id="booking-agent",
),
user_scenario_generation_config=types.evals.UserScenarioGenerationConfig(
user_scenario_count=2,
simulation_instruction=(
Expand All @@ -49,18 +52,11 @@ def test_gen_user_scenarios(client):
environment_data="Today is Monday. Flights to Paris are available.",
model_name="gemini-2.5-flash",
),
root_agent_id="booking-agent",
)
assert isinstance(eval_dataset, types.EvaluationDataset)
assert len(eval_dataset.eval_cases) == 2
assert (
eval_dataset.eval_cases[0].user_scenario.starting_prompt
== "I want to find a flight from New York to London."
)
assert (
eval_dataset.eval_cases[0].user_scenario.conversation_plan
== "Actually, I meant Paris, not London. Please search for flights to Paris."
)
assert eval_dataset.eval_cases[0].user_scenario.starting_prompt
assert eval_dataset.eval_cases[0].user_scenario.conversation_plan


pytest_plugins = ("pytest_asyncio",)
Expand All @@ -70,24 +66,27 @@ def test_gen_user_scenarios(client):
async def test_gen_user_scenarios_async(client):
"""Tests that generate_user_scenarios() async correctly calls the API and parses the response."""
eval_dataset = await client.aio.evals.generate_user_scenarios(
agents={
"booking-agent": types.evals.AgentConfig(
agent_id="booking-agent",
agent_type="service_agent",
description="An agent capable of booking flights and hotels.",
instruction="You are a helpful travel assistant. Use tools to find flights.",
tools=[
{
"function_declarations": [
{
"name": "search_flights",
"description": "Search for available flights.",
}
]
}
],
)
},
agent_info=types.evals.AgentInfo(
agents={
"booking-agent": types.evals.AgentConfig(
agent_id="booking-agent",
agent_type="service_agent",
description="An agent capable of booking flights and hotels.",
instruction="You are a helpful travel assistant. Use tools to find flights.",
tools=[
{
"function_declarations": [
{
"name": "search_flights",
"description": "Search for available flights.",
}
]
}
],
)
},
root_agent_id="booking-agent",
),
user_scenario_generation_config=types.evals.UserScenarioGenerationConfig(
user_scenario_count=2,
simulation_instruction=(
Expand All @@ -97,18 +96,11 @@ async def test_gen_user_scenarios_async(client):
environment_data="Today is Monday. Flights to Paris are available.",
model_name="gemini-2.5-flash",
),
root_agent_id="booking-agent",
)
assert isinstance(eval_dataset, types.EvaluationDataset)
assert len(eval_dataset.eval_cases) == 2
assert (
eval_dataset.eval_cases[1].user_scenario.starting_prompt
== "Find me a flight from Boston to Rome for next month."
)
assert (
eval_dataset.eval_cases[1].user_scenario.conversation_plan
== "Wait, change of plans. I need to go to Milan instead, and it needs to be a round trip, returning two weeks after departure."
)
assert eval_dataset.eval_cases[1].user_scenario.starting_prompt
assert eval_dataset.eval_cases[1].user_scenario.conversation_plan


pytestmark = pytest_helper.setup(
Expand Down
6 changes: 2 additions & 4 deletions tests/unit/vertexai/genai/test_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -6213,9 +6213,8 @@ def test_generate_user_scenarios(self):
evals_module = evals.Evals(api_client_=self.mock_api_client)

eval_dataset = evals_module.generate_user_scenarios(
agents={"agent_1": {}},
agent_info={"agents": {"agent_1": {}}, "root_agent_id": "agent_1"},
user_scenario_generation_config={"user_scenario_count": 2},
root_agent_id="agent_1",
)
assert isinstance(eval_dataset, vertexai_genai_types.EvaluationDataset)
assert len(eval_dataset.eval_cases) == 2
Expand Down Expand Up @@ -6243,9 +6242,8 @@ async def test_async_generate_user_scenarios(self):
async_evals_module = evals.AsyncEvals(api_client_=self.mock_api_client)

eval_dataset = await async_evals_module.generate_user_scenarios(
agents={"agent_1": {}},
agent_info={"agents": {"agent_1": {}}, "root_agent_id": "agent_1"},
user_scenario_generation_config={"user_scenario_count": 2},
root_agent_id="agent_1",
)
assert isinstance(eval_dataset, vertexai_genai_types.EvaluationDataset)
assert len(eval_dataset.eval_cases) == 2
Expand Down
46 changes: 26 additions & 20 deletions vertexai/_genai/evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2129,15 +2129,15 @@ def create_evaluation_run(
raise ValueError(
"At most one of agent_info or inference_configs can be provided."
)
agent_info_pydantic = (
parsed_agent_info = (
evals_types.AgentInfo.model_validate(agent_info)
if isinstance(agent_info, dict)
else (agent_info or evals_types.AgentInfo())
)
if isinstance(dataset, types.EvaluationDataset):
_evals_utils._validate_dataset_agent_data(dataset, inference_configs)
resolved_dataset = _evals_common._resolve_dataset(
self._api_client, dataset, dest, agent_info_pydantic
self._api_client, dataset, dest, parsed_agent_info
)
output_config = genai_types.OutputConfig(
gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
Expand All @@ -2149,10 +2149,10 @@ def create_evaluation_run(
output_config=output_config, metrics=resolved_metrics
)
resolved_inference_configs = _evals_common._resolve_inference_configs(
self._api_client, resolved_dataset, inference_configs, agent_info_pydantic
self._api_client, resolved_dataset, inference_configs, parsed_agent_info
)
resolved_labels = _evals_common._add_evaluation_run_labels(
labels, agent_info_pydantic
labels, parsed_agent_info
)
resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
return self._create_evaluation_run(
Expand Down Expand Up @@ -2306,26 +2306,29 @@ def create_evaluation_set(
def generate_user_scenarios(
self,
*,
agents: dict[str, evals_types.AgentConfigOrDict],
agent_info: evals_types.AgentInfoOrDict,
user_scenario_generation_config: evals_types.UserScenarioGenerationConfigOrDict,
root_agent_id: str,
) -> types.EvaluationDataset:
"""Generates an evaluation dataset with user scenarios,
which helps to generate conversations between a simulated user
and the agent under test.

Args:
agents: A map of agent ID to AgentConfig.
agent_info: The agent info to generate user scenarios for.
user_scenario_generation_config: Configuration for generating user scenarios.
root_agent_id: The ID of the root agent.

Returns:
An EvaluationDataset containing the generated user scenarios.
"""
parsed_agent_info = (
evals_types.AgentInfo.model_validate(agent_info)
if isinstance(agent_info, dict)
else agent_info
)
response = self._generate_user_scenarios(
agents=agents,
agents=parsed_agent_info.agents,
root_agent_id=parsed_agent_info.root_agent_id,
user_scenario_generation_config=user_scenario_generation_config,
root_agent_id=root_agent_id,
)
return _evals_utils._postprocess_user_scenarios_response(response)

Expand Down Expand Up @@ -3304,15 +3307,15 @@ async def create_evaluation_run(
raise ValueError(
"At most one of agent_info or inference_configs can be provided."
)
agent_info_pydantic = (
parsed_agent_info = (
evals_types.AgentInfo.model_validate(agent_info)
if isinstance(agent_info, dict)
else (agent_info or evals_types.AgentInfo())
)
if isinstance(dataset, types.EvaluationDataset):
_evals_utils._validate_dataset_agent_data(dataset, inference_configs)
resolved_dataset = _evals_common._resolve_dataset(
self._api_client, dataset, dest, agent_info_pydantic
self._api_client, dataset, dest, parsed_agent_info
)
output_config = genai_types.OutputConfig(
gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
Expand All @@ -3324,10 +3327,10 @@ async def create_evaluation_run(
output_config=output_config, metrics=resolved_metrics
)
resolved_inference_configs = _evals_common._resolve_inference_configs(
self._api_client, resolved_dataset, inference_configs, agent_info_pydantic
self._api_client, resolved_dataset, inference_configs, parsed_agent_info
)
resolved_labels = _evals_common._add_evaluation_run_labels(
labels, agent_info_pydantic
labels, parsed_agent_info
)
resolved_name = name or f"evaluation_run_{uuid.uuid4()}"

Expand Down Expand Up @@ -3488,26 +3491,29 @@ async def create_evaluation_set(
async def generate_user_scenarios(
self,
*,
agents: dict[str, evals_types.AgentConfigOrDict],
agent_info: evals_types.AgentInfoOrDict,
user_scenario_generation_config: evals_types.UserScenarioGenerationConfigOrDict,
root_agent_id: str,
) -> types.EvaluationDataset:
"""Generates an evaluation dataset with user scenarios,
which helps to generate conversations between a simulated user
and the agent under test.

Args:
agents: A map of agent ID to AgentConfig.
agent_info: The agent info to generate user scenarios for.
user_scenario_generation_config: Configuration for generating user scenarios.
root_agent_id: The ID of the root agent.

Returns:
An EvaluationDataset containing the generated user scenarios.
"""
parsed_agent_info = (
evals_types.AgentInfo.model_validate(agent_info)
if isinstance(agent_info, dict)
else agent_info
)
response = await self._generate_user_scenarios(
agents=agents,
agents=parsed_agent_info.agents,
root_agent_id=parsed_agent_info.root_agent_id,
user_scenario_generation_config=user_scenario_generation_config,
root_agent_id=root_agent_id,
)
return _evals_utils._postprocess_user_scenarios_response(response)

Expand Down
Loading