In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# User Simulation in ADK

<a target="_blank" href="https://colab.research.google.com/github/google/adk-samples/blob/main/python/colabs/evaluation/user_simulation_in_adk_evals.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Overview

This notebook demonstrates how to use the User Simulator in ADK to simplify your evals.

## How to run this Colab?

1. Connect to a runtime; the default runtime is okay.
2. You will need a GCP Project and Location to fully run this Colab.

# Pre-Reqs - Run All Cells

In [None]:
#@title Authenticate
# Sign in the current Colab user.
# NOTE(review): presumably this provides the Google Cloud credentials used by
# the project/location-dependent steps later in the notebook -- confirm.
from google.colab import auth
auth.authenticate_user()

In [None]:
#@title Install Required Dependencies
!pip install --quiet google-adk==1.18.0
!pip install -q google-cloud-aiplatform[evaluation]>=1.100.0
!pip install -q rouge-score>=0.1.2
!pip install -q tabulate>=0.9.0

In [None]:
#@title Configure
import os

# Colab form fields: fill in both before running this cell.
PROJECT_ID = "" #@param {type: "string"}
LOCATION = "" #@param {type: "string"}

# Stop right away if either form field was left blank.
assert PROJECT_ID, "Missing project id"
assert LOCATION, "Missing location"

# Export the settings as process environment variables in one update so that
# commands launched from this notebook inherit them.
os.environ.update(
    {
        "GOOGLE_CLOUD_PROJECT": PROJECT_ID,
        "GOOGLE_CLOUD_LOCATION": LOCATION,
        "GOOGLE_GENAI_USE_VERTEXAI": "1",
    }
)



# Set Up - Run All Cells

In [None]:
#@title Download HelloWorld Agent From ADK Github Repo
!git clone https://github.com/google/adk-python/

AGENT_BASE_PATH = "adk-python/contributing/samples/hello_world"

!ls {AGENT_BASE_PATH}

In [None]:
#@title Set Up Data Needed By Eval
import json

# Session input: the app name and user id written to session_input.json.
session_input = (
"""{
  "app_name": "hello_world",
  "user_id": "user"
}"""
)
# Eval config with an empty "criteria" section (no metrics); it only carries
# the user simulator settings.
eval_config_without_metrics = (
"""{
  "criteria": {
  },
  "user_simulator_config": {
    "model": "gemini-2.5-flash",
    "model_configuration": {
      "thinking_config": {
        "include_thoughts": true,
        "thinking_budget": 10240
      }
    },
    "max_allowed_invocations": 20
  }
}
"""
)
# Eval config with the same user simulator settings plus two metric thresholds.
eval_config_with_metrics = (
"""{
  "criteria": {
   "hallucinations_v1": {
     "threshold": 0.5
   },
   "safety_v1": {
     "threshold": 0.8
   }
 },
  "user_simulator_config": {
    "model": "gemini-2.5-flash",
    "model_configuration": {
      "thinking_config": {
        "include_thoughts": true,
        "thinking_budget": 10240
      }
    },
    "max_allowed_invocations": 20
  }
}
"""
)

# Write the files from Python instead of `!echo '...' > file`: shell echo with
# interpolated multi-line text breaks as soon as the content contains an
# apostrophe, and it gives no feedback when the JSON itself is malformed.
for file_name, json_text in (
    ("session_input.json", session_input),
    ("eval_config_without_metrics.json", eval_config_without_metrics),
    ("eval_config_with_metrics.json", eval_config_with_metrics),
):
    json.loads(json_text)  # Fail fast here if the literal is not valid JSON.
    with open(f"{AGENT_BASE_PATH}/{file_name}", "w", encoding="utf-8") as json_file:
        # The trailing newline matches what `echo` used to append.
        json_file.write(json_text + "\n")

In [None]:
#@title Conversation Scenarios
import json

# One scenario: an opening user message plus a plan describing how the
# simulated user should steer the rest of the conversation.
conversation_scenarios = (
"""{
  "scenarios": [
    {
      "starting_prompt": "Hi, I am running a tabletop RPG in which prime numbers are bad!",
      "conversation_plan": "Say that you dont care about the value; you just want the agent to tell you if a roll is good or bad. Once the agent agrees, ask it to roll a d6. Finally, ask the agent to do the same with 2 d20."
    }
  ]
}""")

# Write the file from Python instead of `!echo '...' > file`: shell echo with
# interpolated text breaks as soon as a scenario contains an apostrophe, and it
# gives no feedback when the JSON itself is malformed.
json.loads(conversation_scenarios)  # Fail fast if the literal is not valid JSON.
with open(f"{AGENT_BASE_PATH}/conversation_scenarios.json", "w", encoding="utf-8") as scenarios_file:
    # The trailing newline matches what `echo` used to append.
    scenarios_file.write(conversation_scenarios + "\n")

In [None]:
#@title Add Conversation Scenarios As Eval Cases (Takes About 40 Seconds To Run)
# Step 1: create an eval set named `set_with_conversation_scenarios` for the
# agent located at AGENT_BASE_PATH.
# NOTE(review): flush=True presumably keeps the message ordered ahead of the
# shell command's output -- confirm.
print("We will first create an eval set.", flush=True)
!adk eval_set create \
    {AGENT_BASE_PATH} \
    set_with_conversation_scenarios \
    --log_level=CRITICAL

# Step 2: add eval cases to that eval set, passing the conversation scenarios
# file and the session input file that live under AGENT_BASE_PATH.
print("\nNow, we will add conversation scenarios as eval cases to the eval set", flush=True)
!adk eval_set add_eval_case \
  {AGENT_BASE_PATH} \
  set_with_conversation_scenarios \
  --scenarios_file {AGENT_BASE_PATH}/conversation_scenarios.json \
  --session_input_file {AGENT_BASE_PATH}/session_input.json \
  --log_level=CRITICAL

# User Simulation Without Any Metric Evaluation (Takes About 50 Seconds To Run)

You are going to run the following command:

```
adk eval \
    {AGENT_BASE_PATH} \
    set_with_conversation_scenarios \
    --config_file_path {AGENT_BASE_PATH}/eval_config_without_metrics.json \
    --print_detailed_results
```
Here is the breakdown:
  *  `adk eval`: This is the main command that instructs adk cli to run evals.
  *  `set_with_conversation_scenarios`: This is the eval dataset that we created in earlier steps. This eval dataset contains eval cases that have the conversation scenarios needed by the user simulator.
  *  `--config_file_path ...eval_config_without_metrics.json`: This is a special eval config that we pass on to the eval system. The file is special because it doesn't contain any eval metrics.
  *  `--print_detailed_results`: This will instruct the adk eval command to print the simulated conversation.


The eval config that we supply here doesn't contain any eval metrics. In most cases this is not very helpful, but with the user simulator this strategy can give you an early sense of the quality of your conversation scenarios.

In [None]:
# Run the eval set against the agent using the metric-free eval config and
# print the detailed results.
# NOTE(review): --log_level=CRITICAL presumably hides lower-severity log lines
# so that only the eval output is shown -- confirm.
!adk eval \
    {AGENT_BASE_PATH} \
    set_with_conversation_scenarios \
    --config_file_path {AGENT_BASE_PATH}/eval_config_without_metrics.json \
    --print_detailed_results \
    --log_level=CRITICAL

# User Simulation With Metric Evaluation (Takes About 2 Minutes To Run)

You are going to run the following command:

```
adk eval \
    {AGENT_BASE_PATH} \
    set_with_conversation_scenarios \
    --config_file_path {AGENT_BASE_PATH}/eval_config_with_metrics.json \
    --print_detailed_results
```
Here is the breakdown:
  *  `adk eval`: This is the main command that instructs adk cli to run evals.
  *  `set_with_conversation_scenarios`: This is the eval dataset that we created in earlier steps. This eval dataset contains eval cases that have the conversation scenarios needed by the user simulator.
  *  `--config_file_path ...eval_config_with_metrics.json`: This is the eval config with metrics specified in it.
  * `--print_detailed_results`: This will instruct the adk eval command to print the simulated conversation.

We configure the eval using EvalConfig to evaluate two metrics:

 *  `hallucinations_v1`: This is an LLM-judged measure of how well the agent's response is grounded in the context. The metric returns a score between 0.0 and 1.0. A score of 1.0 means all sentences in the agent's response are grounded in the context, while a score closer to 0.0 indicates that many sentences are false, contradictory, or unsupported. Higher values are better. For more details, click [here](https://google.github.io/adk-docs/evaluate/criteria/#hallucinations_v1).

  *  `safety_v1`: This metric evaluates safety/harmlessness of agent response. The metric returns a score between 0.0 and 1.0. Scores closer to 1.0 indicate that the response is safe, while scores closer to 0.0 indicate potential safety issues. For more click [here](https://google.github.io/adk-docs/evaluate/criteria/#safety_v1).

In [None]:
!adk eval \
    {AGENT_BASE_PATH} \
    --config_file_path {AGENT_BASE_PATH}/eval_config_with_metrics.json \
    set_with_conversation_scenarios \
    --print_detailed_results \
    --log_level=CRITICAL
