diff --git a/core/src/main/java/com/google/adk/tools/computeruse/BaseComputer.java b/core/src/main/java/com/google/adk/tools/computeruse/BaseComputer.java new file mode 100644 index 000000000..3ddb91963 --- /dev/null +++ b/core/src/main/java/com/google/adk/tools/computeruse/BaseComputer.java @@ -0,0 +1,99 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.adk.tools.computeruse; + +import com.google.adk.tools.Annotations.Schema; +import io.reactivex.rxjava3.core.Completable; +import io.reactivex.rxjava3.core.Single; +import java.time.Duration; +import java.util.List; + +/** + * Defines an interface for computer environments. + * + *

This interface defines the standard methods for controlling computer environments, including + * web browsers and other interactive systems. + */ +public interface BaseComputer { + + /** Returns the screen size of the environment. */ + Single screenSize(); + + /** Opens the web browser. */ + Single openWebBrowser(); + + /** Clicks at a specific x, y coordinate on the webpage. */ + Single clickAt(@Schema(name = "x") int x, @Schema(name = "y") int y); + + /** Hovers at a specific x, y coordinate on the webpage. */ + Single hoverAt(@Schema(name = "x") int x, @Schema(name = "y") int y); + + /** Types text at a specific x, y coordinate. */ + Single typeTextAt( + @Schema(name = "x") int x, + @Schema(name = "y") int y, + @Schema(name = "text") String text, + @Schema(name = "press_enter", optional = true) Boolean pressEnter, + @Schema(name = "clear_before_typing", optional = true) Boolean clearBeforeTyping); + + /** Scrolls the entire webpage in a direction. */ + Single scrollDocument(@Schema(name = "direction") String direction); + + /** Scrolls at a specific x, y coordinate by magnitude. */ + Single scrollAt( + @Schema(name = "x") int x, + @Schema(name = "y") int y, + @Schema(name = "direction") String direction, + @Schema(name = "magnitude") int magnitude); + + /** Waits for specified duration. */ + Single wait(@Schema(name = "duration") Duration duration); + + /** Navigates back. */ + Single goBack(); + + /** Navigates forward. */ + Single goForward(); + + /** Jumps to search. */ + Single search(); + + /** Navigates to URL. */ + Single navigate(@Schema(name = "url") String url); + + /** Presses key combination. */ + Single keyCombination(@Schema(name = "keys") List keys); + + /** Drag and drop. */ + Single dragAndDrop( + @Schema(name = "x") int x, + @Schema(name = "y") int y, + @Schema(name = "destination_x") int destinationX, + @Schema(name = "destination_y") int destinationY); + + /** Returns current state. */ + Single currentState(); + + /** Initialize the computer. 
*/ + Completable initialize(); + + /** Cleanup resources. */ + Completable close(); + + /** Returns the environment. */ + Single environment(); +} diff --git a/core/src/main/java/com/google/adk/tools/computeruse/ComputerEnvironment.java b/core/src/main/java/com/google/adk/tools/computeruse/ComputerEnvironment.java new file mode 100644 index 000000000..2c897c794 --- /dev/null +++ b/core/src/main/java/com/google/adk/tools/computeruse/ComputerEnvironment.java @@ -0,0 +1,23 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.adk.tools.computeruse; + +/** Enum for computer environments. */ +public enum ComputerEnvironment { + ENVIRONMENT_UNSPECIFIED, + ENVIRONMENT_BROWSER +} diff --git a/core/src/main/java/com/google/adk/tools/computeruse/ComputerState.java b/core/src/main/java/com/google/adk/tools/computeruse/ComputerState.java new file mode 100644 index 000000000..4f3be46c2 --- /dev/null +++ b/core/src/main/java/com/google/adk/tools/computeruse/ComputerState.java @@ -0,0 +1,108 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.adk.tools.computeruse; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import java.util.Arrays; +import java.util.Objects; +import java.util.Optional; + +/** + * Represents the current state of the computer environment. + * + *

Attributes: {@code screenshot} is the screenshot in PNG format as bytes; {@code url} is the + * current URL of the webpage being displayed, if present. + */ +public final class ComputerState { + private final byte[] screenshot; + private final Optional url; + + @JsonCreator + private ComputerState( + @JsonProperty("screenshot") byte[] screenshot, @JsonProperty("url") Optional url) { + this.screenshot = screenshot.clone(); + this.url = url; + } + + @JsonProperty("screenshot") + public byte[] screenshot() { + return screenshot.clone(); + } + + @JsonProperty("url") + public Optional url() { + return url; + } + + public static Builder builder() { + return new Builder(); + } + + /** Builder for {@link ComputerState}. */ + public static final class Builder { + private byte[] screenshot; + private Optional url = Optional.empty(); + + @CanIgnoreReturnValue + public Builder screenshot(byte[] screenshot) { + this.screenshot = screenshot.clone(); + return this; + } + + @CanIgnoreReturnValue + public Builder url(Optional url) { + this.url = url; + return this; + } + + @CanIgnoreReturnValue + public Builder url(String url) { + this.url = Optional.ofNullable(url); + return this; + } + + public ComputerState build() { + return new ComputerState(screenshot, url); + } + } + + public static ComputerState create(byte[] screenshot, String url) { + return builder().screenshot(screenshot).url(url).build(); + } + + public static ComputerState create(byte[] screenshot) { + return builder().screenshot(screenshot).build(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof ComputerState that)) { + return false; + } + return Objects.deepEquals(screenshot, that.screenshot) && Objects.equals(url, that.url); + } + + @Override + public int hashCode() { + return Objects.hash(Arrays.hashCode(screenshot), url); + } +} diff --git a/core/src/main/java/com/google/adk/tools/computeruse/ComputerUseTool.java
b/core/src/main/java/com/google/adk/tools/computeruse/ComputerUseTool.java new file mode 100644 index 000000000..cedf7f35c --- /dev/null +++ b/core/src/main/java/com/google/adk/tools/computeruse/ComputerUseTool.java @@ -0,0 +1,125 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.adk.tools.computeruse; + +import static java.lang.String.format; + +import com.google.adk.tools.FunctionTool; +import com.google.adk.tools.ToolContext; +import com.google.common.collect.ImmutableMap; +import io.reactivex.rxjava3.core.Single; +import java.lang.reflect.Method; +import java.util.Base64; +import java.util.HashMap; +import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A tool that wraps computer control functions for use with LLMs. + * + *

This tool automatically normalizes coordinates from a virtual coordinate space (by default + * 1000x1000) to the actual screen size. + */ +public class ComputerUseTool extends FunctionTool { + + private static final Logger logger = LoggerFactory.getLogger(ComputerUseTool.class); + + private final int[] screenSize; + private final int[] coordinateSpace; + + public ComputerUseTool(Object instance, Method func, int[] screenSize, int[] virtualScreenSize) { + super(instance, func, /* isLongRunning= */ false); + this.screenSize = screenSize; + this.coordinateSpace = virtualScreenSize; + } + + private int normalize(Object object, String coordinateName, int index) { + if (!(object instanceof Number number)) { + throw new IllegalArgumentException(format("%s coordinate must be numeric", coordinateName)); + } + double coordinate = number.doubleValue(); + int normalized = (int) (coordinate / coordinateSpace[index] * screenSize[index]); + // Clamp to screen bounds + int clamped = Math.max(0, Math.min(normalized, screenSize[index] - 1)); + logger.atDebug().log( + format( + "%s: %.2f, normalized %s: %d, screen %s size: %d, coordinate-space %s size: %d, " + + "clamped %s: %d", + coordinateName, + coordinate, + coordinateName, + normalized, + coordinateName, + screenSize[index], + coordinateName, + coordinateSpace[index], + coordinateName, + clamped)); + return clamped; + } + + private int normalizeX(Object xObj) { + return normalize(xObj, "x", 0); + } + + private int normalizeY(Object yObj) { + return normalize(yObj, "y", 1); + } + + @Override + public Single> runAsync(Map args, ToolContext toolContext) { + Map normalizedArgs = new HashMap<>(args); + + if (args.containsKey("x")) { + normalizedArgs.put("x", normalizeX(args.get("x"))); + } + if (args.containsKey("y")) { + normalizedArgs.put("y", normalizeY(args.get("y"))); + } + if (args.containsKey("destination_x")) { + normalizedArgs.put("destination_x", normalizeX(args.get("destination_x"))); + } + if 
(args.containsKey("destination_y")) { + normalizedArgs.put("destination_y", normalizeY(args.get("destination_y"))); + } + + return super.runAsync(normalizedArgs, toolContext) + .map( + result -> { + // If the underlying tool method returned a structure containing a "screenshot" field + // (e.g., a ComputerState object), FunctionTool.runAsync will have converted it to a + // Map. This post-processing step transforms the byte array "screenshot" field into + // an "image" map with a mimetype and Base64 encoded data, as expected by some + // consuming systems. + if (result.containsKey("screenshot") && result.get("screenshot") instanceof byte[]) { + byte[] screenshot = (byte[]) result.get("screenshot"); + ImmutableMap imageMap = + ImmutableMap.of( + "mimetype", + "image/png", + "data", + Base64.getEncoder().encodeToString(screenshot)); + Map finalResult = new HashMap<>(result); + finalResult.remove("screenshot"); + finalResult.put("image", imageMap); + return finalResult; + } + return result; + }); + } +} diff --git a/core/src/main/java/com/google/adk/tools/computeruse/ComputerUseToolset.java b/core/src/main/java/com/google/adk/tools/computeruse/ComputerUseToolset.java new file mode 100644 index 000000000..8312badc3 --- /dev/null +++ b/core/src/main/java/com/google/adk/tools/computeruse/ComputerUseToolset.java @@ -0,0 +1,181 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.adk.tools.computeruse; + +import static com.google.common.collect.ImmutableList.toImmutableList; + +import com.google.adk.agents.ReadonlyContext; +import com.google.adk.models.LlmRequest; +import com.google.adk.tools.BaseTool; +import com.google.adk.tools.BaseToolset; +import com.google.adk.tools.ToolContext; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.genai.types.ComputerUse; +import com.google.genai.types.Environment; +import com.google.genai.types.GenerateContentConfig; +import com.google.genai.types.Tool; +import io.reactivex.rxjava3.core.Completable; +import io.reactivex.rxjava3.core.Flowable; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A toolset that provides computer use capabilities. + * + *

It automatically discovers and wraps methods from a {@link BaseComputer} implementation. + */ +public class ComputerUseToolset implements BaseToolset { + + private static final Logger logger = LoggerFactory.getLogger(ComputerUseToolset.class); + + private static final ImmutableSet EXCLUDED_METHODS = + ImmutableSet.of( + "screenSize", + "environment", + "close", + "initialize", + "currentState", + "getClass", + "equals", + "hashCode", + "toString", + "wait", + "notify", + "notifyAll"); + + private final BaseComputer computer; + private final int[] virtualScreenSize; + private List tools; + private boolean initialized = false; + + public ComputerUseToolset(BaseComputer computer) { + this(computer, new int[] {1000, 1000}); + } + + public ComputerUseToolset(BaseComputer computer, int[] virtualScreenSize) { + this.computer = computer; + this.virtualScreenSize = virtualScreenSize; + } + + private synchronized Completable ensureInitialized() { + if (initialized) { + return Completable.complete(); + } + return computer + .initialize() + .doOnComplete( + () -> { + initialized = true; + }); + } + + @Override + public Flowable getTools(ReadonlyContext readonlyContext) { + return ensureInitialized() + .andThen(computer.screenSize()) + .flatMapPublisher( + actualScreenSize -> { + if (tools == null) { + tools = new ArrayList<>(); + for (Method method : BaseComputer.class.getMethods()) { + if (!EXCLUDED_METHODS.contains(method.getName())) { + tools.add( + new ComputerUseTool(computer, method, actualScreenSize, virtualScreenSize)); + } + } + } + return Flowable.fromIterable(tools); + }); + } + + @Override + public void close() throws Exception { + computer.close().blockingAwait(); + } + + /** Adds computer use configuration to the LLM request. 
*/ + public Completable processLlmRequest( + LlmRequest.Builder llmRequestBuilder, ToolContext toolContext) { + return getTools(null) // Fetch tools to ensure they are added to the list + .toList() + .flatMapCompletable( + tools -> { + return Completable.concat( + tools.stream() + .map(t -> t.processLlmRequest(llmRequestBuilder, toolContext)) + .collect(toImmutableList())) + .andThen( + computer + .environment() + .flatMapCompletable( + env -> { + configureComputerUseIfNeeded(llmRequestBuilder, env); + return Completable.complete(); + })); + }); + } + + /** + * Returns the {@link Environment.Known} enum for the given {@link ComputerEnvironment}. If the + * computer environment is not found or not supported, defaults to {@link + * Environment.Known#ENVIRONMENT_BROWSER}. + * + * @param computerEnvironment The {@link ComputerEnvironment} to convert. + * @return The corresponding {@link Environment.Known} enum. + */ + private static Environment.Known getEnvironment(ComputerEnvironment computerEnvironment) { + try { + return Environment.Known.valueOf(computerEnvironment.name()); + } catch (IllegalArgumentException e) { + return Environment.Known.ENVIRONMENT_BROWSER; + } + } + + /** + * Configures the computer use tool in the LLM request if it is not already configured. + * + * @param llmRequestBuilder The LLM request builder to add the computer use tool to. + * @param computerEnvironment The environment to configure the computer use tool for.
+ */ + private static void configureComputerUseIfNeeded( + LlmRequest.Builder llmRequestBuilder, ComputerEnvironment computerEnvironment) { + // Get the current config from the LLM request + GenerateContentConfig config = + llmRequestBuilder.config().orElse(GenerateContentConfig.builder().build()); + + // Check if computer use is already configured + if (config.tools().orElse(ImmutableList.of()).stream() + .anyMatch(t -> t.computerUse().isPresent())) { + logger.debug("Computer use already configured"); + return; + } + + // Configure the computer + Environment.Known knownEnv = getEnvironment(computerEnvironment); + Tool computerUseTool = + Tool.builder().computerUse(ComputerUse.builder().environment(knownEnv).build()).build(); + // Add the computer use tool to the list of tools in the config + List currentTools = new ArrayList<>(config.tools().orElse(ImmutableList.of())); + currentTools.add(computerUseTool); + llmRequestBuilder.config(config.toBuilder().tools(ImmutableList.copyOf(currentTools)).build()); + logger.debug("Added computer use tool with environment: {}", knownEnv); + } +} diff --git a/core/src/test/java/com/google/adk/tools/computeruse/ComputerEnvironmentTest.java b/core/src/test/java/com/google/adk/tools/computeruse/ComputerEnvironmentTest.java new file mode 100644 index 000000000..ed22819ec --- /dev/null +++ b/core/src/test/java/com/google/adk/tools/computeruse/ComputerEnvironmentTest.java @@ -0,0 +1,36 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.adk.tools.computeruse; + +import static com.google.common.truth.Truth.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link ComputerEnvironment}. */ +@RunWith(JUnit4.class) +public final class ComputerEnvironmentTest { + + @Test + public void testEnumValues() { + assertThat(ComputerEnvironment.values()) + .asList() + .containsAtLeast( + ComputerEnvironment.ENVIRONMENT_UNSPECIFIED, ComputerEnvironment.ENVIRONMENT_BROWSER); + } +} diff --git a/core/src/test/java/com/google/adk/tools/computeruse/ComputerStateTest.java b/core/src/test/java/com/google/adk/tools/computeruse/ComputerStateTest.java new file mode 100644 index 000000000..736f9be0e --- /dev/null +++ b/core/src/test/java/com/google/adk/tools/computeruse/ComputerStateTest.java @@ -0,0 +1,79 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.adk.tools.computeruse; + +import static com.google.common.truth.Truth.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link ComputerState}. 
*/ +@RunWith(JUnit4.class) +public final class ComputerStateTest { + + @Test + public void testBuilder() { + byte[] screenshot = new byte[] {1, 2, 3}; + String url = "https://google.com"; + ComputerState state = ComputerState.builder().screenshot(screenshot).url(url).build(); + + assertThat(state.screenshot()).isEqualTo(screenshot); + assertThat(state.url()).hasValue(url); + } + + @Test + public void testBuilder_noUrl() { + byte[] screenshot = new byte[] {1, 2, 3}; + ComputerState state = ComputerState.builder().screenshot(screenshot).build(); + + assertThat(state.screenshot()).isEqualTo(screenshot); + assertThat(state.url()).isEmpty(); + } + + @Test + public void testEqualsAndHashCode() { + byte[] screenshot1 = new byte[] {1, 2, 3}; + byte[] screenshot2 = new byte[] {1, 2, 3}; + byte[] screenshot3 = new byte[] {4, 5, 6}; + + ComputerState state1 = ComputerState.builder().screenshot(screenshot1).url("url1").build(); + ComputerState state2 = ComputerState.builder().screenshot(screenshot2).url("url1").build(); + ComputerState state3 = ComputerState.builder().screenshot(screenshot3).url("url1").build(); + ComputerState state4 = ComputerState.builder().screenshot(screenshot1).url("url2").build(); + + assertThat(state1).isEqualTo(state2); + assertThat(state1.hashCode()).isEqualTo(state2.hashCode()); + + assertThat(state1).isNotEqualTo(state3); + assertThat(state1).isNotEqualTo(state4); + } + + @Test + public void testScreenshotImmutability() { + byte[] screenshot = new byte[] {1, 2, 3}; + ComputerState state = ComputerState.builder().screenshot(screenshot).build(); + + // Modify original array + screenshot[0] = 9; + assertThat(state.screenshot()[0]).isEqualTo(1); + + // Modify returned array + state.screenshot()[0] = 9; + assertThat(state.screenshot()[0]).isEqualTo(1); + } +} diff --git a/core/src/test/java/com/google/adk/tools/computeruse/ComputerUseToolTest.java b/core/src/test/java/com/google/adk/tools/computeruse/ComputerUseToolTest.java new file mode 100644 index 
000000000..20fb146cf --- /dev/null +++ b/core/src/test/java/com/google/adk/tools/computeruse/ComputerUseToolTest.java @@ -0,0 +1,258 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.adk.tools.computeruse; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertThrows; + +import com.google.adk.agents.InvocationContext; +import com.google.adk.agents.LlmAgent; +import com.google.adk.sessions.InMemorySessionService; +import com.google.adk.sessions.Session; +import com.google.adk.tools.Annotations.Schema; +import com.google.adk.tools.ToolContext; +import com.google.common.collect.ImmutableMap; +import io.reactivex.rxjava3.core.Single; +import java.lang.reflect.Method; +import java.util.Base64; +import java.util.Map; +import java.util.Optional; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link ComputerUseTool}. 
*/ +@RunWith(JUnit4.class) +public final class ComputerUseToolTest { + + private LlmAgent agent; + private InMemorySessionService sessionService; + private ToolContext toolContext; + private ComputerMock computerMock; + + @Before + public void setUp() { + agent = LlmAgent.builder().name("test-agent").build(); + sessionService = new InMemorySessionService(); + Session session = + sessionService.createSession("test-app", "test-user", null, "test-session").blockingGet(); + InvocationContext invocationContext = + InvocationContext.builder() + .agent(agent) + .session(session) + .sessionService(sessionService) + .invocationId("invocation-id") + .build(); + toolContext = ToolContext.builder(invocationContext).functionCallId("functionCallId").build(); + computerMock = new ComputerMock(); + } + + @Test + public void testNormalizeX() throws NoSuchMethodException { + Method method = ComputerMock.class.getMethod("clickAt", int.class, int.class); + ComputerUseTool tool = + new ComputerUseTool(computerMock, method, new int[] {1920, 1080}, new int[] {1000, 1000}); + + assertThat(tool.runAsync(ImmutableMap.of("x", 0, "y", 0), toolContext).blockingGet()) + .isNotNull(); + assertThat(computerMock.lastX).isEqualTo(0); + + assertThat(tool.runAsync(ImmutableMap.of("x", 500, "y", 300), toolContext).blockingGet()) + .isNotNull(); + assertThat(computerMock.lastX).isEqualTo(960); // 500/1000 * 1920 + + assertThat(tool.runAsync(ImmutableMap.of("x", 1000, "y", 300), toolContext).blockingGet()) + .isNotNull(); + assertThat(computerMock.lastX).isEqualTo(1919); // Clamped + } + + @Test + public void testNormalizeY() throws NoSuchMethodException { + Method method = ComputerMock.class.getMethod("clickAt", int.class, int.class); + ComputerUseTool tool = + new ComputerUseTool(computerMock, method, new int[] {1920, 1080}, new int[] {1000, 1000}); + + assertThat(tool.runAsync(ImmutableMap.of("x", 0, "y", 500), toolContext).blockingGet()) + .isNotNull(); + 
assertThat(computerMock.lastY).isEqualTo(540); // 500/1000 * 1080 + } + + @Test + public void testNormalizeWithCustomVirtualScreenSize() throws NoSuchMethodException { + Method method = ComputerMock.class.getMethod("clickAt", int.class, int.class); + ComputerUseTool tool = + new ComputerUseTool(computerMock, method, new int[] {1920, 1080}, new int[] {2000, 2000}); + + assertThat(tool.runAsync(ImmutableMap.of("x", 1000, "y", 1000), toolContext).blockingGet()) + .isNotNull(); + assertThat(computerMock.lastX).isEqualTo(960); // 1000/2000 * 1920 + assertThat(computerMock.lastY).isEqualTo(540); // 1000/2000 * 1080 + } + + @Test + public void testNormalizeDragAndDrop() throws NoSuchMethodException { + Method method = + ComputerMock.class.getMethod("dragAndDrop", int.class, int.class, int.class, int.class); + ComputerUseTool tool = + new ComputerUseTool(computerMock, method, new int[] {1920, 1080}, new int[] {1000, 1000}); + + Map result = + tool.runAsync( + ImmutableMap.of("x", 100, "y", 200, "destination_x", 800, "destination_y", 600), + toolContext) + .blockingGet(); + assertThat(result).isNotNull(); + + assertThat(computerMock.lastX).isEqualTo(192); + assertThat(computerMock.lastY).isEqualTo(216); + assertThat(computerMock.lastDestX).isEqualTo(1536); + assertThat(computerMock.lastDestY).isEqualTo(648); + } + + @Test + public void testResultFormatting() throws NoSuchMethodException { + byte[] screenshot = new byte[] {1, 2, 3}; + computerMock.nextState = + ComputerState.builder() + .screenshot(screenshot) + .url(Optional.of("https://example.com")) + .build(); + + Method method = ComputerMock.class.getMethod("clickAt", int.class, int.class); + ComputerUseTool tool = + new ComputerUseTool(computerMock, method, new int[] {1920, 1080}, new int[] {1000, 1000}); + + Map result = + tool.runAsync(ImmutableMap.of("x", 500, "y", 500), toolContext).blockingGet(); + assertThat(result).containsKey("image"); + Object imageData = result.get("image"); + 
assertThat(imageData).isInstanceOf(Map.class); + ((Map) imageData) + .forEach( + (key, value) -> { + assertThat(key).isInstanceOf(String.class); + assertThat(value).isInstanceOf(String.class); + }); + @SuppressWarnings("unchecked") // The types of the key and value are checked above. + Map imageMap = (Map) imageData; + assertThat(imageMap.get("mimetype")).isEqualTo("image/png"); + assertThat(imageMap.get("data")).isEqualTo(Base64.getEncoder().encodeToString(screenshot)); + assertThat(result.get("url")).isEqualTo("https://example.com"); + assertThat(result).containsKey("image"); + assertThat(result).doesNotContainKey("screenshot"); + } + + @Test + public void testResultFormatting_noScreenshot() throws NoSuchMethodException { + Method method = ComputerMock.class.getMethod("noScreenshot"); + ComputerUseTool tool = + new ComputerUseTool(computerMock, method, new int[] {1920, 1080}, new int[] {1000, 1000}); + + Map result = tool.runAsync(ImmutableMap.of(), toolContext).blockingGet(); + assertThat(result).doesNotContainKey("image"); + assertThat(result.get("url")).isEqualTo("https://example.com"); + } + + @Test + public void testResultFormatting_nonByteArrayScreenshot() throws NoSuchMethodException { + Method method = ComputerMock.class.getMethod("nonByteArrayScreenshot"); + ComputerUseTool tool = + new ComputerUseTool(computerMock, method, new int[] {1920, 1080}, new int[] {1000, 1000}); + + Map result = tool.runAsync(ImmutableMap.of(), toolContext).blockingGet(); + assertThat(result).doesNotContainKey("image"); + assertThat(result.get("screenshot")).isEqualTo("not-a-byte-array"); + } + + @Test + public void testNormalizeWithInvalidInputs() throws NoSuchMethodException { + Method method = ComputerMock.class.getMethod("clickAt", int.class, int.class); + ComputerUseTool tool = + new ComputerUseTool(computerMock, method, new int[] {1920, 1080}, new int[] {1000, 1000}); + + assertThrows( + IllegalArgumentException.class, + () -> tool.runAsync(ImmutableMap.of("x", "invalid", 
"y", 500), toolContext).blockingGet()); + } + + @Test + public void testRunAsyncWithNoCoordinates() throws NoSuchMethodException { + Method method = ComputerMock.class.getMethod("clickAt", int.class, int.class); + ComputerUseTool tool = + new ComputerUseTool(computerMock, method, new int[] {1920, 1080}, new int[] {1000, 1000}); + + // Arguments without x, y, etc. should be passed as is. + ImmutableMap args = ImmutableMap.of("other", "value"); + var unused = tool.runAsync(args, toolContext).blockingGet(); + assertThat(computerMock.lastX).isEqualTo(0); + assertThat(computerMock.lastY).isEqualTo(0); + } + + @Test + public void testCoordinateClamping() throws NoSuchMethodException { + Method method = ComputerMock.class.getMethod("clickAt", int.class, int.class); + ComputerUseTool tool = + new ComputerUseTool(computerMock, method, new int[] {1920, 1080}, new int[] {1000, 1000}); + + // Test clamping to 0 + var unused1 = tool.runAsync(ImmutableMap.of("x", -100, "y", -50), toolContext).blockingGet(); + assertThat(computerMock.lastX).isEqualTo(0); + assertThat(computerMock.lastY).isEqualTo(0); + + // Test clamping to max + var unused2 = tool.runAsync(ImmutableMap.of("x", 2000, "y", 1500), toolContext).blockingGet(); + assertThat(computerMock.lastX).isEqualTo(1919); + assertThat(computerMock.lastY).isEqualTo(1079); + } + + /** A mock class for Computer actions. 
*/ + public static class ComputerMock { + public int lastX; + public int lastY; + public int lastDestX; + public int lastDestY; + public ComputerState nextState = + ComputerState.builder().screenshot(new byte[0]).url(Optional.empty()).build(); + + public Single clickAt(@Schema(name = "x") int x, @Schema(name = "y") int y) { + this.lastX = x; + this.lastY = y; + return Single.just(nextState); + } + + public Single dragAndDrop( + @Schema(name = "x") int x, + @Schema(name = "y") int y, + @Schema(name = "destination_x") int destinationX, + @Schema(name = "destination_y") int destinationY) { + this.lastX = x; + this.lastY = y; + this.lastDestX = destinationX; + this.lastDestY = destinationY; + return Single.just(nextState); + } + + public Single> noScreenshot() { + return Single.just(ImmutableMap.of("url", "https://example.com")); + } + + public Single> nonByteArrayScreenshot() { + return Single.just(ImmutableMap.of("screenshot", "not-a-byte-array")); + } + } +} diff --git a/core/src/test/java/com/google/adk/tools/computeruse/ComputerUseToolsetTest.java b/core/src/test/java/com/google/adk/tools/computeruse/ComputerUseToolsetTest.java new file mode 100644 index 000000000..1ed49419e --- /dev/null +++ b/core/src/test/java/com/google/adk/tools/computeruse/ComputerUseToolsetTest.java @@ -0,0 +1,264 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/

package com.google.adk.tools.computeruse;

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;

import com.google.adk.agents.InvocationContext;
import com.google.adk.agents.LlmAgent;
import com.google.adk.models.LlmRequest;
import com.google.adk.sessions.InMemorySessionService;
import com.google.adk.sessions.Session;
import com.google.adk.tools.BaseTool;
import com.google.adk.tools.ToolContext;
import com.google.genai.types.Environment;
import com.google.genai.types.GenerateContentConfig;
import com.google.genai.types.Tool;
import io.reactivex.rxjava3.core.Completable;
import io.reactivex.rxjava3.core.Single;
import java.time.Duration;
import java.util.List;
import java.util.Optional;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** Unit tests for {@link ComputerUseToolset}. */
@RunWith(JUnit4.class)
public final class ComputerUseToolsetTest {

  private LlmAgent agent;
  private InMemorySessionService sessionService;
  private ToolContext toolContext;
  private MockComputer mockComputer;
  private ComputerUseToolset toolset;

  /** Builds a session-backed {@link ToolContext} and a toolset wrapping a new mock computer. */
  @Before
  public void setUp() {
    agent = LlmAgent.builder().name("test-agent").build();
    sessionService = new InMemorySessionService();
    Session session =
        sessionService.createSession("test-app", "test-user", null, "test-session").blockingGet();
    InvocationContext invocationContext =
        InvocationContext.builder()
            .agent(agent)
            .session(session)
            .sessionService(sessionService)
            .invocationId("invocation-id")
            .build();
    toolContext = ToolContext.builder(invocationContext).functionCallId("functionCallId").build();

    mockComputer = new MockComputer();
    toolset = new ComputerUseToolset(mockComputer);
  }

  /**
   * Listing the tools initializes the computer once and yields a non-empty list that contains
   * {@code clickAt} but neither {@code screenSize} nor {@code environment}.
   */
  @Test
  public void testGetTools() {
    List<BaseTool> tools = toolset.getTools(null).toList().blockingGet();

    assertThat(mockComputer.initializeCallCount).isEqualTo(1);
    assertThat(tools).isNotEmpty();

    // Verify method filtering
    assertThat(tools.stream().anyMatch(t -> t.name().equals("clickAt"))).isTrue();
    assertThat(tools.stream().noneMatch(t -> t.name().equals("screenSize"))).isTrue();
    assertThat(tools.stream().noneMatch(t -> t.name().equals("environment"))).isTrue();
  }

  /** Listing the tools twice must still initialize the computer only once. */
  @Test
  public void testEnsureInitializedOnlyCalledOnce() {
    var unused1 = toolset.getTools(null).toList().blockingGet();
    var unused2 = toolset.getTools(null).toList().blockingGet();

    assertThat(mockComputer.initializeCallCount).isEqualTo(1);
  }

  /** Two consecutive listings return the very same tool instances, position by position. */
  @Test
  public void testGetTools_cachesTools() {
    List<BaseTool> tools1 = toolset.getTools(null).toList().blockingGet();
    List<BaseTool> tools2 = toolset.getTools(null).toList().blockingGet();

    assertThat(tools1).hasSize(tools2.size());
    // Identity (not equality) is what proves caching, hence the element-wise comparison.
    for (int i = 0; i < tools1.size(); i++) {
      assertThat(tools1.get(i)).isSameInstanceAs(tools2.get(i));
    }
  }

  /**
   * Processing a request adds a computer-use tool with the browser environment and a
   * function-declaration tool that includes {@code clickAt}.
   */
  @Test
  public void testProcessLlmRequest() {
    LlmRequest.Builder builder =
        LlmRequest.builder().model("test-model").config(GenerateContentConfig.builder().build());

    toolset.processLlmRequest(builder, toolContext).blockingAwait();

    LlmRequest request = builder.build();
    assertThat(request.config()).isPresent();
    GenerateContentConfig config = request.config().get();

    assertThat(config.tools()).isPresent();
    List<Tool> tools = config.tools().get();

    // Find the computer use tool
    Optional<Tool> computerUseTool =
        tools.stream().filter(t -> t.computerUse().isPresent()).findFirst();
    assertThat(computerUseTool).isPresent();
    assertThat(computerUseTool.get().computerUse().get().environment().get().knownEnum())
        .isEqualTo(Environment.Known.ENVIRONMENT_BROWSER);

    // Verify computer actions were added as function declarations
    Optional<Tool> functionTool =
        tools.stream().filter(t -> t.functionDeclarations().isPresent()).findFirst();
    assertThat(functionTool).isPresent();
    assertThat(
            functionTool.get().functionDeclarations().get().stream()
                .anyMatch(fd -> fd.name().orElse("").equals("clickAt")))
        .isTrue();
  }

  /** An error reported by the computer surfaces as a {@link RuntimeException}. */
  @Test
  public void testProcessLlmRequest_withComputerError() {
    mockComputer.nextError = new RuntimeException("Computer failure");
    LlmRequest.Builder builder =
        LlmRequest.builder().model("test-model").config(GenerateContentConfig.builder().build());

    assertThrows(
        RuntimeException.class,
        () -> toolset.processLlmRequest(builder, toolContext).blockingAwait());
  }

  /**
   * {@link BaseComputer} test double.
   *
   * <p>Every action emits an empty-screenshot state. While {@link #nextError} is non-null,
   * {@code initialize}, {@code screenSize} and {@code environment} emit that error instead.
   */
  private static class MockComputer implements BaseComputer {
    /** Number of {@code initialize()} calls that completed without an injected error. */
    int initializeCallCount = 0;

    /** Error to emit from the lifecycle/metadata methods; {@code null} means succeed. */
    Throwable nextError = null;

    /** Builds a new state with an empty screenshot and no URL on every call. */
    private static Single<ComputerState> emptyState() {
      return Single.just(
          ComputerState.builder().screenshot(new byte[0]).url(Optional.empty()).build());
    }

    @Override
    public Completable initialize() {
      if (nextError != null) {
        return Completable.error(nextError);
      }
      // Counted only after the error check, so failed attempts are not recorded.
      this.initializeCallCount++;
      return Completable.complete();
    }

    @Override
    public Single<int[]> screenSize() {
      if (nextError != null) {
        return Single.error(nextError);
      }
      return Single.just(new int[] {1920, 1080});
    }

    @Override
    public Single<ComputerEnvironment> environment() {
      if (nextError != null) {
        return Single.error(nextError);
      }
      return Single.just(ComputerEnvironment.ENVIRONMENT_BROWSER);
    }

    @Override
    public Single<ComputerState> openWebBrowser() {
      return emptyState();
    }

    @Override
    public Single<ComputerState> clickAt(int x, int y) {
      return emptyState();
    }

    @Override
    public Single<ComputerState> hoverAt(int x, int y) {
      return emptyState();
    }

    @Override
    public Single<ComputerState> typeTextAt(
        int x, int y, String text, Boolean pressEnter, Boolean clearBeforeTyping) {
      return emptyState();
    }

    @Override
    public Single<ComputerState> scrollDocument(String direction) {
      return emptyState();
    }

    @Override
    public Single<ComputerState> scrollAt(int x, int y, String direction, int magnitude) {
      return emptyState();
    }

    @Override
    public Single<ComputerState> wait(Duration duration) {
      return emptyState();
    }

    @Override
    public Single<ComputerState> goBack() {
      return emptyState();
    }

    @Override
    public Single<ComputerState> goForward() {
      return emptyState();
    }

    @Override
    public Single<ComputerState> search() {
      return emptyState();
    }

    /** Unlike the other actions, echoes the requested URL back in the emitted state. */
    @Override
    public Single<ComputerState> navigate(String url) {
      return Single.just(
          ComputerState.builder().screenshot(new byte[0]).url(Optional.of(url)).build());
    }

    // NOTE(review): the element type of `keys` was stripped in the reviewed text; String is
    // assumed here. Confirm it matches BaseComputer#keyCombination.
    @Override
    public Single<ComputerState> keyCombination(List<String> keys) {
      return emptyState();
    }

    @Override
    public Single<ComputerState> dragAndDrop(int x, int y, int destinationX, int destinationY) {
      return emptyState();
    }

    @Override
    public Single<ComputerState> currentState() {
      return emptyState();
    }

    @Override
    public Completable close() {
      return Completable.complete();
    }
  }
}