From a22d0e9c14e58b29d81f5a83bdcc6e5fce52eafe Mon Sep 17 00:00:00 2001 From: olaola Date: Mon, 11 Dec 2017 07:53:15 -0800 Subject: [PATCH] Fix: uploading artifacts of failed actions to remote cache stopped working. To reproduce: run a failing test with --experimental_remote_spawn_cache or with --spawn_strategy=remote and no executor. Expected: test log is uploaded. Desired behavior: - regardless of whether a spawn is cacheable or not, its artifacts should be uploaded to the remote cache. - the spawn result should only be set if the spawn is cacheable *and* the action succeeded. - when executing remotely, the do_not_cache field should be set for non-cacheable spawns, and the remote execution engine should respect it. This CL contains multiple fixes to ensure the above behaviors, and adds a few tests, both end to end and unit tests. Important behavior change: it is no longer assumed that non-cacheable spawns should use a NO_CACHE SpawnCache! The appropriate test case was removed. Instead, an assumption was added that all implementations of SpawnCache should respect the Spawns.mayBeCached(spawn) property. Currently, only NO_CACHE and RemoteSpawnCache exist, and they (now) support it. TESTED=remote build execution backend. WANT_LGTM: philwo,buchgr RELNOTES: None PiperOrigin-RevId: 178617937 --- .../build/lib/actions/SpawnResult.java | 18 +++ .../build/lib/exec/AbstractSpawnStrategy.java | 15 +- .../devtools/build/lib/exec/SpawnCache.java | 2 + .../build/lib/remote/RemoteSpawnCache.java | 13 +- .../build/lib/remote/RemoteSpawnRunner.java | 16 +- .../sandbox/AbstractSandboxSpawnRunner.java | 44 +++--- .../lib/exec/AbstractSpawnStrategyTest.java | 19 --- .../lib/remote/RemoteSpawnCacheTest.java | 40 +++++ .../lib/remote/RemoteSpawnRunnerTest.java | 12 +- src/test/shell/bazel/BUILD | 10 ++ src/test/shell/bazel/bazel_sandboxing_test.sh | 21 +++ .../shell/bazel/remote_execution_rest_test.sh | 130 ++++++++++++++++ src/test/shell/bazel/remote_execution_test.sh | 146 +++++++++--------- .../build/remote/worker/ExecutionServer.java | 2 +- 14 files changed, 359 insertions(+), 129 deletions(-) create mode 100755 src/test/shell/bazel/remote_execution_rest_test.sh diff --git a/src/main/java/com/google/devtools/build/lib/actions/SpawnResult.java b/src/main/java/com/google/devtools/build/lib/actions/SpawnResult.java index 51d8d3b6459411..b6a26954d4044f 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/SpawnResult.java +++ b/src/main/java/com/google/devtools/build/lib/actions/SpawnResult.java @@ -178,6 +178,11 @@ public boolean isConsideredUserError() { /** Whether the spawn result was a cache hit. */ boolean isCacheHit(); + /** Returns an optional custom failure message for the result. 
*/ + default String getFailureMessage() { + return ""; + } + String getDetailMessage( String messagePrefix, String message, boolean catastrophe, boolean forciblyRunRemotely); @@ -196,6 +201,7 @@ public static final class SimpleSpawnResult implements SpawnResult { private final Optional numBlockInputOperations; private final Optional numInvoluntaryContextSwitches; private final boolean cacheHit; + private final String failureMessage; SimpleSpawnResult(Builder builder) { this.exitCode = builder.exitCode; @@ -208,6 +214,7 @@ public static final class SimpleSpawnResult implements SpawnResult { this.numBlockInputOperations = builder.numBlockInputOperations; this.numInvoluntaryContextSwitches = builder.numInvoluntaryContextSwitches; this.cacheHit = builder.cacheHit; + this.failureMessage = builder.failureMessage; } @Override @@ -273,6 +280,11 @@ public boolean isCacheHit() { return cacheHit; } + @Override + public String getFailureMessage() { + return failureMessage; + } + @Override public String getDetailMessage( String messagePrefix, String message, boolean catastrophe, boolean forciblyRunRemotely) { @@ -318,6 +330,7 @@ public static final class Builder { private Optional numBlockInputOperations = Optional.empty(); private Optional numInvoluntaryContextSwitches = Optional.empty(); private boolean cacheHit; + private String failureMessage = ""; public SpawnResult build() { if (status == Status.SUCCESS) { @@ -395,5 +408,10 @@ public Builder setCacheHit(boolean cacheHit) { this.cacheHit = cacheHit; return this; } + + public Builder setFailureMessage(String failureMessage) { + this.failureMessage = failureMessage; + return this; + } } } diff --git a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java index 1512a0139ce100..bf3ef4e59c9bd8 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java @@ -84,7 +84,7 @@ public List exec( SpawnCache cache = actionExecutionContext.getContext(SpawnCache.class); // In production, the getContext method guarantees that we never get null back. However, our // integration tests don't set it up correctly, so cache may be null in testing. - if (cache == null || !Spawns.mayBeCached(spawn)) { + if (cache == null) { cache = SpawnCache.NO_CACHE; } SpawnResult spawnResult; @@ -107,12 +107,15 @@ public List exec( if (spawnResult.status() != Status.SUCCESS) { String cwd = actionExecutionContext.getExecRoot().getPathString(); + String resultMessage = spawnResult.getFailureMessage(); String message = - CommandFailureUtils.describeCommandFailure( - actionExecutionContext.getVerboseFailures(), - spawn.getArguments(), - spawn.getEnvironment(), - cwd); + resultMessage != "" + ? 
resultMessage + : CommandFailureUtils.describeCommandFailure( + actionExecutionContext.getVerboseFailures(), + spawn.getArguments(), + spawn.getEnvironment(), + cwd); throw new SpawnExecException(message, spawnResult, /*forciblyRunRemotely=*/false); } return ImmutableList.of(spawnResult); diff --git a/src/main/java/com/google/devtools/build/lib/exec/SpawnCache.java b/src/main/java/com/google/devtools/build/lib/exec/SpawnCache.java index 016dfe80bd8b80..1374b47fd84d7f 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/SpawnCache.java +++ b/src/main/java/com/google/devtools/build/lib/exec/SpawnCache.java @@ -168,6 +168,8 @@ void store(SpawnResult result, Collection files) * object is for the cache to store expensive intermediate values (such as the cache key) that are * needed both for the lookup and the subsequent store operation. * + *
<p>The lookup must not succeed for non-cachable spawns. See {@link Spawns#mayBeCached()}. + * *
<p>
Note that cache stores may be disabled, in which case the returned {@link CacheHandle} * instance's {@link CacheHandle#store} is a no-op. */ diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java index 57f19fa258eaf3..91e27cd49c215a 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java @@ -19,6 +19,7 @@ import com.google.devtools.build.lib.actions.Spawn; import com.google.devtools.build.lib.actions.SpawnResult; import com.google.devtools.build.lib.actions.SpawnResult.Status; +import com.google.devtools.build.lib.actions.Spawns; import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.events.Reporter; @@ -102,7 +103,8 @@ public CacheHandle lookup(Spawn spawn, SpawnExecutionPolicy policy) digestUtil.compute(command), repository.getMerkleDigest(inputRoot), platform, - policy.getTimeout()); + policy.getTimeout(), + Spawns.mayBeCached(spawn)); // Look up action cache, and reuse the action output if it is found. final ActionKey actionKey = digestUtil.computeActionKey(action); @@ -113,7 +115,9 @@ public CacheHandle lookup(Spawn spawn, SpawnExecutionPolicy policy) Context previous = withMetadata.attach(); try { ActionResult result = - this.options.remoteAcceptCached ? remoteCache.getCachedActionResult(actionKey) : null; + this.options.remoteAcceptCached && Spawns.mayBeCached(spawn) + ? remoteCache.getCachedActionResult(actionKey) + : null; if (result != null) { // We don't cache failed actions, so we know the outputs exist. // For now, download all outputs locally; in the future, we can reuse the digests to @@ -156,7 +160,10 @@ public boolean willStore() { @Override public void store(SpawnResult result, Collection files) throws InterruptedException, IOException { - boolean uploadAction = Status.SUCCESS.equals(result.status()) && result.exitCode() == 0; + boolean uploadAction = + Spawns.mayBeCached(spawn) + && Status.SUCCESS.equals(result.status()) + && result.exitCode() == 0; Context previous = withMetadata.attach(); try { remoteCache.upload(actionKey, execRoot, files, policy.getFileOutErr(), uploadAction); diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java index c67ef489a6c41e..c63975738b6d31 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java @@ -130,7 +130,8 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionPolicy policy) digestUtil.compute(command), repository.getMerkleDigest(inputRoot), platform, - policy.getTimeout()); + policy.getTimeout(), + Spawns.mayBeCached(spawn)); // Look up action cache, and reuse the action output if it is found. 
ActionKey actionKey = digestUtil.computeActionKey(action); @@ -262,7 +263,8 @@ static Action buildAction( Digest command, Digest inputRoot, Platform platform, - Duration timeout) { + Duration timeout, + boolean cacheable) { Action.Builder action = Action.newBuilder(); action.setCommandDigest(command); action.setInputRootDigest(inputRoot); @@ -279,6 +281,9 @@ static Action buildAction( if (!timeout.isZero()) { action.setTimeout(com.google.protobuf.Duration.newBuilder().setSeconds(timeout.getSeconds())); } + if (!cacheable) { + action.setDoNotCache(true); + } return action.build(); } @@ -326,7 +331,7 @@ private SpawnResult execLocally( @Nullable RemoteActionCache remoteCache, @Nullable ActionKey actionKey) throws ExecException, IOException, InterruptedException { - if (uploadToCache && Spawns.mayBeCached(spawn) && remoteCache != null && actionKey != null) { + if (uploadToCache && remoteCache != null && actionKey != null) { return execLocallyAndUpload(spawn, policy, inputMap, remoteCache, actionKey); } return fallbackRunner.exec(spawn, policy); @@ -351,7 +356,10 @@ SpawnResult execLocallyAndUpload( } List outputFiles = listExistingOutputFiles(execRoot, spawn); try { - boolean uploadAction = Status.SUCCESS.equals(result.status()) && result.exitCode() == 0; + boolean uploadAction = + Spawns.mayBeCached(spawn) + && Status.SUCCESS.equals(result.status()) + && result.exitCode() == 0; remoteCache.upload(actionKey, execRoot, outputFiles, policy.getFileOutErr(), uploadAction); } catch (IOException e) { if (verboseFailures) { diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/AbstractSandboxSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/sandbox/AbstractSandboxSpawnRunner.java index 5875dad58d17bb..957449eb2bd286 100644 --- a/src/main/java/com/google/devtools/build/lib/sandbox/AbstractSandboxSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/sandbox/AbstractSandboxSpawnRunner.java @@ -26,7 +26,6 @@ import com.google.devtools.build.lib.actions.SpawnResult.Status; import com.google.devtools.build.lib.actions.UserExecException; import com.google.devtools.build.lib.exec.ExecutionOptions; -import com.google.devtools.build.lib.exec.SpawnExecException; import com.google.devtools.build.lib.exec.SpawnRunner; import com.google.devtools.build.lib.runtime.CommandEnvironment; import com.google.devtools.build.lib.shell.AbnormalTerminationException; @@ -90,13 +89,13 @@ protected SpawnResult runSpawn( Path execRoot, Path tmpDir, Duration timeout) - throws ExecException, IOException, InterruptedException { + throws IOException, InterruptedException { try { sandbox.createFileSystem(); OutErr outErr = policy.getFileOutErr(); policy.prefetchInputs(); - SpawnResult result = run(sandbox, outErr, timeout, tmpDir); + SpawnResult result = run(originalSpawn, sandbox, outErr, timeout, execRoot, tmpDir); policy.lockOutputFiles(); try { @@ -105,23 +104,6 @@ protected SpawnResult runSpawn( } catch (IOException e) { throw new IOException("Could not move output artifacts from sandboxed execution", e); } - - if (result.status() != Status.SUCCESS || result.exitCode() != 0) { - String message; - if (sandboxOptions.sandboxDebug) { - message = - CommandFailureUtils.describeCommandFailure( - true, sandbox.getArguments(), sandbox.getEnvironment(), execRoot.getPathString()); - } else { - message = - CommandFailureUtils.describeCommandFailure( - verboseFailures, - originalSpawn.getArguments(), - originalSpawn.getEnvironment(), - execRoot.getPathString()) + SANDBOX_DEBUG_SUGGESTION; - } - throw new 
SpawnExecException(message, result, /*forciblyRunRemotely=*/false); - } return result; } finally { if (!sandboxOptions.sandboxDebug) { @@ -131,12 +113,30 @@ protected SpawnResult runSpawn( } private final SpawnResult run( - SandboxedSpawn sandbox, OutErr outErr, Duration timeout, Path tmpDir) + Spawn originalSpawn, + SandboxedSpawn sandbox, + OutErr outErr, + Duration timeout, + Path execRoot, + Path tmpDir) throws IOException, InterruptedException { Command cmd = new Command( sandbox.getArguments().toArray(new String[0]), sandbox.getEnvironment(), sandbox.getSandboxExecRoot().getPathFile()); + String failureMessage; + if (sandboxOptions.sandboxDebug) { + failureMessage = + CommandFailureUtils.describeCommandFailure( + true, sandbox.getArguments(), sandbox.getEnvironment(), execRoot.getPathString()); + } else { + failureMessage = + CommandFailureUtils.describeCommandFailure( + verboseFailures, + originalSpawn.getArguments(), + originalSpawn.getEnvironment(), + execRoot.getPathString()) + SANDBOX_DEBUG_SUGGESTION; + } long startTime = System.currentTimeMillis(); CommandResult commandResult; @@ -162,6 +162,7 @@ private final SpawnResult run( return new SpawnResult.Builder() .setStatus(Status.EXECUTION_FAILED) .setExitCode(LOCAL_EXEC_ERROR) + .setFailureMessage(failureMessage) .build(); } @@ -182,6 +183,7 @@ private final SpawnResult run( .setWallTime(wallTime) .setUserTime(commandResult.getUserExecutionTime()) .setSystemTime(commandResult.getSystemExecutionTime()) + .setFailureMessage(status != Status.SUCCESS || exitCode != 0 ? failureMessage : "") .build(); } diff --git a/src/test/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategyTest.java b/src/test/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategyTest.java index 3547e31ac6e690..986053fee9a241 100644 --- a/src/test/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategyTest.java +++ b/src/test/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategyTest.java @@ -167,23 +167,4 @@ public void testCacheMissWithNonZeroExit() throws Exception { verify(spawnRunner).exec(any(Spawn.class), any(SpawnExecutionPolicy.class)); verify(entry).store(eq(result), any(Collection.class)); } - - @Test - public void testTagNoCache() throws Exception { - SpawnCache cache = mock(SpawnCache.class); - when(actionExecutionContext.getContext(eq(SpawnCache.class))).thenReturn(cache); - when(actionExecutionContext.getExecRoot()).thenReturn(fs.getPath("/execroot")); - when(spawnRunner.exec(any(Spawn.class), any(SpawnExecutionPolicy.class))) - .thenReturn(new SpawnResult.Builder().setStatus(Status.SUCCESS).build()); - - Spawn uncacheableSpawn = - new SpawnBuilder("/bin/echo", "Hi").withExecutionInfo("no-cache", "").build(); - - new TestedSpawnStrategy(spawnRunner).exec(uncacheableSpawn, actionExecutionContext); - - // Must only be called exactly once. - verify(spawnRunner).exec(any(Spawn.class), any(SpawnExecutionPolicy.class)); - // Must not be called. 
- verify(cache, never()).lookup(any(Spawn.class), any(SpawnExecutionPolicy.class)); - } } diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnCacheTest.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnCacheTest.java index 7ac5cfa190c209..65c7d88960d8cd 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnCacheTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnCacheTest.java @@ -29,6 +29,7 @@ import com.google.devtools.build.lib.actions.ActionInputHelper; import com.google.devtools.build.lib.actions.Artifact; import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander; +import com.google.devtools.build.lib.actions.ExecutionRequirements; import com.google.devtools.build.lib.actions.ResourceSet; import com.google.devtools.build.lib.actions.SimpleSpawn; import com.google.devtools.build.lib.actions.SpawnResult; @@ -263,6 +264,45 @@ public Void answer(InvocationOnMock invocation) { .upload(any(ActionKey.class), any(Path.class), eq(outputFiles), eq(outErr), eq(true)); } + @Test + public void noCacheSpawns() throws Exception { + // Checks that spawns that have mayBeCached false are not looked up in the remote cache, + // and also that their result is not uploaded to the remote cache. The artifacts, however, + // are uploaded. + SimpleSpawn uncacheableSpawn = new SimpleSpawn( + new FakeOwner("foo", "bar"), + /*arguments=*/ ImmutableList.of(), + /*environment=*/ ImmutableMap.of(), + ImmutableMap.of(ExecutionRequirements.NO_CACHE, ""), + /*inputs=*/ ImmutableList.of(), + /*outputs=*/ ImmutableList.of(), + ResourceSet.ZERO); + CacheHandle entry = cache.lookup(uncacheableSpawn, simplePolicy); + verify(remoteCache, never()) + .getCachedActionResult(any(ActionKey.class)); + assertThat(entry.hasResult()).isFalse(); + SpawnResult result = new SpawnResult.Builder().setExitCode(0).setStatus(Status.SUCCESS).build(); + ImmutableList outputFiles = ImmutableList.of(fs.getPath("/random/file")); + entry.store(result, outputFiles); + verify(remoteCache) + .upload(any(ActionKey.class), any(Path.class), eq(outputFiles), eq(outErr), eq(false)); + } + + @Test + public void noCacheSpawnsNoResultStore() throws Exception { + // Only successful action results are uploaded to the remote cache. The artifacts, however, + // are uploaded regardless. 
+ CacheHandle entry = cache.lookup(simpleSpawn, simplePolicy); + verify(remoteCache).getCachedActionResult(any(ActionKey.class)); + assertThat(entry.hasResult()).isFalse(); + SpawnResult result = + new SpawnResult.Builder().setExitCode(1).setStatus(Status.NON_ZERO_EXIT).build(); + ImmutableList outputFiles = ImmutableList.of(fs.getPath("/random/file")); + entry.store(result, outputFiles); + verify(remoteCache) + .upload(any(ActionKey.class), any(Path.class), eq(outputFiles), eq(outErr), eq(false)); + } + @Test public void printWarningIfUploadFails() throws Exception { CacheHandle entry = cache.lookup(simpleSpawn, simplePolicy); diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java index 31681906d89459..730244d627f7cf 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java @@ -115,8 +115,9 @@ public final void setUp() throws Exception { @Test @SuppressWarnings("unchecked") public void nonCachableSpawnsShouldNotBeCached_remote() throws Exception { - // Test that if a spawn is marked "NO_CACHE" that it's neither fetched from a remote cache - // nor uploaded to a remote cache. It should be executed remotely, however. + // Test that if a spawn is marked "NO_CACHE" then it's not fetched from a remote cache. + // It should be executed remotely, but marked non-cacheable to remote execution, so that + // the action result is not saved in the remote cache. RemoteOptions options = Options.getDefaults(RemoteOptions.class); options.remoteAcceptCached = true; @@ -156,6 +157,7 @@ public void nonCachableSpawnsShouldNotBeCached_remote() throws Exception { ArgumentCaptor requestCaptor = ArgumentCaptor.forClass(ExecuteRequest.class); verify(executor).executeRemotely(requestCaptor.capture()); assertThat(requestCaptor.getValue().getSkipCacheLookup()).isTrue(); + assertThat(requestCaptor.getValue().getAction().getDoNotCache()).isTrue(); verify(cache, never()) .getCachedActionResult(any(ActionKey.class)); @@ -173,7 +175,7 @@ public void nonCachableSpawnsShouldNotBeCached_remote() throws Exception { @SuppressWarnings("unchecked") public void nonCachableSpawnsShouldNotBeCached_local() throws Exception { // Test that if a spawn is executed locally, due to the local fallback, that its result is not - // uploaded to the remote cache. + // uploaded to the remote cache. However, the artifacts should still be uploaded. 
RemoteOptions options = Options.getDefaults(RemoteOptions.class); options.remoteAcceptCached = true; @@ -213,13 +215,13 @@ public void nonCachableSpawnsShouldNotBeCached_local() throws Exception { verify(cache, never()) .getCachedActionResult(any(ActionKey.class)); - verify(cache, never()) + verify(cache) .upload( any(ActionKey.class), any(Path.class), any(Collection.class), any(FileOutErr.class), - any(Boolean.class)); + eq(false)); } @Test diff --git a/src/test/shell/bazel/BUILD b/src/test/shell/bazel/BUILD index 0ddc941446c51d..0f2ddd1fa909a4 100644 --- a/src/test/shell/bazel/BUILD +++ b/src/test/shell/bazel/BUILD @@ -420,6 +420,16 @@ sh_test( ], ) +sh_test( + name = "remote_execution_rest_test", + size = "large", + srcs = ["remote_execution_rest_test.sh"], + data = [ + ":test-deps", + "//src/tools/remote:worker", + ], +) + sh_test( name = "remote_execution_sandboxing_test", size = "large", diff --git a/src/test/shell/bazel/bazel_sandboxing_test.sh b/src/test/shell/bazel/bazel_sandboxing_test.sh index 85d4fab17ba417..d0a1272730a468 100755 --- a/src/test/shell/bazel/bazel_sandboxing_test.sh +++ b/src/test/shell/bazel/bazel_sandboxing_test.sh @@ -544,6 +544,27 @@ EOF expect_log "Executing genrule //:test failed:" } +function test_sandbox_debug() { + cat > BUILD <<'EOF' +genrule( + name = "broken", + outs = ["bla.txt"], + cmd = "exit 1", +) +EOF + bazel build --verbose_failures :broken &> $TEST_log \ + && fail "build should have failed" || true + expect_log "Use --sandbox_debug to see verbose messages from the sandbox" + expect_log "Executing genrule //:broken failed" + + bazel build --verbose_failures --sandbox_debug :broken &> $TEST_log \ + && fail "build should have failed" || true + expect_log "Executing genrule //:broken failed" + expect_not_log "Use --sandbox_debug to see verbose messages from the sandbox" + # This will appear a lot in the sandbox failure details. + expect_log "bazel-sandbox" +} + function test_sandbox_mount_customized_path () { if ! [ "${PLATFORM-}" = "linux" -a \ diff --git a/src/test/shell/bazel/remote_execution_rest_test.sh b/src/test/shell/bazel/remote_execution_rest_test.sh new file mode 100755 index 00000000000000..17cbe3572e71d8 --- /dev/null +++ b/src/test/shell/bazel/remote_execution_rest_test.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# +# Copyright 2017 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Tests remote execution and caching. +# + +# Load the test setup defined in the parent directory +CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${CURRENT_DIR}/../integration_test_setup.sh" \ + || { echo "integration_test_setup.sh not found!" 
>&2; exit 1; } + +function set_up() { + work_path=$(mktemp -d "${TEST_TMPDIR}/remote.XXXXXXXX") + pid_file=$(mktemp -u "${TEST_TMPDIR}/remote.XXXXXXXX") + attempts=1 + while [ $attempts -le 5 ]; do + (( attempts++ )) + worker_port=$(pick_random_unused_tcp_port) || fail "no port found" + hazelcast_port=$(pick_random_unused_tcp_port) || fail "no port found" + "${bazel_data}/src/tools/remote/worker" \ + --work_path="${work_path}" \ + --listen_port=${worker_port} \ + --hazelcast_standalone_listen_port=${hazelcast_port} \ + --pid_file="${pid_file}" >& $TEST_log & + local wait_seconds=0 + until [ -s "${pid_file}" ] || [ "$wait_seconds" -eq 15 ]; do + sleep 1 + ((wait_seconds++)) || true + done + if [ -s "${pid_file}" ]; then + break + fi + done + if [ ! -s "${pid_file}" ]; then + fail "Timed out waiting for remote worker to start." + fi +} + +function tear_down() { + bazel clean --expunge >& $TEST_log + if [ -s "${pid_file}" ]; then + local pid=$(cat "${pid_file}") + kill "${pid}" || true + fi + rm -rf "${pid_file}" + rm -rf "${work_path}" +} + +function test_cc_binary_rest_cache() { + mkdir -p a + cat > a/BUILD < a/test.cc < +int main() { std::cout << "Hello world!" << std::endl; return 0; } +EOF + bazel build //a:test >& $TEST_log \ + || fail "Failed to build //a:test without remote cache" + cp -f bazel-bin/a/test ${TEST_TMPDIR}/test_expected + + bazel clean --expunge >& $TEST_log + bazel build \ + --experimental_remote_spawn_cache=true \ + --remote_rest_cache=http://localhost:${hazelcast_port}/hazelcast/rest/maps \ + //a:test >& $TEST_log \ + || fail "Failed to build //a:test with remote REST cache service" + diff bazel-bin/a/test ${TEST_TMPDIR}/test_expected \ + || fail "Remote cache generated different result" + # Check that persistent connections are closed after the build. Is there a good cross-platform way + # to check this? + if [[ "$PLATFORM" = "linux" ]]; then + if netstat -tn | grep -qE ":${hazelcast_port}\\s+ESTABLISHED$"; then + fail "connections to to cache not closed" + fi + fi +} + +function test_cc_binary_rest_cache_bad_server() { + mkdir -p a + cat > a/BUILD < a/test.cc < +int main() { std::cout << "Hello world!" << std::endl; return 0; } +EOF + bazel build //a:test >& $TEST_log \ + || fail "Failed to build //a:test without remote cache" + cp -f bazel-bin/a/test ${TEST_TMPDIR}/test_expected + + bazel clean --expunge >& $TEST_log + bazel build \ + --experimental_remote_spawn_cache=true \ + --remote_rest_cache=http://bad.hostname/bad/cache \ + //a:test >& $TEST_log \ + || fail "Failed to build //a:test with remote REST cache service" + diff bazel-bin/a/test ${TEST_TMPDIR}/test_expected \ + || fail "Remote cache generated different result" + # Check that persistent connections are closed after the build. Is there a good cross-platform way + # to check this? 
+ if [[ "$PLATFORM" = "linux" ]]; then + if netstat -tn | grep -qE ":${hazelcast_port}\\s+ESTABLISHED$"; then + fail "connections to to cache not closed" + fi + fi +} + +run_suite "Remote execution and remote cache tests" diff --git a/src/test/shell/bazel/remote_execution_test.sh b/src/test/shell/bazel/remote_execution_test.sh index a80acc7daeaa61..5eadb1d1ae7eb6 100755 --- a/src/test/shell/bazel/remote_execution_test.sh +++ b/src/test/shell/bazel/remote_execution_test.sh @@ -24,16 +24,16 @@ source "${CURRENT_DIR}/../integration_test_setup.sh" \ function set_up() { work_path=$(mktemp -d "${TEST_TMPDIR}/remote.XXXXXXXX") + cas_path=$(mktemp -d "${TEST_TMPDIR}/remote.XXXXXXXX") pid_file=$(mktemp -u "${TEST_TMPDIR}/remote.XXXXXXXX") attempts=1 while [ $attempts -le 5 ]; do (( attempts++ )) worker_port=$(pick_random_unused_tcp_port) || fail "no port found" - hazelcast_port=$(pick_random_unused_tcp_port) || fail "no port found" "${bazel_data}/src/tools/remote/worker" \ --work_path="${work_path}" \ --listen_port=${worker_port} \ - --hazelcast_standalone_listen_port=${hazelcast_port} \ + --cas_path=${cas_path} \ --pid_file="${pid_file}" >& $TEST_log & local wait_seconds=0 until [ -s "${pid_file}" ] || [ "$wait_seconds" -eq 15 ]; do @@ -50,12 +50,14 @@ function set_up() { } function tear_down() { + bazel clean --expunge >& $TEST_log if [ -s "${pid_file}" ]; then local pid=$(cat "${pid_file}") kill "${pid}" || true fi rm -rf "${pid_file}" rm -rf "${work_path}" + rm -rf "${cas_path}" } function test_cc_binary() { @@ -161,108 +163,112 @@ EOF --remote_cache=localhost:${worker_port} \ --test_output=errors \ //a:test >& $TEST_log \ - && fail "Expected test failure" || exitcode=$? + && fail "Expected test failure" || true # TODO(ulfjack): Check that the test failure gets reported correctly. } -# Tests that the remote worker can return a 200MB blob that requires chunking. -# Blob has to be that large in order to exceed the grpc default max message size. -function test_genrule_large_output_chunking() { +function is_file_uploaded() { + h=$(shasum -a256 < $1) + if [ -e "$cas_path/${h:0:64}" ]; then return 0; else return 1; fi +} + +function test_failing_cc_test_grpc_cache() { mkdir -p a cat > a/BUILD <> \$@; cp \$@ tmp.txt; done)", +cc_test( +name = 'test', +srcs = [ 'test.cc' ], ) EOF - cat > a/small_blob.txt < a/test.cc < +int main() { std::cout << "Fail me!" << std::endl; return 1; } EOF - bazel build //a:large_output >& $TEST_log \ - || fail "Failed to build //a:large_output without remote execution" - cp -f bazel-genfiles/a/large_blob.txt ${TEST_TMPDIR}/large_blob_expected.txt - - bazel clean --expunge >& $TEST_log - bazel build \ + bazel test \ --spawn_strategy=remote \ - --remote_executor=localhost:${worker_port} \ --remote_cache=localhost:${worker_port} \ - //a:large_output >& $TEST_log \ - || fail "Failed to build //a:large_output with remote execution" - diff bazel-genfiles/a/large_blob.txt ${TEST_TMPDIR}/large_blob_expected.txt \ - || fail "Remote execution generated different result" + --test_output=errors \ + //a:test >& $TEST_log \ + && fail "Expected test failure" || true + $(is_file_uploaded bazel-testlogs/a/test/test.log) \ + || fail "Expected test log to be uploaded to remote execution" + $(is_file_uploaded bazel-testlogs/a/test/test.xml) \ + || fail "Expected test xml to be uploaded to remote execution" } -function test_cc_binary_rest_cache() { +function test_failing_cc_test_remote_spawn_cache() { mkdir -p a cat > a/BUILD < a/test.cc < -int main() { std::cout << "Hello world!" 
<< std::endl; return 0; } +int main() { std::cout << "Fail me!" << std::endl; return 1; } EOF - bazel build //a:test >& $TEST_log \ - || fail "Failed to build //a:test without remote cache" - cp -f bazel-bin/a/test ${TEST_TMPDIR}/test_expected - - bazel clean --expunge >& $TEST_log - bazel build \ - --experimental_remote_spawn_cache=true \ - --remote_rest_cache=http://localhost:${hazelcast_port}/hazelcast/rest/maps \ + bazel test \ + --experimental_remote_spawn_cache \ + --remote_cache=localhost:${worker_port} \ + --test_output=errors \ //a:test >& $TEST_log \ - || fail "Failed to build //a:test with remote REST cache service" - diff bazel-bin/a/test ${TEST_TMPDIR}/test_expected \ - || fail "Remote cache generated different result" - # Check that persistent connections are closed after the build. Is there a good cross-platform way - # to check this? - if [[ "$PLATFORM" = "linux" ]]; then - if netstat -tn | grep -qE ":${hazelcast_port}\\s+ESTABLISHED$"; then - fail "connections to to cache not closed" - fi - fi + && fail "Expected test failure" || true + $(is_file_uploaded bazel-testlogs/a/test/test.log) \ + || fail "Expected test log to be uploaded to remote execution" + $(is_file_uploaded bazel-testlogs/a/test/test.xml) \ + || fail "Expected test xml to be uploaded to remote execution" + # Check that logs are uploaded regardless of the spawn being cacheable. + # Re-running a changed test that failed once renders the test spawn uncacheable. + rm -f a/test.cc + cat > a/test.cc < +int main() { std::cout << "Fail me again!" << std::endl; return 1; } +EOF + bazel test \ + --experimental_remote_spawn_cache \ + --remote_cache=localhost:${worker_port} \ + --test_output=errors \ + //a:test >& $TEST_log \ + && fail "Expected test failure" || true + $(is_file_uploaded bazel-testlogs/a/test/test.log) \ + || fail "Expected test log to be uploaded to remote execution" + $(is_file_uploaded bazel-testlogs/a/test/test.xml) \ + || fail "Expected test xml to be uploaded to remote execution" } -function test_cc_binary_rest_cache_bad_server() { +# Tests that the remote worker can return a 200MB blob that requires chunking. +# Blob has to be that large in order to exceed the grpc default max message size. +function test_genrule_large_output_chunking() { mkdir -p a cat > a/BUILD <> \$@; cp \$@ tmp.txt; done)", ) EOF - cat > a/test.cc < -int main() { std::cout << "Hello world!" << std::endl; return 0; } + cat > a/small_blob.txt <& $TEST_log \ - || fail "Failed to build //a:test without remote cache" - cp -f bazel-bin/a/test ${TEST_TMPDIR}/test_expected + bazel build //a:large_output >& $TEST_log \ + || fail "Failed to build //a:large_output without remote execution" + cp -f bazel-genfiles/a/large_blob.txt ${TEST_TMPDIR}/large_blob_expected.txt bazel clean --expunge >& $TEST_log bazel build \ - --experimental_remote_spawn_cache=true \ - --remote_rest_cache=http://bad.hostname/bad/cache \ - //a:test >& $TEST_log \ - || fail "Failed to build //a:test with remote REST cache service" - diff bazel-bin/a/test ${TEST_TMPDIR}/test_expected \ - || fail "Remote cache generated different result" - # Check that persistent connections are closed after the build. Is there a good cross-platform way - # to check this? 
- if [[ "$PLATFORM" = "linux" ]]; then - if netstat -tn | grep -qE ":${hazelcast_port}\\s+ESTABLISHED$"; then - fail "connections to to cache not closed" - fi - fi + --spawn_strategy=remote \ + --remote_executor=localhost:${worker_port} \ + --remote_cache=localhost:${worker_port} \ + //a:large_output >& $TEST_log \ + || fail "Failed to build //a:large_output with remote execution" + diff bazel-genfiles/a/large_blob.txt ${TEST_TMPDIR}/large_blob_expected.txt \ + || fail "Remote execution generated different result" } function test_py_test() { @@ -359,7 +365,7 @@ EOF --remote_cache=localhost:${worker_port} \ --test_output=errors \ //a:test >& $TEST_log \ - && fail "Expected test failure" || exitcode=$? + && fail "Expected test failure" || true xml=bazel-testlogs/a/test/test.xml [ -e $xml ] || fail "Expected to find XML output" cat $xml > $TEST_log diff --git a/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java b/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java index e80257b5b01bde..22761a9b19da5c 100644 --- a/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java +++ b/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java @@ -267,7 +267,7 @@ private ActionResult execute(Action action, Path execRoot) byte[] stderr = cmdResult.getStderr(); cache.uploadOutErr(result, stdout, stderr); ActionResult finalResult = result.setExitCode(exitCode).build(); - if (exitCode == 0) { + if (exitCode == 0 && !action.getDoNotCache()) { ActionKey actionKey = digestUtil.computeActionKey(action); cache.setCachedActionResult(actionKey, finalResult); }
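
Note appended for illustration (not part of the patch): the store-side behavior described in the
commit message reduces to one gate, applied in both RemoteSpawnCache.store() and
RemoteSpawnRunner.execLocallyAndUpload(): output files (including test.log/test.xml of failed
tests) are always uploaded, while the ActionResult is stored only for cacheable, successful spawns.
The stand-alone sketch below shows that gate with simplified stand-in types; UploadGateSketch and
its raw execution-info map are hypothetical helpers, not Bazel API.

// UploadGateSketch.java -- stand-alone sketch of the upload gate introduced by this change.
// The types below are simplified stand-ins, not Bazel's real classes.
import java.util.Map;

final class UploadGateSketch {
  enum Status { SUCCESS, NON_ZERO_EXIT, EXECUTION_FAILED }

  // Mirrors Spawns.mayBeCached(spawn): spawns tagged "no-cache"
  // (ExecutionRequirements.NO_CACHE) must never produce a cache entry.
  static boolean mayBeCached(Map<String, String> executionInfo) {
    return !executionInfo.containsKey("no-cache");
  }

  // Output files are uploaded unconditionally; the ActionResult is stored only when
  // this returns true (cacheable spawn, SUCCESS status, and exit code 0).
  static boolean shouldStoreActionResult(
      Map<String, String> executionInfo, Status status, int exitCode) {
    return mayBeCached(executionInfo) && status == Status.SUCCESS && exitCode == 0;
  }

  public static void main(String[] args) {
    // no-cache spawn: artifacts uploaded, result not stored.
    System.out.println(shouldStoreActionResult(Map.of("no-cache", ""), Status.SUCCESS, 0)); // false
    // failed test: test.log/test.xml uploaded, result not stored.
    System.out.println(shouldStoreActionResult(Map.of(), Status.NON_ZERO_EXIT, 1)); // false
    // cacheable, successful spawn: result stored as well.
    System.out.println(shouldStoreActionResult(Map.of(), Status.SUCCESS, 0)); // true
  }
}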