Skip to content

Commit

Permalink
Fix: uploading artifacts of failed actions to remote cache stopped wo…
Browse files Browse the repository at this point in the history
…rking.

To reproduce: run a failing test with --experimental_remote_spawn_cache or with --spawn_strategy=remote and no executor. Expected: test log is uploaded.

Desired behavior:
- regardless of whether a spawn is cacheable or not, its artifacts should be uploaded to the remote cache.
- the spawn result should only be set if the spawn is cacheable *and* the action succeeded.
- when executing remotely, the do_not_cache field should be set for non-cacheable spawns, and the remote execution engine should respect it.

This CL contains multiple fixes to ensure the above behaviors, and adds a few tests, both end to end and unit tests. Important behavior change: it is no longer assumed that non-cacheable spawns should use a NO_CACHE SpawnCache! The appropriate test case was removed. Instead, an assumption was added that all implementations of SpawnCache should respect the Spawns.mayBeCached(spawn) property. Currently, only NO_CACHE and RemoteSpawnCache exist, and they (now) support it.

TESTED=remote build execution backend.
WANT_LGTM: philwo,buchgr
RELNOTES: None
PiperOrigin-RevId: 178617937
  • Loading branch information
olaola authored and Copybara-Service committed Dec 11, 2017
1 parent 180d256 commit a22d0e9
Show file tree
Hide file tree
Showing 14 changed files with 359 additions and 129 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,11 @@ public boolean isConsideredUserError() {
/** Whether the spawn result was a cache hit. */
boolean isCacheHit();

/** Returns an optional custom failure message for the result. */
default String getFailureMessage() {
return "";
}

String getDetailMessage(
String messagePrefix, String message, boolean catastrophe, boolean forciblyRunRemotely);

Expand All @@ -196,6 +201,7 @@ public static final class SimpleSpawnResult implements SpawnResult {
private final Optional<Long> numBlockInputOperations;
private final Optional<Long> numInvoluntaryContextSwitches;
private final boolean cacheHit;
private final String failureMessage;

SimpleSpawnResult(Builder builder) {
this.exitCode = builder.exitCode;
Expand All @@ -208,6 +214,7 @@ public static final class SimpleSpawnResult implements SpawnResult {
this.numBlockInputOperations = builder.numBlockInputOperations;
this.numInvoluntaryContextSwitches = builder.numInvoluntaryContextSwitches;
this.cacheHit = builder.cacheHit;
this.failureMessage = builder.failureMessage;
}

@Override
Expand Down Expand Up @@ -273,6 +280,11 @@ public boolean isCacheHit() {
return cacheHit;
}

@Override
public String getFailureMessage() {
return failureMessage;
}

@Override
public String getDetailMessage(
String messagePrefix, String message, boolean catastrophe, boolean forciblyRunRemotely) {
Expand Down Expand Up @@ -318,6 +330,7 @@ public static final class Builder {
private Optional<Long> numBlockInputOperations = Optional.empty();
private Optional<Long> numInvoluntaryContextSwitches = Optional.empty();
private boolean cacheHit;
private String failureMessage = "";

public SpawnResult build() {
if (status == Status.SUCCESS) {
Expand Down Expand Up @@ -395,5 +408,10 @@ public Builder setCacheHit(boolean cacheHit) {
this.cacheHit = cacheHit;
return this;
}

public Builder setFailureMessage(String failureMessage) {
this.failureMessage = failureMessage;
return this;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public List<SpawnResult> exec(
SpawnCache cache = actionExecutionContext.getContext(SpawnCache.class);
// In production, the getContext method guarantees that we never get null back. However, our
// integration tests don't set it up correctly, so cache may be null in testing.
if (cache == null || !Spawns.mayBeCached(spawn)) {
if (cache == null) {
cache = SpawnCache.NO_CACHE;
}
SpawnResult spawnResult;
Expand All @@ -107,12 +107,15 @@ public List<SpawnResult> exec(

if (spawnResult.status() != Status.SUCCESS) {
String cwd = actionExecutionContext.getExecRoot().getPathString();
String resultMessage = spawnResult.getFailureMessage();
String message =
CommandFailureUtils.describeCommandFailure(
actionExecutionContext.getVerboseFailures(),
spawn.getArguments(),
spawn.getEnvironment(),
cwd);
resultMessage != ""
? resultMessage
: CommandFailureUtils.describeCommandFailure(
actionExecutionContext.getVerboseFailures(),
spawn.getArguments(),
spawn.getEnvironment(),
cwd);
throw new SpawnExecException(message, spawnResult, /*forciblyRunRemotely=*/false);
}
return ImmutableList.of(spawnResult);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ void store(SpawnResult result, Collection<Path> files)
* object is for the cache to store expensive intermediate values (such as the cache key) that are
* needed both for the lookup and the subsequent store operation.
*
* <p>The lookup must not succeed for non-cachable spawns. See {@link Spawns#mayBeCached()}.
*
* <p>Note that cache stores may be disabled, in which case the returned {@link CacheHandle}
* instance's {@link CacheHandle#store} is a no-op.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.SpawnResult;
import com.google.devtools.build.lib.actions.SpawnResult.Status;
import com.google.devtools.build.lib.actions.Spawns;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.events.Reporter;
Expand Down Expand Up @@ -102,7 +103,8 @@ public CacheHandle lookup(Spawn spawn, SpawnExecutionPolicy policy)
digestUtil.compute(command),
repository.getMerkleDigest(inputRoot),
platform,
policy.getTimeout());
policy.getTimeout(),
Spawns.mayBeCached(spawn));

// Look up action cache, and reuse the action output if it is found.
final ActionKey actionKey = digestUtil.computeActionKey(action);
Expand All @@ -113,7 +115,9 @@ public CacheHandle lookup(Spawn spawn, SpawnExecutionPolicy policy)
Context previous = withMetadata.attach();
try {
ActionResult result =
this.options.remoteAcceptCached ? remoteCache.getCachedActionResult(actionKey) : null;
this.options.remoteAcceptCached && Spawns.mayBeCached(spawn)
? remoteCache.getCachedActionResult(actionKey)
: null;
if (result != null) {
// We don't cache failed actions, so we know the outputs exist.
// For now, download all outputs locally; in the future, we can reuse the digests to
Expand Down Expand Up @@ -156,7 +160,10 @@ public boolean willStore() {
@Override
public void store(SpawnResult result, Collection<Path> files)
throws InterruptedException, IOException {
boolean uploadAction = Status.SUCCESS.equals(result.status()) && result.exitCode() == 0;
boolean uploadAction =
Spawns.mayBeCached(spawn)
&& Status.SUCCESS.equals(result.status())
&& result.exitCode() == 0;
Context previous = withMetadata.attach();
try {
remoteCache.upload(actionKey, execRoot, files, policy.getFileOutErr(), uploadAction);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionPolicy policy)
digestUtil.compute(command),
repository.getMerkleDigest(inputRoot),
platform,
policy.getTimeout());
policy.getTimeout(),
Spawns.mayBeCached(spawn));

// Look up action cache, and reuse the action output if it is found.
ActionKey actionKey = digestUtil.computeActionKey(action);
Expand Down Expand Up @@ -262,7 +263,8 @@ static Action buildAction(
Digest command,
Digest inputRoot,
Platform platform,
Duration timeout) {
Duration timeout,
boolean cacheable) {
Action.Builder action = Action.newBuilder();
action.setCommandDigest(command);
action.setInputRootDigest(inputRoot);
Expand All @@ -279,6 +281,9 @@ static Action buildAction(
if (!timeout.isZero()) {
action.setTimeout(com.google.protobuf.Duration.newBuilder().setSeconds(timeout.getSeconds()));
}
if (!cacheable) {
action.setDoNotCache(true);
}
return action.build();
}

Expand Down Expand Up @@ -326,7 +331,7 @@ private SpawnResult execLocally(
@Nullable RemoteActionCache remoteCache,
@Nullable ActionKey actionKey)
throws ExecException, IOException, InterruptedException {
if (uploadToCache && Spawns.mayBeCached(spawn) && remoteCache != null && actionKey != null) {
if (uploadToCache && remoteCache != null && actionKey != null) {
return execLocallyAndUpload(spawn, policy, inputMap, remoteCache, actionKey);
}
return fallbackRunner.exec(spawn, policy);
Expand All @@ -351,7 +356,10 @@ SpawnResult execLocallyAndUpload(
}
List<Path> outputFiles = listExistingOutputFiles(execRoot, spawn);
try {
boolean uploadAction = Status.SUCCESS.equals(result.status()) && result.exitCode() == 0;
boolean uploadAction =
Spawns.mayBeCached(spawn)
&& Status.SUCCESS.equals(result.status())
&& result.exitCode() == 0;
remoteCache.upload(actionKey, execRoot, outputFiles, policy.getFileOutErr(), uploadAction);
} catch (IOException e) {
if (verboseFailures) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import com.google.devtools.build.lib.actions.SpawnResult.Status;
import com.google.devtools.build.lib.actions.UserExecException;
import com.google.devtools.build.lib.exec.ExecutionOptions;
import com.google.devtools.build.lib.exec.SpawnExecException;
import com.google.devtools.build.lib.exec.SpawnRunner;
import com.google.devtools.build.lib.runtime.CommandEnvironment;
import com.google.devtools.build.lib.shell.AbnormalTerminationException;
Expand Down Expand Up @@ -90,13 +89,13 @@ protected SpawnResult runSpawn(
Path execRoot,
Path tmpDir,
Duration timeout)
throws ExecException, IOException, InterruptedException {
throws IOException, InterruptedException {
try {
sandbox.createFileSystem();
OutErr outErr = policy.getFileOutErr();
policy.prefetchInputs();

SpawnResult result = run(sandbox, outErr, timeout, tmpDir);
SpawnResult result = run(originalSpawn, sandbox, outErr, timeout, execRoot, tmpDir);

policy.lockOutputFiles();
try {
Expand All @@ -105,23 +104,6 @@ protected SpawnResult runSpawn(
} catch (IOException e) {
throw new IOException("Could not move output artifacts from sandboxed execution", e);
}

if (result.status() != Status.SUCCESS || result.exitCode() != 0) {
String message;
if (sandboxOptions.sandboxDebug) {
message =
CommandFailureUtils.describeCommandFailure(
true, sandbox.getArguments(), sandbox.getEnvironment(), execRoot.getPathString());
} else {
message =
CommandFailureUtils.describeCommandFailure(
verboseFailures,
originalSpawn.getArguments(),
originalSpawn.getEnvironment(),
execRoot.getPathString()) + SANDBOX_DEBUG_SUGGESTION;
}
throw new SpawnExecException(message, result, /*forciblyRunRemotely=*/false);
}
return result;
} finally {
if (!sandboxOptions.sandboxDebug) {
Expand All @@ -131,12 +113,30 @@ protected SpawnResult runSpawn(
}

private final SpawnResult run(
SandboxedSpawn sandbox, OutErr outErr, Duration timeout, Path tmpDir)
Spawn originalSpawn,
SandboxedSpawn sandbox,
OutErr outErr,
Duration timeout,
Path execRoot,
Path tmpDir)
throws IOException, InterruptedException {
Command cmd = new Command(
sandbox.getArguments().toArray(new String[0]),
sandbox.getEnvironment(),
sandbox.getSandboxExecRoot().getPathFile());
String failureMessage;
if (sandboxOptions.sandboxDebug) {
failureMessage =
CommandFailureUtils.describeCommandFailure(
true, sandbox.getArguments(), sandbox.getEnvironment(), execRoot.getPathString());
} else {
failureMessage =
CommandFailureUtils.describeCommandFailure(
verboseFailures,
originalSpawn.getArguments(),
originalSpawn.getEnvironment(),
execRoot.getPathString()) + SANDBOX_DEBUG_SUGGESTION;
}

long startTime = System.currentTimeMillis();
CommandResult commandResult;
Expand All @@ -162,6 +162,7 @@ private final SpawnResult run(
return new SpawnResult.Builder()
.setStatus(Status.EXECUTION_FAILED)
.setExitCode(LOCAL_EXEC_ERROR)
.setFailureMessage(failureMessage)
.build();
}

Expand All @@ -182,6 +183,7 @@ private final SpawnResult run(
.setWallTime(wallTime)
.setUserTime(commandResult.getUserExecutionTime())
.setSystemTime(commandResult.getSystemExecutionTime())
.setFailureMessage(status != Status.SUCCESS || exitCode != 0 ? failureMessage : "")
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,23 +167,4 @@ public void testCacheMissWithNonZeroExit() throws Exception {
verify(spawnRunner).exec(any(Spawn.class), any(SpawnExecutionPolicy.class));
verify(entry).store(eq(result), any(Collection.class));
}

@Test
public void testTagNoCache() throws Exception {
SpawnCache cache = mock(SpawnCache.class);
when(actionExecutionContext.getContext(eq(SpawnCache.class))).thenReturn(cache);
when(actionExecutionContext.getExecRoot()).thenReturn(fs.getPath("/execroot"));
when(spawnRunner.exec(any(Spawn.class), any(SpawnExecutionPolicy.class)))
.thenReturn(new SpawnResult.Builder().setStatus(Status.SUCCESS).build());

Spawn uncacheableSpawn =
new SpawnBuilder("/bin/echo", "Hi").withExecutionInfo("no-cache", "").build();

new TestedSpawnStrategy(spawnRunner).exec(uncacheableSpawn, actionExecutionContext);

// Must only be called exactly once.
verify(spawnRunner).exec(any(Spawn.class), any(SpawnExecutionPolicy.class));
// Must not be called.
verify(cache, never()).lookup(any(Spawn.class), any(SpawnExecutionPolicy.class));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import com.google.devtools.build.lib.actions.ActionInputHelper;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander;
import com.google.devtools.build.lib.actions.ExecutionRequirements;
import com.google.devtools.build.lib.actions.ResourceSet;
import com.google.devtools.build.lib.actions.SimpleSpawn;
import com.google.devtools.build.lib.actions.SpawnResult;
Expand Down Expand Up @@ -263,6 +264,45 @@ public Void answer(InvocationOnMock invocation) {
.upload(any(ActionKey.class), any(Path.class), eq(outputFiles), eq(outErr), eq(true));
}

@Test
public void noCacheSpawns() throws Exception {
// Checks that spawns that have mayBeCached false are not looked up in the remote cache,
// and also that their result is not uploaded to the remote cache. The artifacts, however,
// are uploaded.
SimpleSpawn uncacheableSpawn = new SimpleSpawn(
new FakeOwner("foo", "bar"),
/*arguments=*/ ImmutableList.of(),
/*environment=*/ ImmutableMap.of(),
ImmutableMap.of(ExecutionRequirements.NO_CACHE, ""),
/*inputs=*/ ImmutableList.of(),
/*outputs=*/ ImmutableList.<ActionInput>of(),
ResourceSet.ZERO);
CacheHandle entry = cache.lookup(uncacheableSpawn, simplePolicy);
verify(remoteCache, never())
.getCachedActionResult(any(ActionKey.class));
assertThat(entry.hasResult()).isFalse();
SpawnResult result = new SpawnResult.Builder().setExitCode(0).setStatus(Status.SUCCESS).build();
ImmutableList<Path> outputFiles = ImmutableList.of(fs.getPath("/random/file"));
entry.store(result, outputFiles);
verify(remoteCache)
.upload(any(ActionKey.class), any(Path.class), eq(outputFiles), eq(outErr), eq(false));
}

@Test
public void noCacheSpawnsNoResultStore() throws Exception {
// Only successful action results are uploaded to the remote cache. The artifacts, however,
// are uploaded regardless.
CacheHandle entry = cache.lookup(simpleSpawn, simplePolicy);
verify(remoteCache).getCachedActionResult(any(ActionKey.class));
assertThat(entry.hasResult()).isFalse();
SpawnResult result =
new SpawnResult.Builder().setExitCode(1).setStatus(Status.NON_ZERO_EXIT).build();
ImmutableList<Path> outputFiles = ImmutableList.of(fs.getPath("/random/file"));
entry.store(result, outputFiles);
verify(remoteCache)
.upload(any(ActionKey.class), any(Path.class), eq(outputFiles), eq(outErr), eq(false));
}

@Test
public void printWarningIfUploadFails() throws Exception {
CacheHandle entry = cache.lookup(simpleSpawn, simplePolicy);
Expand Down
Loading

0 comments on commit a22d0e9

Please sign in to comment.