Skip to content

Commit

Permalink
Introduce options for remote cache key scrubbing.
Browse files Browse the repository at this point in the history
Introduce an experimental flag for remote cache key scrubbing.

This PR introduces the --experimental_remote_scrubbing_config flag, which can
be used to scrub platform-dependent data from the key used to retrieve and
store action results from a remote or disk cache This way, actions executing
on different platforms but targeting the same platform may be able to share
cache entries.

To avoid flag proliferation and make for a nicer API, the scrubbing parameters
are supplied via an external file in text proto format.

This is a simplified implementation of one of the ideas described in [1],
highly influenced by Olivier Notteghem's earlier proposal [2], intended to
provide a simple yet flexible API to enable further experimentation. It must
be used with care, as incorrect settings can compromise build correctness.

[1] https://docs.google.com/document/d/1uMPj2s0TlHSIKSngqOkWJoeqOtKzaxQLtBrRfYif3Lo/edit?usp=sharing
[2] #18669

Closes #19523.

PiperOrigin-RevId: 571947013
Change-Id: Ic8da59946ea6d6811b840aee01548746aab2eba0
  • Loading branch information
tjgq authored and Copybara-Service committed Oct 9, 2023
1 parent 3bb882d commit 24de276
Show file tree
Hide file tree
Showing 20 changed files with 1,004 additions and 27 deletions.
15 changes: 15 additions & 0 deletions src/main/java/com/google/devtools/build/lib/remote/BUILD
Expand Up @@ -37,6 +37,7 @@ java_library(
"RemoteOutputChecker.java",
"AbstractActionInputPrefetcher.java",
"LeaseService.java",
"Scrubber.java",
"Store.java",
],
),
Expand Down Expand Up @@ -90,6 +91,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/exec/local",
"//src/main/java/com/google/devtools/build/lib/packages/semantics",
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/remote:scrubber",
"//src/main/java/com/google/devtools/build/lib/remote/circuitbreaker",
"//src/main/java/com/google/devtools/build/lib/remote/common",
"//src/main/java/com/google/devtools/build/lib/remote/common:bulk_transfer_exception",
Expand Down Expand Up @@ -252,6 +254,19 @@ java_library(
],
)

java_library(
name = "scrubber",
srcs = ["Scrubber.java"],
deps = [
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/protobuf:remote_scrubbing_java_proto",
"//third_party:guava",
"//third_party:jsr305",
"//third_party/protobuf:protobuf_java",
],
)

java_library(
name = "store",
srcs = ["Store.java"],
Expand Down
Expand Up @@ -87,6 +87,7 @@
import com.google.devtools.build.lib.remote.RemoteExecutionService.ActionResultMetadata.DirectoryMetadata;
import com.google.devtools.build.lib.remote.RemoteExecutionService.ActionResultMetadata.FileMetadata;
import com.google.devtools.build.lib.remote.RemoteExecutionService.ActionResultMetadata.SymlinkMetadata;
import com.google.devtools.build.lib.remote.Scrubber.SpawnScrubber;
import com.google.devtools.build.lib.remote.common.BulkTransferException;
import com.google.devtools.build.lib.remote.common.OperationObserver;
import com.google.devtools.build.lib.remote.common.OutputDigestMismatchException;
Expand Down Expand Up @@ -179,6 +180,8 @@ public class RemoteExecutionService {

@Nullable private final RemoteOutputChecker remoteOutputChecker;

@Nullable private final Scrubber scrubber;

public RemoteExecutionService(
Executor executor,
Reporter reporter,
Expand Down Expand Up @@ -210,6 +213,7 @@ public RemoteExecutionService(
if (remoteOptions.remoteMerkleTreeCacheSize != 0) {
merkleTreeCacheBuilder.maximumSize(remoteOptions.remoteMerkleTreeCacheSize);
}
this.scrubber = remoteOptions.scrubber;
this.merkleTreeCache = merkleTreeCacheBuilder.build();

this.tempPathGenerator = tempPathGenerator;
Expand All @@ -219,12 +223,13 @@ public RemoteExecutionService(
this.remoteOutputChecker = remoteOutputChecker;
}

static Command buildCommand(
private Command buildCommand(
Collection<? extends ActionInput> outputs,
List<String> arguments,
ImmutableMap<String, String> env,
@Nullable Platform platform,
RemotePathResolver remotePathResolver) {
RemotePathResolver remotePathResolver,
@Nullable SpawnScrubber spawnScrubber) {
Command.Builder command = Command.newBuilder();
ArrayList<String> outputFiles = new ArrayList<>();
ArrayList<String> outputDirectories = new ArrayList<>();
Expand All @@ -249,6 +254,9 @@ static Command buildCommand(
command.setPlatform(platform);
}
for (String arg : arguments) {
if (spawnScrubber != null) {
arg = spawnScrubber.transformArgument(arg);
}
command.addArguments(decodeBytestringUtf8(arg));
}
// Sorting the environment pairs by variable name.
Expand Down Expand Up @@ -349,15 +357,18 @@ private SortedMap<PathFragment, ActionInput> buildOutputDirMap(Spawn spawn) {
}

private MerkleTree buildInputMerkleTree(
Spawn spawn, SpawnExecutionContext context, ToolSignature toolSignature)
Spawn spawn,
SpawnExecutionContext context,
ToolSignature toolSignature,
@Nullable SpawnScrubber spawnScrubber)
throws IOException, ForbiddenActionInputException {
// Add output directories to inputs so that they are created as empty directories by the
// executor. The spec only requires the executor to create the parent directory of an output
// directory, which differs from the behavior of both local and sandboxed execution.
SortedMap<PathFragment, ActionInput> outputDirMap = buildOutputDirMap(spawn);
boolean useMerkleTreeCache = remoteOptions.remoteMerkleTreeCache;
if (toolSignature != null) {
// Marking tool files is not yet supported in conjunction with the merkle tree cache.
if (toolSignature != null || spawnScrubber != null) {
// The Merkle tree cache is not yet compatible with scrubbing or marking tool files.
useMerkleTreeCache = false;
}
if (useMerkleTreeCache) {
Expand All @@ -366,18 +377,22 @@ private MerkleTree buildInputMerkleTree(
remotePathResolver.walkInputs(
spawn,
context,
(Object nodeKey, InputWalker walker) -> {
subMerkleTrees.add(
buildMerkleTreeVisitor(
nodeKey, walker, inputMetadataProvider, context.getPathResolver()));
});
(Object nodeKey, InputWalker walker) ->
subMerkleTrees.add(
buildMerkleTreeVisitor(
nodeKey,
walker,
inputMetadataProvider,
context.getPathResolver(),
spawnScrubber)));
if (!outputDirMap.isEmpty()) {
subMerkleTrees.add(
MerkleTree.build(
outputDirMap,
inputMetadataProvider,
execRoot,
context.getPathResolver(),
/* spawnScrubber= */ null,
digestUtil));
}
return MerkleTree.merge(subMerkleTrees, digestUtil);
Expand All @@ -399,6 +414,7 @@ private MerkleTree buildInputMerkleTree(
context.getInputMetadataProvider(),
execRoot,
context.getPathResolver(),
spawnScrubber,
digestUtil);
}
}
Expand All @@ -407,7 +423,8 @@ private MerkleTree buildMerkleTreeVisitor(
Object nodeKey,
InputWalker walker,
InputMetadataProvider inputMetadataProvider,
ArtifactPathResolver artifactPathResolver)
ArtifactPathResolver artifactPathResolver,
@Nullable SpawnScrubber spawnScrubber)
throws IOException, ForbiddenActionInputException {
// Deduplicate concurrent computations for the same node. It's not possible to use
// MerkleTreeCache#get(key, loader) because the loading computation may cause other nodes to be
Expand All @@ -419,7 +436,8 @@ private MerkleTree buildMerkleTreeVisitor(
// No preexisting cache entry, so we must do the computation ourselves.
try {
freshFuture.complete(
uncachedBuildMerkleTreeVisitor(walker, inputMetadataProvider, artifactPathResolver));
uncachedBuildMerkleTreeVisitor(
walker, inputMetadataProvider, artifactPathResolver, spawnScrubber));
} catch (Exception e) {
freshFuture.completeExceptionally(e);
}
Expand All @@ -443,7 +461,8 @@ private MerkleTree buildMerkleTreeVisitor(
public MerkleTree uncachedBuildMerkleTreeVisitor(
InputWalker walker,
InputMetadataProvider inputMetadataProvider,
ArtifactPathResolver artifactPathResolver)
ArtifactPathResolver artifactPathResolver,
@Nullable SpawnScrubber scrubber)
throws IOException, ForbiddenActionInputException {
ConcurrentLinkedQueue<MerkleTree> subMerkleTrees = new ConcurrentLinkedQueue<>();
subMerkleTrees.add(
Expand All @@ -452,18 +471,18 @@ public MerkleTree uncachedBuildMerkleTreeVisitor(
inputMetadataProvider,
execRoot,
artifactPathResolver,
scrubber,
digestUtil));
walker.visitNonLeaves(
(Object subNodeKey, InputWalker subWalker) -> {
subMerkleTrees.add(
buildMerkleTreeVisitor(
subNodeKey, subWalker, inputMetadataProvider, artifactPathResolver));
});
(Object subNodeKey, InputWalker subWalker) ->
subMerkleTrees.add(
buildMerkleTreeVisitor(
subNodeKey, subWalker, inputMetadataProvider, artifactPathResolver, scrubber)));
return MerkleTree.merge(subMerkleTrees, digestUtil);
}

@Nullable
private static ByteString buildSalt(Spawn spawn) {
private static ByteString buildSalt(Spawn spawn, @Nullable SpawnScrubber spawnScrubber) {
CacheSalt.Builder saltBuilder =
CacheSalt.newBuilder().setMayBeExecutedRemotely(Spawns.mayBeExecutedRemotely(spawn));

Expand All @@ -473,6 +492,11 @@ private static ByteString buildSalt(Spawn spawn) {
saltBuilder.setWorkspace(workspace);
}

if (spawnScrubber != null) {
saltBuilder.setScrubSalt(
CacheSalt.ScrubSalt.newBuilder().setSalt(spawnScrubber.getSalt()).build());
}

return saltBuilder.build().toByteString();
}

Expand Down Expand Up @@ -508,7 +532,9 @@ public RemoteAction buildRemoteAction(Spawn spawn, SpawnExecutionContext context
remoteActionBuildingSemaphore.acquire();
try {
ToolSignature toolSignature = getToolSignature(spawn, context);
final MerkleTree merkleTree = buildInputMerkleTree(spawn, context, toolSignature);
SpawnScrubber spawnScrubber = scrubber != null ? scrubber.forSpawn(spawn) : null;
final MerkleTree merkleTree =
buildInputMerkleTree(spawn, context, toolSignature, spawnScrubber);

// Get the remote platform properties.
Platform platform;
Expand All @@ -526,7 +552,8 @@ public RemoteAction buildRemoteAction(Spawn spawn, SpawnExecutionContext context
spawn.getArguments(),
spawn.getEnvironment(),
platform,
remotePathResolver);
remotePathResolver,
spawnScrubber);
Digest commandHash = digestUtil.compute(command);
Action action =
Utils.buildAction(
Expand All @@ -535,7 +562,7 @@ public RemoteAction buildRemoteAction(Spawn spawn, SpawnExecutionContext context
platform,
context.getTimeout(),
Spawns.mayBeCachedRemotely(spawn),
buildSalt(spawn));
buildSalt(spawn, spawnScrubber));

ActionKey actionKey = digestUtil.computeActionKey(action);

Expand Down Expand Up @@ -1414,7 +1441,8 @@ public void uploadInputsIfNotPresent(RemoteAction action, boolean force)
Spawn spawn = action.getSpawn();
SpawnExecutionContext context = action.getSpawnExecutionContext();
ToolSignature toolSignature = getToolSignature(spawn, context);
merkleTree = buildInputMerkleTree(spawn, context, toolSignature);
SpawnScrubber spawnScrubber = scrubber != null ? scrubber.forSpawn(spawn) : null;
merkleTree = buildInputMerkleTree(spawn, context, toolSignature, spawnScrubber);
}

remoteExecutionCache.ensureInputsPresent(
Expand Down
Expand Up @@ -311,6 +311,14 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException {
FailureDetails.RemoteOptions.Code.EXECUTION_WITH_INVALID_CACHE);
}

boolean enableScrubbing = remoteOptions.scrubber != null;
if (enableScrubbing && enableRemoteExecution) {

throw createOptionsExitException(
"Cannot combine remote cache key scrubbing with remote execution",
FailureDetails.RemoteOptions.Code.EXECUTION_WITH_SCRUBBING);
}

// TODO(bazel-team): Consider adding a warning or more validation if the remoteDownloadRegex is
// used without Build without the Bytes.
ImmutableList.Builder<Pattern> patternsToDownloadBuilder = ImmutableList.builder();
Expand Down

0 comments on commit 24de276

Please sign in to comment.