Skip to content

Commit

Permalink
Create output directories for remote execution (#15818)
Browse files Browse the repository at this point in the history
By explicitly declaring output directories as inputs to a remote action,
this commit ensures that these directories are created by the remote
executor prior to the execution of the action. This brings the behavior
of remote execution regarding tree artifacts in line with that of local
or sandboxed execution.

Getting the tests to pass requires modifying a check in Bazel's own
remote worker implementation: Previously, the worker explicitly verified
that output directories don't exist after the inputs have been staged.
This behavior is not backed by the spec and has thus been modified: Now,
it is only checked that the output directories either don't exist or are
directories.

Fixes #6393

Closes #15366.

PiperOrigin-RevId: 447451303

Co-authored-by: Chenchu K <ckolli@google.com>
  • Loading branch information
fmeum and ckolli5 committed Jul 7, 2022
1 parent 64571a4 commit 1404651
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 27 deletions.
Expand Up @@ -139,6 +139,7 @@
import java.util.Objects;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.Executor;
Expand Down Expand Up @@ -305,8 +306,25 @@ public boolean mayBeExecutedRemotely(Spawn spawn) {
&& Spawns.mayBeExecutedRemotely(spawn);
}

private SortedMap<PathFragment, ActionInput> buildOutputDirMap(Spawn spawn) {
TreeMap<PathFragment, ActionInput> outputDirMap = new TreeMap<>();
for (ActionInput output : spawn.getOutputFiles()) {
if (output instanceof Artifact && ((Artifact) output).isTreeArtifact()) {
outputDirMap.put(
PathFragment.create(remotePathResolver.getWorkingDirectory())
.getRelative(remotePathResolver.localPathToOutputPath(output.getExecPath())),
output);
}
}
return outputDirMap;
}

private MerkleTree buildInputMerkleTree(Spawn spawn, SpawnExecutionContext context)
throws IOException, ForbiddenActionInputException {
// Add output directories to inputs so that they are created as empty directories by the
// executor. The spec only requires the executor to create the parent directory of an output
// directory, which differs from the behavior of both local and sandboxed execution.
SortedMap<PathFragment, ActionInput> outputDirMap = buildOutputDirMap(spawn);
if (remoteOptions.remoteMerkleTreeCache) {
MetadataProvider metadataProvider = context.getMetadataProvider();
ConcurrentLinkedQueue<MerkleTree> subMerkleTrees = new ConcurrentLinkedQueue<>();
Expand All @@ -316,9 +334,20 @@ private MerkleTree buildInputMerkleTree(Spawn spawn, SpawnExecutionContext conte
(Object nodeKey, InputWalker walker) -> {
subMerkleTrees.add(buildMerkleTreeVisitor(nodeKey, walker, metadataProvider));
});
if (!outputDirMap.isEmpty()) {
subMerkleTrees.add(MerkleTree.build(outputDirMap, metadataProvider, execRoot, digestUtil));
}
return MerkleTree.merge(subMerkleTrees, digestUtil);
} else {
SortedMap<PathFragment, ActionInput> inputMap = remotePathResolver.getInputMapping(context);
if (!outputDirMap.isEmpty()) {
// The map returned by getInputMapping is mutable, but must not be mutated here as it is
// shared with all other strategies.
SortedMap<PathFragment, ActionInput> newInputMap = new TreeMap<>();
newInputMap.putAll(inputMap);
newInputMap.putAll(outputDirMap);
inputMap = newInputMap;
}
return MerkleTree.build(inputMap, context.getMetadataProvider(), execRoot, digestUtil);
}
}
Expand Down Expand Up @@ -937,9 +966,9 @@ public InMemoryOutput downloadOutputs(RemoteAction action, RemoteActionResult re
// Check that all mandatory outputs are created.
for (ActionInput output : action.getSpawn().getOutputFiles()) {
if (action.getSpawn().isMandatoryOutput(output)) {
// Don't check output that is tree artifact since spawn could generate nothing under that
// directory. Remote server typically doesn't create directory ahead of time resulting in
// empty tree artifact missing from action cache entry.
// In the past, remote execution did not create output directories if the action didn't do
// this explicitly. This check only remains so that old remote cache entries that do not
// include empty output directories remain valid.
if (output instanceof Artifact && ((Artifact) output).isTreeArtifact()) {
continue;
}
Expand Down
Expand Up @@ -21,6 +21,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/remote/util",
"//src/main/java/com/google/devtools/build/lib/skyframe:tree_artifact_value",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//third_party:guava",
Expand Down
Expand Up @@ -24,7 +24,6 @@
import com.google.devtools.build.lib.remote.merkletree.DirectoryTree.DirectoryNode;
import com.google.devtools.build.lib.remote.merkletree.DirectoryTree.FileNode;
import com.google.devtools.build.lib.remote.util.DigestUtil;
import com.google.devtools.build.lib.skyframe.TreeArtifactValue;
import com.google.devtools.build.lib.vfs.Dirent;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
Expand All @@ -35,7 +34,6 @@
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import javax.annotation.Nullable;

/** Builder for directory trees. */
class DirectoryTreeBuilder {
Expand Down Expand Up @@ -97,7 +95,6 @@ private static int buildFromPaths(
Map<PathFragment, DirectoryNode> tree)
throws IOException {
return build(
null,
inputs,
tree,
(input, path, currDir) -> {
Expand Down Expand Up @@ -126,7 +123,6 @@ private static int buildFromActionInputs(
Map<PathFragment, DirectoryNode> tree)
throws IOException {
return build(
metadataProvider,
inputs,
tree,
(input, path, currDir) -> {
Expand Down Expand Up @@ -182,7 +178,6 @@ private static int buildFromActionInputs(
}

private static <T> int build(
@Nullable MetadataProvider metadataProvider,
SortedMap<PathFragment, T> inputs,
Map<PathFragment, DirectoryNode> tree,
FileNodeVisitor<T> fileNodeVisitor)
Expand All @@ -200,24 +195,13 @@ private static <T> int build(
T input = e.getValue();

if (input instanceof DerivedArtifact && ((DerivedArtifact) input).isTreeArtifact()) {
DerivedArtifact artifact = (DerivedArtifact) input;
// MetadataProvider is provided by all callers for which T is a superclass of
// DerivedArtifact.
Preconditions.checkNotNull(metadataProvider);
FileArtifactValue metadata =
Preconditions.checkNotNull(
metadataProvider.getMetadata(artifact),
"missing metadata for '%s'",
artifact.getExecPathString());
Preconditions.checkState(
metadata.equals(TreeArtifactValue.empty().getMetadata()),
"Encountered non-empty TreeArtifact '%s' with metadata '%s', which should have"
+ " been expanded by SpawnInputExpander. This is a bug.",
path,
metadata);
// Create an empty directory and its parent directories but don't visit the TreeArtifact
// input itself: A TreeArtifact's metadata has type REGULAR_FILE, not DIRECTORY, and would
// thus lead to an empty file being created in the buildFromActionInputs visitor.
// SpawnInputExpander has already expanded non-empty tree artifacts into a collection of
// TreeFileArtifacts. Thus, at this point, tree artifacts represent empty directories, which
// we create together with their parents.
// Note: This also handles output directories of actions, which are explicitly included as
// inputs so that they are created by the executor before the action executes. Since such a
// directory must remain writeable, MetadataProvider#getMetadata must not be called on the
// tree artifact here as it would have the side effect of making it read only.
DirectoryNode emptyDir = new DirectoryNode(path.getBaseName());
tree.put(path, emptyDir);
createParentDirectoriesIfNotExist(path, emptyDir, tree);
Expand Down
79 changes: 79 additions & 0 deletions src/test/shell/bazel/remote/remote_execution_test.sh
Expand Up @@ -3052,6 +3052,85 @@ function test_empty_tree_artifact_as_inputs_remote_cache() {
expect_log "remote cache hit"
}

function generate_tree_artifact_output() {
touch WORKSPACE
mkdir -p pkg

cat > pkg/def.bzl <<'EOF'
def _r(ctx):
empty_dir = ctx.actions.declare_directory("%s/empty_dir" % ctx.label.name)
ctx.actions.run_shell(
outputs = [empty_dir],
command = "cd %s && pwd" % empty_dir.path,
)
non_empty_dir = ctx.actions.declare_directory("%s/non_empty_dir" % ctx.label.name)
ctx.actions.run_shell(
outputs = [non_empty_dir],
command = "cd %s && pwd && touch out" % non_empty_dir.path,
)
return [DefaultInfo(files = depset([empty_dir, non_empty_dir]))]
r = rule(implementation = _r)
EOF

cat > pkg/BUILD <<'EOF'
load(":def.bzl", "r")
r(name = "a")
EOF
}

function test_create_tree_artifact_outputs() {
# Test that if a tree artifact is declared as an input, then the corresponding
# empty directory is created before the action executes remotely.
generate_tree_artifact_output

bazel build \
--spawn_strategy=remote \
--remote_executor=grpc://localhost:${worker_port} \
//pkg:a &>$TEST_log || fail "expected build to succeed"
[[ -f bazel-bin/pkg/a/non_empty_dir/out ]] || fail "expected tree artifact to contain a file"
[[ -d bazel-bin/pkg/a/empty_dir ]] || fail "expected directory to exist"

bazel clean --expunge
bazel build \
--spawn_strategy=remote \
--remote_executor=grpc://localhost:${worker_port} \
--experimental_remote_merkle_tree_cache \
//pkg:a &>$TEST_log || fail "expected build to succeed with Merkle tree cache"
[[ -f bazel-bin/pkg/a/non_empty_dir/out ]] || fail "expected tree artifact to contain a file"
[[ -d bazel-bin/pkg/a/empty_dir ]] || fail "expected directory to exist"

bazel clean --expunge
bazel build \
--spawn_strategy=remote \
--remote_executor=grpc://localhost:${worker_port} \
--experimental_sibling_repository_layout \
//pkg:a &>$TEST_log || fail "expected build to succeed with sibling repository layout"
[[ -f bazel-bin/pkg/a/non_empty_dir/out ]] || fail "expected tree artifact to contain a file"
[[ -d bazel-bin/pkg/a/empty_dir ]] || fail "expected directory to exist"
}

function test_create_tree_artifact_outputs_remote_cache() {
# Test that implicitly created empty directories corresponding to empty tree
# artifacts outputs are correctly cached in the remote cache.
generate_tree_artifact_output

bazel build \
--remote_cache=grpc://localhost:${worker_port} \
//pkg:a &>$TEST_log || fail "expected build to succeed"

bazel clean

bazel build \
--remote_cache=grpc://localhost:${worker_port} \
//pkg:a &>$TEST_log || fail "expected build to succeed"

expect_log "2 remote cache hit"
[[ -f bazel-bin/pkg/a/non_empty_dir/out ]] || fail "expected tree artifact to contain a file"
[[ -d bazel-bin/pkg/a/empty_dir ]] || fail "expected directory to exist"
}

# Runs coverage with `cc_test` and RE then checks the coverage file is returned.
# Older versions of gcov are not supported with bazel coverage and so will be skipped.
# See the above `test_java_rbe_coverage_produces_report` for more information.
Expand Down
Expand Up @@ -283,7 +283,13 @@ private ActionResult execute(
for (String output : command.getOutputDirectoriesList()) {
Path file = workingDirectory.getRelative(output);
if (file.exists()) {
throw new FileAlreadyExistsException("Output directory/file already exists: " + file);
if (!file.isDirectory()) {
throw new FileAlreadyExistsException(
"Non-directory exists at output directory path: " + file);
} else if (!file.getDirectoryEntries().isEmpty()) {
throw new FileAlreadyExistsException(
"Non-empty directory exists at output directory path: " + file);
}
}
FileSystemUtils.createDirectoryAndParents(file.getParentDirectory());
outputs.add(file);
Expand Down

0 comments on commit 1404651

Please sign in to comment.