Skip to content

Commit

Permalink
Fix stale trash dir not cleaned up on worker creation
Browse files Browse the repository at this point in the history
The bug in Blaze is that ${output_base}/blaze-workers/_moved_trash_dir/ doesn't
get wiped between blaze restarts. The directory to move to for asynchronous
deletion is an incrementing AtomicInteger but after a restart it will be zero
again. If there were previous directories there, we might get an error trying
to replace a non-empty directory while renaming.

RELNOTES:none
PiperOrigin-RevId: 610323627
Change-Id: I2d397fbd4590fa9f83273ac11d92d591bf9348b8
  • Loading branch information
oquenchil authored and Copybara-Service committed Feb 26, 2024
1 parent 9d34f8a commit e482e8b
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -110,4 +110,8 @@ public void shutdown() {
service = null;
}
}

public Path getTrashBase() {
return trashBase;
}
}
3 changes: 1 addition & 2 deletions src/main/java/com/google/devtools/build/lib/worker/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/exec:execution_options",
"//src/main/java/com/google/devtools/build/lib/exec:runfiles_tree_updater",
"//src/main/java/com/google/devtools/build/lib/exec:spawn_strategy_registry",
"//src/main/java/com/google/devtools/build/lib/exec:tree_deleter",
"//src/main/java/com/google/devtools/build/lib/exec/local",
"//src/main/java/com/google/devtools/build/lib/runtime/commands/events",
"//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
Expand Down Expand Up @@ -193,7 +192,7 @@ java_library(
":worker_options",
":worker_process_status",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/exec:tree_deleter",
"//src/main/java/com/google/devtools/build/lib/sandbox:tree_deleter",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//src/main/protobuf:failure_details_java_proto",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import com.google.common.io.BaseEncoding;
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.events.Reporter;
import com.google.devtools.build.lib.exec.TreeDeleter;
import com.google.devtools.build.lib.sandbox.AsynchronousTreeDeleter;
import com.google.devtools.build.lib.server.FailureDetails.Worker.Code;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
Expand Down Expand Up @@ -49,7 +49,7 @@ public class WorkerFactory extends BaseKeyedPooledObjectFactory<WorkerKey, Worke
protected final WorkerOptions workerOptions;

private final Path workerBaseDir;
private final TreeDeleter treeDeleter;
private final AsynchronousTreeDeleter treeDeleter;
private Reporter reporter;

/**
Expand All @@ -66,7 +66,7 @@ public WorkerFactory(
Path workerBaseDir,
WorkerOptions workerOptions,
@Nullable WorkerSandboxOptions hardenedSandboxOptions,
@Nullable TreeDeleter treeDeleter) {
@Nullable AsynchronousTreeDeleter treeDeleter) {
this.workerBaseDir = workerBaseDir;
this.workerOptions = workerOptions;
this.hardenedSandboxOptions = hardenedSandboxOptions;
Expand All @@ -83,6 +83,10 @@ public Worker create(WorkerKey key) throws IOException {
String workTypeName = key.getWorkerTypeName();
if (!workerBaseDir.isDirectory()) {
workerBaseDir.createDirectoryAndParents();
Path deleterTrashBase = treeDeleter == null ? null : treeDeleter.getTrashBase();
if (deleterTrashBase != null) {
deleterTrashBase.createDirectory();
}
}
Path logFile =
workerBaseDir.getRelative(workTypeName + "-" + workerId + "-" + key.getMnemonic() + ".log");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import com.google.devtools.build.lib.exec.ExecutionOptions;
import com.google.devtools.build.lib.exec.RunfilesTreeUpdater;
import com.google.devtools.build.lib.exec.SpawnStrategyRegistry;
import com.google.devtools.build.lib.exec.TreeDeleter;
import com.google.devtools.build.lib.exec.local.LocalEnvProvider;
import com.google.devtools.build.lib.runtime.BlazeModule;
import com.google.devtools.build.lib.runtime.BlazeWorkspace;
Expand All @@ -46,10 +45,12 @@

/** A module that adds the WorkerActionContextProvider to the available action context providers. */
public class WorkerModule extends BlazeModule {

private static final String STALE_TRASH = "_stale_trash";
private CommandEnvironment env;

private WorkerFactory workerFactory;
private TreeDeleter treeDeleter;
private AsynchronousTreeDeleter treeDeleter;
@VisibleForTesting WorkerPoolImpl workerPool;
@Nullable private WorkerLifecycleManager workerLifecycleManager;

Expand Down Expand Up @@ -114,6 +115,9 @@ public void buildStarting(BuildStartingEvent event) {
Path trashBase = workerDir.getRelative(AsynchronousTreeDeleter.MOVED_TRASH_DIR);
if (treeDeleter == null) {
treeDeleter = new AsynchronousTreeDeleter(trashBase);
if (trashBase.exists()) {
removeStaleTrash(workerDir, trashBase);
}
}
WorkerFactory newWorkerFactory =
new WorkerFactory(workerDir, options, workerSandboxOptions, treeDeleter);
Expand Down Expand Up @@ -185,6 +189,28 @@ public void buildStarting(BuildStartingEvent event) {
workerPool.reset();
}

private void removeStaleTrash(Path workerDir, Path trashBase) {
try {
// The AsynchronousTreeDeleter relies on a counter for naming directories that will be
// moved out of the way before being deleted asynchronously.
// If there is trash on disk from a previous bazel server instance, the dirs will have
// names not synced with the counter, therefore we may run the risk of moving a directory
// in this server instance to a path of an existing directory. To solve this we rename
// the trash directory that was on disk, create a new empty trash directory and delete
// the old trash via the AsynchronousTreeDeleter. Before deletion the stale trash will be
// moved to a directory named `0` under MOVED_TRASH_DIR.
Path staleTrash = trashBase.getParentDirectory().getChild(STALE_TRASH);
trashBase.renameTo(staleTrash);
trashBase.createDirectory();
treeDeleter.deleteTree(staleTrash);
} catch (IOException e) {
env.getReporter()
.handle(
Event.error(
String.format("Could not trash dir in '%s': %s", workerDir, e.getMessage())));
}
}

@Override
public void registerSpawnStrategies(
SpawnStrategyRegistry.Builder registryBuilder, CommandEnvironment env) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import com.google.devtools.build.lib.runtime.BlazeRuntime;
import com.google.devtools.build.lib.runtime.BlazeWorkspace;
import com.google.devtools.build.lib.runtime.CommandEnvironment;
import com.google.devtools.build.lib.sandbox.AsynchronousTreeDeleter;
import com.google.devtools.build.lib.util.AbruptExitException;
import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.FileSystem;
Expand Down Expand Up @@ -262,6 +263,32 @@ public void buildStarting_survivesNoWorkerDir() throws Exception {
assertThrows(IOException.class, () -> module.workerPool.borrowObject(key));
}

@Test
public void buildStarting_cleansStaleTrashDirCleanedOnFirstBuild() throws Exception {
WorkerModule module = new WorkerModule();
WorkerOptions options = WorkerOptions.DEFAULTS;

when(request.getOptions(WorkerOptions.class)).thenReturn(options);
setupEnvironment("/outputRoot");

module.beforeCommand(env);
Path workerDir = fs.getPath("/outputRoot/outputBase/bazel-workers");
Path trashBase = workerDir.getChild(AsynchronousTreeDeleter.MOVED_TRASH_DIR);
trashBase.createDirectoryAndParents();

Path staleTrash = trashBase.getChild("random-trash");

staleTrash.createDirectoryAndParents();
module.buildStarting(BuildStartingEvent.create(env, request));
// Trash is cleaned upon first build.
assertThat(staleTrash.exists()).isFalse();

staleTrash.createDirectoryAndParents();
module.buildStarting(BuildStartingEvent.create(env, request));
// Trash is not cleaned upon subsequent builds.
assertThat(staleTrash.exists()).isTrue();
}

private void setupEnvironment(String rootDir) throws IOException, AbruptExitException {
storedEventHandler = new StoredEventHandler();
Path root = fs.getPath(rootDir);
Expand Down

0 comments on commit e482e8b

Please sign in to comment.