Skip to content

Commit

Permalink
Make SymlinkForest simpler and more efficient
Browse files Browse the repository at this point in the history
Bazel rebuilds the symlink tree under the execution root before every build to ensure that source files from the main repo and external repos are available and up to date. However, SymlinkForest has accumulated many legacy behaviors that are no longer necessary and are inefficient. This change simplifies the logic and makes it much faster.

The main improvement is that, instead of linking every file and directory under the top-level directory of every external repo, we only create a single link to the top-level directory of each external repo. This greatly reduces the number of symlink creation operations, which considerably speeds up the preparation phase on Windows.

RELNOTES: None
PiperOrigin-RevId: 246520821
  • Loading branch information
meteorcloudy authored and Copybara-Service committed May 3, 2019
1 parent ba4862d commit 844e4e2
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 374 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -453,8 +453,7 @@ private void prepare(PackageRoots packageRoots)

// Plant the symlink forest.
try (SilentCloseable c = Profiler.instance().profile("plantSymlinkForest")) {
new SymlinkForest(
packageRootMap.get(), getExecRoot(), runtime.getProductName(), env.getWorkspaceName())
new SymlinkForest(packageRootMap.get(), getExecRoot(), runtime.getProductName())
.plantSymlinkForest();
} catch (IOException e) {
throw new ExecutorInitException("Source forest creation failed", e);
Expand Down
253 changes: 59 additions & 194 deletions src/main/java/com/google/devtools/build/lib/buildtool/SymlinkForest.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,251 +16,116 @@

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.devtools.build.lib.cmdline.LabelConstants;
import com.google.devtools.build.lib.cmdline.PackageIdentifier;
import com.google.devtools.build.lib.cmdline.RepositoryName;
import com.google.devtools.build.lib.concurrent.ThreadSafety;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.Root;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* Creates a symlink forest based on a package path map.
*/
class SymlinkForest {

private static final Logger logger = Logger.getLogger(SymlinkForest.class.getName());
private static final boolean LOG_FINER = logger.isLoggable(Level.FINER);

private final ImmutableMap<PackageIdentifier, Root> packageRoots;
private final Path execroot;
private final String workspaceName;
private final String productName;
private final String[] prefixes;
private final String prefix;

SymlinkForest(
ImmutableMap<PackageIdentifier, Root> packageRoots,
Path execroot,
String productName,
String workspaceName) {
ImmutableMap<PackageIdentifier, Root> packageRoots, Path execroot, String productName) {
this.packageRoots = packageRoots;
this.execroot = execroot;
this.workspaceName = workspaceName;
this.productName = productName;
this.prefixes = new String[] { ".", "_", productName + "-"};
this.prefix = productName + "-";
}

/**
* Returns the longest prefix of 'path' that is contained in the given set of 'prefixes', i.e.
* the closest ancestor directory (or 'path' itself) that is a member of 'prefixes'.
*
* <p>Candidates are created in the same repository as 'path' and are tried from the full
* package fragment down to the empty fragment, so the first match found is the longest one.
*
* @param path the package identifier whose package fragment is searched for a known prefix
* @param prefixes the set of package identifiers that count as valid prefixes
* @return the longest matching prefix, or null if none is found
*/
@VisibleForTesting
static PackageIdentifier longestPathPrefix(
PackageIdentifier path, ImmutableSet<PackageIdentifier> prefixes) {
// Start with all segments ('path' itself) and drop one trailing segment per iteration; the
// last iteration (i == 0) tests the empty fragment.
for (int i = path.getPackageFragment().segmentCount(); i >= 0; i--) {
PackageIdentifier prefix = createInRepo(path, path.getPackageFragment().subFragment(0, i));
if (prefixes.contains(prefix)) {
return prefix;
}
}
return null;
}

/**
* Delete all dir trees under a given 'dir' that don't start with one of a set
* of given 'prefixes'. Does not follow any symbolic links.
* Delete all dir trees under a given 'dir' that don't start with a given 'prefix'. Does not
* follow any symbolic links.
*/
@VisibleForTesting
@ThreadSafety.ThreadSafe
static void deleteTreesBelowNotPrefixed(Path dir, String[] prefixes) throws IOException {
dirloop:
static void deleteTreesBelowNotPrefixed(Path dir, String prefix) throws IOException {
for (Path p : dir.getDirectoryEntries()) {
String name = p.getBaseName();
for (String prefix : prefixes) {
if (name.startsWith(prefix)) {
continue dirloop;
}
if (!p.getBaseName().startsWith(prefix)) {
p.deleteTree();
}
p.deleteTree();
}
}

/**
* Plants a symlink forest under the execution root to ensure that source files are available
* and up to date. For the main repo: if the root package ("//:") is used, link every file and
* directory under the top-level directory of the main repo. Otherwise, only link the top-level
* directories that contain the given main repo packages. For every external repo: create a
* directory link of the following form:
* <execroot>/<ws_name>/external/<repo_name> --> <output_base>/external/<repo_name>
*/
void plantSymlinkForest() throws IOException {
deleteTreesBelowNotPrefixed(execroot, prefixes);
// TODO(kchodorow): this can be removed once the execution root is rearranged.
// Current state: symlink tree was created under execroot/$(basename ws) and then
// execroot/wsname is symlinked to that. The execution root change creates (and cleans up)
// subtrees for each repository and has been rolled forward and back several times. Thus, if
// someone was using a with-execroot-change version of bazel and then switched to this one,
// their execution root would contain a subtree for execroot/wsname that would never be
// cleaned up by this version of Bazel.
Path realWorkspaceDir = execroot.getParentDirectory().getRelative(workspaceName);
if (!workspaceName.equals(execroot.getBaseName()) && realWorkspaceDir.exists()
&& !realWorkspaceDir.isSymbolicLink()) {
realWorkspaceDir.deleteTree();
}
deleteTreesBelowNotPrefixed(execroot, prefix);

Path mainRepoRoot = null;
Map<Path, Path> mainRepoLinks = Maps.newHashMap();
Set<Path> externalRepoLinks = Sets.newHashSet();

// Packages come from exactly one root, but their shared ancestors may come from more.
Map<PackageIdentifier, Set<Root>> dirRootsMap = Maps.newHashMap();
// Elements in this list are added so that parents come before their children.
ArrayList<PackageIdentifier> dirsParentsFirst = new ArrayList<>();
for (Map.Entry<PackageIdentifier, Root> entry : packageRoots.entrySet()) {
PackageIdentifier pkgId = entry.getKey();
if (pkgId.equals(LabelConstants.EXTERNAL_PACKAGE_IDENTIFIER)) {
// This isn't a "real" package, don't add it to the symlink tree.
continue;
}
Root pkgRoot = entry.getValue();
ArrayList<PackageIdentifier> newDirs = new ArrayList<>();
for (PathFragment fragment = pkgId.getPackageFragment();
!fragment.isEmpty();
fragment = fragment.getParentDirectory()) {
PackageIdentifier dirId = createInRepo(pkgId, fragment);
Set<Root> roots = dirRootsMap.get(dirId);
if (roots == null) {
roots = Sets.newHashSet();
dirRootsMap.put(dirId, roots);
newDirs.add(dirId);
RepositoryName repository = pkgId.getRepository();
if (repository.isMain() || repository.isDefault()) {
// If root package of the main repo is required, we record the main repo root so that
// we can later link everything under main repo's top-level directory. And in this case,
// we don't need to record other links for directories under the top-level directory any
// more.
if (pkgId.getPackageFragment().equals(PathFragment.EMPTY_FRAGMENT)) {
mainRepoRoot = entry.getValue().getRelative(pkgId.getSourceRoot());
}
roots.add(pkgRoot);
}
Collections.reverse(newDirs);
dirsParentsFirst.addAll(newDirs);
}
// Now add in roots for all non-pkg dirs that are in between two packages, and missed above.
for (PackageIdentifier dir : dirsParentsFirst) {
if (!packageRoots.containsKey(dir)) {
PackageIdentifier pkgId = longestPathPrefix(dir, packageRoots.keySet());
if (pkgId != null) {
dirRootsMap.get(dir).add(packageRoots.get(pkgId));
if (mainRepoRoot == null) {
Path execrootLink = execroot.getRelative(pkgId.getPackageFragment().getSegment(0));
Path sourcePath = entry.getValue().getRelative(pkgId.getSourceRoot().getSegment(0));
mainRepoLinks.putIfAbsent(execrootLink, sourcePath);
}
}
}
// Create output dirs for all dirs that have more than one root and need to be split.
for (PackageIdentifier dir : dirsParentsFirst) {
if (!dir.getRepository().isMain()) {
FileSystemUtils.createDirectoryAndParents(
execroot.getRelative(dir.getRepository().getPathUnderExecRoot()));
}
if (dirRootsMap.get(dir).size() > 1) {
if (LOG_FINER) {
logger.finer("mkdir " + execroot.getRelative(dir.getPathUnderExecRoot()));
} else {
// For other external repositories, generate a symlink to the external repository
// directory itself.
// <output_base>/execroot/<main repo name>/external/<external repo name> -->
// <output_base>/external/<external repo name>
Path execrootLink = execroot.getRelative(repository.getPathUnderExecRoot());
Path sourcePath = entry.getValue().getRelative(repository.getSourceRoot());
if (externalRepoLinks.contains(execrootLink)) {
continue;
}
FileSystemUtils.createDirectoryAndParents(
execroot.getRelative(dir.getPathUnderExecRoot()));
}
}

// Make dir links for single rooted dirs.
for (PackageIdentifier dir : dirsParentsFirst) {
Set<Root> roots = dirRootsMap.get(dir);
// Simple case of one root for this dir.
if (roots.size() == 1) {
PathFragment parent = dir.getPackageFragment().getParentDirectory();
if (!parent.isEmpty() && dirRootsMap.get(createInRepo(dir, parent)).size() == 1) {
continue; // skip--an ancestor will link this one in from above
}
// This is the top-most dir that can be linked to a single root. Make it so.
Root root = roots.iterator().next(); // lone root in set
if (LOG_FINER) {
logger.finer(
"ln -s "
+ root.getRelative(dir.getSourceRoot())
+ " "
+ execroot.getRelative(dir.getPathUnderExecRoot()));
}
execroot.getRelative(dir.getPathUnderExecRoot())
.createSymbolicLink(root.getRelative(dir.getSourceRoot()));
}
}
// Make links for dirs within packages, skip parent-only dirs.
for (PackageIdentifier dir : dirsParentsFirst) {
if (dirRootsMap.get(dir).size() > 1) {
// If this dir is at or below a package dir, link in its contents.
PackageIdentifier pkgId = longestPathPrefix(dir, packageRoots.keySet());
if (pkgId != null) {
Root root = packageRoots.get(pkgId);
try {
Path absdir = root.getRelative(dir.getSourceRoot());
if (absdir.isDirectory()) {
if (LOG_FINER) {
logger.finer(
"ln -s " + absdir + "/* " + execroot.getRelative(dir.getSourceRoot()) + "/");
}
for (Path target : absdir.getDirectoryEntries()) {
PathFragment p = root.relativize(target);
if (!dirRootsMap.containsKey(createInRepo(pkgId, p))) {
//LOG.finest("ln -s " + target + " " + linkRoot.getRelative(p));
execroot.getRelative(p).createSymbolicLink(target);
}
}
} else {
logger.fine("Symlink planting skipping dir '" + absdir + "'");
}
} catch (IOException e) {
e.printStackTrace();
}
// Otherwise its just an otherwise empty common parent dir.
if (externalRepoLinks.isEmpty()) {
execroot.getRelative(LabelConstants.EXTERNAL_PACKAGE_NAME).createDirectoryAndParents();
}
externalRepoLinks.add(execrootLink);
execrootLink.createSymbolicLink(sourcePath);
}
}

for (Map.Entry<PackageIdentifier, Root> entry : packageRoots.entrySet()) {
PackageIdentifier pkgId = entry.getKey();
if (!pkgId.getPackageFragment().equals(PathFragment.EMPTY_FRAGMENT)) {
continue;
}
Path execrootDirectory = execroot.getRelative(pkgId.getPathUnderExecRoot());
// If there were no subpackages, this directory might not exist yet.
if (!execrootDirectory.exists()) {
FileSystemUtils.createDirectoryAndParents(execrootDirectory);
}
// For the top-level directory, generate symlinks to everything in the directory instead of
// the directory itself.
Path sourceDirectory = entry.getValue().getRelative(pkgId.getSourceRoot());
for (Path target : sourceDirectory.getDirectoryEntries()) {
if (mainRepoRoot != null) {
// For the main repo top-level directory, generate symlinks to everything in the directory
// instead of the directory itself.
for (Path target : mainRepoRoot.getDirectoryEntries()) {
String baseName = target.getBaseName();
Path execPath = execrootDirectory.getRelative(baseName);
// Create any links that don't exist yet and don't start with bazel-.
if (!baseName.startsWith(productName + "-") && !execPath.exists()) {
Path execPath = execroot.getRelative(baseName);
// Create any links that don't start with bazel-.
if (!baseName.startsWith(prefix)) {
execPath.createSymbolicLink(target);
}
}
} else {
for (Map.Entry<Path, Path> entry : mainRepoLinks.entrySet()) {
Path link = entry.getKey();
Path target = entry.getValue();
link.createSymbolicLink(target);
}
}

symlinkCorrectWorkspaceName();
}

/**
* Right now, the execution root is under the basename of the source directory, not the name
* defined in the WORKSPACE file. Thus, this adds a symlink with the WORKSPACE's workspace name
* to the old-style execution root.
*
* <p>The symlink is placed in the parent directory of the execution root, and it is only
* created when nothing exists at that path yet.
*
* <p>TODO(kchodorow): get rid of this once exec root is always under the WORKSPACE's workspace
* name.
*
* @throws IOException presumably when the symlink cannot be created; propagated from the
*     underlying file system calls
*/
private void symlinkCorrectWorkspaceName() throws IOException {
// Sibling of the execution root, named after the workspace.
Path correctDirectory = execroot.getParentDirectory().getRelative(workspaceName);
// Leave any existing entry at that path untouched.
if (!correctDirectory.exists()) {
correctDirectory.createSymbolicLink(execroot);
}
}

/**
* Creates a package identifier for 'packageFragment' in the same repository as 'repo'.
*
* @param repo the package identifier whose repository is reused; its own package fragment is
*     ignored
* @param packageFragment the package fragment of the new identifier
* @return the identifier created from the repository of 'repo' and 'packageFragment'
*/
private static PackageIdentifier createInRepo(
PackageIdentifier repo, PathFragment packageFragment) {
return PackageIdentifier.create(repo.getRepository(), packageFragment);
}
}
Loading

0 comments on commit 844e4e2

Please sign in to comment.