Skip to content

Commit

Permalink
internal/gopathwalk: check ignored directories lexically
Browse files Browse the repository at this point in the history
Previously we used os.SameFile, but that is needlessly expensive when
we expect the original (user-provided) paths to be preserved during
the walk: the user-provided list of ignored directories is relative.

Because 'go list' does not traverse symlinks, we do not expect a
user's source roots to contain symlinks for Go package source either,
and in the rare even that the tree contains a directory that resides
within a non-ignored subdirectory symlink, the user can explicitly
include all of the relative paths by which the directory may be
encountered.

This reduces benchmark latency by ~7.5% compared to CL 508506, bringing
the overall latency to +~6% over the previous approach using
internal/fastwalk.

	~/x/tools$ benchstat cl508506.txt cl508507.txt
	goos: linux
	goarch: amd64
	pkg: golang.org/x/tools/internal/imports
	cpu: AMD EPYC 7B12
	                │ cl508506.txt │            cl508507.txt            │
	                │    sec/op    │   sec/op     vs base               │
	ScanModCache-24    264.4m ± 1%   244.6m ± 1%  -7.49% (p=0.000 n=10)

	~/x/tools$ benchstat cl508505.txt cl508507.txt
	goos: linux
	goarch: amd64
	pkg: golang.org/x/tools/internal/imports
	cpu: AMD EPYC 7B12
	                │ cl508505.txt │            cl508507.txt            │
	                │    sec/op    │   sec/op     vs base               │
	ScanModCache-24    231.0m ± 1%   244.6m ± 1%  +5.90% (p=0.000 n=10)

For golang/go#58035.

Change-Id: I937faf7793b8fad10a88b2fdc21fa4e4001c7246
Reviewed-on: https://go-review.googlesource.com/c/tools/+/508507
Auto-Submit: Bryan Mills <bcmills@google.com>
Reviewed-by: Heschi Kreinick <heschi@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
  • Loading branch information
Bryan C. Mills authored and gopherbot committed Oct 30, 2023
1 parent 1762c80 commit 3aa6cfd
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 28 deletions.
34 changes: 12 additions & 22 deletions internal/gopathwalk/walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ type walker struct {
skip func(Root, string) bool // The callback that will be invoked for every dir. dir is skipped if it returns true.
opts Options // Options passed to Walk by the user.

ignoredDirs []os.FileInfo // The ignored directories, loaded from .goimportsignore files.
ignoredDirs []string

added map[string]bool
}
Expand All @@ -133,13 +133,9 @@ func (w *walker) init() {

for _, p := range ignoredPaths {
full := filepath.Join(w.root.Path, p)
if fi, err := os.Stat(full); err == nil {
w.ignoredDirs = append(w.ignoredDirs, fi)
if w.opts.Logf != nil {
w.opts.Logf("Directory added to ignore list: %s", full)
}
} else if w.opts.Logf != nil {
w.opts.Logf("Error statting ignored directory: %v", err)
w.ignoredDirs = append(w.ignoredDirs, full)
if w.opts.Logf != nil {
w.opts.Logf("Directory added to ignore list: %s", full)
}
}
}
Expand Down Expand Up @@ -174,16 +170,9 @@ func (w *walker) getIgnoredDirs(path string) []string {
}

// shouldSkipDir reports whether the file should be skipped or not.
func (w *walker) shouldSkipDir(fi os.FileInfo, dir string) bool {
func (w *walker) shouldSkipDir(dir string) bool {
for _, ignoredDir := range w.ignoredDirs {
// TODO(bcmills): Given that we already ought to be preserving the
// user-provided paths for directories encountered via symlinks, and that we
// don't expect GOROOT/src or GOPATH/src to itself contain symlinks,
// os.SameFile seems needlessly expensive here — it forces the caller to
// obtain an os.FileInfo instead of a potentially much cheaper fs.DirEntry.
//
// Can we drop this and use a lexical comparison instead?
if os.SameFile(fi, ignoredDir) {
if dir == ignoredDir {
return true
}
}
Expand Down Expand Up @@ -223,8 +212,7 @@ func (w *walker) walk(path string, d fs.DirEntry, err error) error {
(!w.opts.ModulesEnabled && base == "node_modules") {
return filepath.SkipDir
}
fi, err := d.Info()
if err == nil && w.shouldSkipDir(fi, path) {
if w.shouldSkipDir(path) {
return filepath.SkipDir
}
return nil
Expand Down Expand Up @@ -293,6 +281,10 @@ func (w *walker) walk(path string, d fs.DirEntry, err error) error {
// should be followed. It makes sure symlinks were never visited
// before to avoid symlink loops.
func (w *walker) shouldTraverse(path string) bool {
if w.shouldSkipDir(path) {
return false
}

ts, err := os.Stat(path)
if err != nil {
logf := w.opts.Logf
Expand All @@ -305,9 +297,7 @@ func (w *walker) shouldTraverse(path string) bool {
if !ts.IsDir() {
return false
}
if w.shouldSkipDir(ts, filepath.Dir(path)) {
return false
}

// Check for symlink loops by statting each directory component
// and seeing if any are the same file as ts.
for {
Expand Down
7 changes: 1 addition & 6 deletions internal/gopathwalk/walk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,8 @@ func TestShouldTraverse(t *testing.T) {
},
}
for i, tt := range tests {
fi, err := os.Stat(filepath.Join(dir, tt.dir, tt.file))
if err != nil {
t.Errorf("%d. Stat = %v", i, err)
continue
}
var w walker
got := w.shouldTraverse(filepath.Join(dir, tt.dir, fi.Name()))
got := w.shouldTraverse(filepath.Join(dir, tt.dir, tt.file))
if got != tt.want {
t.Errorf("%d. shouldTraverse(%q, %q) = %v; want %v", i, tt.dir, tt.file, got, tt.want)
}
Expand Down

0 comments on commit 3aa6cfd

Please sign in to comment.