Skip to content

Commit

Permalink
cue/load: avoid repeatedly loading parent directories by caching
Browse files Browse the repository at this point in the history
In order to load a cue package, the `cue.load.loader` performs various
CPU/IO heavy operations:

    - readDir: in order to load a cue instance, all ancestors of the
      instance directory are loaded as well, causing multiple dir
      reads. In case multiple instances are loaded, such as in
      `cue fmt ./...`, the same directory may be read many times as
      we descend into subdirectories
    - each file is loaded as a `*build.File` and its source parsed as
      an `*ast.File`

We now maintain two separate caches of the above work:

    - a cache of directory path to []*build.File
    - a cache of file path to []*ast.File

After this change we can see the following speedup on my monorepo:

before:

    cue fmt --check ./...  2.50s user 0.54s system 83% cpu 3.661 total

after:

    cue fmt --check ./...  0.72s user 0.21s system 51% cpu 1.815 total

Signed-off-by: Noam Dolovich <noam.tzvi.dolovich@gmail.com>
Change-Id: I95472ad22eb124df730f6711af730425106a85e8
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1193678
TryBot-Result: CUEcueckoo <cueckoo@cuelang.org>
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
  • Loading branch information
NoamTD authored and mvdan committed Apr 24, 2024
1 parent b7c5800 commit d9c6c75
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 41 deletions.
67 changes: 41 additions & 26 deletions cue/load/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,35 +140,20 @@ func (l *loader) importPkg(pos token.Pos, p *build.Instance) []*build.Instance {
// Walk the parent directories up to the module root to add their files as well,
// since a package foo/bar/baz inherits from parent packages foo/bar and foo.
// See https://cuelang.org/docs/concept/modules-packages-instances/#instances.
//
// TODO(mvdan): Note that the work below, most notably readDir and ParseFile,
// is not cached or reused in any way. This causes slow-downs on big repos,
// for example `cue fmt --check ./...` on the cue repo inspects
// top-level files like README.md and LICENSE many dozens of times.
for _, d := range dirs {
for dir := filepath.Clean(d[1]); ctxt.isDir(dir); {
files, err := ctxt.readDir(dir)
if err != nil && !os.IsNotExist(err) {
return retErr(errors.Wrapf(err, pos, "import failed reading dir %v", dirs[0][1]))
sd, ok := l.dirCachedBuildFiles[dir]
if !ok {
sd = l.scanDir(dir)
l.dirCachedBuildFiles[dir] = sd
}
for _, f := range files {
if f.IsDir() {
continue
}
if f.Name() == "-" {
if _, err := cfg.fileSystem.stat("-"); !os.IsNotExist(err) {
continue
}
}
file, err := filetypes.ParseFile(f.Name(), filetypes.Input)
if err != nil {
p.UnknownFiles = append(p.UnknownFiles, &build.File{
Filename: f.Name(),
ExcludeReason: errors.Newf(token.NoPos, "unknown filetype"),
})
continue // skip unrecognized file types
}
fp.add(dir, file, importComment)
if err := sd.err; err != nil {
return retErr(errors.Wrapf(err, token.NoPos, "import failed reading dir %v", dir))
}
p.UnknownFiles = append(p.UnknownFiles, sd.unknownFiles...)
for _, f := range sd.buildFiles {
bf := *f
fp.add(dir, &bf, importComment)
}

if p.PkgName == "" || !inModule || l.cfg.isRoot(dir) || dir == d[0] {
Expand Down Expand Up @@ -220,6 +205,36 @@ func (l *loader) importPkg(pos token.Pos, p *build.Instance) []*build.Instance {
return all
}

func (l *loader) scanDir(dir string) cachedFileFiles {
sd := cachedFileFiles{}
files, err := l.cfg.fileSystem.readDir(dir)
if err != nil && !os.IsNotExist(err) {
sd.err = err
return sd
}
for _, f := range files {
if f.IsDir() {
continue
}
if f.Name() == "-" {
if _, err := l.cfg.fileSystem.stat("-"); !os.IsNotExist(err) {
continue
}
}
file, err := filetypes.ParseFile(f.Name(), filetypes.Input)
if err != nil {
sd.unknownFiles = append(sd.unknownFiles, &build.File{
Filename: f.Name(),
ExcludeReason: errors.Newf(token.NoPos, "unknown filetype"),
})
continue // skip unrecognized file types
}

sd.buildFiles = append(sd.buildFiles, file)
}
return sd
}

// _loadFunc is the method used for the value of l.loadFunc.
func (l *loader) _loadFunc(pos token.Pos, path string) *build.Instance {
impPath := importPath(path)
Expand Down
4 changes: 2 additions & 2 deletions cue/load/import_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func getInst(pkg, cwd string) (*build.Instance, error) {
if err != nil {
return nil, fmt.Errorf("unexpected error on Config.complete: %v", err)
}
l := loader{cfg: c}
l := newLoader(c, nil, nil)
inst := l.newRelInstance(token.NoPos, pkg, c.Package)
p := l.importPkg(token.NoPos, inst)[0]
return p, p.Err
Expand All @@ -51,7 +51,7 @@ func TestEmptyImport(t *testing.T) {
if err != nil {
t.Fatal(err)
}
l := loader{cfg: c}
l := newLoader(c, nil, nil)
inst := l.newInstance(token.NoPos, "")
p := l.importPkg(token.NoPos, inst)[0]
if p.Err == nil {
Expand Down
60 changes: 47 additions & 13 deletions cue/load/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ package load
import (
"path/filepath"

"cuelang.org/go/cue/ast"
"cuelang.org/go/cue/build"
"cuelang.org/go/cue/cuecontext"
"cuelang.org/go/cue/errors"
Expand All @@ -42,13 +43,36 @@ type loader struct {
stk importStack
loadFunc build.LoadFunc
pkgs *modpkgload.Packages

// When we descend into subdirectories to load patterns such as ./...
// we often end up loading parent directories many times over; cache that work by directory.
dirCachedBuildFiles map[string]cachedFileFiles

// The same file may be decoded into an *ast.File multiple times, e.g. when it is present in
// multiple different build instances in the same directory hierarchy; cache that work by file name.
fileCachedSyntaxFiles map[string]cachedSyntaxFiles
}

type (
cachedFileFiles struct {
err errors.Error
buildFiles []*build.File
unknownFiles []*build.File
}

cachedSyntaxFiles struct {
err error
files []*ast.File
}
)

func newLoader(c *Config, tg *tagger, pkgs *modpkgload.Packages) *loader {
l := &loader{
cfg: c,
tagger: tg,
pkgs: pkgs,
cfg: c,
tagger: tg,
pkgs: pkgs,
dirCachedBuildFiles: map[string]cachedFileFiles{},
fileCachedSyntaxFiles: map[string]cachedSyntaxFiles{},
}
l.loadFunc = l._loadFunc
return l
Expand Down Expand Up @@ -137,18 +161,28 @@ func (l *loader) cueFilesPackage(files []*build.File) *build.Instance {
}

func (l *loader) addFiles(dir string, p *build.Instance) {
for _, f := range p.BuildFiles {
// TODO(mvdan): reuse the same context for an entire loader
d := encoding.NewDecoder(cuecontext.New(), f, &encoding.Config{
Stdin: l.cfg.stdin(),
ParseFile: l.cfg.ParseFile,
})
for ; !d.Done(); d.Next() {
_ = p.AddSyntax(d.File())
for _, bf := range p.BuildFiles {
syntax, ok := l.fileCachedSyntaxFiles[bf.Filename]
if !ok {
syntax = cachedSyntaxFiles{}
// TODO(mvdan): reuse the same context for an entire loader
d := encoding.NewDecoder(cuecontext.New(), bf, &encoding.Config{
Stdin: l.cfg.stdin(),
ParseFile: l.cfg.ParseFile,
})
for ; !d.Done(); d.Next() {
syntax.files = append(syntax.files, d.File())
}
syntax.err = d.Err()
d.Close()
l.fileCachedSyntaxFiles[bf.Filename] = syntax
}
if err := d.Err(); err != nil {

if err := syntax.err; err != nil {
p.ReportError(errors.Promote(err, "load"))
}
d.Close()
for _, f := range syntax.files {
_ = p.AddSyntax(f)
}
}
}

0 comments on commit d9c6c75

Please sign in to comment.