diff --git a/commands/command_checkout.go b/commands/command_checkout.go index 6a5e136ecc..6bf9534ceb 100644 --- a/commands/command_checkout.go +++ b/commands/command_checkout.go @@ -73,7 +73,7 @@ func checkoutCommand(cmd *cobra.Command, args []string) { chgitscanner.Filter = filepathfilter.New(rootedPaths(args), nil, filepathfilter.GitIgnore) - if err := chgitscanner.ScanTree(ref.Sha, nil); err != nil { + if err := chgitscanner.ScanLFSFiles(ref.Sha, nil); err != nil { ExitWithError(err) } diff --git a/commands/command_pull.go b/commands/command_pull.go index bbe1f53905..cbfdc0e7fb 100644 --- a/commands/command_pull.go +++ b/commands/command_pull.go @@ -87,7 +87,7 @@ func pull(filter *filepathfilter.Filter) { }() processQueue := time.Now() - if err := gitscanner.ScanTree(ref.Sha, nil); err != nil { + if err := gitscanner.ScanLFSFiles(ref.Sha, nil); err != nil { singleCheckout.Close() ExitWithError(err) } diff --git a/git/git.go b/git/git.go index 9afe71a811..74bdf48df2 100644 --- a/git/git.go +++ b/git/git.go @@ -316,6 +316,18 @@ func LsTree(ref string) (*subprocess.BufferedCmd, error) { ) } +func LsFilesLFS() (*subprocess.BufferedCmd, error) { + // This requires Git 2.42.0 for `--format` with `objecttype`. + return gitNoLFSBuffered( + "ls-files", + "--cached", + "--full-name", + "-z", + "--format=%(objectmode) %(objecttype) %(objectname) %(objectsize)\t%(path)", + ":(top,attr:filter=lfs)", + ) +} + func ResolveRef(ref string) (*Ref, error) { outp, err := gitNoLFSSimple("rev-parse", ref, "--symbolic-full-name", ref) if err != nil { diff --git a/lfs/gitscanner.go b/lfs/gitscanner.go index 764aa0f1ae..6259e3c3f0 100644 --- a/lfs/gitscanner.go +++ b/lfs/gitscanner.go @@ -219,6 +219,22 @@ func (s *GitScanner) ScanTree(ref string, cb GitScannerFoundPointer) error { return err } +// ScanLFSFiles takes a ref, which points to HEAD, and reports WrappedPointer +// objects from the index (Git 2.42.0 or later) or the tree at that ref. 
Differs from ScanRefs in that +// multiple files in the tree with the same content are all reported. +func (s *GitScanner) ScanLFSFiles(ref string, cb GitScannerFoundPointer) error { + callback, err := firstGitScannerCallback(cb, s.foundPointer) + if err != nil { + return err + } + + start := time.Now() + err = runScanLFSFiles(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv()) + tracerx.PerformanceSince("ScanLFSFiles", start) + + return err +} + // ScanUnpushed scans history for all LFS pointers which have been added but not // pushed to the named remote. remote can be left blank to mean 'any remote'. func (s *GitScanner) ScanUnpushed(remote string, cb GitScannerFoundPointer) error { diff --git a/lfs/gitscanner_tree.go b/lfs/gitscanner_tree.go index 247f14b5e9..1fdf50c5a5 100644 --- a/lfs/gitscanner_tree.go +++ b/lfs/gitscanner_tree.go @@ -10,6 +10,7 @@ import ( "github.com/git-lfs/git-lfs/v3/filepathfilter" "github.com/git-lfs/git-lfs/v3/git" "github.com/git-lfs/git-lfs/v3/git/gitattr" + "github.com/git-lfs/git-lfs/v3/subprocess" "github.com/git-lfs/git-lfs/v3/tr" ) @@ -38,6 +39,39 @@ func runScanTree(cb GitScannerFoundPointer, ref string, filter *filepathfilter.F return nil } +func runScanLFSFiles(cb GitScannerFoundPointer, ref string, filter *filepathfilter.Filter, gitEnv, osEnv config.Environment) error { + var treeShas *TreeBlobChannelWrapper + var err error + if git.IsGitVersionAtLeast("2.42.0") { + treeShas, err = lsFilesBlobs(func(t *git.TreeBlob) bool { + return t != nil && t.Size < blobSizeCutoff && filter.Allows(t.Filename) + }) + } else { + treeShas, err = lsTreeBlobs(ref, func(t *git.TreeBlob) bool { + return t != nil && t.Size < blobSizeCutoff && filter.Allows(t.Filename) + }) + } + // We don't use the nameMap approach here since that's imprecise when >1 file + // can be using the same content. + if err != nil { + return err + } + + pcw, err := catFileBatchTree(treeShas, gitEnv, osEnv) + if err != nil { + return err + } + + for p := range 
pcw.Results { + cb(p, nil) + } + + if err := pcw.Wait(); err != nil { + cb(nil, err) + } + return nil +} + // catFileBatchTree() uses an ObjectDatabase from the // github.com/git-lfs/gitobj/v2 package to get the contents of Git // blob objects, given their SHA1s from git.TreeBlob structs, similar @@ -98,7 +132,13 @@ func catFileBatchTree(treeblobs *TreeBlobChannelWrapper, gitEnv, osEnv config.En // The returned channel will be sent these blobs which should be sent to catFileBatchTree // for final check & conversion to Pointer func lsTreeBlobs(ref string, predicate func(*git.TreeBlob) bool) (*TreeBlobChannelWrapper, error) { - cmd, err := git.LsTree(ref) + return lsBlobs(func() (*subprocess.BufferedCmd, error) { + return git.LsTree(ref) + }, predicate) +} + +func lsBlobs(backend func() (*subprocess.BufferedCmd, error), predicate func(*git.TreeBlob) bool) (*TreeBlobChannelWrapper, error) { + cmd, err := backend() if err != nil { return nil, err } @@ -128,6 +168,15 @@ func lsTreeBlobs(ref string, predicate func(*git.TreeBlob) bool) (*TreeBlobChann return NewTreeBlobChannelWrapper(blobs, errchan), nil } +// Use ls-files on the index to find candidate blobs whose paths have the filter=lfs attribute. +// The returned channel will be sent these blobs, which should be sent to catFileBatchTree +// for final check & conversion to Pointer. +func lsFilesBlobs(predicate func(*git.TreeBlob) bool) (*TreeBlobChannelWrapper, error) { + return lsBlobs(func() (*subprocess.BufferedCmd, error) { + return git.LsFilesLFS() + }, predicate) +} + func catFileBatchTreeForPointers(treeblobs *TreeBlobChannelWrapper, gitEnv, osEnv config.Environment) (map[string]*WrappedPointer, *filepathfilter.Filter, error) { pscanner, err := NewPointerScanner(gitEnv, osEnv) if err != nil {