Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce calls to git cat-file -s #14682

Merged
merged 9 commits into from
Feb 17, 2021
16 changes: 13 additions & 3 deletions modules/git/parse_gogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,21 @@ import (
"bytes"
"fmt"
"strconv"
"strings"

"github.com/go-git/go-git/v5/plumbing/filemode"
"github.com/go-git/go-git/v5/plumbing/object"
)

// ParseTreeEntries parses the output of a `git ls-tree -l` command.
// Each line is expected to have the form
// "<mode> <type> <sha> <space-padded-size>\t<filename>".
// It delegates to parseTreeEntries with a nil parent tree, so the returned
// entries carry no reference to an owning Tree.
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
return parseTreeEntries(data, nil)
}

func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
entries := make([]*TreeEntry, 0, 10)
for pos := 0; pos < len(data); {
// expect line to be of the form "<mode> <type> <sha>\t<filename>"
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
entry := new(TreeEntry)
entry.gogitTreeEntry = &object.TreeEntry{}
entry.ptree = ptree
Expand Down Expand Up @@ -61,7 +62,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
entry.gogitTreeEntry.Hash = id
pos += 41 // skip over sha and trailing space

end := pos + bytes.IndexByte(data[pos:], '\n')
end := pos + bytes.IndexByte(data[pos:], '\t')
if end < pos {
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
}
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
entry.sized = true

pos = end + 1

end = pos + bytes.IndexByte(data[pos:], '\n')
if end < pos {
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
}
Expand Down
13 changes: 9 additions & 4 deletions modules/git/parse_gogit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TestParseTreeEntries(t *testing.T) {
Expected: []*TreeEntry{},
},
{
Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\texample/file2.txt\n",
Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 1022\texample/file2.txt\n",
Expected: []*TreeEntry{
{
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
Expand All @@ -33,12 +33,14 @@ func TestParseTreeEntries(t *testing.T) {
Name: "example/file2.txt",
Mode: filemode.Regular,
},
size: 1022,
sized: true,
},
},
},
{
Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\t\"example/\\n.txt\"\n" +
"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8\texample\n",
Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 234131\t\"example/\\n.txt\"\n" +
"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8 -\texample\n",
Expected: []*TreeEntry{
{
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
Expand All @@ -47,9 +49,12 @@ func TestParseTreeEntries(t *testing.T) {
Name: "example/\n.txt",
Mode: filemode.Symlink,
},
size: 234131,
sized: true,
},
{
ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
sized: true,
gogitTreeEntry: &object.TreeEntry{
Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
Name: "example",
Expand Down
16 changes: 13 additions & 3 deletions modules/git/parse_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,18 @@ import (
"bytes"
"fmt"
"strconv"
"strings"
)

// ParseTreeEntries parses the output of a `git ls-tree -l` command.
// Each line is expected to have the form
// "<mode> <type> <sha> <space-padded-size>\t<filename>".
// It delegates to parseTreeEntries with a nil parent tree, so the returned
// entries carry no reference to an owning Tree.
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
return parseTreeEntries(data, nil)
}

func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
entries := make([]*TreeEntry, 0, 10)
for pos := 0; pos < len(data); {
// expect line to be of the form "<mode> <type> <sha>\t<filename>"
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
entry := new(TreeEntry)
entry.ptree = ptree
if pos+6 > len(data) {
Expand Down Expand Up @@ -56,7 +57,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
entry.ID = id
pos += 41 // skip over sha and trailing space

end := pos + bytes.IndexByte(data[pos:], '\n')
end := pos + bytes.IndexByte(data[pos:], '\t')
if end < pos {
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
}
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
entry.sized = true

pos = end + 1

end = pos + bytes.IndexByte(data[pos:], '\n')
if end < pos {
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
}
Expand Down
70 changes: 70 additions & 0 deletions modules/git/parse_nogogit_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

// +build !gogit

package git

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestParseTreeEntries(t *testing.T) {

testCases := []struct {
Input string
Expected []*TreeEntry
}{
{
Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af 8218 README.md
100644 blob 037f27dc9d353ae4fd50f0474b2194c593914e35 4681 README_ZH.md
100644 blob 9846a94f7e8350a916632929d0fda38c90dd2ca8 429 SECURITY.md
040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d - assets
`,
Expected: []*TreeEntry{
{
ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"),
name: "README.md",
entryMode: EntryModeBlob,
size: 8218,
sized: true,
},
{
ID: MustIDFromString("037f27dc9d353ae4fd50f0474b2194c593914e35"),
name: "README_ZH.md",
entryMode: EntryModeBlob,
size: 4681,
sized: true,
},
{
ID: MustIDFromString("9846a94f7e8350a916632929d0fda38c90dd2ca8"),
name: "SECURITY.md",
entryMode: EntryModeBlob,
size: 429,
sized: true,
},
{
ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"),
name: "assets",
entryMode: EntryModeTree,
zeripath marked this conversation as resolved.
Show resolved Hide resolved
sized: true,
},
},
},
}
for _, testCase := range testCases {
entries, err := ParseTreeEntries([]byte(testCase.Input))
assert.NoError(t, err)
assert.EqualValues(t, len(testCase.Expected), len(entries))
for i, entry := range entries {
assert.EqualValues(t, testCase.Expected[i].ID, entry.ID)
assert.EqualValues(t, testCase.Expected[i].name, entry.name)
assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode)
assert.EqualValues(t, testCase.Expected[i].sized, entry.sized)
assert.EqualValues(t, testCase.Expected[i].size, entry.size)
}
}
}
2 changes: 2 additions & 0 deletions modules/git/tree_entry_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,5 +87,7 @@ func (te *TreeEntry) Blob() *Blob {
ID: te.ID,
repoPath: te.ptree.repo.Path,
name: te.Name(),
size: te.size,
gotSize: te.sized,
}
}
4 changes: 2 additions & 2 deletions modules/git/tree_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func (t *Tree) ListEntries() (Entries, error) {
return t.entries, nil
}

stdout, err := NewCommand("ls-tree", t.ID.String()).RunInDirBytes(t.repo.Path)
stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path)
if err != nil {
if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") {
return nil, ErrNotExist{
Expand All @@ -55,7 +55,7 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {
if t.entriesRecursiveParsed {
return t.entriesRecursive, nil
}
stdout, err := NewCommand("ls-tree", "-t", "-r", t.ID.String()).RunInDirBytes(t.repo.Path)
stdout, err := NewCommand("ls-tree", "-t", "-l", "-r", t.ID.String()).RunInDirBytes(t.repo.Path)
if err != nil {
return nil, err
}
Expand Down
20 changes: 13 additions & 7 deletions modules/indexer/code/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,14 +179,20 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode
return nil
}

stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
RunInDir(repo.RepoPath())
if err != nil {
return err
size := update.Size

if !update.Sized {
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
RunInDir(repo.RepoPath())
if err != nil {
return err
}
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
return fmt.Errorf("Misformatted git cat-file output: %v", err)
}
}
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
return fmt.Errorf("Misformatted git cat-file output: %v", err)
} else if int64(size) > setting.Indexer.MaxIndexerFileSize {

if size > setting.Indexer.MaxIndexerFileSize {
return b.addDelete(update.Filename, repo, batch)
}

Expand Down
20 changes: 13 additions & 7 deletions modules/indexer/code/elastic_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,20 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo
return nil, nil
}

stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
RunInDir(repo.RepoPath())
if err != nil {
return nil, err
size := update.Size

if !update.Sized {
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
RunInDir(repo.RepoPath())
if err != nil {
return nil, err
}
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
}
}
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
} else if int64(size) > setting.Indexer.MaxIndexerFileSize {

if size > setting.Indexer.MaxIndexerFileSize {
return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil
}

Expand Down
8 changes: 6 additions & 2 deletions modules/indexer/code/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
type fileUpdate struct {
// Filename is the entry name taken from the parsed `git ls-tree` output.
Filename string
// BlobSha is the string form of the entry's object ID.
BlobSha string
// Size is the entry size taken from the parsed `git ls-tree -l` output.
// It is only meaningful when Sized is true.
Size int64
// Sized reports whether Size has been populated. When it is false the
// indexers obtain the size by running `git cat-file -s` on BlobSha.
Sized bool
}

// repoChanges changes (file additions/updates/removals) to a repo
Expand Down Expand Up @@ -77,6 +79,8 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
updates[idxCount] = fileUpdate{
Filename: entry.Name(),
BlobSha: entry.ID.String(),
Size: entry.Size(),
Sized: true,
}
idxCount++
}
Expand All @@ -87,7 +91,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
// genesisChanges get changes to add repo to the indexer for the first time
func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) {
var changes repoChanges
stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision).
stdout, err := git.NewCommand("ls-tree", "--full-tree", "-l", "-r", revision).
RunInDirBytes(repo.RepoPath())
if err != nil {
return nil, err
Expand Down Expand Up @@ -162,7 +166,7 @@ func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges,
}
}

cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--")
cmd := git.NewCommand("ls-tree", "--full-tree", "-l", revision, "--")
cmd.AddArguments(updatedFilenames...)
lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath())
if err != nil {
Expand Down