From aecf5a6f45c87eeb193d4d3e6b627715cb860154 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 14 Feb 2021 15:42:30 +0000 Subject: [PATCH 1/5] Reduce calls to git cat-file -s There are multiple places where there are repeated calls to git cat-file -s due to the blobs not being created with their size. Through judicious use of git ls-tree -l and slight adjustments to the indexer code we can avoid a lot of these calls. Signed-off-by: Andrew Thornton --- modules/git/parse_gogit.go | 20 ++++- modules/git/parse_nogogit.go | 20 ++++- modules/git/parse_nogogit_test.go | 120 +++++++++++++++++++++++++ modules/git/tree_entry_nogogit.go | 2 + modules/git/tree_nogogit.go | 8 +- modules/indexer/code/bleve.go | 20 +++-- modules/indexer/code/elastic_search.go | 20 +++-- modules/indexer/code/git.go | 14 +-- 8 files changed, 194 insertions(+), 30 deletions(-) create mode 100644 modules/git/parse_nogogit_test.go diff --git a/modules/git/parse_gogit.go b/modules/git/parse_gogit.go index 434fb4160f75..0ae2305306a8 100644 --- a/modules/git/parse_gogit.go +++ b/modules/git/parse_gogit.go @@ -10,17 +10,18 @@ import ( "bytes" "fmt" "strconv" + "strings" "github.com/go-git/go-git/v5/plumbing/filemode" "github.com/go-git/go-git/v5/plumbing/object" ) // ParseTreeEntries parses the output of a `git ls-tree` command. -func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { - return parseTreeEntries(data, nil) +func ParseTreeEntries(data []byte, long ...bool) ([]*TreeEntry, error) { + return parseTreeEntries(data, nil, long...) } -func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { +func parseTreeEntries(data []byte, ptree *Tree, long ...bool) ([]*TreeEntry, error) { entries := make([]*TreeEntry, 0, 10) for pos := 0; pos < len(data); { // expect line to be of the form " \t" @@ -61,6 +62,19 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { entry.gogitTreeEntry.Hash = id pos += 41 // skip over sha and trailing space + if len(long) > 0 && long[0] { + end := pos + bytes.IndexByte(data[pos:], '\t') + if end < pos { + return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) + } + entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) + if entry.size > 0 { + entry.sized = true + } + + pos = end + 1 + } + end := pos + bytes.IndexByte(data[pos:], '\n') if end < pos { return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) diff --git a/modules/git/parse_nogogit.go b/modules/git/parse_nogogit.go index 26dd700af732..4ae7e00d84a3 100644 --- a/modules/git/parse_nogogit.go +++ b/modules/git/parse_nogogit.go @@ -10,14 +10,15 @@ import ( "bytes" "fmt" "strconv" + "strings" ) // ParseTreeEntries parses the output of a `git ls-tree` command. -func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { - return parseTreeEntries(data, nil) +func ParseTreeEntries(data []byte, long ...bool) ([]*TreeEntry, error) { + return parseTreeEntries(data, nil, long...) } -func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { +func parseTreeEntries(data []byte, ptree *Tree, long ...bool) ([]*TreeEntry, error) { entries := make([]*TreeEntry, 0, 10) for pos := 0; pos < len(data); { // expect line to be of the form " \t" @@ -56,6 +57,19 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { entry.ID = id pos += 41 // skip over sha and trailing space + if len(long) > 0 && long[0] { + end := pos + bytes.IndexByte(data[pos:], '\t') + if end < pos { + return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) + } + entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) + if entry.size > 0 { + entry.sized = true + } + + pos = end + 1 + } + end := pos + bytes.IndexByte(data[pos:], '\n') if end < pos { return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) diff --git a/modules/git/parse_nogogit_test.go b/modules/git/parse_nogogit_test.go new file mode 100644 index 000000000000..2ac2d1fbe0dc --- /dev/null +++ b/modules/git/parse_nogogit_test.go @@ -0,0 +1,120 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +// +build !gogit + +package git + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseTreeEntries(t *testing.T) { + testCases := []struct { + Input string + Expected []*TreeEntry + }{ + { + Input: "", + Expected: []*TreeEntry{}, + }, + { + Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af README.md +100644 blob 037f27dc9d353ae4fd50f0474b2194c593914e35 README_ZH.md +040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d assets +100644 blob 9846a94f7e8350a916632929d0fda38c90dd2ca8 atters.md +`, + Expected: []*TreeEntry{ + { + ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"), + name: "README.md", + entryMode: EntryModeBlob, + }, + { + ID: MustIDFromString("037f27dc9d353ae4fd50f0474b2194c593914e35"), + name: "README_ZH.md", + entryMode: EntryModeBlob, + }, + { + ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"), + name: "assets", + entryMode: EntryModeTree, + }, + { + ID: MustIDFromString("9846a94f7e8350a916632929d0fda38c90dd2ca8"), + name: "atters.md", + entryMode: EntryModeBlob, + }, + }, + }, + } + for _, testCase := range testCases { + entries, err := ParseTreeEntries([]byte(testCase.Input)) + assert.NoError(t, err) + assert.EqualValues(t, len(testCase.Expected), len(entries)) + for i, entry := range entries { + assert.EqualValues(t, testCase.Expected[i].ID, entry.ID) + assert.EqualValues(t, testCase.Expected[i].name, entry.name) + assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode) + } + } +} + +func TestParseTreeEntriesLong(t *testing.T) { + + testCases := []struct { + Input string + Expected []*TreeEntry + }{ + { + Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af 8218 README.md +100644 blob 037f27dc9d353ae4fd50f0474b2194c593914e35 4681 README_ZH.md +100644 blob 9846a94f7e8350a916632929d0fda38c90dd2ca8 429 SECURITY.md +040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d - assets +`, + Expected: []*TreeEntry{ + { + ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"), + name: "README.md", + entryMode: EntryModeBlob, + size: 8218, + sized: true, + }, + { + ID: MustIDFromString("037f27dc9d353ae4fd50f0474b2194c593914e35"), + name: "README_ZH.md", + entryMode: EntryModeBlob, + size: 4681, + sized: true, + }, + { + ID: MustIDFromString("9846a94f7e8350a916632929d0fda38c90dd2ca8"), + name: "SECURITY.md", + entryMode: EntryModeBlob, + size: 429, + sized: true, + }, + { + ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"), + name: "assets", + entryMode: EntryModeTree, + }, + }, + }, + } + for _, testCase := range testCases { + entries, err := ParseTreeEntries([]byte(testCase.Input), true) + assert.NoError(t, err) + assert.EqualValues(t, len(testCase.Expected), len(entries)) + for i, entry := range entries { + assert.EqualValues(t, testCase.Expected[i].ID, entry.ID) + assert.EqualValues(t, testCase.Expected[i].name, entry.name) + assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode) + assert.EqualValues(t, testCase.Expected[i].sized, entry.sized) + assert.EqualValues(t, testCase.Expected[i].size, entry.size) + } + } +} diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index f18daee7788f..fd60de36f528 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -87,5 +87,7 @@ func (te *TreeEntry) Blob() *Blob { ID: te.ID, repoPath: te.ptree.repo.Path, name: te.Name(), + size: te.size, + gotSize: te.sized, } } diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index e78115b77765..531518a7dfb6 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -32,7 +32,7 @@ func (t *Tree) ListEntries() (Entries, error) { return t.entries, nil } - stdout, err := NewCommand("ls-tree", t.ID.String()).RunInDirBytes(t.repo.Path) + stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path) if err != nil { if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") { return nil, ErrNotExist{ @@ -42,7 +42,7 @@ func (t *Tree) ListEntries() (Entries, error) { return nil, err } - t.entries, err = parseTreeEntries(stdout, t) + t.entries, err = parseTreeEntries(stdout, t, true) if err == nil { t.entriesParsed = true } @@ -55,12 +55,12 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) { if t.entriesRecursiveParsed { return t.entriesRecursive, nil } - stdout, err := NewCommand("ls-tree", "-t", "-r", t.ID.String()).RunInDirBytes(t.repo.Path) + stdout, err := NewCommand("ls-tree", "-t", "-l", "-r", t.ID.String()).RunInDirBytes(t.repo.Path) if err != nil { return nil, err } - t.entriesRecursive, err = parseTreeEntries(stdout, t) + t.entriesRecursive, err = parseTreeEntries(stdout, t, true) if err == nil { t.entriesRecursiveParsed = true } diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index 826efde4c1da..e2642d36b58a 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -179,14 +179,20 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode return nil } - stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). - RunInDir(repo.RepoPath()) - if err != nil { - return err + size := update.Size + + if size == 0 { + stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). + RunInDir(repo.RepoPath()) + if err != nil { + return err + } + if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil { + return fmt.Errorf("Misformatted git cat-file output: %v", err) + } } - if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { - return fmt.Errorf("Misformatted git cat-file output: %v", err) - } else if int64(size) > setting.Indexer.MaxIndexerFileSize { + + if size > setting.Indexer.MaxIndexerFileSize { return b.addDelete(update.Filename, repo, batch) } diff --git a/modules/indexer/code/elastic_search.go b/modules/indexer/code/elastic_search.go index f81dbb34d42b..b173ceff9e7a 100644 --- a/modules/indexer/code/elastic_search.go +++ b/modules/indexer/code/elastic_search.go @@ -178,14 +178,20 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo return nil, nil } - stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). - RunInDir(repo.RepoPath()) - if err != nil { - return nil, err + size := update.Size + + if size == 0 { + stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). + RunInDir(repo.RepoPath()) + if err != nil { + return nil, err + } + if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil { + return nil, fmt.Errorf("Misformatted git cat-file output: %v", err) + } } - if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { - return nil, fmt.Errorf("Misformatted git cat-file output: %v", err) - } else if int64(size) > setting.Indexer.MaxIndexerFileSize { + + if size > setting.Indexer.MaxIndexerFileSize { return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index 37ab5ac3d326..5f1c79667c8d 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -17,6 +17,7 @@ import ( type fileUpdate struct { Filename string BlobSha string + Size int64 } // repoChanges changes (file additions/updates/removals) to a repo @@ -65,8 +66,8 @@ func isIndexable(entry *git.TreeEntry) bool { } // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command -func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { - entries, err := git.ParseTreeEntries(stdout) +func parseGitLsTreeOutput(stdout []byte, long bool) ([]fileUpdate, error) { + entries, err := git.ParseTreeEntries(stdout, long) if err != nil { return nil, err } @@ -77,6 +78,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { updates[idxCount] = fileUpdate{ Filename: entry.Name(), BlobSha: entry.ID.String(), + Size: entry.Size(), } idxCount++ } @@ -87,12 +89,12 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { // genesisChanges get changes to add repo to the indexer for the first time func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) { var changes repoChanges - stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision). + stdout, err := git.NewCommand("ls-tree", "--full-tree", "-l", "-r", revision). RunInDirBytes(repo.RepoPath()) if err != nil { return nil, err } - changes.Updates, err = parseGitLsTreeOutput(stdout) + changes.Updates, err = parseGitLsTreeOutput(stdout, true) return &changes, err } @@ -162,12 +164,12 @@ func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges, } } - cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--") + cmd := git.NewCommand("ls-tree", "--full-tree", "-l", revision, "--") cmd.AddArguments(updatedFilenames...) lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath()) if err != nil { return nil, err } - changes.Updates, err = parseGitLsTreeOutput(lsTreeStdout) + changes.Updates, err = parseGitLsTreeOutput(lsTreeStdout, true) return &changes, err } From 8b3b6df4930610709c879fb7f00c15f16479230f Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 15 Feb 2021 19:28:30 +0000 Subject: [PATCH 2/5] simplify by always expecting the long format Signed-off-by: Andrew Thornton --- modules/git/parse_gogit.go | 32 +++++++++---------- modules/git/parse_gogit_test.go | 10 ++++-- modules/git/parse_nogogit.go | 32 +++++++++---------- modules/git/parse_nogogit_test.go | 53 +------------------------------ modules/git/tree_nogogit.go | 4 +-- modules/indexer/code/git.go | 8 ++--- 6 files changed, 44 insertions(+), 95 deletions(-) diff --git a/modules/git/parse_gogit.go b/modules/git/parse_gogit.go index 0ae2305306a8..91ac64978bd5 100644 --- a/modules/git/parse_gogit.go +++ b/modules/git/parse_gogit.go @@ -16,15 +16,15 @@ import ( "github.com/go-git/go-git/v5/plumbing/object" ) -// ParseTreeEntries parses the output of a `git ls-tree` command. -func ParseTreeEntries(data []byte, long ...bool) ([]*TreeEntry, error) { - return parseTreeEntries(data, nil, long...) +// ParseTreeEntries parses the output of a `git ls-tree -l` command. +func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { + return parseTreeEntries(data, nil) } -func parseTreeEntries(data []byte, ptree *Tree, long ...bool) ([]*TreeEntry, error) { +func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { entries := make([]*TreeEntry, 0, 10) for pos := 0; pos < len(data); { - // expect line to be of the form " \t" + // expect line to be of the form " \t" entry := new(TreeEntry) entry.gogitTreeEntry = &object.TreeEntry{} entry.ptree = ptree @@ -62,20 +62,18 @@ func parseTreeEntries(data []byte, ptree *Tree, long ...bool) ([]*TreeEntry, err entry.gogitTreeEntry.Hash = id pos += 41 // skip over sha and trailing space - if len(long) > 0 && long[0] { - end := pos + bytes.IndexByte(data[pos:], '\t') - if end < pos { - return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) - } - entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) - if entry.size > 0 { - entry.sized = true - } - - pos = end + 1 + end := pos + bytes.IndexByte(data[pos:], '\t') + if end < pos { + return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) } + entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) + if entry.size > 0 { + entry.sized = true + } + + pos = end + 1 - end := pos + bytes.IndexByte(data[pos:], '\n') + end = pos + bytes.IndexByte(data[pos:], '\n') if end < pos { return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) } diff --git a/modules/git/parse_gogit_test.go b/modules/git/parse_gogit_test.go index cf38c29932f5..70ec541b59b5 100644 --- a/modules/git/parse_gogit_test.go +++ b/modules/git/parse_gogit_test.go @@ -24,7 +24,7 @@ func TestParseTreeEntries(t *testing.T) { Expected: []*TreeEntry{}, }, { - Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\texample/file2.txt\n", + Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 1022\texample/file2.txt\n", Expected: []*TreeEntry{ { ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"), @@ -33,12 +33,14 @@ func TestParseTreeEntries(t *testing.T) { Name: "example/file2.txt", Mode: filemode.Regular, }, + size: 1022, + sized: true, }, }, }, { - Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\t\"example/\\n.txt\"\n" + - "040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8\texample\n", + Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 234131\t\"example/\\n.txt\"\n" + + "040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8 -\texample\n", Expected: []*TreeEntry{ { ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"), @@ -47,6 +49,8 @@ func TestParseTreeEntries(t *testing.T) { Name: "example/\n.txt", Mode: filemode.Symlink, }, + size: 234131, + sized: true, }, { ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), diff --git a/modules/git/parse_nogogit.go b/modules/git/parse_nogogit.go index 4ae7e00d84a3..394c23f4556e 100644 --- a/modules/git/parse_nogogit.go +++ b/modules/git/parse_nogogit.go @@ -13,15 +13,15 @@ import ( "strings" ) -// ParseTreeEntries parses the output of a `git ls-tree` command. -func ParseTreeEntries(data []byte, long ...bool) ([]*TreeEntry, error) { - return parseTreeEntries(data, nil, long...) +// ParseTreeEntries parses the output of a `git ls-tree -l` command. +func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { + return parseTreeEntries(data, nil) } -func parseTreeEntries(data []byte, ptree *Tree, long ...bool) ([]*TreeEntry, error) { +func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { entries := make([]*TreeEntry, 0, 10) for pos := 0; pos < len(data); { - // expect line to be of the form " \t" + // expect line to be of the form " \t" entry := new(TreeEntry) entry.ptree = ptree if pos+6 > len(data) { @@ -57,20 +57,18 @@ func parseTreeEntries(data []byte, ptree *Tree, long ...bool) ([]*TreeEntry, err entry.ID = id pos += 41 // skip over sha and trailing space - if len(long) > 0 && long[0] { - end := pos + bytes.IndexByte(data[pos:], '\t') - if end < pos { - return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) - } - entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) - if entry.size > 0 { - entry.sized = true - } - - pos = end + 1 + end := pos + bytes.IndexByte(data[pos:], '\t') + if end < pos { + return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) } + entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) + if entry.size > 0 { + entry.sized = true + } + + pos = end + 1 - end := pos + bytes.IndexByte(data[pos:], '\n') + end = pos + bytes.IndexByte(data[pos:], '\n') if end < pos { return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) } diff --git a/modules/git/parse_nogogit_test.go b/modules/git/parse_nogogit_test.go index 2ac2d1fbe0dc..ed04338bb51d 100644 --- a/modules/git/parse_nogogit_test.go +++ b/modules/git/parse_nogogit_test.go @@ -13,57 +13,6 @@ import ( ) func TestParseTreeEntries(t *testing.T) { - testCases := []struct { - Input string - Expected []*TreeEntry - }{ - { - Input: "", - Expected: []*TreeEntry{}, - }, - { - Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af README.md -100644 blob 037f27dc9d353ae4fd50f0474b2194c593914e35 README_ZH.md -040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d assets -100644 blob 9846a94f7e8350a916632929d0fda38c90dd2ca8 atters.md -`, - Expected: []*TreeEntry{ - { - ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"), - name: "README.md", - entryMode: EntryModeBlob, - }, - { - ID: MustIDFromString("037f27dc9d353ae4fd50f0474b2194c593914e35"), - name: "README_ZH.md", - entryMode: EntryModeBlob, - }, - { - ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"), - name: "assets", - entryMode: EntryModeTree, - }, - { - ID: MustIDFromString("9846a94f7e8350a916632929d0fda38c90dd2ca8"), - name: "atters.md", - entryMode: EntryModeBlob, - }, - }, - }, - } - for _, testCase := range testCases { - entries, err := ParseTreeEntries([]byte(testCase.Input)) - assert.NoError(t, err) - assert.EqualValues(t, len(testCase.Expected), len(entries)) - for i, entry := range entries { - assert.EqualValues(t, testCase.Expected[i].ID, entry.ID) - assert.EqualValues(t, testCase.Expected[i].name, entry.name) - assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode) - } - } -} - -func TestParseTreeEntriesLong(t *testing.T) { testCases := []struct { Input string @@ -106,7 +55,7 @@ func TestParseTreeEntriesLong(t *testing.T) { }, } for _, testCase := range testCases { - entries, err := ParseTreeEntries([]byte(testCase.Input), true) + entries, err := ParseTreeEntries([]byte(testCase.Input)) assert.NoError(t, err) assert.EqualValues(t, len(testCase.Expected), len(entries)) for i, entry := range entries { diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index 531518a7dfb6..3ebdf10631db 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -42,7 +42,7 @@ func (t *Tree) ListEntries() (Entries, error) { return nil, err } - t.entries, err = parseTreeEntries(stdout, t, true) + t.entries, err = parseTreeEntries(stdout, t) if err == nil { t.entriesParsed = true } @@ -60,7 +60,7 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) { return nil, err } - t.entriesRecursive, err = parseTreeEntries(stdout, t, true) + t.entriesRecursive, err = parseTreeEntries(stdout, t) if err == nil { t.entriesRecursiveParsed = true } diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index 5f1c79667c8d..5f00ea1c4896 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -66,8 +66,8 @@ func isIndexable(entry *git.TreeEntry) bool { } // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command -func parseGitLsTreeOutput(stdout []byte, long bool) ([]fileUpdate, error) { - entries, err := git.ParseTreeEntries(stdout, long) +func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { + entries, err := git.ParseTreeEntries(stdout) if err != nil { return nil, err } @@ -94,7 +94,7 @@ func genesisChanges(repo *models.Repository, revision string) (*repoChanges, err if err != nil { return nil, err } - changes.Updates, err = parseGitLsTreeOutput(stdout, true) + changes.Updates, err = parseGitLsTreeOutput(stdout) return &changes, err } @@ -170,6 +170,6 @@ func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges, if err != nil { return nil, err } - changes.Updates, err = parseGitLsTreeOutput(lsTreeStdout, true) + changes.Updates, err = parseGitLsTreeOutput(lsTreeStdout) return &changes, err } From b5d8442b0df49327bdd458c7264ded6eccc9082e Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 15 Feb 2021 19:35:05 +0000 Subject: [PATCH 3/5] Also always set the sized field and tell the indexer the update is sized Signed-off-by: Andrew Thornton --- modules/git/parse_gogit.go | 4 +--- modules/git/parse_nogogit.go | 4 +--- modules/indexer/code/bleve.go | 2 +- modules/indexer/code/elastic_search.go | 2 +- modules/indexer/code/git.go | 2 ++ 5 files changed, 6 insertions(+), 8 deletions(-) diff --git a/modules/git/parse_gogit.go b/modules/git/parse_gogit.go index 91ac64978bd5..a50ebec3dd72 100644 --- a/modules/git/parse_gogit.go +++ b/modules/git/parse_gogit.go @@ -67,9 +67,7 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) } entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) - if entry.size > 0 { - entry.sized = true - } + entry.sized = true pos = end + 1 diff --git a/modules/git/parse_nogogit.go b/modules/git/parse_nogogit.go index 394c23f4556e..e9e93f66fdc1 100644 --- a/modules/git/parse_nogogit.go +++ b/modules/git/parse_nogogit.go @@ -62,9 +62,7 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) } entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) - if entry.size > 0 { - entry.sized = true - } + entry.sized = true pos = end + 1 diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index e2642d36b58a..1ebc74c43aa5 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -181,7 +181,7 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode size := update.Size - if size == 0 { + if !update.Sized { stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). RunInDir(repo.RepoPath()) if err != nil { diff --git a/modules/indexer/code/elastic_search.go b/modules/indexer/code/elastic_search.go index b173ceff9e7a..c9d604b694b8 100644 --- a/modules/indexer/code/elastic_search.go +++ b/modules/indexer/code/elastic_search.go @@ -180,7 +180,7 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo size := update.Size - if size == 0 { + if !update.Sized { stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). RunInDir(repo.RepoPath()) if err != nil { diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index 5f00ea1c4896..919d7854062f 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -18,6 +18,7 @@ type fileUpdate struct { Filename string BlobSha string Size int64 + Sized bool } // repoChanges changes (file additions/updates/removals) to a repo @@ -79,6 +80,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { Filename: entry.Name(), BlobSha: entry.ID.String(), Size: entry.Size(), + Sized: true, } idxCount++ } From 636377c2ccecab33b31e3dc36a31594b0439d34d Mon Sep 17 00:00:00 2001 From: zeripath Date: Mon, 15 Feb 2021 22:51:31 +0000 Subject: [PATCH 4/5] Apply suggestions from code review --- modules/git/parse_gogit_test.go | 1 + modules/git/parse_nogogit_test.go | 1 + 2 files changed, 2 insertions(+) diff --git a/modules/git/parse_gogit_test.go b/modules/git/parse_gogit_test.go index 70ec541b59b5..775179b4999b 100644 --- a/modules/git/parse_gogit_test.go +++ b/modules/git/parse_gogit_test.go @@ -54,6 +54,7 @@ func TestParseTreeEntries(t *testing.T) { }, { ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), + sized: true, gogitTreeEntry: &object.TreeEntry{ Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), Name: "example", diff --git a/modules/git/parse_nogogit_test.go b/modules/git/parse_nogogit_test.go index ed04338bb51d..e08d5653bbf1 100644 --- a/modules/git/parse_nogogit_test.go +++ b/modules/git/parse_nogogit_test.go @@ -50,6 +50,7 @@ func TestParseTreeEntries(t *testing.T) { ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"), name: "assets", entryMode: EntryModeTree, + sized: true, }, }, }, From f0ea4c2085462f0eec67bae0ef143f48994e1026 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Tue, 16 Feb 2021 23:20:48 +0100 Subject: [PATCH 5/5] make fmt --- modules/git/parse_gogit_test.go | 2 +- modules/git/parse_nogogit_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/git/parse_gogit_test.go b/modules/git/parse_gogit_test.go index 775179b4999b..c6374133c095 100644 --- a/modules/git/parse_gogit_test.go +++ b/modules/git/parse_gogit_test.go @@ -53,7 +53,7 @@ func TestParseTreeEntries(t *testing.T) { sized: true, }, { - ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), + ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), sized: true, gogitTreeEntry: &object.TreeEntry{ Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), diff --git a/modules/git/parse_nogogit_test.go b/modules/git/parse_nogogit_test.go index e08d5653bbf1..a9e7dcc7f8b1 100644 --- a/modules/git/parse_nogogit_test.go +++ b/modules/git/parse_nogogit_test.go @@ -50,7 +50,7 @@ func TestParseTreeEntries(t *testing.T) { ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"), name: "assets", entryMode: EntryModeTree, - sized: true, + sized: true, }, }, },