Skip to content

Commit

Permalink
Use git attributes to determine generated and vendored status for lan…
Browse files Browse the repository at this point in the history
…guage stats and diffs (#16773)

Replaces #16262
Replaces #16250
Replaces #14833

This PR first implements a `git check-attr` pipe reader - using `git check-attr --stdin -z --cached` - which takes into account the change in the output format in git 1.8.5. It also creates a helper function that reads a tree into a temporary index file for that pipe reader.

It then wires this in to the language stats helper and into the git diff generation.

Files which are marked generated will be folded by default.

Fixes #14786
Fixes #12653
  • Loading branch information
zeripath committed Sep 9, 2021
1 parent b83b4fb commit 248b96d
Show file tree
Hide file tree
Showing 10 changed files with 736 additions and 17 deletions.
28 changes: 28 additions & 0 deletions modules/analyze/generated.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package analyze

import (
"path/filepath"
"strings"

"github.com/go-enry/go-enry/v2/data"
)

// IsGenerated reports whether path looks like a generated file. A path is
// considered generated when its lower-cased extension is listed in
// data.GeneratedCodeExtensions or when any of the matchers in
// data.GeneratedCodeNameMatchers accepts it.
func IsGenerated(path string) bool {
	lowerExt := strings.ToLower(filepath.Ext(path))
	_, knownExt := data.GeneratedCodeExtensions[lowerExt]
	if knownExt {
		return true
	}

	for _, matches := range data.GeneratedCodeNameMatchers {
		if !matches(path) {
			continue
		}
		return true
	}

	return false
}
285 changes: 282 additions & 3 deletions modules/git/repo_attribute.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@ package git

import (
"bytes"
"context"
"fmt"
"io"
"os"
"strconv"
"strings"
)

// CheckAttributeOpts represents the possible options to CheckAttribute
Expand All @@ -21,7 +26,7 @@ type CheckAttributeOpts struct {
func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) {
err := LoadGitVersion()
if err != nil {
return nil, fmt.Errorf("Git version missing: %v", err)
return nil, fmt.Errorf("git version missing: %v", err)
}

stdOut := new(bytes.Buffer)
Expand Down Expand Up @@ -55,13 +60,14 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
cmd := NewCommand(cmdArgs...)

if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
return nil, fmt.Errorf("Failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
}

// FIXME: This is incorrect on versions < 1.8.5
fields := bytes.Split(stdOut.Bytes(), []byte{'\000'})

if len(fields)%3 != 1 {
return nil, fmt.Errorf("Wrong number of fields in return from check-attr")
return nil, fmt.Errorf("wrong number of fields in return from check-attr")
}

var name2attribute2info = make(map[string]map[string]string)
Expand All @@ -80,3 +86,276 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[

return name2attribute2info, nil
}

// CheckAttributeReader provides a reader for check-attribute content that can be long running.
//
// It drives a single `git check-attr --stdin -z` process: paths are written to
// the process through a pipe and the parsed answers are read back per path.
// Set the exported fields, call Init, start Run, query with CheckPath and
// finish with Close.
type CheckAttributeReader struct {
// params
// Attributes are the attribute names passed to check-attr; CheckPath reads
// one result per entry.
Attributes []string
// Repo supplies the directory (Repo.Path) the command is run in.
Repo *Repository
// IndexFile, when non-empty, is exported to git as GIT_INDEX_FILE and enables
// --cached (only when git is at least 1.7.8).
IndexFile string
// WorkTree, when non-empty, is exported to git as GIT_WORK_TREE (only when
// git is at least 1.7.8).
WorkTree string

// stdinReader and stdinWriter are the two ends of the pipe created in Init:
// CheckPath writes NUL-terminated paths to stdinWriter and the command reads
// them from stdinReader.
stdinReader io.ReadCloser
stdinWriter *os.File
// stdOut receives the command's output and exposes the parsed triples.
stdOut attributeWriter
// cmd is the prepared check-attr command and env the environment passed to it.
cmd *Command
env []string
// ctx and cancel bound the lifetime of the command and of pending CheckPath calls.
ctx context.Context
cancel context.CancelFunc
// running is closed by the callback given to the command runner in Run, or by
// Close; CheckPath waits on it before writing a path.
running chan struct{}
}

// Init prepares the reader for use: it builds the `git check-attr --stdin -z`
// command for the configured Attributes, creates the pipe used to feed paths
// to it and selects the output parser matching the installed git version.
//
// ctx is the parent context; the reader derives its own cancellable context
// from it. An error is returned when no Attributes are configured or when the
// pipe cannot be created. In both cases the derived context is already
// cancelled on return, so a later CheckPath reports the cancellation instead
// of panicking on a nil context or waiting forever for a command that will
// never start.
func (c *CheckAttributeReader) Init(ctx context.Context) error {
	c.running = make(chan struct{})
	c.ctx, c.cancel = context.WithCancel(ctx)

	if len(c.Attributes) == 0 {
		// Leave an already-closed writer behind so that nothing blocks on its channel.
		lw := new(nulSeparatedAttributeWriter)
		lw.attributes = make(chan attributeTriple)

		c.stdOut = lw
		_ = c.stdOut.Close()
		c.cancel()
		return fmt.Errorf("no provided Attributes to check")
	}

	cmdArgs := []string{"check-attr", "--stdin", "-z"}

	// Collect the environment in a fresh slice and append to it: IndexFile and
	// WorkTree may both be set, and neither entry must replace the other.
	var env []string
	if len(c.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
		cmdArgs = append(cmdArgs, "--cached")
		env = append(env, "GIT_INDEX_FILE="+c.IndexFile)
	}

	if len(c.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
		env = append(env, "GIT_WORK_TREE="+c.WorkTree)
	}
	c.env = env

	cmdArgs = append(cmdArgs, c.Attributes...)
	cmdArgs = append(cmdArgs, "--")

	c.cmd = NewCommandContext(c.ctx, cmdArgs...)

	var err error
	c.stdinReader, c.stdinWriter, err = os.Pipe()
	if err != nil {
		c.cancel()
		return err
	}

	// The output format of check-attr changed in git 1.8.5: newer versions are
	// parsed as NUL-separated fields, older ones line by line.
	if CheckGitVersionAtLeast("1.8.5") == nil {
		lw := new(nulSeparatedAttributeWriter)
		lw.attributes = make(chan attributeTriple, 5)

		c.stdOut = lw
	} else {
		lw := new(lineSeparatedAttributeWriter)
		lw.attributes = make(chan attributeTriple, 5)

		c.stdOut = lw
	}
	return nil
}

// Run executes the command prepared by Init and blocks until it returns.
//
// The callback handed to the command runner closes c.running, which is the
// signal CheckPath waits for before writing paths (presumably invoked once the
// process has been started - confirm against Command).
//
// When the command returns, the read end of the stdin pipe and the output
// writer are closed and the context is cancelled. An error is returned for any
// failure of the command other than the context's own error or the process
// having been killed, both of which are the expected results of shutting the
// reader down.
func (c *CheckAttributeReader) Run() error {
	defer func() {
		// The command no longer reads from the pipe; release our copy of its read end.
		if c.stdinReader != nil {
			_ = c.stdinReader.Close()
		}
		_ = c.stdOut.Close()
		// Cancel last so that the c.ctx.Err() comparison below sees the state
		// the command actually finished in.
		c.cancel()
	}()

	stdErr := new(bytes.Buffer)
	err := c.cmd.RunInDirTimeoutEnvFullPipelineFunc(c.env, -1, c.Repo.Path, c.stdOut, stdErr, c.stdinReader, func(_ context.Context, _ context.CancelFunc) error {
		close(c.running)
		return nil
	})
	if err != nil && err != c.ctx.Err() && err.Error() != "signal: killed" {
		return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String())
	}

	return nil
}

// CheckPath looks up the configured Attributes for path.
//
// It waits until the command is running (or the context is done), writes the
// NUL-terminated path to the command's stdin and then reads one result per
// configured attribute. The returned map is keyed by attribute name.
//
// An error is returned when the context is done, when the path cannot be
// written (the context is cancelled in that case), or when the output channel
// is closed before all results have arrived.
func (c *CheckAttributeReader) CheckPath(path string) (map[string]string, error) {
	select {
	case <-c.ctx.Done():
		return nil, c.ctx.Err()
	case <-c.running:
	}

	// A write to a pipe is handed straight to the kernel, so no flush is
	// needed afterwards. Calling Sync here would issue an fsync, which pipes
	// do not support, and would make every lookup fail.
	if _, err := c.stdinWriter.Write([]byte(path + "\x00")); err != nil {
		c.cancel()
		return nil, err
	}

	rs := make(map[string]string, len(c.Attributes))
	for range c.Attributes {
		select {
		case attr, ok := <-c.stdOut.ReadAttribute():
			if !ok {
				// The writer was closed: no further results will arrive. Do not
				// record the zero-value triple as if it were an answer.
				if err := c.ctx.Err(); err != nil {
					return nil, err
				}
				return nil, fmt.Errorf("check-attr output closed before all attributes of %q were read", path)
			}
			rs[attr.Attribute] = attr.Value
		case <-c.ctx.Done():
			return nil, c.ctx.Err()
		}
	}
	return rs, nil
}

// Close closes the pipe after use. It closes the running channel if that has
// not happened yet, closes the write end of the stdin pipe and finally cancels
// the reader's context. The error from closing the pipe is returned.
func (c *CheckAttributeReader) Close() error {
	select {
	case <-c.running:
		// Already closed; nothing to signal.
	default:
		close(c.running)
	}

	closeErr := c.stdinWriter.Close()
	c.cancel()
	return closeErr
}

// attributeWriter is the sink for check-attr output: it is written to and
// closed like any io.WriteCloser, and the channel returned by ReadAttribute is
// where the parsed attributeTriple values are received from.
type attributeWriter interface {
io.WriteCloser
ReadAttribute() <-chan attributeTriple
}

// attributeTriple is one parsed check-attr record: the value of a single
// attribute for a single file.
type attributeTriple struct {
// Filename is the path the record refers to.
Filename string
// Attribute is the name of the attribute.
Attribute string
// Value is the attribute's value as reported in the output.
Value string
}

// nulSeparatedAttributeWriter parses output made of NUL-terminated fields that
// repeat in the order filename, attribute, value.
type nulSeparatedAttributeWriter struct {
// tmp holds the bytes of the field currently being read; it carries over
// between Write calls when a field is split across them.
tmp []byte
// attributes receives every completed triple.
attributes chan attributeTriple
// working is the triple currently being assembled.
working attributeTriple
// pos is the index of the field being read: 0 filename, 1 attribute, 2 value.
pos int
}

// Write consumes a chunk of NUL-separated output. Every NUL ends one field;
// fields arrive in the order filename, attribute, value, and each completed
// triple is sent on wr.attributes (blocking while the channel is full). Bytes
// after the last NUL are buffered in wr.tmp and completed by a later call.
//
// All of p is always consumed, so Write returns the original len(p) and a nil
// error. Reporting fewer bytes would make callers treat the call as a short write.
func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
	total := len(p)

	for {
		nulIdx := bytes.IndexByte(p, '\x00')
		if nulIdx < 0 {
			break
		}

		wr.tmp = append(wr.tmp, p[:nulIdx]...)
		switch wr.pos {
		case 0:
			// A filename starts a fresh triple.
			wr.working = attributeTriple{
				Filename: string(wr.tmp),
			}
		case 1:
			wr.working.Attribute = string(wr.tmp)
		case 2:
			wr.working.Value = string(wr.tmp)
		}
		wr.tmp = wr.tmp[:0]

		wr.pos++
		if wr.pos > 2 {
			wr.attributes <- wr.working
			wr.pos = 0
		}

		p = p[nulIdx+1:]
	}

	// Keep the unterminated remainder for the next call.
	wr.tmp = append(wr.tmp, p...)
	return total, nil
}

// ReadAttribute returns the receive side of the channel on which Write
// delivers completed triples.
func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
return wr.attributes
}

// Close closes the attributes channel and always returns nil.
func (wr *nulSeparatedAttributeWriter) Close() error {
close(wr.attributes)
return nil
}

// lineSeparatedAttributeWriter parses output that carries one
// "filename: attribute: value" record per newline-terminated line.
type lineSeparatedAttributeWriter struct {
// tmp holds the bytes of the line currently being read; it carries over
// between Write calls when a line is split across them.
tmp []byte
// attributes receives every parsed triple.
attributes chan attributeTriple
}

// Write consumes a chunk of line-based output. Each '\n'-terminated line is
// parsed as "<filename>: <attribute>: <value>" and the resulting triple is
// sent on wr.attributes. A filename that starts with a double quote is decoded
// character by character up to the closing `": ` sequence. Bytes after the
// last newline are buffered in wr.tmp and completed by a later call.
//
// The returned count is always the original len(p); a non-nil error is
// returned when a line does not have the expected shape.
func (wr *lineSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
l := len(p)

nlIdx := bytes.IndexByte(p, '\n')
for nlIdx >= 0 {
// Complete the pending line with everything up to the newline.
wr.tmp = append(wr.tmp, p[:nlIdx]...)

if len(wr.tmp) == 0 {
// This should not happen
// An empty line carries no record: skip it and move on to the rest of p, if any.
if len(p) > nlIdx+1 {
wr.tmp = wr.tmp[:0]
p = p[nlIdx+1:]
nlIdx = bytes.IndexByte(p, '\n')
continue
} else {
return l, nil
}
}

working := attributeTriple{}
if wr.tmp[0] == '"' {
// Quoted filename: decode one (possibly escaped) character at a time.
sb := new(strings.Builder)
remaining := string(wr.tmp[1:])
for len(remaining) > 0 {
rn, _, tail, err := strconv.UnquoteChar(remaining, '"')
if err != nil {
// Decoding stops with an error at the closing quote; it must be
// followed by ": ", after which the attribute name begins.
if len(remaining) > 2 && remaining[0] == '"' && remaining[1] == ':' && remaining[2] == ' ' {
working.Filename = sb.String()
wr.tmp = []byte(remaining[3:])
break
}
return l, fmt.Errorf("unexpected tail %s", string(remaining))
}
_, _ = sb.WriteRune(rn)
remaining = tail
}
} else {
// Unquoted filename: everything before the first ':'.
idx := bytes.IndexByte(wr.tmp, ':')
if idx < 0 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}
working.Filename = string(wr.tmp[:idx])
if len(wr.tmp) < idx+2 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}
// Skip the ':' and the single byte that follows it.
wr.tmp = wr.tmp[idx+2:]
}

// What remains is "<attribute>: <value>".
idx := bytes.IndexByte(wr.tmp, ':')
if idx < 0 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}

working.Attribute = string(wr.tmp[:idx])
if len(wr.tmp) < idx+2 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}

working.Value = string(wr.tmp[idx+2:])

// Deliver the record and reset the line buffer before looking for the next newline.
wr.attributes <- working
wr.tmp = wr.tmp[:0]
if len(p) > nlIdx+1 {
p = p[nlIdx+1:]
nlIdx = bytes.IndexByte(p, '\n')
continue
} else {
return l, nil
}
}

// No newline left: keep the partial line for the next call.
wr.tmp = append(wr.tmp, p...)
return l, nil
}

// ReadAttribute returns the receive side of the channel on which Write
// delivers parsed triples.
func (wr *lineSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
return wr.attributes
}

// Close closes the attributes channel and always returns nil.
func (wr *lineSeparatedAttributeWriter) Close() error {
close(wr.attributes)
return nil
}
Loading

0 comments on commit 248b96d

Please sign in to comment.