Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a Git-backed storage.ReadBucket via storagegit #2114

Merged
merged 18 commits into from
May 25, 2023
Merged
2 changes: 2 additions & 0 deletions private/pkg/git/branch_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
)

func TestBranches(t *testing.T) {
t.Parallel()

repo := gittest.ScaffoldGitRepository(t)
var branches []string
err := repo.BranchIterator.ForEachBranch(func(branch string) error {
Expand Down
2 changes: 2 additions & 0 deletions private/pkg/git/commit_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
)

func TestCommits(t *testing.T) {
t.Parallel()

repo := gittest.ScaffoldGitRepository(t)
var commits []git.Commit
err := repo.CommitIterator.ForEachCommit(gittest.DefaultBranch, func(c git.Commit) error {
Expand Down
2 changes: 2 additions & 0 deletions private/pkg/git/commit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
)

func TestParseCommit(t *testing.T) {
t.Parallel()

hash, err := parseHashFromHex("43848150a6f5f6d76eeef6e0f69eb46290eefab6")
require.NoError(t, err)
commit, err := parseCommit(
Expand Down
49 changes: 49 additions & 0 deletions private/pkg/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,30 @@ import (
)

const (
// DotGitDir is a relative path to the `.git` directory.
DotGitDir = ".git"

// The Mode* values below are written as octal literals.

// ModeUnknown is a mode's zero value.
ModeUnknown ObjectMode = 0
// ModeFile is a blob that should be written as a plain file.
ModeFile ObjectMode = 010_0644
// ModeExe is a blob that should be written with the executable bit set.
ModeExe ObjectMode = 010_0755
// ModeDir is a tree to be unpacked as a subdirectory in the current
// directory.
ModeDir ObjectMode = 004_0000
// ModeSymlink is a blob with its content being the path linked to.
ModeSymlink ObjectMode = 012_0000
// ModeSubmodule is a commit that the submodule is checked out at.
ModeSubmodule ObjectMode = 016_0000
)

// ErrTreeNodeNotFound is the sentinel error returned when a tree lookup does
// not find a node; compare against it with errors.Is.
var ErrTreeNodeNotFound = errors.New("node not found")

// ObjectMode describes how to interpret the object a tree node refers to.
// See the Mode* constants for the meaning of each value; the zero value is
// ModeUnknown.
type ObjectMode uint32

// Name is a name identifiable by git.
type Name interface {
// If cloneBranch returns a non-empty string, any clones will be performed with --branch set to the value.
Expand Down Expand Up @@ -280,8 +301,12 @@ type AnnotatedTag interface {

// ObjectReader reads objects (commits, trees, blobs) from a `.git` directory.
type ObjectReader interface {
// Blob reads the blob identified by the hash.
Blob(id Hash) ([]byte, error)
// Commit reads the commit identified by the hash.
Commit(id Hash) (Commit, error)
// Tree reads the tree identified by the hash.
Tree(id Hash) (Tree, error)
// Tag reads the tag identified by the hash.
Tag(id Hash) (AnnotatedTag, error)
// Close closes the reader.
Expand All @@ -300,3 +325,27 @@ func OpenObjectReader(
) (ObjectReader, error) {
return newObjectReader(gitDirPath, runner)
}

// Tree is a git tree, which is a manifest of other git objects, including
// other trees.
type Tree interface {
// Hash is the Hash for this Tree.
Hash() Hash
// Nodes is the set of nodes in this Tree.
Nodes() []TreeNode
// Descendant walks down a tree, following the path specified,
// and returns the terminal TreeNode. If no node is found, it returns
// ErrTreeNodeNotFound. The objectReader is used to load any subtrees
// that have to be traversed along the way.
Descendant(path string, objectReader ObjectReader) (TreeNode, error)
}

// TreeNode is a reference to an object contained in a tree. These objects have
// a file mode associated with them, which hints at the type of object located
// at Hash (for example a tree or a blob; see the Mode* constants).
type TreeNode interface {
// Hash is the Hash of the object referenced by this TreeNode.
Hash() Hash
// Name is the name of the object referenced by this TreeNode.
Name() string
// Mode is the file mode of the object referenced by this TreeNode.
Mode() ObjectMode
}
33 changes: 24 additions & 9 deletions private/pkg/git/gittest/gittest.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"io"
"os"
"path"
"path/filepath"
"strings"
"testing"

Expand Down Expand Up @@ -107,30 +108,37 @@ func scaffoldGitRepository(t *testing.T, runner command.Runner) string {
runInDir(t, runner, local, "git", "remote", "add", "origin", remote)

// (1) commit in main branch
runInDir(t, runner, local, "touch", "randomBinary")
writeFiles(t, local, map[string]string{
"randomBinary": "some executable",
"proto/buf.yaml": "some buf.yaml",
"proto/acme/petstore/v1/a.proto": "cats",
"proto/acme/petstore/v1/b.proto": "animals",
"proto/acme/grocerystore/v1/c.proto": "toysrus",
"proto/acme/grocerystore/v1/d.proto": "petsrus",
})
runInDir(t, runner, local, "chmod", "+x", "randomBinary")
runInDir(t, runner, local, "mkdir", "proto")
runInDir(t, runner, path.Join(local, "proto"), "touch", "buf.yaml")
runInDir(t, runner, local, "mkdir", "-p", "proto/acme/petstore/v1")
runInDir(t, runner, path.Join(local, "proto", "acme", "petstore", "v1"), "touch", "a.proto", "b.proto")
runInDir(t, runner, local, "mkdir", "-p", "proto/acme/grocerystore/v1")
runInDir(t, runner, path.Join(local, "proto", "acme", "grocerystore", "v1"), "touch", "c.proto", "d.proto")
runInDir(t, runner, local, "git", "add", ".")
runInDir(t, runner, local, "git", "commit", "-m", "initial commit")
runInDir(t, runner, local, "git", "tag", "release/v1")
runInDir(t, runner, local, "git", "push", "--follow-tags", "-u", "-f", "origin", DefaultBranch)

// (2) branch off main and begin work
runInDir(t, runner, local, "git", "checkout", "-b", "smian/branch1")
runInDir(t, runner, path.Join(local, "proto", "acme", "petstore", "v1"), "touch", "e.proto", "f.proto")
writeFiles(t, local, map[string]string{
"proto/acme/petstore/v1/e.proto": "loblaws",
"proto/acme/petstore/v1/f.proto": "merchant of venice",
})
runInDir(t, runner, local, "git", "add", ".")
runInDir(t, runner, local, "git", "commit", "-m", "branch1")
runInDir(t, runner, local, "git", "tag", "-m", "for testing", "branch/v1")
runInDir(t, runner, local, "git", "push", "--follow-tags", "origin", "smian/branch1")

// (3) branch off branch and begin work
runInDir(t, runner, local, "git", "checkout", "-b", "smian/branch2")
runInDir(t, runner, path.Join(local, "proto", "acme", "grocerystore", "v1"), "touch", "g.proto", "h.proto")
writeFiles(t, local, map[string]string{
"proto/acme/grocerystore/v1/g.proto": "hamlet",
"proto/acme/grocerystore/v1/h.proto": "bethoven",
})
runInDir(t, runner, local, "git", "add", ".")
runInDir(t, runner, local, "git", "commit", "-m", "branch2")
runInDir(t, runner, local, "git", "tag", "-m", "for testing", "branch/v2")
Expand Down Expand Up @@ -169,3 +177,10 @@ func runInDir(t *testing.T, runner command.Runner, dir string, cmd string, args
}
require.NoError(t, err)
}

// writeFiles creates every entry of files beneath dir.
//
// Each key of files is a path relative to dir and each value is the content
// written to that file. Missing parent directories are created with mode 0700
// and files are written with mode 0600. Any failure aborts the test through
// require.NoError.
func writeFiles(t *testing.T, dir string, files map[string]string) {
	// Attribute failures to the caller's line rather than to this helper.
	t.Helper()
	// The loop variable is deliberately not called "path", which would shadow
	// the imported path package.
	for relPath, contents := range files {
		fullPath := filepath.Join(dir, relPath)
		require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0700))
		require.NoError(t, os.WriteFile(fullPath, []byte(contents), 0600))
	}
}
4 changes: 4 additions & 0 deletions private/pkg/git/hash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (
)

func TestParseHashFromHex(t *testing.T) {
t.Parallel()

const hex = "5edab9f970913225f985d9673ac19d61d36f0942"

id, err := parseHashFromHex(hex)
Expand All @@ -31,6 +33,8 @@ func TestParseHashFromHex(t *testing.T) {
}

func TestNewHashFromBytes(t *testing.T) {
t.Parallel()

bytes := []byte{0x5e, 0xda, 0xb9, 0xf9, 0x70, 0x91, 0x32, 0x25, 0xf9, 0x85, 0xd9, 0x67, 0x3a, 0xc1, 0x9d, 0x61, 0xd3, 0x6f, 0x9, 0x42}

id, err := newHashFromBytes(bytes)
Expand Down
2 changes: 2 additions & 0 deletions private/pkg/git/ident_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
)

func TestParseIdent(t *testing.T) {
t.Parallel()

ident, err := parseIdent([]byte("Foo <bar@baz> 1680571785 +0445"))

require.NoError(t, err)
Expand Down
27 changes: 23 additions & 4 deletions private/pkg/git/object_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ import (
"go.uber.org/multierr"
)

// Object type names passed to objectReader.read to state which kind of
// object is being requested.
const (
objectTypeBlob = "blob"
objectTypeCommit = "commit"
objectTypeTree = "tree"
objectTypeTag = "tag"
)

// exitTime is the amount of time we'll wait for git-cat-file(1) to exit.
var exitTime = 5 * time.Second
// errObjectTypeMismatch reports an object type mismatch.
// NOTE(review): presumably returned when the object read is not of the
// requested type — confirm against objectReader.read.
var errObjectTypeMismatch = errors.New("object type mismatch")
Expand Down Expand Up @@ -82,16 +89,28 @@ func (o *objectReader) Close() error {
)
}

func (o *objectReader) Commit(id Hash) (Commit, error) {
data, err := o.read("commit", id)
// Blob reads the object identified by hash, requesting it as a blob, and
// returns the bytes and error from o.read unchanged.
func (o *objectReader) Blob(hash Hash) ([]byte, error) {
return o.read(objectTypeBlob, hash)
}

// Commit reads the object identified by hash, requesting it as a commit, and
// hands the bytes to parseCommit. A read failure is returned as-is with a nil
// Commit.
func (o *objectReader) Commit(hash Hash) (Commit, error) {
	raw, err := o.read(objectTypeCommit, hash)
	if err == nil {
		return parseCommit(hash, raw)
	}
	return nil, err
}

func (o *objectReader) Tree(hash Hash) (Tree, error) {
data, err := o.read(objectTypeTree, hash)
if err != nil {
return nil, err
}
return parseCommit(id, data)
return parseTree(hash, data)
}

func (o *objectReader) Tag(hash Hash) (AnnotatedTag, error) {
data, err := o.read("tag", hash)
data, err := o.read(objectTypeTag, hash)
if err != nil {
return nil, err
}
Expand Down
2 changes: 2 additions & 0 deletions private/pkg/git/tag_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
)

func TestTags(t *testing.T) {
t.Parallel()

repo := gittest.ScaffoldGitRepository(t)
var tags []string
err := repo.TagIterator.ForEachTag(func(tag string, commitHash git.Hash) error {
Expand Down
115 changes: 115 additions & 0 deletions private/pkg/git/tree.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Copyright 2020-2023 Buf Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package git

import (
"bytes"
"errors"
"fmt"

"github.com/bufbuild/buf/private/pkg/normalpath"
)

// tree is the in-memory form of a parsed git tree object.
type tree struct {
// hash is the hash this tree was parsed with.
hash Hash
// nodes are the entries of the tree, in the order they were parsed.
nodes []TreeNode
}

func parseTree(hash Hash, data []byte) (*tree, error) {
saquibmian marked this conversation as resolved.
Show resolved Hide resolved
t := &tree{
hash: hash,
}
/*
data is in the format
<mode><space><name>\0<hash>
repeated
*/
for len(data) > 0 {
// We can find the \0 character before the <hash>
// and slice to the index of \0 + the length of a hash.
// That gives us a single node.
i := bytes.Index(data, []byte{0})
if i == -1 {
return nil, errors.New("parse tree")
}
length := i + 1 + hashLength
node, err := parseTreeNode(data[:length])
if err != nil {
return nil, fmt.Errorf("parse tree: %w", err)
}
t.nodes = append(t.nodes, node)
data = data[length:]
}
return t, nil
}

// Hash returns the hash stored on this tree.
func (t *tree) Hash() Hash {
return t.hash
}

// Nodes returns the tree's nodes. The internal slice is returned directly,
// not a copy.
func (t *tree) Nodes() []TreeNode {
return t.nodes
}

// Descendant looks up the node reached by following path from this tree.
//
// An empty path is rejected with an error. Otherwise the path is split with
// normalpath.Components and the lookup is delegated to descendant, which
// loads intermediate trees through objectReader.
func (t *tree) Descendant(path string, objectReader ObjectReader) (TreeNode, error) {
	if len(path) == 0 {
		return nil, errors.New("empty path")
	}
	components := normalpath.Components(path)
	return descendant(objectReader, t, components)
}

// descendant resolves names one component at a time, starting at root, and
// returns the node named by the final component.
//
// Every component except the last must name a node whose mode is ModeDir;
// the tree behind such a node is loaded through objectReader. A component
// that is missing from its tree, or an intermediate component that is not a
// directory, yields ErrTreeNodeNotFound. Errors from objectReader.Tree are
// returned unchanged. names must hold at least one element.
func descendant(
	objectReader ObjectReader,
	root Tree,
	names []string,
) (TreeNode, error) {
	current := root
	for {
		// Separate the component to resolve at this level from the
		// components that still lie below it.
		head, rest := names[0], names[1:]
		// Scan the current tree for a node carrying that name.
		var match TreeNode
		for _, candidate := range current.Nodes() {
			if candidate.Name() == head {
				match = candidate
				break
			}
		}
		switch {
		case match == nil:
			// Nothing in this tree has the requested name.
			return nil, ErrTreeNodeNotFound
		case len(rest) == 0:
			// Nothing left to descend into: this is the terminal node.
			return match, nil
		case match.Mode() != ModeDir:
			// An intermediate node has to be a directory; anything else is
			// reported as not found.
			return nil, ErrTreeNodeNotFound
		}
		// TODO: support symlinks (on intermediate dirs) with descendant option
		// Load the subdirectory and continue the walk from there.
		next, err := objectReader.Tree(match.Hash())
		if err != nil {
			return nil, err
		}
		current, names = next, rest
	}
}