Skip to content

Commit

Permalink
Extract all bufsync.*Funcs into a bufsync.Handler (#2554)
Browse files Browse the repository at this point in the history
This also improves the testing to be blackbox; we invoke sync and assert
on the end-state.
  • Loading branch information
saquibmian committed Nov 10, 2023
1 parent e6c9734 commit 3b837ea
Show file tree
Hide file tree
Showing 13 changed files with 883 additions and 830 deletions.
101 changes: 31 additions & 70 deletions private/buf/bufsync/backfill_tags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.

package bufsync
package bufsync_test

import (
"context"
"fmt"
"testing"
"time"

"github.com/bufbuild/buf/private/buf/bufsync"
"github.com/bufbuild/buf/private/bufpkg/bufmodule/bufmoduleref"
"github.com/bufbuild/buf/private/pkg/command"
"github.com/bufbuild/buf/private/pkg/git"
Expand All @@ -36,76 +37,58 @@ func TestBackfilltags(t *testing.T) {
moduleIdentityInHEAD, err := bufmoduleref.NewModuleIdentity("buf.build", "acme", "foo")
require.NoError(t, err)
prepareGitRepoBackfillTags(t, repoDir, moduleIdentityInHEAD)
mockBSRChecker := newMockSyncGitChecker()
mockHandler := newMockSyncHandler()
// prepare the top 5 commits as syncable commits, mark the rest as if they were already synced
var (
commitCount int
allCommitsHashes []string
startSyncHash git.Hash
fakeNowCommitLimitTime time.Time // to be sent as a fake clock and discard "old" commits
)
require.NoError(t, repo.ForEachCommit(func(commit git.Commit) error {
allCommitsHashes = append(allCommitsHashes, commit.Hash().Hex())
commitCount++
if commitCount == 5 {
startSyncHash = commit.Hash()
if commitCount == 6 {
// mark this commit as synced; nothing after this needs to be marked because syncer
// won't travel past this
mockHandler.setSyncPoint(
defaultBranchName,
commit.Hash(),
moduleIdentityInHEAD,
)
}
if commitCount > 5 {
if commitCount == 15 {
// have the time limit at the commit 15 looking back
fakeNowCommitLimitTime = commit.Committer().Timestamp()
}
mockBSRChecker.markSynced(commit.Hash().Hex())
if commitCount == 15 {
// have the time limit at the commit 15 looking back
fakeNowCommitLimitTime = commit.Committer().Timestamp()
}
return nil
}))
mockTagsBackfiller := newMockTagsBackfiller()
mockClock := &mockClock{now: fakeNowCommitLimitTime.Add(lookbackTimeLimit)}
const moduleDir = "." // module is at the git root repo
testSyncer := syncer{
repo: repo,
storageGitProvider: storagegit.NewProvider(repo.Objects()),
logger: zaptest.NewLogger(t),
sortedModulesDirsForSync: []string{moduleDir},
modulesDirsToIdentityOverrideForSync: map[string]bufmoduleref.ModuleIdentity{moduleDir: nil},
syncedGitCommitChecker: mockBSRChecker.checkFunc(),
commitsToTags: make(map[string][]string),
modulesDirsToBranchesToIdentities: make(map[string]map[string]bufmoduleref.ModuleIdentity),
modulesToBranchesExpectedSyncPoints: make(map[string]map[string]string),
modulesIdentitiesToCommitsSyncedCache: make(map[string]map[string]struct{}),
tagsBackfiller: mockTagsBackfiller.backfillFunc(),
errorHandler: &mockErrorHandler{},
}
require.NoError(t, testSyncer.prepareSync(context.Background()))
require.NoError(t, testSyncer.backfillTags(
context.Background(),
moduleDir,
moduleIdentityInHEAD,
defaultBranchName,
startSyncHash,
mockClock,
))
// in total the repo has at least 20 commits, we expect to backfill 11 of them...
syncer, err := bufsync.NewSyncer(
zaptest.NewLogger(t),
&mockClock{now: fakeNowCommitLimitTime.Add(bufsync.LookbackTimeLimit)},
repo,
storagegit.NewProvider(repo.Objects()),
mockHandler,
bufsync.SyncerWithModule(moduleDir, nil),
)
require.NoError(t, err)
require.NoError(t, syncer.Sync(context.Background()))
// in total the repo has at least 20 commits, we expect to backfill 11 of them
// and sync the next 4 commits
assert.GreaterOrEqual(t, len(allCommitsHashes), 20)
assert.Len(t, mockTagsBackfiller.backfilledCommitsToTags, 11)
assert.Len(t, mockHandler.tagsByHash, 15)
// as follows:
for i, commitHash := range allCommitsHashes {
if i < 4 {
// the 4 most recent should not be backfilling anything, those are unsynced commits that will
// be synced by another func.
assert.NotContains(t, mockTagsBackfiller.backfilledCommitsToTags, commitHash)
} else if i < 15 {
if i < 15 {
// Between 0-4, the tags should be synced.
// Between 5-15 the tags should be backfilled.
//
// The commit #5 is the git start sync point, which will also be handled by sync because it's
// sometimes already synced and sometimes not. It's handled by both sync and backfill tags.
//
// The func it's backfilling more than 5 commits, because it needs to backfill until both
// conditions are met, at least 5 commits and at least 24 hours.
assert.Contains(t, mockTagsBackfiller.backfilledCommitsToTags, commitHash)
assert.Contains(t, mockHandler.tagsByHash, commitHash)
} else {
// past the #15 the commits are too old, we don't backfill back there
assert.NotContains(t, mockTagsBackfiller.backfilledCommitsToTags, commitHash)
assert.NotContains(t, mockHandler.tagsByHash, commitHash)
}
}
}
Expand Down Expand Up @@ -141,25 +124,3 @@ func prepareGitRepoBackfillTags(t *testing.T, repoDir string, moduleIdentity buf
time.Sleep(1 * time.Second)
doEmptyCommitAndTag(15)
}

type mockTagsBackfiller struct {
backfilledCommitsToTags map[string]struct{}
}

func newMockTagsBackfiller() mockTagsBackfiller {
return mockTagsBackfiller{backfilledCommitsToTags: make(map[string]struct{})}
}

func (b *mockTagsBackfiller) backfillFunc() TagsBackfiller {
return func(_ context.Context, _ bufmoduleref.ModuleIdentity, alreadySyncedHash git.Hash, _, _ git.Ident, _ []string) (string, error) {
// we don't really test which tags were backfilled, only which commits had its tags backfilled
b.backfilledCommitsToTags[alreadySyncedHash.Hex()] = struct{}{}
return "some-BSR-commit-name", nil
}
}

type mockClock struct {
now time.Time
}

func (c *mockClock) Now() time.Time { return c.now }
176 changes: 79 additions & 97 deletions private/buf/bufsync/bufsync.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"errors"
"fmt"
"time"

"github.com/bufbuild/buf/private/bufpkg/bufmodule/bufmoduleref"
"github.com/bufbuild/buf/private/pkg/git"
Expand Down Expand Up @@ -149,38 +150,95 @@ type ErrorHandler interface {
) error
}

// Syncer syncs a modules in a git.Repository.
// Handler is a handler for Syncer. It controls the way in which Syncer handles errors, provides
// any information the Syncer needs to Sync commits, and receives ModuleCommits that should be
// synced.
type Handler interface {
ErrorHandler

// SyncModuleCommit is invoked to process a sync point. If an error is returned, sync will abort.
SyncModuleCommit(ctx context.Context, commit ModuleCommit) error

// ResolveSyncPoint is invoked to resolve a syncpoint for a particular module at a particular branch.
// If no syncpoint is found, this function returns nil. If an error is returned, sync will abort.
ResolveSyncPoint(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
branch string,
) (git.Hash, error)

// CheckSyncedGitCommits is invoked when syncing branches to know which commits hashes from a set
// are already synced inthe BSR. It expects to receive the commit hashes that are synced already. If
// an error is returned, sync will abort.
CheckSyncedGitCommits(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
commitHashes map[string]struct{},
) (map[string]struct{}, error)

// GetModuleReleaseBranch is invoked before syncing, to gather release branch names for all the
// modules that are about to be synced. If the BSR module does not exist, the implementation should
// return `ModuleDoesNotExistErr` error.
GetModuleReleaseBranch(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
) (string, error)

// BackfillTags is invoked when a commit with valid modules is found within a lookback threshold
// past the start sync point for such module. The Syncer assumes that the "old" commit is already
// synced, so it will attempt to backfill existing tags using that git hash, in case they were
// recently created or moved there.
//
// A common scenario is SemVer releases: a commit is pushed to the default Git branch, the sync
// process triggers and completes, and some minutes later that commit is tagged "v1.2.3". The next
// time the sync command runs, this backfiller would pick such tag and backfill it to the correct
// BSR commit.
//
// It's expected to return the BSR commit name to which the tags were backfilled.
BackfillTags(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
alreadySyncedHash git.Hash,
author git.Ident,
committer git.Ident,
tags []string,
) (string, error)
}

// Syncer syncs modules in a git.Repository.
type Syncer interface {
// Sync syncs the repository using the provided SyncFunc. It processes commits in reverse
// topological order, loads any configured named modules, extracts any Git metadata for that
// commit, and invokes SyncFunc with a ModuleCommit.
Sync(context.Context, SyncFunc) error
// Sync syncs the repository. It processes commits in reverse topological order, loads any
// configured named modules, extracts any Git metadata for that commit, and invokes
// Handler#SyncModuleCommit with a ModuleCommit.
Sync(context.Context) error
}

// NewSyncer creates a new Syncer.
func NewSyncer(
logger *zap.Logger,
clock Clock,
repo git.Repository,
storageGitProvider storagegit.Provider,
errorHandler ErrorHandler,
handler Handler,
options ...SyncerOption,
) (Syncer, error) {
return newSyncer(
logger,
clock,
repo,
storageGitProvider,
errorHandler,
handler,
options...,
)
}

// SyncerOption configures the creation of a new Syncer.
type SyncerOption func(*syncer) error

// SyncerWithRemote configures a Syncer with a resumption using a SyncPointResolver.
func SyncerWithRemote(remoteName string) SyncerOption {
// SyncerWithGitRemote configures a Syncer to sync commits from particular Git remote.
func SyncerWithGitRemote(gitRemoteName string) SyncerOption {
return func(s *syncer) error {
s.remote = remoteName
s.gitRemoteName = gitRemoteName
return nil
}
}
Expand Down Expand Up @@ -208,43 +266,6 @@ func SyncerWithModule(moduleDir string, identityOverride bufmoduleref.ModuleIden
}
}

// SyncerWithResumption configures a Syncer with a resumption using a SyncPointResolver.
func SyncerWithResumption(resolver SyncPointResolver) SyncerOption {
return func(s *syncer) error {
s.syncPointResolver = resolver
return nil
}
}

// SyncerWithGitCommitChecker configures a git commit checker, to know if a module has a given git
// hash alrady synced in a BSR instance.
func SyncerWithGitCommitChecker(checker SyncedGitCommitChecker) SyncerOption {
return func(s *syncer) error {
s.syncedGitCommitChecker = checker
return nil
}
}

// SyncerWithModuleDefaultBranchGetter configures a getter for modules' default branch, to protect
// those branches syncs in cases like a Git history rewrite. If this option is not passed, the
// syncer treats the BSR default branch as any other branch.
func SyncerWithModuleDefaultBranchGetter(getter ModuleDefaultBranchGetter) SyncerOption {
return func(s *syncer) error {
s.moduleDefaultBranchGetter = getter
return nil
}
}

// SyncerWithTagsBackfiller configures a tags backfiller for older, already synced commits. If this
// option is not passed, the syncer won't try to sync older tags past each module's start sync
// points on all branches.
func SyncerWithTagsBackfiller(backfiller TagsBackfiller) SyncerOption {
return func(s *syncer) error {
s.tagsBackfiller = backfiller
return nil
}
}

// SyncerWithAllBranches sets the syncer to sync all branches. Be default the syncer only processes
// commits in the current checked out branch.
func SyncerWithAllBranches() SyncerOption {
Expand All @@ -254,56 +275,6 @@ func SyncerWithAllBranches() SyncerOption {
}
}

// SyncFunc is invoked by Syncer to process a sync point. If an error is returned,
// sync will abort.
type SyncFunc func(ctx context.Context, commit ModuleCommit) error

// SyncPointResolver is invoked by Syncer to resolve a syncpoint for a particular module
// at a particular branch. If no syncpoint is found, this function returns nil. If an error
// is returned, sync will abort.
type SyncPointResolver func(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
branch string,
) (git.Hash, error)

// SyncedGitCommitChecker is invoked when syncing branches to know which commits hashes from a set
// are already synced inthe BSR. It expects to receive the commit hashes that are synced already. If
// an error is returned, sync will abort.
type SyncedGitCommitChecker func(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
commitHashes map[string]struct{},
) (map[string]struct{}, error)

// ModuleDefaultBranchGetter is invoked before syncing, to gather default branch names for all the
// modules that are about to be synced. If the BSR module does not exist, the implementation should
// return `ModuleDoesNotExistErr` error.
type ModuleDefaultBranchGetter func(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
) (string, error)

// TagsBackfiller is invoked when a commit with valid modules is found within a lookback threshold
// past the start sync point for such module. The Syncer assumes that the "old" commit is already
// synced, so it will attempt to backfill existing tags using that git hash, in case they were
// recently created or moved there.
//
// A common scenario is SemVer releases: a commit is pushed to the default Git branch, the sync
// process triggers and completes, and some minutes later that commit is tagged "v1.2.3". The next
// time the sync command runs, this backfiller would pick such tag and backfill it to the correct
// BSR commit.
//
// It's expected to return the BSR commit name to which the tags were backfilled.
type TagsBackfiller func(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
alreadySyncedHash git.Hash,
author git.Ident,
committer git.Ident,
tags []string,
) (string, error)

// ModuleCommit is a module at a particular commit.
type ModuleCommit interface {
// Branch is the git branch that this module is sourced from.
Expand All @@ -320,3 +291,14 @@ type ModuleCommit interface {
// Bucket is the bucket for the module.
Bucket() storage.ReadBucket
}

// Clock provides the current time.
type Clock interface {
// Now provides the current time.
Now() time.Time
}

// NewRealClock returns a Clock that returns the current time using time#Now().
func NewRealClock() Clock {
return newClock()
}
Loading

0 comments on commit 3b837ea

Please sign in to comment.