diff --git a/go/cmd/dolt/cli/arg_parser_helpers.go b/go/cmd/dolt/cli/arg_parser_helpers.go index bb70b7de02a..8e31f84d649 100644 --- a/go/cmd/dolt/cli/arg_parser_helpers.go +++ b/go/cmd/dolt/cli/arg_parser_helpers.go @@ -130,6 +130,7 @@ func CreateCloneArgParser() *argparser.ArgParser { ap := argparser.NewArgParserWithMaxArgs("clone", 2) ap.SupportsString(RemoteParam, "", "name", "Name of the remote to be added to the cloned database. The default is 'origin'.") ap.SupportsString(BranchParam, "b", "branch", "The branch to be cloned. If not specified all branches will be cloned.") + ap.SupportsString(DepthFlag, "", "depth", "Clone a single branch and limit history to the given commit depth.") ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "") ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes)) ap.SupportsString(dbfactory.AWSCredsFileParam, "", "file", "AWS credentials file.") diff --git a/go/cmd/dolt/cli/flags.go b/go/cmd/dolt/cli/flags.go index 3b413fdd604..b045808b7c3 100644 --- a/go/cmd/dolt/cli/flags.go +++ b/go/cmd/dolt/cli/flags.go @@ -33,6 +33,7 @@ const ( DecorateFlag = "decorate" DeleteFlag = "delete" DeleteForceFlag = "D" + DepthFlag = "depth" DryRunFlag = "dry-run" ForceFlag = "force" HardResetParam = "hard" diff --git a/go/cmd/dolt/commands/clone.go b/go/cmd/dolt/commands/clone.go index 2940f2fa486..92ca626cb4c 100644 --- a/go/cmd/dolt/commands/clone.go +++ b/go/cmd/dolt/commands/clone.go @@ -141,10 +141,15 @@ func clone(ctx context.Context, apr *argparser.ArgParseResults, dEnv *env.DoltEn return errhand.VerboseErrorFromError(err) } + depth, ok := apr.GetInt(cli.DepthFlag) + if !ok { + depth = -1 + } + // Nil out the old Dolt env so we don't accidentally operate on the wrong database dEnv = nil - err = actions.CloneRemote(ctx, srcDB, remoteName, branch, singleBranch, clonedEnv) + err = actions.CloneRemote(ctx, srcDB, remoteName, 
branch, singleBranch, depth, clonedEnv) if err != nil { // If we're cloning into a directory that already exists do not erase it. Otherwise // make best effort to delete the directory we created. diff --git a/go/cmd/dolt/commands/diff.go b/go/cmd/dolt/commands/diff.go index 7d117991134..5b744827955 100644 --- a/go/cmd/dolt/commands/diff.go +++ b/go/cmd/dolt/commands/diff.go @@ -16,6 +16,7 @@ package commands import ( "context" + "errors" "fmt" "io" "strconv" @@ -433,6 +434,9 @@ func (dArgs *diffArgs) applyDiffRoots(queryist cli.Queryist, sqlCtx *sql.Context fromRef := args[0] // treat the first arg as a ref spec _, err := getTableNamesAtRef(queryist, sqlCtx, fromRef) + if errors.Is(err, doltdb.ErrGhostCommitEncountered) { + return nil, err + } // if it doesn't resolve, treat it as a table name if err != nil { // `dolt diff table` diff --git a/go/cmd/dolt/commands/engine/sqlengine.go b/go/cmd/dolt/commands/engine/sqlengine.go index 5e72e052665..103f082fc69 100644 --- a/go/cmd/dolt/commands/engine/sqlengine.go +++ b/go/cmd/dolt/commands/engine/sqlengine.go @@ -115,7 +115,7 @@ func NewSqlEngine( return nil, err } - all := append(dbs) + all := dbs[:] clusterDB := config.ClusterController.ClusterDatabase() if clusterDB != nil { diff --git a/go/cmd/dolt/commands/filter-branch.go b/go/cmd/dolt/commands/filter-branch.go index 7f05c2b5e5d..155feaa8a4a 100644 --- a/go/cmd/dolt/commands/filter-branch.go +++ b/go/cmd/dolt/commands/filter-branch.go @@ -195,10 +195,14 @@ func getNerf(ctx context.Context, dEnv *env.DoltEnv, apr *argparser.ArgParseResu return nil, err } - cm, err := dEnv.DoltDB.Resolve(ctx, cs, headRef) + optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, headRef) if err != nil { return nil, err } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } return rebase.StopAtCommit(cm), nil } diff --git a/go/cmd/dolt/commands/log_test.go b/go/cmd/dolt/commands/log_test.go index 29ab9e4b3e9..6712bf36031 100644 --- 
a/go/cmd/dolt/commands/log_test.go +++ b/go/cmd/dolt/commands/log_test.go @@ -41,7 +41,9 @@ func TestLog(t *testing.T) { } cs, _ := doltdb.NewCommitSpec(env.DefaultInitBranch) - commit, _ := dEnv.DoltDB.Resolve(context.Background(), cs, nil) + opt, _ := dEnv.DoltDB.Resolve(context.Background(), cs, nil) + commit, _ := opt.ToCommit() + meta, _ := commit.GetCommitMeta(context.Background()) require.Equal(t, "Bill Billerson", meta.Name) } @@ -60,7 +62,8 @@ func TestLogSigterm(t *testing.T) { } cs, _ := doltdb.NewCommitSpec(env.DefaultInitBranch) - commit, _ := dEnv.DoltDB.Resolve(context.Background(), cs, nil) + optCmt, _ := dEnv.DoltDB.Resolve(context.Background(), cs, nil) + commit, _ := optCmt.ToCommit() cMeta, _ := commit.GetCommitMeta(context.Background()) cHash, _ := commit.HashOf() diff --git a/go/cmd/dolt/commands/read_tables.go b/go/cmd/dolt/commands/read_tables.go index 53dd66a742e..aefa6d83ff7 100644 --- a/go/cmd/dolt/commands/read_tables.go +++ b/go/cmd/dolt/commands/read_tables.go @@ -190,7 +190,7 @@ func pullTableValue(ctx context.Context, dEnv *env.DoltEnv, srcDB *doltdb.DoltDB cli.Println("Retrieving", tblName) runProgFunc := buildProgStarter(language) wg, pullerEventCh := runProgFunc(newCtx) - err = dEnv.DoltDB.PullChunks(ctx, tmpDir, srcDB, []hash.Hash{tblHash}, pullerEventCh) + err = dEnv.DoltDB.PullChunks(ctx, tmpDir, srcDB, []hash.Hash{tblHash}, pullerEventCh, nil) stopProgFuncs(cancelFunc, wg, pullerEventCh) if err != nil { return nil, errhand.BuildDError("Failed reading chunks for remote table '%s' at '%s'", tblName, commitStr).AddCause(err).Build() @@ -217,14 +217,16 @@ func getRemoteDBAtCommit(ctx context.Context, remoteUrl string, remoteUrlParams return nil, nil, errhand.BuildDError("Invalid Commit '%s'", commitStr).Build() } - cm, err := srcDB.Resolve(ctx, cs, nil) - + optCmt, err := srcDB.Resolve(ctx, cs, nil) if err != nil { return nil, nil, errhand.BuildDError("Failed to find commit '%s'", commitStr).Build() } + cm, ok := optCmt.ToCommit() 
+ if !ok { + return nil, nil, errhand.BuildDError(doltdb.ErrGhostCommitEncountered.Error()).Build() + } srcRoot, err := cm.GetRootValue(ctx) - if err != nil { return nil, nil, errhand.BuildDError("Failed to read from database").AddCause(err).Build() } diff --git a/go/cmd/dolt/commands/show.go b/go/cmd/dolt/commands/show.go index f33e0f3849d..43a7208af7f 100644 --- a/go/cmd/dolt/commands/show.go +++ b/go/cmd/dolt/commands/show.go @@ -278,10 +278,14 @@ func printObjects(ctx context.Context, dEnv *env.DoltEnv, opts *showOpts) error return err } - commit, err := dEnv.DoltDB.Resolve(ctx, headSpec, headRef) + optCmt, err := dEnv.DoltDB.Resolve(ctx, headSpec, headRef) if err != nil { return err } + commit, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } value := commit.Value() cli.Println(value.Kind(), value.HumanReadableString()) diff --git a/go/cmd/dolt/commands/stashcmds/pop.go b/go/cmd/dolt/commands/stashcmds/pop.go index 09ff5591d10..b76b7ddb1ea 100644 --- a/go/cmd/dolt/commands/stashcmds/pop.go +++ b/go/cmd/dolt/commands/stashcmds/pop.go @@ -142,10 +142,17 @@ func applyStashAtIdx(ctx *sql.Context, dEnv *env.DoltEnv, curWorkingRoot *doltdb if err != nil { return false, err } - parentCommit, err := dEnv.DoltDB.Resolve(ctx, headCommitSpec, headRef) + optCmt, err := dEnv.DoltDB.Resolve(ctx, headCommitSpec, headRef) if err != nil { return false, err } + parentCommit, ok := optCmt.ToCommit() + if !ok { + // Should not be possible to get into this situation. 
The parent of the stashed commit + // Must have been present at the time it was created + return false, doltdb.ErrGhostCommitEncountered + } + parentRoot, err := parentCommit.GetRootValue(ctx) if err != nil { return false, err diff --git a/go/cmd/dolt/commands/stashcmds/stash.go b/go/cmd/dolt/commands/stashcmds/stash.go index 666d7ad053d..3f426bd4373 100644 --- a/go/cmd/dolt/commands/stashcmds/stash.go +++ b/go/cmd/dolt/commands/stashcmds/stash.go @@ -227,10 +227,15 @@ func stashChanges(ctx context.Context, dEnv *env.DoltEnv, apr *argparser.ArgPars if err != nil { return err } - commit, err := dEnv.DoltDB.Resolve(ctx, commitSpec, curHeadRef) + optCmt, err := dEnv.DoltDB.Resolve(ctx, commitSpec, curHeadRef) if err != nil { return err } + commit, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } + commitMeta, err := commit.GetCommitMeta(ctx) if err != nil { return err diff --git a/go/libraries/doltcore/cherry_pick/cherry_pick.go b/go/libraries/doltcore/cherry_pick/cherry_pick.go index 967d3841050..fe0af2f3d99 100644 --- a/go/libraries/doltcore/cherry_pick/cherry_pick.go +++ b/go/libraries/doltcore/cherry_pick/cherry_pick.go @@ -204,10 +204,14 @@ func cherryPick(ctx *sql.Context, dSess *dsess.DoltSession, roots doltdb.Roots, if err != nil { return nil, "", err } - cherryCommit, err := doltDB.Resolve(ctx, cherryCommitSpec, headRef) + optCmt, err := doltDB.Resolve(ctx, cherryCommitSpec, headRef) if err != nil { return nil, "", err } + cherryCommit, ok := optCmt.ToCommit() + if !ok { + return nil, "", doltdb.ErrGhostCommitEncountered + } if len(cherryCommit.DatasParents()) > 1 { return nil, "", fmt.Errorf("cherry-picking a merge commit is not supported") @@ -223,10 +227,15 @@ func cherryPick(ctx *sql.Context, dSess *dsess.DoltSession, roots doltdb.Roots, // When cherry-picking, we need to use the parent of the cherry-picked commit as the ancestor. This // ensures that only the delta from the cherry-pick commit is applied. 
- parentCommit, err := doltDB.ResolveParent(ctx, cherryCommit, 0) + optCmt, err = doltDB.ResolveParent(ctx, cherryCommit, 0) if err != nil { return nil, "", err } + parentCommit, ok := optCmt.ToCommit() + if !ok { + return nil, "", doltdb.ErrGhostCommitEncountered + } + parentRoot, err := parentCommit.GetRootValue(ctx) if err != nil { return nil, "", err diff --git a/go/libraries/doltcore/dbfactory/file.go b/go/libraries/doltcore/dbfactory/file.go index 39a4942b9d6..d24bfa777d8 100644 --- a/go/libraries/doltcore/dbfactory/file.go +++ b/go/libraries/doltcore/dbfactory/file.go @@ -163,12 +163,16 @@ func (fact FileFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, } oldGenSt, err := nbs.NewLocalStore(ctx, newGenSt.Version(), oldgenPath, defaultMemTableSize, q) + if err != nil { + return nil, nil, nil, err + } + ghostGen, err := nbs.NewGhostBlockStore(path) if err != nil { return nil, nil, nil, err } - st := nbs.NewGenerationalCS(oldGenSt, newGenSt) + st := nbs.NewGenerationalCS(oldGenSt, newGenSt, ghostGen) // metrics? vrw := types.NewValueStore(st) diff --git a/go/libraries/doltcore/diff/diffsplitter.go b/go/libraries/doltcore/diff/diffsplitter.go index e39e59f5da5..a7e8279b08e 100644 --- a/go/libraries/doltcore/diff/diffsplitter.go +++ b/go/libraries/doltcore/diff/diffsplitter.go @@ -15,14 +15,10 @@ package diff import ( - "context" "errors" "strings" "github.com/dolthub/go-mysql-server/sql" - - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" - "github.com/dolthub/dolt/go/libraries/doltcore/env" ) const ( @@ -218,29 +214,3 @@ func (ds DiffSplitter) SplitDiffResultRow(row sql.Row) (from, to RowDiff, err er } return } - -// MaybeResolveRoot returns a root value and true if the a commit exists for given spec string; nil and false if it does not exist. 
-// todo: distinguish between non-existent CommitSpec and other errors, don't assume non-existent -func MaybeResolveRoot(ctx context.Context, rsr env.RepoStateReader, doltDB *doltdb.DoltDB, spec string) (*doltdb.RootValue, bool) { - cs, err := doltdb.NewCommitSpec(spec) - if err != nil { - // it's non-existent CommitSpec - return nil, false - } - - headRef, err := rsr.CWBHeadRef() - if err != nil { - return nil, false - } - cm, err := doltDB.Resolve(ctx, cs, headRef) - if err != nil { - return nil, false - } - - root, err := cm.GetRootValue(ctx) - if err != nil { - return nil, false - } - - return root, true -} diff --git a/go/libraries/doltcore/doltdb/commit.go b/go/libraries/doltcore/doltdb/commit.go index 4e069a4e617..ca1f365be59 100644 --- a/go/libraries/doltcore/doltdb/commit.go +++ b/go/libraries/doltcore/doltdb/commit.go @@ -29,6 +29,12 @@ import ( var errCommitHasNoMeta = errors.New("commit has no metadata") var errHasNoRootValue = errors.New("no root value") +// TODO: Include the commit id in the error. Unfortunately, this message is passed through the SQL layer. The only way we currently +// have on the client side to match an error is with string matching. We possibly need error codes as a prefix to the error message, but +// currently there is no standard for doing this in Dolt. +var ErrGhostCommitEncountered = errors.New("Commit not found. You are using a shallow clone which does not contain the requested commit. Please do a full clone.") +var ErrGhostCommitRuntimeFailure = errors.New("runtime failure: Ghost commit encountered unexpectedly. Please report bug to: https://github.com/dolthub/dolt/issues") + // Rootish is an object resolvable to a RootValue. type Rootish interface { // ResolveRootValue resolves a Rootish to a RootValue. @@ -46,10 +52,28 @@ type Commit struct { dCommit *datas.Commit } +type OptionalCommit struct { + Commit *Commit + Addr hash.Hash +} + +// ToCommit unwraps the *Commit contained by the OptionalCommit. 
If the commit is invalid, it returns (nil, false). +// Otherwise, it returns (commit, true). +func (cmt *OptionalCommit) ToCommit() (*Commit, bool) { + if cmt.Commit == nil { + return nil, false + } + return cmt.Commit, true +} + var _ Rootish = &Commit{} -// NewCommit generates a new Commit object that wraps a supplies datas.Commit. +// NewCommit generates a new Commit object that wraps a supplied datas.Commit. func NewCommit(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, commit *datas.Commit) (*Commit, error) { + if commit.IsGhost() { + return nil, ErrGhostCommitRuntimeFailure + } + parents, err := datas.GetCommitParents(ctx, vrw, commit.NomsValue()) if err != nil { return nil, err @@ -116,14 +140,27 @@ func (c *Commit) GetRootValue(ctx context.Context) (*RootValue, error) { return newRootValue(c.vrw, c.ns, rootV) } -func (c *Commit) GetParent(ctx context.Context, idx int) (*Commit, error) { - return NewCommit(ctx, c.vrw, c.ns, c.parents[idx]) +func (c *Commit) GetParent(ctx context.Context, idx int) (*OptionalCommit, error) { + parent := c.parents[idx] + if parent.IsGhost() { + return &OptionalCommit{nil, parent.Addr()}, nil + } + + cmt, err := NewCommit(ctx, c.vrw, c.ns, parent) + if err != nil { + return nil, err + } + return &OptionalCommit{cmt, parent.Addr()}, nil } func (c *Commit) GetCommitClosure(ctx context.Context) (prolly.CommitClosure, error) { - switch v := c.dCommit.NomsValue().(type) { + return getCommitClosure(ctx, c.dCommit, c.vrw, c.ns) +} + +func getCommitClosure(ctx context.Context, cmt *datas.Commit, vrw types.ValueReadWriter, ns tree.NodeStore) (prolly.CommitClosure, error) { + switch v := cmt.NomsValue().(type) { case types.SerialMessage: - return datas.NewParentsClosure(ctx, c.dCommit, v, c.vrw, c.ns) + return datas.NewParentsClosure(ctx, cmt, v, vrw, ns) default: return prolly.CommitClosure{}, fmt.Errorf("old format lacks commit closure") } @@ -131,7 +168,7 @@ func (c *Commit) GetCommitClosure(ctx context.Context) 
(prolly.CommitClosure, er var ErrNoCommonAncestor = errors.New("no common ancestor") -func GetCommitAncestor(ctx context.Context, cm1, cm2 *Commit) (*Commit, error) { +func GetCommitAncestor(ctx context.Context, cm1, cm2 *Commit) (*OptionalCommit, error) { addr, err := getCommitAncestorAddr(ctx, cm1.dCommit, cm2.dCommit, cm1.vrw, cm2.vrw, cm1.ns, cm2.ns) if err != nil { return nil, err @@ -142,7 +179,15 @@ func GetCommitAncestor(ctx context.Context, cm1, cm2 *Commit) (*Commit, error) { return nil, err } - return NewCommit(ctx, cm1.vrw, cm1.ns, targetCommit) + if targetCommit.IsGhost() { + return &OptionalCommit{nil, addr}, nil + } + + cmt, err := NewCommit(ctx, cm1.vrw, cm1.ns, targetCommit) + if err != nil { + return nil, err + } + return &OptionalCommit{cmt, addr}, nil } func getCommitAncestorAddr(ctx context.Context, c1, c2 *datas.Commit, vrw1, vrw2 types.ValueReadWriter, ns1, ns2 tree.NodeStore) (hash.Hash, error) { @@ -159,11 +204,16 @@ func getCommitAncestorAddr(ctx context.Context, c1, c2 *datas.Commit, vrw1, vrw2 } func (c *Commit) CanFastForwardTo(ctx context.Context, new *Commit) (bool, error) { - ancestor, err := GetCommitAncestor(ctx, c, new) - + optAnc, err := GetCommitAncestor(ctx, c, new) if err != nil { return false, err - } else if ancestor == nil { + } + + ancestor, ok := optAnc.ToCommit() + if !ok { + return false, fmt.Errorf("Unexpected Ghost Commit") + } + if ancestor == nil { return false, errors.New("cannot perform fast forward merge; commits have no common ancestor") } else if ancestor.dCommit.Addr() == c.dCommit.Addr() { if ancestor.dCommit.Addr() == new.dCommit.Addr() { @@ -178,11 +228,16 @@ func (c *Commit) CanFastForwardTo(ctx context.Context, new *Commit) (bool, error } func (c *Commit) CanFastReverseTo(ctx context.Context, new *Commit) (bool, error) { - ancestor, err := GetCommitAncestor(ctx, c, new) - + optAnc, err := GetCommitAncestor(ctx, c, new) if err != nil { return false, err - } else if ancestor == nil { + } + + ancestor, ok := 
optAnc.ToCommit() + if !ok { + return false, ErrGhostCommitEncountered + } + if ancestor == nil { return false, errors.New("cannot perform fast forward merge; commits have no common ancestor") } else if ancestor.dCommit.Addr() == new.dCommit.Addr() { if ancestor.dCommit.Addr() == c.dCommit.Addr() { @@ -196,30 +251,37 @@ func (c *Commit) CanFastReverseTo(ctx context.Context, new *Commit) (bool, error return false, nil } -func (c *Commit) GetAncestor(ctx context.Context, as *AncestorSpec) (*Commit, error) { +func (c *Commit) GetAncestor(ctx context.Context, as *AncestorSpec) (*OptionalCommit, error) { + addr, err := c.HashOf() + if err != nil { + return nil, err + } + optInst := &OptionalCommit{c, addr} if as == nil || len(as.Instructions) == 0 { - return c, nil + return optInst, nil } - cur := c - + hardInst := c instructions := as.Instructions for _, inst := range instructions { - if inst >= cur.NumParents() { + if inst >= hardInst.NumParents() { return nil, ErrInvalidAncestorSpec } var err error - cur, err = cur.GetParent(ctx, inst) + optInst, err = hardInst.GetParent(ctx, inst) if err != nil { return nil, err } - if cur == nil { - return nil, ErrInvalidAncestorSpec + + var ok bool + hardInst, ok = optInst.ToCommit() + if !ok { + break } } - return cur, nil + return optInst, nil } // ResolveRootValue implements Rootish. 
diff --git a/go/libraries/doltcore/doltdb/commit_hooks.go b/go/libraries/doltcore/doltdb/commit_hooks.go index af2d9a8e68d..ae2de48326c 100644 --- a/go/libraries/doltcore/doltdb/commit_hooks.go +++ b/go/libraries/doltcore/doltdb/commit_hooks.go @@ -60,7 +60,7 @@ func pushDataset(ctx context.Context, destDB, srcDB datas.Database, ds datas.Dat return err } - err := pullHash(ctx, destDB, srcDB, []hash.Hash{addr}, tmpDir, nil) + err := pullHash(ctx, destDB, srcDB, []hash.Hash{addr}, tmpDir, nil, nil) if err != nil { return err } diff --git a/go/libraries/doltcore/doltdb/commit_hooks_test.go b/go/libraries/doltcore/doltdb/commit_hooks_test.go index 903b29f6351..762c642b20c 100644 --- a/go/libraries/doltcore/doltdb/commit_hooks_test.go +++ b/go/libraries/doltcore/doltdb/commit_hooks_test.go @@ -84,11 +84,12 @@ func TestPushOnWriteHook(t *testing.T) { // prepare a commit in the source repo cs, _ := NewCommitSpec("main") - commit, err := ddb.Resolve(context.Background(), cs, nil) - + optCmt, err := ddb.Resolve(context.Background(), cs, nil) if err != nil { t.Fatal("Couldn't find commit") } + commit, ok := optCmt.ToCommit() + assert.True(t, ok) meta, err := commit.GetCommitMeta(context.Background()) assert.NoError(t, err) @@ -142,8 +143,11 @@ func TestPushOnWriteHook(t *testing.T) { require.NoError(t, err) cs, _ = NewCommitSpec(defaultBranch) - destCommit, err := destDB.Resolve(context.Background(), cs, nil) + optCmt, err := destDB.Resolve(context.Background(), cs, nil) require.NoError(t, err) + destCommit, ok := optCmt.ToCommit() + require.True(t, ok) + srcHash, _ := srcCommit.HashOf() destHash, _ := destCommit.HashOf() assert.Equal(t, srcHash, destHash) @@ -228,11 +232,12 @@ func TestAsyncPushOnWrite(t *testing.T) { for i := 0; i < 200; i++ { cs, _ := NewCommitSpec("main") - commit, err := ddb.Resolve(context.Background(), cs, nil) - + optCmt, err := ddb.Resolve(context.Background(), cs, nil) if err != nil { t.Fatal("Couldn't find commit") } + commit, ok := 
optCmt.ToCommit() + assert.True(t, ok) meta, err := commit.GetCommitMeta(context.Background()) assert.NoError(t, err) diff --git a/go/libraries/doltcore/doltdb/commit_itr.go b/go/libraries/doltcore/doltdb/commit_itr.go index fc1a6ed64fa..4f5498b8e06 100644 --- a/go/libraries/doltcore/doltdb/commit_itr.go +++ b/go/libraries/doltcore/doltdb/commit_itr.go @@ -30,7 +30,7 @@ import ( type CommitItr interface { // Next returns the hash of the next commit, and a pointer to that commit. Implementations of Next must handle // making sure the list of commits returned are unique. When complete Next will return hash.Hash{}, nil, io.EOF - Next(ctx context.Context) (hash.Hash, *Commit, error) + Next(ctx context.Context) (hash.Hash, *OptionalCommit, error) // Reset the commit iterator back to the start Reset(ctx context.Context) error @@ -90,7 +90,7 @@ func (cmItr *commitItr) Reset(ctx context.Context) error { // Next returns the hash of the next commit, and a pointer to that commit. It handles making sure the list of commits // returned are unique. 
When complete Next will return hash.Hash{}, nil, io.EOF -func (cmItr *commitItr) Next(ctx context.Context) (hash.Hash, *Commit, error) { +func (cmItr *commitItr) Next(ctx context.Context) (hash.Hash, *OptionalCommit, error) { for cmItr.curr == nil { if cmItr.currentRoot >= len(cmItr.rootCommits) { return hash.Hash{}, nil, io.EOF @@ -106,7 +106,7 @@ func (cmItr *commitItr) Next(ctx context.Context) (hash.Hash, *Commit, error) { if !cmItr.added[h] { cmItr.added[h] = true cmItr.curr = cm - return h, cmItr.curr, nil + return h, &OptionalCommit{cmItr.curr, h}, nil } cmItr.currentRoot++ @@ -136,12 +136,14 @@ func (cmItr *commitItr) Next(ctx context.Context) (hash.Hash, *Commit, error) { next := cmItr.unprocessed[numUnprocessed-1] cmItr.unprocessed = cmItr.unprocessed[:numUnprocessed-1] cmItr.curr, err = HashToCommit(ctx, cmItr.ddb.ValueReadWriter(), cmItr.ddb.ns, next) - - if err != nil { + if err != nil && err != ErrGhostCommitEncountered { return hash.Hash{}, nil, err } + if err == ErrGhostCommitEncountered { + cmItr.curr = nil + } - return next, cmItr.curr, nil + return next, &OptionalCommit{cmItr.curr, next}, nil } func HashToCommit(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, h hash.Hash) (*Commit, error) { @@ -149,11 +151,16 @@ func HashToCommit(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeSt if err != nil { return nil, err } + + if dc.IsGhost() { + return nil, ErrGhostCommitEncountered + } + return NewCommit(ctx, vrw, ns, dc) } // CommitFilter is a function that returns true if a commit should be filtered out, and false if it should be kept -type CommitFilter func(context.Context, hash.Hash, *Commit) (filterOut bool, err error) +type CommitFilter func(context.Context, hash.Hash, *OptionalCommit) (filterOut bool, err error) // FilteringCommitItr is a CommitItr implementation that applies a filtering function to limit the commits returned type FilteringCommitItr struct { @@ -172,7 +179,7 @@ func NewFilteringCommitItr(itr 
CommitItr, filter CommitFilter) FilteringCommitIt // Next returns the hash of the next commit, and a pointer to that commit. Implementations of Next must handle // making sure the list of commits returned are unique. When complete Next will return hash.Hash{}, nil, io.EOF -func (itr FilteringCommitItr) Next(ctx context.Context) (hash.Hash, *Commit, error) { +func (itr FilteringCommitItr) Next(ctx context.Context) (hash.Hash, *OptionalCommit, error) { // iteration will terminate on io.EOF or a commit that is !filteredOut for { h, cm, err := itr.itr.Next(ctx) @@ -206,12 +213,12 @@ type CommitSliceIter struct { var _ CommitItr = (*CommitSliceIter)(nil) -func (i *CommitSliceIter) Next(ctx context.Context) (hash.Hash, *Commit, error) { +func (i *CommitSliceIter) Next(ctx context.Context) (hash.Hash, *OptionalCommit, error) { if i.i >= len(i.h) { return hash.Hash{}, nil, io.EOF } i.i++ - return i.h[i.i-1], i.cm[i.i-1], nil + return i.h[i.i-1], &OptionalCommit{i.cm[i.i-1], i.h[i.i-1]}, nil } @@ -221,19 +228,19 @@ func (i *CommitSliceIter) Reset(ctx context.Context) error { } func NewOneCommitIter(cm *Commit, h hash.Hash, meta *datas.CommitMeta) *OneCommitIter { - return &OneCommitIter{cm: cm, h: h} + return &OneCommitIter{cm: &OptionalCommit{cm, h}, h: h} } type OneCommitIter struct { h hash.Hash - cm *Commit + cm *OptionalCommit m *datas.CommitMeta done bool } var _ CommitItr = (*OneCommitIter)(nil) -func (i *OneCommitIter) Next(_ context.Context) (hash.Hash, *Commit, error) { +func (i *OneCommitIter) Next(_ context.Context) (hash.Hash, *OptionalCommit, error) { if i.done { return hash.Hash{}, nil, io.EOF } diff --git a/go/libraries/doltcore/doltdb/doltdb.go b/go/libraries/doltcore/doltdb/doltdb.go index 869557a7730..dd9c652129a 100644 --- a/go/libraries/doltcore/doltdb/doltdb.go +++ b/go/libraries/doltcore/doltdb/doltdb.go @@ -391,7 +391,7 @@ func (ddb *DoltDB) getHashFromCommitSpec(ctx context.Context, cs *CommitSpec, cw // Resolve takes a CommitSpec and returns a 
Commit, or an error if the commit cannot be found. // If the CommitSpec is HEAD, Resolve also needs the DoltRef of the current working branch. -func (ddb *DoltDB) Resolve(ctx context.Context, cs *CommitSpec, cwb ref.DoltRef) (*Commit, error) { +func (ddb *DoltDB) Resolve(ctx context.Context, cs *CommitSpec, cwb ref.DoltRef) (*OptionalCommit, error) { if cs == nil { panic("nil commit spec") } @@ -406,14 +406,44 @@ func (ddb *DoltDB) Resolve(ctx context.Context, cs *CommitSpec, cwb ref.DoltRef) return nil, err } + if commitValue.IsGhost() { + return &OptionalCommit{nil, *hash}, nil + } + commit, err := NewCommit(ctx, ddb.vrw, ddb.ns, commitValue) if err != nil { return nil, err } + return commit.GetAncestor(ctx, cs.aSpec) } -func (ddb *DoltDB) ResolveByNomsRoot(ctx *sql.Context, cs *CommitSpec, cwb ref.DoltRef, root hash.Hash) (*Commit, error) { +// BootstrapShallowResolve is a special case of Resolve that is used to resolve a commit prior to pulling its history +// in a shallow clone. In general, application code should call Resolve and get an OptionalCommit. This is a special case +// where we need to get the head commit for the commit closure used to determine which commits should be skipped. 
+func (ddb *DoltDB) BootstrapShallowResolve(ctx context.Context, cs *CommitSpec) (prolly.CommitClosure, error) { + if cs == nil { + panic("nil commit spec") + } + + hash, err := ddb.getHashFromCommitSpec(ctx, cs, nil, hash.Hash{}) + if err != nil { + return prolly.CommitClosure{}, err + } + + commitValue, err := datas.LoadCommitAddr(ctx, ddb.vrw, *hash) + if err != nil { + return prolly.CommitClosure{}, err + } + + if commitValue.IsGhost() { + return prolly.CommitClosure{}, ErrGhostCommitEncountered + } + + return getCommitClosure(ctx, commitValue, ddb.vrw, ddb.ns) +} + +func (ddb *DoltDB) ResolveByNomsRoot(ctx *sql.Context, cs *CommitSpec, cwb ref.DoltRef, root hash.Hash) (*OptionalCommit, error) { if cs == nil { panic("nil commit spec") } @@ -428,6 +458,10 @@ func (ddb *DoltDB) ResolveByNomsRoot(ctx *sql.Context, cs *CommitSpec, cwb ref.D return nil, err } + if commitValue.IsGhost() { + return &OptionalCommit{nil, *hash}, nil + } + commit, err := NewCommit(ctx, ddb.vrw, ddb.ns, commitValue) if err != nil { return nil, err @@ -442,6 +476,11 @@ func (ddb *DoltDB) ResolveCommitRef(ctx context.Context, ref ref.DoltRef) (*Comm if err != nil { return nil, err } + + if commitVal.IsGhost() { + return nil, ErrGhostCommitEncountered + } + return NewCommit(ctx, ddb.vrw, ddb.ns, commitVal) } @@ -461,6 +500,11 @@ func (ddb *DoltDB) ResolveCommitRefAtRoot(ctx context.Context, ref ref.DoltRef, if err != nil { return nil, err } + + if commitVal.IsGhost() { + return nil, ErrGhostCommitEncountered + } + return NewCommit(ctx, ddb.vrw, ddb.ns, commitVal) } @@ -584,12 +628,21 @@ func (ddb *DoltDB) ReadRootValue(ctx context.Context, h hash.Hash) (*RootValue, } // ReadCommit reads the Commit whose hash is |h|, if one exists. 
-func (ddb *DoltDB) ReadCommit(ctx context.Context, h hash.Hash) (*Commit, error) { +func (ddb *DoltDB) ReadCommit(ctx context.Context, h hash.Hash) (*OptionalCommit, error) { c, err := datas.LoadCommitAddr(ctx, ddb.vrw, h) if err != nil { return nil, err } - return NewCommit(ctx, ddb.vrw, ddb.ns, c) + + if c.IsGhost() { + return &OptionalCommit{nil, h}, nil + } + + newC, err := NewCommit(ctx, ddb.vrw, ddb.ns, c) + if err != nil { + return nil, err + } + return &OptionalCommit{newC, h}, nil } // Commit will update a branch's head value to be that of a previously committed root value hash @@ -715,7 +768,13 @@ func (ddb *DoltDB) CommitWithParentSpecs(ctx context.Context, valHash hash.Hash, if err != nil { return nil, err } - parentCommits = append(parentCommits, cm) + + hardCommit, ok := cm.ToCommit() + if !ok { + return nil, ErrGhostCommitEncountered + } + + parentCommits = append(parentCommits, hardCommit) } return ddb.CommitWithParentCommits(ctx, valHash, dref, parentCommits, cm) } @@ -784,6 +843,10 @@ func (ddb *DoltDB) CommitValue(ctx context.Context, dref ref.DoltRef, val types. return nil, err } + if dc.IsGhost() { + return nil, ErrGhostCommitEncountered + } + return NewCommit(ctx, ddb.vrw, ddb.ns, dc) } @@ -844,13 +907,13 @@ func (ddb *DoltDB) Format() *types.NomsBinFormat { // ResolveParent returns the n-th ancestor of a given commit (direct parent is index 0). error return value will be // non-nil in the case that the commit cannot be resolved, there aren't as many ancestors as requested, or the // underlying storage cannot be accessed. 
-func (ddb *DoltDB) ResolveParent(ctx context.Context, commit *Commit, parentIdx int) (*Commit, error) { +func (ddb *DoltDB) ResolveParent(ctx context.Context, commit *Commit, parentIdx int) (*OptionalCommit, error) { return commit.GetParent(ctx, parentIdx) } -func (ddb *DoltDB) ResolveAllParents(ctx context.Context, commit *Commit) ([]*Commit, error) { +func (ddb *DoltDB) ResolveAllParents(ctx context.Context, commit *Commit) ([]*OptionalCommit, error) { num := commit.NumParents() - resolved := make([]*Commit, num) + resolved := make([]*OptionalCommit, num) for i := 0; i < num; i++ { parent, err := ddb.ResolveParent(ctx, commit, i) if err != nil { @@ -1393,6 +1456,10 @@ func (ddb *DoltDB) CommitWithWorkingSet( return nil, err } + if dc.IsGhost() { + return nil, ErrGhostCommitEncountered + } + return NewCommit(ctx, ddb.vrw, ddb.ns, dc) } @@ -1554,8 +1621,9 @@ func (ddb *DoltDB) PullChunks( srcDB *DoltDB, targetHashes []hash.Hash, statsCh chan pull.Stats, + skipHashes hash.HashSet, ) error { - return pullHash(ctx, ddb.db, srcDB.db, targetHashes, tempDir, statsCh) + return pullHash(ctx, ddb.db, srcDB.db, targetHashes, tempDir, statsCh, skipHashes) } func pullHash( @@ -1564,10 +1632,11 @@ func pullHash( targetHashes []hash.Hash, tempDir string, statsCh chan pull.Stats, + skipHashes hash.HashSet, ) error { srcCS := datas.ChunkStoreFromDatabase(srcDB) destCS := datas.ChunkStoreFromDatabase(destDB) - waf := types.WalkAddrsForNBF(srcDB.Format()) + waf := types.WalkAddrsForNBF(srcDB.Format(), skipHashes) if datas.CanUsePuller(srcDB) && datas.CanUsePuller(destDB) { puller, err := pull.NewPuller(ctx, tempDir, defaultChunksPerTF, srcCS, destCS, waf, targetHashes, statsCh) @@ -1866,3 +1935,10 @@ func (ddb *DoltDB) GetStashRootAndHeadCommitAtIdx(ctx context.Context, idx int) return getStashAtIdx(ctx, ds, ddb.vrw, ddb.NodeStore(), idx) } + +// PersistGhostCommits persists the set of ghost commits to the database. 
This is how the application layer passes +// information about ghost commits to the storage layer. This can be called multiple times over the course of performing +// a shallow clone, but should not be called after the clone is complete. +func (ddb *DoltDB) PersistGhostCommits(ctx context.Context, ghostCommits hash.HashSet) error { + return ddb.db.Database.PersistGhostCommitIDs(ctx, ghostCommits) +} diff --git a/go/libraries/doltcore/doltdb/doltdb_test.go b/go/libraries/doltcore/doltdb/doltdb_test.go index 0b8e4577f8e..5a792f19a8b 100644 --- a/go/libraries/doltcore/doltdb/doltdb_test.go +++ b/go/libraries/doltcore/doltdb/doltdb_test.go @@ -201,11 +201,12 @@ func TestEmptyInMemoryRepoCreation(t *testing.T) { } cs, _ := NewCommitSpec("master") - commit, err := ddb.Resolve(context.Background(), cs, nil) - + optCmt, err := ddb.Resolve(context.Background(), cs, nil) if err != nil { t.Fatal("Could not find commit") } + commit, ok := optCmt.ToCommit() + assert.True(t, ok) h, err := commit.HashOf() assert.NoError(t, err) @@ -276,11 +277,12 @@ func TestLDNoms(t *testing.T) { { ddb, _ := LoadDoltDB(context.Background(), types.Format_Default, LocalDirDoltDB, filesys.LocalFS) cs, _ := NewCommitSpec("master") - commit, err := ddb.Resolve(context.Background(), cs, nil) - + optCmt, err := ddb.Resolve(context.Background(), cs, nil) if err != nil { t.Fatal("Couldn't find commit") } + commit, ok := optCmt.ToCommit() + assert.True(t, ok) meta, err := commit.GetCommitMeta(context.Background()) assert.NoError(t, err) diff --git a/go/libraries/doltcore/doltdb/stash.go b/go/libraries/doltcore/doltdb/stash.go index 5c3488dcbe0..cc53ce68a72 100644 --- a/go/libraries/doltcore/doltdb/stash.go +++ b/go/libraries/doltcore/doltdb/stash.go @@ -62,6 +62,11 @@ func getStashList(ctx context.Context, ds datas.Dataset, vrw types.ValueReadWrit if err != nil { return nil, err } + + if hc.IsGhost() { + return nil, ErrGhostCommitEncountered + } + headCommit, err := NewCommit(ctx, vrw, ns, hc) if err != 
nil { return nil, err @@ -112,6 +117,11 @@ func getStashAtIdx(ctx context.Context, ds datas.Dataset, vrw types.ValueReadWri if err != nil { return nil, nil, nil, err } + + if hc.IsGhost() { + return nil, nil, nil, ErrGhostCommitEncountered + } + headCommit, err := NewCommit(ctx, vrw, ns, hc) if err != nil { return nil, nil, nil, err diff --git a/go/libraries/doltcore/doltdb/tag.go b/go/libraries/doltcore/doltdb/tag.go index 70c2f93469c..2186e8bd91c 100644 --- a/go/libraries/doltcore/doltdb/tag.go +++ b/go/libraries/doltcore/doltdb/tag.go @@ -42,6 +42,11 @@ func NewTag(ctx context.Context, name string, ds datas.Dataset, vrw types.ValueR if err != nil { return nil, err } + + if dc.IsGhost() { + return nil, ErrGhostCommitEncountered + } + commit, err := NewCommit(ctx, vrw, ns, dc) if err != nil { return nil, err diff --git a/go/libraries/doltcore/doltdb/workingset.go b/go/libraries/doltcore/doltdb/workingset.go index e0f27a1594b..f6d299182fa 100755 --- a/go/libraries/doltcore/doltdb/workingset.go +++ b/go/libraries/doltcore/doltdb/workingset.go @@ -406,6 +406,10 @@ func newWorkingSet(ctx context.Context, name string, vrw types.ValueReadWriter, return nil, err } + if fromDCommit.IsGhost() { + return nil, ErrGhostCommitEncountered + } + commit, err := NewCommit(ctx, vrw, ns, fromDCommit) if err != nil { return nil, err @@ -458,6 +462,10 @@ func newWorkingSet(ctx context.Context, name string, vrw types.ValueReadWriter, return nil, err } + if datasOntoCommit.IsGhost() { + return nil, ErrGhostCommitEncountered + } + ontoCommit, err := NewCommit(ctx, vrw, ns, datasOntoCommit) if err != nil { return nil, err diff --git a/go/libraries/doltcore/dtestutils/testcommands/multienv.go b/go/libraries/doltcore/dtestutils/testcommands/multienv.go index b148a1ebbb7..c18de9f3049 100644 --- a/go/libraries/doltcore/dtestutils/testcommands/multienv.go +++ b/go/libraries/doltcore/dtestutils/testcommands/multienv.go @@ -196,7 +196,7 @@ func (mr *MultiRepoTestSetup) CloneDB(fromRemote, dbName 
string) { mr.Errhand(err) } - err = actions.CloneRemote(ctx, srcDB, r.Name, "", false, dEnv) + err = actions.CloneRemote(ctx, srcDB, r.Name, "", false, -1, dEnv) if err != nil { mr.Errhand(err) } diff --git a/go/libraries/doltcore/env/actions/branch.go b/go/libraries/doltcore/env/actions/branch.go index 4b9cef3741e..52f461ff0e4 100644 --- a/go/libraries/doltcore/env/actions/branch.go +++ b/go/libraries/doltcore/env/actions/branch.go @@ -96,12 +96,15 @@ func CopyBranchOnDB(ctx context.Context, ddb *doltdb.DoltDB, oldBranch, newBranc cs, _ := doltdb.NewCommitSpec(oldBranch) cm, err := ddb.Resolve(ctx, cs, nil) - if err != nil { return err } - return ddb.NewBranchAtCommit(ctx, newRef, cm, rsc) + commit, ok := cm.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } + return ddb.NewBranchAtCommit(ctx, newRef, commit, rsc) } type DeleteOptions struct { @@ -185,10 +188,14 @@ func validateBranchMergedIntoCurrentWorkingBranch(ctx context.Context, dbdata en return err } - branchHead, err := dbdata.Ddb.Resolve(ctx, branchSpec, nil) + optCmt, err := dbdata.Ddb.Resolve(ctx, branchSpec, nil) if err != nil { return err } + branchHead, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } cwbCs, err := doltdb.NewCommitSpec("HEAD") if err != nil { @@ -199,10 +206,14 @@ func validateBranchMergedIntoCurrentWorkingBranch(ctx context.Context, dbdata en if err != nil { return err } - cwbHead, err := dbdata.Ddb.Resolve(ctx, cwbCs, headRef) + optCmt, err = dbdata.Ddb.Resolve(ctx, cwbCs, headRef) if err != nil { return err } + cwbHead, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } isMerged, err := branchHead.CanFastForwardTo(ctx, cwbHead) if err != nil { @@ -245,15 +256,23 @@ func validateBranchMergedIntoUpstream(ctx context.Context, dbdata env.DbData, br return err } - remoteBranchHead, err := remoteDb.Resolve(ctx, cs, nil) + optCmt, err := remoteDb.Resolve(ctx, cs, nil) if err != nil { return err } + 
remoteBranchHead, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } - localBranchHead, err := dbdata.Ddb.Resolve(ctx, cs, nil) + optCmt, err = dbdata.Ddb.Resolve(ctx, cs, nil) if err != nil { return err } + localBranchHead, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } canFF, err := localBranchHead.CanFastForwardTo(ctx, remoteBranchHead) if err != nil { @@ -315,11 +334,16 @@ func CreateBranchOnDB(ctx context.Context, ddb *doltdb.DoltDB, newBranch, starti return err } - cm, err := ddb.Resolve(ctx, cs, headRef) + optCmt, err := ddb.Resolve(ctx, cs, headRef) if err != nil { return err } + cm, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } + err = ddb.NewBranchAtCommit(ctx, branchRef, cm, rsc) if err != nil { return err @@ -355,22 +379,21 @@ func MaybeGetCommit(ctx context.Context, dEnv *env.DoltEnv, str string) (*doltdb if err != nil { return nil, err } - cm, err := dEnv.DoltDB.Resolve(ctx, cs, headRef) - - if errors.Is(err, doltdb.ErrBranchNotFound) { + optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, headRef) + if err != nil && errors.Is(err, doltdb.ErrBranchNotFound) { return nil, nil } - - switch err { - case nil: - return cm, nil - - case doltdb.ErrHashNotFound, doltdb.ErrBranchNotFound: + if err != nil && errors.Is(err, doltdb.ErrHashNotFound) { return nil, nil - - default: + } + if err != nil { return nil, err } + + cm, ok := optCmt.ToCommit() + if ok { + return cm, nil + } } return nil, nil diff --git a/go/libraries/doltcore/env/actions/checkout.go b/go/libraries/doltcore/env/actions/checkout.go index 4f62d801205..cf3747776a1 100644 --- a/go/libraries/doltcore/env/actions/checkout.go +++ b/go/libraries/doltcore/env/actions/checkout.go @@ -235,11 +235,16 @@ func BranchHeadRoot(ctx context.Context, db *doltdb.DoltDB, brName string) (*dol return nil, doltdb.RootValueUnreadable{RootType: doltdb.HeadRoot, Cause: err} } - cm, err := db.Resolve(ctx, cs, nil) + optCmt, err 
:= db.Resolve(ctx, cs, nil) if err != nil { return nil, doltdb.RootValueUnreadable{RootType: doltdb.HeadRoot, Cause: err} } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } + branchRoot, err := cm.GetRootValue(ctx) if err != nil { return nil, err diff --git a/go/libraries/doltcore/env/actions/clone.go b/go/libraries/doltcore/env/actions/clone.go index ab1f751f1b7..34b8b8721e9 100644 --- a/go/libraries/doltcore/env/actions/clone.go +++ b/go/libraries/doltcore/env/actions/clone.go @@ -90,7 +90,7 @@ func EnvForClone(ctx context.Context, nbf *types.NomsBinFormat, r env.Remote, di return dEnv, nil } -func cloneProg(eventCh <-chan pull.TableFileEvent) { +func clonePrint(eventCh <-chan pull.TableFileEvent) { var ( chunksC int64 chunksDownloading int64 @@ -158,20 +158,29 @@ func sortedKeys(m map[string]iohelp.ReadStats) []string { } // CloneRemote - common entry point for both dolt_clone() and `dolt clone` -func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch string, singleBranch bool, dEnv *env.DoltEnv) error { - eventCh := make(chan pull.TableFileEvent, 128) +// The database must be initialized with a remote before calling this function. +// +// The `branch` parameter is the branch to clone. If it is empty, the default branch is used. +func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch string, singleBranch bool, depth int, dEnv *env.DoltEnv) error { + // We support two forms of cloning: full and shallow. These two approaches have little in common, with the exception + // of the first and last steps: determining the branch to check out and setting the working set to the checked out commit.
- wg := &sync.WaitGroup{} - wg.Add(1) - go func() { - defer wg.Done() - cloneProg(eventCh) - }() + srcRefHashes, branch, err := getSrcRefs(ctx, branch, srcDB, dEnv) + if err != nil { + return fmt.Errorf("%w; %s", ErrCloneFailed, err.Error()) + } + if remoteName == "" { + remoteName = "origin" + } - err := Clone(ctx, srcDB, dEnv.DoltDB, eventCh) - close(eventCh) + var checkedOutCommit *doltdb.Commit - wg.Wait() + // Step 1) Pull the remote information we care about to a local disk. + if depth <= 0 { + checkedOutCommit, err = fullClone(ctx, srcDB, dEnv, srcRefHashes, branch, remoteName, singleBranch) + } else { + checkedOutCommit, err = shallowCloneDataPull(ctx, dEnv.DbData(), srcDB, remoteName, branch, depth) + } if err != nil { if err == pull.ErrNoData { @@ -180,11 +189,48 @@ func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch s return fmt.Errorf("%w; %s", ErrCloneFailed, err.Error()) } - // Get all the refs from the remote. These branch refs will be translated to remote branch refs, tags will - // be preserved, and all other refs will be ignored. 
- srcRefHashes, err := dEnv.DoltDB.GetRefsWithHashes(ctx) + // TODO: make this interface take a DoltRef and marshal it automatically + err = dEnv.RepoStateWriter().SetCWBHeadRef(ctx, ref.MarshalableRef{Ref: ref.NewBranchRef(branch)}) if err != nil { - return fmt.Errorf("%w; %s", ErrCloneFailed, err.Error()) + return err + } + + rootVal, err := checkedOutCommit.GetRootValue(ctx) + if err != nil { + return fmt.Errorf("%w: %s; %s", ErrFailedToGetRootValue, branch, err.Error()) + } + + wsRef, err := ref.WorkingSetRefForHead(ref.NewBranchRef(branch)) + if err != nil { + return err + } + + // Retrieve existing working set, delete if it exists + ws, err := dEnv.DoltDB.ResolveWorkingSet(ctx, wsRef) + if ws != nil { + dEnv.DoltDB.DeleteWorkingSet(ctx, wsRef) + } + ws = doltdb.EmptyWorkingSet(wsRef) + + // Update to use current Working and Staged root + err = dEnv.UpdateWorkingSet(ctx, ws.WithWorkingRoot(rootVal).WithStagedRoot(rootVal)) + if err != nil { + return err + } + + return nil +} + +// getSrcRefs returns the refs from the source database and the branch to check out. The input branch is used if it is +// not empty, otherwise the default branch is determined and returned. +func getSrcRefs(ctx context.Context, branch string, srcDB *doltdb.DoltDB, dEnv *env.DoltEnv) ([]doltdb.RefWithHash, string, error) { + srcRefHashes, err := srcDB.GetRefsWithHashes(ctx) + if err != nil { + return nil, "", err + } + + if len(srcRefHashes) == 0 { + return nil, "", ErrNoDataAtRemote } branches := make([]ref.DoltRef, 0, len(srcRefHashes)) @@ -198,31 +244,40 @@ func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch s branch = env.GetDefaultBranch(dEnv, branches) } - // If we couldn't find a branch but the repo cloned successfully, it's empty. Initialize it instead of pulling from - // the remote.
- if branch == "" { - if err = InitEmptyClonedRepo(ctx, dEnv); err != nil { - return nil - } - branch = env.GetDefaultInitBranch(dEnv.Config) - } + return srcRefHashes, branch, nil +} - cs, _ := doltdb.NewCommitSpec(branch) - cm, err := dEnv.DoltDB.Resolve(ctx, cs, nil) +func fullClone(ctx context.Context, srcDB *doltdb.DoltDB, dEnv *env.DoltEnv, srcRefHashes []doltdb.RefWithHash, branch, remoteName string, singleBranch bool) (*doltdb.Commit, error) { + eventCh := make(chan pull.TableFileEvent, 128) + wg := &sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + clonePrint(eventCh) + }() - if err != nil { - return fmt.Errorf("%w: %s; %s", ErrFailedToGetBranch, branch, err.Error()) + err := srcDB.Clone(ctx, dEnv.DoltDB, eventCh) - } + close(eventCh) + wg.Wait() + if err != nil { + // Surface a failed clone to the caller instead of masking it with the Resolve result below. + return nil, err + } - rootVal, err := cm.GetRootValue(ctx) + cs, _ := doltdb.NewCommitSpec(branch) + optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, nil) if err != nil { - return fmt.Errorf("%w: %s; %s", ErrFailedToGetRootValue, branch, err.Error()) + return nil, err + } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered } err = dEnv.DoltDB.DeleteAllRefs(ctx) if err != nil { - return err + return nil, err } // Preserve only branch and tag references from the remote. Branches are translated into remote branches, tags are preserved. @@ -233,7 +288,7 @@ func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch s remoteRef := ref.NewRemoteRef(remoteName, br.GetPath()) err = dEnv.DoltDB.SetHead(ctx, remoteRef, refHash.Hash) if err != nil { - return fmt.Errorf("%w: %s; %s", ErrFailedToCreateRemoteRef, remoteRef.String(), err.Error()) + return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateRemoteRef, remoteRef.String(), err.Error()) } } @@ -241,43 +296,64 @@ func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch s // This is the only local branch after the clone is complete.
err = dEnv.DoltDB.SetHead(ctx, br, refHash.Hash) if err != nil { - return fmt.Errorf("%w: %s; %s", ErrFailedToCreateLocalBranch, br.String(), err.Error()) + return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateLocalBranch, br.String(), err.Error()) } } } else if refHash.Ref.GetType() == ref.TagRefType { tr := refHash.Ref.(ref.TagRef) err = dEnv.DoltDB.SetHead(ctx, tr, refHash.Hash) if err != nil { - return fmt.Errorf("%w: %s; %s", ErrFailedToCreateTagRef, tr.String(), err.Error()) + return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateTagRef, tr.String(), err.Error()) } } } - // TODO: make this interface take a DoltRef and marshal it automatically - err = dEnv.RepoStateWriter().SetCWBHeadRef(ctx, ref.MarshalableRef{Ref: ref.NewBranchRef(branch)}) + return cm, nil +} + +// shallowCloneDataPull is a shallow clone specific helper function to pull only the data required to show the given branch +// at the depth given. +func shallowCloneDataPull(ctx context.Context, destData env.DbData, srcDB *doltdb.DoltDB, remoteName, branch string, depth int) (*doltdb.Commit, error) { + remotes, err := destData.Rsr.GetRemotes() if err != nil { - return err + return nil, err + } + remote, ok := remotes.Get(remoteName) + if !ok { + // By the time we get to this point, the remote should be created, so this should never happen. 
+ return nil, fmt.Errorf("remote %s not found", remoteName) } - wsRef, err := ref.WorkingSetRefForHead(ref.NewBranchRef(branch)) + specs, err := env.ParseRefSpecs([]string{branch}, destData.Rsr, remote) if err != nil { - return err + return nil, err } - // Retrieve existing working set, delete if it exists - ws, err := dEnv.DoltDB.ResolveWorkingSet(ctx, wsRef) - if ws != nil { - dEnv.DoltDB.DeleteWorkingSet(ctx, wsRef) + err = ShallowFetchRefSpec(ctx, destData, srcDB, specs[0], &remote, depth) + if err != nil { + return nil, err } - ws = doltdb.EmptyWorkingSet(wsRef) - // Update to use current Working and Staged root - err = dEnv.UpdateWorkingSet(ctx, ws.WithWorkingRoot(rootVal).WithStagedRoot(rootVal)) + // After the fetch approach, we just need to create the local branch. The single remote branch already exists. + br := ref.NewBranchRef(branch) + + cmt, err := srcDB.ResolveCommitRef(ctx, br) if err != nil { - return err + return nil, err } - return nil + hsh, err := cmt.HashOf() + if err != nil { + return nil, err + } + + // This is the only local branch after the clone is complete. + err = destData.Ddb.SetHead(ctx, br, hsh) + if err != nil { + return nil, err + } + + return cmt, nil } // InitEmptyClonedRepo inits an empty, newly cloned repo. This would be unnecessary if we properly initialized the diff --git a/go/libraries/doltcore/env/actions/commitwalk/commitwalk.go b/go/libraries/doltcore/env/actions/commitwalk/commitwalk.go index c954746a953..dbd1bcfa120 100644 --- a/go/libraries/doltcore/env/actions/commitwalk/commitwalk.go +++ b/go/libraries/doltcore/env/actions/commitwalk/commitwalk.go @@ -26,7 +26,7 @@ import ( type c struct { ddb *doltdb.DoltDB - commit *doltdb.Commit + commit *doltdb.OptionalCommit meta *datas.CommitMeta hash hash.Hash height uint64 @@ -63,10 +63,27 @@ func (q *q) Swap(i, j int) { q.pending[i], q.pending[j] = q.pending[j], q.pending[i] } +// Less returns true if the commit at index i is "less" than the commit at index j. 
It may be the case that you are comparing +// two resolved commits, two ghost commits, or a resolved commit and a ghost commit. Ghost commits will always be "less" than +// resolved commits. If both commits are resolved, then the commit with the higher height is "less". If the heights are equal, then +// the commit with the newer timestamp is "less". Finally if both commits are ghost commits, we don't really have enough +// information to compare on, so we just compare the hashes to ensure that the results are stable. func (q *q) Less(i, j int) bool { + _, okI := q.pending[i].commit.ToCommit() + _, okJ := q.pending[j].commit.ToCommit() + + if !okI && okJ { + return true + } else if okI && !okJ { + return false + } else if !okI && !okJ { + return q.pending[i].hash.String() < q.pending[j].hash.String() + } + if q.pending[i].height > q.pending[j].height { return true } + if q.pending[i].height == q.pending[j].height { return q.pending[i].meta.UserTimestamp > q.pending[j].meta.UserTimestamp } @@ -110,7 +127,7 @@ func (q *q) SetInvisible(ctx context.Context, ddb *doltdb.DoltDB, id hash.Hash) return nil } -func load(ctx context.Context, ddb *doltdb.DoltDB, h hash.Hash) (*doltdb.Commit, error) { +func load(ctx context.Context, ddb *doltdb.DoltDB, h hash.Hash) (*doltdb.OptionalCommit, error) { cs, err := doltdb.NewCommitSpec(h.String()) if err != nil { return nil, err @@ -127,20 +144,26 @@ func (q *q) Get(ctx context.Context, ddb *doltdb.DoltDB, id hash.Hash) (*c, erro return l, nil } - l, err := load(ctx, ddb, id) + optCmt, err := load(ctx, ddb, id) if err != nil { return nil, err } - h, err := l.Height() + + commit, ok := optCmt.ToCommit() + if !ok { + return &c{ddb: ddb, commit: optCmt, hash: id}, nil + } + + h, err := commit.Height() if err != nil { return nil, err } - meta, err := l.GetCommitMeta(ctx) + meta, err := commit.GetCommitMeta(ctx) if err != nil { return nil, err } - c := &c{ddb: ddb, commit: l, meta: meta, height: h, hash: id} + c := &c{ddb: ddb, commit:
&doltdb.OptionalCommit{Commit: commit, Addr: id}, meta: meta, height: h, hash: id} q.loaded[id] = c return c, nil } @@ -159,13 +182,13 @@ func newQueue() *q { // // Roughly mimics `git log main..feature` or `git log main...feature` (if // more than one `includedHead` is provided). -func GetDotDotRevisions(ctx context.Context, includedDB *doltdb.DoltDB, includedHeads []hash.Hash, excludedDB *doltdb.DoltDB, excludedHeads []hash.Hash, num int) ([]*doltdb.Commit, error) { +func GetDotDotRevisions(ctx context.Context, includedDB *doltdb.DoltDB, includedHeads []hash.Hash, excludedDB *doltdb.DoltDB, excludedHeads []hash.Hash, num int) ([]*doltdb.OptionalCommit, error) { itr, err := GetDotDotRevisionsIterator(ctx, includedDB, includedHeads, excludedDB, excludedHeads, nil) if err != nil { return nil, err } - var commitList []*doltdb.Commit + var commitList []*doltdb.OptionalCommit for num < 0 || len(commitList) < num { _, commit, err := itr.Next(ctx) if err == io.EOF { @@ -180,29 +203,22 @@ func GetDotDotRevisions(ctx context.Context, includedDB *doltdb.DoltDB, included return commitList, nil } -// GetTopologicalOrderCommits returns the commits reachable from the commits in `startCommitHashes` -// in reverse topological order, with tiebreaking done by the height of the commit graph -- higher commits -// appear first. Remaining ties are broken by timestamp; newer commits appear first. 
-func GetTopologicalOrderCommits(ctx context.Context, ddb *doltdb.DoltDB, startCommitHashes []hash.Hash) ([]*doltdb.Commit, error) { - return GetTopNTopoOrderedCommitsMatching(ctx, ddb, startCommitHashes, -1, nil) -} - // GetTopologicalOrderCommitIterator returns an iterator for commits generated with the same semantics as // GetTopologicalOrderCommits -func GetTopologicalOrderIterator(ctx context.Context, ddb *doltdb.DoltDB, startCommitHashes []hash.Hash, matchFn func(*doltdb.Commit) (bool, error)) (doltdb.CommitItr, error) { +func GetTopologicalOrderIterator(ctx context.Context, ddb *doltdb.DoltDB, startCommitHashes []hash.Hash, matchFn func(*doltdb.OptionalCommit) (bool, error)) (doltdb.CommitItr, error) { return newCommiterator(ctx, ddb, startCommitHashes, matchFn) } type commiterator struct { ddb *doltdb.DoltDB startCommitHashes []hash.Hash - matchFn func(*doltdb.Commit) (bool, error) + matchFn func(*doltdb.OptionalCommit) (bool, error) q *q } var _ doltdb.CommitItr = (*commiterator)(nil) -func newCommiterator(ctx context.Context, ddb *doltdb.DoltDB, startCommitHashes []hash.Hash, matchFn func(*doltdb.Commit) (bool, error)) (*commiterator, error) { +func newCommiterator(ctx context.Context, ddb *doltdb.DoltDB, startCommitHashes []hash.Hash, matchFn func(*doltdb.OptionalCommit) (bool, error)) (*commiterator, error) { itr := &commiterator{ ddb: ddb, startCommitHashes: startCommitHashes, @@ -218,23 +234,29 @@ func newCommiterator(ctx context.Context, ddb *doltdb.DoltDB, startCommitHashes } // Next implements doltdb.CommitItr -func (i *commiterator) Next(ctx context.Context) (hash.Hash, *doltdb.Commit, error) { - if i.q.NumVisiblePending() > 0 { - nextC := i.q.PopPending() - parents, err := nextC.commit.ParentHashes(ctx) - if err != nil { - return hash.Hash{}, nil, err +func (iter *commiterator) Next(ctx context.Context) (hash.Hash, *doltdb.OptionalCommit, error) { + if iter.q.NumVisiblePending() > 0 { + nextC := iter.q.PopPending() + + var err error + parents := 
[]hash.Hash{} + commit, ok := nextC.commit.ToCommit() + if ok { + parents, err = commit.ParentHashes(ctx) + if err != nil { + return hash.Hash{}, nil, err + } } for _, parentID := range parents { - if err := i.q.AddPendingIfUnseen(ctx, nextC.ddb, parentID); err != nil { + if err := iter.q.AddPendingIfUnseen(ctx, nextC.ddb, parentID); err != nil { return hash.Hash{}, nil, err } } matches := true - if i.matchFn != nil { - matches, err = i.matchFn(nextC.commit) + if iter.matchFn != nil { + matches, err = iter.matchFn(nextC.commit) if err != nil { return hash.Hash{}, nil, err @@ -242,10 +264,10 @@ func (i *commiterator) Next(ctx context.Context) (hash.Hash, *doltdb.Commit, err } if matches { - return nextC.hash, nextC.commit, nil + return nextC.hash, &doltdb.OptionalCommit{Commit: commit, Addr: nextC.hash}, nil } - return i.Next(ctx) + return iter.Next(ctx) } return hash.Hash{}, nil, io.EOF @@ -262,33 +284,9 @@ func (i *commiterator) Reset(ctx context.Context) error { return nil } -// GetTopNTopoOrderedCommitsMatching returns the first N commits (If N <= 0 then all commits) reachable from the commits in -// `startCommitHashes` in reverse topological order, with tiebreaking done by the height of the commit graph -- higher -// commits appear first. Remaining ties are broken by timestamp; newer commits appear first. 
-func GetTopNTopoOrderedCommitsMatching(ctx context.Context, ddb *doltdb.DoltDB, startCommitHashes []hash.Hash, n int, matchFn func(*doltdb.Commit) (bool, error)) ([]*doltdb.Commit, error) { - itr, err := GetTopologicalOrderIterator(ctx, ddb, startCommitHashes, matchFn) - if err != nil { - return nil, err - } - - var commitList []*doltdb.Commit - for n < 0 || len(commitList) < n { - _, commit, err := itr.Next(ctx) - if err == io.EOF { - break - } else if err != nil { - return nil, err - } - - commitList = append(commitList, commit) - } - - return commitList, nil -} - // GetDotDotRevisionsIterator returns an iterator for commits generated with the same semantics as // GetDotDotRevisions -func GetDotDotRevisionsIterator(ctx context.Context, includedDdb *doltdb.DoltDB, startCommitHashes []hash.Hash, excludedDdb *doltdb.DoltDB, excludingCommitHashes []hash.Hash, matchFn func(*doltdb.Commit) (bool, error)) (doltdb.CommitItr, error) { +func GetDotDotRevisionsIterator(ctx context.Context, includedDdb *doltdb.DoltDB, startCommitHashes []hash.Hash, excludedDdb *doltdb.DoltDB, excludingCommitHashes []hash.Hash, matchFn func(*doltdb.OptionalCommit) (bool, error)) (doltdb.CommitItr, error) { return newDotDotCommiterator(ctx, includedDdb, startCommitHashes, excludedDdb, excludingCommitHashes, matchFn) } @@ -297,13 +295,13 @@ type dotDotCommiterator struct { excludedDdb *doltdb.DoltDB startCommitHashes []hash.Hash excludingCommitHashes []hash.Hash - matchFn func(*doltdb.Commit) (bool, error) + matchFn func(*doltdb.OptionalCommit) (bool, error) q *q } var _ doltdb.CommitItr = (*dotDotCommiterator)(nil) -func newDotDotCommiterator(ctx context.Context, includedDdb *doltdb.DoltDB, startCommitHashes []hash.Hash, excludedDdb *doltdb.DoltDB, excludingCommitHashes []hash.Hash, matchFn func(*doltdb.Commit) (bool, error)) (*dotDotCommiterator, error) { +func newDotDotCommiterator(ctx context.Context, includedDdb *doltdb.DoltDB, startCommitHashes []hash.Hash, excludedDdb *doltdb.DoltDB, 
excludingCommitHashes []hash.Hash, matchFn func(*doltdb.OptionalCommit) (bool, error)) (*dotDotCommiterator, error) { itr := &dotDotCommiterator{ includedDdb: includedDdb, excludedDdb: excludedDdb, @@ -321,10 +319,16 @@ func newDotDotCommiterator(ctx context.Context, includedDdb *doltdb.DoltDB, star } // Next implements doltdb.CommitItr -func (i *dotDotCommiterator) Next(ctx context.Context) (hash.Hash, *doltdb.Commit, error) { +func (i *dotDotCommiterator) Next(ctx context.Context) (hash.Hash, *doltdb.OptionalCommit, error) { if i.q.NumVisiblePending() > 0 { nextC := i.q.PopPending() - parents, err := nextC.commit.ParentHashes(ctx) + + commit, ok := nextC.commit.ToCommit() + if !ok { + return nextC.hash, nextC.commit, nil + } + + parents, err := commit.ParentHashes(ctx) if err != nil { return hash.Hash{}, nil, err } diff --git a/go/libraries/doltcore/env/actions/commitwalk/commitwalk_test.go b/go/libraries/doltcore/env/actions/commitwalk/commitwalk_test.go index 61886622610..020d2bf9ca2 100644 --- a/go/libraries/doltcore/env/actions/commitwalk/commitwalk_test.go +++ b/go/libraries/doltcore/env/actions/commitwalk/commitwalk_test.go @@ -68,8 +68,10 @@ func TestGetDotDotRevisions(t *testing.T) { cs, err := doltdb.NewCommitSpec(env.DefaultInitBranch) require.NoError(t, err) - commit, err := dEnv.DoltDB.Resolve(context.Background(), cs, nil) + opt, err := dEnv.DoltDB.Resolve(context.Background(), cs, nil) require.NoError(t, err) + commit, ok := opt.ToCommit() + require.True(t, ok) rv, err := commit.GetRootValue(context.Background()) require.NoError(t, err) @@ -238,8 +240,24 @@ func TestGetDotDotRevisions(t *testing.T) { assertEqualHashes(t, featureCommits[1], res[2]) } -func assertEqualHashes(t *testing.T, lc, rc *doltdb.Commit) { - assert.Equal(t, mustGetHash(t, lc), mustGetHash(t, rc)) +func assertEqualHashes(t *testing.T, lc, rc interface{}) { + leftCm, ok := lc.(*doltdb.Commit) + if !ok { + opt, ok := lc.(*doltdb.OptionalCommit) + require.True(t, ok) + leftCm, ok = 
opt.ToCommit() + require.True(t, ok) + } + + rightCm, ok := rc.(*doltdb.Commit) + if !ok { + opt, ok := rc.(*doltdb.OptionalCommit) + require.True(t, ok) + rightCm, ok = opt.ToCommit() + require.True(t, ok) + } + + assert.Equal(t, mustGetHash(t, leftCm), mustGetHash(t, rightCm)) } func mustCreateCommit(t *testing.T, ddb *doltdb.DoltDB, bn string, rvh hash.Hash, parents ...*doltdb.Commit) *doltdb.Commit { @@ -268,7 +286,7 @@ func mustForkDB(t *testing.T, fromDB *doltdb.DoltDB, bn string, cm *doltdb.Commi for range ps { } }() - err = forkEnv.DoltDB.PullChunks(context.Background(), "", fromDB, []hash.Hash{h}, ps) + err = forkEnv.DoltDB.PullChunks(context.Background(), "", fromDB, []hash.Hash{h}, ps, nil) if err == pull.ErrDBUpToDate { err = nil } diff --git a/go/libraries/doltcore/env/actions/remotes.go b/go/libraries/doltcore/env/actions/remotes.go index 6172cc425fb..21b9adfcca1 100644 --- a/go/libraries/doltcore/env/actions/remotes.go +++ b/go/libraries/doltcore/env/actions/remotes.go @@ -67,7 +67,7 @@ func Push(ctx context.Context, tempTableDir string, mode ref.UpdateMode, destRef return err } - err = destDB.PullChunks(ctx, tempTableDir, srcDB, []hash.Hash{h}, statsCh) + err = destDB.PullChunks(ctx, tempTableDir, srcDB, []hash.Hash{h}, statsCh, nil) if err != nil { return err @@ -187,7 +187,7 @@ func PushTag(ctx context.Context, tempTableDir string, destRef ref.TagRef, srcDB return err } - err = destDB.PullChunks(ctx, tempTableDir, srcDB, []hash.Hash{addr}, statsCh) + err = destDB.PullChunks(ctx, tempTableDir, srcDB, []hash.Hash{addr}, statsCh, nil) if err != nil { return err @@ -223,10 +223,14 @@ func PushToRemoteBranch(ctx context.Context, rsr env.RepoStateReader, tempTableD if err != nil { return err } - cm, err := localDB.Resolve(ctx, cs, headRef) + optCmt, err := localDB.Resolve(ctx, cs, headRef) if err != nil { return fmt.Errorf("%w; refspec not found: '%s'; %s", ref.ErrInvalidRefSpec, srcRef.GetPath(), err.Error()) } + cm, ok := optCmt.ToCommit() + if !ok { 
+ return doltdb.ErrGhostCommitEncountered + } newCtx, cancelFunc := context.WithCancel(ctx) wg, statsCh := progStarter(newCtx) @@ -306,7 +310,7 @@ func FetchCommit(ctx context.Context, tempTablesDir string, srcDB, destDB *doltd return err } - return destDB.PullChunks(ctx, tempTablesDir, srcDB, []hash.Hash{h}, statsCh) + return destDB.PullChunks(ctx, tempTablesDir, srcDB, []hash.Hash{h}, statsCh, nil) } // FetchTag takes a fetches a commit tag and all underlying data from a remote source database to the local destination database. @@ -316,7 +320,7 @@ func FetchTag(ctx context.Context, tempTableDir string, srcDB, destDB *doltdb.Do return err } - return destDB.PullChunks(ctx, tempTableDir, srcDB, []hash.Hash{addr}, statsCh) + return destDB.PullChunks(ctx, tempTableDir, srcDB, []hash.Hash{addr}, statsCh, nil) } // Clone pulls all data from a remote source database to a local destination database. @@ -352,8 +356,17 @@ func FetchFollowTags(ctx context.Context, tempTableDir string, srcDB, destDB *do if err != nil { return true, err } - if !has { - // neither tag nor commit has been fetched + if has { + // We _might_ have it. We need to check if it's a ghost, in which case we'll skip this commit. + optCmt, err := destDB.ReadCommit(ctx, cmHash) + if err != nil { + return true, err + } + _, ok := optCmt.ToCommit() + if !ok { + return false, nil + } + } else { return false, nil } @@ -405,11 +418,15 @@ func FetchRemoteBranch( } cs, _ := doltdb.NewCommitSpec(srcRef.String()) - srcDBCommit, err := srcDB.Resolve(ctx, cs, nil) - + optCmt, err := srcDB.Resolve(ctx, cs, nil) if err != nil { return nil, fmt.Errorf("unable to find '%s' on '%s'; %w", srcRef.GetPath(), rem.Name, err) } + srcDBCommit, ok := optCmt.ToCommit() + if !ok { + // This really should never happen. The source db is always expected to have everything. + return nil, doltdb.ErrGhostCommitRuntimeFailure + } // The code is structured this way (different paths for progress chan v. 
not) so that the linter can understand there // isn't a context leak happening on one path @@ -444,6 +461,24 @@ func FetchRemoteBranch( return srcDBCommit, nil } +// ShallFetchRefSpec fetches the remote refSpec from the source database to the destination database. Currently it is only +// used for shallow clones. +func ShallowFetchRefSpec( + ctx context.Context, + dbData env.DbData, + srcDB *doltdb.DoltDB, + refSpecs ref.RemoteRefSpec, + remote *env.Remote, + depth int, +) error { + + if depth < 1 { + return fmt.Errorf("invalid depth: %d", depth) + } + + return fetchRefSpecsWithDepth(ctx, dbData, srcDB, []ref.RemoteRefSpec{refSpecs}, remote, ref.ForceUpdate, depth, nil, nil) +} + // FetchRefSpecs is the common SQL and CLI entrypoint for fetching branches, tags, and heads from a remote. // This function takes dbData which is a env.DbData object for handling repoState read and write, and srcDB is // a remote *doltdb.DoltDB object that is used to fetch remote branches from. @@ -452,10 +487,24 @@ func FetchRefSpecs( dbData env.DbData, srcDB *doltdb.DoltDB, refSpecs []ref.RemoteRefSpec, - remote env.Remote, + remote *env.Remote, mode ref.UpdateMode, progStarter ProgStarter, progStopper ProgStopper, +) error { + return fetchRefSpecsWithDepth(ctx, dbData, srcDB, refSpecs, remote, mode, -1, progStarter, progStopper) +} + +func fetchRefSpecsWithDepth( + ctx context.Context, + dbData env.DbData, + srcDB *doltdb.DoltDB, + refSpecs []ref.RemoteRefSpec, + remote *env.Remote, + mode ref.UpdateMode, + depth int, + progStarter ProgStarter, + progStopper ProgStopper, ) error { var branchRefs []doltdb.RefWithHash err := srcDB.VisitRefsOfType(ctx, ref.HeadRefTypes, func(r ref.DoltRef, addr hash.Hash) error { @@ -491,12 +540,43 @@ func FetchRefSpecs( } } + shallowClone := depth > 0 + skipCmts := hash.NewHashSet() + allToFetch := toFetch + if shallowClone { + skipCmts, err = buildInitialSkipList(ctx, srcDB, toFetch) + if err != nil { + return err + } + curToFetch := toFetch + var 
newToFetch []hash.Hash + depth-- + for skipCmts.Size() > 0 && depth > 0 { + newToFetch, skipCmts, err = updateSkipList(ctx, srcDB, curToFetch, skipCmts) + if err != nil { + return err + } + + allToFetch = append(allToFetch, newToFetch...) + curToFetch = newToFetch + depth-- + } + } + toFetch = allToFetch + // Now we fetch all the new HEADs we need. tmpDir, err := dbData.Rsw.TempTableFilesDir() if err != nil { return err } + if skipCmts.Size() > 0 { + err = dbData.Ddb.PersistGhostCommits(ctx, skipCmts) + if err != nil { + return err + } + } + err = func() error { newCtx := ctx var statsCh chan pull.Stats @@ -509,7 +589,7 @@ func FetchRefSpecs( defer progStopper(cancelFunc, wg, statsCh) } - err = dbData.Ddb.PullChunks(ctx, tmpDir, srcDB, toFetch, statsCh) + err = dbData.Ddb.PullChunks(ctx, tmpDir, srcDB, toFetch, statsCh, skipCmts) if err == pull.ErrDBUpToDate { err = nil } @@ -520,10 +600,16 @@ func FetchRefSpecs( } for _, newHead := range newHeads { - commit, err := dbData.Ddb.ReadCommit(ctx, newHead.Hash) + optCmt, err := dbData.Ddb.ReadCommit(ctx, newHead.Hash) if err != nil { return err } + commit, ok := optCmt.ToCommit() + if !ok { + // Dest DB should have each hash in `newHeads` now. If we can't read a commit, something is wrong. + return doltdb.ErrGhostCommitRuntimeFailure + } + remoteTrackRef := newHead.Ref if mode.Force { @@ -556,20 +642,73 @@ func FetchRefSpecs( } if mode.Prune { - err = pruneBranches(ctx, dbData, remote, newHeads) + err = pruneBranches(ctx, dbData, *remote, newHeads) if err != nil { return err } } - err = FetchFollowTags(ctx, tmpDir, srcDB, dbData.Ddb, progStarter, progStopper) - if err != nil { - return err + if !shallowClone { + // TODO: Currently shallow clones don't pull any tags, but they could. We need to make FetchFollowTags wise + // to the skipped commits list, and then we can remove this conditional. Also, FetchFollowTags assumes that + // progStarter and progStopper are always non-nil, which we don't assume elsewhere. 
Shallow clone has no + // progress reporting, and as a result they are nil. + err = FetchFollowTags(ctx, tmpDir, srcDB, dbData.Ddb, progStarter, progStopper) + if err != nil { + return err + } } return nil } +func buildInitialSkipList(ctx context.Context, srcDB *doltdb.DoltDB, toFetch []hash.Hash) (hash.HashSet, error) { + if len(toFetch) > 1 { + return hash.HashSet{}, fmt.Errorf("runtime error: multiple refspecs not supported in shallow clone") + } + + cs, err := doltdb.NewCommitSpec(toFetch[0].String()) + if err != nil { + return hash.HashSet{}, err + } + + allCommits, err := srcDB.BootstrapShallowResolve(ctx, cs) + + return allCommits.AsHashSet(ctx) +} + +func updateSkipList(ctx context.Context, srcDB *doltdb.DoltDB, toFetch []hash.Hash, skipCmts hash.HashSet) ([]hash.Hash, hash.HashSet, error) { + newSkipList := skipCmts.Copy() + newFetchList := []hash.Hash{} + for _, h := range toFetch { + optCmt, err := srcDB.ReadCommit(ctx, h) + if err != nil { + return nil, nil, err + } + + // srcDB should always be the fully populated, so if there is a ghost commit here, someone is calling this + // function incorrectly. + commit, ok := optCmt.ToCommit() + if !ok { + return nil, nil, doltdb.ErrGhostCommitEncountered + } + + for i := 0; i < commit.NumParents(); i++ { + parent, err := commit.GetParent(ctx, i) + if err != nil { + return nil, nil, err + } + if newSkipList.Has(parent.Addr) { + newSkipList.Remove(parent.Addr) + newFetchList = append(newFetchList, parent.Addr) + } + } + + } + + return newFetchList, newSkipList, nil +} + func pruneBranches(ctx context.Context, dbData env.DbData, remote env.Remote, remoteRefs []doltdb.RefWithHash) error { remoteRefTypes := map[ref.RefType]struct{}{ ref.RemoteRefType: {}, @@ -733,7 +872,7 @@ func SyncRoots(ctx context.Context, srcDb, destDb *doltdb.DoltDB, tempTableDir s // If clone is unsupported, we can fall back to pull. 
} - err = destDb.PullChunks(ctx, tempTableDir, srcDb, []hash.Hash{srcRoot}, statsCh) + err = destDb.PullChunks(ctx, tempTableDir, srcDb, []hash.Hash{srcRoot}, statsCh, nil) if err != nil { return err } diff --git a/go/libraries/doltcore/env/actions/reset.go b/go/libraries/doltcore/env/actions/reset.go index 4c1031fe650..c677f243b4d 100644 --- a/go/libraries/doltcore/env/actions/reset.go +++ b/go/libraries/doltcore/env/actions/reset.go @@ -46,11 +46,16 @@ func resetHardTables(ctx context.Context, dbData env.DbData, cSpecStr string, ro if err != nil { return nil, doltdb.Roots{}, err } - newHead, err = ddb.Resolve(ctx, cs, headRef) + optCmt, err := ddb.Resolve(ctx, cs, headRef) if err != nil { return nil, doltdb.Roots{}, err } + var ok bool + if newHead, ok = optCmt.ToCommit(); !ok { + return nil, doltdb.Roots{}, doltdb.ErrGhostCommitEncountered + } + roots.Head, err = newHead.GetRootValue(ctx) if err != nil { return nil, doltdb.Roots{}, err @@ -238,10 +243,14 @@ func ResetSoftToRef(ctx context.Context, dbData env.DbData, cSpecStr string) (do if err != nil { return doltdb.Roots{}, err } - newHead, err := dbData.Ddb.Resolve(ctx, cs, headRef) + optCmt, err := dbData.Ddb.Resolve(ctx, cs, headRef) if err != nil { return doltdb.Roots{}, err } + newHead, ok := optCmt.ToCommit() + if !ok { + return doltdb.Roots{}, doltdb.ErrGhostCommitEncountered + } foundRoot, err := newHead.GetRootValue(ctx) if err != nil { diff --git a/go/libraries/doltcore/env/actions/tag.go b/go/libraries/doltcore/env/actions/tag.go index ee433e41fc3..3dc7b40ab2a 100644 --- a/go/libraries/doltcore/env/actions/tag.go +++ b/go/libraries/doltcore/env/actions/tag.go @@ -57,16 +57,18 @@ func CreateTagOnDB(ctx context.Context, ddb *doltdb.DoltDB, tagName, startPoint } cs, err := doltdb.NewCommitSpec(startPoint) - if err != nil { return err } - cm, err := ddb.Resolve(ctx, cs, headRef) - + optCmt, err := ddb.Resolve(ctx, cs, headRef) if err != nil { return err } + cm, ok := optCmt.ToCommit() + if !ok { + return 
doltdb.ErrGhostCommitEncountered + } meta := datas.NewTagMeta(props.TaggerName, props.TaggerEmail, props.Description) diff --git a/go/libraries/doltcore/env/actions/workspace.go b/go/libraries/doltcore/env/actions/workspace.go index 7f525532af4..5346f37a267 100644 --- a/go/libraries/doltcore/env/actions/workspace.go +++ b/go/libraries/doltcore/env/actions/workspace.go @@ -63,10 +63,14 @@ func CreateWorkspaceOnDB(ctx context.Context, ddb *doltdb.DoltDB, name, startPoi return err } - cm, err := ddb.Resolve(ctx, cs, headRef) + optCmt, err := ddb.Resolve(ctx, cs, headRef) if err != nil { return err } + cm, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } return ddb.NewWorkspaceAtCommit(ctx, workRef, cm) } @@ -118,20 +122,28 @@ func DeleteWorkspaceOnDB(ctx context.Context, dEnv *env.DoltEnv, dref ref.DoltRe return err } - m, err := ddb.Resolve(ctx, ms, nil) + optCmt, err := ddb.Resolve(ctx, ms, nil) if err != nil { return err } + m, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } cs, err := doltdb.NewCommitSpec(dref.String()) if err != nil { return err } - cm, err := ddb.Resolve(ctx, cs, nil) + optCmt, err = ddb.Resolve(ctx, cs, nil) if err != nil { return err } + cm, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } isMerged, _ := m.CanFastReverseTo(ctx, cm) if err != nil && err != doltdb.ErrUpToDate { diff --git a/go/libraries/doltcore/env/environment.go b/go/libraries/doltcore/env/environment.go index 2fafc587dc3..62ef6aef95e 100644 --- a/go/libraries/doltcore/env/environment.go +++ b/go/libraries/doltcore/env/environment.go @@ -274,11 +274,16 @@ func mergeStateToMergeState(ctx context.Context, mergeState *mergeState, db *dol panic("Corrupted repostate. 
Active merge state is not valid.") } - commit, err := db.Resolve(ctx, cs, nil) + optCmt, err := db.Resolve(ctx, cs, nil) if err != nil { return nil, err } + commit, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } + pmwh := hash.Parse(mergeState.PreMergeWorking) pmwr, err := db.ReadRootValue(ctx, pmwh) if err != nil { diff --git a/go/libraries/doltcore/merge/action.go b/go/libraries/doltcore/merge/action.go index 3c76830d43f..c144399a022 100644 --- a/go/libraries/doltcore/merge/action.go +++ b/go/libraries/doltcore/merge/action.go @@ -100,20 +100,29 @@ func NewMergeSpec( return nil, err } - headCM, err := ddb.Resolve(context.TODO(), headCS, headRef) + optCmt, err := ddb.Resolve(ctx, headCS, headRef) if err != nil { return nil, err } + headCM, ok := optCmt.ToCommit() + if !ok { + // HEAD should always resolve to a commit, so this should never happen. + return nil, doltdb.ErrGhostCommitRuntimeFailure + } mergeCS, err := doltdb.NewCommitSpec(commitSpecStr) if err != nil { return nil, err } - mergeCM, err := ddb.Resolve(context.TODO(), mergeCS, headRef) + optCmt, err = ddb.Resolve(ctx, mergeCS, headRef) if err != nil { return nil, err } + mergeCM, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } headH, err := headCM.HashOf() if err != nil { diff --git a/go/libraries/doltcore/merge/merge.go b/go/libraries/doltcore/merge/merge.go index 9b4a6d7f964..e603c8d1e4f 100644 --- a/go/libraries/doltcore/merge/merge.go +++ b/go/libraries/doltcore/merge/merge.go @@ -48,10 +48,15 @@ var ErrMultipleViolationsForRow = errors.New("multiple violations for row not su var ErrSameTblAddedTwice = goerrors.NewKind("table with same name '%s' added in 2 commits can't be merged") func MergeCommits(ctx *sql.Context, commit, mergeCommit *doltdb.Commit, opts editor.Options) (*Result, error) { - ancCommit, err := doltdb.GetCommitAncestor(ctx, commit, mergeCommit) + optCmt, err := doltdb.GetCommitAncestor(ctx, commit, 
mergeCommit) if err != nil { return nil, err } + ancCommit, ok := optCmt.ToCommit() + if !ok { + // Ancestor commit should have been resolved before getting this far. + return nil, doltdb.ErrGhostCommitRuntimeFailure + } ourRoot, err := commit.GetRootValue(ctx) if err != nil { diff --git a/go/libraries/doltcore/merge/merge_base.go b/go/libraries/doltcore/merge/merge_base.go index 30087bde92f..670761f4269 100644 --- a/go/libraries/doltcore/merge/merge_base.go +++ b/go/libraries/doltcore/merge/merge_base.go @@ -23,10 +23,14 @@ import ( ) func MergeBase(ctx context.Context, left, right *doltdb.Commit) (base hash.Hash, err error) { - ancestor, err := doltdb.GetCommitAncestor(ctx, left, right) + optCmt, err := doltdb.GetCommitAncestor(ctx, left, right) if err != nil { return base, err } + ancestor, ok := optCmt.ToCommit() + if !ok { + return base, doltdb.ErrGhostCommitEncountered + } return ancestor.HashOf() } diff --git a/go/libraries/doltcore/merge/merge_test.go b/go/libraries/doltcore/merge/merge_test.go index 91d6126c3c3..93ade329c86 100644 --- a/go/libraries/doltcore/merge/merge_test.go +++ b/go/libraries/doltcore/merge/merge_test.go @@ -716,8 +716,10 @@ func mustMakeEmptyRepo(t *testing.T) *doltdb.DoltDB { func buildLeftRightAncCommitsAndBranches(t *testing.T, ddb *doltdb.DoltDB, rootTbl, mergeTbl, ancTbl *doltdb.Table) (doltdb.Rootish, doltdb.Rootish, *doltdb.RootValue, *doltdb.RootValue, *doltdb.RootValue) { mainHeadSpec, _ := doltdb.NewCommitSpec(env.DefaultInitBranch) - mainHead, err := ddb.Resolve(context.Background(), mainHeadSpec, nil) + optCmt, err := ddb.Resolve(context.Background(), mainHeadSpec, nil) require.NoError(t, err) + mainHead, ok := optCmt.ToCommit() + require.True(t, ok) mRoot, err := mainHead.GetRootValue(context.Background()) require.NoError(t, err) @@ -756,8 +758,10 @@ func buildLeftRightAncCommitsAndBranches(t *testing.T, ddb *doltdb.DoltDB, rootT root, err := commit.GetRootValue(context.Background()) require.NoError(t, err) - ancCm, err 
:= doltdb.GetCommitAncestor(context.Background(), commit, mergeCommit) + optCmt, err = doltdb.GetCommitAncestor(context.Background(), commit, mergeCommit) require.NoError(t, err) + ancCm, ok := optCmt.ToCommit() + require.True(t, ok) ancRoot, err := ancCm.GetRootValue(context.Background()) require.NoError(t, err) diff --git a/go/libraries/doltcore/merge/revert.go b/go/libraries/doltcore/merge/revert.go index e043b4d1ecb..7943fecfd50 100644 --- a/go/libraries/doltcore/merge/revert.go +++ b/go/libraries/doltcore/merge/revert.go @@ -60,10 +60,15 @@ func Revert(ctx *sql.Context, ddb *doltdb.DoltDB, root *doltdb.RootValue, commit } revertMessage = fmt.Sprintf(`%s "%s"`, revertMessage, baseMeta.Description) - parentCM, err := ddb.ResolveParent(ctx, baseCommit, 0) + optCmt, err := ddb.ResolveParent(ctx, baseCommit, 0) if err != nil { return nil, "", err } + parentCM, ok := optCmt.ToCommit() + if !ok { + return nil, "", doltdb.ErrGhostCommitEncountered + } + theirRoot, err := parentCM.GetRootValue(ctx) if err != nil { return nil, "", err diff --git a/go/libraries/doltcore/migrate/transform.go b/go/libraries/doltcore/migrate/transform.go index ae16f3154d9..dd09e86afb7 100644 --- a/go/libraries/doltcore/migrate/transform.go +++ b/go/libraries/doltcore/migrate/transform.go @@ -121,10 +121,15 @@ func migrateCommit(ctx context.Context, menv Environment, oldCm *doltdb.Commit, return err } - oldParentCm, err := oldCm.GetParent(ctx, 0) + optCmt, err := oldCm.GetParent(ctx, 0) if err != nil { return err } + oldParentCm, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } + oldParentRoot, err := oldParentCm.GetRootValue(ctx) if err != nil { return err @@ -145,10 +150,15 @@ func migrateCommit(ctx context.Context, menv Environment, oldCm *doltdb.Commit, if err != nil { return err } - newParentCm, err := new.ReadCommit(ctx, newParentAddr) + optCmt, err = new.ReadCommit(ctx, newParentAddr) if err != nil { return err } + newParentCm, ok := optCmt.ToCommit() + 
if !ok { + return doltdb.ErrGhostCommitEncountered + } + newParentRoot, err := newParentCm.GetRootValue(ctx) if err != nil { return err diff --git a/go/libraries/doltcore/migrate/traverse.go b/go/libraries/doltcore/migrate/traverse.go index a77a4114abc..4ed1cb92785 100644 --- a/go/libraries/doltcore/migrate/traverse.go +++ b/go/libraries/doltcore/migrate/traverse.go @@ -136,10 +136,15 @@ func traverseTagHistory(ctx context.Context, menv Environment, r ref.TagRef, old if err != nil { return err } - cm, err := new.ReadCommit(ctx, newHash) + optCmt, err := new.ReadCommit(ctx, newHash) if err != nil { return err } + cm, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } + return new.NewTagAtCommit(ctx, r, cm, t.Meta) } @@ -183,10 +188,14 @@ func traverseCommitHistory(ctx context.Context, menv Environment, cm *doltdb.Com if err = prog.Push(ctx, cm); err != nil { return err } - cm, err = cm.GetParent(ctx, idx) + optCmt, err := cm.GetParent(ctx, idx) if err != nil { return err } + cm, ok = optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } } } diff --git a/go/libraries/doltcore/mvdata/data_loc_test.go b/go/libraries/doltcore/mvdata/data_loc_test.go index 3f45dec2825..a309e75fbe4 100644 --- a/go/libraries/doltcore/mvdata/data_loc_test.go +++ b/go/libraries/doltcore/mvdata/data_loc_test.go @@ -64,7 +64,8 @@ func createRootAndFS() (*doltdb.DoltDB, *doltdb.RootValue, filesys.Filesys) { ddb.WriteEmptyRepo(context.Background(), "master", "billy bob", "bigbillieb@fake.horse") cs, _ := doltdb.NewCommitSpec("master") - commit, _ := ddb.Resolve(context.Background(), cs, nil) + optCmt, _ := ddb.Resolve(context.Background(), cs, nil) + commit, _ := optCmt.ToCommit() root, err := commit.GetRootValue(context.Background()) if err != nil { diff --git a/go/libraries/doltcore/rebase/filter_branch.go b/go/libraries/doltcore/rebase/filter_branch.go index 0df7e77dc64..3af1498259f 100644 --- a/go/libraries/doltcore/rebase/filter_branch.go 
+++ b/go/libraries/doltcore/rebase/filter_branch.go @@ -176,15 +176,25 @@ func rebaseRecursive(ctx context.Context, ddb *doltdb.DoltDB, replay ReplayCommi return commit, nil } - allParents, err := ddb.ResolveAllParents(ctx, commit) + allOptParents, err := ddb.ResolveAllParents(ctx, commit) if err != nil { return nil, err } - if len(allParents) < 1 { + if len(allOptParents) < 1 { panic(fmt.Sprintf("commit: %s has no parents", commitHash.String())) } + // convert allOptParents to allParents + var allParents []*doltdb.Commit + for _, optParent := range allOptParents { + parent, ok := optParent.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } + allParents = append(allParents, parent) + } + var allRebasedParents []*doltdb.Commit for _, p := range allParents { rp, err := rebaseRecursive(ctx, ddb, replay, nerf, vs, p) diff --git a/go/libraries/doltcore/rebase/rebase.go b/go/libraries/doltcore/rebase/rebase.go index 80303548264..13124887efd 100644 --- a/go/libraries/doltcore/rebase/rebase.go +++ b/go/libraries/doltcore/rebase/rebase.go @@ -193,13 +193,18 @@ func findRebaseCommits(ctx *sql.Context, currentBranchCommit, upstreamBranchComm // Drain the iterator into a slice so that we can easily reverse the order of the commits // so that the oldest commit is first in the generated rebase plan. for { - _, commit, err := commitItr.Next(ctx) + _, optCmt, err := commitItr.Next(ctx) if err == io.EOF { return commits, nil } else if err != nil { return nil, err } + commit, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered // Not sure if we can get this far. commit walk is going to be a bear. 
+ } + // Don't include merge commits in the rebase plan if commit.NumParents() == 1 { commits = append(commits, commit) diff --git a/go/libraries/doltcore/ref/ref_spec.go b/go/libraries/doltcore/ref/ref_spec.go index 90a22388aef..b7021d7b320 100644 --- a/go/libraries/doltcore/ref/ref_spec.go +++ b/go/libraries/doltcore/ref/ref_spec.go @@ -101,7 +101,7 @@ func ParseRefSpecForRemote(remote, refSpecStr string) (RefSpec, error) { } if fromRef.GetType() == BranchRefType && toRef.GetType() == RemoteRefType { - return newLocalToRemoteTrackingRef(remote, fromRef.(BranchRef), toRef.(RemoteRef)) + return NewLocalToRemoteTrackingRef(remote, fromRef.(BranchRef), toRef.(RemoteRef)) } else if fromRef.GetType() == BranchRefType && toRef.GetType() == BranchRefType { return NewBranchToBranchRefSpec(fromRef.(BranchRef), toRef.(BranchRef)) } else if fromRef.GetType() == TagRefType && toRef.GetType() == TagRefType { @@ -206,7 +206,7 @@ type BranchToTrackingBranchRefSpec struct { remRefToLocal branchMapper } -func newLocalToRemoteTrackingRef(remote string, srcRef BranchRef, destRef RemoteRef) (RefSpec, error) { +func NewLocalToRemoteTrackingRef(remote string, srcRef BranchRef, destRef RemoteRef) (RefSpec, error) { srcWCs := strings.Count(srcRef.GetPath(), "*") destWCs := strings.Count(destRef.GetPath(), "*") diff --git a/go/libraries/doltcore/remotestorage/chunk_store.go b/go/libraries/doltcore/remotestorage/chunk_store.go index 8571f3ce28e..f9be6a151ac 100644 --- a/go/libraries/doltcore/remotestorage/chunk_store.go +++ b/go/libraries/doltcore/remotestorage/chunk_store.go @@ -951,6 +951,10 @@ func (dcs *DoltChunkStore) StatsSummary() string { return fmt.Sprintf("CacheHits: %v", dcs.Stats().(CacheStats).CacheHits()) } +func (dcs *DoltChunkStore) PersistGhostHashes(ctx context.Context, refs hash.HashSet) error { + panic("runtime error: PersistGhostHashes should never be called on a remote chunk store") +} + // Close tears down any resources in use by the implementation. 
After // Close(), the ChunkStore may not be used again. It is NOT SAFE to call // Close() concurrently with any other ChunkStore method; behavior is diff --git a/go/libraries/doltcore/sqle/cluster/commithook.go b/go/libraries/doltcore/sqle/cluster/commithook.go index 78fc79cbb14..120cd45061c 100644 --- a/go/libraries/doltcore/sqle/cluster/commithook.go +++ b/go/libraries/doltcore/sqle/cluster/commithook.go @@ -306,7 +306,7 @@ func (h *commithook) attemptReplicate(ctx context.Context) { } lgr.Tracef("cluster/commithook: pushing chunks for root hash %v to destDB", toPush.String()) - err := destDB.PullChunks(ctx, h.tempDir, h.srcDB, []hash.Hash{toPush}, nil) + err := destDB.PullChunks(ctx, h.tempDir, h.srcDB, []hash.Hash{toPush}, nil, nil) if err == nil { lgr.Tracef("cluster/commithook: successfully pushed chunks, setting root") datasDB := doltdb.HackDatasDatabaseFromDoltDB(destDB) diff --git a/go/libraries/doltcore/sqle/database.go b/go/libraries/doltcore/sqle/database.go index 24b6cefe835..0b4aa29d033 100644 --- a/go/libraries/doltcore/sqle/database.go +++ b/go/libraries/doltcore/sqle/database.go @@ -505,10 +505,14 @@ func resolveAsOfTime(ctx *sql.Context, ddb *doltdb.DoltDB, head ref.DoltRef, asO return nil, nil, err } - cm, err := ddb.Resolve(ctx, cs, head) + optCmt, err := ddb.Resolve(ctx, cs, head) if err != nil { return nil, nil, err } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, nil, doltdb.ErrGhostCommitEncountered + } h, err := cm.HashOf() if err != nil { @@ -521,12 +525,16 @@ func resolveAsOfTime(ctx *sql.Context, ddb *doltdb.DoltDB, head ref.DoltRef, asO } for { - _, curr, err := cmItr.Next(ctx) + _, optCmt, err := cmItr.Next(ctx) if err == io.EOF { break } else if err != nil { return nil, nil, err } + curr, ok := optCmt.ToCommit() + if !ok { + return nil, nil, doltdb.ErrGhostCommitEncountered + } meta, err := curr.GetCommitMeta(ctx) if err != nil { @@ -572,10 +580,14 @@ func resolveAsOfCommitRef(ctx *sql.Context, db Database, head ref.DoltRef, 
commi return nil, nil, err } - cm, err := ddb.ResolveByNomsRoot(ctx, cs, head, nomsRoot) + optCmt, err := ddb.ResolveByNomsRoot(ctx, cs, head, nomsRoot) if err != nil { return nil, nil, err } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, nil, doltdb.ErrGhostCommitEncountered + } root, err := cm.GetRootValue(ctx) if err != nil { diff --git a/go/libraries/doltcore/sqle/database_provider.go b/go/libraries/doltcore/sqle/database_provider.go index 818cd050588..857dde7fc04 100644 --- a/go/libraries/doltcore/sqle/database_provider.go +++ b/go/libraries/doltcore/sqle/database_provider.go @@ -262,7 +262,7 @@ func (p *DoltDatabaseProvider) attemptCloneReplica(ctx *sql.Context, dbName stri // TODO: remote params for AWS, others // TODO: this needs to be robust in the face of the DB not having the default branch // TODO: this treats every database not found error as a clone error, need to tighten - err := p.CloneDatabaseFromRemote(ctx, dbName, p.defaultBranch, remoteName, remoteUrl, nil) + err := p.CloneDatabaseFromRemote(ctx, dbName, p.defaultBranch, remoteName, remoteUrl, -1, nil) if err != nil { return err } @@ -491,6 +491,7 @@ func ConfigureReplicationDatabaseHook(ctx *sql.Context, p *DoltDatabaseProvider, func (p *DoltDatabaseProvider) CloneDatabaseFromRemote( ctx *sql.Context, dbName, branch, remoteName, remoteUrl string, + depth int, remoteParams map[string]string, ) error { p.mu.Lock() @@ -503,7 +504,7 @@ func (p *DoltDatabaseProvider) CloneDatabaseFromRemote( return fmt.Errorf("cannot create DB, file exists at %s", dbName) } - err := p.cloneDatabaseFromRemote(ctx, dbName, remoteName, branch, remoteUrl, remoteParams) + err := p.cloneDatabaseFromRemote(ctx, dbName, remoteName, branch, remoteUrl, depth, remoteParams) if err != nil { // Make a best effort to clean up any artifacts on disk from a failed clone // before we return the error @@ -527,6 +528,7 @@ func (p *DoltDatabaseProvider) CloneDatabaseFromRemote( func (p *DoltDatabaseProvider) 
cloneDatabaseFromRemote( ctx *sql.Context, dbName, remoteName, branch, remoteUrl string, + depth int, remoteParams map[string]string, ) error { if p.remoteDialer == nil { @@ -544,7 +546,7 @@ func (p *DoltDatabaseProvider) cloneDatabaseFromRemote( return err } - err = actions.CloneRemote(ctx, srcDB, remoteName, branch, false, dEnv) + err = actions.CloneRemote(ctx, srcDB, remoteName, branch, false, depth, dEnv) if err != nil { return err } @@ -1078,10 +1080,15 @@ func resolveAncestorSpec(ctx *sql.Context, revSpec string, ddb *doltdb.DoltDB) ( return "", err } - cm, err = cm.GetAncestor(ctx, ancestorSpec) + optCmt, err := cm.GetAncestor(ctx, ancestorSpec) if err != nil { return "", err } + ok := false + cm, ok = optCmt.ToCommit() + if !ok { + return "", doltdb.ErrGhostCommitEncountered + } hash, err := cm.HashOf() if err != nil { @@ -1469,10 +1476,14 @@ func initialStateForCommit(ctx context.Context, srcDb ReadOnlyDatabase) (dsess.I if err != nil { return dsess.InitialDbState{}, err } - cm, err := srcDb.DbData().Ddb.Resolve(ctx, spec, headRef) + optCmt, err := srcDb.DbData().Ddb.Resolve(ctx, spec, headRef) if err != nil { return dsess.InitialDbState{}, err } + cm, ok := optCmt.ToCommit() + if !ok { + return dsess.InitialDbState{}, doltdb.ErrGhostCommitEncountered + } init := dsess.InitialDbState{ Db: srcDb, diff --git a/go/libraries/doltcore/sqle/dfunctions/dolt_merge_base.go b/go/libraries/doltcore/sqle/dfunctions/dolt_merge_base.go index a4d892eda80..3c50f01d416 100644 --- a/go/libraries/doltcore/sqle/dfunctions/dolt_merge_base.go +++ b/go/libraries/doltcore/sqle/dfunctions/dolt_merge_base.go @@ -99,14 +99,23 @@ func resolveRefSpecs(ctx *sql.Context, leftSpec, rightSpec string) (left, right return nil, nil, err } - left, err = doltDB.Resolve(ctx, lcs, headRef) + optCmt, err := doltDB.Resolve(ctx, lcs, headRef) if err != nil { return nil, nil, err } - right, err = doltDB.Resolve(ctx, rcs, headRef) + left, ok = optCmt.ToCommit() + if !ok { + return nil, nil, 
doltdb.ErrGhostCommitEncountered + } + + optCmt, err = doltDB.Resolve(ctx, rcs, headRef) if err != nil { return nil, nil, err } + right, ok = optCmt.ToCommit() + if !ok { + return nil, nil, doltdb.ErrGhostCommitEncountered + } return } diff --git a/go/libraries/doltcore/sqle/dfunctions/has_ancestor.go b/go/libraries/doltcore/sqle/dfunctions/has_ancestor.go index 0a74a97674e..a0f0b256f48 100644 --- a/go/libraries/doltcore/sqle/dfunctions/has_ancestor.go +++ b/go/libraries/doltcore/sqle/dfunctions/has_ancestor.go @@ -74,10 +74,14 @@ func (a *HasAncestor) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) { if err != nil { return nil, err } - headCommit, err = ddb.Resolve(ctx, cs, headRef) + optCmt, err := ddb.Resolve(ctx, cs, headRef) if err != nil { return nil, fmt.Errorf("error during has_ancestor check: ref not found '%s'", headStr) } + headCommit, ok = optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } } var ancCommit *doltdb.Commit @@ -94,10 +98,14 @@ func (a *HasAncestor) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) { if err != nil { return nil, err } - ancCommit, err = ddb.Resolve(ctx, cs, headRef) + optCmt, err := ddb.Resolve(ctx, cs, headRef) if err != nil { return nil, fmt.Errorf("error during has_ancestor check: ref not found '%s'", ancStr) } + ancCommit, ok = optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } } diff --git a/go/libraries/doltcore/sqle/dfunctions/hashof.go b/go/libraries/doltcore/sqle/dfunctions/hashof.go index 2ab5b944252..7af9e759bcf 100644 --- a/go/libraries/doltcore/sqle/dfunctions/hashof.go +++ b/go/libraries/doltcore/sqle/dfunctions/hashof.go @@ -85,10 +85,14 @@ func (t *HashOf) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) { hsh, parsed := hash.MaybeParse(name) if parsed { orgErr := err - cm, err = ddb.ReadCommit(ctx, hsh) + optCmt, err := ddb.ReadCommit(ctx, hsh) if err != nil { return nil, orgErr } + cm, ok = optCmt.ToCommit() + if !ok { + return 
nil, doltdb.ErrGhostCommitEncountered + } } else { return nil, err } @@ -100,10 +104,14 @@ func (t *HashOf) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) { } } - cm, err = cm.GetAncestor(ctx, as) + optCmt, err := cm.GetAncestor(ctx, as) if err != nil { return nil, err } + cm, ok = optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } h, err := cm.HashOf() if err != nil { diff --git a/go/libraries/doltcore/sqle/dolt_diff_table_function.go b/go/libraries/doltcore/sqle/dolt_diff_table_function.go index 1e00654aa0f..904f97e5b9b 100644 --- a/go/libraries/doltcore/sqle/dolt_diff_table_function.go +++ b/go/libraries/doltcore/sqle/dolt_diff_table_function.go @@ -326,17 +326,21 @@ func resolveRoot(ctx *sql.Context, sess *dsess.DoltSession, dbName, hashStr stri } func resolveCommit(ctx *sql.Context, ddb *doltdb.DoltDB, headRef ref.DoltRef, cSpecStr string) (*doltdb.Commit, error) { - rightCs, err := doltdb.NewCommitSpec(cSpecStr) + cs, err := doltdb.NewCommitSpec(cSpecStr) if err != nil { return nil, err } - rightCm, err := ddb.Resolve(ctx, rightCs, headRef) + optCmt, err := ddb.Resolve(ctx, cs, headRef) if err != nil { return nil, err } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } - return rightCm, nil + return cm, nil } // WithChildren implements the sql.Node interface diff --git a/go/libraries/doltcore/sqle/dolt_log_table_function.go b/go/libraries/doltcore/sqle/dolt_log_table_function.go index 2ee03a36809..527cb86eaaa 100644 --- a/go/libraries/doltcore/sqle/dolt_log_table_function.go +++ b/go/libraries/doltcore/sqle/dolt_log_table_function.go @@ -401,7 +401,12 @@ func (ltf *LogTableFunction) RowIter(ctx *sql.Context, row sql.Row) (sql.RowIter sess := dsess.DSessFromSess(ctx.Session) var commit *doltdb.Commit - matchFunc := func(commit *doltdb.Commit) (bool, error) { + matchFunc := func(optCmt *doltdb.OptionalCommit) (bool, error) { + commit, ok := optCmt.ToCommit() + if !ok { + return 
false, nil + } + return commit.NumParents() >= ltf.minParents, nil } @@ -432,10 +437,15 @@ func (ltf *LogTableFunction) RowIter(ctx *sql.Context, row sql.Row) (sql.RowIter return nil, err } - commit, err = sqledb.DbData().Ddb.Resolve(ctx, cs, headRef) + optCmt, err := sqledb.DbData().Ddb.Resolve(ctx, cs, headRef) if err != nil { return nil, err } + commit, ok = optCmt.ToCommit() + if err != nil { + return nil, doltdb.ErrGhostCommitEncountered + } + commits = append(commits, commit) } @@ -446,10 +456,15 @@ func (ltf *LogTableFunction) RowIter(ctx *sql.Context, row sql.Row) (sql.RowIter return nil, err } - notCommit, err := sqledb.DbData().Ddb.Resolve(ctx, cs, headRef) + optCmt, err := sqledb.DbData().Ddb.Resolve(ctx, cs, headRef) if err != nil { return nil, err } + notCommit, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } + notCommits = append(notCommits, notCommit) } @@ -465,10 +480,14 @@ func (ltf *LogTableFunction) RowIter(ctx *sql.Context, row sql.Row) (sql.RowIter } // Use merge base as excluding commit - mergeCommit, err := sqledb.DbData().Ddb.Resolve(ctx, mergeCs, nil) + optCmt, err := sqledb.DbData().Ddb.Resolve(ctx, mergeCs, nil) if err != nil { return nil, err } + mergeCommit, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } notCommits = append(notCommits, mergeCommit) @@ -583,7 +602,7 @@ type logTableFunctionRowIter struct { tableNames []string } -func (ltf *LogTableFunction) NewLogTableFunctionRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, commit *doltdb.Commit, matchFn func(*doltdb.Commit) (bool, error), cHashToRefs map[hash.Hash][]string, tableNames []string) (*logTableFunctionRowIter, error) { +func (ltf *LogTableFunction) NewLogTableFunctionRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, commit *doltdb.Commit, matchFn func(*doltdb.OptionalCommit) (bool, error), cHashToRefs map[hash.Hash][]string, tableNames []string) (*logTableFunctionRowIter, error) { h, err := commit.HashOf() 
if err != nil { return nil, err @@ -604,7 +623,7 @@ func (ltf *LogTableFunction) NewLogTableFunctionRowIter(ctx *sql.Context, ddb *d }, nil } -func (ltf *LogTableFunction) NewDotDotLogTableFunctionRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, commits []*doltdb.Commit, excludingCommits []*doltdb.Commit, matchFn func(*doltdb.Commit) (bool, error), cHashToRefs map[hash.Hash][]string, tableNames []string) (*logTableFunctionRowIter, error) { +func (ltf *LogTableFunction) NewDotDotLogTableFunctionRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, commits []*doltdb.Commit, excludingCommits []*doltdb.Commit, matchFn func(*doltdb.OptionalCommit) (bool, error), cHashToRefs map[hash.Hash][]string, tableNames []string) (*logTableFunctionRowIter, error) { hashes := make([]hash.Hash, len(commits)) for i, commit := range commits { h, err := commit.HashOf() @@ -649,12 +668,18 @@ func (ltf *LogTableFunction) NewDotDotLogTableFunctionRowIter(ctx *sql.Context, func (itr *logTableFunctionRowIter) Next(ctx *sql.Context) (sql.Row, error) { var commitHash hash.Hash var commit *doltdb.Commit + var optCmt *doltdb.OptionalCommit var err error for { - commitHash, commit, err = itr.child.Next(ctx) + commitHash, optCmt, err = itr.child.Next(ctx) if err != nil { return nil, err } + ok := false + commit, ok = optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } if itr.tableNames != nil { if commit.NumParents() == 0 { @@ -662,16 +687,25 @@ func (itr *logTableFunctionRowIter) Next(ctx *sql.Context) (sql.Row, error) { // we expect EOF to be returned on the next call to Next(), but continue in case there are more commits continue } - parent0Cm, err := commit.GetParent(ctx, 0) + optCmt, err := commit.GetParent(ctx, 0) if err != nil { return nil, err } + parent0Cm, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } + var parent1Cm *doltdb.Commit if commit.NumParents() > 1 { - parent1Cm, err = commit.GetParent(ctx, 1) + optCmt, err = 
commit.GetParent(ctx, 1) if err != nil { return nil, err } + parent1Cm, ok = optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } } parent0RV, err := parent0Cm.GetRootValue(ctx) diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go index c0996237ab4..6fe1720ee4a 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go @@ -57,7 +57,12 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) { remoteParms[dbfactory.GRPCUsernameAuthParam] = user } - err = sess.Provider().CloneDatabaseFromRemote(ctx, dir, branch, remoteName, remoteUrl, remoteParms) + depth, ok := apr.GetInt(cli.DepthFlag) + if !ok { + depth = -1 + } + + err = sess.Provider().CloneDatabaseFromRemote(ctx, dir, branch, remoteName, remoteUrl, depth, remoteParms) if err != nil { return nil, err } diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_count_commits.go b/go/libraries/doltcore/sqle/dprocedures/dolt_count_commits.go index 415fb75097d..5ca6ddabed9 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_count_commits.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_count_commits.go @@ -77,10 +77,15 @@ func countCommits(ctx *sql.Context, args ...string) (ahead uint64, behind uint64 if err != nil { return 0, 0, err } - fromCommit, err := ddb.Resolve(ctx, fromSpec, headRef) + optCmt, err := ddb.Resolve(ctx, fromSpec, headRef) if err != nil { return 0, 0, err } + fromCommit, ok := optCmt.ToCommit() + if !ok { + return 0, 0, doltdb.ErrGhostCommitEncountered + } + fromHash, err := fromCommit.HashOf() if err != nil { return 0, 0, err @@ -90,19 +95,29 @@ func countCommits(ctx *sql.Context, args ...string) (ahead uint64, behind uint64 if err != nil { return 0, 0, err } - toCommit, err := ddb.Resolve(ctx, toSpec, headRef) + optCmt, err = ddb.Resolve(ctx, toSpec, headRef) if err != nil { return 0, 0, err } + toCommit, ok := 
optCmt.ToCommit() + if !ok { + return 0, 0, doltdb.ErrGhostCommitEncountered + } + toHash, err := toCommit.HashOf() if err != nil { return 0, 0, err } - ancestor, err := doltdb.GetCommitAncestor(ctx, fromCommit, toCommit) + optCmt, err = doltdb.GetCommitAncestor(ctx, fromCommit, toCommit) if err != nil { return 0, 0, err } + ancestor, ok := optCmt.ToCommit() + if !ok { + return 0, 0, doltdb.ErrGhostCommitEncountered + } + ancestorHash, err := ancestor.HashOf() if err != nil { return 0, 0, err diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_fetch.go b/go/libraries/doltcore/sqle/dprocedures/dolt_fetch.go index 6fa1a501b24..2a5a1f11d6b 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_fetch.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_fetch.go @@ -87,7 +87,7 @@ func doDoltFetch(ctx *sql.Context, args []string) (int, error) { prune := apr.Contains(cli.PruneFlag) mode := ref.UpdateMode{Force: true, Prune: prune} - err = actions.FetchRefSpecs(ctx, dbData, srcDB, refSpecs, remote, mode, runProgFuncs, stopProgFuncs) + err = actions.FetchRefSpecs(ctx, dbData, srcDB, refSpecs, &remote, mode, runProgFuncs, stopProgFuncs) if err != nil { return cmdFailure, fmt.Errorf("fetch failed: %w", err) } diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_pull.go b/go/libraries/doltcore/sqle/dprocedures/dolt_pull.go index 6367e5a4d1d..f3254cbd8f5 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_pull.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_pull.go @@ -152,7 +152,7 @@ func doDoltPull(ctx *sql.Context, args []string) (int, int, string, error) { } mode := ref.UpdateMode{Force: true, Prune: false} - err = actions.FetchRefSpecs(ctx, dbData, srcDB, pullSpec.RefSpecs, pullSpec.Remote, mode, runProgFuncs, stopProgFuncs) + err = actions.FetchRefSpecs(ctx, dbData, srcDB, pullSpec.RefSpecs, &pullSpec.Remote, mode, runProgFuncs, stopProgFuncs) if err != nil { return noConflictsOrViolations, threeWayMerge, "", fmt.Errorf("fetch failed: %w", err) } diff 
--git a/go/libraries/doltcore/sqle/dprocedures/dolt_rebase.go b/go/libraries/doltcore/sqle/dprocedures/dolt_rebase.go index ebea10747fb..e1ba6265ad1 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_rebase.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_rebase.go @@ -193,10 +193,14 @@ func startRebase(ctx *sql.Context, upstreamPoint string) error { return err } - upstreamCommit, err := dbData.Ddb.Resolve(ctx, commitSpec, headRef) + optCmt, err := dbData.Ddb.Resolve(ctx, commitSpec, headRef) if err != nil { return err } + upstreamCommit, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered + } // rebaseWorkingBranch is the name of the temporary branch used when performing a rebase. In Git, a rebase // happens with a detatched HEAD, but Dolt doesn't support that, we use a temporary branch. @@ -554,10 +558,16 @@ func squashCommitMessage(ctx *sql.Context, nextCommitHash string) (string, error if err != nil { return "", err } - nextCommit, err := ddb.Resolve(ctx, spec, headRef) + + optCmt, err := ddb.Resolve(ctx, spec, headRef) if err != nil { return "", err } + nextCommit, ok := optCmt.ToCommit() + if !ok { + return "", doltdb.ErrGhostCommitEncountered + } + nextCommitMeta, err := nextCommit.GetCommitMeta(ctx) if err != nil { return "", err diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_revert.go b/go/libraries/doltcore/sqle/dprocedures/dolt_revert.go index def2b6c034a..27d0d48b0ab 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_revert.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_revert.go @@ -98,10 +98,15 @@ func doDoltRevert(ctx *sql.Context, args []string) (int, error) { if err != nil { return 1, err } - commit, err := ddb.Resolve(ctx, commitSpec, headRef) + optCmt, err := ddb.Resolve(ctx, commitSpec, headRef) if err != nil { return 1, err } + commit, ok := optCmt.ToCommit() + if !ok { + return 1, doltdb.ErrGhostCommitEncountered + } + commits[i] = commit } diff --git 
a/go/libraries/doltcore/sqle/dsess/dolt_session_test.go b/go/libraries/doltcore/sqle/dsess/dolt_session_test.go index e89a86f12f4..3b74223322c 100644 --- a/go/libraries/doltcore/sqle/dsess/dolt_session_test.go +++ b/go/libraries/doltcore/sqle/dsess/dolt_session_test.go @@ -307,7 +307,7 @@ func (e emptyRevisionDatabaseProvider) FileSystemForDatabase(dbname string) (fil return nil, nil } -func (e emptyRevisionDatabaseProvider) CloneDatabaseFromRemote(ctx *sql.Context, dbName, branch, remoteName, remoteUrl string, remoteParams map[string]string) error { +func (e emptyRevisionDatabaseProvider) CloneDatabaseFromRemote(ctx *sql.Context, dbName, branch, remoteName, remoteUrl string, depth int, remoteParams map[string]string) error { return nil } diff --git a/go/libraries/doltcore/sqle/dsess/session.go b/go/libraries/doltcore/sqle/dsess/session.go index 652f4bd315e..d8c985b71e5 100644 --- a/go/libraries/doltcore/sqle/dsess/session.go +++ b/go/libraries/doltcore/sqle/dsess/session.go @@ -409,10 +409,14 @@ func (d *DoltSession) newWorkingSetForHead(ctx *sql.Context, wsRef ref.WorkingSe return nil, err } - headCommit, err := dbData.Ddb.Resolve(ctx, headSpec, headRef) + optCmt, err := dbData.Ddb.Resolve(ctx, headSpec, headRef) if err != nil { return nil, err } + headCommit, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } headRoot, err := headCommit.GetRootValue(ctx) if err != nil { @@ -687,10 +691,15 @@ func (d *DoltSession) newPendingCommit(ctx *sql.Context, branchState *branchStat } else if props.Amend { numParentsHeadForAmend := headCommit.NumParents() for i := 0; i < numParentsHeadForAmend; i++ { - parentCommit, err := headCommit.GetParent(ctx, i) + optCmt, err := headCommit.GetParent(ctx, i) if err != nil { return nil, err } + parentCommit, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } + mergeParentCommits = append(mergeParentCommits, parentCommit) } @@ -882,10 +891,14 @@ func (d *DoltSession) 
ResolveRootForRef(ctx *sql.Context, dbName, refStr string) return nil, nil, "", err } - cm, err := dbData.Ddb.Resolve(ctx, cs, headRef) + optCmt, err := dbData.Ddb.Resolve(ctx, cs, headRef) if err != nil { return nil, nil, "", err } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, nil, "", doltdb.ErrGhostCommitRuntimeFailure + } root, err = cm.GetRootValue(ctx) if err != nil { diff --git a/go/libraries/doltcore/sqle/dsess/session_db_provider.go b/go/libraries/doltcore/sqle/dsess/session_db_provider.go index 90194031fb7..4a20c26f925 100644 --- a/go/libraries/doltcore/sqle/dsess/session_db_provider.go +++ b/go/libraries/doltcore/sqle/dsess/session_db_provider.go @@ -88,7 +88,7 @@ type DoltDatabaseProvider interface { // dbName is the name for the new database, branch is an optional parameter indicating which branch to clone // (otherwise all branches are cloned), remoteName is the name for the remote created in the new database, and // remoteUrl is a URL (e.g. "file:///dbs/db1") or an / path indicating a database hosted on DoltHub. - CloneDatabaseFromRemote(ctx *sql.Context, dbName, branch, remoteName, remoteUrl string, remoteParams map[string]string) error + CloneDatabaseFromRemote(ctx *sql.Context, dbName, branch, remoteName, remoteUrl string, depth int, remoteParams map[string]string) error // SessionDatabase returns the SessionDatabase for the specified database, which may name a revision of a base // database. 
SessionDatabase(ctx *sql.Context, dbName string) (SqlDatabase, bool, error) diff --git a/go/libraries/doltcore/sqle/dsess/transactions.go b/go/libraries/doltcore/sqle/dsess/transactions.go index a29282ed3ab..e22bdf8e816 100644 --- a/go/libraries/doltcore/sqle/dsess/transactions.go +++ b/go/libraries/doltcore/sqle/dsess/transactions.go @@ -202,10 +202,14 @@ func doltCommit(ctx *sql.Context, } headSpec, _ := doltdb.NewCommitSpec("HEAD") - curHead, err := doltDb.Resolve(ctx, headSpec, headRef) + optCmt, err := doltDb.Resolve(ctx, headSpec, headRef) if err != nil { return nil, nil, err } + curHead, ok := optCmt.ToCommit() + if !ok { + return nil, nil, doltdb.ErrGhostCommitRuntimeFailure + } // We already got a new staged root via merge or ff via the doCommit method, so now apply it to the STAGED value // we're about to commit. diff --git a/go/libraries/doltcore/sqle/dtables/column_diff_table.go b/go/libraries/doltcore/sqle/dtables/column_diff_table.go index 0b650c004dd..29a5f69fa74 100644 --- a/go/libraries/doltcore/sqle/dtables/column_diff_table.go +++ b/go/libraries/doltcore/sqle/dtables/column_diff_table.go @@ -336,10 +336,15 @@ func (itr *doltColDiffCommitHistoryRowItr) Next(ctx *sql.Context) (sql.Row, erro } itr.commits = nil } else if itr.child != nil { - _, commit, err := itr.child.Next(ctx) + _, optCmt, err := itr.child.Next(ctx) if err != nil { return nil, err } + commit, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } + err = itr.loadTableChanges(ctx, commit) if err != nil { return nil, err @@ -409,10 +414,14 @@ func (itr *doltColDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Con return nil, err } - parent, err := itr.ddb.ResolveParent(ctx, commit, 0) + optCmt, err := itr.ddb.ResolveParent(ctx, commit, 0) if err != nil { return nil, err } + parent, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } fromRootValue, err := parent.GetRootValue(ctx) if err != nil { diff --git 
a/go/libraries/doltcore/sqle/dtables/commit_ancestors_table.go b/go/libraries/doltcore/sqle/dtables/commit_ancestors_table.go index 95ff1f5e24b..22201568ef8 100644 --- a/go/libraries/doltcore/sqle/dtables/commit_ancestors_table.go +++ b/go/libraries/doltcore/sqle/dtables/commit_ancestors_table.go @@ -158,12 +158,17 @@ func NewCommitAncestorsRowItr(sqlCtx *sql.Context, ddb *doltdb.DoltDB) (*CommitA // After retrieving the last row, Close will be automatically closed. func (itr *CommitAncestorsRowItr) Next(ctx *sql.Context) (sql.Row, error) { if len(itr.cache) == 0 { - ch, cm, err := itr.itr.Next(ctx) + ch, optCmt, err := itr.itr.Next(ctx) if err != nil { // When complete itr.Next will return io.EOF return nil, err } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } + parents, err := itr.ddb.ResolveAllParents(ctx, cm) if err != nil { return nil, err @@ -175,7 +180,12 @@ func (itr *CommitAncestorsRowItr) Next(ctx *sql.Context) (sql.Row, error) { } itr.cache = make([]sql.Row, len(parents)) - for i, p := range parents { + for i, optParent := range parents { + p, ok := optParent.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } + ph, err := p.HashOf() if err != nil { return nil, err diff --git a/go/libraries/doltcore/sqle/dtables/commit_diff_table.go b/go/libraries/doltcore/sqle/dtables/commit_diff_table.go index 5907a5dada5..aaed885bb4e 100644 --- a/go/libraries/doltcore/sqle/dtables/commit_diff_table.go +++ b/go/libraries/doltcore/sqle/dtables/commit_diff_table.go @@ -261,16 +261,18 @@ func (dt *CommitDiffTable) rootValForHash(ctx *sql.Context, hashStr string) (*do root = dt.workingRoot } else { cs, err := doltdb.NewCommitSpec(hashStr) - if err != nil { return nil, "", nil, err } - cm, err := dt.ddb.Resolve(ctx, cs, nil) - + optCmt, err := dt.ddb.Resolve(ctx, cs, nil) if err != nil { return nil, "", nil, err } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, "", nil, doltdb.ErrGhostCommitEncountered 
+ } root, err = cm.GetRootValue(ctx) diff --git a/go/libraries/doltcore/sqle/dtables/commits_table.go b/go/libraries/doltcore/sqle/dtables/commits_table.go index 9ad3a267025..d0009298625 100644 --- a/go/libraries/doltcore/sqle/dtables/commits_table.go +++ b/go/libraries/doltcore/sqle/dtables/commits_table.go @@ -1,4 +1,4 @@ -// Copyright 2020 Dolthub, Inc. +// Copyright 2021 Dolthub, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ package dtables import ( "fmt" + "io" "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/types" @@ -150,10 +151,14 @@ func NewCommitsRowItr(ctx *sql.Context, ddb *doltdb.DoltDB) (CommitsRowItr, erro // Next retrieves the next row. It will return io.EOF if it's the last row. // After retrieving the last row, Close will be automatically closed. func (itr CommitsRowItr) Next(ctx *sql.Context) (sql.Row, error) { - h, cm, err := itr.itr.Next(ctx) + h, optCmt, err := itr.itr.Next(ctx) if err != nil { return nil, err } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, io.EOF + } meta, err := cm.GetCommitMeta(ctx) if err != nil { diff --git a/go/libraries/doltcore/sqle/dtables/diff_table.go b/go/libraries/doltcore/sqle/dtables/diff_table.go index b98711f8ddb..13e0a16678e 100644 --- a/go/libraries/doltcore/sqle/dtables/diff_table.go +++ b/go/libraries/doltcore/sqle/dtables/diff_table.go @@ -397,12 +397,19 @@ func (dt *DiffTable) scanHeightForChild(ctx *sql.Context, parent hash.Hash, heig func (dt *DiffTable) reverseIterForChild(ctx *sql.Context, parent hash.Hash) (*doltdb.Commit, hash.Hash, error) { iter := doltdb.CommitItrForRoots(dt.ddb, dt.head) for { - childHs, childCm, err := iter.Next(ctx) + childHs, optCmt, err := iter.Next(ctx) if errors.Is(err, io.EOF) { return nil, hash.Hash{}, nil } else if err != nil { return nil, hash.Hash{}, err } + + childCm, ok := optCmt.ToCommit() + if !ok { + // 
Should have been caught above from the Next() call on the iter. This is a runtime error. + return nil, hash.Hash{}, doltdb.ErrGhostCommitRuntimeFailure + } + phs, err := childCm.ParentHashes(ctx) if err != nil { return nil, hash.Hash{}, err @@ -506,10 +513,15 @@ func (dt *DiffTable) toCommitLookupPartitions(ctx *sql.Context, hashes []hash.Ha } for i, pj := range ph { - pc, err := cm.GetParent(ctx, i) + optCmt, err := cm.GetParent(ctx, i) if err != nil { return nil, err } + pc, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } + cmHashToTblInfo[pj] = toCmInfo cmHashToTblInfo[pj] = ti pCommits = append(pCommits, pc) @@ -786,10 +798,15 @@ func (dps *DiffPartitions) processCommit(ctx *sql.Context, cmHash hash.Hash, cm func (dps *DiffPartitions) Next(ctx *sql.Context) (sql.Partition, error) { for { - cmHash, cm, err := dps.cmItr.Next(ctx) + cmHash, optCmt, err := dps.cmItr.Next(ctx) if err != nil { return nil, err } + cm, ok := optCmt.ToCommit() + if !ok { + // Should have been caught above from the Next() call on the iter. This is a runtime error. + return nil, doltdb.ErrGhostCommitRuntimeFailure + } root, err := cm.GetRootValue(ctx) diff --git a/go/libraries/doltcore/sqle/dtables/log_table.go b/go/libraries/doltcore/sqle/dtables/log_table.go index f22550ab745..a17b6ffacdb 100644 --- a/go/libraries/doltcore/sqle/dtables/log_table.go +++ b/go/libraries/doltcore/sqle/dtables/log_table.go @@ -231,11 +231,17 @@ func NewLogItr(ctx *sql.Context, ddb *doltdb.DoltDB, head *doltdb.Commit) (*LogI // Next retrieves the next row. It will return io.EOF if it's the last row. // After retrieving the last row, Close will be automatically closed. func (itr *LogItr) Next(ctx *sql.Context) (sql.Row, error) { - h, cm, err := itr.child.Next(ctx) + h, optCmt, err := itr.child.Next(ctx) if err != nil { return nil, err } + cm, ok := optCmt.ToCommit() + if !ok { + // Should have been caught by the commit walk. 
+ return nil, doltdb.ErrGhostCommitRuntimeFailure + } + meta, err := cm.GetCommitMeta(ctx) if err != nil { return nil, err diff --git a/go/libraries/doltcore/sqle/dtables/schema_conflicts_table.go b/go/libraries/doltcore/sqle/dtables/schema_conflicts_table.go index 84a671d850d..e5f75ada04a 100644 --- a/go/libraries/doltcore/sqle/dtables/schema_conflicts_table.go +++ b/go/libraries/doltcore/sqle/dtables/schema_conflicts_table.go @@ -103,10 +103,14 @@ func (dt *SchemaConflictsTable) PartitionRows(ctx *sql.Context, part sql.Partiti return nil, errors.New("unexpected partition for schema conflicts table") } - base, err := doltdb.GetCommitAncestor(ctx, p.head, p.state.Commit()) + optCmt, err := doltdb.GetCommitAncestor(ctx, p.head, p.state.Commit()) if err != nil { return nil, err } + base, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } baseRoot, err := base.GetRootValue(ctx) if err != nil { diff --git a/go/libraries/doltcore/sqle/dtables/unscoped_diff_table.go b/go/libraries/doltcore/sqle/dtables/unscoped_diff_table.go index a348b443c6e..bb0cd556a51 100644 --- a/go/libraries/doltcore/sqle/dtables/unscoped_diff_table.go +++ b/go/libraries/doltcore/sqle/dtables/unscoped_diff_table.go @@ -331,14 +331,25 @@ func (itr *doltDiffCommitHistoryRowItr) Next(ctx *sql.Context) (sql.Row, error) } itr.commits = nil } else if itr.child != nil { - _, commit, err := itr.child.Next(ctx) + _, optCmt, err := itr.child.Next(ctx) if err != nil { return nil, err } + commit, ok := optCmt.ToCommit() + if !ok { + return nil, io.EOF + } + err = itr.loadTableChanges(ctx, commit) + if err == doltdb.ErrGhostCommitEncountered { + // When showing the diff table in a shallow clone, we show as much of the dolt_history_{table} as we can, + // and don't consider it an error when we hit a ghost commit. 
+ return nil, io.EOF + } if err != nil { return nil, err } + } else { return nil, io.EOF } @@ -401,10 +412,14 @@ func (itr *doltDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Contex return nil, err } - parent, err := itr.ddb.ResolveParent(ctx, commit, 0) + optCmt, err := itr.ddb.ResolveParent(ctx, commit, 0) if err != nil { return nil, err } + parent, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrGhostCommitEncountered + } fromRootValue, err := parent.GetRootValue(ctx) if err != nil { @@ -453,8 +468,14 @@ func isTableDataEmpty(ctx *sql.Context, table *doltdb.Table) (bool, error) { func commitFilterForDiffTableFilterExprs(filters []sql.Expression) (doltdb.CommitFilter, error) { filters = transformFilters(filters...) - return func(ctx context.Context, h hash.Hash, cm *doltdb.Commit) (filterOut bool, err error) { + return func(ctx context.Context, h hash.Hash, optCmt *doltdb.OptionalCommit) (filterOut bool, err error) { sc := sql.NewContext(ctx) + + cm, ok := optCmt.ToCommit() + if !ok { + return false, doltdb.ErrGhostCommitEncountered + } + meta, err := cm.GetCommitMeta(ctx) if err != nil { return false, err @@ -542,9 +563,14 @@ func getCommitFromHash(ctx *sql.Context, ddb *doltdb.DoltDB, val string) *doltdb if err != nil { return nil } - cm, err := ddb.Resolve(ctx, cmSpec, headRef) + optCmt, err := ddb.Resolve(ctx, cmSpec, headRef) if err != nil { return nil } + cm, ok := optCmt.ToCommit() + if !ok { + return nil + } + return cm } diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_transaction_commit_test.go b/go/libraries/doltcore/sqle/enginetest/dolt_transaction_commit_test.go index e2afb6f2bba..f0e1f79527a 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_transaction_commit_test.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_transaction_commit_test.go @@ -174,8 +174,10 @@ func TestDoltTransactionCommitOneClient(t *testing.T) { require.NoError(t, err) headRefs, err := db.GetHeadRefs(context.Background()) require.NoError(t, 
err) - commit, err := db.Resolve(context.Background(), cs, headRefs[0]) + optCmt, err := db.Resolve(context.Background(), cs, headRefs[0]) require.NoError(t, err) + commit, ok := optCmt.ToCommit() + require.True(t, ok) cm, err := commit.GetCommitMeta(context.Background()) require.NoError(t, err) require.Contains(t, cm.Description, "Commit Message 42") @@ -184,16 +186,20 @@ func TestDoltTransactionCommitOneClient(t *testing.T) { require.NoError(t, err) headRefs, err = db.GetHeadRefs(context.Background()) require.NoError(t, err) - commit, err = db.Resolve(context.Background(), cs, headRefs[0]) + optCmt, err = db.Resolve(context.Background(), cs, headRefs[0]) require.NoError(t, err) + commit, ok = optCmt.ToCommit() + require.True(t, ok) cm, err = commit.GetCommitMeta(context.Background()) require.NoError(t, err) require.Contains(t, cm.Description, "Transaction commit") as, err := doltdb.NewAncestorSpec("~1") require.NoError(t, err) - initialCommit, err := commit.GetAncestor(context.Background(), as) + optCmt, err = commit.GetAncestor(context.Background(), as) require.NoError(t, err) + initialCommit, ok := optCmt.ToCommit() + require.True(t, ok) icm, err := initialCommit.GetCommitMeta(context.Background()) require.NoError(t, err) require.Equal(t, "checkpoint enginetest database mydb", icm.Description) @@ -347,22 +353,28 @@ func TestDoltTransactionCommitTwoClients(t *testing.T) { require.NoError(t, err) headRefs, err := db.GetHeadRefs(context.Background()) require.NoError(t, err) - commit2, err := db.Resolve(context.Background(), cs, headRefs[0]) + optCmt, err := db.Resolve(context.Background(), cs, headRefs[0]) require.NoError(t, err) + commit2, ok := optCmt.ToCommit() + require.True(t, ok) cm2, err := commit2.GetCommitMeta(context.Background()) require.NoError(t, err) require.Contains(t, cm2.Description, "ClientA Commit") as, err := doltdb.NewAncestorSpec("~1") require.NoError(t, err) - commit1, err := commit2.GetAncestor(context.Background(), as) + optCmt, err = 
commit2.GetAncestor(context.Background(), as) require.NoError(t, err) + commit1, ok := optCmt.ToCommit() + require.True(t, ok) cm1, err := commit1.GetCommitMeta(context.Background()) require.NoError(t, err) require.Contains(t, cm1.Description, "ClientB Commit") - commit0, err := commit1.GetAncestor(context.Background(), as) + optCmt, err = commit1.GetAncestor(context.Background(), as) require.NoError(t, err) + commit0, ok := optCmt.ToCommit() + require.True(t, ok) cm0, err := commit0.GetCommitMeta(context.Background()) require.NoError(t, err) require.Equal(t, "checkpoint enginetest database mydb", cm0.Description) @@ -427,22 +439,28 @@ func TestDoltTransactionCommitAutocommit(t *testing.T) { require.NoError(t, err) headRefs, err := db.GetHeadRefs(context.Background()) require.NoError(t, err) - head, err := db.Resolve(context.Background(), headSpec, headRefs[0]) + optCmt, err := db.Resolve(context.Background(), headSpec, headRefs[0]) require.NoError(t, err) + head, ok := optCmt.ToCommit() + require.True(t, ok) headMeta, err := head.GetCommitMeta(context.Background()) require.NoError(t, err) require.Contains(t, headMeta.Description, "ClientB Commit") ancestorSpec, err := doltdb.NewAncestorSpec("~1") require.NoError(t, err) - parent, err := head.GetAncestor(context.Background(), ancestorSpec) + optCmt, err = head.GetAncestor(context.Background(), ancestorSpec) require.NoError(t, err) + parent, ok := optCmt.ToCommit() + require.True(t, ok) parentMeta, err := parent.GetCommitMeta(context.Background()) require.NoError(t, err) require.Contains(t, parentMeta.Description, "Transaction commit") - grandParent, err := parent.GetAncestor(context.Background(), ancestorSpec) + optCmt, err = parent.GetAncestor(context.Background(), ancestorSpec) require.NoError(t, err) + grandParent, ok := optCmt.ToCommit() + require.True(t, ok) grandparentMeta, err := grandParent.GetCommitMeta(context.Background()) require.NoError(t, err) require.Equal(t, "checkpoint enginetest database mydb", 
grandparentMeta.Description) diff --git a/go/libraries/doltcore/sqle/history_table.go b/go/libraries/doltcore/sqle/history_table.go index e3986ab5ca0..c91722d4b5d 100644 --- a/go/libraries/doltcore/sqle/history_table.go +++ b/go/libraries/doltcore/sqle/history_table.go @@ -29,7 +29,6 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" "github.com/dolthub/dolt/go/libraries/doltcore/schema" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/index" - "github.com/dolthub/dolt/go/libraries/utils/set" "github.com/dolthub/dolt/go/store/datas" "github.com/dolthub/dolt/go/store/hash" ) @@ -250,12 +249,15 @@ func substituteWorkingHash(h hash.Hash, f []sql.Expression) []sql.Expression { return ret } -var historyTableCommitMetaCols = set.NewStrSet([]string{CommitHashCol, CommitDateCol, CommitterCol}) - func commitFilterForExprs(ctx *sql.Context, filters []sql.Expression) (doltdb.CommitFilter, error) { filters = transformFilters(ctx, filters...) - return func(ctx context.Context, h hash.Hash, cm *doltdb.Commit) (filterOut bool, err error) { + return func(ctx context.Context, h hash.Hash, optCmt *doltdb.OptionalCommit) (filterOut bool, err error) { + cm, ok := optCmt.ToCommit() + if !ok { + return false, nil // NM4 TEST. 
+ } + meta, err := cm.GetCommitMeta(ctx) if err != nil { @@ -450,11 +452,14 @@ type commitPartitioner struct { // Next returns the next partition and nil, io.EOF when complete func (cp commitPartitioner) Next(ctx *sql.Context) (sql.Partition, error) { - h, cm, err := cp.cmItr.Next(ctx) - + h, optCmt, err := cp.cmItr.Next(ctx) if err != nil { return nil, err } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, io.EOF + } return &commitPartition{h, cm}, nil } diff --git a/go/libraries/doltcore/sqle/index/mergeable_indexes_setup_test.go b/go/libraries/doltcore/sqle/index/mergeable_indexes_setup_test.go index e6c6b79292f..0ea23dd5bb3 100644 --- a/go/libraries/doltcore/sqle/index/mergeable_indexes_setup_test.go +++ b/go/libraries/doltcore/sqle/index/mergeable_indexes_setup_test.go @@ -180,9 +180,12 @@ func getDbState(t *testing.T, db sql.Database, dEnv *env.DoltEnv) (dsess.Initial if err != nil { return dsess.InitialDbState{}, err } - headCommit, err := dEnv.DoltDB.Resolve(ctx, headSpec, headRef) + optCmt, err := dEnv.DoltDB.Resolve(ctx, headSpec, headRef) require.NoError(t, err) + headCommit, ok := optCmt.ToCommit() + require.True(t, ok) + ws, err := dEnv.WorkingSet(ctx) require.NoError(t, err) diff --git a/go/libraries/doltcore/sqle/read_replica_database.go b/go/libraries/doltcore/sqle/read_replica_database.go index bc59d05f1dc..8fe4e3030fb 100644 --- a/go/libraries/doltcore/sqle/read_replica_database.go +++ b/go/libraries/doltcore/sqle/read_replica_database.go @@ -301,7 +301,7 @@ func pullBranches( // back changes which were applied from another thread. 
_, err := rrd.limiter.Run(ctx, "-all", func() (any, error) { - pullErr := rrd.ddb.PullChunks(ctx, rrd.tmpDir, rrd.srcDB, remoteHashes, nil) + pullErr := rrd.ddb.PullChunks(ctx, rrd.tmpDir, rrd.srcDB, remoteHashes, nil, nil) if pullErr != nil { return nil, pullErr } @@ -406,10 +406,14 @@ func (rrd ReadReplicaDatabase) createNewBranchFromRemote(ctx *sql.Context, remot return err } - cm, err := rrd.ddb.Resolve(ctx, spec, nil) + optCmt, err := rrd.ddb.Resolve(ctx, spec, nil) if err != nil { return err } + cm, ok := optCmt.ToCommit() + if !ok { + return doltdb.ErrGhostCommitEncountered // NM4 - TEST. + } err = rrd.ddb.NewBranchAtCommit(ctx, remoteRef.Ref, cm, nil) return rrd.ddb.SetHead(ctx, trackingRef, remoteRef.Hash) diff --git a/go/libraries/doltcore/sqle/sqlselect_test.go b/go/libraries/doltcore/sqle/sqlselect_test.go index 78c2a1e9e6b..eb646229c4a 100644 --- a/go/libraries/doltcore/sqle/sqlselect_test.go +++ b/go/libraries/doltcore/sqle/sqlselect_test.go @@ -1561,9 +1561,12 @@ func testSelectDiffQuery(t *testing.T, test SelectTest) { cs, err := doltdb.NewCommitSpec("main") require.NoError(t, err) - cm, err := dEnv.DoltDB.Resolve(ctx, cs, nil) + optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, nil) require.NoError(t, err) + cm, ok := optCmt.ToCommit() + require.True(t, ok) + root, err := cm.GetRootValue(ctx) require.NoError(t, err) @@ -1674,9 +1677,12 @@ func initializeWithHistory(t *testing.T, ctx context.Context, dEnv *env.DoltEnv, cs, err := doltdb.NewCommitSpec(env.DefaultInitBranch) require.NoError(t, err) - cm, err := dEnv.DoltDB.Resolve(ctx, cs, nil) + optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, nil) require.NoError(t, err) + cm, ok := optCmt.ToCommit() + require.True(t, ok) + processNode(t, ctx, dEnv, node, cm) } } @@ -1694,9 +1700,12 @@ func processNode(t *testing.T, ctx context.Context, dEnv *env.DoltEnv, node Hist cs, err := doltdb.NewCommitSpec(branchRef.String()) require.NoError(t, err) - cm, err := dEnv.DoltDB.Resolve(ctx, cs, nil) + optCmt, err := 
dEnv.DoltDB.Resolve(ctx, cs, nil) require.NoError(t, err) + cm, ok := optCmt.ToCommit() + require.True(t, ok) + root, err := cm.GetRootValue(ctx) require.NoError(t, err) diff --git a/go/store/chunks/chunk.go b/go/store/chunks/chunk.go index d858a288f41..13c0a1eace8 100644 --- a/go/store/chunks/chunk.go +++ b/go/store/chunks/chunk.go @@ -31,8 +31,9 @@ import ( // Chunk is a unit of stored data in noms type Chunk struct { - r hash.Hash - data []byte + r hash.Hash + data []byte + ghost bool } var EmptyChunk = NewChunk([]byte{}) @@ -57,15 +58,25 @@ func (c Chunk) IsEmpty() bool { return len(c.data) == 0 } +// IsGhost returns true if the chunk is a ghost chunk. Ghost chunks have no data, so if IsGhost() returns true, Data() will be an empty slice. +func (c Chunk) IsGhost() bool { + return c.ghost +} + // NewChunk creates a new Chunk backed by data. This means that the returned Chunk has ownership of this slice of memory. func NewChunk(data []byte) Chunk { r := hash.Of(data) - return Chunk{r, data} + return Chunk{r, data, false} } // NewChunkWithHash creates a new chunk with a known hash. The hash is not re-calculated or verified. This should obviously only be used in cases where the caller already knows the specified hash is correct. func NewChunkWithHash(r hash.Hash, data []byte) Chunk { - return Chunk{r, data} + return Chunk{r, data, false} +} + +// NewGhostChunk creates a new ghost Chunk with a specified hash. The data will be an empty slice. +func NewGhostChunk(r hash.Hash) *Chunk { + return &Chunk{r, []byte{}, true} } // ChunkWriter wraps an io.WriteCloser, additionally providing the ability to grab the resulting Chunk for all data written through the interface. Calling Chunk() or Close() on an instance disallows further writing. 
diff --git a/go/store/chunks/chunk_store.go b/go/store/chunks/chunk_store.go index 79a06fcc700..7a6f6ea3b70 100644 --- a/go/store/chunks/chunk_store.go +++ b/go/store/chunks/chunk_store.go @@ -113,6 +113,12 @@ type ChunkStore interface { // supported. StatsSummary() string + // PersistGhostHashes is used to persist a set of addresses that are known to exist, but + // are not currently stored here. Only the GenerationalChunkStore implementation allows use of this method, as + // shallow clones are only allowed in local copies currently. Note that at the application level, the only + // hashes which can be ghosted are commit ids, but the chunk store doesn't know what those are. + PersistGhostHashes(ctx context.Context, refs hash.HashSet) error + // Close tears down any resources in use by the implementation. After // Close(), the ChunkStore may not be used again. It is NOT SAFE to call // Close() concurrently with any other ChunkStore method; behavior is @@ -182,6 +188,7 @@ type PrefixChunkStore interface { type GenerationalCS interface { NewGen() ChunkStoreGarbageCollector OldGen() ChunkStoreGarbageCollector + GhostGen() ChunkStore } var ErrUnsupportedOperation = errors.New("operation not supported") diff --git a/go/store/chunks/memory_store.go b/go/store/chunks/memory_store.go index d363037bae9..e55d8b4cd68 100644 --- a/go/store/chunks/memory_store.go +++ b/go/store/chunks/memory_store.go @@ -388,6 +388,10 @@ func (ms *MemoryStoreView) StatsSummary() string { return "Unsupported" } +func (ms *MemoryStoreView) PersistGhostHashes(ctx context.Context, refs hash.HashSet) error { + panic("not implemented") +} + func (ms *MemoryStoreView) Close() error { return nil } diff --git a/go/store/cmd/noms/noms_cat.go b/go/store/cmd/noms/noms_cat.go index e534f7284f1..787b30f7585 100644 --- a/go/store/cmd/noms/noms_cat.go +++ b/go/store/cmd/noms/noms_cat.go @@ -161,7 +161,7 @@ func runCat(ctx context.Context, args []string) int { //Want a clean db every loop sp, _ := 
spec.ForDatabase("mem") vrw := sp.GetVRW(ctx) - waf := types.WalkAddrsForNBF(vrw.Format()) + waf := types.WalkAddrsForNBF(vrw.Format(), nil) fmt.Printf(" chunk[%d].raw.len: %d\n", cidx, len(currCD.compressed)) diff --git a/go/store/cmd/noms/noms_show.go b/go/store/cmd/noms/noms_show.go index efeb12ee943..64895b01c5b 100644 --- a/go/store/cmd/noms/noms_show.go +++ b/go/store/cmd/noms/noms_show.go @@ -33,8 +33,6 @@ import ( flag "github.com/juju/gnuflag" "github.com/dolthub/dolt/go/gen/fb/serial" - "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/libraries/doltcore/schema/encoding" "github.com/dolthub/dolt/go/store/cmd/noms/util" "github.com/dolthub/dolt/go/store/config" "github.com/dolthub/dolt/go/store/hash" @@ -159,6 +157,8 @@ func typeString(value types.Value) string { typeString = "AddressMap" case serial.CommitClosureFileID: typeString = "CommitClosure" + case serial.TableSchemaFileID: + typeString = "TableSchema" default: t, err := types.TypeOf(value) util.CheckErrorNoUsage(err) @@ -235,19 +235,6 @@ func outputEncodedValue(ctx context.Context, w io.Writer, value types.Value) err return err } return tree.OutputAddressMapNode(w, node) - case serial.TableSchemaFileID: - sch, err := encoding.DeserializeSchema(ctx, types.Format_Default, value) - if err != nil { - return err - } - - fmt.Fprintf(w, " {\n") - sch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { - fmt.Fprintf(w, "\t%s: %s (additional info not shown)\n", col.Name, col.TypeInfo.ToSqlType().String()) - return false, nil - }) - fmt.Fprintf(w, "}\n") - return nil case serial.ProllyTreeNodeFileID: fallthrough case serial.AddressMapFileID: diff --git a/go/store/datas/commit.go b/go/store/datas/commit.go index 1c4efaf83e0..79bfedfabc0 100644 --- a/go/store/datas/commit.go +++ b/go/store/datas/commit.go @@ -74,6 +74,11 @@ func (c *Commit) NomsValue() types.Value { return c.val } +func (c *Commit) IsGhost() bool { + _, ok := 
c.val.(types.GhostValue) + return ok +} + func (c *Commit) Height() uint64 { return c.height } @@ -306,6 +311,10 @@ func commitPtr(nbf *types.NomsBinFormat, v types.Value, r *types.Ref) (*Commit, // CommitFromValue deserializes a types.Value into a Commit. func CommitFromValue(nbf *types.NomsBinFormat, v types.Value) (*Commit, error) { + if g, ok := v.(types.GhostValue); ok { + return &Commit{val: g}, nil + } + isCommit, err := IsCommit(v) if err != nil { return nil, err @@ -448,6 +457,12 @@ func FindClosureCommonAncestor(ctx context.Context, cl CommitClosure, cm *Commit // GetCommitParents returns |Ref|s to the parents of the commit. func GetCommitParents(ctx context.Context, vr types.ValueReader, cv types.Value) ([]*Commit, error) { + _, ok := cv.(types.GhostValue) + if ok { + // Not using the common error here because they are in the doltdb package which results in a cycle. + return nil, fmt.Errorf("runtime exception. GetCommitParents called with GhostCommit.") + } + if sm, ok := cv.(types.SerialMessage); ok { data := []byte(sm) if serial.GetFileID(data) != serial.CommitFileID { @@ -457,6 +472,7 @@ func GetCommitParents(ctx context.Context, vr types.ValueReader, cv types.Value) if err != nil { return nil, err } + vals, err := vr.ReadManyValues(ctx, addrs) if err != nil { return nil, err @@ -466,19 +482,28 @@ func GetCommitParents(ctx context.Context, vr types.ValueReader, cv types.Value) if v == nil { return nil, fmt.Errorf("GetCommitParents: Did not find parent Commit in ValueReader: %s", addrs[i].String()) } - var csm serial.Commit - err := serial.InitCommitRoot(&csm, []byte(v.(types.SerialMessage)), serial.MessagePrefixSz) - if err != nil { - return nil, err - } - res[i] = &Commit{ - val: v, - height: csm.Height(), - addr: addrs[i], + + if g, ok := v.(types.GhostValue); ok { + res[i] = &Commit{ + val: g, + addr: addrs[i], + } + } else { + var csm serial.Commit + err := serial.InitCommitRoot(&csm, []byte(v.(types.SerialMessage)), serial.MessagePrefixSz) + if 
err != nil { + return nil, err + } + res[i] = &Commit{ + val: v, + height: csm.Height(), + addr: addrs[i], + } } } return res, nil } + c, ok := cv.(types.Struct) if !ok { return nil, errors.New("GetCommitParents: provided value is not a commit.") diff --git a/go/store/datas/database.go b/go/store/datas/database.go index f5e1c328be9..56559d48a30 100644 --- a/go/store/datas/database.go +++ b/go/store/datas/database.go @@ -166,6 +166,11 @@ type Database interface { Format() *types.NomsBinFormat + // PersistGhostCommitIDs persists the given set of ghost commit IDs to the storage layer of the database. Ghost + // commits are commits which are real but have not been replicated to this instance of the database. Currently, + // it is only appropriate to use this method during a shallow clone operation. + PersistGhostCommitIDs(ctx context.Context, ghosts hash.HashSet) error + // chunkStore returns the ChunkStore used to read and write // groups of values to the database efficiently. This interface is a low- // level detail of the database that should infrequently be needed by diff --git a/go/store/datas/database_common.go b/go/store/datas/database_common.go index b202fcf028c..509c22cdb32 100644 --- a/go/store/datas/database_common.go +++ b/go/store/datas/database_common.go @@ -852,6 +852,19 @@ func assertDatasetHash( return curr.(types.Ref).TargetHash().Equal(currHash), nil } +func (db *database) PersistGhostCommitIDs(ctx context.Context, ghosts hash.HashSet) error { + cs := db.ChunkStore() + + gcs, ok := cs.(chunks.GenerationalCS) + if !ok { + return errors.New("Generational Chunk Store expected. database does not support shallow clone instances.") + } + + err := gcs.GhostGen().PersistGhostHashes(ctx, ghosts) + + return err +} + // CommitWithWorkingSet updates two Datasets atomically: the working set, and its corresponding HEAD. Uses the same // global locking mechanism as UpdateWorkingSet. 
// The current dataset head will be filled in as the first parent of the new commit if not already present. diff --git a/go/store/hash/hash.go b/go/store/hash/hash.go index 0c4c50225e1..bf0f2ba19c7 100644 --- a/go/store/hash/hash.go +++ b/go/store/hash/hash.go @@ -159,9 +159,9 @@ func (hs HashSet) Insert(hash Hash) { } // Has returns true if the HashSet contains hash. -func (hs HashSet) Has(hash Hash) (has bool) { - _, has = hs[hash] - return +func (hs HashSet) Has(hash Hash) bool { + _, has := hs[hash] + return has } // Remove removes hash from the HashSet. diff --git a/go/store/nbs/benchmarks/file_block_store.go b/go/store/nbs/benchmarks/file_block_store.go index aab327c32c0..a617f98ee80 100644 --- a/go/store/nbs/benchmarks/file_block_store.go +++ b/go/store/nbs/benchmarks/file_block_store.go @@ -96,3 +96,7 @@ func (fb fileBlockStore) Commit(ctx context.Context, current, last hash.Hash) (b err := fb.bw.Flush() return true, err } + +func (fb fileBlockStore) PersistGhostHashes(ctx context.Context, refs hash.HashSet) error { + panic("not impl") +} diff --git a/go/store/nbs/benchmarks/null_block_store.go b/go/store/nbs/benchmarks/null_block_store.go index 9dd97b51aad..558d06d69b9 100644 --- a/go/store/nbs/benchmarks/null_block_store.go +++ b/go/store/nbs/benchmarks/null_block_store.go @@ -86,3 +86,7 @@ func (nb nullBlockStore) Root(ctx context.Context) (hash.Hash, error) { func (nb nullBlockStore) Commit(ctx context.Context, current, last hash.Hash) (bool, error) { return true, nil } + +func (nb nullBlockStore) PersistGhostHashes(ctx context.Context, refs hash.HashSet) error { + panic("not impl") +} diff --git a/go/store/nbs/generational_chunk_store.go b/go/store/nbs/generational_chunk_store.go index ac94b0cf3e5..d051af2a591 100644 --- a/go/store/nbs/generational_chunk_store.go +++ b/go/store/nbs/generational_chunk_store.go @@ -16,6 +16,7 @@ package nbs import ( "context" + "fmt" "io" "path/filepath" "strings" @@ -30,18 +31,31 @@ var _ chunks.GenerationalCS = 
(*GenerationalNBS)(nil) var _ chunks.TableFileStore = (*GenerationalNBS)(nil) type GenerationalNBS struct { - oldGen *NomsBlockStore - newGen *NomsBlockStore + oldGen *NomsBlockStore + newGen *NomsBlockStore + ghostGen *GhostBlockStore } -func NewGenerationalCS(oldGen, newGen *NomsBlockStore) *GenerationalNBS { +func (gcs *GenerationalNBS) PersistGhostHashes(ctx context.Context, refs hash.HashSet) error { + if gcs.ghostGen == nil { + return gcs.ghostGen.PersistGhostHashes(ctx, refs) + } + return fmt.Errorf("runtime error. ghostGen is nil but an attempt to persist ghost hashes was made") +} + +func (gcs *GenerationalNBS) GhostGen() chunks.ChunkStore { + return gcs.ghostGen +} + +func NewGenerationalCS(oldGen, newGen *NomsBlockStore, ghostGen *GhostBlockStore) *GenerationalNBS { if oldGen.Version() != "" && oldGen.Version() != newGen.Version() { panic("oldgen and newgen chunkstore versions vary") } return &GenerationalNBS{ - oldGen: oldGen, - newGen: newGen, + oldGen: oldGen, + newGen: newGen, + ghostGen: ghostGen, } } @@ -62,7 +76,17 @@ func (gcs *GenerationalNBS) Get(ctx context.Context, h hash.Hash) (chunks.Chunk, } if c.IsEmpty() { - return gcs.newGen.Get(ctx, h) + c, err = gcs.newGen.Get(ctx, h) + } + if err != nil { + return chunks.EmptyChunk, err + } + + if c.IsEmpty() && gcs.ghostGen != nil { + c, err = gcs.ghostGen.Get(ctx, h) + if err != nil { + return chunks.EmptyChunk, err + } } return c, nil @@ -72,26 +96,45 @@ func (gcs *GenerationalNBS) Get(ctx context.Context, h hash.Hash) (chunks.Chunk, // which have been found. Any non-present chunks will silently be ignored. 
func (gcs *GenerationalNBS) GetMany(ctx context.Context, hashes hash.HashSet, found func(context.Context, *chunks.Chunk)) error { mu := &sync.Mutex{} - notInOldGen := hashes.Copy() + notFound := hashes.Copy() err := gcs.oldGen.GetMany(ctx, hashes, func(ctx context.Context, chunk *chunks.Chunk) { func() { mu.Lock() defer mu.Unlock() - delete(notInOldGen, chunk.Hash()) + delete(notFound, chunk.Hash()) }() found(ctx, chunk) }) - if err != nil { return err } + if len(notFound) == 0 { + return nil + } - if len(notInOldGen) == 0 { + err = gcs.newGen.GetMany(ctx, notFound, func(ctx context.Context, chunk *chunks.Chunk) { + func() { + mu.Lock() + defer mu.Unlock() + delete(notFound, chunk.Hash()) + }() + + found(ctx, chunk) + }) + if err != nil { + return err + } + if len(notFound) == 0 { return nil } - return gcs.newGen.GetMany(ctx, notInOldGen, found) + // Last ditch effort to see if the requested objects are commits we've decided to ignore. Note the function spec + // considers non-present chunks to be silently ignored, so we don't need to return an error here + if gcs.ghostGen == nil { + return nil + } + return gcs.ghostGen.GetMany(ctx, notFound, found) } func (gcs *GenerationalNBS) GetManyCompressed(ctx context.Context, hashes hash.HashSet, found func(context.Context, CompressedChunk)) error { @@ -121,16 +164,23 @@ func (gcs *GenerationalNBS) GetManyCompressed(ctx context.Context, hashes hash.H // Has returns true iff the value at the address |h| is contained in the store func (gcs *GenerationalNBS) Has(ctx context.Context, h hash.Hash) (bool, error) { has, err := gcs.oldGen.Has(ctx, h) - - if err != nil { - return false, err + if err != nil || has { + return has, err } - if has { - return true, nil + has, err = gcs.newGen.Has(ctx, h) + if err != nil || has { + return has, err } - return gcs.newGen.Has(ctx, h) + // Possibly a truncated commit. 
+ if gcs.ghostGen != nil { + has, err = gcs.ghostGen.Has(ctx, h) + if err != nil { + return has, err + } + } + return has, nil } // HasMany returns a new HashSet containing any members of |hashes| that are absent from the store. @@ -148,9 +198,20 @@ func (gcs *GenerationalNBS) hasMany(recs []hasRecord) (absent hash.HashSet, err return absent, nil } - gcs.oldGen.mu.RLock() - defer gcs.oldGen.mu.RUnlock() - return gcs.oldGen.hasMany(recs) + absent, err = func() (hash.HashSet, error) { + gcs.oldGen.mu.RLock() + defer gcs.oldGen.mu.RUnlock() + return gcs.oldGen.hasMany(recs) + }() + if err != nil { + return nil, err + } + + if len(absent) == 0 || gcs.ghostGen == nil { + return absent, nil + } + + return gcs.ghostGen.hasMany(absent) } // Put caches c in the ChunkSource. Upon return, c must be visible to diff --git a/go/store/nbs/generational_chunk_store_test.go b/go/store/nbs/generational_chunk_store_test.go index 492c9263ebd..8293763a62b 100644 --- a/go/store/nbs/generational_chunk_store_test.go +++ b/go/store/nbs/generational_chunk_store_test.go @@ -147,7 +147,7 @@ func TestGenerationalCS(t *testing.T) { putChunks(t, ctx, chnks, oldGen, inOld, 0, 1, 2, 3, 4) - cs := NewGenerationalCS(oldGen, newGen) + cs := NewGenerationalCS(oldGen, newGen, nil) // NM4 - I guess we need more test here. requireChunks(t, ctx, chnks, cs, inOld, inNew) putChunks(t, ctx, chnks, cs, inNew, 6, 7, 8, 9) diff --git a/go/store/nbs/ghost_store.go b/go/store/nbs/ghost_store.go new file mode 100644 index 00000000000..a52cf146a48 --- /dev/null +++ b/go/store/nbs/ghost_store.go @@ -0,0 +1,169 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nbs + +import ( + "bufio" + "context" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + + "github.com/dolthub/dolt/go/store/chunks" + "github.com/dolthub/dolt/go/store/hash" +) + +type GhostBlockStore struct { + skippedRefs *hash.HashSet + ghostObjectsFile string +} + +// We use the Has, HasMany, Get, GetMany, and PersistGhostHashes methods from the ChunkStore interface. All other methods are not supported. +var _ chunks.ChunkStore = &GhostBlockStore{} + +// NewGhostBlockStore returns a new GhostBlockStore instance. Currently the only parameter is the path to the directory +// where we will create a text file called ghostObjects.txt. This file will contain the hashes of the ghost objects. Creation +// and use of this file is constrained to this instance. If there is no ghostObjects.txt file, then the GhostBlockStore will +// be empty - never returning any values from the Has, HasMany, Get, or GetMany methods. +func NewGhostBlockStore(nomsPath string) (*GhostBlockStore, error) { + ghostPath := filepath.Join(nomsPath, "ghostObjects.txt") + f, err := os.Open(ghostPath) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return &GhostBlockStore{ + skippedRefs: &hash.HashSet{}, + ghostObjectsFile: ghostPath, + }, nil + } + // Other error, permission denied, etc, we want to hear about. 
+ return nil, err + } + scanner := bufio.NewScanner(f) + skiplist := &hash.HashSet{} + for scanner.Scan() { + h := scanner.Text() + if hash.IsValid(h) { + skiplist.Insert(hash.Parse(h)) + } else { + return nil, fmt.Errorf("invalid hash %s in ghostObjects.txt", h) + } + } + + return &GhostBlockStore{ + skippedRefs: skiplist, + ghostObjectsFile: ghostPath, + }, nil +} + +// Get returns a ghost chunk if the hash is in the ghostObjectsFile. Otherwise, it returns an empty chunk. Chunks returned +// by this code will always be ghost chunks, ie chunk.IsGhost() will always return true. +func (g GhostBlockStore) Get(ctx context.Context, h hash.Hash) (chunks.Chunk, error) { + if g.skippedRefs.Has(h) { + return *chunks.NewGhostChunk(h), nil + } + return chunks.EmptyChunk, nil +} + +func (g GhostBlockStore) GetMany(ctx context.Context, hashes hash.HashSet, found func(context.Context, *chunks.Chunk)) error { + for h := range hashes { + if g.skippedRefs.Has(h) { + found(ctx, chunks.NewGhostChunk(h)) + } + } + return nil +} + +func (g *GhostBlockStore) PersistGhostHashes(ctx context.Context, hashes hash.HashSet) error { + if hashes.Size() == 0 { + return fmt.Errorf("runtime error. 
PersistGhostHashes called with empty hash set") + } + + f, err := os.OpenFile(g.ghostObjectsFile, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return err + } + + for h := range hashes { + if _, err := f.WriteString(h.String() + "\n"); err != nil { + return err + } + } + + g.skippedRefs = &hash.HashSet{} + for h := range hashes { + g.skippedRefs.Insert(h) + } + + return nil +} + +func (g GhostBlockStore) Has(ctx context.Context, h hash.Hash) (bool, error) { + if g.skippedRefs.Has(h) { + return true, nil + } + return false, nil +} + +func (g GhostBlockStore) HasMany(ctx context.Context, hashes hash.HashSet) (absent hash.HashSet, err error) { + return g.hasMany(hashes) +} + +func (g GhostBlockStore) hasMany(hashes hash.HashSet) (absent hash.HashSet, err error) { + absent = hash.HashSet{} + for h := range hashes { + if !g.skippedRefs.Has(h) { + absent.Insert(h) + } + } + return absent, nil +} + +func (g GhostBlockStore) Put(ctx context.Context, c chunks.Chunk, getAddrs chunks.GetAddrsCb) error { + panic("GhostBlockStore does not support Put") +} + +func (g GhostBlockStore) Version() string { + panic("GhostBlockStore does not support Version") +} + +func (g GhostBlockStore) AccessMode() chunks.ExclusiveAccessMode { + panic("GhostBlockStore does not support AccessMode") +} + +func (g GhostBlockStore) Rebase(ctx context.Context) error { + panic("GhostBlockStore does not support Rebase") +} + +func (g GhostBlockStore) Root(ctx context.Context) (hash.Hash, error) { + panic("GhostBlockStore does not support Root") +} + +func (g GhostBlockStore) Commit(ctx context.Context, current, last hash.Hash) (bool, error) { + panic("GhostBlockStore does not support Commit") +} + +func (g GhostBlockStore) Stats() interface{} { + panic("GhostBlockStore does not support Stats") +} + +func (g GhostBlockStore) StatsSummary() string { + panic("GhostBlockStore does not support StatsSummary") +} + +func (g GhostBlockStore) Close() error { + panic("GhostBlockStore does not 
support Close") +} diff --git a/go/store/nbs/nbs_metrics_wrapper.go b/go/store/nbs/nbs_metrics_wrapper.go index 2011d3bb776..b8609584499 100644 --- a/go/store/nbs/nbs_metrics_wrapper.go +++ b/go/store/nbs/nbs_metrics_wrapper.go @@ -95,3 +95,7 @@ func (nbsMW *NBSMetricWrapper) GetManyCompressed(ctx context.Context, hashes has atomic.AddInt32(&nbsMW.TotalChunkGets, int32(len(hashes))) return nbsMW.nbs.GetManyCompressed(ctx, hashes, found) } + +func (nbsMW NBSMetricWrapper) PersistGhostHashes(ctx context.Context, refs hash.HashSet) error { + return nbsMW.nbs.PersistGhostHashes(ctx, refs) +} diff --git a/go/store/nbs/store.go b/go/store/nbs/store.go index e10b989ef9a..adce49963fe 100644 --- a/go/store/nbs/store.go +++ b/go/store/nbs/store.go @@ -110,6 +110,10 @@ type NomsBlockStore struct { stats *Stats } +func (nbs *NomsBlockStore) PersistGhostHashes(ctx context.Context, refs hash.HashSet) error { + return fmt.Errorf("runtime error: PersistGhostHashes should never be called on the NomsBlockStore") +} + var _ chunks.TableFileStore = &NomsBlockStore{} var _ chunks.ChunkStoreGarbageCollector = &NomsBlockStore{} diff --git a/go/store/prolly/commit_closure.go b/go/store/prolly/commit_closure.go index ae15fb87cba..0835069b5b6 100644 --- a/go/store/prolly/commit_closure.go +++ b/go/store/prolly/commit_closure.go @@ -18,6 +18,8 @@ import ( "bytes" "context" "encoding/binary" + "errors" + "io" "github.com/dolthub/dolt/go/store/hash" "github.com/dolthub/dolt/go/store/pool" @@ -26,6 +28,9 @@ import ( "github.com/dolthub/dolt/go/store/types" ) +// Closure values are a long (8 bytes) followed by a hash (20 bytes, hash.ByteLen). 
+const prefixWidth = 8 + type CommitClosureValue []byte type CommitClosure struct { @@ -39,7 +44,7 @@ var _ tree.Ordering[CommitClosureKey] = commitClosureKeyOrdering{} func (o commitClosureKeyOrdering) Compare(left, right CommitClosureKey) int { lh, rh := left.Height(), right.Height() if lh == rh { - return bytes.Compare(left[8:], right[8:]) + return bytes.Compare(left[prefixWidth:], right[prefixWidth:]) } else if lh < rh { return -1 } @@ -113,10 +118,35 @@ func (c CommitClosure) ContainsKey(ctx context.Context, h hash.Hash, height uint func DecodeCommitClosureKey(key []byte) (height uint64, addr hash.Hash) { height = binary.LittleEndian.Uint64(key) - addr = hash.New(key[8:]) + addr = hash.New(key[prefixWidth:]) + return } +func (c CommitClosure) AsHashSet(ctx context.Context) (hash.HashSet, error) { + closureIter, err := c.IterAllReverse(ctx) + if err != nil { + return hash.HashSet{}, err + } + + skipCmts := hash.NewHashSet() + for { + key, _, err := closureIter.Next(ctx) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return hash.HashSet{}, err + + } + + clsrHash := hash.New(key[prefixWidth:]) + skipCmts.Insert(clsrHash) + } + + return skipCmts, nil +} + type CommitClosureEditor struct { closure tree.MutableMap[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering] } @@ -126,9 +156,9 @@ type CommitClosureKey []byte type CommitClosureIter tree.KvIter[CommitClosureKey, CommitClosureValue] func NewCommitClosureKey(p pool.BuffPool, height uint64, addr hash.Hash) CommitClosureKey { - r := p.Get(8 + 20) + r := p.Get(prefixWidth + hash.ByteLen) binary.LittleEndian.PutUint64(r, height) - copy(r[8:], addr[:]) + copy(r[prefixWidth:], addr[:]) return CommitClosureKey(r) } @@ -137,7 +167,7 @@ func (k CommitClosureKey) Height() uint64 { } func (k CommitClosureKey) Addr() hash.Hash { - return hash.New(k[8:]) + return hash.New(k[prefixWidth:]) } func (k CommitClosureKey) Less(other CommitClosureKey) bool { diff --git a/go/store/spec/spec.go 
b/go/store/spec/spec.go index b39532ea4ff..45946305033 100644 --- a/go/store/spec/spec.go +++ b/go/store/spec/spec.go @@ -511,7 +511,7 @@ func (sp Spec) createDatabase(ctx context.Context) (datas.Database, types.ValueR oldGenSt, err := nbs.NewLocalStore(ctx, newGenSt.Version(), oldgenDb, 1<<28, nbs.NewUnlimitedMemQuotaProvider()) d.PanicIfError(err) - cs := nbs.NewGenerationalCS(oldGenSt, newGenSt) + cs := nbs.NewGenerationalCS(oldGenSt, newGenSt, nil) ns := tree.NewNodeStore(cs) vrw := types.NewValueStore(cs) diff --git a/go/store/types/noms_kind.go b/go/store/types/noms_kind.go index 51d32337d26..51f18be4207 100644 --- a/go/store/types/noms_kind.go +++ b/go/store/types/noms_kind.go @@ -73,6 +73,7 @@ const ( GeometryCollectionKind ExtendedKind + GhostKind = 254 UnknownKind NomsKind = 255 ) diff --git a/go/store/types/ref.go b/go/store/types/ref.go index 215e13c6cb6..fda34877b05 100644 --- a/go/store/types/ref.go +++ b/go/store/types/ref.go @@ -237,12 +237,16 @@ func WalkAddrsForChunkStore(cs chunks.ChunkStore) (func(chunks.Chunk, func(h has if err != nil { return nil, fmt.Errorf("could not find binary format corresponding to %s. 
try upgrading dolt.", cs.Version()) } - return WalkAddrsForNBF(nbf), nil + return WalkAddrsForNBF(nbf, nil), nil } -func WalkAddrsForNBF(nbf *NomsBinFormat) func(chunks.Chunk, func(h hash.Hash, isleaf bool) error) error { +func WalkAddrsForNBF(nbf *NomsBinFormat, skipAddrs hash.HashSet) func(chunks.Chunk, func(h hash.Hash, isleaf bool) error) error { return func(c chunks.Chunk, cb func(h hash.Hash, isleaf bool) error) error { return walkRefs(c.Data(), nbf, func(r Ref) error { + if skipAddrs != nil && skipAddrs.Has(r.TargetHash()) { + return nil + } + return cb(r.TargetHash(), r.Height() == 1) }) } diff --git a/go/store/types/serial_message.go b/go/store/types/serial_message.go index ce2e008fa88..9f4ac0e394f 100644 --- a/go/store/types/serial_message.go +++ b/go/store/types/serial_message.go @@ -128,7 +128,7 @@ func (sm SerialMessage) humanReadableStringAtIndentationLevel(level int) string printWithIndendationLevel(level, ret, "\tRootValue: {\n") hashes := msg.RootBytes() for i := 0; i < len(hashes)/hash.ByteLen; i++ { - addr := hash.New(hashes[i*20 : (i+1)*20]) + addr := hash.New(hashes[i*hash.ByteLen : (i+1)*hash.ByteLen]) printWithIndendationLevel(level, ret, "\t\t#%s\n", addr.String()) } printWithIndendationLevel(level, ret, "\t}\n") @@ -136,7 +136,7 @@ func (sm SerialMessage) humanReadableStringAtIndentationLevel(level int) string printWithIndendationLevel(level, ret, "\tParents: {\n") hashes = msg.ParentAddrsBytes() for i := 0; i < msg.ParentAddrsLength()/hash.ByteLen; i++ { - addr := hash.New(hashes[i*20 : (i+1)*20]) + addr := hash.New(hashes[i*hash.ByteLen : (i+1)*hash.ByteLen]) printWithIndendationLevel(level, ret, "\t\t#%s\n", addr.String()) } printWithIndendationLevel(level, ret, "\t}\n") @@ -144,7 +144,7 @@ func (sm SerialMessage) humanReadableStringAtIndentationLevel(level int) string printWithIndendationLevel(level, ret, "\tParentClosure: {\n") hashes = msg.ParentClosureBytes() for i := 0; i < msg.ParentClosureLength()/hash.ByteLen; i++ { - addr := 
hash.New(hashes[i*20 : (i+1)*20]) + addr := hash.New(hashes[i*hash.ByteLen : (i+1)*hash.ByteLen]) printWithIndendationLevel(level, ret, "\t\t#%s\n", addr.String()) } printWithIndendationLevel(level, ret, "\t}\n") @@ -230,7 +230,54 @@ func (sm SerialMessage) humanReadableStringAtIndentationLevel(level int) string level -= 1 printWithIndendationLevel(level, ret, "}\n") return ret.String() + case serial.TableSchemaFileID: + msg, _ := serial.TryGetRootAsTableSchema(sm, serial.MessagePrefixSz) + + // loop over columns + columns := msg.ColumnsLength() + ret := &strings.Builder{} + printWithIndendationLevel(level, ret, "{\n") + level += 1 + printWithIndendationLevel(level, ret, "Columns: [\n") + level += 1 + for i := 0; i < columns; i++ { + printWithIndendationLevel(level, ret, "{\n") + level += 1 + col := serial.Column{} + ok, err := msg.TryColumns(&col, i) + if err != nil { + return fmt.Sprintf("error in HumanReadString(): %s", err) + } + if !ok { + return fmt.Sprintf("error in HumanReadString(): could not get column %d", i) + } + + printWithIndendationLevel(level, ret, "Name: %s\n", col.Name()) + printWithIndendationLevel(level, ret, "SQLType: %s\n", col.SqlType()) + printWithIndendationLevel(level, ret, "DefaultValue: %s\n", col.DefaultValue()) + printWithIndendationLevel(level, ret, "Comment: %s\n", col.Comment()) + printWithIndendationLevel(level, ret, "DisplayOrder: %d\n", col.DisplayOrder()) + printWithIndendationLevel(level, ret, "Tag: %d\n", col.Tag()) + printWithIndendationLevel(level, ret, "Encoding: %s\n", col.Encoding()) + printWithIndendationLevel(level, ret, "Primary Key: %t\n", col.PrimaryKey()) + printWithIndendationLevel(level, ret, "Nullable: %t\n", col.Nullable()) + printWithIndendationLevel(level, ret, "Auto Increment: %t\n", col.AutoIncrement()) + printWithIndendationLevel(level, ret, "Hidden: %t\n", col.Hidden()) + printWithIndendationLevel(level, ret, "Generated: %t\n", col.Generated()) + printWithIndendationLevel(level, ret, "Virtual: %t\n", 
col.Virtual()) + printWithIndendationLevel(level, ret, "OnUpdateValue: %s\n", col.OnUpdateValue()) + level -= 1 + printWithIndendationLevel(level, ret, "}\n") + + } + level -= 1 + printWithIndendationLevel(level, ret, "]\n") + + level -= 1 + printWithIndendationLevel(level, ret, "}") + return ret.String() default: + return fmt.Sprintf("SerialMessage (HumanReadableString not implemented), [%v]: %s", id, strings.ToUpper(hex.EncodeToString(sm))) } } @@ -455,11 +502,11 @@ func SerialCommitParentAddrs(nbf *NomsBinFormat, sm SerialMessage) ([]hash.Hash, return nil, err } addrs := msg.ParentAddrsBytes() - n := len(addrs) / 20 + n := len(addrs) / hash.ByteLen ret := make([]hash.Hash, n) for i := 0; i < n; i++ { - addr := hash.New(addrs[:20]) - addrs = addrs[20:] + addr := hash.New(addrs[:hash.ByteLen]) + addrs = addrs[hash.ByteLen:] ret[i] = addr } return ret, nil diff --git a/go/store/types/value.go b/go/store/types/value.go index 0f0341fc875..509d06d7bab 100644 --- a/go/store/types/value.go +++ b/go/store/types/value.go @@ -305,3 +305,58 @@ type asValueImpl interface { func (v valueImpl) Kind() NomsKind { return NomsKind(v.buff[0]) } + +// GhostValue is a placeholder for a value that has not been pulled from a remote. The structure holds no information, +// All methods will panic if called. 
+type GhostValue struct { +} + +var _ Value = GhostValue{} + +func (g GhostValue) Kind() NomsKind { + panic("Error: GhostValue.Kind() called.") +} + +func (g GhostValue) Value(ctx context.Context) (Value, error) { + panic("Error: GhostValue.Value() called.") +} + +func (g GhostValue) Less(ctx context.Context, nbf *NomsBinFormat, other LesserValuable) (bool, error) { + panic("Error: GhostValue.Less() called.") +} + +func (g GhostValue) Equals(other Value) bool { + panic("Error: GhostValue.Equals() called.") +} + +func (g GhostValue) Hash(format *NomsBinFormat) (hash.Hash, error) { + panic("Error: GhostValue.Hash() called.") +} + +func (g GhostValue) isPrimitive() bool { + panic("Error: GhostValue.isPrimitive() called.") +} + +func (g GhostValue) HumanReadableString() string { + panic("Error: GhostValue.HumanReadableString() called.") +} + +func (g GhostValue) walkRefs(format *NomsBinFormat, callback RefCallback) error { + panic("Error: GhostValue.walkRefs() called.") +} + +func (g GhostValue) typeOf() (*Type, error) { + panic("Error: GhostValue.typeOf() called.") +} + +func (g GhostValue) writeTo(writer nomsWriter, format *NomsBinFormat) error { + panic("Error: GhostValue.writeTo() called.") +} + +func (g GhostValue) readFrom(format *NomsBinFormat, reader *binaryNomsReader) (Value, error) { + panic("Error: GhostValue.readFrom() called.") +} + +func (g GhostValue) skip(format *NomsBinFormat, reader *binaryNomsReader) { + panic("Error: GhostValue.skip() called.") +} diff --git a/go/store/types/value_store.go b/go/store/types/value_store.go index 94f55ca4f6b..4c5f33a3568 100644 --- a/go/store/types/value_store.go +++ b/go/store/types/value_store.go @@ -197,6 +197,10 @@ func (lvs *ValueStore) ReadValue(ctx context.Context, h hash.Hash) (Value, error if err != nil { return nil, err } + if chunk.IsGhost() { + return GhostValue{}, nil + } + if chunk.IsEmpty() { return nil, nil } @@ -226,6 +230,10 @@ func (lvs *ValueStore) ReadValue(ctx context.Context, h hash.Hash) (Value, 
error func (lvs *ValueStore) ReadManyValues(ctx context.Context, hashes hash.HashSlice) (ValueSlice, error) { lvs.versOnce.Do(lvs.expectVersion) decode := func(h hash.Hash, chunk *chunks.Chunk) (Value, error) { + if chunk.IsGhost() { + return GhostValue{}, nil + } + v, ferr := DecodeValue(*chunk, lvs) if ferr != nil { diff --git a/go/store/valuefile/file_value_store.go b/go/store/valuefile/file_value_store.go index b3b54fd6dd5..d403bf57fa3 100644 --- a/go/store/valuefile/file_value_store.go +++ b/go/store/valuefile/file_value_store.go @@ -287,3 +287,8 @@ func (f *FileValueStore) iterChunks(cb func(ch chunks.Chunk) error) error { return nil } + +func (f *FileValueStore) PersistGhostHashes(ctx context.Context, refs hash.HashSet) error { + // Currently unimplemented, but may be useful for testing someday. + panic("not implemented") +} diff --git a/integration-tests/bats/helper/local-remote.bash b/integration-tests/bats/helper/local-remote.bash index 86f138c0b1f..7d5896532fe 100644 --- a/integration-tests/bats/helper/local-remote.bash +++ b/integration-tests/bats/helper/local-remote.bash @@ -137,6 +137,7 @@ SKIP_SERVER_TESTS=$(cat <<-EOM ~profile.bats~ ~ls.bats~ ~rebase.bats~ +~shallow-clone.bats~ EOM ) diff --git a/integration-tests/bats/shallow-clone.bats b/integration-tests/bats/shallow-clone.bats new file mode 100644 index 00000000000..db5363d3b3a --- /dev/null +++ b/integration-tests/bats/shallow-clone.bats @@ -0,0 +1,669 @@ +#!/usr/bin/env bats +# +# Tests for shallow clone behavior. These tests use a remotesrv +# instance which holds databases with different commit history +# structures, then test clone with different depths and validate +# behavior of other commands and stored procedures.
+ +load $BATS_TEST_DIRNAME/helper/common.bash + +remotesrv_pid="" +setup() { + skiponwindows "tests are flaky on Windows" + setup_common +} + +teardown() { + teardown_common + stop_remotesrv +} + +stop_remotesrv() { + if [ -n "$remotesrv_pid" ]; then + kill $remotesrv_pid || : + remotesrv_pid="" + fi +} + +# serial repository is 7 commits: +# (init) <- (table create) <- (val 1) <- (val 2) <- (val 3) <- (val 4) <- (val 5) [main] +seed_and_start_serial_remote() { + mkdir remote + cd remote + dolt init + dolt sql -q 'create table vals (i int primary key, s varchar(64));' + dolt add vals + dolt commit -m 'create table' + + for SEQ in $(seq 5); do + dolt sql -q "insert into vals (i,s) values ($SEQ, \"val $SEQ\")" + dolt commit -a -m "Added Val: $SEQ" + done + + dolt tag nonheadtag HEAD~2 + + remotesrv --http-port 1234 --repo-mode & + remotesrv_pid=$! + + cd .. +} + +@test "shallow-clone: dolt_clone depth 1" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + dolt sql -q "call dolt_clone('--depth', '1','http://localhost:50051/test-org/test-repo')" + + run dolt log --oneline --decorate=no + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 1 ] + + run dolt sql -q "select count(*) = 1 from dolt_log()" + [ "$status" -eq 0 ] + [[ "$output" =~ "true" ]] || false + + # Verify that the table is complete. + run dolt sql -q "select sum(i) from vals" + [ "$status" -eq 0 ] + [[ "$output" =~ "15" ]] || false # 1+2+3+4+5 = 15. +} + +@test "shallow-clone: dolt_clone depth 2" { + seed_and_start_serial_remote + + mkdir clones + cd clones + run dolt sql -q "call dolt_clone('--depth', '2','http://localhost:50051/test-org/test-repo')" + [ "$status" -eq 0 ] + + run dolt log --oneline --decorate=no + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 2 ] + + run dolt sql -q "select count(*) = 2 from dolt_log()" + [ "$status" -eq 0 ] + [[ "$output" =~ "true" ]] || false + + # Verify that the table is complete. 
+ run dolt sql -q "select sum(i) from vals" + [ "$status" -eq 0 ] + [[ "$output" =~ "15" ]] || false # 1+2+3+4+5 = 15. +} + +@test "shallow-clone: shallow clone with a file path" { + seed_and_start_serial_remote + stop_remotesrv + cd remote + dolt remote add origin file://../file-remote + dolt push origin main + cd .. + + mkdir clones + cd clones + run dolt sql -q "call dolt_clone('--depth', '1','file://../file-remote')" + [ "$status" -eq 0 ] + + run dolt log --oneline --decorate=no + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 1 ] + + run dolt sql -q "select count(*) = 1 from dolt_log()" + [ "$status" -eq 0 ] + [[ "$output" =~ "true" ]] || false + + # Verify that the table is complete. + run dolt sql -q "select sum(i) from vals" + [ "$status" -eq 0 ] + [[ "$output" =~ "15" ]] || false # 1+2+3+4+5 = 15. +} + +@test "shallow-clone: push to a new remote should error" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + dolt clone --depth 1 http://localhost:50051/test-org/test-repo + cd test-repo + + dolt remote add altremote file://../file-remote-alt + + run dolt push altremote main + [ "$status" -eq 1 ] + # NM4 - give a better error message. + [[ "$output" =~ "failed to get all chunks" ]] || false +} + +@test "shallow-clone: depth 3 clone of serial history" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + run dolt clone --depth 3 http://localhost:50051/test-org/test-repo + [ "$status" -eq 0 ] + + cd test-repo + + run dolt log --oneline --decorate=no + [ "$status" -eq 0 ] + [[ "${#lines[@]}" -eq 3 ]] || false + + run dolt sql -q "select count(*) = 3 from dolt_log()" + [ "$status" -eq 0 ] + [[ "$output" =~ "true" ]] || false + +# NM4 - system table bug. +# run dolt sql -q "select count(*) = 3 from dolt_log" +# [ "$status" -eq 0 ] +# [[ "$output" =~ "true" ]] || false +# + + # dolt_diff table will show two rows, because each row is a delta. 
+ run dolt sql -q "select * from dolt_diff" + [ "$status" -eq 0 ] + [[ "$output" =~ "Added Val: 4 " ]] || false + ! [[ "$output" =~ "Added Val: 3 " ]] || false + + run dolt sql -q "select * from dolt_commits" + [ "$status" -eq 0 ] + [[ "$output" =~ "Added Val: 5 " ]] || false + [[ "$output" =~ "Added Val: 4 " ]] || false + [[ "$output" =~ "Added Val: 3 " ]] || false + ! [[ "$output" =~ "Added Val: 2 " ]] || false + + # A full clone would have 5 commits with i=1, so if we have 3, we are looking good. + run dolt sql -q "select count(*) = 3 from dolt_history_vals where i = 1" + [ "$status" -eq 0 ] + [[ "$output" =~ "true" ]] || false + + # A full clone would have 2 commits with i=4, and our shallow clone has all the commits for that row. + run dolt sql -q "select count(*) = 2 from dolt_history_vals where i = 4" + [ "$status" -eq 0 ] + [[ "$output" =~ "true" ]] || false + + run dolt sql -q "select count(distinct commit_hash) = 3 from dolt_history_vals" + [ "$status" -eq 0 ] + [[ "$output" =~ "true" ]] || false + + # Verify that the table is complete. + run dolt sql -q "select sum(i) from vals" + [ "$status" -eq 0 ] + [[ "$output" =~ "15" ]] || false # 1+2+3+4+5 = 15. + + run dolt show HEAD + [ "$status" -eq 0 ] + [[ "$output" =~ "+ | 5 | val 5" ]] || false + ! [[ "$output" =~ "val 4" ]] || false + + run dolt show HEAD~1 + [ "$status" -eq 0 ] + [[ "$output" =~ "+ | 4 | val 4" ]] || false + ! [[ "$output" =~ "val 3" ]] || false + ! [[ "$output" =~ "val 5" ]] || false + + run dolt diff HEAD~2..HEAD + [ "$status" -eq 0 ] + [[ "$output" =~ "diff --dolt a/vals b/vals" ]] || false + [[ "$output" =~ "+ | 5 | val 5" ]] || false + [[ "$output" =~ "+ | 4 | val 4" ]] || false + ! [[ "$output" =~ "val 3" ]] || false + + # reverse diff check. + run dolt diff HEAD..HEAD~1 + [ "$status" -eq 0 ] + [[ "$output" =~ "- | 5 | val 5" ]] || false + + # Verify that the table is complete, with an as of query.
+ run dolt sql -q "select sum(i) from vals as of 'HEAD~1'" + [ "$status" -eq 0 ] + [[ "$output" =~ "10" ]] # 1+2+3+4 = 10. +} + +@test "shallow-clone: depth which exceeds history" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + # Depth is far greater than actual history length. + run dolt clone --depth 42 http://localhost:50051/test-org/test-repo + [ "$status" -eq 0 ] + + cd test-repo + + run dolt log --oneline --decorate=no + [ "$status" -eq 0 ] + [[ "${#lines[@]}" -eq 7 ]] || false + + run dolt sql -q "select count(*) = 7 from dolt_log()" + [ "$status" -eq 0 ] + [[ "$output" =~ "true" ]] || false + + run dolt show HEAD~6 + [ "$status" -eq 0 ] + [[ "$output" =~ "Initialize data repository" ]] || false +} + +@test "shallow-clone: as of gives decent error message" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + run dolt clone --depth 3 http://localhost:50051/test-org/test-repo + [ "$status" -eq 0 ] + + cd test-repo + + run dolt sql -q "select sum(i) from vals as of 'HEAD~4'" + [ "$status" -eq 1 ] + [[ "$output" =~ "Commit not found. You are using a shallow clone" ]] || false +} + +@test "shallow-clone: hashof sql function gives an error message" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + run dolt clone --depth 2 http://localhost:50051/test-org/test-repo + [ "$status" -eq 0 ] + + cd test-repo + + run dolt sql -q "select hashof('HEAD~4')" + + [ "$status" -eq 1 ] + [[ "$output" =~ "Commit not found. You are using a shallow clone" ]] || false +} + +@test "shallow-clone: single depth clone of serial history" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + dolt clone --depth 1 http://localhost:50051/test-org/test-repo + cd test-repo + + run dolt log --oneline --decorate=no + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 1 ] + + + run dolt sql -q "select count(*) = 1 from dolt_log()" + [ "$status" -eq 0 ] + [[ "$output" =~ "true" ]] || false + +## not working at the moment.... NM4. 
+## run dolt sql -q "select count(*) = 1 from dolt_log" +## [ "$status" -eq 0 ] +## [[ "$output" =~ "true" ]] || false + + # Verify that the table is complete. + run dolt sql -q "select sum(i) from vals" + [ "$status" -eq 0 ] + [[ "$output" =~ "15" ]] || false # 1+2+3+4+5 = 15. + + + run dolt diff HEAD~1..HEAD + [ "$status" -eq 1 ] + [[ "$output" =~ "Commit not found. You are using a shallow clone" ]] || false + + # Dolt show can't show the diff because we only have one half of the delta. + run dolt show + [ "$status" -eq 1 ] + [[ "$output" =~ "Added Val: 5" ]] || false # We do print the message of the commit, even though we can't show the diff. + [[ "$output" =~ "Commit not found. You are using a shallow clone" ]] || false + + run dolt tag tagfoo HEAD~1 + [ "$status" -eq 1 ] + [[ "$output" =~ "Commit not found. You are using a shallow clone" ]] || false + + run dolt revert HEAD~1 + [ "$status" -eq 1 ] + [[ "$output" =~ "Commit not found. You are using a shallow clone" ]] || false + + run dolt cherry-pick HEAD~1 + [ "$status" -eq 1 ] + [[ "$output" =~ "Commit not found. You are using a shallow clone" ]] || false +} + +@test "shallow-clone: shallow clone can push" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + dolt clone --depth 1 http://localhost:50051/test-org/test-repo + cd test-repo + + dolt sql -q "insert into vals (i,s) values (42, \"val 42\")" + + run dolt status + [ "$status" -eq 0 ] + [[ "$output" =~ "Your branch is up to date with 'origin/main'." ]] || false + [[ "$output" =~ "Changes not staged for commit:" ]] || false + [[ "$output" =~ "modified: vals" ]] || false + + dolt commit -a -m "Added Val: 42" + run dolt push origin main + [ "$status" -eq 0 ] + + # Do a full clone and verify that the commit is there. + cd .. 
+ dolt clone http://localhost:50051/test-org/test-repo full-clone + cd full-clone + + run dolt show HEAD + [ "$status" -eq 0 ] + [[ "$output" =~ "Added Val: 42" ]] || false + + run dolt sql -q "select sum(i) from vals" + [ "$status" -eq 0 ] + [[ "$output" =~ "57" ]] || false # 1+2+3+4+5+42 = 57. +} + +@test "shallow-clone: fetch new changes after shallow clone" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + # initial clone + dolt clone --depth 2 http://localhost:50051/test-org/test-repo + + # clone another copy, and push to remote srv. + dolt clone http://localhost:50051/test-org/test-repo full-clone + cd full-clone + + dolt sql -q "insert into vals (i,s) values (23, \"val 23\")" + dolt commit -a -m "Added Val: 23" + run dolt push origin main + [ "$status" -eq 0 ] + + # Go to out of date clone, and pull. + cd ../test-repo + run dolt pull + [ "$status" -eq 0 ] + [[ "$output" =~ "Fast-forward" ]] || false + + run dolt log --oneline --decorate=no + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 3 ] + + run dolt show + [ "$status" -eq 0 ] + [[ "$output" =~ "Added Val: 23" ]] || false +} + +@test "shallow-clone: fetch connected new branch works after shallow clone" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + # initial clone + dolt clone --depth 2 http://localhost:50051/test-org/test-repo + + # clone another copy, and push new branch to remote srv. + dolt clone http://localhost:50051/test-org/test-repo full-clone + cd full-clone + + # Create two new commits on top of commit which exists in shallow clone. + dolt sql -q "insert into vals (i,s) values (23, \"val 23\")" + dolt commit -a -m "Added Val: 23" + dolt sql -q "insert into vals (i,s) values (42, \"val 42\")" + dolt commit -a -m "Added Val: 42" + dolt push origin HEAD:refs/heads/brch + + cd ../test-repo + + dolt fetch # Should pull new branch, and its history should be length 4.
+ run dolt branch -a + [ "$status" -eq 0 ] + [[ "$output" =~ "remotes/origin/brch" ]] || false + + run dolt log --oneline --decorate=no origin/brch + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 4 ] + + run dolt show origin/brch + [ "$status" -eq 0 ] + [[ "$output" =~ "Added Val: 42" ]] || false + + run dolt show origin/brch~1 + [ "$status" -eq 0 ] + [[ "$output" =~ "Added Val: 23" ]] || false +} + +@test "shallow-clone: fetch disconnected new branch works after shallow clone" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + # initial clone + dolt clone --depth 2 http://localhost:50051/test-org/test-repo + + # clone another copy, and push new branch to remote srv. + dolt clone http://localhost:50051/test-org/test-repo full-clone + cd full-clone + # Create two new commits rooted at a commit which doesn't exist in the shallow clone. + dolt reset --hard HEAD~3 # HEAD~3 == (val 2) + dolt sql -q "insert into vals (i,s) values (13, \"val 13\")" + dolt commit -a -m "Added Val: 13" + dolt sql -q "insert into vals (i,s) values (11, \"val 11\")" + dolt commit -a -m "Added Val: 11" + dolt push origin HEAD:refs/heads/brch + + cd ../test-repo + + dolt fetch # Should pull new branch, and its history should be length 2. + run dolt branch -a + [ "$status" -eq 0 ] + [[ "$output" =~ "remotes/origin/brch" ]] || false + + run dolt log --oneline --decorate=no origin/brch + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 2 ] + + run dolt show origin/brch + [ "$status" -eq 0 ] + [[ "$output" =~ "Added Val: 11" ]] || false + + # Verify that the table is complete. + run dolt sql -q "select sum(i) from vals as of 'origin/brch'" + [ "$status" -eq 0 ] + [[ "$output" =~ "27" ]] || false # 1+2+11+13 = 27.
+} + +# complex repository is 14 commits with the following dag: +# +# (init) <- (table create) <- (val 1) <- (val 2) <- (val 3) <- (val 4) <- (val 5) <- (merge 2) [main] +# \ \ / +# \- (val 6) <- (val 7) <- (merge 1) <- (val 8) <-----/ <- (val 9) <- (val 10) [brch] +seed_and_start_complex_remote() { + mkdir remote + cd remote + dolt init + dolt sql -q 'create table vals (i int primary key, s varchar(64));' + dolt add vals + dolt commit -m 'create table' + + for SEQ in $(seq 5); do + dolt sql -q "insert into vals (i,s) values ($SEQ, \"val $SEQ\")" + dolt commit -a -m "Added Val: $SEQ" + done + + dolt checkout -b brch HEAD~5 + for SEQ in $(seq 6 7); do + dolt sql -q "insert into vals (i,s) values ($SEQ, \"val $SEQ\")" + dolt commit -a -m "Added Val: $SEQ" + done + + dolt merge main~2 + for SEQ in $(seq 8 10); do + dolt sql -q "insert into vals (i,s) values ($SEQ, \"val $SEQ\")" + dolt commit -a -m "Added Val: $SEQ" + done + + dolt checkout main + dolt merge brch~2 + + remotesrv --http-port 1234 --repo-mode & + remotesrv_pid=$! + + cd .. +} + +@test "shallow-clone: single depth clone of a complex history" { + seed_and_start_complex_remote + + mkdir clones + cd clones + + dolt clone --depth 1 http://localhost:50051/test-org/test-repo + cd test-repo + + run dolt log --oneline --decorate=no + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 1 ] + + run dolt sql -q "select count(*) = 1 from dolt_log()" + [ "$status" -eq 0 ] + [[ "$output" =~ "true" ]] || false + + ## not working at the moment.... NM4. + ## run dolt sql -q "select count(*) = 1 from dolt_log" + ## [ "$status" -eq 0 ] + ## [[ "$output" =~ "true" ]] || false + + # Verify that the table is complete. + run dolt sql -q "select sum(i) from vals" + [ "$status" -eq 0 ] + [[ "$output" =~ "36" ]] || false # 1+2+3+4+5+6+7+8 = 36. 
+} + +@test "shallow-clone: depth 2 clone of a complex history" { + seed_and_start_complex_remote + + mkdir clones + cd clones + + # GHOST <- (val 5) <-\ + # (merge 2) [main] + # GHOST <- (val 8) <-/ + dolt clone --depth 2 http://localhost:50051/test-org/test-repo + cd test-repo + + run dolt log --oneline --decorate=no + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 3 ] + + run dolt sql -q "select count(*) = 3 from dolt_log()" + [ "$status" -eq 0 ] + [[ "$output" =~ "true" ]] || false + + ## not working at the moment.... NM4. + ## run dolt sql -q "select count(*) = 1 from dolt_log" + ## [ "$status" -eq 0 ] + ## [[ "$output" =~ "true" ]] || false + + # compare the diff between the two parents of the merge commit. + run dolt diff HEAD^..HEAD^2 + [ "$status" -eq 0 ] + [[ "$output" =~ "- | 4 | val 4" ]] || false + [[ "$output" =~ "- | 5 | val 5" ]] || false + [[ "$output" =~ "+ | 6 | val 6" ]] || false + [[ "$output" =~ "+ | 7 | val 7" ]] || false + [[ "$output" =~ "+ | 8 | val 8" ]] || false +} + +@test "shallow-clone: clone alternate branch" { + seed_and_start_complex_remote + + mkdir clones + cd clones + + # Cloning depth 5 from brch should result in the following 6 commits: + # GHOST <- (val 3) <-\ + # \ + # GHOST <- (val 7) <- (merge 1) <- (val 8) <- (val 9) <- (val 10) [brch] + dolt clone --depth 5 --branch brch http://localhost:50051/test-org/test-repo + cd test-repo + + run dolt status + [ "$status" -eq 0 ] + [[ "$output" =~ "On branch brch" ]] || false + [[ "$output" =~ "Your branch is up to date with 'origin/brch'" ]] || false + [[ "$output" =~ "nothing to commit, working tree clean" ]] || false + + run dolt log --oneline --decorate=no + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 6 ] + + run dolt diff HEAD~3^..HEAD~3^2 # compare (val 7) --> (val 3) [parents of the first merge] + [ "$status" -eq 0 ] + [[ "$output" =~ "+ | 1 | val 1" ]] || false + [[ "$output" =~ "+ | 2 | val 2" ]] || false + [[ "$output" =~ "+ | 3 | val 3" ]] || false + [[ "$output" =~ "- | 6 | 
val 6" ]] || false + [[ "$output" =~ "- | 7 | val 7" ]] || false + + run dolt merge-base HEAD~3^ HEAD~3^2 # (val 7) and (val 3) have a common ancestor in a full clone, should error. + [ "$status" -eq 1 ] + [[ "$output" =~ "Commit not found. You are using a shallow clone" ]] || false + +} + +@test "shallow-clone: clone depth 5 on complex main" { + seed_and_start_complex_remote + mkdir clones + cd clones + + # GHOST <- (val 2) <- (val 3) <- (val 4) <- (val 5) <- (merge 2) [main] + # \ / + # GHOST <- (val 6) <-(val 7) <- (merge 1) <- (val 8) + dolt clone --depth 5 http://localhost:50051/test-org/test-repo + cd test-repo + + run dolt log --oneline --decorate=no + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 9 ] + + run dolt merge-base HEAD^ HEAD^2 # (val 5) and (val 8) are the parents of the merge commit. Should return (val 3) + [ "$status" -eq 0 ] + commitid="$output" + + run dolt show "$commitid" + [ "$status" -eq 0 ] + [[ "$output" =~ "Added Val: 3" ]] || false +} + + + +# Tests to write: +# - Fetch after initial clone +# - Fetch when no changes have happened. +# - Fetch when there are remote changes on main +# - Fetch when there are remote changes on a branch +# - Pull after initial clone +# - Pull when no changes have happened. +# - Pull when there are remote changes on main +# - Sensible error when branching/checking out a commit which they don't have. +# - merge base errors +# - GC works? or gives a decent error message? +# - reset work to a commit we have, and errors when we don't have the commit. +# - Sensible error when we attempt to use HEAD~51 or something. +# - Don't serve from a shallow repository +# - remotesrv +# - sql-server +# - file (stretch?) +# - Dump? +# - Rebase? +# - Stash? +# - Fetch tags which refer to commits we don't have. Punt on tags entirely?