diff --git a/DEPS.bzl b/DEPS.bzl index 09e4ad4ab63d..78a2ae3cb08f 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -1482,10 +1482,10 @@ def go_deps(): name = "com_github_cockroachdb_cockroach_go_v2", build_file_proto_mode = "disable_global", importpath = "github.com/cockroachdb/cockroach-go/v2", - sha256 = "25ae716dc921dce8336555cbc52b98243b8f1e5e33716afcd351cfd9c2538777", - strip_prefix = "github.com/cockroachdb/cockroach-go/v2@v2.3.3", + sha256 = "70860a2615f3df73f7d00b4801b1fffe30a3306ae1cda1e9bf5245bb74e86d9a", + strip_prefix = "github.com/cockroachdb/cockroach-go/v2@v2.3.5", urls = [ - "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/cockroach-go/v2/com_github_cockroachdb_cockroach_go_v2-v2.3.3.zip", + "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/cockroach-go/v2/com_github_cockroachdb_cockroach_go_v2-v2.3.5.zip", ], ) go_repository( diff --git a/build/bazelutil/distdir_files.bzl b/build/bazelutil/distdir_files.bzl index 25991376c1ff..3486740a4c16 100644 --- a/build/bazelutil/distdir_files.bzl +++ b/build/bazelutil/distdir_files.bzl @@ -306,7 +306,7 @@ DISTDIR_FILES = { "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/bubbletea/com_github_cockroachdb_bubbletea-v0.23.1-bracketed-paste2.zip": "d7916a0e7d8d814566e8f8d162c3764aea947296396a0a669564ff3ee53414bc", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/circuitbreaker/com_github_cockroachdb_circuitbreaker-v2.2.2-0.20190114160014-a614b14ccf63+incompatible.zip": "52fdb5ba6a60e9a2f1db42d5b3c4c13cc5bb3947d5ce7f1bba9b0a14de71813a", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/cmux/com_github_cockroachdb_cmux-v0.0.0-20170110192607-30d10be49292.zip": "88f6f9cf33eb535658540b46f6222f029398e590a3ff9cc873d7d561ac6debf0", - "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/cockroach-go/v2/com_github_cockroachdb_cockroach_go_v2-v2.3.3.zip": "25ae716dc921dce8336555cbc52b98243b8f1e5e33716afcd351cfd9c2538777", + "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/cockroach-go/v2/com_github_cockroachdb_cockroach_go_v2-v2.3.5.zip": "70860a2615f3df73f7d00b4801b1fffe30a3306ae1cda1e9bf5245bb74e86d9a", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/crlfmt/com_github_cockroachdb_crlfmt-v0.0.0-20221214225007-b2fc5c302548.zip": "fedc01bdd6d964da0425d5eaac8efadc951e78e13f102292cc0774197f09ab63", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/datadriven/com_github_cockroachdb_datadriven-v1.0.3-0.20230413201302-be42291fc80f.zip": "f6de9c180e1ea80c602d98247b8e8fe89f491648ab1425417b9aca082697cbc0", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/errors/com_github_cockroachdb_errors-v1.9.1.zip": "eeec411c969fb66b63af83527677cb84cceb50ed5003c51209e6bd76f1103a0b", diff --git a/go.mod b/go.mod index 8b25b066a542..3cb13d88c95f 100644 --- a/go.mod +++ b/go.mod @@ -109,7 +109,7 @@ require ( github.com/cockroachdb/apd/v3 v3.2.0 github.com/cockroachdb/circuitbreaker v2.2.2-0.20190114160014-a614b14ccf63+incompatible github.com/cockroachdb/cmux v0.0.0-20170110192607-30d10be49292 - github.com/cockroachdb/cockroach-go/v2 v2.3.3 + github.com/cockroachdb/cockroach-go/v2 v2.3.5 github.com/cockroachdb/crlfmt v0.0.0-20221214225007-b2fc5c302548 github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f github.com/cockroachdb/errors v1.9.1 diff --git a/go.sum b/go.sum index c227a1c9bb65..f6dc976b161e 100644 --- a/go.sum +++ b/go.sum @@ -469,8 +469,8 @@ github.com/cockroachdb/circuitbreaker v2.2.2-0.20190114160014-a614b14ccf63+incom github.com/cockroachdb/circuitbreaker v2.2.2-0.20190114160014-a614b14ccf63+incompatible/go.mod h1:v3T8+rm/HmCL0D1BwDcGaHHAQDuFPW7EsnYs2nBRqUo= github.com/cockroachdb/cmux v0.0.0-20170110192607-30d10be49292 h1:dzj1/xcivGjNPwwifh/dWTczkwcuqsXXFHY1X/TZMtw= github.com/cockroachdb/cmux v0.0.0-20170110192607-30d10be49292/go.mod h1:qRiX68mZX1lGBkTWyp3CLcenw9I94W2dLeRvMzcn9N4= -github.com/cockroachdb/cockroach-go/v2 v2.3.3 h1:fNmtG6XhoA1DhdDCIu66YyGSsNb1szj4CaAsbDxRmy4= -github.com/cockroachdb/cockroach-go/v2 v2.3.3/go.mod h1:1wNJ45eSXW9AnOc3skntW9ZUZz6gxrQK3cOj3rK+BC8= +github.com/cockroachdb/cockroach-go/v2 v2.3.5 h1:Khtm8K6fTTz/ZCWPzU9Ne3aOW9VyAnj4qIPCJgKtwK0= +github.com/cockroachdb/cockroach-go/v2 v2.3.5/go.mod h1:1wNJ45eSXW9AnOc3skntW9ZUZz6gxrQK3cOj3rK+BC8= github.com/cockroachdb/crlfmt v0.0.0-20221214225007-b2fc5c302548 h1:i0bnjanlWAvM50wHMT7EFyxlt5HQusznWrkwl+HBIsU= github.com/cockroachdb/crlfmt v0.0.0-20221214225007-b2fc5c302548/go.mod h1:qtkxNlt5i3rrdirfJE/bQeW/IeLajKexErv7jEIV+Uc= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= diff --git a/pkg/sql/logictest/logic.go b/pkg/sql/logictest/logic.go index 171d47bc5f19..2c4422eb8414 100644 --- a/pkg/sql/logictest/logic.go +++ b/pkg/sql/logictest/logic.go @@ -1254,7 +1254,19 @@ var _ = ((*logicTest)(nil)).newTestServerCluster // bootstrapBinaryPath is given by the config's CockroachGoBootstrapVersion. // upgradeBinaryPath is given by the config's CockroachGoUpgradeVersion, or // is the locally built version if CockroachGoUpgradeVersion was not specified. -func (t *logicTest) newTestServerCluster(bootstrapBinaryPath string, upgradeBinaryPath string) { +func (t *logicTest) newTestServerCluster(bootstrapBinaryPath, upgradeBinaryPath string) { + logsDir, err := os.MkdirTemp("", "cockroach-logs*") + if err != nil { + t.Fatal(err) + } + cleanupLogsDir := func() { + if t.rootT.Failed() { + fmt.Fprintf(os.Stderr, "cockroach logs captured in: %s\n", logsDir) + } else { + _ = os.RemoveAll(logsDir) + } + } + // During config initialization, NumNodes is required to be 3. opts := []testserver.TestServerOpt{ testserver.ThreeNodeOpt(), @@ -1262,6 +1274,7 @@ func (t *logicTest) newTestServerCluster(bootstrapBinaryPath string, upgradeBina testserver.CockroachBinaryPathOpt(bootstrapBinaryPath), testserver.UpgradeCockroachBinaryPathOpt(upgradeBinaryPath), testserver.PollListenURLTimeoutOpt(120), + testserver.CockroachLogsDirOpt(logsDir), } if strings.Contains(upgradeBinaryPath, "cockroach-short") { // If we're using a cockroach-short binary, that means it was @@ -1284,7 +1297,7 @@ func (t *logicTest) newTestServerCluster(bootstrapBinaryPath string, upgradeBina } t.testserverCluster = ts - t.clusterCleanupFuncs = append(t.clusterCleanupFuncs, ts.Stop) + t.clusterCleanupFuncs = append(t.clusterCleanupFuncs, ts.Stop, cleanupLogsDir) t.setUser(username.RootUser, 0 /* nodeIdx */) } diff --git a/pkg/upgrade/upgrademanager/BUILD.bazel b/pkg/upgrade/upgrademanager/BUILD.bazel index 3d1bb2db6f65..e7600e8aa1be 100644 --- a/pkg/upgrade/upgrademanager/BUILD.bazel +++ b/pkg/upgrade/upgrademanager/BUILD.bazel @@ -49,24 +49,29 @@ go_test( args = ["-test.timeout=295s"], deps = [ "//pkg/base", + "//pkg/ccl/kvccl/kvtenantccl", "//pkg/clusterversion", "//pkg/jobs", "//pkg/jobs/jobspb", + "//pkg/kv", "//pkg/kv/kvserver/batcheval", "//pkg/kv/kvserver/liveness", "//pkg/roachpb", "//pkg/security/securityassets", "//pkg/security/securitytest", "//pkg/server", + "//pkg/server/settingswatcher", "//pkg/settings/cluster", "//pkg/sql/execinfra", "//pkg/sql/isql", + "//pkg/sql/sem/eval", "//pkg/testutils", "//pkg/testutils/serverutils", "//pkg/testutils/sqlutils", "//pkg/testutils/testcluster", "//pkg/upgrade", "//pkg/upgrade/upgradebase", + "//pkg/upgrade/upgrades", "//pkg/util", "//pkg/util/leaktest", "//pkg/util/log", diff --git a/pkg/upgrade/upgrademanager/manager.go b/pkg/upgrade/upgrademanager/manager.go index 4cb4f3d07524..e96f2a994e39 100644 --- a/pkg/upgrade/upgrademanager/manager.go +++ b/pkg/upgrade/upgrademanager/manager.go @@ -544,7 +544,7 @@ func (m *Manager) Migrate( if mustPersistFenceVersion { var err error for { - err = updateSystemVersionSetting(ctx, cv, validate) + err = updateSystemVersionSetting(ctx, fenceVersion, validate) if errors.Is(err, upgradecluster.InconsistentSQLServersError) { if err := bumpClusterVersion(ctx, m.deps.Cluster, fenceVersion); err != nil { return err diff --git a/pkg/upgrade/upgrademanager/manager_external_test.go b/pkg/upgrade/upgrademanager/manager_external_test.go index 0304f7d79ff7..3105ee00cedf 100644 --- a/pkg/upgrade/upgrademanager/manager_external_test.go +++ b/pkg/upgrade/upgrademanager/manager_external_test.go @@ -14,27 +14,34 @@ import ( "context" gosql "database/sql" "fmt" + "math/rand" "sort" "sync/atomic" "testing" "time" "github.com/cockroachdb/cockroach/pkg/base" + _ "github.com/cockroachdb/cockroach/pkg/ccl/kvccl/kvtenantccl" "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/jobs" "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" + "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/server" + "github.com/cockroachdb/cockroach/pkg/server/settingswatcher" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/execinfra" "github.com/cockroachdb/cockroach/pkg/sql/isql" + "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" "github.com/cockroachdb/cockroach/pkg/upgrade" "github.com/cockroachdb/cockroach/pkg/upgrade/upgradebase" + "github.com/cockroachdb/cockroach/pkg/upgrade/upgrades" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -618,6 +625,7 @@ func TestPrecondition(t *testing.T) { require.Equalf(t, exp, got, "server %d", i) } } + defer tc.Stopper().Stop(ctx) sqlDB := tc.ServerConn(0) { @@ -662,3 +670,126 @@ func TestPrecondition(t *testing.T) { checkActiveVersion(t, v2) } } + +func TestMigrationFailure(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + + // Configure the range of versions used by the test + startVersionKey := clusterversion.BinaryMinSupportedVersionKey + startVersion := clusterversion.ByKey(startVersionKey) + endVersionKey := clusterversion.BinaryVersionKey + endVersion := clusterversion.ByKey(endVersionKey) + + // Pick a random version in to fail at + versions := clusterversion.ListBetween(startVersion, endVersion) + failVersion := versions[rand.Intn(len(versions))] + fenceVersion := upgrade.FenceVersionFor(ctx, clusterversion.ClusterVersion{Version: failVersion}).Version + t.Logf("test will fail at version: %s", failVersion.String()) + + // Create a storage cluster for the tenant + testCluster := serverutils.StartNewTestCluster(t, 1, base.TestClusterArgs{ + ServerArgs: base.TestServerArgs{ + DefaultTestTenant: base.TestTenantDisabled, + Knobs: base.TestingKnobs{ + SQLEvalContext: &eval.TestingKnobs{ + TenantLogicalVersionKeyOverride: startVersionKey, + }, + }, + }, + }) + defer testCluster.Stopper().Stop(ctx) + + // Set the version override so that the tenant is able to upgrade. If this is + // not set, the tenant treats the storage cluster as if it had the oldest + // supported binary version. + s := testCluster.Server(0) + goDB := serverutils.OpenDBConn(t, s.ServingSQLAddr(), "system", false, s.Stopper()) + _, err := goDB.Exec(`ALTER TENANT ALL SET CLUSTER SETTING version = $1`, endVersion.String()) + require.NoError(t, err) + + // setting failUpgrade to false disables the upgrade error logic. + var failUpgrade atomic.Bool + failUpgrade.Store(true) + + // Create a tenant cluster at the oldest supported binary version. Configure + // upgrade manager to fail the upgrade at a random version. + tenantSettings := cluster.MakeTestingClusterSettingsWithVersions( + endVersion, + startVersion, + false, + ) + require.NoError(t, clusterversion.Initialize(ctx, startVersion, &tenantSettings.SV)) + tenant, db := serverutils.StartTenant(t, testCluster.Server(0), base.TestTenantArgs{ + TenantID: roachpb.MustMakeTenantID(10), + Settings: tenantSettings, + TestingKnobs: base.TestingKnobs{ + Server: &server.TestingKnobs{ + DisableAutomaticVersionUpgrade: make(chan struct{}), + BootstrapVersionKeyOverride: startVersionKey, + BinaryVersionOverride: startVersion, + }, + UpgradeManager: &upgradebase.TestingKnobs{ + DontUseJobs: true, + RegistryOverride: func(cv roachpb.Version) (upgradebase.Upgrade, bool) { + if failUpgrade.Load() && cv == failVersion { + errorUpgrade := func(ctx context.Context, version clusterversion.ClusterVersion, deps upgrade.TenantDeps) error { + return errors.New("the upgrade failed with some error!") + } + return upgrade.NewTenantUpgrade("test", cv, nil, errorUpgrade), true + } + return upgrades.GetUpgrade(cv) + }, + }, + }, + }) + + // Wait for the storage cluster version to propogate + watcher := tenant.SettingsWatcher().(*settingswatcher.SettingsWatcher) + testutils.SucceedsSoon(t, func() error { + storageVersion := watcher.GetStorageClusterActiveVersion() + if !storageVersion.IsActive(endVersionKey) { + return errors.Newf("expected storage version of at least %s found %s", endVersion.PrettyPrint(), storageVersion.PrettyPrint()) + } + return nil + }) + + checkActiveVersion := func(t *testing.T, exp roachpb.Version) { + got := tenant.ClusterSettings().Version.ActiveVersion(ctx).Version + require.Equal(t, exp, got) + } + checkSettingVersion := func(t *testing.T, exp roachpb.Version) { + var settingVersion clusterversion.ClusterVersion + require.NoError(t, tenant.DB().Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { + var err error + settingVersion, err = watcher.GetClusterVersionFromStorage(ctx, txn) + return err + })) + require.Equal(t, exp, settingVersion.Version) + } + + checkActiveVersion(t, startVersion) + checkSettingVersion(t, startVersion) + + // Try to finalize + _, err = db.Exec(`SET CLUSTER SETTING version = $1`, endVersion.String()) + require.Error(t, err) + checkActiveVersion(t, fenceVersion) + // Note: we don't check the setting version here because the fence setting + // is only materialized on the first attempt. + + // Try to finalize again. + _, err = db.Exec(`SET CLUSTER SETTING version = $1`, endVersion.String()) + require.Error(t, err) + checkActiveVersion(t, fenceVersion) + checkSettingVersion(t, fenceVersion) + + // Try to finalize a final time, allowing the upgrade to succeed. + failUpgrade.Store(false) + _, err = db.Exec(`SET CLUSTER SETTING version = $1`, endVersion.String()) + require.NoError(t, err) + checkActiveVersion(t, endVersion) + checkSettingVersion(t, endVersion) +} diff --git a/pkg/upgrade/upgrades/BUILD.bazel b/pkg/upgrade/upgrades/BUILD.bazel index 7806b5217b05..87705026f72c 100644 --- a/pkg/upgrade/upgrades/BUILD.bazel +++ b/pkg/upgrade/upgrades/BUILD.bazel @@ -135,6 +135,8 @@ go_test( shard_count = 16, deps = [ "//pkg/base", + "//pkg/ccl/backupccl", + "//pkg/ccl/changefeedccl", "//pkg/cloud/userfile", "//pkg/clusterversion", "//pkg/jobs", diff --git a/pkg/upgrade/upgrades/backfill_job_info_table_migration_test.go b/pkg/upgrade/upgrades/backfill_job_info_table_migration_test.go index 02e2ee1ede8c..4aebc75d7356 100644 --- a/pkg/upgrade/upgrades/backfill_job_info_table_migration_test.go +++ b/pkg/upgrade/upgrades/backfill_job_info_table_migration_test.go @@ -15,11 +15,15 @@ import ( "testing" "github.com/cockroachdb/cockroach/pkg/base" + _ "github.com/cockroachdb/cockroach/pkg/ccl/backupccl" + _ "github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl" "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/jobs" "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" "github.com/cockroachdb/cockroach/pkg/security/username" "github.com/cockroachdb/cockroach/pkg/server" + "github.com/cockroachdb/cockroach/pkg/sql" + "github.com/cockroachdb/cockroach/pkg/sql/isql" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" "github.com/cockroachdb/cockroach/pkg/upgrade/upgrades" @@ -53,25 +57,20 @@ func TestBackfillJobsInfoTable(t *testing.T) { r := tc.Server(0).JobRegistry().(*jobs.Registry) sqlDB := sqlutils.MakeSQLRunner(tc.ServerConn(0)) defer tc.Stopper().Stop(ctx) - jobspb.ForEachType(func(typ jobspb.Type) { - // The upgrade creates migration and schemachange jobs, so we do not - // need to create more. We should not override resumers for these job types, - // otherwise the upgrade will hang. - if typ != jobspb.TypeMigration && typ != jobspb.TypeSchemaChange { - r.TestingWrapResumerConstructor(typ, func(r jobs.Resumer) jobs.Resumer { return &fakeResumer{} }) - } - }, false) - createJob := func(id jobspb.JobID, details jobspb.Details, progress jobspb.ProgressDetails) *jobs.Job { + createJob := func(id jobspb.JobID, details jobspb.Details, progress jobspb.ProgressDetails) { defaultRecord := jobs.Record{ Details: details, Progress: progress, Username: username.TestUserName(), } - job, err := r.CreateJobWithTxn(ctx, defaultRecord, id, nil /* txn */) + var job *jobs.StartableJob + db := tc.Server(0).InternalDB().(*sql.InternalDB) + err := db.Txn(ctx, func(ctx context.Context, txn isql.Txn) error { + return r.CreateStartableJobWithTxn(ctx, &job, id, txn, defaultRecord) + }) require.NoError(t, err) - return job } // Create a few different types of jobs.