Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
upgrade: use high-priority txns to update the cluster version
Previously, it was possible for the leasing subsystem to starve out attempts to set the cluster version during upgrades, since the leasing subsystem uses high-priority txns for lease renewals. To address this, this patch makes the logic that sets the cluster version run at high priority so that it can't be pushed out by lease renewals. Fixes: #113908 Release note (bug fix): Addressed a bug that could cause cluster version finalization to get starved out by descriptor lease renewals on larger clusters.
- Loading branch information
Showing
4 changed files
with
171 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
// Copyright 2023 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package upgrades_test | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
|
||
"github.com/cockroachdb/cockroach/pkg/base" | ||
"github.com/cockroachdb/cockroach/pkg/clusterversion" | ||
"github.com/cockroachdb/cockroach/pkg/server" | ||
clustersettings "github.com/cockroachdb/cockroach/pkg/settings/cluster" | ||
"github.com/cockroachdb/cockroach/pkg/sql/catalog/lease" | ||
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster" | ||
"github.com/cockroachdb/cockroach/pkg/upgrade/upgradebase" | ||
"github.com/cockroachdb/cockroach/pkg/upgrade/upgrades" | ||
"github.com/cockroachdb/cockroach/pkg/util/leaktest" | ||
"github.com/cockroachdb/cockroach/pkg/util/retry" | ||
"github.com/cockroachdb/errors" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
// TestLeasingClusterVersionStarvation validates that setting | ||
// the cluster version is done with a high priority txn and cannot | ||
// be pushed out. Previously, this would be normal priority and | ||
// get pushed by the leasing code, leading to starvation | ||
// when leases were acquired with sufficiently high frequency | ||
// Note: This test just confirms its not normal priority by checking | ||
// if it can push other txns. | ||
func TestLeasingClusterVersionStarvation(t *testing.T) { | ||
defer leaktest.AfterTest(t)() | ||
ctx := context.Background() | ||
|
||
routineChan := make(chan error) | ||
waitToStartBump := make(chan struct{}) | ||
resumeBump := make(chan struct{}) | ||
clusterArgs := base.TestClusterArgs{ | ||
ServerArgs: base.TestServerArgs{ | ||
Knobs: base.TestingKnobs{ | ||
UpgradeManager: &upgradebase.TestingKnobs{ | ||
InterlockPausePoint: upgradebase.AfterVersionBumpRPC, | ||
InterlockReachedPausePointChannel: &waitToStartBump, | ||
InterlockResumeChannel: &resumeBump, | ||
}, | ||
Server: &server.TestingKnobs{ | ||
DisableAutomaticVersionUpgrade: make(chan struct{}), | ||
BinaryVersionOverride: clusterversion.ByKey( | ||
clusterversion.V23_1), | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
// Disable lease renewals intentionally, so that we validate | ||
// no deadlock risk exists with the settings table. | ||
st := clustersettings.MakeTestingClusterSettingsWithVersions( | ||
clusterversion.TestingBinaryVersion, | ||
clusterversion.TestingBinaryMinSupportedVersion, | ||
false) | ||
|
||
clusterArgs.ServerArgs.Settings = st | ||
|
||
tc := testcluster.StartTestCluster(t, 1, clusterArgs) | ||
lease.LeaseDuration.Override(ctx, &st.SV, 0) | ||
lease.LeaseRenewalDuration.Override(ctx, &st.SV, 0) | ||
|
||
defer tc.Stopper().Stop(ctx) | ||
db := tc.ServerConn(0) | ||
defer db.Close() | ||
|
||
proceedWithCommit := make(chan struct{}) | ||
// Start a background transaction that will have an intent | ||
// on the version key inside the settings table, with a | ||
// normal priority (which should get pushed by the upgrade). | ||
go func() { | ||
<-waitToStartBump | ||
tx, err := db.Begin() | ||
if err != nil { | ||
routineChan <- err | ||
return | ||
} | ||
_, err = tx.Exec("SELECT name from system.settings where name='version' FOR UPDATE") | ||
if err != nil { | ||
routineChan <- err | ||
return | ||
} | ||
resumeBump <- struct{}{} | ||
for retry := retry.Start(retry.Options{}); retry.Next(); { | ||
_, err = tx.Exec("SELECT name from system.settings where name='version' FOR UPDATE") | ||
if err != nil { | ||
rollbackErr := tx.Rollback() | ||
routineChan <- errors.WithSecondaryError(err, rollbackErr) | ||
return | ||
} | ||
} | ||
}() | ||
|
||
upgrades.Upgrade( | ||
t, | ||
db, | ||
clusterversion.V23_2, | ||
nil, | ||
false, | ||
) | ||
|
||
// Our txn should have been pushed by the upgrade, | ||
// which has a higher txn priority. | ||
close(proceedWithCommit) | ||
require.ErrorContainsf(t, <-routineChan, "pq: restart transaction: TransactionRetryWithProtoRefreshError:", | ||
"upgrade was not able to push transaction") | ||
} |