Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
117872: bench: ignore cluster startup and shutdown r=yuzefovich a=yuzefovich

This commit adjusts our benchmark harness to ignore the overhead of starting up and shutting down the test cluster. This allows us to avoid spurious "regressions" like that we've seen due to #117116.

Epic: None

Release note: None

117894: tests: skip some tests under `race` r=rail a=rickystewart

Some of these are skipped only under `StressRace` but not simply under `race` as they should have been.

Additionally, two tests: `spanconfigsqltranslatorccl.TestDataDriven` and `kvserver.TestStoreMetrics` are so long running that they stall or fail to complete under `race`, which has gone unnoticed before.

Epic: CRDB-8308
Release note: None

117898: testcluster: re-enable tenant in multi-node clusters under race r=yuzefovich a=yuzefovich

This commit is a partial revert of adfc98a. In that commit we disabled tenant randomization in multi-node-clusters under race because EngFlow executors were too overloaded in such a setup. We now use beefier executors, so let's see how they fare.

Epic: None

Release note: None

Co-authored-by: Yahor Yuzefovich <yahor@cockroachlabs.com>
Co-authored-by: Ricky Stewart <ricky@cockroachlabs.com>
  • Loading branch information
3 people committed Jan 18, 2024
4 parents 1c67f8e + 2857dca + b5f6e54 + fefdf81 commit c24aedd
Show file tree
Hide file tree
Showing 13 changed files with 35 additions and 32 deletions.
14 changes: 14 additions & 0 deletions pkg/bench/foreachdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ var runSepProcessTenant = flag.Bool("run-sep-process-tenant", false, "run separa
// BenchmarkFn is a function that runs a benchmark using the given SQLRunner.
type BenchmarkFn func(b *testing.B, db *sqlutils.SQLRunner)

// timerUtil is a helper method that should be called right before the
// invocation of BenchmarkFn and the returned function should be deferred.
func timerUtil(b *testing.B) func() {
b.ResetTimer()
b.StartTimer()
return b.StopTimer
}

func benchmarkCockroach(b *testing.B, f BenchmarkFn) {
s, db, _ := serverutils.StartServer(
b, base.TestServerArgs{
Expand All @@ -54,6 +62,7 @@ func benchmarkCockroach(b *testing.B, f BenchmarkFn) {
b.Fatal(err)
}

defer timerUtil(b)()
f(b, sqlutils.MakeSQLRunner(db))
}

Expand Down Expand Up @@ -103,6 +112,7 @@ func benchmarkSharedProcessTenantCockroach(b *testing.B, f BenchmarkFn) {
_, err = tenantDB.Exec(`CREATE DATABASE bench`)
require.NoError(b, err)

defer timerUtil(b)()
f(b, sqlutils.MakeSQLRunner(tenantDB))
}

Expand Down Expand Up @@ -133,6 +143,7 @@ func benchmarkSepProcessTenantCockroach(b *testing.B, f BenchmarkFn) {
_, err = tenantDB.Exec(`CREATE DATABASE bench`)
require.NoError(b, err)

defer timerUtil(b)()
f(b, sqlutils.MakeSQLRunner(tenantDB))
}

Expand All @@ -150,6 +161,7 @@ func benchmarkMultinodeCockroach(b *testing.B, f BenchmarkFn) {
}
defer tc.Stopper().Stop(context.TODO())

defer timerUtil(b)()
f(b, sqlutils.MakeRoundRobinSQLRunner(tc.Conns[0], tc.Conns[1], tc.Conns[2]))
}

Expand Down Expand Up @@ -198,6 +210,7 @@ func benchmarkPostgres(b *testing.B, f BenchmarkFn) {
r := sqlutils.MakeSQLRunner(db)
r.Exec(b, `CREATE SCHEMA IF NOT EXISTS bench`)

defer timerUtil(b)()
f(b, r)
}

Expand All @@ -218,6 +231,7 @@ func benchmarkMySQL(b *testing.B, f BenchmarkFn) {
r := sqlutils.MakeSQLRunner(db)
r.Exec(b, `CREATE DATABASE IF NOT EXISTS bench`)

defer timerUtil(b)()
f(b, r)
}

Expand Down
4 changes: 3 additions & 1 deletion pkg/ccl/kvccl/kvfollowerreadsccl/boundedstaleness_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,9 @@ func TestBoundedStalenessDataDriven(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

skip.UnderStress(t, "1μs staleness reads may actually succeed due to the slow environment")
const msg = "1μs staleness reads may actually succeed due to the slow environment"
skip.UnderStress(t, msg)
skip.UnderRace(t, msg)
defer ccl.TestingEnableEnterprise()()

ctx := context.Background()
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ go_test(
"//pkg/sql/catalog/tabledesc",
"//pkg/testutils/datapathutils",
"//pkg/testutils/serverutils",
"//pkg/testutils/skip",
"//pkg/testutils/sqlutils",
"//pkg/testutils/testcluster",
"//pkg/util/leaktest",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc"
"github.com/cockroachdb/cockroach/pkg/testutils/datapathutils"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
Expand Down Expand Up @@ -85,6 +86,8 @@ func TestDataDriven(t *testing.T) {
scope.Close(t)
})

skip.UnderRace(t, "very long-running test under race")

ctx := context.Background()
datadriven.Walk(t, datapathutils.TestDataPath(t), func(t *testing.T, path string) {
t.Parallel() // SAFE FOR TESTING
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ func TestStreamingAutoReplan(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

skip.UnderStressRace(t, "multi cluster/node config exhausts hardware")
skip.UnderRace(t, "multi cluster/node config exhausts hardware")

ctx := context.Background()
args := replicationtestutils.DefaultTenantStreamingClustersArgs
Expand Down Expand Up @@ -1209,7 +1209,7 @@ func TestLoadProducerAndIngestionProgress(t *testing.T) {
func TestStreamingRegionalConstraint(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
skip.UnderStressRace(t, "takes too long under stress race")
skip.UnderRace(t, "takes too long under race")

ctx := context.Background()
regions := []string{"mars", "venus", "mercury"}
Expand Down
3 changes: 3 additions & 0 deletions pkg/kv/kvserver/client_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
Expand Down Expand Up @@ -236,6 +237,8 @@ func TestStoreMetrics(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

skip.UnderRace(t, "does not complete under race")

ctx := context.Background()
const numServers int = 3
stickyVFSRegistry := server.NewStickyVFSRegistry()
Expand Down
4 changes: 2 additions & 2 deletions pkg/kv/kvserver/client_raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6498,9 +6498,9 @@ func TestRaftLeaderRemovesItself(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

// Timing-sensitive, so skip under deadlock detector and stressrace.
// Timing-sensitive, so skip under deadlock detector and race.
skip.UnderDeadlock(t)
skip.UnderStressRace(t)
skip.UnderRace(t)

ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/client_replica_circuit_breaker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@ type circuitBreakerTest struct {
}

func setupCircuitBreakerTest(t *testing.T) *circuitBreakerTest {
skip.UnderStressRace(t)
skip.UnderRace(t)
manualClock := hlc.NewHybridManualClock()
var rangeID int64 // atomic
slowThresh := &atomic.Value{} // supports .SetSlowThreshold(x)
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/logictest/logic.go
Original file line number Diff line number Diff line change
Expand Up @@ -4061,7 +4061,7 @@ func RunLogicTest(
// with less concurrency in the nightly stress runs. If you see problems
// please make adjustments there.
// As of 6/4/2019, the logic tests never complete under race.
skip.UnderStressRace(t, "logic tests and race detector don't mix: #37993")
skip.UnderRace(t, "logic tests and race detector don't mix: #37993")

// This test relies on repeated sequential storage.EventuallyFileOnlySnapshot
// acquisitions. Reduce the max wait time for each acquisition to speed up
Expand Down
5 changes: 0 additions & 5 deletions pkg/sql/logictest/parallel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,6 @@ func TestParallel(t *testing.T) {
defer leaktest.AfterTest(t)()

skip.UnderRace(t, "takes >1 min under race")
// Note: there is special code in teamcity-trigger/main.go to run this package
// with less concurrency in the nightly stress runs. If you see problems
// please make adjustments there.
// As of 6/4/2019, the logic tests never complete under race.
skip.UnderStressRace(t, "logic tests and race detector don't mix: #37993")

glob := *paralleltestdata
paths, err := filepath.Glob(glob)
Expand Down
17 changes: 2 additions & 15 deletions pkg/testutils/serverutils/test_server_shim.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ var PreventStartTenantError = errors.New("attempting to manually start a virtual
// directly so that it only gets linked into test code (and to avoid a linter
// error that 'skip' must only be used in test code).
func ShouldStartDefaultTestTenant(
t TestLogger, baseArg base.DefaultTestTenantOptions, multiNodeCluster bool,
t TestLogger, baseArg base.DefaultTestTenantOptions,
) (retval base.DefaultTestTenantOptions) {
// Explicit case for disabling the default test tenant.
if baseArg.TestTenantAlwaysDisabled() {
Expand Down Expand Up @@ -149,17 +149,6 @@ func ShouldStartDefaultTestTenant(
return override
}

if multiNodeCluster && util.RaceEnabled {
// Race builds now run in the EngFlow environment which seems to be
// often overloaded if we have multi-node clusters and start a default
// test tenant, so we disable the tenant randomization in such a
// scenario.
if t != nil {
t.Log("cluster virtualization disabled under race")
}
return base.InternalNonDefaultDecision(baseArg, false /* enable */, false /* shared */)
}

// Note: we ask the metamorphic framework for a "disable" value, instead
// of an "enable" value, because it probabilistically returns its default value
// more often than not and that is what we want.
Expand Down Expand Up @@ -269,9 +258,7 @@ func StartServerOnlyE(t TestLogger, params base.TestServerArgs) (TestServerInter
allowAdditionalTenants := params.DefaultTestTenant.AllowAdditionalTenants()
// Update the flags with the actual decision as to whether we should
// start the service for a default test tenant.
params.DefaultTestTenant = ShouldStartDefaultTestTenant(
t, params.DefaultTestTenant, false, /* multiNodeCluster */
)
params.DefaultTestTenant = ShouldStartDefaultTestTenant(t, params.DefaultTestTenant)

s, err := NewServer(params)
if err != nil {
Expand Down
4 changes: 1 addition & 3 deletions pkg/testutils/testcluster/testcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,9 +294,7 @@ func NewTestCluster(
args.DefaultTestTenant, defaultTestTenantOptions)
}
}
tc.defaultTestTenantOptions = serverutils.ShouldStartDefaultTestTenant(
t, defaultTestTenantOptions, nodes > 1, /* multiNodeCluster */
)
tc.defaultTestTenantOptions = serverutils.ShouldStartDefaultTestTenant(t, defaultTestTenantOptions)

var firstListener net.Listener
for i := 0; i < nodes; i++ {
Expand Down

0 comments on commit c24aedd

Please sign in to comment.