roachtest: rename failover tests #121211

Draft
wants to merge 1 commit into base: master
73 changes: 34 additions & 39 deletions pkg/cmd/roachtest/tests/failover.go
@@ -62,10 +62,7 @@ var rangeLeaseRenewalDuration = func() time.Duration {
// https://github.com/cockroachdb/cockroach/issues/103654
func registerFailover(r registry.Registry) {
for _, leases := range []registry.LeaseType{registry.EpochLeases, registry.ExpirationLeases} {
var suffix string
if leases == registry.ExpirationLeases {
suffix = "/lease=expiration"
}
suffix := fmt.Sprintf("/lease=%s", leases)

for _, readOnly := range []bool{false, true} {
readOnly := readOnly // pin loop variable
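
The dropped conditional only tagged expiration-lease variants, leaving epoch-lease tests as the unsuffixed default; the `fmt.Sprintf` form gives every lease variant an explicit `/lease=...` suffix. A minimal, self-contained sketch of why this works, assuming `registry.LeaseType` prints as `epoch`/`expiration` via `fmt.Stringer` (the type and names below are stand-ins, not the registry package):

```go
// Stand-in sketch: a lease-type enum that implements fmt.Stringer renders
// directly into the test-name suffix.
package main

import "fmt"

type leaseType int

const (
	epochLeases leaseType = iota
	expirationLeases
)

func (l leaseType) String() string {
	switch l {
	case epochLeases:
		return "epoch"
	case expirationLeases:
		return "expiration"
	default:
		return "unknown"
	}
}

func main() {
	for _, l := range []leaseType{epochLeases, expirationLeases} {
		// Every lease variant now gets an explicit suffix, e.g.
		// "/lease=epoch" and "/lease=expiration", instead of only
		// expiration leases being tagged.
		suffix := fmt.Sprintf("/lease=%s", l)
		fmt.Println(suffix)
	}
}
```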
@@ -92,7 +89,7 @@ func registerFailover(r registry.Registry) {
}

r.Add(registry.TestSpec{
Name: "failover/partial/lease-gateway" + suffix,
Name: "failover/partial-partition/lease-gateway" + suffix,
Owner: registry.OwnerKV,
Benchmark: true,
Timeout: 30 * time.Minute,
@@ -104,7 +101,7 @@ func registerFailover(r registry.Registry) {
})

r.Add(registry.TestSpec{
Name: "failover/partial/lease-leader" + suffix,
Name: "failover/partial-partition/lease-leader" + suffix,
Owner: registry.OwnerKV,
Benchmark: true,
Timeout: 30 * time.Minute,
@@ -116,7 +113,7 @@ func registerFailover(r registry.Registry) {
})

r.Add(registry.TestSpec{
Name: "failover/partial/lease-liveness" + suffix,
Name: "failover/partial-partition/lease-liveness" + suffix,
Owner: registry.OwnerKV,
Benchmark: true,
Timeout: 30 * time.Minute,
@@ -144,7 +141,7 @@ func registerFailover(r registry.Registry) {
postValidation = registry.PostValidationNoDeadNodes
}
r.Add(registry.TestSpec{
Name: fmt.Sprintf("failover/non-system/%s%s", failureMode, suffix),
Name: fmt.Sprintf("failover/%s/user%s", failureMode, suffix),
Owner: registry.OwnerKV,
Benchmark: true,
Timeout: 30 * time.Minute,
@@ -154,11 +151,11 @@ func registerFailover(r registry.Registry) {
Suites: registry.Suites(registry.Nightly),
Leases: leases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runFailoverNonSystem(ctx, t, c, failureMode)
runFailoverUser(ctx, t, c, failureMode)
},
})
r.Add(registry.TestSpec{
Name: fmt.Sprintf("failover/liveness/%s%s", failureMode, suffix),
Name: fmt.Sprintf("failover/%s/liveness%s", failureMode, suffix),
Owner: registry.OwnerKV,
CompatibleClouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Weekly),
@@ -172,7 +169,7 @@ func registerFailover(r registry.Registry) {
},
})
r.Add(registry.TestSpec{
Name: fmt.Sprintf("failover/system-non-liveness/%s%s", failureMode, suffix),
Name: fmt.Sprintf("failover/%s/system%s", failureMode, suffix),
Owner: registry.OwnerKV,
CompatibleClouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Weekly),
@@ -182,7 +179,7 @@ func registerFailover(r registry.Registry) {
Cluster: r.MakeClusterSpec(7, clusterOpts...),
Leases: leases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runFailoverSystemNonLiveness(ctx, t, c, failureMode)
runFailoverSystem(ctx, t, c, failureMode)
},
})
}
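
Taken together, the renames in `registerFailover` converge on one scheme: `failover/<failure-mode>/<scope>/lease=<lease>`, with `<scope>` one of `user`, `liveness`, or `system`, alongside the fixed `failover/partial-partition/...` names above. A small illustrative sketch that prints the pattern; which combinations are actually registered still depends on the loops and suite gates in `registerFailover`:

```go
// Illustrative only: enumerate the new test-name shape assembled from the
// pieces visible in this diff.
package main

import "fmt"

func main() {
	leases := []string{"epoch", "expiration"}
	scopes := []string{"user", "liveness", "system"}
	modes := []string{
		"crash", "deadlock", "disk-stall",
		"partition-full", "partition-receive", "partition-send",
		"pause",
	}
	for _, mode := range modes {
		for _, scope := range scopes {
			for _, lease := range leases {
				// e.g. "failover/partition-full/user/lease=epoch"
				fmt.Printf("failover/%s/%s/lease=%s\n", mode, scope, lease)
			}
		}
	}
}
```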
@@ -394,7 +391,7 @@ func runFailoverPartialLeaseGateway(ctx context.Context, t test.Test, c cluster.

m := c.NewMonitor(ctx, c.Range(1, 7))

failer := makeFailer(t, c, m, failureModeBlackhole, opts, settings, rng).(PartialFailer)
failer := makeFailer(t, c, m, failureModePartitionFull, opts, settings, rng).(PartialFailer)
failer.Setup(ctx)
defer failer.Cleanup(ctx)

@@ -530,7 +527,7 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C

m := c.NewMonitor(ctx, c.Range(1, 6))

failer := makeFailer(t, c, m, failureModeBlackhole, opts, settings, rng).(PartialFailer)
failer := makeFailer(t, c, m, failureModePartitionFull, opts, settings, rng).(PartialFailer)
failer.Setup(ctx)
defer failer.Cleanup(ctx)

@@ -661,7 +658,7 @@ func runFailoverPartialLeaseLiveness(ctx context.Context, t test.Test, c cluster

m := c.NewMonitor(ctx, c.Range(1, 7))

failer := makeFailer(t, c, m, failureModeBlackhole, opts, settings, rng).(PartialFailer)
failer := makeFailer(t, c, m, failureModePartitionFull, opts, settings, rng).(PartialFailer)
failer.Setup(ctx)
defer failer.Cleanup(ctx)

@@ -746,8 +743,8 @@ func runFailoverPartialLeaseLiveness(ctx context.Context, t test.Test, c cluster
m.Wait()
}

// runFailoverNonSystem benchmarks the maximum duration of range unavailability
// following a leaseholder failure with only non-system ranges.
// runFailoverUser benchmarks the maximum duration of range unavailability
// following a leaseholder failure with only user ranges.
//
// - No system ranges located on the failed node.
//
@@ -763,9 +760,7 @@ func runFailoverPartialLeaseLiveness(ctx context.Context, t test.Test, c cluster
//
// The test runs a kv50 workload via gateways on n1-n3, measuring the pMax
// latency for graphing.
func runFailoverNonSystem(
ctx context.Context, t test.Test, c cluster.Cluster, failureMode failureMode,
) {
func runFailoverUser(ctx context.Context, t test.Test, c cluster.Cluster, failureMode failureMode) {
require.Equal(t, 7, c.Spec().NodeCount)

rng, _ := randutil.NewTestRand()
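
The doc comment describes the common shape of these benchmarks: run a kv50 workload through gateway nodes while the targeted leaseholder nodes are failed and recovered, and record pMax latency. A hypothetical sketch of that fail/recover cycle with a stand-in failer interface (the real `Failer` in `failover.go` may have more methods and different signatures); the same cycle drives the liveness and system variants below:

```go
// Hypothetical sketch of a fail/recover cycle of the sort these tests drive.
package main

import (
	"context"
	"fmt"
	"time"
)

// failer is a local stand-in for the roachtest failer abstraction.
type failer interface {
	Fail(ctx context.Context, nodeID int)
	Recover(ctx context.Context, nodeID int)
}

type printFailer struct{}

func (printFailer) Fail(ctx context.Context, nodeID int)    { fmt.Printf("failing n%d\n", nodeID) }
func (printFailer) Recover(ctx context.Context, nodeID int) { fmt.Printf("recovering n%d\n", nodeID) }

func main() {
	ctx := context.Background()
	var f failer = printFailer{}
	// Fail and recover each targeted node in turn while a kv50 workload runs
	// against the gateway nodes; the unavailability window between Fail and
	// Recover is what the benchmark measures as pMax latency.
	for _, node := range []int{4, 5, 6} {
		f.Fail(ctx, node)
		time.Sleep(10 * time.Millisecond) // stands in for the measured window
		f.Recover(ctx, node)
	}
}
```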
@@ -968,7 +963,7 @@ func runFailoverLiveness(
m.Wait()
}

// runFailoverSystemNonLiveness benchmarks the maximum duration of range
// runFailoverSystem benchmarks the maximum duration of range
// unavailability following a leaseholder failure with only system ranges,
// excluding the liveness range which is tested separately in
// runFailoverLiveness.
@@ -987,7 +982,7 @@ func runFailoverLiveness(
//
// The test runs a kv50 workload via gateways on n1-n3, measuring the pMax
// latency for graphing.
func runFailoverSystemNonLiveness(
func runFailoverSystem(
ctx context.Context, t test.Test, c cluster.Cluster, failureMode failureMode,
) {
require.Equal(t, 7, c.Spec().NodeCount)
@@ -1089,23 +1084,23 @@ func runFailoverSystemNonLiveness(
type failureMode string

const (
failureModeBlackhole failureMode = "blackhole"
failureModeBlackholeRecv failureMode = "blackhole-recv"
failureModeBlackholeSend failureMode = "blackhole-send"
failureModeCrash failureMode = "crash"
failureModeDeadlock failureMode = "deadlock"
failureModeDiskStall failureMode = "disk-stall"
failureModePause failureMode = "pause"
failureModeNoop failureMode = "noop"
failureModeCrash failureMode = "crash"
failureModeDeadlock failureMode = "deadlock"
failureModeDiskStall failureMode = "disk-stall"
failureModePartitionFull failureMode = "partition-full"
failureModePartitionReceive failureMode = "partition-receive"
failureModePartitionSend failureMode = "partition-send"
failureModePause failureMode = "pause"
failureModeNoop failureMode = "noop"
)

var allFailureModes = []failureMode{
failureModeBlackhole,
failureModeBlackholeRecv,
failureModeBlackholeSend,
failureModeCrash,
failureModeDeadlock,
failureModeDiskStall,
failureModePartitionFull,
failureModePartitionReceive,
failureModePartitionSend,
failureModePause,
// failureModeNoop intentionally omitted
}
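
For anyone updating roachtest filters, dashboards, or scripts that still reference the old constants, the failure-mode strings rename one-to-one. A trivial reference sketch derived directly from the constant renames above:

```go
// Quick reference for the renamed failure-mode strings.
package main

import "fmt"

func main() {
	renames := map[string]string{
		"blackhole":      "partition-full",
		"blackhole-recv": "partition-receive",
		"blackhole-send": "partition-send",
	}
	for oldName, newName := range renames {
		fmt.Printf("%s -> %s\n", oldName, newName)
	}
}
```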
@@ -1141,20 +1136,20 @@ func makeFailerWithoutLocalNoop(
rng *rand.Rand,
) Failer {
switch failureMode {
case failureModeBlackhole:
case failureModePartitionFull:
return &blackholeFailer{
t: t,
c: c,
input: true,
output: true,
}
case failureModeBlackholeRecv:
case failureModePartitionReceive:
return &blackholeFailer{
t: t,
c: c,
input: true,
}
case failureModeBlackholeSend:
case failureModePartitionSend:
return &blackholeFailer{
t: t,
c: c,
@@ -1271,11 +1266,11 @@ type blackholeFailer struct {

func (f *blackholeFailer) Mode() failureMode {
if f.input && !f.output {
return failureModeBlackholeRecv
return failureModePartitionReceive
} else if f.output && !f.input {
return failureModeBlackholeSend
return failureModePartitionSend
}
return failureModeBlackhole
return failureModePartitionFull
}

func (f *blackholeFailer) String() string { return string(f.Mode()) }
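
`Mode()` now reports the partition-flavored names based on the failer's `input`/`output` flags. A tiny stand-alone sketch of the three combinations it distinguishes, using local stand-ins that mirror the logic above:

```go
// Stand-in for blackholeFailer.Mode(): map input/output flags to the new
// partition-* failure-mode strings.
package main

import "fmt"

func mode(input, output bool) string {
	if input && !output {
		return "partition-receive"
	} else if output && !input {
		return "partition-send"
	}
	return "partition-full"
}

func main() {
	for _, c := range []struct{ input, output bool }{
		{true, true},  // drop traffic in both directions -> partition-full
		{true, false}, // drop only inbound traffic       -> partition-receive
		{false, true}, // drop only outbound traffic      -> partition-send
	} {
		fmt.Printf("input=%v output=%v -> %s\n", c.input, c.output, mode(c.input, c.output))
	}
}
```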