Skip to content

Commit

Permalink
Merge pull request #115467 from cockroachdb/blathers/backport-release…
Browse files Browse the repository at this point in the history
…-23.2-115286

release-23.2: roachtest: no longer show other errors if a test times out
  • Loading branch information
DarrylWong committed Dec 4, 2023
2 parents 09f8ec0 + 1fe7c9d commit 228ad8e
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 3 deletions.
26 changes: 23 additions & 3 deletions pkg/cmd/roachtest/test_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ type testImpl struct {
// referencing 0+ errors. failure captures all the errors
failures []failure

// failuresSuppressed indicates if further failures should be added to mu.failures.
failuresSuppressed bool

// numFailures is the number of failures that have been added via addFailures.
// This can deviate from len(failures) if failures have been suppressed.
numFailures int

// status is a map from goroutine id to status set by that goroutine. A
// special goroutine is indicated by runnerID; that one provides the test's
// "main status".
Expand Down Expand Up @@ -393,13 +400,16 @@ func (t *testImpl) addFailure(depth int, format string, args ...interface{}) {
t.mu.Lock()
defer t.mu.Unlock()

t.mu.failures = append(t.mu.failures, reportFailure)
if !t.mu.failuresSuppressed {
t.mu.failures = append(t.mu.failures, reportFailure)
}

var b strings.Builder
formatFailure(&b, reportFailure)
msg := b.String()

failureNum := len(t.mu.failures)
t.mu.numFailures++
failureNum := t.mu.numFailures
failureLog := fmt.Sprintf("failure_%d", failureNum)
t.L().Printf("test failure #%d: full stack retained in %s.log: %s", failureNum, failureLog, msg)
// Also dump the verbose error (incl. all stack traces) to a log file, in case
Expand All @@ -425,6 +435,16 @@ func (t *testImpl) addFailure(depth int, format string, args ...interface{}) {
t.mu.output = append(t.mu.output, '\n')
}

// suppressFailures will stop future failures from being surfaced to github posting
// or the test logger. It will not stop those failures from being logged in their
// own failure.log files. Used if we are confident on the root cause of a failure and
// want to reduce noise of other failures, i.e. timeouts.
func (t *testImpl) suppressFailures() {
t.mu.Lock()
defer t.mu.Unlock()
t.mu.failuresSuppressed = true
}

// We take the "squashed" error that contains information of all the errors for each failure.
func formatFailure(b *strings.Builder, reportFailures ...failure) {
for i, failure := range reportFailures {
Expand All @@ -450,7 +470,7 @@ func (t *testImpl) Failed() bool {
}

func (t *testImpl) failedRLocked() bool {
return len(t.mu.failures) > 0
return t.mu.numFailures > 0
}

func (t *testImpl) firstFailure() failure {
Expand Down
3 changes: 3 additions & 0 deletions pkg/cmd/roachtest/test_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -1139,6 +1139,9 @@ func (r *testRunner) runTest(
// NB: We're adding the timeout failure intentionally without cancelling the context
// to capture as much state as possible during artifact collection.
t.addFailure(0, "test timed out (%s)", timeout)
// We suppress other failures from being surfaced to the top as the timeout is always going
// to be the main error and subsequent errors (i.e. context cancelled) add noise.
t.suppressFailures()
timedOut = true
}

Expand Down

0 comments on commit 228ad8e

Please sign in to comment.