Skip to content

Commit

Permalink
Merge #32140
Browse files Browse the repository at this point in the history
32140: roachtest: restart stopped nodes before exiting chaos r=tschottdorf a=petermattis

When chaos exits due to the stopper channel becoming ready, restart any
stopped nodes. Some tests (notably `scaledata/jobcoordinator`) do not
finish when a down node is present.

Add a timeout to the `scaledata/*` tests, set to twice their expected
10m duration.

Fixes #32125
Fixes #32126

Release note: None

Co-authored-by: Peter Mattis <petermattis@gmail.com>
  • Loading branch information
craig[bot] and petermattis committed Nov 3, 2018
2 parents 5d9ddab + 68c0169 commit fb4a974
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
2 changes: 2 additions & 0 deletions pkg/cmd/roachtest/chaos.go
Expand Up @@ -94,6 +94,8 @@ func (ch *Chaos) Runner(c *cluster, m *monitor) func(context.Context) error {

select {
case <-ch.Stopper:
+ l.Printf("restarting %v (chaos is done)\n", target)
+ c.Start(ctx, c.t.(*test), target)
return nil
case <-ctx.Done():
return ctx.Err()
Expand Down
7 changes: 4 additions & 3 deletions pkg/cmd/roachtest/scaledata.go
Expand Up @@ -46,9 +46,10 @@ func registerScaleData(r *registry) {
const duration = 10 * time.Minute
for _, n := range []int{3, 6} {
r.Add(testSpec{
- Name: fmt.Sprintf("scaledata/%s/nodes=%d", app, n),
- Nodes: nodes(n + 1),
- Stable: true, // DO NOT COPY to new tests
+ Name: fmt.Sprintf("scaledata/%s/nodes=%d", app, n),
+ Timeout: 2 * duration,
+ Nodes: nodes(n + 1),
+ Stable: true, // DO NOT COPY to new tests
Run: func(ctx context.Context, t *test, c *cluster) {
runSqlapp(ctx, t, c, app, flags, duration)
},
Expand Down

0 comments on commit fb4a974

Please sign in to comment.