Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-21.1: kvserver: add safeguard against spurious calls to AdminRelocateRange #64303

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
46 changes: 46 additions & 0 deletions pkg/kv/kvserver/client_relocate_range_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,52 @@ func TestAdminRelocateRange(t *testing.T) {
}
}

func TestAdminRelocateRangeFailsWithDuplicates(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

ctx := context.Background()
args := base.TestClusterArgs{
ReplicationMode: base.ReplicationManual,
}

tc := testcluster.StartTestCluster(t, numNodes, args)
defer tc.Stopper().Stop(ctx)

k := keys.MustAddr(tc.ScratchRange(t))

tests := []struct {
voterTargets, nonVoterTargets []int
expectedErr string
}{
{
voterTargets: []int{1, 1, 2},
expectedErr: "list of desired voter targets contains duplicates",
},
{
voterTargets: []int{1, 2},
nonVoterTargets: []int{0, 1, 0},
expectedErr: "list of desired non-voter targets contains duplicates",
},
{
voterTargets: []int{1, 2},
nonVoterTargets: []int{1},
expectedErr: "list of voter targets overlaps with the list of non-voter targets",
},
{
voterTargets: []int{1, 2},
nonVoterTargets: []int{1, 2},
expectedErr: "list of voter targets overlaps with the list of non-voter targets",
},
}
for _, subtest := range tests {
err := tc.Servers[0].DB().AdminRelocateRange(
context.Background(), k.AsRawKey(), tc.Targets(subtest.voterTargets...), tc.Targets(subtest.nonVoterTargets...),
)
require.Regexp(t, subtest.expectedErr, err)
}
}

// TestAdminRelocateRangeRandom runs a series of random relocations on a scratch
// range and checks to ensure that the relocations were successfully executed.
func TestAdminRelocateRangeRandom(t *testing.T) {
Expand Down
30 changes: 30 additions & 0 deletions pkg/kv/kvserver/replica_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -2657,6 +2657,25 @@ func (s *Store) AdminRelocateRange(
rangeDesc roachpb.RangeDescriptor,
voterTargets, nonVoterTargets []roachpb.ReplicationTarget,
) error {
if containsDuplicates(voterTargets) {
return errors.AssertionFailedf(
"list of desired voter targets contains duplicates: %+v",
voterTargets,
)
}
if containsDuplicates(nonVoterTargets) {
return errors.AssertionFailedf(
"list of desired non-voter targets contains duplicates: %+v",
nonVoterTargets,
)
}
if containsDuplicates(append(voterTargets, nonVoterTargets...)) {
return errors.AssertionFailedf(
"list of voter targets overlaps with the list of non-voter targets: voters: %+v, non-voters: %+v",
voterTargets, nonVoterTargets,
)
}

// Remove learners so we don't have to think about relocating them, and leave
// the joint config if we're in one.
newDesc, err := maybeLeaveAtomicChangeReplicasAndRemoveLearners(ctx, s, &rangeDesc)
Expand Down Expand Up @@ -3090,6 +3109,17 @@ func getRelocationArgs(
return args
}

func containsDuplicates(targets []roachpb.ReplicationTarget) bool {
for i := range targets {
for j := i + 1; j < len(targets); j++ {
if targets[i] == targets[j] {
return true
}
}
}
return false
}

// subtractTargets returns the set of replica descriptors in `left` but not in
// `right` (i.e. left - right).
//
Expand Down
49 changes: 36 additions & 13 deletions pkg/kv/kvserver/store_rebalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package kvserver

import (
"context"
"fmt"
"math"
"math/rand"
"sort"
Expand Down Expand Up @@ -309,8 +310,18 @@ func (sr *StoreRebalancer) rebalanceStore(
}

descBeforeRebalance := replWithStats.repl.Desc()
log.VEventf(ctx, 1, "rebalancing r%d (%.2f qps) from %v to %v to better balance load",
replWithStats.repl.RangeID, replWithStats.qps, descBeforeRebalance.Replicas(), voterTargets)
log.VEventf(
ctx,
1,
"rebalancing r%d (%.2f qps) to better balance load: voters from %v to %v; non-voters from %v to %v",
replWithStats.repl.RangeID,
replWithStats.qps,
descBeforeRebalance.Replicas().Voters(),
voterTargets,
descBeforeRebalance.Replicas().NonVoters(),
nonVoterTargets,
)

timeout := sr.rq.processTimeoutFunc(sr.st, replWithStats.repl)
if err := contextutil.RunWithTimeout(ctx, "relocate range", timeout, func(ctx context.Context) error {
return sr.rq.store.AdminRelocateRange(ctx, *descBeforeRebalance, voterTargets, nonVoterTargets)
Expand Down Expand Up @@ -472,6 +483,17 @@ type rangeRebalanceContext struct {
numDesiredVoters, numDesiredNonVoters int
}

func (rbc *rangeRebalanceContext) numDesiredReplicas(targetType targetReplicaType) int {
switch targetType {
case voterTarget:
return rbc.numDesiredVoters
case nonVoterTarget:
return rbc.numDesiredNonVoters
default:
panic(fmt.Sprintf("unknown targetReplicaType %s", targetType))
}
}

func (sr *StoreRebalancer) chooseRangeToRebalance(
ctx context.Context,
hottestRanges *[]replicaWithStats,
Expand Down Expand Up @@ -729,20 +751,21 @@ func (sr *StoreRebalancer) pickRemainingRepls(
options scorerOptions,
minQPS, maxQPS float64,
targetType targetReplicaType,
) (finalTargetsForType []roachpb.ReplicaDescriptor) {
var numDesiredReplsForType int
) []roachpb.ReplicaDescriptor {
// Alias the slice that corresponds to the set of replicas that is being
// appended to. This is because we want subsequent calls to
// `allocateTargetFromList` to observe the results of previous calls (note the
// append to the slice referenced by `finalTargetsForType`).
var finalTargetsForType *[]roachpb.ReplicaDescriptor
switch targetType {
case voterTarget:
finalTargetsForType = partialVoterTargets
numDesiredReplsForType = rebalanceCtx.numDesiredVoters
finalTargetsForType = &partialVoterTargets
case nonVoterTarget:
finalTargetsForType = partialNonVoterTargets
numDesiredReplsForType = rebalanceCtx.numDesiredNonVoters
finalTargetsForType = &partialNonVoterTargets
default:
log.Fatalf(ctx, "unknown targetReplicaType %s", targetType)
log.Fatalf(ctx, "unknown targetReplicaType: %s", targetType)
}

for len(finalTargetsForType) < numDesiredReplsForType {
for len(*finalTargetsForType) < rebalanceCtx.numDesiredReplicas(targetType) {
// Use the preexisting Allocate{Non}Voter logic to ensure that
// considerations such as zone constraints, locality diversity, and full
// disk come into play.
Expand Down Expand Up @@ -780,12 +803,12 @@ func (sr *StoreRebalancer) pickRemainingRepls(
break
}

finalTargetsForType = append(finalTargetsForType, roachpb.ReplicaDescriptor{
*finalTargetsForType = append(*finalTargetsForType, roachpb.ReplicaDescriptor{
NodeID: target.Node.NodeID,
StoreID: target.StoreID,
})
}
return finalTargetsForType
return *finalTargetsForType
}

// pickReplsToKeep determines the set of existing replicas for a range which
Expand Down