Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allocatorimpl,asim: fix replace constraints check fn #117900

Merged
merged 2 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 20 additions & 31 deletions pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go
Original file line number Diff line number Diff line change
Expand Up @@ -1116,20 +1116,9 @@ func rankedCandidateListForAllocation(
continue
}

constraintsOK, necessary := constraintsCheck(s)
if !constraintsOK {
if necessary {
log.KvDistribution.VEventf(
ctx,
3,
"cannot allocate necessary %s on s%d",
targetType,
s.StoreID,
)
}
continue
if constraintsOK, _ := constraintsCheck(s); constraintsOK {
validCandidateStores = append(validCandidateStores, s)
}
validCandidateStores = append(validCandidateStores, s)
}

// Create a new store list, which will update the average for each stat to
Expand Down Expand Up @@ -2074,7 +2063,8 @@ func allocateConstraintsCheck(
// that is not already overly satisfied by existing replicas (other than the
// replacement), then it's necessary. If there are any necessary constraints
// that are not satisfied by the candidate when the existing store did satisfy
// that constraint, then the candidate is considered invalid entirely.
// that constraint, then the candidate is considered invalid and unnecessary
// entirely.
func replaceConstraintsCheck(
store, existingStore roachpb.StoreDescriptor, analyzed constraint.AnalyzedConstraints,
) (valid bool, necessary bool) {
Expand All @@ -2089,23 +2079,22 @@ func replaceConstraintsCheck(
satisfiedByCandidateStore := constraint.CheckStoreConjunction(store, constraints.Constraints)
if satisfiedByCandidateStore {
valid = true
}

// If the constraint is not already satisfied, it's necessary.
// Additionally, if the constraint is only just satisfied by the existing
// store being replaced, since that store is going away, the constraint is
// also marked as necessary.
if len(matchingStores) < int(constraints.NumReplicas) ||
(len(matchingStores) == int(constraints.NumReplicas) &&
satisfiedByExistingStore) {
necessary = true
}

// Check if existing store matches a constraint that isn't overly satisfied.
// If so, then only replacing it with a satisfying store is valid to ensure
// that the constraint stays fully satisfied.
if necessary && satisfiedByExistingStore && !satisfiedByCandidateStore {
return false, necessary
// If the constraint is not already satisfied, it's necessary.
// Additionally, if the constraint is only just satisfied by the existing
// store being replaced, since that store is going away, the constraint is
// also marked as necessary.
if len(matchingStores) < int(constraints.NumReplicas) ||
(len(matchingStores) == int(constraints.NumReplicas) &&
satisfiedByExistingStore) {
necessary = true
}
} else if satisfiedByExistingStore {
// Check if existing store matches a constraint that isn't overly satisfied.
// If so, then only replacing it with a satisfying store is valid to ensure
// that the constraint stays fully satisfied.
if len(matchingStores) <= int(constraints.NumReplicas) {
return false, false
}
}
}

Expand Down
233 changes: 233 additions & 0 deletions pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1089,7 +1089,240 @@ func TestRemoveConstraintsCheck(t *testing.T) {
}
})
}
}

func TestReplaceConstraintsCheck(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

testCases := []struct {
name string
constraints []roachpb.ConstraintsConjunction
numReplicas int32
existing []roachpb.StoreID
replacingStore roachpb.StoreID
expectedValid map[roachpb.StoreID]bool
expectedNecessary map[roachpb.StoreID]bool
}{
{
name: "required constraint",
constraints: []roachpb.ConstraintsConjunction{
{
Constraints: []roachpb.Constraint{
{Value: "a", Type: roachpb.Constraint_REQUIRED},
},
},
},
existing: []roachpb.StoreID{testStoreUSa15},
replacingStore: testStoreUSa15,
expectedValid: map[roachpb.StoreID]bool{
testStoreUSa15Dupe: true,
testStoreUSa1: true,
testStoreUSb: true,
testStoreEurope: false,
},
expectedNecessary: map[roachpb.StoreID]bool{
// NB: No stores are considered necessary replacements as the
// num_replicas is not specified for the constraint conjunction.
testStoreUSa15Dupe: false,
testStoreUSa1: false,
testStoreUSb: false,
testStoreEurope: false,
},
},
{
name: "prohibited constraint",
constraints: []roachpb.ConstraintsConjunction{
{
Constraints: []roachpb.Constraint{
{Value: "b", Type: roachpb.Constraint_PROHIBITED},
},
},
},
existing: []roachpb.StoreID{testStoreUSa15},
replacingStore: testStoreUSa15,
expectedValid: map[roachpb.StoreID]bool{
testStoreUSa15Dupe: true,
testStoreUSa1: false,
testStoreUSb: false,
testStoreEurope: true,
},
expectedNecessary: map[roachpb.StoreID]bool{
// NB: No stores are considered necessary replacements as the
// num_replicas is not specified for the constraint conjunction.
testStoreUSa15Dupe: false,
testStoreUSa1: false,
testStoreUSb: false,
testStoreEurope: false,
},
},
{
name: "single per-replica constraint existing=num_replicas",
constraints: []roachpb.ConstraintsConjunction{
{
Constraints: []roachpb.Constraint{
{Value: "a", Type: roachpb.Constraint_REQUIRED},
},
NumReplicas: 1,
},
},
existing: []roachpb.StoreID{testStoreUSa15},
replacingStore: testStoreUSa15,
expectedValid: map[roachpb.StoreID]bool{
testStoreUSa15Dupe: true,
testStoreUSa1: true,
testStoreUSb: true,
testStoreEurope: false,
},
expectedNecessary: map[roachpb.StoreID]bool{
testStoreUSa15Dupe: true,
testStoreUSa1: true,
testStoreUSb: true,
testStoreEurope: false,
},
},
{
name: "single per-replica constraint existing < num_replicas",
constraints: []roachpb.ConstraintsConjunction{
{
Constraints: []roachpb.Constraint{
{Value: "c", Type: roachpb.Constraint_REQUIRED},
},
NumReplicas: 1,
},
},
existing: []roachpb.StoreID{testStoreEurope},
replacingStore: testStoreEurope,
expectedValid: map[roachpb.StoreID]bool{
testStoreUSa15: false,
testStoreUSa15Dupe: false,
testStoreUSa1: false,
testStoreUSb: true,
},
expectedNecessary: map[roachpb.StoreID]bool{
testStoreUSa15: false,
testStoreUSa15Dupe: false,
testStoreUSa1: false,
testStoreUSb: true,
},
},
{
name: "single per-replica constraint existing > num_replicas",
constraints: []roachpb.ConstraintsConjunction{
{
Constraints: []roachpb.Constraint{
{Value: "a", Type: roachpb.Constraint_REQUIRED},
},
NumReplicas: 1,
},
},
existing: []roachpb.StoreID{testStoreUSa15, testStoreUSa15Dupe},
replacingStore: testStoreUSa15,
expectedValid: map[roachpb.StoreID]bool{
testStoreUSa15Dupe: true,
testStoreUSa1: true,
testStoreUSb: true,
testStoreEurope: false,
},
expectedNecessary: map[roachpb.StoreID]bool{
// NB: The constraint is over-satisfied, no store should be considered
// necessary as a replacement for testStoreUSa15.
testStoreUSa15Dupe: false,
testStoreUSa1: false,
testStoreUSb: false,
testStoreEurope: false,
},
},
{
name: "multiple per-replica constraint existing < num_replicas",
constraints: []roachpb.ConstraintsConjunction{
{
Constraints: []roachpb.Constraint{
{Value: "a", Type: roachpb.Constraint_REQUIRED},
},
NumReplicas: 1,
},
{
Constraints: []roachpb.Constraint{
{Value: "b", Type: roachpb.Constraint_REQUIRED},
},
NumReplicas: 1,
},
},
numReplicas: 2,
// We are missing a replica which satisfies the "b" constraint here.
existing: []roachpb.StoreID{testStoreUSa15, testStoreUSa15Dupe},
replacingStore: testStoreUSa15,
expectedValid: map[roachpb.StoreID]bool{
testStoreUSa15Dupe: true,
testStoreUSa1: true,
testStoreUSb: true,
testStoreEurope: false,
},
expectedNecessary: map[roachpb.StoreID]bool{
testStoreUSa15Dupe: false,
testStoreUSa1: true,
testStoreUSb: true,
testStoreEurope: false,
},
},
{
name: "multiple per-replica constraint existing == num_replicas unconstrained",
constraints: []roachpb.ConstraintsConjunction{
{
Constraints: []roachpb.Constraint{
{Value: "a", Type: roachpb.Constraint_REQUIRED},
},
NumReplicas: 1,
},
{
Constraints: []roachpb.Constraint{
{Value: "b", Type: roachpb.Constraint_REQUIRED},
},
NumReplicas: 1,
},
},
// One replica is unconstrained (sum(constraint_num_replicas) !=
// num_replicas).
numReplicas: 3,
existing: []roachpb.StoreID{testStoreUSa1},
replacingStore: testStoreUSa1,
expectedValid: map[roachpb.StoreID]bool{
testStoreUSa15: false,
testStoreUSa15Dupe: false,
testStoreUSb: true,
testStoreEurope: false,
},
expectedNecessary: map[roachpb.StoreID]bool{
testStoreUSa15: false,
testStoreUSa15Dupe: false,
testStoreUSb: true,
testStoreEurope: false,
},
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
conf := roachpb.SpanConfig{
Constraints: tc.constraints,
NumReplicas: tc.numReplicas,
}
analyzed := constraint.AnalyzeConstraints(mockStoreResolver{}, testStoreReplicas(tc.existing), conf.NumReplicas, conf.Constraints)
for storeID, s := range testStores {
if storeID == tc.replacingStore {
continue
}
t.Run(fmt.Sprintf("%s/candidate=s%d,replacing=s%d", tc.name, s.StoreID, tc.replacingStore), func(t *testing.T) {
valid, necessary := replaceConstraintsCheck(s, testStores[tc.replacingStore], analyzed)
require.Equal(t, tc.expectedValid[s.StoreID], valid,
"mismatch replaceConstraintsCheck(s%d,s%d).valid", s.StoreID, tc.replacingStore)
require.Equal(t, tc.expectedNecessary[s.StoreID], necessary,
"mismatch replaceConstraintsCheck(s%d,s%d).necessary", s.StoreID, tc.replacingStore)
})
}
})
}
}

func TestShouldRebalanceDiversity(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# This test reproduces #117886, where decommissioning could become stuck due to
# incorrect constraint analysis when replacing a decommissioning replica. If a
# constraint conjunction were initially undersatisfied and a different
# constraint conjunction currently satisfied by the decommissioning replica
# being replaced, it was possible that a valid replacement target would never
# be selected.
gen_cluster nodes=4
----

set_locality node=1 locality=region=a
----

set_locality node=2 locality=region=b
----

set_locality node=3 locality=region=c
----

set_locality node=4 locality=region=c
----

# Generate 5 ranges, where initially there will be two replicas in region c and
# one replica in region a.
gen_ranges ranges=5 repl_factor=3
----

set_span_config
[0,10000): num_replicas=3 constraints={'+region=a':1,'+region=c':2}
----

set_span_config delay=5m
[0,10000): num_replicas=3 constraints={'+region=a':1,'+region=b':1,'+region=c':1}
----

set_liveness node=4 liveness=decommissioning delay=5m
----

assertion type=conformance under=0 over=0 unavailable=0 violating=0
----

eval duration=20m
----
OK

topology
----
a
└── [1]
b
└── [2]
c
└── [3 4]

# vim:ft=sh
2 changes: 0 additions & 2 deletions pkg/kv/kvserver/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3794,7 +3794,6 @@ func TestAllocatorCheckRange(t *testing.T) {
expectedAction: allocatorimpl.AllocatorReplaceDecommissioningVoter,
expectAllocatorErr: true,
expectedErrStr: "replicas must match constraints",
expectedLogMessage: "cannot allocate necessary voter on s3",
},
{
name: "decommissioning without satisfying multiple partial constraints",
Expand Down Expand Up @@ -3834,7 +3833,6 @@ func TestAllocatorCheckRange(t *testing.T) {
expectedAction: allocatorimpl.AllocatorReplaceDecommissioningVoter,
expectAllocatorErr: true,
expectedErrStr: "replicas must match constraints",
expectedLogMessage: "cannot allocate necessary voter on s3",
},
{
name: "decommissioning during upreplication with partial constraints",
Expand Down
Loading