Skip to content

Commit b03f11f

Browse files
Return NOT_PREFERRED decisions in allocation explain
Adds numerous NOT_PREFERRED options to allocation decision / status types. Adds NOT_PREFERRED option to AllocationDecision (resolving ES-12729). Closes ES-12833, ES-13288, ES-12729
1 parent 292f65b commit b03f11f

File tree

11 files changed

+448
-36
lines changed

11 files changed

+448
-36
lines changed

server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainIT.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@
6666
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
6767
public final class ClusterAllocationExplainIT extends ESIntegTestCase {
6868

69+
// TODO (DIANNA) NOMERGE: can NOT_PREFERRED behavior be tested with a fake AllocationDecider?
70+
6971
public void testUnassignedPrimaryWithExistingIndex() throws Exception {
7072
logger.info("--> starting 2 nodes");
7173
internalCluster().startNodes(2);

server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
package org.elasticsearch.cluster.routing.allocation.decider;
1111

1212
import org.apache.logging.log4j.Level;
13+
import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest;
1314
import org.elasticsearch.action.admin.cluster.allocation.DesiredBalanceRequest;
1415
import org.elasticsearch.action.admin.cluster.allocation.DesiredBalanceResponse;
16+
import org.elasticsearch.action.admin.cluster.allocation.TransportClusterAllocationExplainAction;
1517
import org.elasticsearch.action.admin.cluster.allocation.TransportGetDesiredBalanceAction;
1618
import org.elasticsearch.action.admin.cluster.node.usage.NodeUsageStatsForThreadPoolsAction;
1719
import org.elasticsearch.action.admin.cluster.node.usage.TransportNodeUsageStatsForThreadPoolsAction;
@@ -27,11 +29,13 @@
2729
import org.elasticsearch.cluster.routing.RoutingNodes;
2830
import org.elasticsearch.cluster.routing.ShardRouting;
2931
import org.elasticsearch.cluster.routing.UnassignedInfo;
32+
import org.elasticsearch.cluster.routing.allocation.AllocationDecision;
3033
import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings;
3134
import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator;
3235
import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceMetrics;
3336
import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceShardsAllocator;
3437
import org.elasticsearch.cluster.service.ClusterService;
38+
import org.elasticsearch.common.Strings;
3539
import org.elasticsearch.common.settings.Settings;
3640
import org.elasticsearch.common.util.CollectionUtils;
3741
import org.elasticsearch.core.TimeValue;
@@ -264,13 +268,54 @@ public void testShardsAreAssignedToNotPreferredWhenAlternativeIsNo() {
264268
}
265269
}
266270

271+
@TestLogging(
272+
reason = "track when reconciliation has completed",
273+
value = "org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceShardsAllocator:DEBUG"
274+
)
275+
public void testAllocationExplainNotPreferred() {
276+
TestHarness harness = setUpThreeTestNodesAndAllIndexShardsOnFirstNode();
277+
runCanRemainNotPreferredIsIgnoredWhenAllOtherNodesReturnNotPreferred(harness);
278+
279+
/**
280+
* Running the {@link #runCanRemainNotPreferredIsIgnoredWhenAllOtherNodesReturnNotPreferred} logic should set up a cluster where
281+
* shards are all allocated to a {@link AllocationDeciders#canRemain} {@link Decision#NOT_PREFERRED} node, while the other nodes
282+
* return {@link AllocationDeciders#canAllocate} {@link Decision#NOT_PREFERRED} responses. This should exercise NOT_PREFERRED in
283+
* the allocation/explain paths for remaining on a node AND assignment to other nodes.
284+
*/
285+
286+
ClusterAllocationExplainRequest allocationExplainRequest = new ClusterAllocationExplainRequest(
287+
TEST_REQUEST_TIMEOUT,
288+
harness.indexName,
289+
0,
290+
true,
291+
null
292+
);
293+
var allocationExplainResponse = safeGet(client().execute(TransportClusterAllocationExplainAction.TYPE, allocationExplainRequest));
294+
logger.info("---> Allocation explain response: " + Strings.toString(allocationExplainResponse.getExplanation(), true, true));
295+
296+
var decision = allocationExplainResponse.getExplanation().getShardAllocationDecision().getMoveDecision();
297+
assertThat(decision.getCanRemainDecision().type(), equalTo(Decision.NOT_PREFERRED.type()));
298+
assertNull(decision.getTargetNode());
299+
assertThat(decision.getAllocationDecision(), equalTo(AllocationDecision.NOT_PREFERRED));
300+
301+
var canAllocateDecisions = allocationExplainResponse.getExplanation()
302+
.getShardAllocationDecision()
303+
.getMoveDecision()
304+
.getNodeDecisions();
305+
assertThat(canAllocateDecisions.size(), equalTo(2));
306+
canAllocateDecisions.forEach(nodeDecision -> assertThat(nodeDecision.getNodeDecision(), equalTo(AllocationDecision.NOT_PREFERRED)));
307+
}
308+
267309
@TestLogging(
268310
reason = "track when reconciliation has completed",
269311
value = "org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceShardsAllocator:DEBUG"
270312
)
271313
public void testCanRemainNotPreferredIsIgnoredWhenAllOtherNodesReturnNotPreferred() {
272314
TestHarness harness = setUpThreeTestNodesAndAllIndexShardsOnFirstNode();
315+
runCanRemainNotPreferredIsIgnoredWhenAllOtherNodesReturnNotPreferred(harness);
316+
}
273317

318+
private void runCanRemainNotPreferredIsIgnoredWhenAllOtherNodesReturnNotPreferred(TestHarness harness) {
274319
/**
275320
* Override the {@link TransportNodeUsageStatsForThreadPoolsAction} action on the data nodes to supply artificial thread pool write
276321
* load stats. The stats will show all the nodes above the high utilization threshold, so they do not accept new shards, while the
@@ -604,6 +649,8 @@ private Settings enabledWriteLoadDeciderSettings(int utilizationThresholdPercent
604649
WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_QUEUE_LATENCY_THRESHOLD_SETTING.getKey(),
605650
TimeValue.timeValueMillis(queueLatencyThresholdMillis)
606651
)
652+
// Keep all the debug logging: set the minimum logging interval to zero so decider messages are not throttled.
653+
.put(WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_MINIMUM_LOGGING_INTERVAL.getKey(), TimeValue.timeValueMinutes(0))
607654
// Disable rebalancing so that testing can see Decider change outcomes only.
608655
.put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none")
609656
.build();

server/src/main/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainRequest.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,14 @@ public ClusterAllocationExplainRequest(StreamInput in) throws IOException {
8080
* will be picked for explanation. If no replicas are unassigned, the first assigned replica will
8181
* be explained.
8282
*/
83-
// Package private for testing.
84-
ClusterAllocationExplainRequest(TimeValue masterNodeTimeout, String index, int shard, boolean primary, @Nullable String currentNode) {
83+
// Public for testing.
84+
public ClusterAllocationExplainRequest(
85+
TimeValue masterNodeTimeout,
86+
String index,
87+
int shard,
88+
boolean primary,
89+
@Nullable String currentNode
90+
) {
8591
super(masterNodeTimeout);
8692
this.index = index;
8793
this.shard = shard;

server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocateUnassignedDecision.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ public String getExplanation() {
267267
checkDecisionState();
268268
return switch (getAllocationDecision()) {
269269
case YES -> Explanations.Allocation.YES;
270+
case NOT_PREFERRED -> Explanations.Allocation.NOT_PREFERRED;
270271
case THROTTLED -> Explanations.Allocation.THROTTLED;
271272
case AWAITING_INFO -> Explanations.Allocation.AWAITING_INFO;
272273
case NO_VALID_SHARD_COPY -> hasNodeWithStaleOrCorruptShard()

server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationDecision.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,14 @@ public enum AllocationDecision implements Writeable {
6060
/**
6161
* No attempt was made to allocate the shard
6262
*/
63-
NO_ATTEMPT((byte) 7);
63+
NO_ATTEMPT((byte) 7),
64+
65+
/**
66+
* It is _not_ preferred to allocate a shard to this node; preference should be given to a YES node.
67+
* This can happen when the shard allocation to a node is allowed, but the node resource usage is
68+
* already high. Preference can be overridden if a shard's current allocation is no longer allowed.
69+
*/
70+
NOT_PREFERRED((byte) 8);
6471

6572
private final byte id;
6673

@@ -84,6 +91,7 @@ public static AllocationDecision readFrom(StreamInput in) throws IOException {
8491
case 5 -> ALLOCATION_DELAYED;
8592
case 6 -> NO_VALID_SHARD_COPY;
8693
case 7 -> NO_ATTEMPT;
94+
case 8 -> NOT_PREFERRED;
8795
default -> throw new IllegalArgumentException("Unknown value [" + id + "]");
8896
};
8997
}
@@ -111,8 +119,8 @@ public static AllocationDecision fromAllocationStatus(AllocationStatus allocatio
111119
*/
112120
public static AllocationDecision fromDecisionType(Decision.Type type) {
113121
return switch (type) {
114-
// TODO: should not_preferred have own variant? ES-12729
115-
case YES, NOT_PREFERRED -> YES;
122+
case YES -> YES;
123+
case NOT_PREFERRED -> NOT_PREFERRED;
116124
case THROTTLE -> THROTTLED;
117125
case NO -> NO;
118126
};

server/src/main/java/org/elasticsearch/cluster/routing/allocation/Explanations.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ public static final class Allocation {
1616
public static final String YES = """
1717
Elasticsearch can allocate the shard.""";
1818

19+
public static final String NOT_PREFERRED = """
20+
Elasticsearch can allocate the shard, but the assignment is not preferred due to resource usage of the cluster nodes.
21+
Assignment will proceed if either the shard is unassigned or cannot remain on its currently assigned node.""";
22+
1923
public static final String THROTTLED = """
2024
Elasticsearch is currently busy with other activities. It expects to be able to allocate this shard when those activities \
2125
finish. Please wait.""";
@@ -61,6 +65,11 @@ public static final class Rebalance {
6165
public static final String YES = """
6266
Elasticsearch can rebalance this shard to another node.""";
6367

68+
public static final String NOT_PREFERRED = """
69+
Elasticsearch will not rebalance this shard to another node because all other eligible nodes have high resource usage. The \
70+
total cluster balance weights might improve, were the shard relocated, but it would push one resource usage dimension \
71+
too high and threaten performance. See the node-by-node explanation to understand what resource would be endangered.""";
72+
6473
public static final String ALREADY_BALANCED = """
6574
This shard is in a well-balanced location and satisfies all allocation rules so it will remain on this node. Elasticsearch \
6675
cannot improve the cluster balance by moving it to another node. If you expect this shard to be rebalanced to another node, \
@@ -98,6 +107,10 @@ public static final class Move {
98107
public static final String YES = """
99108
This shard may not remain on its current node. Elasticsearch will move it to another node.""";
100109

110+
public static final String NOT_PREFERRED = """
111+
This shard may not remain on its current node. Elasticsearch can only move it to cluster nodes with already significant \
112+
resource usage, but will do so anyway.""";
113+
101114
public static final String THROTTLED = """
102115
This shard may not remain on its current node. Elasticsearch is currently busy with other activities and will move this shard \
103116
to another node when those activities finish. Please wait.""";
@@ -107,6 +120,20 @@ public static final class Move {
107120
which you expect this shard to be allocated, find this node in the node-by-node explanation, and address the reasons which \
108121
prevent Elasticsearch from allocating this shard there.""";
109122

123+
public static final String NOT_PREFERRED_TO_YES = SHOULD_NOT_REMAIN_PREFIX + ". Elasticsearch will move it to another node.";
124+
125+
public static final String NOT_PREFERRED_TO_NOT_PREFERRED = SHOULD_NOT_REMAIN_PREFIX
126+
+ ", but there are no other eligible nodes without already high resource usage";
127+
128+
public static final String NOT_PREFERRED_TO_NO = SHOULD_NOT_REMAIN_PREFIX
129+
+ ", but Elasticsearch isn't allowed to move it to "
130+
+ "another node. Choose a node to which you expect this shard to be allocated, find this node in the node-by-node "
131+
+ "explanation, and address the reasons which prevent Elasticsearch from allocating this shard there.";
132+
133+
public static final String NOT_PREFERRED_TO_THROTTLED = SHOULD_NOT_REMAIN_PREFIX
134+
+ ". Elasticsearch is currently busy with other "
135+
+ "activities and will move this shard to another node when those activities finish. Please wait.";
110136
}
111137

138+
private static final String SHOULD_NOT_REMAIN_PREFIX = "This shard should not remain on its current node due to resource usage";
112139
}

server/src/main/java/org/elasticsearch/cluster/routing/allocation/MoveDecision.java

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ public static MoveDecision move(
122122
// the final decision is NO (no node to move the shard to) and we are not in explain mode, return a cached version
123123
return CACHED_CANNOT_MOVE_DECISION;
124124
} else {
125-
assert ((targetNode == null) == (moveDecision != AllocationDecision.YES));
126125
return new MoveDecision(targetNode, nodeDecisions, moveDecision, canRemainDecision, null, 0);
127126
}
128127
}
@@ -153,7 +152,12 @@ public boolean isDecisionTaken() {
153152
*/
154153
public boolean cannotRemainAndCanMove() {
155154
checkDecisionState();
156-
return canRemain() == false && canMoveDecision == AllocationDecision.YES;
155+
return cannotRemain() && (canMoveDecision == AllocationDecision.YES);
156+
}
157+
158+
public boolean cannotRemainAndNotPreferredMove() {
159+
checkDecisionState();
160+
return cannotRemain() && (canMoveDecision == AllocationDecision.NOT_PREFERRED);
157161
}
158162

159163
/**
@@ -163,7 +167,7 @@ public boolean cannotRemainAndCanMove() {
163167
*/
164168
public boolean cannotRemainAndCannotMove() {
165169
checkDecisionState();
166-
return canRemain() == false && canMoveDecision != AllocationDecision.YES;
170+
return cannotRemain() && canMoveDecision != AllocationDecision.YES;
167171
}
168172

169173
/**
@@ -175,6 +179,16 @@ public boolean canRemain() {
175179
return canRemainDecision.type() == Type.YES;
176180
}
177181

182+
public boolean cannotRemain() {
183+
checkDecisionState();
184+
return canRemainDecision.type() != Type.YES;
185+
}
186+
187+
public boolean canRemainNotPreferred() {
188+
checkDecisionState();
189+
return canRemainDecision.type() == Type.NOT_PREFERRED;
190+
}
191+
178192
/**
179193
* Returns the decision for the shard being allowed to remain on its current node. If {@link #isDecisionTaken()}
180194
* returns {@code false}, then invoking this method will throw an {@code IllegalStateException}.
@@ -256,12 +270,15 @@ public String getExplanation() {
256270
}
257271
};
258272
} else {
259-
// it was a decision to force move the shard
260-
assert canRemain() == false;
273+
// it was a decision by an allocation decider to move the shard
274+
assert cannotRemain();
261275
return switch (canMoveDecision) {
262-
case YES -> Explanations.Move.YES;
263-
case THROTTLED -> Explanations.Move.THROTTLED;
264-
case NO -> Explanations.Move.NO;
276+
case YES -> canRemainNotPreferred() ? Explanations.Move.NOT_PREFERRED_TO_YES : Explanations.Move.YES;
277+
case NOT_PREFERRED -> canRemainNotPreferred()
278+
? Explanations.Move.NOT_PREFERRED_TO_NOT_PREFERRED
279+
: Explanations.Move.NOT_PREFERRED;
280+
case THROTTLED -> canRemainNotPreferred() ? Explanations.Move.NOT_PREFERRED_TO_THROTTLED : Explanations.Move.THROTTLED;
281+
case NO -> canRemainNotPreferred() ? Explanations.Move.NOT_PREFERRED_TO_NO : Explanations.Move.NO;
265282
case WORSE_BALANCE, AWAITING_INFO, ALLOCATION_DELAYED, NO_VALID_SHARD_COPY, NO_ATTEMPT -> {
266283
assert false : canMoveDecision;
267284
yield canMoveDecision.toString();
@@ -280,7 +297,7 @@ public Iterator<? extends ToXContent> toXContentChunked(ToXContent.Params params
280297
builder.endObject();
281298
}
282299
builder.field("can_remain_on_current_node", canRemain() ? "yes" : "no");
283-
if (canRemain() == false && canRemainDecision.getDecisions().isEmpty() == false) {
300+
if (cannotRemain() && canRemainDecision.getDecisions().isEmpty() == false) {
284301
builder.startArray("can_remain_decisions");
285302
canRemainDecision.toXContent(builder, params);
286303
builder.endArray();
@@ -298,7 +315,13 @@ public Iterator<? extends ToXContent> toXContentChunked(ToXContent.Params params
298315
builder.field("can_rebalance_to_other_node", canMoveDecision);
299316
builder.field("rebalance_explanation", getExplanation());
300317
} else {
301-
builder.field("can_move_to_other_node", cannotRemainAndCanMove() ? "yes" : "no");
318+
if (cannotRemainAndCanMove()) {
319+
builder.field("can_move_to_other_node", "yes");
320+
} else if (cannotRemainAndNotPreferredMove()) {
321+
builder.field("can_move_to_other_node", "not-preferred");
322+
} else {
323+
builder.field("can_move_to_other_node", "no");
324+
}
302325
builder.field("move_explanation", getExplanation());
303326
}
304327
return builder;

0 commit comments

Comments
 (0)