Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
return Decision.single(Decision.Type.YES, NAME, "Decider is disabled");
}

// Never reject allocation of an unassigned shard
if (shardRouting.assignedToNode() == false) {
return Decision.single(Decision.Type.YES, NAME, "Shard is unassigned. Decider takes no action.");
}

// Check whether the shard being relocated has any write load estimate. If it does not, then this decider has no opinion.
var allShardWriteLoads = allocation.clusterInfo().getShardWriteLoads();
var shardWriteLoad = allShardWriteLoads.get(shardRouting.shardId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import org.elasticsearch.cluster.routing.TestShardRouting;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.shard.ShardId;
Expand Down Expand Up @@ -107,38 +109,66 @@ public void testWriteLoadDeciderCanAllocate() {
)
.build()
);
assertEquals(
assertDecisionMatches(
"Assigning a new shard to a node that is above the threshold should fail",
Decision.Type.NOT_PREFERRED,
writeLoadDecider.canAllocate(
testHarness.shardRouting2,
testHarness.exceedingThresholdRoutingNode,
testHarness.routingAllocation
).type()
),
Decision.Type.NOT_PREFERRED,
"Node [*] with write thread pool utilization [0.99] already exceeds the high utilization threshold of [0.900000]. "
+ "Cannot allocate shard [[test-index][1]] to node without risking increased write latencies."
);
assertEquals(
assertDecisionMatches(
"Unassigned shard should always be accepted",
writeLoadDecider.canAllocate(
testHarness.unassignedShardRouting,
randomFrom(testHarness.exceedingThresholdRoutingNode, testHarness.belowThresholdRoutingNode),
testHarness.routingAllocation
),
Decision.Type.YES,
"Shard is unassigned. Decider takes no action."
);
assertDecisionMatches(
"Assigning a new shard to a node that has capacity should succeed",
writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.belowThresholdRoutingNode, testHarness.routingAllocation),
Decision.Type.YES,
writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.belowThresholdRoutingNode, testHarness.routingAllocation)
.type()
null
);
assertEquals(
assertDecisionMatches(
"Assigning a new shard without a write load estimate should _not_ be blocked by lack of capacity",
Decision.Type.YES,
writeLoadDecider.canAllocate(
testHarness.thirdRoutingNoWriteLoad,
testHarness.exceedingThresholdRoutingNode,
testHarness.routingAllocation
).type()
),
Decision.Type.YES,
"Shard has no estimated write load. Decider takes no action."
);
assertEquals(
assertDecisionMatches(
"Assigning a new shard that would cause the node to exceed capacity should fail",
writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.nearThresholdRoutingNode, testHarness.routingAllocation),
Decision.Type.NOT_PREFERRED,
writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.nearThresholdRoutingNode, testHarness.routingAllocation)
.type()
"The high utilization threshold of [0.900000] would be exceeded on node [*] with utilization [0.89] "
+ "if shard [[test-index][0]] with estimated additional utilisation [0.06250] (write load [0.50000] / threads [8]) were "
+ "assigned to it. Cannot allocate shard to node without risking increased write latencies."

);
}

private void assertDecisionMatches(String description, Decision decision, Decision.Type type, String explanationPattern) {
assertEquals(description, type, decision.type());
if (explanationPattern == null) {
assertNull(decision.getExplanation());
} else {
assertTrue(
Strings.format("Expected: \"%s\", got \"%s\"", explanationPattern, decision.getExplanation()),
Regex.simpleMatch(explanationPattern, decision.getExplanation())
);
}
}

/**
* Carries all the cluster state objects needed for testing after {@link #createClusterStateAndRoutingAllocation} sets them up.
*/
Expand All @@ -150,7 +180,8 @@ private record TestHarness(
RoutingNode nearThresholdRoutingNode,
ShardRouting shardRouting1,
ShardRouting shardRouting2,
ShardRouting thirdRoutingNoWriteLoad
ShardRouting thirdRoutingNoWriteLoad,
ShardRouting unassignedShardRouting
) {}

/**
Expand Down Expand Up @@ -188,6 +219,7 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) {
ShardId testShardId1 = new ShardId(testIndex, 0);
ShardId testShardId2 = new ShardId(testIndex, 1);
ShardId testShardId3NoWriteLoad = new ShardId(testIndex, 2);
ShardId testShardId4Unassigned = new ShardId(testIndex, 3);

/**
* Create a ClusterInfo that includes the node and shard level write load estimates for a variety of node capacity situations.
Expand All @@ -213,6 +245,9 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) {
shardIdToWriteLoadEstimate.put(testShardId1, 0.5);
shardIdToWriteLoadEstimate.put(testShardId2, 0.5);
shardIdToWriteLoadEstimate.put(testShardId3NoWriteLoad, 0d);
if (randomBoolean()) {
shardIdToWriteLoadEstimate.put(testShardId4Unassigned, randomDoubleBetween(0.0, 2.0, true));
}

ClusterInfo clusterInfo = ClusterInfo.builder()
.nodeUsageStatsForThreadPools(nodeIdToNodeUsageStatsForThreadPools)
Expand Down Expand Up @@ -253,6 +288,12 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) {
true,
ShardRoutingState.STARTED
);
ShardRouting unassignedShardRouting = TestShardRouting.newShardRouting(
testShardId4Unassigned,
null,
true,
ShardRoutingState.UNASSIGNED
);
Comment on lines +291 to +296
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: We can randomly also add write load for this shard into ClusterInfo.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, fixed in 4527995


RoutingNode exceedingThresholdRoutingNode = RoutingNodesHelper.routingNode(
exceedingThresholdDiscoveryNode.getId(),
Expand All @@ -266,8 +307,7 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) {
);
RoutingNode nearThresholdRoutingNode = RoutingNodesHelper.routingNode(
nearThresholdDiscoveryNode3.getId(),
nearThresholdDiscoveryNode3,
new ShardRouting[] {}
nearThresholdDiscoveryNode3
);

return new TestHarness(
Expand All @@ -278,7 +318,8 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) {
nearThresholdRoutingNode,
shardRouting1,
shardRouting2,
thirdRoutingNoWriteLoad
thirdRoutingNoWriteLoad,
unassignedShardRouting
);
}

Expand Down