diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDecider.java index 8c427227e4717..57d1b668851dd 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDecider.java @@ -43,6 +43,11 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return Decision.single(Decision.Type.YES, NAME, "Decider is disabled"); } + // Never reject allocation of an unassigned shard + if (shardRouting.assignedToNode() == false) { + return Decision.single(Decision.Type.YES, NAME, "Shard is unassigned. Decider takes no action."); + } + // Check whether the shard being relocated has any write load estimate. If it does not, then this decider has no opinion. var allShardWriteLoads = allocation.clusterInfo().getShardWriteLoads(); var shardWriteLoad = allShardWriteLoads.get(shardRouting.shardId()); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java index 08b4a706e4785..9617297474693 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java @@ -25,6 +25,8 @@ import org.elasticsearch.cluster.routing.TestShardRouting; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.shard.ShardId; @@ -107,38 +109,66 @@ public void testWriteLoadDeciderCanAllocate() { ) .build() ); - assertEquals( + assertDecisionMatches( "Assigning a new shard to a node that is above the threshold should fail", - Decision.Type.NOT_PREFERRED, writeLoadDecider.canAllocate( testHarness.shardRouting2, testHarness.exceedingThresholdRoutingNode, testHarness.routingAllocation - ).type() + ), + Decision.Type.NOT_PREFERRED, + "Node [*] with write thread pool utilization [0.99] already exceeds the high utilization threshold of [0.900000]. " + + "Cannot allocate shard [[test-index][1]] to node without risking increased write latencies." ); - assertEquals( + assertDecisionMatches( + "Unassigned shard should always be accepted", + writeLoadDecider.canAllocate( + testHarness.unassignedShardRouting, + randomFrom(testHarness.exceedingThresholdRoutingNode, testHarness.belowThresholdRoutingNode), + testHarness.routingAllocation + ), + Decision.Type.YES, + "Shard is unassigned. Decider takes no action." + ); + assertDecisionMatches( "Assigning a new shard to a node that has capacity should succeed", + writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.belowThresholdRoutingNode, testHarness.routingAllocation), Decision.Type.YES, - writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.belowThresholdRoutingNode, testHarness.routingAllocation) - .type() + null ); - assertEquals( + assertDecisionMatches( "Assigning a new shard without a write load estimate should _not_ be blocked by lack of capacity", - Decision.Type.YES, writeLoadDecider.canAllocate( testHarness.thirdRoutingNoWriteLoad, testHarness.exceedingThresholdRoutingNode, testHarness.routingAllocation - ).type() + ), + Decision.Type.YES, + "Shard has no estimated write load. Decider takes no action." ); - assertEquals( + assertDecisionMatches( "Assigning a new shard that would cause the node to exceed capacity should fail", + writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.nearThresholdRoutingNode, testHarness.routingAllocation), Decision.Type.NOT_PREFERRED, - writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.nearThresholdRoutingNode, testHarness.routingAllocation) - .type() + "The high utilization threshold of [0.900000] would be exceeded on node [*] with utilization [0.89] " + + "if shard [[test-index][0]] with estimated additional utilisation [0.06250] (write load [0.50000] / threads [8]) were " + + "assigned to it. Cannot allocate shard to node without risking increased write latencies." + ); } + private void assertDecisionMatches(String description, Decision decision, Decision.Type type, String explanationPattern) { + assertEquals(description, type, decision.type()); + if (explanationPattern == null) { + assertNull(decision.getExplanation()); + } else { + assertTrue( + Strings.format("Expected: \"%s\", got \"%s\"", explanationPattern, decision.getExplanation()), + Regex.simpleMatch(explanationPattern, decision.getExplanation()) + ); + } + } + /** * Carries all the cluster state objects needed for testing after {@link #createClusterStateAndRoutingAllocation} sets them up. */ @@ -150,7 +180,8 @@ private record TestHarness( RoutingNode nearThresholdRoutingNode, ShardRouting shardRouting1, ShardRouting shardRouting2, - ShardRouting thirdRoutingNoWriteLoad + ShardRouting thirdRoutingNoWriteLoad, + ShardRouting unassignedShardRouting ) {} /** @@ -188,6 +219,7 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) { ShardId testShardId1 = new ShardId(testIndex, 0); ShardId testShardId2 = new ShardId(testIndex, 1); ShardId testShardId3NoWriteLoad = new ShardId(testIndex, 2); + ShardId testShardId4Unassigned = new ShardId(testIndex, 3); /** * Create a ClusterInfo that includes the node and shard level write load estimates for a variety of node capacity situations. @@ -213,6 +245,9 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) { shardIdToWriteLoadEstimate.put(testShardId1, 0.5); shardIdToWriteLoadEstimate.put(testShardId2, 0.5); shardIdToWriteLoadEstimate.put(testShardId3NoWriteLoad, 0d); + if (randomBoolean()) { + shardIdToWriteLoadEstimate.put(testShardId4Unassigned, randomDoubleBetween(0.0, 2.0, true)); + } ClusterInfo clusterInfo = ClusterInfo.builder() .nodeUsageStatsForThreadPools(nodeIdToNodeUsageStatsForThreadPools) @@ -253,6 +288,12 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) { true, ShardRoutingState.STARTED ); + ShardRouting unassignedShardRouting = TestShardRouting.newShardRouting( + testShardId4Unassigned, + null, + true, + ShardRoutingState.UNASSIGNED + ); RoutingNode exceedingThresholdRoutingNode = RoutingNodesHelper.routingNode( exceedingThresholdDiscoveryNode.getId(), @@ -266,8 +307,7 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) { ); RoutingNode nearThresholdRoutingNode = RoutingNodesHelper.routingNode( nearThresholdDiscoveryNode3.getId(), - nearThresholdDiscoveryNode3, - new ShardRouting[] {} + nearThresholdDiscoveryNode3 ); return new TestHarness( @@ -278,7 +318,8 @@ private TestHarness createClusterStateAndRoutingAllocation(String indexName) { nearThresholdRoutingNode, shardRouting1, shardRouting2, - thirdRoutingNoWriteLoad + thirdRoutingNoWriteLoad, + unassignedShardRouting ); }