From 8d4d03650a966b887175f8352171288d43bd1c20 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Sun, 28 Sep 2025 15:29:33 -0700 Subject: [PATCH 01/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. --- .../IndexShardCountAllocationDeciderIT.java | 24 ++++++++++++++ .../elasticsearch/cluster/ClusterModule.java | 2 ++ .../IndexShardCountAllocationDecider.java | 33 +++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java create mode 100644 server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java new file mode 100644 index 0000000000000..5fa2308b3b99e --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.test.ESIntegTestCase; + +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) +public class IndexShardCountAllocationDeciderIT extends ESIntegTestCase { + + + public void testIndexShardCountExceedsAverageAllocation() { + + } + + + +} diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index c3f4055c8d061..151c06fba0f08 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -56,6 +56,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDecider; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.IndexShardCountAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.IndexVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeReplacementAllocationDecider; @@ -482,6 +483,7 @@ public static Collection createAllocationDeciders( addAllocationDecider(deciders, new ThrottlingAllocationDecider(clusterSettings)); addAllocationDecider(deciders, new ShardsLimitAllocationDecider(clusterSettings)); addAllocationDecider(deciders, new AwarenessAllocationDecider(settings, clusterSettings)); + addAllocationDecider(deciders, new IndexShardCountAllocationDecider()); clusterPlugins.stream() .flatMap(p -> p.createAllocationDeciders(settings, clusterSettings).stream()) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java 
b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java new file mode 100644 index 0000000000000..a954c5f5f3ccb --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; + +/** + * An allocation decider + */ + +public class IndexShardCountAllocationDecider extends AllocationDecider { + + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) { + return Decision.NOT_PREFERRED; + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return Decision.NOT_PREFERRED; + } + +} From 19dea870dc6298f08a6d01de6575a4254e8d209b Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Mon, 29 Sep 2025 10:19:11 -0700 Subject: [PATCH 02/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. 
--- .../IndexShardCountAllocationDeciderIT.java | 16 ++++++++++++++++ .../IndexShardCountAllocationDecider.java | 6 ++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java index 5fa2308b3b99e..1a1fca015d28a 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java @@ -9,14 +9,30 @@ package org.elasticsearch.cluster.routing.allocation.decider; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESIntegTestCase; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) public class IndexShardCountAllocationDeciderIT extends ESIntegTestCase { + /** + * Happy path - + */ public void testIndexShardCountExceedsAverageAllocation() { + Settings settings = Settings.builder().build(); + + internalCluster().startMasterOnlyNode(settings); + final var dataNodes = internalCluster().startDataOnlyNodes(3, settings); + final String firstDataNodeName = dataNodes.get(0); + final String secondDataNodeName = dataNodes.get(1); + final String thirdDataNodeName = dataNodes.get(2); + final String firstDataNodeId = getNodeId(firstDataNodeName); + final String secondDataNodeId = getNodeId(secondDataNodeName); + final String thirdDataNodeId = getNodeId(thirdDataNodeName); + ensureStableCluster(4); + } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java index 
a954c5f5f3ccb..1b61c5ae6e7ac 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java @@ -14,9 +14,11 @@ import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; /** - * An allocation decider + * For an index of n shards hosted by a cluster of m nodes, a node should not host + * significantly more than n / m shards. This allocation decider enforces this principle. + * This allocation decider excludes any nodes flagged for shutdown from consideration + * when computing optimal shard distributions. */ - public class IndexShardCountAllocationDecider extends AllocationDecider { From 3016a652e0f657ec53cb7019306068e4dc311d7e Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Tue, 30 Sep 2025 15:46:44 -0700 Subject: [PATCH 03/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. 
--- .../IndexShardCountAllocationDeciderIT.java | 108 +++++++++++++++++- .../IndexShardCountConstraintSettings.java | 86 ++++++++++++++ 2 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java index 1a1fca015d28a..069f735591dfe 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java @@ -9,22 +9,81 @@ package org.elasticsearch.cluster.routing.allocation.decider; +import org.elasticsearch.cluster.metadata.ProjectId; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.transport.TransportService; + +import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) public class IndexShardCountAllocationDeciderIT extends ESIntegTestCase { + + /* + Happy path test scenario + + 3 ingestion node + + 5 shards index , + A 1 + B 2 + C 2 + + Propose Allocation to move shard from B to C -> Not preferred + + random number of shards + + + */ + + /** * Happy path - + * + * + * + * + * */ public void 
testIndexShardCountExceedsAverageAllocation() { - Settings settings = Settings.builder().build(); + + + } + + + private boolean checkShardAssignment( + RoutingNodes routingNodes, + Index index, + String firstDataNodeId, + String secondDataNodeId, + String thirdDataNodeId, + int shards + ) { + + int firstDataNodeRealNumberOfShards = routingNodes.node(firstDataNodeId).numberOfOwningShardsForIndex(index); + int secondDataNodeRealNumberOfShards = routingNodes.node(secondDataNodeId).numberOfOwningShardsForIndex(index); + int thirdDataNodeRealNumberOfShards = routingNodes.node(thirdDataNodeId).numberOfOwningShardsForIndex(index); + + return firstDataNodeRealNumberOfShards + secondDataNodeRealNumberOfShards + thirdDataNodeRealNumberOfShards == shards; + } + + + private TestHarness setUpIndex() { + + Settings settings = Settings.builder().build(); internalCluster().startMasterOnlyNode(settings); + final var dataNodes = internalCluster().startDataOnlyNodes(3, settings); + final String firstDataNodeName = dataNodes.get(0); final String secondDataNodeName = dataNodes.get(1); final String thirdDataNodeName = dataNodes.get(2); @@ -33,8 +92,55 @@ public void testIndexShardCountExceedsAverageAllocation() { final String thirdDataNodeId = getNodeId(thirdDataNodeName); ensureStableCluster(4); + final DiscoveryNode firstDiscoveryNode = internalCluster().getInstance(TransportService.class, firstDataNodeName).getLocalNode(); + final DiscoveryNode secondDiscoveryNode = internalCluster().getInstance(TransportService.class, secondDataNodeName).getLocalNode(); + final DiscoveryNode thirdDiscoveryNode = internalCluster().getInstance(TransportService.class, thirdDataNodeName).getLocalNode(); + int randomNumberOfShards = randomIntBetween(10, 20); + + String indexName = "test1"; + int numberOfShards = 4; + + logger.info( + "---> first node name " + + firstDataNodeName + + " and ID " + + firstDataNodeId + + "; second node name " + + secondDataNodeName + + " and ID " + + secondDataNodeId + + "; 
third node name " + + thirdDataNodeName + + " and ID " + + thirdDataNodeId + ); + + var verifyShardAllocationListener = ClusterServiceUtils.addMasterTemporaryStateListener(clusterState -> { + var indexRoutingTable = clusterState.routingTable(ProjectId.DEFAULT).index(indexName); + if (indexRoutingTable == null) { + return false; + } + return checkShardAssignment(clusterState.getRoutingNodes(), indexRoutingTable.getIndex(), + firstDataNodeId, secondDataNodeId, thirdDataNodeId, numberOfShards); + + }); + + createIndex( + indexName, + Settings.builder().put(SETTING_NUMBER_OF_SHARDS, randomNumberOfShards).put(SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + ensureGreen(indexName); + logger.info("---> wait for [" + randomNumberOfShards + "] shards to be assigned to node "); + + safeAwait(verifyShardAllocationListener); + return new TestHarness(indexName); } + record TestHarness( + String indexName + ) {}; + + } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java new file mode 100644 index 0000000000000..20c8580bbd269 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java @@ -0,0 +1,86 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.cluster.routing.allocation; + +import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.util.FeatureFlag; + +/** + * Settings definitions for the write load allocation decider and associated infrastructure + */ +public class IndexShardCountConstraintSettings { + + private static final String SETTING_PREFIX = "cluster.routing.allocation.index_shard_count_decider."; + private static final FeatureFlag INDEX_COUNT_DECIDER_FEATURE_FLAG = new FeatureFlag("index_shard_count_decider"); + + public enum IndexShardCountDeciderStatus { + /** + * The decider is disabled + */ + DISABLED, + + /** + * Index shard count decider is turned on. + */ + ENABLED; + + public boolean enabled() { + return this == ENABLED; + } + + public boolean disabled() { + return this == DISABLED; + } + } + + public static final Setting INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING = Setting.enumSetting( + IndexShardCountDeciderStatus.class, + SETTING_PREFIX + "enabled", + INDEX_COUNT_DECIDER_FEATURE_FLAG.isEnabled() ? IndexShardCountDeciderStatus.ENABLED : IndexShardCountDeciderStatus.DISABLED, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * This setting permits nodes to host more than ideally balanced number of index shards. + * Maximum tolerated index shard count = ceil(ideal * skew_tolerance) + * i.e. ideal = 4 shards, skew_tolerance = 1.3 + * maximum tolerated index shards = Math.ceil(4 * 1.3) = 6. 
+ */ + public static final Setting INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE = Setting.doubleSetting( + SETTING_PREFIX + "load_skew_tolerance", + 1.5d, + 0.0d, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + private volatile IndexShardCountDeciderStatus indexShardCountDeciderStatus; + private volatile double loadSkewTolerance; + + + + public IndexShardCountConstraintSettings(ClusterSettings clusterSettings) { + clusterSettings.initializeAndWatch(INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING, + status -> this.indexShardCountDeciderStatus = status); + clusterSettings.initializeAndWatch(INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE, + value -> this.loadSkewTolerance = value); + } + + public IndexShardCountDeciderStatus getIndexShardCountDeciderStatus() { + return this.indexShardCountDeciderStatus; + } + + public double getLoadSkewTolerance() { + return this.loadSkewTolerance; + } + +} From 25b5bca42b21d00fdc2d13446a3d8058932aa031 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Wed, 1 Oct 2025 20:21:50 -0700 Subject: [PATCH 04/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. 
--- .../IndexShardCountAllocationDeciderIT.java | 11 ++- .../elasticsearch/cluster/ClusterModule.java | 2 +- .../IndexShardCountConstraintSettings.java | 17 +---- .../IndexShardCountAllocationDecider.java | 75 ++++++++++++++++++- 4 files changed, 86 insertions(+), 19 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java index 069f735591dfe..41dd0db59a21a 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java @@ -38,7 +38,14 @@ public class IndexShardCountAllocationDeciderIT extends ESIntegTestCase { Propose Allocation to move shard from B to C -> Not preferred - random number of shards + Enable this decider + put very strict load skew tolerance basically ideal + with this in place the end result should be all not exceeding ideal + + + What happes + + So basically first */ @@ -54,6 +61,8 @@ public class IndexShardCountAllocationDeciderIT extends ESIntegTestCase { */ public void testIndexShardCountExceedsAverageAllocation() { + setUpIndex(); + diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 151c06fba0f08..a833508c08944 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -483,7 +483,7 @@ public static Collection createAllocationDeciders( addAllocationDecider(deciders, new ThrottlingAllocationDecider(clusterSettings)); addAllocationDecider(deciders, new ShardsLimitAllocationDecider(clusterSettings)); 
addAllocationDecider(deciders, new AwarenessAllocationDecider(settings, clusterSettings)); - addAllocationDecider(deciders, new IndexShardCountAllocationDecider()); + addAllocationDecider(deciders, new IndexShardCountAllocationDecider(clusterSettings)); clusterPlugins.stream() .flatMap(p -> p.createAllocationDeciders(settings, clusterSettings).stream()) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java index 20c8580bbd269..bda28ae920370 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java @@ -14,22 +14,15 @@ import org.elasticsearch.common.util.FeatureFlag; /** - * Settings definitions for the write load allocation decider and associated infrastructure + * Settings definitions for the index shard count allocation decider and associated infrastructure */ public class IndexShardCountConstraintSettings { private static final String SETTING_PREFIX = "cluster.routing.allocation.index_shard_count_decider."; - private static final FeatureFlag INDEX_COUNT_DECIDER_FEATURE_FLAG = new FeatureFlag("index_shard_count_decider"); + private static final FeatureFlag INDEX_SHARD_COUNT_DECIDER_FEATURE_FLAG = new FeatureFlag("index_shard_count_decider"); public enum IndexShardCountDeciderStatus { - /** - * The decider is disabled - */ DISABLED, - - /** - * Index shard count decider is turned on. - */ ENABLED; public boolean enabled() { @@ -44,7 +37,7 @@ public boolean disabled() { public static final Setting INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING = Setting.enumSetting( IndexShardCountDeciderStatus.class, SETTING_PREFIX + "enabled", - INDEX_COUNT_DECIDER_FEATURE_FLAG.isEnabled() ? 
IndexShardCountDeciderStatus.ENABLED : IndexShardCountDeciderStatus.DISABLED, + INDEX_SHARD_COUNT_DECIDER_FEATURE_FLAG.isEnabled() ? IndexShardCountDeciderStatus.ENABLED : IndexShardCountDeciderStatus.DISABLED, Setting.Property.Dynamic, Setting.Property.NodeScope ); @@ -58,7 +51,7 @@ public boolean disabled() { public static final Setting INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE = Setting.doubleSetting( SETTING_PREFIX + "load_skew_tolerance", 1.5d, - 0.0d, + 1.0d, Setting.Property.Dynamic, Setting.Property.NodeScope ); @@ -66,8 +59,6 @@ public boolean disabled() { private volatile IndexShardCountDeciderStatus indexShardCountDeciderStatus; private volatile double loadSkewTolerance; - - public IndexShardCountConstraintSettings(ClusterSettings clusterSettings) { clusterSettings.initializeAndWatch(INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING, status -> this.indexShardCountDeciderStatus = status); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java index 1b61c5ae6e7ac..353e86010e92a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java @@ -9,9 +9,17 @@ package org.elasticsearch.cluster.routing.allocation.decider; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.routing.RoutingNode; import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.allocation.IndexShardCountConstraintSettings; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import 
org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.core.Strings; +import org.elasticsearch.index.Index; /** * For an index of n shards hosted by a cluster of m nodes, a node should not host @@ -21,15 +29,74 @@ */ public class IndexShardCountAllocationDecider extends AllocationDecider { + private static final Logger logger = LogManager.getLogger(IndexShardCountAllocationDecider.class); - @Override - public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) { - return Decision.NOT_PREFERRED; + public static final String NAME = "index_shard_count"; + + private final IndexShardCountConstraintSettings indexShardCountConstraintSettings; + + public IndexShardCountAllocationDecider(ClusterSettings clusterSettings) { + this.indexShardCountConstraintSettings = new IndexShardCountConstraintSettings(clusterSettings); } @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - return Decision.NOT_PREFERRED; + if (indexShardCountConstraintSettings.getIndexShardCountDeciderStatus().disabled()) { + return Decision.single(Decision.Type.YES, NAME, "Decider is disabled"); + } + + Index index = shardRouting.index(); + if (node.hasIndex(index)) { + var nodeShutdowns = allocation.metadata().nodeShutdowns().getAll().size(); + var allNodes = allocation.nodes().size(); + var availableNodes = allNodes - nodeShutdowns; + + assert availableNodes > 0; + assert allocation.getClusterState().routingTable(ProjectId.DEFAULT).hasIndex(index); + + var totalShards = allocation.getClusterState().routingTable(ProjectId.DEFAULT).index(index).size(); + var idealAllocation = Math.ceil((double) totalShards / availableNodes); + var threshold = (int) Math.ceil(idealAllocation * indexShardCountConstraintSettings.getLoadSkewTolerance()); + var currentAllocation = node.numberOfOwningShardsForIndex(index); + + if (currentAllocation >= threshold) { + + String rationale = """ + For index [%s] with [%d] 
shards, Node [%s] is expected to hold [%.0f] shards for index [%s], based on the total of [%d] + nodes available. The configured load skew tolerance is [%.2f], which yields an allocation threshold of + Math.ceil([%0.f] × [%.2f]) = [%d] shards. Currently, node [%s] is assigned [%d] shards of index [%s]. Therefore, + assigning additional shards is not preferred. + """; + + String explanation = Strings.format(rationale, + index, totalShards, node.nodeId(), idealAllocation, index, availableNodes, + indexShardCountConstraintSettings.getLoadSkewTolerance(), idealAllocation, + indexShardCountConstraintSettings.getLoadSkewTolerance(), threshold, node.nodeId(), currentAllocation, index); + + if (logger.isTraceEnabled()) { + logger.trace(explanation); + } + + return allocation.decision(Decision.NOT_PREFERRED, NAME, explanation); + } + } + + return Decision.YES; + } + + @Override + public Decision canRemain(IndexMetadata indexMetadata, ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + if (indexShardCountConstraintSettings.getIndexShardCountDeciderStatus().disabled()) { + return Decision.single(Decision.Type.YES, NAME, "Decider is disabled"); + } + + // TODO: implement + return Decision.single(Decision.Type.YES, NAME, "canRemain() is not yet implemented"); } + + + + + } From 7913808c75301a234c69dc235026569557c8a6cd Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Thu, 2 Oct 2025 10:40:28 -0700 Subject: [PATCH 05/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. 
--- .../IndexShardCountAllocationDeciderIT.java | 101 ++++++++++++------ .../IndexShardCountAllocationDecider.java | 3 +- .../common/settings/ClusterSettings.java | 5 +- 3 files changed, 77 insertions(+), 32 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java index 41dd0db59a21a..a767afa839f2b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java @@ -12,6 +12,7 @@ import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.RoutingNodes; +import org.elasticsearch.cluster.routing.allocation.IndexShardCountConstraintSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.test.ClusterServiceUtils; @@ -61,7 +62,7 @@ public class IndexShardCountAllocationDeciderIT extends ESIntegTestCase { */ public void testIndexShardCountExceedsAverageAllocation() { - setUpIndex(); + var testHarness = setUpIndex(); @@ -75,20 +76,37 @@ private boolean checkShardAssignment( String firstDataNodeId, String secondDataNodeId, String thirdDataNodeId, - int shards - ) { + int upperLimitFirstDataNode, + int lowerLimitFirstDataNode, + int upperLimitSecondDataNode, + int lowerLimitSecondDataNode, + int upperLimitThirdDataNode, + int lowerLimitThirdDataNode + ) { int firstDataNodeRealNumberOfShards = routingNodes.node(firstDataNodeId).numberOfOwningShardsForIndex(index); int secondDataNodeRealNumberOfShards = routingNodes.node(secondDataNodeId).numberOfOwningShardsForIndex(index); int 
thirdDataNodeRealNumberOfShards = routingNodes.node(thirdDataNodeId).numberOfOwningShardsForIndex(index); - return firstDataNodeRealNumberOfShards + secondDataNodeRealNumberOfShards + thirdDataNodeRealNumberOfShards == shards; + return firstDataNodeRealNumberOfShards <= upperLimitFirstDataNode + && firstDataNodeRealNumberOfShards >= lowerLimitFirstDataNode + && secondDataNodeRealNumberOfShards <= upperLimitSecondDataNode + && secondDataNodeRealNumberOfShards >= lowerLimitSecondDataNode + && thirdDataNodeRealNumberOfShards <= upperLimitThirdDataNode + && thirdDataNodeRealNumberOfShards >= lowerLimitThirdDataNode; } private TestHarness setUpIndex() { - Settings settings = Settings.builder().build(); + Settings settings = Settings.builder() + .put(IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING.getKey(), + IndexShardCountConstraintSettings.IndexShardCountDeciderStatus.ENABLED + ) + .put(IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE.getKey(), + 1.0d + ) + .build(); internalCluster().startMasterOnlyNode(settings); final var dataNodes = internalCluster().startDataOnlyNodes(3, settings); @@ -104,34 +122,39 @@ private TestHarness setUpIndex() { final DiscoveryNode firstDiscoveryNode = internalCluster().getInstance(TransportService.class, firstDataNodeName).getLocalNode(); final DiscoveryNode secondDiscoveryNode = internalCluster().getInstance(TransportService.class, secondDataNodeName).getLocalNode(); final DiscoveryNode thirdDiscoveryNode = internalCluster().getInstance(TransportService.class, thirdDataNodeName).getLocalNode(); - int randomNumberOfShards = randomIntBetween(10, 20); - - String indexName = "test1"; - int numberOfShards = 4; + String format = """ + ---> first node NAME %s and ID %s; second node NAME %s and ID %s; third node NAME %s and ID %s; + """; logger.info( - "---> first node name " - + firstDataNodeName - + " and ID " - + firstDataNodeId - + "; second node name " - + secondDataNodeName - + " 
and ID " - + secondDataNodeId - + "; third node name " - + thirdDataNodeName - + " and ID " - + thirdDataNodeId + String.format(format, + firstDataNodeName, firstDataNodeId, + secondDataNodeName, secondDataNodeId, + thirdDataNodeName, thirdDataNodeId) ); + int randomNumberOfShards = randomIntBetween(15, 20); + String indexName = randomIdentifier(); + int lowerThreshold = randomNumberOfShards / 3; + int upperThreshold = (int)Math.ceil((double) randomNumberOfShards / 3); + var verifyShardAllocationListener = ClusterServiceUtils.addMasterTemporaryStateListener(clusterState -> { var indexRoutingTable = clusterState.routingTable(ProjectId.DEFAULT).index(indexName); if (indexRoutingTable == null) { return false; } - return checkShardAssignment(clusterState.getRoutingNodes(), indexRoutingTable.getIndex(), - firstDataNodeId, secondDataNodeId, thirdDataNodeId, numberOfShards); - + return checkShardAssignment(clusterState.getRoutingNodes(), + indexRoutingTable.getIndex(), + firstDataNodeId, + secondDataNodeId, + thirdDataNodeId, + upperThreshold, + lowerThreshold, + upperThreshold, + lowerThreshold, + upperThreshold, + lowerThreshold + ); }); createIndex( @@ -142,14 +165,32 @@ private TestHarness setUpIndex() { logger.info("---> wait for [" + randomNumberOfShards + "] shards to be assigned to node "); safeAwait(verifyShardAllocationListener); - return new TestHarness(indexName); + return new TestHarness( + firstDataNodeName, + secondDataNodeName, + thirdDataNodeName, + firstDataNodeId, + secondDataNodeId, + thirdDataNodeName, + firstDiscoveryNode, + secondDiscoveryNode, + thirdDiscoveryNode, + indexName, + randomNumberOfShards + ); } - record TestHarness( - String indexName + String firstDataNodeName, + String secondDataNodeName, + String thirdDataNodeName, + String firstDataNodeId, + String secondDataNodeId, + String thirdDataNodeId, + DiscoveryNode firstDiscoveryNode, + DiscoveryNode secondDiscoveryNode, + DiscoveryNode thirdDiscoveryNode, + String indexName, + int 
randomNumberOfShards ) {}; - - - } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java index 353e86010e92a..390253447fd55 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java @@ -48,6 +48,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing Index index = shardRouting.index(); if (node.hasIndex(index)) { var nodeShutdowns = allocation.metadata().nodeShutdowns().getAll().size(); + var allNodes = allocation.nodes().size(); var availableNodes = allNodes - nodeShutdowns; @@ -64,7 +65,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing String rationale = """ For index [%s] with [%d] shards, Node [%s] is expected to hold [%.0f] shards for index [%s], based on the total of [%d] nodes available. The configured load skew tolerance is [%.2f], which yields an allocation threshold of - Math.ceil([%0.f] × [%.2f]) = [%d] shards. Currently, node [%s] is assigned [%d] shards of index [%s]. Therefore, + Math.ceil([%.0f] × [%.2f]) = [%d] shards. Currently, node [%s] is assigned [%d] shards of index [%s]. Therefore, assigning additional shards is not preferred. 
"""; diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index 9c2d6fab10368..9d94226493ec8 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -46,6 +46,7 @@ import org.elasticsearch.cluster.routing.OperationRouting; import org.elasticsearch.cluster.routing.allocation.DataTier; import org.elasticsearch.cluster.routing.allocation.DiskThresholdSettings; +import org.elasticsearch.cluster.routing.allocation.IndexShardCountConstraintSettings; import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings; import org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundSummaryService; import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; @@ -648,6 +649,8 @@ public void apply(Settings value, Settings current, Settings previous) { WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_HIGH_UTILIZATION_THRESHOLD_SETTING, WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_HIGH_UTILIZATION_DURATION_SETTING, WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_QUEUE_LATENCY_THRESHOLD_SETTING, - WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_REROUTE_INTERVAL_SETTING + WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_REROUTE_INTERVAL_SETTING, + IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING, + IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE ); } From b0e7186113d4813fe6a886b35ac21bac5c119698 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Thu, 2 Oct 2025 11:34:36 -0700 Subject: [PATCH 06/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. 
--- .../IndexShardCountAllocationDeciderIT.java | 2 -- .../IndexShardCountAllocationDecider.java | 20 ++++++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java index a767afa839f2b..260f8a6fda3f6 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java @@ -96,9 +96,7 @@ private boolean checkShardAssignment( && thirdDataNodeRealNumberOfShards >= lowerLimitThirdDataNode; } - private TestHarness setUpIndex() { - Settings settings = Settings.builder() .put(IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING.getKey(), IndexShardCountConstraintSettings.IndexShardCountDeciderStatus.ENABLED diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java index 390253447fd55..78f51252fcb5a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java @@ -13,6 +13,8 @@ import org.apache.logging.log4j.Logger; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.ProjectId; +import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.RoutingNode; import 
org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.IndexShardCountConstraintSettings; @@ -21,6 +23,9 @@ import org.elasticsearch.core.Strings; import org.elasticsearch.index.Index; +import java.util.function.Predicate; +import java.util.stream.Collectors; + /** * For an index of n shards hosted by a cluster of m nodes, a node should not host * significantly more than n / m shards. This allocation decider enforces this principle. @@ -47,16 +52,17 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing Index index = shardRouting.index(); if (node.hasIndex(index)) { - var nodeShutdowns = allocation.metadata().nodeShutdowns().getAll().size(); - - var allNodes = allocation.nodes().size(); - var availableNodes = allNodes - nodeShutdowns; + var dataNodes = allocation.nodes().stream() + .filter(DiscoveryNode::canContainData).map(DiscoveryNode::getId).collect(Collectors.toSet()); + var nodesShuttingDown = allocation.metadata().nodeShutdowns().getAll().values().stream() + .map(SingleNodeShutdownMetadata::getNodeId).collect(Collectors.toSet()); + var availableDataNodes = dataNodes.stream().filter(Predicate.not(nodesShuttingDown::contains)).collect(Collectors.toSet()); - assert availableNodes > 0; + assert availableDataNodes.isEmpty() == false; assert allocation.getClusterState().routingTable(ProjectId.DEFAULT).hasIndex(index); var totalShards = allocation.getClusterState().routingTable(ProjectId.DEFAULT).index(index).size(); - var idealAllocation = Math.ceil((double) totalShards / availableNodes); + var idealAllocation = Math.ceil((double) totalShards / availableDataNodes.size()); var threshold = (int) Math.ceil(idealAllocation * indexShardCountConstraintSettings.getLoadSkewTolerance()); var currentAllocation = node.numberOfOwningShardsForIndex(index); @@ -70,7 +76,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing """; String explanation = Strings.format(rationale, 
- index, totalShards, node.nodeId(), idealAllocation, index, availableNodes, + index, totalShards, node.nodeId(), idealAllocation, index, availableDataNodes.size(), indexShardCountConstraintSettings.getLoadSkewTolerance(), idealAllocation, indexShardCountConstraintSettings.getLoadSkewTolerance(), threshold, node.nodeId(), currentAllocation, index); From 0d797529b3a5f0017d4c8c058a17a3b80290882f Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Thu, 2 Oct 2025 12:45:44 -0700 Subject: [PATCH 07/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. --- .../IndexShardCountAllocationDeciderIT.java | 72 +++++++++---------- .../IndexShardCountAllocationDecider.java | 5 -- 2 files changed, 34 insertions(+), 43 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java index 260f8a6fda3f6..d54cf7298fdc8 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java @@ -21,55 +21,51 @@ import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; +import static org.hamcrest.Matchers.equalTo; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) public class IndexShardCountAllocationDeciderIT extends ESIntegTestCase { - - - /* - Happy path test scenario - - 3 ingestion node - - 5 shards index , - A 1 - B 2 - C 2 - - 
Propose Allocation to move shard from B to C -> Not preferred - - Enable this decider - put very strict load skew tolerance basically ideal - with this in place the end result should be all not exceeding ideal - - - What happes - - So basically first - - - */ - - - /** - * Happy path - - * - * - * - * - * - */ public void testIndexShardCountExceedsAverageAllocation() { + var testHarness = setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistributed(); + + /** + * Exclude assignment of shards to the first data nodes via the {@link FilterAllocationDecider} settings. + * This triggers the balancer to work out a new routing. + */ + logger.info("---> Remove shard assignments of node " + testHarness.firstDataNodeName + " by excluding first data node."); + updateClusterSettings( + Settings.builder().put("cluster.routing.allocation.exclude._name", testHarness.firstDataNodeName) + ); - var testHarness = setUpIndex(); + refreshClusterInfo(); + int lowerThreshold = testHarness.randomNumberOfShards / 2; + int upperThreshold = (int)Math.ceil((double) testHarness.randomNumberOfShards / 2); + var verifyShardCountBalanceListener = ClusterServiceUtils.addMasterTemporaryStateListener(clusterState -> { + var indexRoutingTable = clusterState.routingTable(ProjectId.DEFAULT).index(testHarness.indexName); + if (indexRoutingTable == null) { + return false; + } + if (indexRoutingTable.numberOfNodesShardsAreAllocatedOn() != 2) { + return false; + } + Index index = indexRoutingTable.getIndex(); + assertThat(indexRoutingTable.numberOfNodesShardsAreAllocatedOn(), equalTo(2)); + clusterState.getRoutingNodes().stream().forEach(node -> { + if (node.hasIndex(index)) { + assert node.numberOfOwningShardsForIndex(index) >= lowerThreshold; + assert node.numberOfOwningShardsForIndex(index) <= upperThreshold; + } + }); + return true; + }); + safeAwait(verifyShardCountBalanceListener); } - private boolean checkShardAssignment( RoutingNodes routingNodes, Index index, @@ -96,7 +92,7 @@ private boolean 
checkShardAssignment( && thirdDataNodeRealNumberOfShards >= lowerLimitThirdDataNode; } - private TestHarness setUpIndex() { + private TestHarness setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistributed() { Settings settings = Settings.builder() .put(IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING.getKey(), IndexShardCountConstraintSettings.IndexShardCountDeciderStatus.ENABLED diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java index 78f51252fcb5a..ccd3d7cabb421 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java @@ -101,9 +101,4 @@ public Decision canRemain(IndexMetadata indexMetadata, ShardRouting shardRouting return Decision.single(Decision.Type.YES, NAME, "canRemain() is not yet implemented"); } - - - - - } From 5fc01c8b5c1daa3087e1b2cfd42bdf2d1e6374eb Mon Sep 17 00:00:00 2001 From: Zhubo Tang Date: Thu, 2 Oct 2025 13:17:37 -0700 Subject: [PATCH 08/67] Update docs/changelog/135875.yaml --- docs/changelog/135875.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/135875.yaml diff --git a/docs/changelog/135875.yaml b/docs/changelog/135875.yaml new file mode 100644 index 0000000000000..4bf217a1672e2 --- /dev/null +++ b/docs/changelog/135875.yaml @@ -0,0 +1,6 @@ +pr: 135875 +summary: "Allocation: introduce a new decider that balances the index shard count\ + \ among nodes" +area: "Allocation, Distributed" +type: enhancement +issues: [] From 6d95721c742a64333714f694d278e666c8be1d5a Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 2 Oct 2025 20:27:20 +0000 Subject: [PATCH 09/67] [CI] Auto commit changes from spotless 
--- .../IndexShardCountAllocationDeciderIT.java | 37 +++++++++--------- .../IndexShardCountConstraintSettings.java | 6 +-- .../IndexShardCountAllocationDecider.java | 38 ++++++++++++++----- 3 files changed, 51 insertions(+), 30 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java index d54cf7298fdc8..b05eaba8f0469 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java @@ -34,14 +34,12 @@ public void testIndexShardCountExceedsAverageAllocation() { * This triggers the balancer to work out a new routing. */ logger.info("---> Remove shard assignments of node " + testHarness.firstDataNodeName + " by excluding first data node."); - updateClusterSettings( - Settings.builder().put("cluster.routing.allocation.exclude._name", testHarness.firstDataNodeName) - ); + updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", testHarness.firstDataNodeName)); refreshClusterInfo(); int lowerThreshold = testHarness.randomNumberOfShards / 2; - int upperThreshold = (int)Math.ceil((double) testHarness.randomNumberOfShards / 2); + int upperThreshold = (int) Math.ceil((double) testHarness.randomNumberOfShards / 2); var verifyShardCountBalanceListener = ClusterServiceUtils.addMasterTemporaryStateListener(clusterState -> { var indexRoutingTable = clusterState.routingTable(ProjectId.DEFAULT).index(testHarness.indexName); @@ -78,7 +76,7 @@ private boolean checkShardAssignment( int lowerLimitSecondDataNode, int upperLimitThirdDataNode, int lowerLimitThirdDataNode - ) { + ) { int firstDataNodeRealNumberOfShards = 
routingNodes.node(firstDataNodeId).numberOfOwningShardsForIndex(index); int secondDataNodeRealNumberOfShards = routingNodes.node(secondDataNodeId).numberOfOwningShardsForIndex(index); @@ -94,12 +92,11 @@ private boolean checkShardAssignment( private TestHarness setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistributed() { Settings settings = Settings.builder() - .put(IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING.getKey(), + .put( + IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING.getKey(), IndexShardCountConstraintSettings.IndexShardCountDeciderStatus.ENABLED ) - .put(IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE.getKey(), - 1.0d - ) + .put(IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE.getKey(), 1.0d) .build(); internalCluster().startMasterOnlyNode(settings); @@ -113,7 +110,7 @@ private TestHarness setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistri final String thirdDataNodeId = getNodeId(thirdDataNodeName); ensureStableCluster(4); - final DiscoveryNode firstDiscoveryNode = internalCluster().getInstance(TransportService.class, firstDataNodeName).getLocalNode(); + final DiscoveryNode firstDiscoveryNode = internalCluster().getInstance(TransportService.class, firstDataNodeName).getLocalNode(); final DiscoveryNode secondDiscoveryNode = internalCluster().getInstance(TransportService.class, secondDataNodeName).getLocalNode(); final DiscoveryNode thirdDiscoveryNode = internalCluster().getInstance(TransportService.class, thirdDataNodeName).getLocalNode(); @@ -121,23 +118,29 @@ private TestHarness setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistri ---> first node NAME %s and ID %s; second node NAME %s and ID %s; third node NAME %s and ID %s; """; logger.info( - String.format(format, - firstDataNodeName, firstDataNodeId, - secondDataNodeName, secondDataNodeId, - thirdDataNodeName, thirdDataNodeId) + String.format( + format, 
+ firstDataNodeName, + firstDataNodeId, + secondDataNodeName, + secondDataNodeId, + thirdDataNodeName, + thirdDataNodeId + ) ); int randomNumberOfShards = randomIntBetween(15, 20); String indexName = randomIdentifier(); int lowerThreshold = randomNumberOfShards / 3; - int upperThreshold = (int)Math.ceil((double) randomNumberOfShards / 3); + int upperThreshold = (int) Math.ceil((double) randomNumberOfShards / 3); var verifyShardAllocationListener = ClusterServiceUtils.addMasterTemporaryStateListener(clusterState -> { var indexRoutingTable = clusterState.routingTable(ProjectId.DEFAULT).index(indexName); if (indexRoutingTable == null) { return false; } - return checkShardAssignment(clusterState.getRoutingNodes(), + return checkShardAssignment( + clusterState.getRoutingNodes(), indexRoutingTable.getIndex(), firstDataNodeId, secondDataNodeId, @@ -148,7 +151,7 @@ private TestHarness setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistri lowerThreshold, upperThreshold, lowerThreshold - ); + ); }); createIndex( diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java index bda28ae920370..9f4b5ac169ccd 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java @@ -60,10 +60,8 @@ public boolean disabled() { private volatile double loadSkewTolerance; public IndexShardCountConstraintSettings(ClusterSettings clusterSettings) { - clusterSettings.initializeAndWatch(INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING, - status -> this.indexShardCountDeciderStatus = status); - clusterSettings.initializeAndWatch(INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE, - value -> this.loadSkewTolerance = value); + 
clusterSettings.initializeAndWatch(INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING, status -> this.indexShardCountDeciderStatus = status); + clusterSettings.initializeAndWatch(INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE, value -> this.loadSkewTolerance = value); } public IndexShardCountDeciderStatus getIndexShardCountDeciderStatus() { diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java index ccd3d7cabb421..1908470f69be0 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java @@ -52,10 +52,18 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing Index index = shardRouting.index(); if (node.hasIndex(index)) { - var dataNodes = allocation.nodes().stream() - .filter(DiscoveryNode::canContainData).map(DiscoveryNode::getId).collect(Collectors.toSet()); - var nodesShuttingDown = allocation.metadata().nodeShutdowns().getAll().values().stream() - .map(SingleNodeShutdownMetadata::getNodeId).collect(Collectors.toSet()); + var dataNodes = allocation.nodes() + .stream() + .filter(DiscoveryNode::canContainData) + .map(DiscoveryNode::getId) + .collect(Collectors.toSet()); + var nodesShuttingDown = allocation.metadata() + .nodeShutdowns() + .getAll() + .values() + .stream() + .map(SingleNodeShutdownMetadata::getNodeId) + .collect(Collectors.toSet()); var availableDataNodes = dataNodes.stream().filter(Predicate.not(nodesShuttingDown::contains)).collect(Collectors.toSet()); assert availableDataNodes.isEmpty() == false; @@ -63,7 +71,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing var totalShards = 
allocation.getClusterState().routingTable(ProjectId.DEFAULT).index(index).size(); var idealAllocation = Math.ceil((double) totalShards / availableDataNodes.size()); - var threshold = (int) Math.ceil(idealAllocation * indexShardCountConstraintSettings.getLoadSkewTolerance()); + var threshold = (int) Math.ceil(idealAllocation * indexShardCountConstraintSettings.getLoadSkewTolerance()); var currentAllocation = node.numberOfOwningShardsForIndex(index); if (currentAllocation >= threshold) { @@ -75,10 +83,22 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing assigning additional shards is not preferred. """; - String explanation = Strings.format(rationale, - index, totalShards, node.nodeId(), idealAllocation, index, availableDataNodes.size(), - indexShardCountConstraintSettings.getLoadSkewTolerance(), idealAllocation, - indexShardCountConstraintSettings.getLoadSkewTolerance(), threshold, node.nodeId(), currentAllocation, index); + String explanation = Strings.format( + rationale, + index, + totalShards, + node.nodeId(), + idealAllocation, + index, + availableDataNodes.size(), + indexShardCountConstraintSettings.getLoadSkewTolerance(), + idealAllocation, + indexShardCountConstraintSettings.getLoadSkewTolerance(), + threshold, + node.nodeId(), + currentAllocation, + index + ); if (logger.isTraceEnabled()) { logger.trace(explanation); From 5212d02b654259e6b9ebbb989ab633c196f5e4c9 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Tue, 14 Oct 2025 11:26:55 -0700 Subject: [PATCH 10/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. 
--- docs/changelog/135875.yaml | 2 +- .../decider/IndexShardCountAllocationDeciderIT.java | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/changelog/135875.yaml b/docs/changelog/135875.yaml index 4bf217a1672e2..ee9fdb9e06214 100644 --- a/docs/changelog/135875.yaml +++ b/docs/changelog/135875.yaml @@ -1,6 +1,6 @@ pr: 135875 summary: "Allocation: introduce a new decider that balances the index shard count\ \ among nodes" -area: "Allocation, Distributed" +area: Allocation type: enhancement issues: [] diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java index b05eaba8f0469..b67a5e8f70c61 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java @@ -13,6 +13,7 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.allocation.IndexShardCountConstraintSettings; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.test.ClusterServiceUtils; @@ -29,10 +30,8 @@ public class IndexShardCountAllocationDeciderIT extends ESIntegTestCase { public void testIndexShardCountExceedsAverageAllocation() { var testHarness = setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistributed(); - /** - * Exclude assignment of shards to the first data nodes via the {@link FilterAllocationDecider} settings. - * This triggers the balancer to work out a new routing. 
- */ + // Exclude assignment of shards to the first data nodes via the {@link FilterAllocationDecider} settings. + // This triggers the balancer to work out a new routing. logger.info("---> Remove shard assignments of node " + testHarness.firstDataNodeName + " by excluding first data node."); updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", testHarness.firstDataNodeName)); @@ -118,7 +117,7 @@ private TestHarness setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistri ---> first node NAME %s and ID %s; second node NAME %s and ID %s; third node NAME %s and ID %s; """; logger.info( - String.format( + Strings.format( format, firstDataNodeName, firstDataNodeId, @@ -189,5 +188,5 @@ record TestHarness( DiscoveryNode thirdDiscoveryNode, String indexName, int randomNumberOfShards - ) {}; + ) {} } From 11aaace94e5125717ef3615517a786f2bd6dc0fa Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Sat, 18 Oct 2025 15:08:16 -0700 Subject: [PATCH 11/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. 
--- ...a => IndexBalanceAllocationDeciderIT.java} | 11 +- .../elasticsearch/cluster/ClusterModule.java | 4 +- ...va => IndexBalanceConstraintSettings.java} | 38 ++---- .../IndexBalanceAllocationDecider.java | 115 ++++++++++++++++ .../IndexShardCountAllocationDecider.java | 124 ------------------ .../common/settings/ClusterSettings.java | 6 +- 6 files changed, 136 insertions(+), 162 deletions(-) rename server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/{IndexShardCountAllocationDeciderIT.java => IndexBalanceAllocationDeciderIT.java} (94%) rename server/src/main/java/org/elasticsearch/cluster/routing/allocation/{IndexShardCountConstraintSettings.java => IndexBalanceConstraintSettings.java} (52%) create mode 100644 server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java delete mode 100644 server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java similarity index 94% rename from server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java rename to server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java index b67a5e8f70c61..c2be7355fdb33 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java @@ -12,7 +12,7 @@ import org.elasticsearch.cluster.metadata.ProjectId; import 
org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.RoutingNodes; -import org.elasticsearch.cluster.routing.allocation.IndexShardCountConstraintSettings; +import org.elasticsearch.cluster.routing.allocation.IndexBalanceConstraintSettings; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; @@ -25,7 +25,7 @@ import static org.hamcrest.Matchers.equalTo; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) -public class IndexShardCountAllocationDeciderIT extends ESIntegTestCase { +public class IndexBalanceAllocationDeciderIT extends ESIntegTestCase { public void testIndexShardCountExceedsAverageAllocation() { var testHarness = setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistributed(); @@ -91,11 +91,8 @@ private boolean checkShardAssignment( private TestHarness setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistributed() { Settings settings = Settings.builder() - .put( - IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING.getKey(), - IndexShardCountConstraintSettings.IndexShardCountDeciderStatus.ENABLED - ) - .put(IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE.getKey(), 1.0d) + .put(IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_ENABLED_SETTING.getKey(), true) + .put(IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE.getKey(), 1.0d) .build(); internalCluster().startMasterOnlyNode(settings); diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 87e3d42877c3e..67d0b3659266b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -57,7 +57,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDecider; import 
org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider; -import org.elasticsearch.cluster.routing.allocation.decider.IndexShardCountAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.IndexBalanceAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.IndexVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeReplacementAllocationDecider; @@ -487,7 +487,7 @@ public static Collection createAllocationDeciders( addAllocationDecider(deciders, new ThrottlingAllocationDecider(clusterSettings)); addAllocationDecider(deciders, new ShardsLimitAllocationDecider(clusterSettings)); addAllocationDecider(deciders, new AwarenessAllocationDecider(settings, clusterSettings)); - addAllocationDecider(deciders, new IndexShardCountAllocationDecider(clusterSettings)); + addAllocationDecider(deciders, new IndexBalanceAllocationDecider(clusterSettings)); clusterPlugins.stream() .flatMap(p -> p.createAllocationDeciders(settings, clusterSettings).stream()) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java similarity index 52% rename from server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java rename to server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java index 9f4b5ac169ccd..e34d03cdb5b46 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexShardCountConstraintSettings.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java @@ -16,28 +16,14 @@ /** * Settings 
definitions for the index shard count allocation decider and associated infrastructure */ -public class IndexShardCountConstraintSettings { +public class IndexBalanceConstraintSettings { - private static final String SETTING_PREFIX = "cluster.routing.allocation.index_shard_count_decider."; - private static final FeatureFlag INDEX_SHARD_COUNT_DECIDER_FEATURE_FLAG = new FeatureFlag("index_shard_count_decider"); + private static final String SETTING_PREFIX = "cluster.routing.allocation.index_balance_decider."; + private static final FeatureFlag INDEX_BALANCE_DECIDER_FEATURE_FLAG = new FeatureFlag("index_balance_decider"); - public enum IndexShardCountDeciderStatus { - DISABLED, - ENABLED; - - public boolean enabled() { - return this == ENABLED; - } - - public boolean disabled() { - return this == DISABLED; - } - } - - public static final Setting INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING = Setting.enumSetting( - IndexShardCountDeciderStatus.class, + public static final Setting INDEX_BALANCE_DECIDER_ENABLED_SETTING = Setting.boolSetting( SETTING_PREFIX + "enabled", - INDEX_SHARD_COUNT_DECIDER_FEATURE_FLAG.isEnabled() ? IndexShardCountDeciderStatus.ENABLED : IndexShardCountDeciderStatus.DISABLED, + INDEX_BALANCE_DECIDER_FEATURE_FLAG.isEnabled(), Setting.Property.Dynamic, Setting.Property.NodeScope ); @@ -48,7 +34,7 @@ public boolean disabled() { * i.e. ideal = 4 shards, skew_tolerance = 1.3 * maximum tolerated index shards = Math.ceil(4 * 1.3) = 6. 
*/ - public static final Setting INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE = Setting.doubleSetting( + public static final Setting INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE = Setting.doubleSetting( SETTING_PREFIX + "load_skew_tolerance", 1.5d, 1.0d, @@ -56,16 +42,16 @@ public boolean disabled() { Setting.Property.NodeScope ); - private volatile IndexShardCountDeciderStatus indexShardCountDeciderStatus; + private volatile boolean deciderEnabled; private volatile double loadSkewTolerance; - public IndexShardCountConstraintSettings(ClusterSettings clusterSettings) { - clusterSettings.initializeAndWatch(INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING, status -> this.indexShardCountDeciderStatus = status); - clusterSettings.initializeAndWatch(INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE, value -> this.loadSkewTolerance = value); + public IndexBalanceConstraintSettings(ClusterSettings clusterSettings) { + clusterSettings.initializeAndWatch(INDEX_BALANCE_DECIDER_ENABLED_SETTING, enabled -> this.deciderEnabled = enabled); + clusterSettings.initializeAndWatch(INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE, value -> this.loadSkewTolerance = value); } - public IndexShardCountDeciderStatus getIndexShardCountDeciderStatus() { - return this.indexShardCountDeciderStatus; + public boolean isDeciderEnabled() { + return this.deciderEnabled; } public double getLoadSkewTolerance() { diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java new file mode 100644 index 0000000000000..70d5102507065 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -0,0 +1,115 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.cluster.metadata.ProjectId; +import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.allocation.IndexBalanceConstraintSettings; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.core.Strings; +import org.elasticsearch.index.Index; + +import java.util.function.Predicate; +import java.util.stream.Collectors; + +/** + * For an index of n shards hosted by a cluster of m nodes, a node should not host + * significantly more than n / m shards. This allocation decider enforces this principle. + * This allocation decider excludes any nodes flagged for shutdown from consideration + * when computing optimal shard distributions. 
+ */ +public class IndexBalanceAllocationDecider extends AllocationDecider { + + private static final Logger logger = LogManager.getLogger(IndexBalanceAllocationDecider.class); + + public static final String NAME = "index_balance"; + + private final IndexBalanceConstraintSettings indexBalanceConstraintSettings; + + public IndexBalanceAllocationDecider(ClusterSettings clusterSettings) { + this.indexBalanceConstraintSettings = new IndexBalanceConstraintSettings(clusterSettings); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + if (indexBalanceConstraintSettings.isDeciderEnabled()) { + Index index = shardRouting.index(); + if (node.hasIndex(index)) { + var dataNodes = allocation.nodes() + .stream() + .filter(DiscoveryNode::canContainData) + .map(DiscoveryNode::getId) + .collect(Collectors.toSet()); + var nodesShuttingDown = allocation.metadata() + .nodeShutdowns() + .getAll() + .values() + .stream() + .map(SingleNodeShutdownMetadata::getNodeId) + .collect(Collectors.toSet()); + var availableDataNodes = dataNodes.stream().filter(Predicate.not(nodesShuttingDown::contains)).collect(Collectors.toSet()); + + assert availableDataNodes.isEmpty() == false; + assert allocation.getClusterState().routingTable(ProjectId.DEFAULT).hasIndex(index); + + var totalShards = allocation.getClusterState().routingTable(ProjectId.DEFAULT).index(index).size(); + var idealAllocation = Math.ceil((double) totalShards / availableDataNodes.size()); + var threshold = (int) Math.ceil(idealAllocation * indexBalanceConstraintSettings.getLoadSkewTolerance()); + var currentAllocation = node.numberOfOwningShardsForIndex(index); + + if (currentAllocation >= threshold) { + + String rationale = + """ + For index [%s] with [%d] shards, Node [%s] is expected to hold [%.0f] shards for index [%s], based on the total of [%d] + nodes available. 
The configured load skew tolerance is [%.2f], which yields an allocation threshold of + Math.ceil([%.0f] × [%.2f]) = [%d] shards. Currently, node [%s] is assigned [%d] shards of index [%s]. Therefore, + assigning additional shards is not preferred. + """; + + String explanation = Strings.format( + rationale, + index, + totalShards, + node.nodeId(), + idealAllocation, + index, + availableDataNodes.size(), + indexBalanceConstraintSettings.getLoadSkewTolerance(), + idealAllocation, + indexBalanceConstraintSettings.getLoadSkewTolerance(), + threshold, + node.nodeId(), + currentAllocation, + index + ); + + if (logger.isTraceEnabled()) { + logger.trace(explanation); + } + + return allocation.decision(Decision.NOT_PREFERRED, NAME, explanation); + } + } + + return Decision.YES; + } else { + return Decision.single(Decision.Type.YES, NAME, "Decider is disabled"); + } + + } + +} diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java deleted file mode 100644 index 1908470f69be0..0000000000000 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexShardCountAllocationDecider.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.cluster.routing.allocation.decider; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.elasticsearch.cluster.metadata.IndexMetadata; -import org.elasticsearch.cluster.metadata.ProjectId; -import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; -import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.routing.RoutingNode; -import org.elasticsearch.cluster.routing.ShardRouting; -import org.elasticsearch.cluster.routing.allocation.IndexShardCountConstraintSettings; -import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; -import org.elasticsearch.common.settings.ClusterSettings; -import org.elasticsearch.core.Strings; -import org.elasticsearch.index.Index; - -import java.util.function.Predicate; -import java.util.stream.Collectors; - -/** - * For an index of n shards hosted by a cluster of m nodes, a node should not host - * significantly more than n / m shards. This allocation decider enforces this principle. - * This allocation decider excludes any nodes flagged for shutdown from consideration - * when computing optimal shard distributions. 
- */ -public class IndexShardCountAllocationDecider extends AllocationDecider { - - private static final Logger logger = LogManager.getLogger(IndexShardCountAllocationDecider.class); - - public static final String NAME = "index_shard_count"; - - private final IndexShardCountConstraintSettings indexShardCountConstraintSettings; - - public IndexShardCountAllocationDecider(ClusterSettings clusterSettings) { - this.indexShardCountConstraintSettings = new IndexShardCountConstraintSettings(clusterSettings); - } - - @Override - public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - if (indexShardCountConstraintSettings.getIndexShardCountDeciderStatus().disabled()) { - return Decision.single(Decision.Type.YES, NAME, "Decider is disabled"); - } - - Index index = shardRouting.index(); - if (node.hasIndex(index)) { - var dataNodes = allocation.nodes() - .stream() - .filter(DiscoveryNode::canContainData) - .map(DiscoveryNode::getId) - .collect(Collectors.toSet()); - var nodesShuttingDown = allocation.metadata() - .nodeShutdowns() - .getAll() - .values() - .stream() - .map(SingleNodeShutdownMetadata::getNodeId) - .collect(Collectors.toSet()); - var availableDataNodes = dataNodes.stream().filter(Predicate.not(nodesShuttingDown::contains)).collect(Collectors.toSet()); - - assert availableDataNodes.isEmpty() == false; - assert allocation.getClusterState().routingTable(ProjectId.DEFAULT).hasIndex(index); - - var totalShards = allocation.getClusterState().routingTable(ProjectId.DEFAULT).index(index).size(); - var idealAllocation = Math.ceil((double) totalShards / availableDataNodes.size()); - var threshold = (int) Math.ceil(idealAllocation * indexShardCountConstraintSettings.getLoadSkewTolerance()); - var currentAllocation = node.numberOfOwningShardsForIndex(index); - - if (currentAllocation >= threshold) { - - String rationale = """ - For index [%s] with [%d] shards, Node [%s] is expected to hold [%.0f] shards for index [%s], 
based on the total of [%d] - nodes available. The configured load skew tolerance is [%.2f], which yields an allocation threshold of - Math.ceil([%.0f] × [%.2f]) = [%d] shards. Currently, node [%s] is assigned [%d] shards of index [%s]. Therefore, - assigning additional shards is not preferred. - """; - - String explanation = Strings.format( - rationale, - index, - totalShards, - node.nodeId(), - idealAllocation, - index, - availableDataNodes.size(), - indexShardCountConstraintSettings.getLoadSkewTolerance(), - idealAllocation, - indexShardCountConstraintSettings.getLoadSkewTolerance(), - threshold, - node.nodeId(), - currentAllocation, - index - ); - - if (logger.isTraceEnabled()) { - logger.trace(explanation); - } - - return allocation.decision(Decision.NOT_PREFERRED, NAME, explanation); - } - } - - return Decision.YES; - } - - @Override - public Decision canRemain(IndexMetadata indexMetadata, ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - if (indexShardCountConstraintSettings.getIndexShardCountDeciderStatus().disabled()) { - return Decision.single(Decision.Type.YES, NAME, "Decider is disabled"); - } - - // TODO: implement - return Decision.single(Decision.Type.YES, NAME, "canRemain() is not yet implemented"); - } - -} diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index 2429b7e5260bf..44ebbbf4b57b6 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -46,7 +46,7 @@ import org.elasticsearch.cluster.routing.OperationRouting; import org.elasticsearch.cluster.routing.allocation.DataTier; import org.elasticsearch.cluster.routing.allocation.DiskThresholdSettings; -import org.elasticsearch.cluster.routing.allocation.IndexShardCountConstraintSettings; +import 
org.elasticsearch.cluster.routing.allocation.IndexBalanceConstraintSettings; import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings; import org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundSummaryService; import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; @@ -653,8 +653,8 @@ public void apply(Settings value, Settings current, Settings previous) { WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_HIGH_UTILIZATION_DURATION_SETTING, WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_QUEUE_LATENCY_THRESHOLD_SETTING, WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_REROUTE_INTERVAL_SETTING, - IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_ENABLED_SETTING, - IndexShardCountConstraintSettings.INDEX_SHARD_COUNT_DECIDER_LOAD_SKEW_TOLERANCE + IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_ENABLED_SETTING, + IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE, SamplingService.TTL_POLL_INTERVAL_SETTING ); } From 0365a62f49efab38c5d49a846b0934c0ae8cb03e Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Sat, 18 Oct 2025 15:40:48 -0700 Subject: [PATCH 12/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. 
--- .../IndexBalanceAllocationDecider.java | 130 +++++++++--------- 1 file changed, 67 insertions(+), 63 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 70d5102507065..0d3e298c95036 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -22,6 +22,7 @@ import org.elasticsearch.core.Strings; import org.elasticsearch.index.Index; +import java.util.Set; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -45,71 +46,74 @@ public IndexBalanceAllocationDecider(ClusterSettings clusterSettings) { @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - if (indexBalanceConstraintSettings.isDeciderEnabled()) { - Index index = shardRouting.index(); - if (node.hasIndex(index)) { - var dataNodes = allocation.nodes() - .stream() - .filter(DiscoveryNode::canContainData) - .map(DiscoveryNode::getId) - .collect(Collectors.toSet()); - var nodesShuttingDown = allocation.metadata() - .nodeShutdowns() - .getAll() - .values() - .stream() - .map(SingleNodeShutdownMetadata::getNodeId) - .collect(Collectors.toSet()); - var availableDataNodes = dataNodes.stream().filter(Predicate.not(nodesShuttingDown::contains)).collect(Collectors.toSet()); - - assert availableDataNodes.isEmpty() == false; - assert allocation.getClusterState().routingTable(ProjectId.DEFAULT).hasIndex(index); - - var totalShards = allocation.getClusterState().routingTable(ProjectId.DEFAULT).index(index).size(); - var idealAllocation = Math.ceil((double) totalShards / availableDataNodes.size()); - var threshold = (int) Math.ceil(idealAllocation * 
indexBalanceConstraintSettings.getLoadSkewTolerance()); - var currentAllocation = node.numberOfOwningShardsForIndex(index); - - if (currentAllocation >= threshold) { - - String rationale = - """ - For index [%s] with [%d] shards, Node [%s] is expected to hold [%.0f] shards for index [%s], based on the total of [%d] - nodes available. The configured load skew tolerance is [%.2f], which yields an allocation threshold of - Math.ceil([%.0f] × [%.2f]) = [%d] shards. Currently, node [%s] is assigned [%d] shards of index [%s]. Therefore, - assigning additional shards is not preferred. - """; - - String explanation = Strings.format( - rationale, - index, - totalShards, - node.nodeId(), - idealAllocation, - index, - availableDataNodes.size(), - indexBalanceConstraintSettings.getLoadSkewTolerance(), - idealAllocation, - indexBalanceConstraintSettings.getLoadSkewTolerance(), - threshold, - node.nodeId(), - currentAllocation, - index - ); - - if (logger.isTraceEnabled()) { - logger.trace(explanation); - } - - return allocation.decision(Decision.NOT_PREFERRED, NAME, explanation); - } - } - - return Decision.YES; - } else { - return Decision.single(Decision.Type.YES, NAME, "Decider is disabled"); + if (indexBalanceConstraintSettings.isDeciderEnabled() == false) { + return Decision.single(Decision.Type.YES, NAME, "Decider is disabled."); } + // Never reject allocation of an unassigned shard + if (shardRouting.assignedToNode() == false) { + return Decision.single(Decision.Type.YES, NAME, "Shard is unassigned. 
Decider takes no action."); + } + + Index index = shardRouting.index(); + + if (node.hasIndex(index) == false) { + return Decision.single(Decision.Type.YES, NAME, "Node does not currently host this index."); + } + + final Set dataNodes = allocation.nodes() + .stream() + .filter(DiscoveryNode::canContainData) + .map(DiscoveryNode::getId) + .collect(Collectors.toSet()); + final Set nodesShuttingDown = allocation.metadata() + .nodeShutdowns() + .getAll() + .values() + .stream() + .map(SingleNodeShutdownMetadata::getNodeId) + .collect(Collectors.toSet()); + final Set availableDataNodes = dataNodes.stream() + .filter(Predicate.not(nodesShuttingDown::contains)) + .collect(Collectors.toSet()); + + assert availableDataNodes.isEmpty() == false; + assert allocation.getClusterState().routingTable(ProjectId.DEFAULT).hasIndex(index); + + final int totalShards = allocation.getClusterState().metadata().getProject(ProjectId.DEFAULT).index(index).getTotalNumberOfShards(); + final double idealAllocation = Math.ceil((double) totalShards / availableDataNodes.size()); + final int threshold = (int) Math.ceil(idealAllocation * indexBalanceConstraintSettings.getLoadSkewTolerance()); + final int currentAllocation = node.numberOfOwningShardsForIndex(index); + + if (currentAllocation >= threshold) { + String explanation = Strings.format( + """ + For index [%s] with [%d] shards, Node [%s] is expected to hold [%.0f] shards for index [%s], based on the total of [%d] + nodes available. The configured load skew tolerance is [%.2f], which yields an allocation threshold of + Math.ceil([%.0f] × [%.2f]) = [%d] shards. Currently, node [%s] is assigned [%d] shards of index [%s]. Therefore, + assigning additional shards is not preferred. 
+ """, + index, + totalShards, + node.nodeId(), + idealAllocation, + index, + availableDataNodes.size(), + indexBalanceConstraintSettings.getLoadSkewTolerance(), + idealAllocation, + indexBalanceConstraintSettings.getLoadSkewTolerance(), + threshold, + node.nodeId(), + currentAllocation, + index + ); + + logger.trace(explanation); + + return allocation.decision(Decision.NOT_PREFERRED, NAME, explanation); + } + + return Decision.YES; } } From 4a55cce7f012750b19cac7e803322f675f899580 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Sat, 18 Oct 2025 16:34:38 -0700 Subject: [PATCH 13/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. --- .../allocation/decider/IndexBalanceAllocationDecider.java | 5 ++--- .../java/org/elasticsearch/cluster/ClusterModuleTests.java | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 0d3e298c95036..77b87f1dd9007 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -11,7 +11,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.RoutingNode; @@ -78,9 +77,9 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing .collect(Collectors.toSet()); assert 
availableDataNodes.isEmpty() == false; - assert allocation.getClusterState().routingTable(ProjectId.DEFAULT).hasIndex(index); + assert allocation.getClusterState().routingTable().hasIndex(index); - final int totalShards = allocation.getClusterState().metadata().getProject(ProjectId.DEFAULT).index(index).getTotalNumberOfShards(); + final int totalShards = allocation.getClusterState().metadata().getProject().index(index).getTotalNumberOfShards(); final double idealAllocation = Math.ceil((double) totalShards / availableDataNodes.size()); final int threshold = (int) Math.ceil(idealAllocation * indexBalanceConstraintSettings.getLoadSkewTolerance()); final int currentAllocation = node.numberOfOwningShardsForIndex(index); diff --git a/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java b/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java index 70df5f78615ca..9f66381e7025d 100644 --- a/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java @@ -23,6 +23,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDecider; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.IndexBalanceAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.IndexVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeReplacementAllocationDecider; @@ -286,7 +287,8 @@ public void testAllocationDeciderOrder() { DiskThresholdDecider.class, ThrottlingAllocationDecider.class, ShardsLimitAllocationDecider.class, - AwarenessAllocationDecider.class + AwarenessAllocationDecider.class, + IndexBalanceAllocationDecider.class ); Collection 
deciders = ClusterModule.createAllocationDeciders( Settings.EMPTY, From 760b87881a574826435279533def400bd15d33f6 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Sun, 19 Oct 2025 00:24:39 -0700 Subject: [PATCH 14/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. --- .../allocation/decider/IndexBalanceAllocationDecider.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 77b87f1dd9007..f157d6fe453d5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.RoutingNode; @@ -75,11 +76,12 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing final Set availableDataNodes = dataNodes.stream() .filter(Predicate.not(nodesShuttingDown::contains)) .collect(Collectors.toSet()); + final ProjectId projectId = allocation.getClusterState().metadata().projectFor(index).id(); assert availableDataNodes.isEmpty() == false; - assert allocation.getClusterState().routingTable().hasIndex(index); + assert allocation.getClusterState().routingTable(projectId).hasIndex(index); - final int totalShards = 
allocation.getClusterState().metadata().getProject().index(index).getTotalNumberOfShards(); + final int totalShards = allocation.getClusterState().metadata().getProject(projectId).index(index).getTotalNumberOfShards(); final double idealAllocation = Math.ceil((double) totalShards / availableDataNodes.size()); final int threshold = (int) Math.ceil(idealAllocation * indexBalanceConstraintSettings.getLoadSkewTolerance()); final int currentAllocation = node.numberOfOwningShardsForIndex(index); From eb32f726050ad3d2a7195e56ce0689fb76ef89db Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Sun, 19 Oct 2025 01:32:25 -0700 Subject: [PATCH 15/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. --- .../allocation/decider/IndexBalanceAllocationDecider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index f157d6fe453d5..3bf0b8a6165af 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -114,7 +114,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return allocation.decision(Decision.NOT_PREFERRED, NAME, explanation); } - return Decision.YES; + return allocation.decision(Decision.YES, NAME, "Node index shard allocation is under the threshold."); } } From 81cf332d821eea5a18b844299d6d5f267ce15844 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Sun, 19 Oct 2025 04:17:07 -0700 Subject: [PATCH 16/67] Allocation: Include index shard counts as a 
criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. --- .../elasticsearch/cluster/ClusterModule.java | 2 +- .../decider/IndexBalanceAllocationDecider.java | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 67d0b3659266b..ed12396879637 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -487,7 +487,7 @@ public static Collection createAllocationDeciders( addAllocationDecider(deciders, new ThrottlingAllocationDecider(clusterSettings)); addAllocationDecider(deciders, new ShardsLimitAllocationDecider(clusterSettings)); addAllocationDecider(deciders, new AwarenessAllocationDecider(settings, clusterSettings)); - addAllocationDecider(deciders, new IndexBalanceAllocationDecider(clusterSettings)); + addAllocationDecider(deciders, new IndexBalanceAllocationDecider(settings, clusterSettings)); clusterPlugins.stream() .flatMap(p -> p.createAllocationDeciders(settings, clusterSettings).stream()) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 3bf0b8a6165af..5ade8bee7ed13 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -14,18 +14,25 @@ import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; import 
org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeFilters; import org.elasticsearch.cluster.routing.RoutingNode; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.IndexBalanceConstraintSettings; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; import org.elasticsearch.index.Index; +import java.util.List; +import java.util.Map; import java.util.Set; import java.util.function.Predicate; import java.util.stream.Collectors; +import static org.elasticsearch.cluster.node.DiscoveryNodeFilters.OpType.OR; +import static org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider.CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING; + /** * For an index of n shards hosted by a cluster of m nodes, a node should not host * significantly more than n / m shards. This allocation decider enforces this principle. 
@@ -39,9 +46,12 @@ public class IndexBalanceAllocationDecider extends AllocationDecider { public static final String NAME = "index_balance"; private final IndexBalanceConstraintSettings indexBalanceConstraintSettings; + private volatile DiscoveryNodeFilters clusterExcludeFilters; - public IndexBalanceAllocationDecider(ClusterSettings clusterSettings) { + public IndexBalanceAllocationDecider(Settings settings, ClusterSettings clusterSettings) { this.indexBalanceConstraintSettings = new IndexBalanceConstraintSettings(clusterSettings); + setClusterExcludeFilters(CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING.getAsMap(settings)); + clusterSettings.addAffixMapUpdateConsumer(CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING, this::setClusterExcludeFilters, (a, b) -> {}); } @Override @@ -64,6 +74,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing final Set dataNodes = allocation.nodes() .stream() .filter(DiscoveryNode::canContainData) + .filter(it -> clusterExcludeFilters == null || clusterExcludeFilters.match(it) == false) .map(DiscoveryNode::getId) .collect(Collectors.toSet()); final Set nodesShuttingDown = allocation.metadata() @@ -117,4 +128,8 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return allocation.decision(Decision.YES, NAME, "Node index shard allocation is under the threshold."); } + private void setClusterExcludeFilters(Map> filters) { + clusterExcludeFilters = DiscoveryNodeFilters.trimTier(DiscoveryNodeFilters.buildFromKeyValues(OR, filters)); + logger.info(clusterExcludeFilters); + } } From ab2bc005a6d45abbdc2220325b72936a8da0ee2e Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Sun, 19 Oct 2025 04:38:18 -0700 Subject: [PATCH 17/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. 
--- .../allocation/decider/IndexBalanceAllocationDecider.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 5ade8bee7ed13..c0b0d68069836 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -130,6 +130,5 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing private void setClusterExcludeFilters(Map> filters) { clusterExcludeFilters = DiscoveryNodeFilters.trimTier(DiscoveryNodeFilters.buildFromKeyValues(OR, filters)); - logger.info(clusterExcludeFilters); } } From 45c11b6256bcc942838b115d23e90737f534a103 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Sun, 19 Oct 2025 04:38:43 -0700 Subject: [PATCH 18/67] Allocation: Include index shard counts as a criteria In a balanced allocation, for an index with n shards on a cluster of m nodes, each node should host not significantly more than n / m shards. This decider enforces this principle. 
--- .../allocation/decider/IndexBalanceAllocationDecider.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index c0b0d68069836..d3226bade37d5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -120,8 +120,6 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing index ); - logger.trace(explanation); - return allocation.decision(Decision.NOT_PREFERRED, NAME, explanation); } From 3a9f656445906e575cb665f36d7bb9f9565a01d1 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Mon, 20 Oct 2025 11:02:16 -0700 Subject: [PATCH 19/67] Allocation: Include index shard counts as a criteria fix test regressions. 
--- .../allocation/decider/WriteLoadConstraintDeciderIT.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java index 2021818db57b1..820e30a6c3c1f 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java @@ -168,7 +168,10 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() { "---> Update the filter to exclude " + harness.firstDataNodeName + " so shards will be reassigned away to the other nodes" ); // Updating the cluster settings will trigger a reroute request, no need to explicitly request one in the test. 
- updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", harness.firstDataNodeName)); + updateClusterSettings( + Settings.builder() + .put("cluster.routing.allocation.exclude._name", harness.firstDataNodeName) + .put("cluster.routing.allocation.index_balance_decider.enabled", false)); safeAwait(temporaryClusterStateListener); } catch (AssertionError error) { From 850a7e75b98fdd748fcf77b83d5f023f1434c0af Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 20 Oct 2025 18:09:42 +0000 Subject: [PATCH 20/67] [CI] Auto commit changes from spotless --- .../allocation/decider/WriteLoadConstraintDeciderIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java index 820e30a6c3c1f..cedf6d148e21e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java @@ -171,7 +171,8 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() { updateClusterSettings( Settings.builder() .put("cluster.routing.allocation.exclude._name", harness.firstDataNodeName) - .put("cluster.routing.allocation.index_balance_decider.enabled", false)); + .put("cluster.routing.allocation.index_balance_decider.enabled", false) + ); safeAwait(temporaryClusterStateListener); } catch (AssertionError error) { From ff29c079f1608bf401bb6fca22e331ab2946f171 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Wed, 22 Oct 2025 11:01:33 -0700 Subject: [PATCH 21/67] Allocation: Include index shard counts as a criteria fix test regressions. 
--- .../allocation/decider/WriteLoadConstraintDeciderIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java index cedf6d148e21e..01a3f797f5921 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java @@ -171,7 +171,7 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() { updateClusterSettings( Settings.builder() .put("cluster.routing.allocation.exclude._name", harness.firstDataNodeName) - .put("cluster.routing.allocation.index_balance_decider.enabled", false) + .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 1000) ); safeAwait(temporaryClusterStateListener); From aabb0991d9b8add554dfcabfa8a25a545e4b0d8b Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Wed, 22 Oct 2025 13:09:21 -0700 Subject: [PATCH 22/67] Allocation: Include index shard counts as a criteria Convert decider to stateless. 
--- .../IndexBalanceAllocationDecider.java | 80 ++++++++++++------- 1 file changed, 52 insertions(+), 28 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index d3226bade37d5..e5b65e5b0b2b1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -12,9 +12,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.cluster.metadata.ProjectId; -import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeFilters; +import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.routing.RoutingNode; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.IndexBalanceConstraintSettings; @@ -24,13 +24,14 @@ import org.elasticsearch.core.Strings; import org.elasticsearch.index.Index; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.function.Predicate; -import java.util.stream.Collectors; import static org.elasticsearch.cluster.node.DiscoveryNodeFilters.OpType.OR; +import static org.elasticsearch.cluster.node.DiscoveryNodeRole.INDEX_ROLE; +import static org.elasticsearch.cluster.node.DiscoveryNodeRole.SEARCH_ROLE; import static org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider.CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING; /** @@ -46,12 +47,14 @@ public class IndexBalanceAllocationDecider extends AllocationDecider { public static final String NAME = "index_balance"; private final 
IndexBalanceConstraintSettings indexBalanceConstraintSettings; + private final boolean isStateless; private volatile DiscoveryNodeFilters clusterExcludeFilters; public IndexBalanceAllocationDecider(Settings settings, ClusterSettings clusterSettings) { this.indexBalanceConstraintSettings = new IndexBalanceConstraintSettings(clusterSettings); setClusterExcludeFilters(CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING.getAsMap(settings)); clusterSettings.addAffixMapUpdateConsumer(CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING, this::setClusterExcludeFilters, (a, b) -> {}); + isStateless = DiscoveryNode.isStateless(settings); } @Override @@ -60,40 +63,47 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return Decision.single(Decision.Type.YES, NAME, "Decider is disabled."); } - // Never reject allocation of an unassigned shard - if (shardRouting.assignedToNode() == false) { - return Decision.single(Decision.Type.YES, NAME, "Shard is unassigned. Decider takes no action."); + if (isStateless == false) { + return Decision.single(Decision.Type.YES, NAME, "Decider does not currently support stateful."); } Index index = shardRouting.index(); - if (node.hasIndex(index) == false) { return Decision.single(Decision.Type.YES, NAME, "Node does not currently host this index."); } - final Set dataNodes = allocation.nodes() - .stream() - .filter(DiscoveryNode::canContainData) - .filter(it -> clusterExcludeFilters == null || clusterExcludeFilters.match(it) == false) - .map(DiscoveryNode::getId) - .collect(Collectors.toSet()); - final Set nodesShuttingDown = allocation.metadata() - .nodeShutdowns() - .getAll() - .values() - .stream() - .map(SingleNodeShutdownMetadata::getNodeId) - .collect(Collectors.toSet()); - final Set availableDataNodes = dataNodes.stream() - .filter(Predicate.not(nodesShuttingDown::contains)) - .collect(Collectors.toSet()); + assert node.node() != null; + assert node.node().getRoles() != null && node.node().getRoles().isEmpty() == false; + if 
(node.node().getRoles().contains(INDEX_ROLE) == false && node.node().getRoles().contains(SEARCH_ROLE) == false) { + return Decision.single(Decision.Type.YES, NAME, "Node has neither index nor search roles, outside purview."); + } + + if (node.node().getRoles().contains(INDEX_ROLE) && shardRouting.primary() == false) { + return Decision.single(Decision.Type.YES, NAME, "Decider allows replicas move to index nodes."); + } + + if (node.node().getRoles().contains(SEARCH_ROLE) && shardRouting.primary()) { + return Decision.single(Decision.Type.YES, NAME, "Decider allows primaries move to search nodes."); + } + final ProjectId projectId = allocation.getClusterState().metadata().projectFor(index).id(); + final Set eligibleNodes = new HashSet<>(); + int totalShards = 0; + + if (node.node().getRoles().contains(INDEX_ROLE)) { + collectEligibleNodes(allocation, eligibleNodes, INDEX_ROLE); + // Primary shards only. + totalShards = allocation.getClusterState().routingTable(projectId).index(index).size(); + } else if (node.node().getRoles().contains(SEARCH_ROLE)) { + collectEligibleNodes(allocation, eligibleNodes, SEARCH_ROLE); + // Replicas only. 
+ totalShards = allocation.getClusterState().metadata().getProject(projectId).index(index).getNumberOfReplicas(); + } - assert availableDataNodes.isEmpty() == false; - assert allocation.getClusterState().routingTable(projectId).hasIndex(index); + assert eligibleNodes.isEmpty() == false; + assert totalShards > 0; - final int totalShards = allocation.getClusterState().metadata().getProject(projectId).index(index).getTotalNumberOfShards(); - final double idealAllocation = Math.ceil((double) totalShards / availableDataNodes.size()); + final double idealAllocation = Math.ceil((double) totalShards / eligibleNodes.size()); final int threshold = (int) Math.ceil(idealAllocation * indexBalanceConstraintSettings.getLoadSkewTolerance()); final int currentAllocation = node.numberOfOwningShardsForIndex(index); @@ -110,7 +120,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing node.nodeId(), idealAllocation, index, - availableDataNodes.size(), + eligibleNodes.size(), indexBalanceConstraintSettings.getLoadSkewTolerance(), idealAllocation, indexBalanceConstraintSettings.getLoadSkewTolerance(), @@ -120,13 +130,27 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing index ); + logger.debug(explanation); + return allocation.decision(Decision.NOT_PREFERRED, NAME, explanation); } return allocation.decision(Decision.YES, NAME, "Node index shard allocation is under the threshold."); } + private void collectEligibleNodes(RoutingAllocation allocation, Set eligibleNodes, DiscoveryNodeRole searchRole) { + for (DiscoveryNode discoveryNode : allocation.nodes()) { + if (discoveryNode.canContainData() + && discoveryNode.getRoles().contains(searchRole) + && (clusterExcludeFilters == null || clusterExcludeFilters.match(discoveryNode) == false) + && allocation.metadata().nodeShutdowns().contains(discoveryNode.getId()) == false) { + eligibleNodes.add(discoveryNode); + } + } + } + private void setClusterExcludeFilters(Map> filters) { 
clusterExcludeFilters = DiscoveryNodeFilters.trimTier(DiscoveryNodeFilters.buildFromKeyValues(OR, filters)); } + } From b54975c2908dece6bd5be60d265c0ddb1b64b0c8 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Wed, 22 Oct 2025 13:10:33 -0700 Subject: [PATCH 23/67] Allocation: Include index shard counts as a criteria Convert decider to stateless. --- .../allocation/decider/IndexBalanceAllocationDecider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index e5b65e5b0b2b1..0f138ce4a7ead 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -130,7 +130,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing index ); - logger.debug(explanation); + logger.trace(explanation); return allocation.decision(Decision.NOT_PREFERRED, NAME, explanation); } From 2fb51c39857b6ec940a324f2a6fecb253f02d1ef Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Thu, 23 Oct 2025 10:42:27 -0700 Subject: [PATCH 24/67] Allocation: Include index shard counts as a criteria Add unit tests --- .../IndexBalanceAllocationDecider.java | 4 +- .../IndexBalanceAllocationDeciderTests.java | 163 ++++++++++++++++++ 2 files changed, 165 insertions(+), 2 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 0f138ce4a7ead..1fe7297f5732e 
100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -138,10 +138,10 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return allocation.decision(Decision.YES, NAME, "Node index shard allocation is under the threshold."); } - private void collectEligibleNodes(RoutingAllocation allocation, Set eligibleNodes, DiscoveryNodeRole searchRole) { + private void collectEligibleNodes(RoutingAllocation allocation, Set eligibleNodes, DiscoveryNodeRole role) { for (DiscoveryNode discoveryNode : allocation.nodes()) { if (discoveryNode.canContainData() - && discoveryNode.getRoles().contains(searchRole) + && discoveryNode.getRoles().contains(role) && (clusterExcludeFilters == null || clusterExcludeFilters.match(discoveryNode) == false) && allocation.metadata().nodeShutdowns().contains(discoveryNode.getId()) == false) { eligibleNodes.add(discoveryNode); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java new file mode 100644 index 0000000000000..26cd1884a688b --- /dev/null +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -0,0 +1,163 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.cluster.ClusterInfo; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ESAllocationTestCase; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.metadata.ProjectId; +import org.elasticsearch.cluster.metadata.ProjectMetadata; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeRole; +import org.elasticsearch.cluster.node.DiscoveryNodeUtils; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.GlobalRoutingTable; +import org.elasticsearch.cluster.routing.IndexRoutingTable; +import org.elasticsearch.cluster.routing.IndexShardRoutingTable; +import org.elasticsearch.cluster.routing.RoutingNodes; +import org.elasticsearch.cluster.routing.RoutingNodesHelper; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.ShardRoutingState; +import org.elasticsearch.cluster.routing.TestShardRouting; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.shard.IndexLongFieldRange; +import org.elasticsearch.index.shard.ShardId; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_CREATION_DATE; +import static org.elasticsearch.cluster.routing.TestShardRouting.shardRoutingBuilder; +import static org.elasticsearch.common.settings.ClusterSettings.createBuiltInClusterSettings; +import static org.elasticsearch.test.ESTestCase.indexSettings; + +public class 
IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { + + private DiscoveryNode indexNodeOne; + private DiscoveryNode indexNodeTwo; + private DiscoveryNode searchNodeOne; + private DiscoveryNode searchNodeTwo; + private DiscoveryNode masterNode; + private List allNodes; + private DiscoveryNodes discoveryNodes; + private ShardId[] shardIds; + private final String indexName = "IndexBalanceAllocationDeciderIndex"; + private final List shardRoutings = new ArrayList<>(); + private ClusterState clusterState; + + @Before + public void setup() { + indexNodeOne = DiscoveryNodeUtils.builder("indexNodeOne").roles(Collections.singleton(DiscoveryNodeRole.INDEX_ROLE)).build(); + indexNodeTwo = DiscoveryNodeUtils.builder("indexNodeTwo").roles(Collections.singleton(DiscoveryNodeRole.INDEX_ROLE)).build(); + searchNodeOne = DiscoveryNodeUtils.builder("searchNodeOne").roles(Collections.singleton(DiscoveryNodeRole.SEARCH_ROLE)).build(); + searchNodeTwo = DiscoveryNodeUtils.builder("searchNodeTwo").roles(Collections.singleton(DiscoveryNodeRole.SEARCH_ROLE)).build(); + masterNode = DiscoveryNodeUtils.builder("masterNode").roles(Collections.singleton(DiscoveryNodeRole.MASTER_ROLE)).build(); + allNodes = List.of(indexNodeOne, indexNodeTwo, searchNodeOne, searchNodeTwo, masterNode); + + DiscoveryNodes.Builder discoveryNodeBuilder = DiscoveryNodes.builder(); + + discoveryNodeBuilder.add(indexNodeOne); + discoveryNodeBuilder.add(indexNodeTwo); + discoveryNodeBuilder.add(searchNodeOne); + discoveryNodeBuilder.add(searchNodeTwo); + discoveryNodeBuilder.add(masterNode); + + ProjectId projectId = ProjectId.fromId("test-IndexBalanceAllocationDecider"); + ClusterState.Builder state = ClusterState.builder(new ClusterName("test-IndexBalanceAllocationDecider")); + final ProjectMetadata.Builder projectBuilder = ProjectMetadata.builder(projectId); + IndexMetadata indexMetadata = IndexMetadata.builder(indexName) + .settings(indexSettings(IndexVersion.current(), 10, 
3).put(SETTING_CREATION_DATE, System.currentTimeMillis())) + .timestampRange(IndexLongFieldRange.UNKNOWN) + .eventIngestedRange(IndexLongFieldRange.UNKNOWN) + .build(); + projectBuilder.put(indexMetadata, false); + IndexRoutingTable.Builder indexRoutingTableBuilder = IndexRoutingTable.builder(indexMetadata.getIndex()); + + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + Metadata.Builder metadataBuilder = Metadata.builder(); + + shardIds = new ShardId[10]; + for (int i = 0; i <= 9; i++) { + shardIds[i] = new ShardId(indexMetadata.getIndex(), i); + + IndexShardRoutingTable.Builder indexShardRoutingBuilder = IndexShardRoutingTable.builder(shardIds[i]); + + String indexNodeId = i % 2 == 0 ? indexNodeOne.getId() : indexNodeTwo.getId(); + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting(shardIds[i], indexNodeId, null, true, ShardRoutingState.STARTED) + ); + + for (int j = 1; j <= 1; j++) { + String searchNodeId = j % 2 == 0 ? searchNodeOne.getId() : searchNodeTwo.getId(); + indexShardRoutingBuilder.addShard( + shardRoutingBuilder(shardIds[i], searchNodeId, false, ShardRoutingState.STARTED).withRole(ShardRouting.Role.DEFAULT) + .build() + ); + } + indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilder); + routingTableBuilder.add(indexRoutingTableBuilder.build()); + } + + metadataBuilder.put(projectBuilder).generateClusterUuidIfNeeded(); + state.nodes(discoveryNodeBuilder); + state.metadata(metadataBuilder); + state.routingTable(GlobalRoutingTable.builder().put(projectId, routingTableBuilder).build()); + clusterState = state.build(); + } + + public void test() { + + Settings settings = Settings.builder() + .put("stateless.enabled", "true") + .put("cluster.routing.allocation.index_balance_decider.enabled", "true") + .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 1.0d) + .build(); + + IndexBalanceAllocationDecider decider = new IndexBalanceAllocationDecider(settings, 
createBuiltInClusterSettings(settings)); + + ClusterInfo clusterInfo = ClusterInfo.builder().build(); + + var routingAllocation = new RoutingAllocation( + null, + RoutingNodes.immutable(clusterState.globalRoutingTable(), clusterState.nodes()), + clusterState, + clusterInfo, + null, + System.nanoTime() + ); + + routingAllocation.setDebugMode(RoutingAllocation.DebugMode.ON); + + ShardRouting shardRouting = TestShardRouting.newShardRouting( + shardIds[0], + indexNodeTwo.getId(), + null, + true, + ShardRoutingState.STARTED + ); + + Decision decision = decider.canAllocate( + shardRouting, + RoutingNodesHelper.routingNode(indexNodeTwo.getId(), indexNodeTwo, shardRouting), + routingAllocation + ); + + decision.getDecisions(); + } + +} From d14661e4a6d42c36bdebe5fb0aceddda6bf27337 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Thu, 23 Oct 2025 12:36:39 -0700 Subject: [PATCH 25/67] Allocation: Include index shard counts as a criteria Add unit tests --- .../IndexBalanceAllocationDeciderTests.java | 126 +++++++++++++----- .../WriteLoadConstraintDeciderTests.java | 14 -- .../cluster/ESAllocationTestCase.java | 13 ++ 3 files changed, 103 insertions(+), 50 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index 26cd1884a688b..63a877c4371dc 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -24,6 +24,7 @@ import org.elasticsearch.cluster.routing.GlobalRoutingTable; import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.IndexShardRoutingTable; +import org.elasticsearch.cluster.routing.RoutingNode; import 
org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.RoutingNodesHelper; import org.elasticsearch.cluster.routing.RoutingTable; @@ -39,29 +40,44 @@ import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_CREATION_DATE; import static org.elasticsearch.cluster.routing.TestShardRouting.shardRoutingBuilder; import static org.elasticsearch.common.settings.ClusterSettings.createBuiltInClusterSettings; -import static org.elasticsearch.test.ESTestCase.indexSettings; public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { + private ProjectId projectId; private DiscoveryNode indexNodeOne; private DiscoveryNode indexNodeTwo; private DiscoveryNode searchNodeOne; private DiscoveryNode searchNodeTwo; private DiscoveryNode masterNode; + + private RoutingNode routingIndexNodeOne; + private RoutingNode routingIndexNodeTwo; + private RoutingNode routingSearchNodeOne; + private RoutingNode routingSearchNodeTwo; + private RoutingNode routingMasterNode; + private List allNodes; - private DiscoveryNodes discoveryNodes; + + private int numberOfPrimaryShards; + private int replicationFactor; + private ShardId[] shardIds; private final String indexName = "IndexBalanceAllocationDeciderIndex"; - private final List shardRoutings = new ArrayList<>(); + private final Map> nodeToShardRoutings = new HashMap<>(); private ClusterState clusterState; @Before public void setup() { + numberOfPrimaryShards = 10; + replicationFactor = 2; + indexNodeOne = DiscoveryNodeUtils.builder("indexNodeOne").roles(Collections.singleton(DiscoveryNodeRole.INDEX_ROLE)).build(); indexNodeTwo = DiscoveryNodeUtils.builder("indexNodeTwo").roles(Collections.singleton(DiscoveryNodeRole.INDEX_ROLE)).build(); searchNodeOne = 
DiscoveryNodeUtils.builder("searchNodeOne").roles(Collections.singleton(DiscoveryNodeRole.SEARCH_ROLE)).build(); @@ -70,18 +86,21 @@ public void setup() { allNodes = List.of(indexNodeOne, indexNodeTwo, searchNodeOne, searchNodeTwo, masterNode); DiscoveryNodes.Builder discoveryNodeBuilder = DiscoveryNodes.builder(); + for (DiscoveryNode node : allNodes) { + discoveryNodeBuilder.add(node); + } - discoveryNodeBuilder.add(indexNodeOne); - discoveryNodeBuilder.add(indexNodeTwo); - discoveryNodeBuilder.add(searchNodeOne); - discoveryNodeBuilder.add(searchNodeTwo); - discoveryNodeBuilder.add(masterNode); - - ProjectId projectId = ProjectId.fromId("test-IndexBalanceAllocationDecider"); + projectId = ProjectId.fromId("test-IndexBalanceAllocationDecider"); ClusterState.Builder state = ClusterState.builder(new ClusterName("test-IndexBalanceAllocationDecider")); + final ProjectMetadata.Builder projectBuilder = ProjectMetadata.builder(projectId); IndexMetadata indexMetadata = IndexMetadata.builder(indexName) - .settings(indexSettings(IndexVersion.current(), 10, 3).put(SETTING_CREATION_DATE, System.currentTimeMillis())) + .settings( + indexSettings(IndexVersion.current(), numberOfPrimaryShards, replicationFactor).put( + SETTING_CREATION_DATE, + System.currentTimeMillis() + ) + ) .timestampRange(IndexLongFieldRange.UNKNOWN) .eventIngestedRange(IndexLongFieldRange.UNKNOWN) .build(); @@ -91,23 +110,31 @@ public void setup() { RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); Metadata.Builder metadataBuilder = Metadata.builder(); - shardIds = new ShardId[10]; - for (int i = 0; i <= 9; i++) { + shardIds = new ShardId[numberOfPrimaryShards]; + for (int i = 0; i < numberOfPrimaryShards; i++) { shardIds[i] = new ShardId(indexMetadata.getIndex(), i); - IndexShardRoutingTable.Builder indexShardRoutingBuilder = IndexShardRoutingTable.builder(shardIds[i]); - String indexNodeId = i % 2 == 0 ? 
indexNodeOne.getId() : indexNodeTwo.getId(); - indexShardRoutingBuilder.addShard( - TestShardRouting.newShardRouting(shardIds[i], indexNodeId, null, true, ShardRoutingState.STARTED) + DiscoveryNode indexNode = i % 2 == 0 ? indexNodeOne : indexNodeTwo; + ShardRouting primaryShardRouting = TestShardRouting.newShardRouting( + shardIds[i], + indexNode.getId(), + null, + true, + ShardRoutingState.STARTED ); - - for (int j = 1; j <= 1; j++) { - String searchNodeId = j % 2 == 0 ? searchNodeOne.getId() : searchNodeTwo.getId(); - indexShardRoutingBuilder.addShard( - shardRoutingBuilder(shardIds[i], searchNodeId, false, ShardRoutingState.STARTED).withRole(ShardRouting.Role.DEFAULT) - .build() - ); + indexShardRoutingBuilder.addShard(primaryShardRouting); + nodeToShardRoutings.putIfAbsent(indexNode, new ArrayList<>()); + nodeToShardRoutings.get(indexNode).add(primaryShardRouting); + + for (int j = 1; j <= replicationFactor; j++) { + DiscoveryNode searchNode = j % 2 == 0 ? searchNodeOne : searchNodeTwo; + ShardRouting replicaShardRouting = shardRoutingBuilder(shardIds[i], searchNode.getId(), false, ShardRoutingState.STARTED) + .withRole(ShardRouting.Role.DEFAULT) + .build(); + indexShardRoutingBuilder.addShard(replicaShardRouting); + nodeToShardRoutings.putIfAbsent(searchNode, new ArrayList<>()); + nodeToShardRoutings.get(searchNode).add(replicaShardRouting); } indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilder); routingTableBuilder.add(indexRoutingTableBuilder.build()); @@ -118,9 +145,31 @@ public void setup() { state.metadata(metadataBuilder); state.routingTable(GlobalRoutingTable.builder().put(projectId, routingTableBuilder).build()); clusterState = state.build(); + + routingIndexNodeOne = RoutingNodesHelper.routingNode( + indexNodeOne.getId(), + indexNodeOne, + nodeToShardRoutings.get(indexNodeOne).toArray(new ShardRouting[0]) + ); + routingIndexNodeTwo = RoutingNodesHelper.routingNode( + indexNodeTwo.getId(), + indexNodeTwo, + 
nodeToShardRoutings.get(indexNodeTwo).toArray(new ShardRouting[0]) + ); + routingSearchNodeOne = RoutingNodesHelper.routingNode( + searchNodeOne.getId(), + searchNodeOne, + nodeToShardRoutings.get(searchNodeOne).toArray(new ShardRouting[0]) + ); + routingSearchNodeTwo = RoutingNodesHelper.routingNode( + searchNodeTwo.getId(), + searchNodeTwo, + nodeToShardRoutings.get(searchNodeTwo).toArray(new ShardRouting[0]) + ); + routingMasterNode = RoutingNodesHelper.routingNode(masterNode.getId(), masterNode); } - public void test() { + public void testCanAllocate() { Settings settings = Settings.builder() .put("stateless.enabled", "true") @@ -128,8 +177,10 @@ public void test() { .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 1.0d) .build(); - IndexBalanceAllocationDecider decider = new IndexBalanceAllocationDecider(settings, createBuiltInClusterSettings(settings)); - + IndexBalanceAllocationDecider indexBalanceAllocationDecider = new IndexBalanceAllocationDecider( + settings, + createBuiltInClusterSettings(settings) + ); ClusterInfo clusterInfo = ClusterInfo.builder().build(); var routingAllocation = new RoutingAllocation( @@ -140,24 +191,27 @@ public void test() { null, System.nanoTime() ); - routingAllocation.setDebugMode(RoutingAllocation.DebugMode.ON); - ShardRouting shardRouting = TestShardRouting.newShardRouting( - shardIds[0], + ShardRouting newIndexShardRouting = TestShardRouting.newShardRouting( + new ShardId("newIndex", "uuid", 1), indexNodeTwo.getId(), null, true, ShardRoutingState.STARTED ); - Decision decision = decider.canAllocate( - shardRouting, - RoutingNodesHelper.routingNode(indexNodeTwo.getId(), indexNodeTwo, shardRouting), - routingAllocation - ); + for (RoutingNode routingNode : List.of(routingIndexNodeOne, routingIndexNodeTwo, routingSearchNodeOne, routingSearchNodeTwo)) { + + assertDecisionMatches( + "Assigning a new shard to a node that has capacity should succeed", + 
indexBalanceAllocationDecider.canAllocate(newIndexShardRouting, routingNode, routingAllocation), + Decision.Type.YES, + "Node does not currently host this index." + ); + + } - decision.getDecisions(); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java index c47848a88c266..aa3cf23d90215 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java @@ -25,8 +25,6 @@ import org.elasticsearch.cluster.routing.TestShardRouting; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings; -import org.elasticsearch.common.Strings; -import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.shard.ShardId; @@ -238,18 +236,6 @@ public void testWriteLoadDeciderCanRemain() { ); } - private void assertDecisionMatches(String description, Decision decision, Decision.Type type, String explanationPattern) { - assertEquals(description, type, decision.type()); - if (explanationPattern == null) { - assertNull(decision.getExplanation()); - } else { - assertTrue( - Strings.format("Expected: \"%s\", got \"%s\"", explanationPattern, decision.getExplanation()), - Regex.simpleMatch(explanationPattern, decision.getExplanation()) - ); - } - } - /** * Carries all the cluster state objects needed for testing after {@link #createClusterStateAndRoutingAllocation} sets them up. 
*/ diff --git a/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java b/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java index 933649ca20c6e..1bbbe420d0197 100644 --- a/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java @@ -43,6 +43,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.Decision; import org.elasticsearch.cluster.routing.allocation.decider.SameShardAllocationDecider; import org.elasticsearch.cluster.service.ClusterApplierService; +import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.DeterministicTaskQueue; @@ -401,6 +402,18 @@ public static ClusterState reroute(AllocationService allocationService, ClusterS return result; } + public static void assertDecisionMatches(String description, Decision decision, Decision.Type type, String explanationPattern) { + assertEquals(description, type, decision.type()); + if (explanationPattern == null) { + assertNull(decision.getExplanation()); + } else { + assertTrue( + org.elasticsearch.common.Strings.format("Expected: \"%s\", got \"%s\"", explanationPattern, decision.getExplanation()), + Regex.simpleMatch(explanationPattern, decision.getExplanation()) + ); + } + } + public static class TestAllocateDecision extends AllocationDecider { private final Decision decision; From dc8ac5b89316c1ab27f6df6b28619bcddad2ed3c Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Thu, 23 Oct 2025 15:17:27 -0700 Subject: [PATCH 26/67] Allocation: Include index shard counts as a criteria Add unit tests --- .../IndexBalanceAllocationDeciderTests.java | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git 
a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index 63a877c4371dc..d94c501850b29 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -24,6 +24,7 @@ import org.elasticsearch.cluster.routing.GlobalRoutingTable; import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.IndexShardRoutingTable; +import org.elasticsearch.cluster.routing.RecoverySource; import org.elasticsearch.cluster.routing.RoutingNode; import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.RoutingNodesHelper; @@ -31,6 +32,7 @@ import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRoutingState; import org.elasticsearch.cluster.routing.TestShardRouting; +import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexVersion; @@ -72,10 +74,11 @@ public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { private final String indexName = "IndexBalanceAllocationDeciderIndex"; private final Map> nodeToShardRoutings = new HashMap<>(); private ClusterState clusterState; + private IndexMetadata indexMetadata; + private ShardRouting masterPrimaryShardRouting; - @Before - public void setup() { - numberOfPrimaryShards = 10; + private void setup() { + numberOfPrimaryShards = 11; replicationFactor = 2; indexNodeOne = DiscoveryNodeUtils.builder("indexNodeOne").roles(Collections.singleton(DiscoveryNodeRole.INDEX_ROLE)).build(); @@ -94,7 +97,7 
@@ public void setup() { ClusterState.Builder state = ClusterState.builder(new ClusterName("test-IndexBalanceAllocationDecider")); final ProjectMetadata.Builder projectBuilder = ProjectMetadata.builder(projectId); - IndexMetadata indexMetadata = IndexMetadata.builder(indexName) + indexMetadata = IndexMetadata.builder(indexName) .settings( indexSettings(IndexVersion.current(), numberOfPrimaryShards, replicationFactor).put( SETTING_CREATION_DATE, @@ -111,7 +114,7 @@ public void setup() { Metadata.Builder metadataBuilder = Metadata.builder(); shardIds = new ShardId[numberOfPrimaryShards]; - for (int i = 0; i < numberOfPrimaryShards; i++) { + for (int i = 0; i < numberOfPrimaryShards - 1; i++) { shardIds[i] = new ShardId(indexMetadata.getIndex(), i); IndexShardRoutingTable.Builder indexShardRoutingBuilder = IndexShardRoutingTable.builder(shardIds[i]); @@ -140,6 +143,19 @@ public void setup() { routingTableBuilder.add(indexRoutingTableBuilder.build()); } + ShardId lastPrimaryShardId = new ShardId(indexMetadata.getIndex(), numberOfPrimaryShards - 1); + masterPrimaryShardRouting = TestShardRouting.newShardRouting( + lastPrimaryShardId, + masterNode.getId(), + null, + true, + ShardRoutingState.STARTED + ); + IndexShardRoutingTable.Builder indexShardRoutingBuilderMasterNode = IndexShardRoutingTable.builder(lastPrimaryShardId); + indexShardRoutingBuilderMasterNode.addShard(masterPrimaryShardRouting); + indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilderMasterNode); + routingTableBuilder.add(indexRoutingTableBuilder.build()); + metadataBuilder.put(projectBuilder).generateClusterUuidIfNeeded(); state.nodes(discoveryNodeBuilder); state.metadata(metadataBuilder); @@ -166,10 +182,13 @@ public void setup() { searchNodeTwo, nodeToShardRoutings.get(searchNodeTwo).toArray(new ShardRouting[0]) ); - routingMasterNode = RoutingNodesHelper.routingNode(masterNode.getId(), masterNode); + routingMasterNode = RoutingNodesHelper.routingNode( + masterNode.getId(), masterNode, 
masterPrimaryShardRouting); + } public void testCanAllocate() { + setup(); Settings settings = Settings.builder() .put("stateless.enabled", "true") @@ -202,16 +221,30 @@ public void testCanAllocate() { ); for (RoutingNode routingNode : List.of(routingIndexNodeOne, routingIndexNodeTwo, routingSearchNodeOne, routingSearchNodeTwo)) { - assertDecisionMatches( - "Assigning a new shard to a node that has capacity should succeed", + "Assigning a new index to a node should succeed", indexBalanceAllocationDecider.canAllocate(newIndexShardRouting, routingNode, routingAllocation), Decision.Type.YES, "Node does not currently host this index." ); - } + ShardRouting primaryIndexShardRouting = TestShardRouting.newShardRouting( + new ShardId(indexMetadata.getIndex(), 1), + indexNodeTwo.getId(), + null, + true, + ShardRoutingState.STARTED + ); + + assertDecisionMatches( + "Assigning a new shard to a node that is not index or search node should succeed", + indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingMasterNode, routingAllocation), + Decision.Type.YES, + "Node has neither index nor search roles, outside purview." 
+ ); + + } } From f69bc6f221f5c163d80326dc640e12b24fba5794 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 23 Oct 2025 22:24:23 +0000 Subject: [PATCH 27/67] [CI] Auto commit changes from spotless --- .../decider/IndexBalanceAllocationDeciderTests.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index d94c501850b29..a69fa0980afba 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -24,7 +24,6 @@ import org.elasticsearch.cluster.routing.GlobalRoutingTable; import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.IndexShardRoutingTable; -import org.elasticsearch.cluster.routing.RecoverySource; import org.elasticsearch.cluster.routing.RoutingNode; import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.RoutingNodesHelper; @@ -32,13 +31,11 @@ import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRoutingState; import org.elasticsearch.cluster.routing.TestShardRouting; -import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.shard.IndexLongFieldRange; import org.elasticsearch.index.shard.ShardId; -import org.junit.Before; import java.util.ArrayList; import java.util.Collections; @@ -182,8 +179,7 @@ private void setup() { searchNodeTwo, nodeToShardRoutings.get(searchNodeTwo).toArray(new 
ShardRouting[0]) ); - routingMasterNode = RoutingNodesHelper.routingNode( - masterNode.getId(), masterNode, masterPrimaryShardRouting); + routingMasterNode = RoutingNodesHelper.routingNode(masterNode.getId(), masterNode, masterPrimaryShardRouting); } @@ -244,7 +240,6 @@ public void testCanAllocate() { "Node has neither index nor search roles, outside purview." ); - } } From 868fc338fe12b0b9cdc7cd02540037ce0480131f Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Thu, 23 Oct 2025 15:24:40 -0700 Subject: [PATCH 28/67] Allocation: Include index shard counts as a criteria Add unit tests --- .../IndexBalanceAllocationDeciderTests.java | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index d94c501850b29..4bbe43d21c8a7 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -237,13 +237,47 @@ public void testCanAllocate() { ShardRoutingState.STARTED ); + ShardRouting replicaIndexShardRouting = TestShardRouting.newShardRouting( + new ShardId(indexMetadata.getIndex(), 1), + searchNodeTwo.getId(), + null, + false, + ShardRoutingState.STARTED + ); + assertDecisionMatches( - "Assigning a new shard to a node that is not index or search node should succeed", + "Assigning a shard to a node that is not index or search node should succeed", indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingMasterNode, routingAllocation), Decision.Type.YES, "Node has neither index nor search roles, outside purview." 
); + for (RoutingNode routingNode : List.of(routingSearchNodeOne, routingSearchNodeTwo)) { + assertDecisionMatches( + "Assigning a new primary shard to a search node should succeed", + indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingNode, routingAllocation), + Decision.Type.YES, + "Decider allows primaries move to search nodes." + ); + } + + for (RoutingNode routingNode : List.of(routingSearchNodeOne, routingSearchNodeTwo)) { + assertDecisionMatches( + "Assigning a primary shard to a search node should succeed", + indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingNode, routingAllocation), + Decision.Type.YES, + "Decider allows primaries move to search nodes." + ); + } + + for (RoutingNode routingNode : List.of(routingIndexNodeOne, routingIndexNodeTwo)) { + assertDecisionMatches( + "Assigning a replica shard to a search node should succeed", + indexBalanceAllocationDecider.canAllocate(replicaIndexShardRouting, routingNode, routingAllocation), + Decision.Type.YES, + "Decider allows replicas move to index nodes." 
+ ); + } } From be83b88cf9c804d31b08a50fe76e092b03b5f76a Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Thu, 23 Oct 2025 15:55:54 -0700 Subject: [PATCH 29/67] Allocation: Include index shard counts as a criteria Add unit tests --- .../IndexBalanceAllocationDeciderTests.java | 96 +++++++++++++++---- 1 file changed, 77 insertions(+), 19 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index d5699a32f7ede..2c6878b70672b 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -74,8 +74,8 @@ public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { private IndexMetadata indexMetadata; private ShardRouting masterPrimaryShardRouting; - private void setup() { - numberOfPrimaryShards = 11; + private void setup(boolean allowMaster) { + numberOfPrimaryShards = allowMaster ? 11 : 10; replicationFactor = 2; indexNodeOne = DiscoveryNodeUtils.builder("indexNodeOne").roles(Collections.singleton(DiscoveryNodeRole.INDEX_ROLE)).build(); @@ -111,7 +111,8 @@ private void setup() { Metadata.Builder metadataBuilder = Metadata.builder(); shardIds = new ShardId[numberOfPrimaryShards]; - for (int i = 0; i < numberOfPrimaryShards - 1; i++) { + int shardCount = allowMaster ? 
numberOfPrimaryShards - 1 : numberOfPrimaryShards; + for (int i = 0; i < shardCount; i++) { shardIds[i] = new ShardId(indexMetadata.getIndex(), i); IndexShardRoutingTable.Builder indexShardRoutingBuilder = IndexShardRoutingTable.builder(shardIds[i]); @@ -140,18 +141,20 @@ private void setup() { routingTableBuilder.add(indexRoutingTableBuilder.build()); } - ShardId lastPrimaryShardId = new ShardId(indexMetadata.getIndex(), numberOfPrimaryShards - 1); - masterPrimaryShardRouting = TestShardRouting.newShardRouting( - lastPrimaryShardId, - masterNode.getId(), - null, - true, - ShardRoutingState.STARTED - ); - IndexShardRoutingTable.Builder indexShardRoutingBuilderMasterNode = IndexShardRoutingTable.builder(lastPrimaryShardId); - indexShardRoutingBuilderMasterNode.addShard(masterPrimaryShardRouting); - indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilderMasterNode); - routingTableBuilder.add(indexRoutingTableBuilder.build()); + if (allowMaster) { + ShardId lastPrimaryShardId = new ShardId(indexMetadata.getIndex(), numberOfPrimaryShards - 1); + masterPrimaryShardRouting = TestShardRouting.newShardRouting( + lastPrimaryShardId, + masterNode.getId(), + null, + true, + ShardRoutingState.STARTED + ); + IndexShardRoutingTable.Builder indexShardRoutingBuilderMasterNode = IndexShardRoutingTable.builder(lastPrimaryShardId); + indexShardRoutingBuilderMasterNode.addShard(masterPrimaryShardRouting); + indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilderMasterNode); + routingTableBuilder.add(indexRoutingTableBuilder.build()); + } metadataBuilder.put(projectBuilder).generateClusterUuidIfNeeded(); state.nodes(discoveryNodeBuilder); @@ -179,12 +182,15 @@ private void setup() { searchNodeTwo, nodeToShardRoutings.get(searchNodeTwo).toArray(new ShardRouting[0]) ); - routingMasterNode = RoutingNodesHelper.routingNode(masterNode.getId(), masterNode, masterPrimaryShardRouting); - + if (allowMaster) { + routingMasterNode = 
RoutingNodesHelper.routingNode(masterNode.getId(), masterNode, masterPrimaryShardRouting); + } else { + routingMasterNode = RoutingNodesHelper.routingNode(masterNode.getId(), masterNode); + } } - public void testCanAllocate() { - setup(); + public void testCanAllocate_under_threshold() { + setup(true); Settings settings = Settings.builder() .put("stateless.enabled", "true") @@ -275,6 +281,58 @@ public void testCanAllocate() { ); } + assertDecisionMatches( + "Assigning an additional primary shard to an index node has capacity should succeed", + indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingIndexNodeOne, routingAllocation), + Decision.Type.YES, + "Node index shard allocation is under the threshold." + ); + } + + public void testCanAllocate_exceed_threshold() { + setup(false); + + Settings settings = Settings.builder() + .put("stateless.enabled", "true") + .put("cluster.routing.allocation.index_balance_decider.enabled", "true") + .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 1.0d) + .build(); + + IndexBalanceAllocationDecider indexBalanceAllocationDecider = new IndexBalanceAllocationDecider( + settings, + createBuiltInClusterSettings(settings) + ); + ClusterInfo clusterInfo = ClusterInfo.builder().build(); + + var routingAllocation = new RoutingAllocation( + null, + RoutingNodes.immutable(clusterState.globalRoutingTable(), clusterState.nodes()), + clusterState, + clusterInfo, + null, + System.nanoTime() + ); + routingAllocation.setDebugMode(RoutingAllocation.DebugMode.ON); + + ShardRouting primaryIndexShardRouting = TestShardRouting.newShardRouting( + new ShardId(indexMetadata.getIndex(), 1), + indexNodeTwo.getId(), + null, + true, + ShardRoutingState.STARTED + ); + + assertDecisionMatches( + "Assigning an additional primary shard to an index node at capacity should fail", + indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingIndexNodeOne, routingAllocation), + 
Decision.Type.NOT_PREFERRED, + "For index [[IndexBalanceAllocationDeciderIndex]] with [10] shards, Node [indexNodeOne] is " + + "expected to hold [5] shards for index [[IndexBalanceAllocationDeciderIndex]], based on the total of [2]\n" + + "nodes available. The configured load skew tolerance is [1.00], which yields an allocation threshold of\n" + + "Math.ceil([5] × [1.00]) = [5] shards. Currently, node [indexNodeOne] is assigned [5] shards of index " + + "[[IndexBalanceAllocationDeciderIndex]]. Therefore,\n" + + "assigning additional shards is not preferred.\n" + ); } } From 09f2602e173b4d169ea2c6174bc49f9064566a3d Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Thu, 23 Oct 2025 16:22:37 -0700 Subject: [PATCH 30/67] Allocation: Include index shard counts as a criteria Add unit tests --- .../IndexBalanceAllocationDecider.java | 22 +++++++++++---- .../IndexBalanceAllocationDeciderTests.java | 28 ++++++++++++++++--- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 1fe7297f5732e..9d3d813c9a902 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -11,6 +11,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeFilters; @@ -43,6 +45,7 @@ public class IndexBalanceAllocationDecider extends AllocationDecider { private static final Logger logger = 
LogManager.getLogger(IndexBalanceAllocationDecider.class); + private static final String EMPTY = ""; public static final String NAME = "index_balance"; @@ -89,15 +92,19 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing final ProjectId projectId = allocation.getClusterState().metadata().projectFor(index).id(); final Set eligibleNodes = new HashSet<>(); int totalShards = 0; + String nomenclature = EMPTY; if (node.node().getRoles().contains(INDEX_ROLE)) { collectEligibleNodes(allocation, eligibleNodes, INDEX_ROLE); // Primary shards only. totalShards = allocation.getClusterState().routingTable(projectId).index(index).size(); + nomenclature = "primary shards"; } else if (node.node().getRoles().contains(SEARCH_ROLE)) { collectEligibleNodes(allocation, eligibleNodes, SEARCH_ROLE); // Replicas only. - totalShards = allocation.getClusterState().metadata().getProject(projectId).index(index).getNumberOfReplicas(); + final IndexMetadata indexMetadata = allocation.getClusterState().metadata().getProject(projectId).index(index); + totalShards = indexMetadata.getNumberOfShards() * indexMetadata.getNumberOfReplicas(); + nomenclature = "replicas"; } assert eligibleNodes.isEmpty() == false; @@ -110,24 +117,29 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing if (currentAllocation >= threshold) { String explanation = Strings.format( """ - For index [%s] with [%d] shards, Node [%s] is expected to hold [%.0f] shards for index [%s], based on the total of [%d] + For index [%s] with [%d] %s, Node [%s] is expected to hold [%.0f] %s for index [%s], based on the total of [%d] nodes available. The configured load skew tolerance is [%.2f], which yields an allocation threshold of - Math.ceil([%.0f] × [%.2f]) = [%d] shards. Currently, node [%s] is assigned [%d] shards of index [%s]. Therefore, - assigning additional shards is not preferred. + Math.ceil([%.0f] × [%.2f]) = [%d] %s. 
Currently, node [%s] is assigned [%d] %s of index [%s]. Therefore, + assigning additional %s is not preferred. """, index, totalShards, + nomenclature, node.nodeId(), idealAllocation, + nomenclature, index, eligibleNodes.size(), indexBalanceConstraintSettings.getLoadSkewTolerance(), idealAllocation, indexBalanceConstraintSettings.getLoadSkewTolerance(), threshold, + nomenclature, node.nodeId(), currentAllocation, - index + nomenclature, + index, + nomenclature ); logger.trace(explanation); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index 2c6878b70672b..1877812b0458c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -322,16 +322,36 @@ public void testCanAllocate_exceed_threshold() { ShardRoutingState.STARTED ); + ShardRouting replicaIndexShardRouting = TestShardRouting.newShardRouting( + new ShardId(indexMetadata.getIndex(), 1), + searchNodeTwo.getId(), + null, + false, + ShardRoutingState.STARTED + ); + assertDecisionMatches( "Assigning an additional primary shard to an index node at capacity should fail", indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingIndexNodeOne, routingAllocation), Decision.Type.NOT_PREFERRED, - "For index [[IndexBalanceAllocationDeciderIndex]] with [10] shards, Node [indexNodeOne] is " - + "expected to hold [5] shards for index [[IndexBalanceAllocationDeciderIndex]], based on the total of [2]\n" + "For index [[IndexBalanceAllocationDeciderIndex]] with [10] primary shards, Node [indexNodeOne] is " + + "expected to hold [5] primary shards for index [[IndexBalanceAllocationDeciderIndex]], based on the total of [2]\n" + + 
"nodes available. The configured load skew tolerance is [1.00], which yields an allocation threshold of\n" + + "Math.ceil([5] × [1.00]) = [5] primary shards. Currently, node [indexNodeOne] is assigned [5] primary shards of index " + + "[[IndexBalanceAllocationDeciderIndex]]. Therefore,\n" + + "assigning additional primary shards is not preferred.\n" + ); + + assertDecisionMatches( + "Assigning an additional replica shard to an replica node at capacity should fail", + indexBalanceAllocationDecider.canAllocate(replicaIndexShardRouting, routingSearchNodeOne, routingAllocation), + Decision.Type.NOT_PREFERRED, + "For index [[IndexBalanceAllocationDeciderIndex]] with [20] replicas, Node [searchNodeOne] is " + + "expected to hold [10] replicas for index [[IndexBalanceAllocationDeciderIndex]], based on the total of [2]\n" + "nodes available. The configured load skew tolerance is [1.00], which yields an allocation threshold of\n" - + "Math.ceil([5] × [1.00]) = [5] shards. Currently, node [indexNodeOne] is assigned [5] shards of index " + + "Math.ceil([10] × [1.00]) = [10] replicas. Currently, node [searchNodeOne] is assigned [10] replicas of index " + "[[IndexBalanceAllocationDeciderIndex]]. 
Therefore,\n" - + "assigning additional shards is not preferred.\n" + + "assigning additional replicas is not preferred.\n" ); } From 7d6da1f9a85c44c35c0d96b9a90db7d467b780b2 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 23 Oct 2025 23:29:37 +0000 Subject: [PATCH 31/67] [CI] Auto commit changes from spotless --- .../allocation/decider/IndexBalanceAllocationDecider.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 9d3d813c9a902..91a759a3d6c61 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -11,7 +11,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.elasticsearch.action.ActionResponse; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.node.DiscoveryNode; From 27c5f8a91df7b7dc63e4ef71d3951c23358c5e52 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Tue, 28 Oct 2025 10:40:58 -0700 Subject: [PATCH 32/67] fix test regressions --- .../allocation/decider/WriteLoadConstraintDeciderTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java index 9a6e2d9848140..23571e8e73dcf 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java +++ 
b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.cluster.routing.TestShardRouting; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings; +import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.shard.ShardId; From e66278db71d700115055367fd410a5c36e962cf7 Mon Sep 17 00:00:00 2001 From: Zhubo Tang Date: Thu, 30 Oct 2025 15:12:05 -0700 Subject: [PATCH 33/67] Update server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java Co-authored-by: Dianna Hohensee --- .../allocation/decider/IndexBalanceAllocationDecider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 91a759a3d6c61..2b0525df165d6 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -66,7 +66,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } if (isStateless == false) { - return Decision.single(Decision.Type.YES, NAME, "Decider does not currently support stateful."); + return Decision.single(Decision.Type.YES, NAME, "Decider is disabled."); } Index index = shardRouting.index(); From b95a7ba605dfa3b70379e3c675d5fbfa815b09b8 Mon Sep 17 00:00:00 2001 From: Zhubo Tang Date: Thu, 30 Oct 2025 15:12:23 -0700 Subject: [PATCH 34/67] Update 
server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java Co-authored-by: Dianna Hohensee --- .../allocation/decider/IndexBalanceAllocationDecider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 2b0525df165d6..5f055e3b70b7d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -77,7 +77,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing assert node.node() != null; assert node.node().getRoles() != null && node.node().getRoles().isEmpty() == false; if (node.node().getRoles().contains(INDEX_ROLE) == false && node.node().getRoles().contains(SEARCH_ROLE) == false) { - return Decision.single(Decision.Type.YES, NAME, "Node has neither index nor search roles, outside purview."); + return Decision.single(Decision.Type.YES, NAME, "Node has neither index nor search roles."); } if (node.node().getRoles().contains(INDEX_ROLE) && shardRouting.primary() == false) { From d45908fb52d3ad65c1b04a560991d883e4de3c1e Mon Sep 17 00:00:00 2001 From: Zhubo Tang Date: Thu, 30 Oct 2025 15:12:40 -0700 Subject: [PATCH 35/67] Update server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java Co-authored-by: Dianna Hohensee --- .../allocation/decider/IndexBalanceAllocationDecider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java 
b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 5f055e3b70b7d..03589a0bc27a4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -81,7 +81,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } if (node.node().getRoles().contains(INDEX_ROLE) && shardRouting.primary() == false) { - return Decision.single(Decision.Type.YES, NAME, "Decider allows replicas move to index nodes."); + return Decision.single(Decision.Type.YES, NAME, "An index node cannot own search shards. Decider inactive."); } if (node.node().getRoles().contains(SEARCH_ROLE) && shardRouting.primary()) { From 83c61012d0310588ed42a266c6a31e3656e0f63c Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 31 Oct 2025 08:46:24 -0700 Subject: [PATCH 36/67] fix test regressions --- .../decider/WriteLoadConstraintDeciderIT.java | 2 +- .../IndexBalanceAllocationDecider.java | 32 ++++++------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java index 27d57de48da99..1c57d0c481bcd 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java @@ -169,7 +169,6 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() { updateClusterSettings( Settings.builder() .put("cluster.routing.allocation.exclude._name", harness.firstDataNodeName) - 
.put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 1000) ); safeAwait(temporaryClusterStateListener); @@ -608,6 +607,7 @@ private Settings enabledWriteLoadDeciderSettings(int utilizationThresholdPercent WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_QUEUE_LATENCY_THRESHOLD_SETTING.getKey(), TimeValue.timeValueMillis(queueLatencyThresholdMillis) ) + .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 1000) // Disable rebalancing so that testing can see Decider change outcomes only. .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none") .build(); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 91a759a3d6c61..4aae5fc86736d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -75,7 +75,6 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } assert node.node() != null; - assert node.node().getRoles() != null && node.node().getRoles().isEmpty() == false; if (node.node().getRoles().contains(INDEX_ROLE) == false && node.node().getRoles().contains(SEARCH_ROLE) == false) { return Decision.single(Decision.Type.YES, NAME, "Node has neither index nor search roles, outside purview."); } @@ -88,6 +87,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return Decision.single(Decision.Type.YES, NAME, "Decider allows primaries move to search nodes."); } + final ProjectId projectId = allocation.getClusterState().metadata().projectFor(index).id(); final Set eligibleNodes = new HashSet<>(); int totalShards = 0; @@ -116,29 +116,16 @@ public Decision 
canAllocate(ShardRouting shardRouting, RoutingNode node, Routing if (currentAllocation >= threshold) { String explanation = Strings.format( """ - For index [%s] with [%d] %s, Node [%s] is expected to hold [%.0f] %s for index [%s], based on the total of [%d] - nodes available. The configured load skew tolerance is [%.2f], which yields an allocation threshold of - Math.ceil([%.0f] × [%.2f]) = [%d] %s. Currently, node [%s] is assigned [%d] %s of index [%s]. Therefore, - assigning additional %s is not preferred. - """, - index, - totalShards, - nomenclature, - node.nodeId(), - idealAllocation, - nomenclature, - index, + There are [%d] eligible nodes in the [%s] tier for assignment of [%.0f] shards in index [%s]. Ideally no more than [%d] + shard would be assigned per node (the index balance skew setting is [%.2f]). This node is already assigned [%d] shards of + the index. + """, eligibleNodes.size(), - indexBalanceConstraintSettings.getLoadSkewTolerance(), + nomenclature, + totalShards, idealAllocation, indexBalanceConstraintSettings.getLoadSkewTolerance(), - threshold, - nomenclature, - node.nodeId(), - currentAllocation, - nomenclature, - index, - nomenclature + currentAllocation ); logger.trace(explanation); @@ -151,8 +138,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing private void collectEligibleNodes(RoutingAllocation allocation, Set eligibleNodes, DiscoveryNodeRole role) { for (DiscoveryNode discoveryNode : allocation.nodes()) { - if (discoveryNode.canContainData() - && discoveryNode.getRoles().contains(role) + if ( discoveryNode.getRoles().contains(role) && (clusterExcludeFilters == null || clusterExcludeFilters.match(discoveryNode) == false) && allocation.metadata().nodeShutdowns().contains(discoveryNode.getId()) == false) { eligibleNodes.add(discoveryNode); From 67105f256cb0a021b542a953b10f862743511735 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 31 Oct 2025 10:50:52 -0700 Subject: [PATCH 37/67] fix test 
regressions --- .../decider/WriteLoadConstraintDeciderIT.java | 5 +-- .../cluster/node/DiscoveryNodeFilters.java | 4 +++ .../IndexBalanceConstraintSettings.java | 16 +++++----- .../IndexBalanceAllocationDecider.java | 24 ++++++-------- .../IndexBalanceAllocationDeciderTests.java | 32 ++++++++----------- 5 files changed, 37 insertions(+), 44 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java index 1c57d0c481bcd..c0edb7929b3a9 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java @@ -166,10 +166,7 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() { "--> Update the filter to exclude " + harness.firstDataNodeName + " so shards will be reassigned away to the other nodes" ); // Updating the cluster settings will trigger a reroute request, no need to explicitly request one in the test. 
- updateClusterSettings( - Settings.builder() - .put("cluster.routing.allocation.exclude._name", harness.firstDataNodeName) - ); + updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", harness.firstDataNodeName)); safeAwait(temporaryClusterStateListener); } catch (AssertionError error) { diff --git a/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java b/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java index 4251acfa301f2..439e73243b9c6 100644 --- a/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java +++ b/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java @@ -250,6 +250,10 @@ private boolean isSingleNodeFilterInternal() { || (filters.size() > 1 && opType == OpType.AND && NON_ATTRIBUTE_NAMES.containsAll(filters.keySet())); } + public boolean isEmpty() { + return filters.isEmpty(); + } + /** * Generates a human-readable string for the DiscoverNodeFilters. * Example: {@code _id:"id1 OR blah",name:"blah OR name2"} diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java index e34d03cdb5b46..141adde62b389 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java @@ -30,20 +30,20 @@ public class IndexBalanceConstraintSettings { /** * This setting permits nodes to host more than ideally balanced number of index shards. - * Maximum tolerated index shard count = ceil(ideal * skew_tolerance) - * i.e. ideal = 4 shards, skew_tolerance = 1.3 - * maximum tolerated index shards = Math.ceil(4 * 1.3) = 6. + * Maximum tolerated index shard count = ideal + skew_tolerance + * i.e. 
ideal = 4 shards, skew_tolerance = 1 + * maximum tolerated index shards = 4 + 1 = 5. */ - public static final Setting INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE = Setting.doubleSetting( + public static final Setting INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE = Setting.intSetting( SETTING_PREFIX + "load_skew_tolerance", - 1.5d, - 1.0d, + 0, + 0, Setting.Property.Dynamic, Setting.Property.NodeScope ); private volatile boolean deciderEnabled; - private volatile double loadSkewTolerance; + private volatile int loadSkewTolerance; public IndexBalanceConstraintSettings(ClusterSettings clusterSettings) { clusterSettings.initializeAndWatch(INDEX_BALANCE_DECIDER_ENABLED_SETTING, enabled -> this.deciderEnabled = enabled); @@ -54,7 +54,7 @@ public boolean isDeciderEnabled() { return this.deciderEnabled; } - public double getLoadSkewTolerance() { + public int getLoadSkewTolerance() { return this.loadSkewTolerance; } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index eb2656963232f..302cd6a6e7a30 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -61,11 +61,9 @@ public IndexBalanceAllocationDecider(Settings settings, ClusterSettings clusterS @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - if (indexBalanceConstraintSettings.isDeciderEnabled() == false) { - return Decision.single(Decision.Type.YES, NAME, "Decider is disabled."); - } - - if (isStateless == false) { + if (indexBalanceConstraintSettings.isDeciderEnabled() == false + || isStateless == false + || clusterExcludeFilters != null && clusterExcludeFilters.isEmpty() == false) { return 
Decision.single(Decision.Type.YES, NAME, "Decider is disabled."); } @@ -87,7 +85,6 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return Decision.single(Decision.Type.YES, NAME, "Decider allows primaries move to search nodes."); } - final ProjectId projectId = allocation.getClusterState().metadata().projectFor(index).id(); final Set eligibleNodes = new HashSet<>(); int totalShards = 0; @@ -110,19 +107,20 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing assert totalShards > 0; final double idealAllocation = Math.ceil((double) totalShards / eligibleNodes.size()); - final int threshold = (int) Math.ceil(idealAllocation * indexBalanceConstraintSettings.getLoadSkewTolerance()); + final int threshold = (int) Math.ceil(idealAllocation) + indexBalanceConstraintSettings.getLoadSkewTolerance(); final int currentAllocation = node.numberOfOwningShardsForIndex(index); if (currentAllocation >= threshold) { String explanation = Strings.format( """ - There are [%d] eligible nodes in the [%s] tier for assignment of [%.0f] shards in index [%s]. Ideally no more than [%d] - shard would be assigned per node (the index balance skew setting is [%.2f]). This node is already assigned [%d] shards of - the index. - """, + There are [%d] eligible nodes in the [%s] tier for assignment of [%d] shards in index [%s]. Ideally no more than [%.0f] + shard would be assigned per node (the index balance skew setting is [%d]). This node is already assigned [%d] shards of + the index. + """, eligibleNodes.size(), nomenclature, totalShards, + index, idealAllocation, indexBalanceConstraintSettings.getLoadSkewTolerance(), currentAllocation @@ -138,9 +136,7 @@ shard would be assigned per node (the index balance skew setting is [%.2f]). 
Thi private void collectEligibleNodes(RoutingAllocation allocation, Set eligibleNodes, DiscoveryNodeRole role) { for (DiscoveryNode discoveryNode : allocation.nodes()) { - if ( discoveryNode.getRoles().contains(role) - && (clusterExcludeFilters == null || clusterExcludeFilters.match(discoveryNode) == false) - && allocation.metadata().nodeShutdowns().contains(discoveryNode.getId()) == false) { + if (discoveryNode.getRoles().contains(role) && allocation.metadata().nodeShutdowns().contains(discoveryNode.getId()) == false) { eligibleNodes.add(discoveryNode); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index 1877812b0458c..99c70751095d0 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -189,13 +189,13 @@ private void setup(boolean allowMaster) { } } - public void testCanAllocate_under_threshold() { + public void testCanAllocateUnderThreshold() { setup(true); Settings settings = Settings.builder() .put("stateless.enabled", "true") .put("cluster.routing.allocation.index_balance_decider.enabled", "true") - .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 1.0d) + .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 0) .build(); IndexBalanceAllocationDecider indexBalanceAllocationDecider = new IndexBalanceAllocationDecider( @@ -251,7 +251,7 @@ public void testCanAllocate_under_threshold() { "Assigning a shard to a node that is not index or search node should succeed", indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingMasterNode, routingAllocation), Decision.Type.YES, - "Node has neither index nor search 
roles, outside purview." + "Node has neither index nor search roles." ); for (RoutingNode routingNode : List.of(routingSearchNodeOne, routingSearchNodeTwo)) { @@ -277,7 +277,7 @@ public void testCanAllocate_under_threshold() { "Assigning a replica shard to a search node should succeed", indexBalanceAllocationDecider.canAllocate(replicaIndexShardRouting, routingNode, routingAllocation), Decision.Type.YES, - "Decider allows replicas move to index nodes." + "An index node cannot own search shards. Decider inactive." ); } @@ -289,13 +289,13 @@ public void testCanAllocate_under_threshold() { ); } - public void testCanAllocate_exceed_threshold() { + public void testCanAllocateExceedThreshold() { setup(false); Settings settings = Settings.builder() .put("stateless.enabled", "true") .put("cluster.routing.allocation.index_balance_decider.enabled", "true") - .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 1.0d) + .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 0) .build(); IndexBalanceAllocationDecider indexBalanceAllocationDecider = new IndexBalanceAllocationDecider( @@ -334,24 +334,20 @@ public void testCanAllocate_exceed_threshold() { "Assigning an additional primary shard to an index node at capacity should fail", indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingIndexNodeOne, routingAllocation), Decision.Type.NOT_PREFERRED, - "For index [[IndexBalanceAllocationDeciderIndex]] with [10] primary shards, Node [indexNodeOne] is " - + "expected to hold [5] primary shards for index [[IndexBalanceAllocationDeciderIndex]], based on the total of [2]\n" - + "nodes available. The configured load skew tolerance is [1.00], which yields an allocation threshold of\n" - + "Math.ceil([5] × [1.00]) = [5] primary shards. Currently, node [indexNodeOne] is assigned [5] primary shards of index " - + "[[IndexBalanceAllocationDeciderIndex]]. 
Therefore,\n" - + "assigning additional primary shards is not preferred.\n" + "There are [2] eligible nodes in the [primary shards] tier for assignment of [10] shards " + + "in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [5]\n" + + "shard would be assigned per node (the index balance skew setting is [0]). This node is already assigned [5] shards of\n" + + "the index.\n" ); assertDecisionMatches( "Assigning an additional replica shard to an replica node at capacity should fail", indexBalanceAllocationDecider.canAllocate(replicaIndexShardRouting, routingSearchNodeOne, routingAllocation), Decision.Type.NOT_PREFERRED, - "For index [[IndexBalanceAllocationDeciderIndex]] with [20] replicas, Node [searchNodeOne] is " - + "expected to hold [10] replicas for index [[IndexBalanceAllocationDeciderIndex]], based on the total of [2]\n" - + "nodes available. The configured load skew tolerance is [1.00], which yields an allocation threshold of\n" - + "Math.ceil([10] × [1.00]) = [10] replicas. Currently, node [searchNodeOne] is assigned [10] replicas of index " - + "[[IndexBalanceAllocationDeciderIndex]]. Therefore,\n" - + "assigning additional replicas is not preferred.\n" + "There are [2] eligible nodes in the [replicas] tier for assignment of [20] " + + "shards in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [10]\n" + + "shard would be assigned per node (the index balance skew setting is [0]). 
This node is already assigned [10] shards of\n" + + "the index.\n" ); } From 5f598686927cd4918cbfb800894468ecbe13aeb3 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 31 Oct 2025 12:24:19 -0700 Subject: [PATCH 38/67] fix test regressions --- .../IndexBalanceAllocationDeciderTests.java | 143 ++++++++---------- 1 file changed, 65 insertions(+), 78 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index 99c70751095d0..ba33e0f6bd430 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -25,7 +25,6 @@ import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.IndexShardRoutingTable; import org.elasticsearch.cluster.routing.RoutingNode; -import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.RoutingNodesHelper; import org.elasticsearch.cluster.routing.RoutingTable; import org.elasticsearch.cluster.routing.ShardRouting; @@ -49,48 +48,60 @@ public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { - private ProjectId projectId; private DiscoveryNode indexNodeOne; private DiscoveryNode indexNodeTwo; private DiscoveryNode searchNodeOne; private DiscoveryNode searchNodeTwo; private DiscoveryNode masterNode; + private DiscoveryNode machineLearningNode; private RoutingNode routingIndexNodeOne; private RoutingNode routingIndexNodeTwo; private RoutingNode routingSearchNodeOne; private RoutingNode routingSearchNodeTwo; private RoutingNode routingMasterNode; + private RoutingNode routingMachineLearningNode; private List allNodes; - private int numberOfPrimaryShards; private int 
replicationFactor; - - private ShardId[] shardIds; - private final String indexName = "IndexBalanceAllocationDeciderIndex"; - private final Map> nodeToShardRoutings = new HashMap<>(); private ClusterState clusterState; private IndexMetadata indexMetadata; - private ShardRouting masterPrimaryShardRouting; + private Settings settings; + private RoutingAllocation routingAllocation; + private IndexBalanceAllocationDecider indexBalanceAllocationDecider; + + private void setup(boolean exceedThreshold) { + final String indexName = "IndexBalanceAllocationDeciderIndex"; + final Map> nodeToShardRoutings = new HashMap<>(); + + settings = Settings.builder() + .put("stateless.enabled", "true") + .put("cluster.routing.allocation.index_balance_decider.enabled", "true") + .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 0) + .build(); - private void setup(boolean allowMaster) { - numberOfPrimaryShards = allowMaster ? 11 : 10; + numberOfPrimaryShards = randomIntBetween(10, 20); replicationFactor = 2; + if (numberOfPrimaryShards % 2 != 0 && exceedThreshold) numberOfPrimaryShards++; + if (numberOfPrimaryShards % 2 == 0 && exceedThreshold == false) numberOfPrimaryShards++; indexNodeOne = DiscoveryNodeUtils.builder("indexNodeOne").roles(Collections.singleton(DiscoveryNodeRole.INDEX_ROLE)).build(); indexNodeTwo = DiscoveryNodeUtils.builder("indexNodeTwo").roles(Collections.singleton(DiscoveryNodeRole.INDEX_ROLE)).build(); searchNodeOne = DiscoveryNodeUtils.builder("searchNodeOne").roles(Collections.singleton(DiscoveryNodeRole.SEARCH_ROLE)).build(); searchNodeTwo = DiscoveryNodeUtils.builder("searchNodeTwo").roles(Collections.singleton(DiscoveryNodeRole.SEARCH_ROLE)).build(); masterNode = DiscoveryNodeUtils.builder("masterNode").roles(Collections.singleton(DiscoveryNodeRole.MASTER_ROLE)).build(); - allNodes = List.of(indexNodeOne, indexNodeTwo, searchNodeOne, searchNodeTwo, masterNode); + machineLearningNode = DiscoveryNodeUtils.builder("machineLearningNode") 
+ .roles(Collections.singleton(DiscoveryNodeRole.ML_ROLE)) + .build(); + allNodes = List.of(indexNodeOne, indexNodeTwo, searchNodeOne, searchNodeTwo, masterNode, machineLearningNode); DiscoveryNodes.Builder discoveryNodeBuilder = DiscoveryNodes.builder(); for (DiscoveryNode node : allNodes) { discoveryNodeBuilder.add(node); } - projectId = ProjectId.fromId("test-IndexBalanceAllocationDecider"); + ProjectId projectId = ProjectId.fromId("test-IndexBalanceAllocationDecider"); ClusterState.Builder state = ClusterState.builder(new ClusterName("test-IndexBalanceAllocationDecider")); final ProjectMetadata.Builder projectBuilder = ProjectMetadata.builder(projectId); @@ -110,8 +121,8 @@ private void setup(boolean allowMaster) { RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); Metadata.Builder metadataBuilder = Metadata.builder(); - shardIds = new ShardId[numberOfPrimaryShards]; - int shardCount = allowMaster ? numberOfPrimaryShards - 1 : numberOfPrimaryShards; + ShardId[] shardIds = new ShardId[numberOfPrimaryShards]; + int shardCount = exceedThreshold ? 
numberOfPrimaryShards : numberOfPrimaryShards - 1; for (int i = 0; i < shardCount; i++) { shardIds[i] = new ShardId(indexMetadata.getIndex(), i); IndexShardRoutingTable.Builder indexShardRoutingBuilder = IndexShardRoutingTable.builder(shardIds[i]); @@ -141,19 +152,20 @@ private void setup(boolean allowMaster) { routingTableBuilder.add(indexRoutingTableBuilder.build()); } - if (allowMaster) { + if (exceedThreshold == false) { ShardId lastPrimaryShardId = new ShardId(indexMetadata.getIndex(), numberOfPrimaryShards - 1); - masterPrimaryShardRouting = TestShardRouting.newShardRouting( + ShardRouting lastShardRouting = TestShardRouting.newShardRouting( lastPrimaryShardId, masterNode.getId(), null, true, ShardRoutingState.STARTED ); - IndexShardRoutingTable.Builder indexShardRoutingBuilderMasterNode = IndexShardRoutingTable.builder(lastPrimaryShardId); - indexShardRoutingBuilderMasterNode.addShard(masterPrimaryShardRouting); - indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilderMasterNode); + IndexShardRoutingTable.Builder indexShardRoutingBuilderMLNode = IndexShardRoutingTable.builder(lastPrimaryShardId); + indexShardRoutingBuilderMLNode.addShard(lastShardRouting); + indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilderMLNode); routingTableBuilder.add(indexRoutingTableBuilder.build()); + routingMachineLearningNode = RoutingNodesHelper.routingNode(machineLearningNode.getId(), machineLearningNode, lastShardRouting); } metadataBuilder.put(projectBuilder).generateClusterUuidIfNeeded(); @@ -182,38 +194,17 @@ private void setup(boolean allowMaster) { searchNodeTwo, nodeToShardRoutings.get(searchNodeTwo).toArray(new ShardRouting[0]) ); - if (allowMaster) { - routingMasterNode = RoutingNodesHelper.routingNode(masterNode.getId(), masterNode, masterPrimaryShardRouting); - } else { - routingMasterNode = RoutingNodesHelper.routingNode(masterNode.getId(), masterNode); - } - } + routingMasterNode = RoutingNodesHelper.routingNode(masterNode.getId(), masterNode); - 
public void testCanAllocateUnderThreshold() { - setup(true); - - Settings settings = Settings.builder() - .put("stateless.enabled", "true") - .put("cluster.routing.allocation.index_balance_decider.enabled", "true") - .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 0) - .build(); - - IndexBalanceAllocationDecider indexBalanceAllocationDecider = new IndexBalanceAllocationDecider( - settings, - createBuiltInClusterSettings(settings) - ); ClusterInfo clusterInfo = ClusterInfo.builder().build(); - - var routingAllocation = new RoutingAllocation( - null, - RoutingNodes.immutable(clusterState.globalRoutingTable(), clusterState.nodes()), - clusterState, - clusterInfo, - null, - System.nanoTime() - ); + routingAllocation = new RoutingAllocation(null, clusterState.getRoutingNodes(), clusterState, clusterInfo, null, System.nanoTime()); routingAllocation.setDebugMode(RoutingAllocation.DebugMode.ON); + indexBalanceAllocationDecider = new IndexBalanceAllocationDecider(settings, createBuiltInClusterSettings(settings)); + } + + public void testCanAllocateUnderThreshold() { + setup(false); ShardRouting newIndexShardRouting = TestShardRouting.newShardRouting( new ShardId("newIndex", "uuid", 1), indexNodeTwo.getId(), @@ -249,7 +240,7 @@ public void testCanAllocateUnderThreshold() { assertDecisionMatches( "Assigning a shard to a node that is not index or search node should succeed", - indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingMasterNode, routingAllocation), + indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingMachineLearningNode, routingAllocation), Decision.Type.YES, "Node has neither index nor search roles." 
); @@ -290,30 +281,7 @@ public void testCanAllocateUnderThreshold() { } public void testCanAllocateExceedThreshold() { - setup(false); - - Settings settings = Settings.builder() - .put("stateless.enabled", "true") - .put("cluster.routing.allocation.index_balance_decider.enabled", "true") - .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 0) - .build(); - - IndexBalanceAllocationDecider indexBalanceAllocationDecider = new IndexBalanceAllocationDecider( - settings, - createBuiltInClusterSettings(settings) - ); - ClusterInfo clusterInfo = ClusterInfo.builder().build(); - - var routingAllocation = new RoutingAllocation( - null, - RoutingNodes.immutable(clusterState.globalRoutingTable(), clusterState.nodes()), - clusterState, - clusterInfo, - null, - System.nanoTime() - ); - routingAllocation.setDebugMode(RoutingAllocation.DebugMode.ON); - + setup(true); ShardRouting primaryIndexShardRouting = TestShardRouting.newShardRouting( new ShardId(indexMetadata.getIndex(), 1), indexNodeTwo.getId(), @@ -330,23 +298,42 @@ public void testCanAllocateExceedThreshold() { ShardRoutingState.STARTED ); + int ideal = numberOfPrimaryShards / 2; + int current = numberOfPrimaryShards / 2; + assertDecisionMatches( "Assigning an additional primary shard to an index node at capacity should fail", indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingIndexNodeOne, routingAllocation), Decision.Type.NOT_PREFERRED, - "There are [2] eligible nodes in the [primary shards] tier for assignment of [10] shards " - + "in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [5]\n" - + "shard would be assigned per node (the index balance skew setting is [0]). This node is already assigned [5] shards of\n" + "There are [2] eligible nodes in the [primary shards] tier for assignment of [" + + numberOfPrimaryShards + + "] shards " + + "in index [[IndexBalanceAllocationDeciderIndex]]. 
Ideally no more than [" + + ideal + + "]\n" + + "shard would be assigned per node (the index balance skew setting is [0]). This node is already assigned [" + + current + + "] shards of\n" + "the index.\n" ); + int total = numberOfPrimaryShards * replicationFactor; + ideal = numberOfPrimaryShards * replicationFactor / 2; + current = numberOfPrimaryShards * replicationFactor / 2; + assertDecisionMatches( "Assigning an additional replica shard to an replica node at capacity should fail", indexBalanceAllocationDecider.canAllocate(replicaIndexShardRouting, routingSearchNodeOne, routingAllocation), Decision.Type.NOT_PREFERRED, - "There are [2] eligible nodes in the [replicas] tier for assignment of [20] " - + "shards in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [10]\n" - + "shard would be assigned per node (the index balance skew setting is [0]). This node is already assigned [10] shards of\n" + "There are [2] eligible nodes in the [replicas] tier for assignment of [" + + total + + "] " + + "shards in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [" + + ideal + + "]\n" + + "shard would be assigned per node (the index balance skew setting is [0]). 
This node is already assigned [" + + current + + "] shards of\n" + "the index.\n" ); } From a21e2d652646cf9576c6fa3c3ef7335fed911420 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 31 Oct 2025 12:27:52 -0700 Subject: [PATCH 39/67] fix test regressions --- .../decider/IndexBalanceAllocationDeciderTests.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index ba33e0f6bd430..d4db6c2e7f186 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -254,15 +254,6 @@ public void testCanAllocateUnderThreshold() { ); } - for (RoutingNode routingNode : List.of(routingSearchNodeOne, routingSearchNodeTwo)) { - assertDecisionMatches( - "Assigning a primary shard to a search node should succeed", - indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingNode, routingAllocation), - Decision.Type.YES, - "Decider allows primaries move to search nodes." 
- ); - } - for (RoutingNode routingNode : List.of(routingIndexNodeOne, routingIndexNodeTwo)) { assertDecisionMatches( "Assigning a replica shard to a search node should succeed", From 7057bf04d7ba1ea8c89795b0d20cc367b7088029 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 31 Oct 2025 12:33:05 -0700 Subject: [PATCH 40/67] fix test regressions --- .../allocation/decider/IndexBalanceAllocationDeciderIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java index c2be7355fdb33..4daaeec54b48c 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java @@ -92,7 +92,7 @@ private boolean checkShardAssignment( private TestHarness setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistributed() { Settings settings = Settings.builder() .put(IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_ENABLED_SETTING.getKey(), true) - .put(IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE.getKey(), 1.0d) + .put(IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE.getKey(), 0) .build(); internalCluster().startMasterOnlyNode(settings); From 292c5502990c0de2880db28c7d83693635f0cd38 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Sun, 2 Nov 2025 22:24:32 -0800 Subject: [PATCH 41/67] fix test regressions --- .../IndexBalanceAllocationDecider.java | 8 +++----- .../IndexBalanceAllocationDeciderTests.java | 20 +++++++------------ 2 files changed, 10 insertions(+), 18 deletions(-) diff --git 
a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 302cd6a6e7a30..98d57c9e91293 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -112,11 +112,9 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing if (currentAllocation >= threshold) { String explanation = Strings.format( - """ - There are [%d] eligible nodes in the [%s] tier for assignment of [%d] shards in index [%s]. Ideally no more than [%.0f] - shard would be assigned per node (the index balance skew setting is [%d]). This node is already assigned [%d] shards of - the index. - """, + "There are [%d] eligible nodes in the [%s] tier for assignment of [%d] shards in index [%s]. Ideally no more than [%.0f] " + + "shard would be assigned per node (the index balance skew setting is [%d]). 
This node is already assigned [%d] shards of " + + "the index.", eligibleNodes.size(), nomenclature, totalShards, diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index d4db6c2e7f186..a63e48b590a2a 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -82,7 +82,7 @@ private void setup(boolean exceedThreshold) { .build(); numberOfPrimaryShards = randomIntBetween(10, 20); - replicationFactor = 2; + replicationFactor = randomIntBetween(1, 2); if (numberOfPrimaryShards % 2 != 0 && exceedThreshold) numberOfPrimaryShards++; if (numberOfPrimaryShards % 2 == 0 && exceedThreshold == false) numberOfPrimaryShards++; @@ -298,14 +298,11 @@ public void testCanAllocateExceedThreshold() { Decision.Type.NOT_PREFERRED, "There are [2] eligible nodes in the [primary shards] tier for assignment of [" + numberOfPrimaryShards - + "] shards " - + "in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [" + + "] shards in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [" + ideal - + "]\n" - + "shard would be assigned per node (the index balance skew setting is [0]). This node is already assigned [" + + "] shard would be assigned per node (the index balance skew setting is [0]). This node is already assigned [" + current - + "] shards of\n" - + "the index.\n" + + "] shards of the index." 
); int total = numberOfPrimaryShards * replicationFactor; @@ -318,14 +315,11 @@ public void testCanAllocateExceedThreshold() { Decision.Type.NOT_PREFERRED, "There are [2] eligible nodes in the [replicas] tier for assignment of [" + total - + "] " - + "shards in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [" + + "] shards in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [" + ideal - + "]\n" - + "shard would be assigned per node (the index balance skew setting is [0]). This node is already assigned [" + + "] shard would be assigned per node (the index balance skew setting is [0]). This node is already assigned [" + current - + "] shards of\n" - + "the index.\n" + + "] shards of the index." ); } From 4f83a9e64a8992b005ed3e049365ea34f07cf347 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Mon, 3 Nov 2025 08:42:05 -0800 Subject: [PATCH 42/67] fix test regressions --- .../allocation/decider/IndexBalanceAllocationDecider.java | 4 ++-- .../decider/IndexBalanceAllocationDeciderTests.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 98d57c9e91293..55173836cbb92 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -113,8 +113,8 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing if (currentAllocation >= threshold) { String explanation = Strings.format( "There are [%d] eligible nodes in the [%s] tier for assignment of [%d] shards in index [%s]. Ideally no more than [%.0f] " - + "shard would be assigned per node (the index balance skew setting is [%d]). 
This node is already assigned [%d] shards of " - + "the index.", + + "shard would be assigned per node (the index balance skew setting is [%d]). This node is already assigned [%d] shards" + + " of the index.", eligibleNodes.size(), nomenclature, totalShards, diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index a63e48b590a2a..5f20ab2b5d4d3 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -82,7 +82,7 @@ private void setup(boolean exceedThreshold) { .build(); numberOfPrimaryShards = randomIntBetween(10, 20); - replicationFactor = randomIntBetween(1, 2); + replicationFactor = 2; if (numberOfPrimaryShards % 2 != 0 && exceedThreshold) numberOfPrimaryShards++; if (numberOfPrimaryShards % 2 == 0 && exceedThreshold == false) numberOfPrimaryShards++; From 3314a01c3ba300007e3371b7c89f4cd3aedb6973 Mon Sep 17 00:00:00 2001 From: Zhubo Tang Date: Fri, 7 Nov 2025 10:43:09 -0800 Subject: [PATCH 43/67] Update server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java Co-authored-by: Henning Andersen <33268011+henningandersen@users.noreply.github.com> --- .../allocation/decider/IndexBalanceAllocationDecider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 55173836cbb92..1e1224fd043c0 100644 --- 
a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -107,7 +107,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing assert totalShards > 0; final double idealAllocation = Math.ceil((double) totalShards / eligibleNodes.size()); - final int threshold = (int) Math.ceil(idealAllocation) + indexBalanceConstraintSettings.getLoadSkewTolerance(); + final int threshold = (totalShards + eligibleNodes.size() - 1 + indexBalanceConstraintSettings.getLoadSkewTolerance()) / eligibleNodes.size(); final int currentAllocation = node.numberOfOwningShardsForIndex(index); if (currentAllocation >= threshold) { From f6a1500c11d7868fde5ddc1674cbcfa89f2c6d99 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 7 Nov 2025 18:50:31 +0000 Subject: [PATCH 44/67] [CI] Auto commit changes from spotless --- .../allocation/decider/IndexBalanceAllocationDecider.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 1e1224fd043c0..9c0ffa15fd6d5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -107,7 +107,8 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing assert totalShards > 0; final double idealAllocation = Math.ceil((double) totalShards / eligibleNodes.size()); - final int threshold = (totalShards + eligibleNodes.size() - 1 + indexBalanceConstraintSettings.getLoadSkewTolerance()) / eligibleNodes.size(); + final int 
threshold = (totalShards + eligibleNodes.size() - 1 + indexBalanceConstraintSettings.getLoadSkewTolerance()) + / eligibleNodes.size(); final int currentAllocation = node.numberOfOwningShardsForIndex(index); if (currentAllocation >= threshold) { From c2a0d75850dec6c65ebcdcf7bad39a9be249f070 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 7 Nov 2025 12:26:55 -0800 Subject: [PATCH 45/67] Address feedbacks --- .../decider/IndexBalanceAllocationDeciderIT.java | 1 - .../decider/WriteLoadConstraintDeciderIT.java | 2 +- .../cluster/node/DiscoveryNodeFilters.java | 2 +- .../allocation/IndexBalanceConstraintSettings.java | 12 ++++++------ .../decider/IndexBalanceAllocationDecider.java | 12 ++++++------ .../common/settings/ClusterSettings.java | 2 +- .../decider/IndexBalanceAllocationDeciderTests.java | 7 +++---- 7 files changed, 18 insertions(+), 20 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java index 4daaeec54b48c..82d690d2af133 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java @@ -92,7 +92,6 @@ private boolean checkShardAssignment( private TestHarness setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistributed() { Settings settings = Settings.builder() .put(IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_ENABLED_SETTING.getKey(), true) - .put(IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE.getKey(), 0) .build(); internalCluster().startMasterOnlyNode(settings); diff --git 
a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java index c0edb7929b3a9..c6f56e6a5ea6c 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java @@ -604,7 +604,7 @@ private Settings enabledWriteLoadDeciderSettings(int utilizationThresholdPercent WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_QUEUE_LATENCY_THRESHOLD_SETTING.getKey(), TimeValue.timeValueMillis(queueLatencyThresholdMillis) ) - .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 1000) + .put("cluster.routing.allocation.index_balance_decider.excess_shards", 1000) // Disable rebalancing so that testing can see Decider change outcomes only. 
.put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none") .build(); diff --git a/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java b/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java index 439e73243b9c6..54261ea3852a9 100644 --- a/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java +++ b/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java @@ -250,7 +250,7 @@ private boolean isSingleNodeFilterInternal() { || (filters.size() > 1 && opType == OpType.AND && NON_ATTRIBUTE_NAMES.containsAll(filters.keySet())); } - public boolean isEmpty() { + public boolean hasNoFilters() { return filters.isEmpty(); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java index 141adde62b389..df7df4285a111 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java @@ -34,8 +34,8 @@ public class IndexBalanceConstraintSettings { * i.e. ideal = 4 shards, skew_tolerance = 1 * maximum tolerated index shards = 4 + 1 = 5. 
*/ - public static final Setting INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE = Setting.intSetting( - SETTING_PREFIX + "load_skew_tolerance", + public static final Setting INDEX_BALANCE_DECIDER_EXCESS_SHARDS = Setting.intSetting( + SETTING_PREFIX + "excess_shards", 0, 0, Setting.Property.Dynamic, @@ -43,19 +43,19 @@ public class IndexBalanceConstraintSettings { ); private volatile boolean deciderEnabled; - private volatile int loadSkewTolerance; + private volatile int excessShards; public IndexBalanceConstraintSettings(ClusterSettings clusterSettings) { clusterSettings.initializeAndWatch(INDEX_BALANCE_DECIDER_ENABLED_SETTING, enabled -> this.deciderEnabled = enabled); - clusterSettings.initializeAndWatch(INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE, value -> this.loadSkewTolerance = value); + clusterSettings.initializeAndWatch(INDEX_BALANCE_DECIDER_EXCESS_SHARDS, value -> this.excessShards = value); } public boolean isDeciderEnabled() { return this.deciderEnabled; } - public int getLoadSkewTolerance() { - return this.loadSkewTolerance; + public int getExcessShards() { + return this.excessShards; } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 9c0ffa15fd6d5..a2de0f0c39ff4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -63,7 +63,7 @@ public IndexBalanceAllocationDecider(Settings settings, ClusterSettings clusterS public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { if (indexBalanceConstraintSettings.isDeciderEnabled() == false || isStateless == false - || clusterExcludeFilters != null && clusterExcludeFilters.isEmpty() == false) { + || 
clusterExcludeFilters != null && clusterExcludeFilters.hasNoFilters() == false) { return Decision.single(Decision.Type.YES, NAME, "Decider is disabled."); } @@ -82,7 +82,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } if (node.node().getRoles().contains(SEARCH_ROLE) && shardRouting.primary()) { - return Decision.single(Decision.Type.YES, NAME, "Decider allows primaries move to search nodes."); + return Decision.single(Decision.Type.YES, NAME, "A search node cannot own primary shards. Decider inactive."); } final ProjectId projectId = allocation.getClusterState().metadata().projectFor(index).id(); @@ -94,20 +94,20 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing collectEligibleNodes(allocation, eligibleNodes, INDEX_ROLE); // Primary shards only. totalShards = allocation.getClusterState().routingTable(projectId).index(index).size(); - nomenclature = "primary shards"; + nomenclature = "index"; } else if (node.node().getRoles().contains(SEARCH_ROLE)) { collectEligibleNodes(allocation, eligibleNodes, SEARCH_ROLE); // Replicas only. 
final IndexMetadata indexMetadata = allocation.getClusterState().metadata().getProject(projectId).index(index); totalShards = indexMetadata.getNumberOfShards() * indexMetadata.getNumberOfReplicas(); - nomenclature = "replicas"; + nomenclature = "search"; } assert eligibleNodes.isEmpty() == false; assert totalShards > 0; final double idealAllocation = Math.ceil((double) totalShards / eligibleNodes.size()); - final int threshold = (totalShards + eligibleNodes.size() - 1 + indexBalanceConstraintSettings.getLoadSkewTolerance()) + final int threshold = (totalShards + eligibleNodes.size() - 1 + indexBalanceConstraintSettings.getExcessShards()) / eligibleNodes.size(); final int currentAllocation = node.numberOfOwningShardsForIndex(index); @@ -121,7 +121,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing totalShards, index, idealAllocation, - indexBalanceConstraintSettings.getLoadSkewTolerance(), + indexBalanceConstraintSettings.getExcessShards(), currentAllocation ); diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index 5ec30f766c2ef..3c73cd0e7b462 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -661,7 +661,7 @@ public void apply(Settings value, Settings current, Settings previous) { WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_QUEUE_LATENCY_THRESHOLD_SETTING, WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_REROUTE_INTERVAL_SETTING, IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_ENABLED_SETTING, - IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_LOAD_SKEW_TOLERANCE, + IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_EXCESS_SHARDS, WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_MINIMUM_LOGGING_INTERVAL, SamplingService.TTL_POLL_INTERVAL_SETTING, 
BlobStoreRepository.MAX_HEAP_SIZE_FOR_SNAPSHOT_DELETION_SETTING, diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index 5f20ab2b5d4d3..4c1cad9219d3d 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -78,7 +78,6 @@ private void setup(boolean exceedThreshold) { settings = Settings.builder() .put("stateless.enabled", "true") .put("cluster.routing.allocation.index_balance_decider.enabled", "true") - .put("cluster.routing.allocation.index_balance_decider.load_skew_tolerance", 0) .build(); numberOfPrimaryShards = randomIntBetween(10, 20); @@ -250,7 +249,7 @@ public void testCanAllocateUnderThreshold() { "Assigning a new primary shard to a search node should succeed", indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingNode, routingAllocation), Decision.Type.YES, - "Decider allows primaries move to search nodes." + "A search node cannot own primary shards. Decider inactive." ); } @@ -296,7 +295,7 @@ public void testCanAllocateExceedThreshold() { "Assigning an additional primary shard to an index node at capacity should fail", indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingIndexNodeOne, routingAllocation), Decision.Type.NOT_PREFERRED, - "There are [2] eligible nodes in the [primary shards] tier for assignment of [" + "There are [2] eligible nodes in the [index] tier for assignment of [" + numberOfPrimaryShards + "] shards in index [[IndexBalanceAllocationDeciderIndex]]. 
Ideally no more than [" + ideal @@ -313,7 +312,7 @@ public void testCanAllocateExceedThreshold() { "Assigning an additional replica shard to an replica node at capacity should fail", indexBalanceAllocationDecider.canAllocate(replicaIndexShardRouting, routingSearchNodeOne, routingAllocation), Decision.Type.NOT_PREFERRED, - "There are [2] eligible nodes in the [replicas] tier for assignment of [" + "There are [2] eligible nodes in the [search] tier for assignment of [" + total + "] shards in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [" + ideal From 12b3e270e4fc3c38a2d66fe9511a3f82abf1e1ce Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 7 Nov 2025 20:33:03 +0000 Subject: [PATCH 46/67] [CI] Auto commit changes from spotless --- .../allocation/decider/IndexBalanceAllocationDecider.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index a2de0f0c39ff4..ca5d209644b7b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -107,8 +107,8 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing assert totalShards > 0; final double idealAllocation = Math.ceil((double) totalShards / eligibleNodes.size()); - final int threshold = (totalShards + eligibleNodes.size() - 1 + indexBalanceConstraintSettings.getExcessShards()) - / eligibleNodes.size(); + final int threshold = (totalShards + eligibleNodes.size() - 1 + indexBalanceConstraintSettings.getExcessShards()) / eligibleNodes + .size(); final int currentAllocation = node.numberOfOwningShardsForIndex(index); if (currentAllocation >= threshold) 
{ From aba5dcfe55550dfd1dc508bc9c8da44c79d396c3 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 7 Nov 2025 13:19:53 -0800 Subject: [PATCH 47/67] Address feedbacks --- .../IndexBalanceAllocationDeciderIT.java | 188 ------------------ .../IndexBalanceAllocationDecider.java | 4 +- .../IndexBalanceAllocationDeciderTests.java | 6 +- 3 files changed, 4 insertions(+), 194 deletions(-) delete mode 100644 server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java deleted file mode 100644 index 82d690d2af133..0000000000000 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderIT.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.cluster.routing.allocation.decider; - -import org.elasticsearch.cluster.metadata.ProjectId; -import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.routing.RoutingNodes; -import org.elasticsearch.cluster.routing.allocation.IndexBalanceConstraintSettings; -import org.elasticsearch.common.Strings; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.Index; -import org.elasticsearch.test.ClusterServiceUtils; -import org.elasticsearch.test.ESIntegTestCase; -import org.elasticsearch.transport.TransportService; - -import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; -import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; -import static org.hamcrest.Matchers.equalTo; - -@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) -public class IndexBalanceAllocationDeciderIT extends ESIntegTestCase { - - public void testIndexShardCountExceedsAverageAllocation() { - var testHarness = setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistributed(); - - // Exclude assignment of shards to the first data nodes via the {@link FilterAllocationDecider} settings. - // This triggers the balancer to work out a new routing. 
- logger.info("---> Remove shard assignments of node " + testHarness.firstDataNodeName + " by excluding first data node."); - updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", testHarness.firstDataNodeName)); - - refreshClusterInfo(); - - int lowerThreshold = testHarness.randomNumberOfShards / 2; - int upperThreshold = (int) Math.ceil((double) testHarness.randomNumberOfShards / 2); - - var verifyShardCountBalanceListener = ClusterServiceUtils.addMasterTemporaryStateListener(clusterState -> { - var indexRoutingTable = clusterState.routingTable(ProjectId.DEFAULT).index(testHarness.indexName); - if (indexRoutingTable == null) { - return false; - } - if (indexRoutingTable.numberOfNodesShardsAreAllocatedOn() != 2) { - return false; - } - - Index index = indexRoutingTable.getIndex(); - assertThat(indexRoutingTable.numberOfNodesShardsAreAllocatedOn(), equalTo(2)); - clusterState.getRoutingNodes().stream().forEach(node -> { - if (node.hasIndex(index)) { - assert node.numberOfOwningShardsForIndex(index) >= lowerThreshold; - assert node.numberOfOwningShardsForIndex(index) <= upperThreshold; - } - }); - return true; - }); - - safeAwait(verifyShardCountBalanceListener); - } - - private boolean checkShardAssignment( - RoutingNodes routingNodes, - Index index, - String firstDataNodeId, - String secondDataNodeId, - String thirdDataNodeId, - int upperLimitFirstDataNode, - int lowerLimitFirstDataNode, - int upperLimitSecondDataNode, - int lowerLimitSecondDataNode, - int upperLimitThirdDataNode, - int lowerLimitThirdDataNode - ) { - - int firstDataNodeRealNumberOfShards = routingNodes.node(firstDataNodeId).numberOfOwningShardsForIndex(index); - int secondDataNodeRealNumberOfShards = routingNodes.node(secondDataNodeId).numberOfOwningShardsForIndex(index); - int thirdDataNodeRealNumberOfShards = routingNodes.node(thirdDataNodeId).numberOfOwningShardsForIndex(index); - - return firstDataNodeRealNumberOfShards <= upperLimitFirstDataNode - && 
firstDataNodeRealNumberOfShards >= lowerLimitFirstDataNode - && secondDataNodeRealNumberOfShards <= upperLimitSecondDataNode - && secondDataNodeRealNumberOfShards >= lowerLimitSecondDataNode - && thirdDataNodeRealNumberOfShards <= upperLimitThirdDataNode - && thirdDataNodeRealNumberOfShards >= lowerLimitThirdDataNode; - } - - private TestHarness setUpThreeHealthyDataNodesAndVerifyIndexShardsBalancedDistributed() { - Settings settings = Settings.builder() - .put(IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_ENABLED_SETTING.getKey(), true) - .build(); - internalCluster().startMasterOnlyNode(settings); - - final var dataNodes = internalCluster().startDataOnlyNodes(3, settings); - - final String firstDataNodeName = dataNodes.get(0); - final String secondDataNodeName = dataNodes.get(1); - final String thirdDataNodeName = dataNodes.get(2); - final String firstDataNodeId = getNodeId(firstDataNodeName); - final String secondDataNodeId = getNodeId(secondDataNodeName); - final String thirdDataNodeId = getNodeId(thirdDataNodeName); - ensureStableCluster(4); - - final DiscoveryNode firstDiscoveryNode = internalCluster().getInstance(TransportService.class, firstDataNodeName).getLocalNode(); - final DiscoveryNode secondDiscoveryNode = internalCluster().getInstance(TransportService.class, secondDataNodeName).getLocalNode(); - final DiscoveryNode thirdDiscoveryNode = internalCluster().getInstance(TransportService.class, thirdDataNodeName).getLocalNode(); - - String format = """ - ---> first node NAME %s and ID %s; second node NAME %s and ID %s; third node NAME %s and ID %s; - """; - logger.info( - Strings.format( - format, - firstDataNodeName, - firstDataNodeId, - secondDataNodeName, - secondDataNodeId, - thirdDataNodeName, - thirdDataNodeId - ) - ); - - int randomNumberOfShards = randomIntBetween(15, 20); - String indexName = randomIdentifier(); - int lowerThreshold = randomNumberOfShards / 3; - int upperThreshold = (int) Math.ceil((double) randomNumberOfShards / 3); - - 
var verifyShardAllocationListener = ClusterServiceUtils.addMasterTemporaryStateListener(clusterState -> { - var indexRoutingTable = clusterState.routingTable(ProjectId.DEFAULT).index(indexName); - if (indexRoutingTable == null) { - return false; - } - return checkShardAssignment( - clusterState.getRoutingNodes(), - indexRoutingTable.getIndex(), - firstDataNodeId, - secondDataNodeId, - thirdDataNodeId, - upperThreshold, - lowerThreshold, - upperThreshold, - lowerThreshold, - upperThreshold, - lowerThreshold - ); - }); - - createIndex( - indexName, - Settings.builder().put(SETTING_NUMBER_OF_SHARDS, randomNumberOfShards).put(SETTING_NUMBER_OF_REPLICAS, 0).build() - ); - ensureGreen(indexName); - logger.info("---> wait for [" + randomNumberOfShards + "] shards to be assigned to node "); - - safeAwait(verifyShardAllocationListener); - return new TestHarness( - firstDataNodeName, - secondDataNodeName, - thirdDataNodeName, - firstDataNodeId, - secondDataNodeId, - thirdDataNodeName, - firstDiscoveryNode, - secondDiscoveryNode, - thirdDiscoveryNode, - indexName, - randomNumberOfShards - ); - } - - record TestHarness( - String firstDataNodeName, - String secondDataNodeName, - String thirdDataNodeName, - String firstDataNodeId, - String secondDataNodeId, - String thirdDataNodeId, - DiscoveryNode firstDiscoveryNode, - DiscoveryNode secondDiscoveryNode, - DiscoveryNode thirdDiscoveryNode, - String indexName, - int randomNumberOfShards - ) {} -} diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index a2de0f0c39ff4..3b2bd8a1d5000 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -114,8 +114,8 @@ public Decision 
canAllocate(ShardRouting shardRouting, RoutingNode node, Routing if (currentAllocation >= threshold) { String explanation = Strings.format( "There are [%d] eligible nodes in the [%s] tier for assignment of [%d] shards in index [%s]. Ideally no more than [%.0f] " - + "shard would be assigned per node (the index balance skew setting is [%d]). This node is already assigned [%d] shards" - + " of the index.", + + "shard would be assigned per node (the index balance excess shards setting is [%d]). This node is already assigned" + + " [%d] shards of the index.", eligibleNodes.size(), nomenclature, totalShards, diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index 4c1cad9219d3d..c6dec3d8a859e 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -59,7 +59,6 @@ public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { private RoutingNode routingIndexNodeTwo; private RoutingNode routingSearchNodeOne; private RoutingNode routingSearchNodeTwo; - private RoutingNode routingMasterNode; private RoutingNode routingMachineLearningNode; private List allNodes; @@ -193,7 +192,6 @@ private void setup(boolean exceedThreshold) { searchNodeTwo, nodeToShardRoutings.get(searchNodeTwo).toArray(new ShardRouting[0]) ); - routingMasterNode = RoutingNodesHelper.routingNode(masterNode.getId(), masterNode); ClusterInfo clusterInfo = ClusterInfo.builder().build(); routingAllocation = new RoutingAllocation(null, clusterState.getRoutingNodes(), clusterState, clusterInfo, null, System.nanoTime()); @@ -299,7 +297,7 @@ public void testCanAllocateExceedThreshold() { + numberOfPrimaryShards + "] 
shards in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [" + ideal - + "] shard would be assigned per node (the index balance skew setting is [0]). This node is already assigned [" + + "] shard would be assigned per node (the index balance excess shards setting is [0]). This node is already assigned [" + current + "] shards of the index." ); @@ -316,7 +314,7 @@ public void testCanAllocateExceedThreshold() { + total + "] shards in index [[IndexBalanceAllocationDeciderIndex]]. Ideally no more than [" + ideal - + "] shard would be assigned per node (the index balance skew setting is [0]). This node is already assigned [" + + "] shard would be assigned per node (the index balance excess shards setting is [0]). This node is already assigned [" + current + "] shards of the index." ); From 0860ccad1ee1598fd20ce4d45fa6111fe4ff5bd1 Mon Sep 17 00:00:00 2001 From: Zhubo Tang Date: Fri, 7 Nov 2025 13:22:37 -0800 Subject: [PATCH 48/67] Update docs/changelog/135875.yaml --- docs/changelog/135875.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/135875.yaml b/docs/changelog/135875.yaml index ee9fdb9e06214..4f00fb73e9712 100644 --- a/docs/changelog/135875.yaml +++ b/docs/changelog/135875.yaml @@ -2,5 +2,5 @@ pr: 135875 summary: "Allocation: introduce a new decider that balances the index shard count\ \ among nodes" area: Allocation -type: enhancement +type: feature issues: [] From 76688d631a66fa62ab1ad1875efca081efe9b907 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 7 Nov 2025 15:04:39 -0800 Subject: [PATCH 49/67] Address feedbacks --- .../IndexBalanceAllocationDeciderTests.java | 153 ++++++++++++------ 1 file changed, 100 insertions(+), 53 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index 
c6dec3d8a859e..d2cbba0f1aa67 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -9,6 +9,8 @@ package org.elasticsearch.cluster.routing.allocation.decider; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + import org.elasticsearch.cluster.ClusterInfo; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; @@ -18,6 +20,7 @@ import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.ProjectMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeFilters; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodeUtils; import org.elasticsearch.cluster.node.DiscoveryNodes; @@ -69,14 +72,19 @@ public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { private Settings settings; private RoutingAllocation routingAllocation; private IndexBalanceAllocationDecider indexBalanceAllocationDecider; + private int excessShards; + private ShardRouting indexTierShardRouting; + private ShardRouting searchTierShardRouting; - private void setup(boolean exceedThreshold) { + private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boolean allowExcessShards) { final String indexName = "IndexBalanceAllocationDeciderIndex"; final Map> nodeToShardRoutings = new HashMap<>(); + excessShards = allowExcessShards ? 
randomIntBetween(1, 5) : 0; settings = Settings.builder() .put("stateless.enabled", "true") .put("cluster.routing.allocation.index_balance_decider.enabled", "true") + .put("cluster.routing.allocation.index_balance_decider.excess_shards", excessShards) .build(); numberOfPrimaryShards = randomIntBetween(10, 20); @@ -103,16 +111,27 @@ private void setup(boolean exceedThreshold) { ClusterState.Builder state = ClusterState.builder(new ClusterName("test-IndexBalanceAllocationDecider")); final ProjectMetadata.Builder projectBuilder = ProjectMetadata.builder(projectId); + + Settings.Builder builder = indexSettings(IndexVersion.current(), numberOfPrimaryShards, replicationFactor).put( + SETTING_CREATION_DATE, + System.currentTimeMillis() + ); + + if (hasDiscoveryNodeFilters) { + String setting = randomFrom( + IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_SETTING, + IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING, + IndexMetadata.INDEX_ROUTING_INITIAL_RECOVERY_GROUP_SETTING + ).getKey(); + String attribute = randomFrom(DiscoveryNodeFilters.SINGLE_NODE_NAMES); + builder.put(setting + attribute, randomAlphaOfLength(randomIntBetween(2, 3))); + } + indexMetadata = IndexMetadata.builder(indexName) - .settings( - indexSettings(IndexVersion.current(), numberOfPrimaryShards, replicationFactor).put( - SETTING_CREATION_DATE, - System.currentTimeMillis() - ) - ) + .settings(builder) .timestampRange(IndexLongFieldRange.UNKNOWN) - .eventIngestedRange(IndexLongFieldRange.UNKNOWN) - .build(); + .eventIngestedRange(IndexLongFieldRange.UNKNOWN).build(); + projectBuilder.put(indexMetadata, false); IndexRoutingTable.Builder indexRoutingTableBuilder = IndexRoutingTable.builder(indexMetadata.getIndex()); @@ -198,10 +217,26 @@ private void setup(boolean exceedThreshold) { routingAllocation.setDebugMode(RoutingAllocation.DebugMode.ON); indexBalanceAllocationDecider = new IndexBalanceAllocationDecider(settings, createBuiltInClusterSettings(settings)); + + indexTierShardRouting = 
TestShardRouting.newShardRouting( + new ShardId(indexMetadata.getIndex(), 1), + randomFrom(indexNodeOne, indexNodeTwo).getId(), + null, + true, + ShardRoutingState.STARTED + ); + + searchTierShardRouting = TestShardRouting.newShardRouting( + new ShardId(indexMetadata.getIndex(), 1), + randomFrom(searchNodeOne, searchNodeTwo).getId(), + null, + false, + ShardRoutingState.STARTED + ); } public void testCanAllocateUnderThreshold() { - setup(false); + setup(false, false, randomBoolean()); ShardRouting newIndexShardRouting = TestShardRouting.newShardRouting( new ShardId("newIndex", "uuid", 1), indexNodeTwo.getId(), @@ -219,25 +254,9 @@ public void testCanAllocateUnderThreshold() { ); } - ShardRouting primaryIndexShardRouting = TestShardRouting.newShardRouting( - new ShardId(indexMetadata.getIndex(), 1), - indexNodeTwo.getId(), - null, - true, - ShardRoutingState.STARTED - ); - - ShardRouting replicaIndexShardRouting = TestShardRouting.newShardRouting( - new ShardId(indexMetadata.getIndex(), 1), - searchNodeTwo.getId(), - null, - false, - ShardRoutingState.STARTED - ); - assertDecisionMatches( "Assigning a shard to a node that is not index or search node should succeed", - indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingMachineLearningNode, routingAllocation), + indexBalanceAllocationDecider.canAllocate(indexTierShardRouting, routingMachineLearningNode, routingAllocation), Decision.Type.YES, "Node has neither index nor search roles." ); @@ -245,7 +264,7 @@ public void testCanAllocateUnderThreshold() { for (RoutingNode routingNode : List.of(routingSearchNodeOne, routingSearchNodeTwo)) { assertDecisionMatches( "Assigning a new primary shard to a search node should succeed", - indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingNode, routingAllocation), + indexBalanceAllocationDecider.canAllocate(indexTierShardRouting, routingNode, routingAllocation), Decision.Type.YES, "A search node cannot own primary shards. 
Decider inactive." ); @@ -254,44 +273,45 @@ public void testCanAllocateUnderThreshold() { for (RoutingNode routingNode : List.of(routingIndexNodeOne, routingIndexNodeTwo)) { assertDecisionMatches( "Assigning a replica shard to a search node should succeed", - indexBalanceAllocationDecider.canAllocate(replicaIndexShardRouting, routingNode, routingAllocation), + indexBalanceAllocationDecider.canAllocate(searchTierShardRouting, routingNode, routingAllocation), Decision.Type.YES, "An index node cannot own search shards. Decider inactive." ); } - assertDecisionMatches( - "Assigning an additional primary shard to an index node has capacity should succeed", - indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingIndexNodeOne, routingAllocation), - Decision.Type.YES, - "Node index shard allocation is under the threshold." - ); + verifyCanAllocate(); } - public void testCanAllocateExceedThreshold() { - setup(true); - ShardRouting primaryIndexShardRouting = TestShardRouting.newShardRouting( - new ShardId(indexMetadata.getIndex(), 1), - indexNodeTwo.getId(), - null, - true, - ShardRoutingState.STARTED - ); + private void verifyCanAllocate() { + for (RoutingNode routingNode : List.of(routingIndexNodeOne, routingIndexNodeTwo)) { + assertDecisionMatches( + "Assigning an additional primary shard to an index node has capacity should succeed", + indexBalanceAllocationDecider.canAllocate(indexTierShardRouting, routingNode, routingAllocation), + Decision.Type.YES, + "Node index shard allocation is under the threshold." + ); + } + + for (RoutingNode routingNode : List.of(routingSearchNodeOne, routingSearchNodeTwo)) { + assertDecisionMatches( + "Assigning an additional primary shard to an index node has capacity should succeed", + indexBalanceAllocationDecider.canAllocate(searchTierShardRouting, routingNode, routingAllocation), + Decision.Type.YES, + "Node index shard allocation is under the threshold." 
+ ); + } + } - ShardRouting replicaIndexShardRouting = TestShardRouting.newShardRouting( - new ShardId(indexMetadata.getIndex(), 1), - searchNodeTwo.getId(), - null, - false, - ShardRoutingState.STARTED - ); + + public void testCanAllocateExceedThreshold() { + setup(true, false, false); int ideal = numberOfPrimaryShards / 2; int current = numberOfPrimaryShards / 2; assertDecisionMatches( "Assigning an additional primary shard to an index node at capacity should fail", - indexBalanceAllocationDecider.canAllocate(primaryIndexShardRouting, routingIndexNodeOne, routingAllocation), + indexBalanceAllocationDecider.canAllocate(indexTierShardRouting, routingIndexNodeOne, routingAllocation), Decision.Type.NOT_PREFERRED, "There are [2] eligible nodes in the [index] tier for assignment of [" + numberOfPrimaryShards @@ -308,7 +328,7 @@ public void testCanAllocateExceedThreshold() { assertDecisionMatches( "Assigning an additional replica shard to an replica node at capacity should fail", - indexBalanceAllocationDecider.canAllocate(replicaIndexShardRouting, routingSearchNodeOne, routingAllocation), + indexBalanceAllocationDecider.canAllocate(searchTierShardRouting, routingSearchNodeOne, routingAllocation), Decision.Type.NOT_PREFERRED, "There are [2] eligible nodes in the [search] tier for assignment of [" + total @@ -320,4 +340,31 @@ public void testCanAllocateExceedThreshold() { ); } + public void testCanAllocateHasDiscoveryNodeFilters() { + setup(randomBoolean(), true, randomBoolean()); + + for (RoutingNode routingNode : List.of(routingIndexNodeOne, routingIndexNodeTwo)) { + assertDecisionMatches( + "Having DiscoveryNodeFilters disables this decider", + indexBalanceAllocationDecider.canAllocate(indexTierShardRouting, routingNode, routingAllocation), + Decision.Type.YES, + "Decider is disabled." 
+ ); + } + + for (RoutingNode routingNode : List.of(routingSearchNodeOne, routingSearchNodeTwo)) { + assertDecisionMatches( + "Having DiscoveryNodeFilters disables this decider", + indexBalanceAllocationDecider.canAllocate(searchTierShardRouting, routingNode, routingAllocation), + Decision.Type.YES, + "Decider is disabled." + ); + } + } + + public void testCanAllocateWithExcessShards() { + setup(randomBoolean(), false, true); + verifyCanAllocate(); + } + } From 03e5d1bb40735740e5592cc4f4b3927d4840d6fd Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 7 Nov 2025 23:12:24 +0000 Subject: [PATCH 50/67] [CI] Auto commit changes from spotless --- .../decider/IndexBalanceAllocationDeciderTests.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index d2cbba0f1aa67..496f4de1d729c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -9,8 +9,6 @@ package org.elasticsearch.cluster.routing.allocation.decider; -import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; - import org.elasticsearch.cluster.ClusterInfo; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; @@ -130,7 +128,8 @@ private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boo indexMetadata = IndexMetadata.builder(indexName) .settings(builder) .timestampRange(IndexLongFieldRange.UNKNOWN) - .eventIngestedRange(IndexLongFieldRange.UNKNOWN).build(); + .eventIngestedRange(IndexLongFieldRange.UNKNOWN) + .build(); projectBuilder.put(indexMetadata, false); IndexRoutingTable.Builder 
indexRoutingTableBuilder = IndexRoutingTable.builder(indexMetadata.getIndex()); @@ -302,7 +301,6 @@ private void verifyCanAllocate() { } } - public void testCanAllocateExceedThreshold() { setup(true, false, false); From d75c2cfe28b0186a55e954232e2f29abd42371cd Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 7 Nov 2025 15:19:11 -0800 Subject: [PATCH 51/67] Address feedbacks --- .../IndexBalanceAllocationDeciderTests.java | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index d2cbba0f1aa67..6b9eb371a4879 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -9,8 +9,6 @@ package org.elasticsearch.cluster.routing.allocation.decider; -import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; - import org.elasticsearch.cluster.ClusterInfo; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; @@ -75,6 +73,8 @@ public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { private int excessShards; private ShardRouting indexTierShardRouting; private ShardRouting searchTierShardRouting; + private List indexTier; + private List searchIier; private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boolean allowExcessShards) { final String indexName = "IndexBalanceAllocationDeciderIndex"; @@ -130,7 +130,8 @@ private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boo indexMetadata = IndexMetadata.builder(indexName) .settings(builder) .timestampRange(IndexLongFieldRange.UNKNOWN) - 
.eventIngestedRange(IndexLongFieldRange.UNKNOWN).build(); + .eventIngestedRange(IndexLongFieldRange.UNKNOWN) + .build(); projectBuilder.put(indexMetadata, false); IndexRoutingTable.Builder indexRoutingTableBuilder = IndexRoutingTable.builder(indexMetadata.getIndex()); @@ -233,6 +234,9 @@ private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boo false, ShardRoutingState.STARTED ); + + indexTier = List.of(routingIndexNodeOne, routingIndexNodeTwo); + searchIier = List.of(routingSearchNodeOne, routingSearchNodeTwo); } public void testCanAllocateUnderThreshold() { @@ -261,7 +265,7 @@ public void testCanAllocateUnderThreshold() { "Node has neither index nor search roles." ); - for (RoutingNode routingNode : List.of(routingSearchNodeOne, routingSearchNodeTwo)) { + for (RoutingNode routingNode : searchIier) { assertDecisionMatches( "Assigning a new primary shard to a search node should succeed", indexBalanceAllocationDecider.canAllocate(indexTierShardRouting, routingNode, routingAllocation), @@ -270,7 +274,7 @@ public void testCanAllocateUnderThreshold() { ); } - for (RoutingNode routingNode : List.of(routingIndexNodeOne, routingIndexNodeTwo)) { + for (RoutingNode routingNode : indexTier) { assertDecisionMatches( "Assigning a replica shard to a search node should succeed", indexBalanceAllocationDecider.canAllocate(searchTierShardRouting, routingNode, routingAllocation), @@ -283,7 +287,7 @@ public void testCanAllocateUnderThreshold() { } private void verifyCanAllocate() { - for (RoutingNode routingNode : List.of(routingIndexNodeOne, routingIndexNodeTwo)) { + for (RoutingNode routingNode : indexTier) { assertDecisionMatches( "Assigning an additional primary shard to an index node has capacity should succeed", indexBalanceAllocationDecider.canAllocate(indexTierShardRouting, routingNode, routingAllocation), @@ -292,7 +296,7 @@ private void verifyCanAllocate() { ); } - for (RoutingNode routingNode : List.of(routingSearchNodeOne, routingSearchNodeTwo)) { + 
for (RoutingNode routingNode : searchIier) { assertDecisionMatches( "Assigning an additional primary shard to an index node has capacity should succeed", indexBalanceAllocationDecider.canAllocate(searchTierShardRouting, routingNode, routingAllocation), @@ -302,7 +306,6 @@ private void verifyCanAllocate() { } } - public void testCanAllocateExceedThreshold() { setup(true, false, false); @@ -343,7 +346,7 @@ public void testCanAllocateExceedThreshold() { public void testCanAllocateHasDiscoveryNodeFilters() { setup(randomBoolean(), true, randomBoolean()); - for (RoutingNode routingNode : List.of(routingIndexNodeOne, routingIndexNodeTwo)) { + for (RoutingNode routingNode : indexTier) { assertDecisionMatches( "Having DiscoveryNodeFilters disables this decider", indexBalanceAllocationDecider.canAllocate(indexTierShardRouting, routingNode, routingAllocation), @@ -352,7 +355,7 @@ public void testCanAllocateHasDiscoveryNodeFilters() { ); } - for (RoutingNode routingNode : List.of(routingSearchNodeOne, routingSearchNodeTwo)) { + for (RoutingNode routingNode : searchIier) { assertDecisionMatches( "Having DiscoveryNodeFilters disables this decider", indexBalanceAllocationDecider.canAllocate(searchTierShardRouting, routingNode, routingAllocation), From 3e52b29c5ed215a5a51ed71e688cc87e6b846cf3 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 7 Nov 2025 15:35:37 -0800 Subject: [PATCH 52/67] Address feedbacks --- .../decider/IndexBalanceAllocationDeciderTests.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index 6b9eb371a4879..e71646fa7a9ac 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ 
b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -267,7 +267,7 @@ public void testCanAllocateUnderThreshold() { for (RoutingNode routingNode : searchIier) { assertDecisionMatches( - "Assigning a new primary shard to a search node should succeed", + "Assigning a new primary shard to a search tier node should succeed", indexBalanceAllocationDecider.canAllocate(indexTierShardRouting, routingNode, routingAllocation), Decision.Type.YES, "A search node cannot own primary shards. Decider inactive." @@ -276,7 +276,7 @@ public void testCanAllocateUnderThreshold() { for (RoutingNode routingNode : indexTier) { assertDecisionMatches( - "Assigning a replica shard to a search node should succeed", + "Assigning a replica shard to a index tier node should succeed", indexBalanceAllocationDecider.canAllocate(searchTierShardRouting, routingNode, routingAllocation), Decision.Type.YES, "An index node cannot own search shards. Decider inactive." @@ -298,7 +298,7 @@ private void verifyCanAllocate() { for (RoutingNode routingNode : searchIier) { assertDecisionMatches( - "Assigning an additional primary shard to an index node has capacity should succeed", + "Assigning an additional replica shard to an search node has capacity should succeed", indexBalanceAllocationDecider.canAllocate(searchTierShardRouting, routingNode, routingAllocation), Decision.Type.YES, "Node index shard allocation is under the threshold." 
From d8911aaeb6bdf678f843c6d2e72fee66297ea0ee Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 7 Nov 2025 15:38:30 -0800 Subject: [PATCH 53/67] Address feedbacks --- docs/changelog/135875.yaml | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 docs/changelog/135875.yaml diff --git a/docs/changelog/135875.yaml b/docs/changelog/135875.yaml deleted file mode 100644 index 4f00fb73e9712..0000000000000 --- a/docs/changelog/135875.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 135875 -summary: "Allocation: introduce a new decider that balances the index shard count\ - \ among nodes" -area: Allocation -type: feature -issues: [] From 608d5c0c1ef9398013bf366fcb192d4d92bb0a94 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 7 Nov 2025 17:13:55 -0800 Subject: [PATCH 54/67] Address feedbacks --- .../decider/FilterAllocationDecider.java | 6 +-- .../IndexBalanceAllocationDecider.java | 27 +++++++++-- .../IndexBalanceAllocationDeciderTests.java | 46 ++++++++++--------- 3 files changed, 52 insertions(+), 27 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/FilterAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/FilterAllocationDecider.java index a7f0aa3cea89f..91e3b42bbb46d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/FilterAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/FilterAllocationDecider.java @@ -62,9 +62,9 @@ public class FilterAllocationDecider extends AllocationDecider { public static final String NAME = "filter"; - private static final String CLUSTER_ROUTING_REQUIRE_GROUP_PREFIX = "cluster.routing.allocation.require"; - private static final String CLUSTER_ROUTING_INCLUDE_GROUP_PREFIX = "cluster.routing.allocation.include"; - private static final String CLUSTER_ROUTING_EXCLUDE_GROUP_PREFIX = "cluster.routing.allocation.exclude"; + public static final String 
CLUSTER_ROUTING_REQUIRE_GROUP_PREFIX = "cluster.routing.allocation.require"; + public static final String CLUSTER_ROUTING_INCLUDE_GROUP_PREFIX = "cluster.routing.allocation.include"; + public static final String CLUSTER_ROUTING_EXCLUDE_GROUP_PREFIX = "cluster.routing.allocation.exclude"; public static final Setting.AffixSetting> CLUSTER_ROUTING_REQUIRE_GROUP_SETTING = Setting.prefixKeySetting( CLUSTER_ROUTING_REQUIRE_GROUP_PREFIX + ".", diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 3c2d210fb9f5c..afae58ddd02f1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -30,10 +30,13 @@ import java.util.Map; import java.util.Set; +import static org.elasticsearch.cluster.node.DiscoveryNodeFilters.OpType.AND; import static org.elasticsearch.cluster.node.DiscoveryNodeFilters.OpType.OR; import static org.elasticsearch.cluster.node.DiscoveryNodeRole.INDEX_ROLE; import static org.elasticsearch.cluster.node.DiscoveryNodeRole.SEARCH_ROLE; import static org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider.CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING; +import static org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider.CLUSTER_ROUTING_INCLUDE_GROUP_SETTING; +import static org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider.CLUSTER_ROUTING_REQUIRE_GROUP_SETTING; /** * For an index of n shards hosted by a cluster of m nodes, a node should not host @@ -50,20 +53,25 @@ public class IndexBalanceAllocationDecider extends AllocationDecider { private final IndexBalanceConstraintSettings indexBalanceConstraintSettings; private final boolean isStateless; + 
+ private volatile DiscoveryNodeFilters clusterRequireFilters; + private volatile DiscoveryNodeFilters clusterIncludeFilters; private volatile DiscoveryNodeFilters clusterExcludeFilters; public IndexBalanceAllocationDecider(Settings settings, ClusterSettings clusterSettings) { this.indexBalanceConstraintSettings = new IndexBalanceConstraintSettings(clusterSettings); + setClusterRequireFilters(CLUSTER_ROUTING_REQUIRE_GROUP_SETTING.getAsMap(settings)); setClusterExcludeFilters(CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING.getAsMap(settings)); + setClusterIncludeFilters(CLUSTER_ROUTING_INCLUDE_GROUP_SETTING.getAsMap(settings)); + clusterSettings.addAffixMapUpdateConsumer(CLUSTER_ROUTING_REQUIRE_GROUP_SETTING, this::setClusterRequireFilters, (a, b) -> {}); clusterSettings.addAffixMapUpdateConsumer(CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING, this::setClusterExcludeFilters, (a, b) -> {}); + clusterSettings.addAffixMapUpdateConsumer(CLUSTER_ROUTING_INCLUDE_GROUP_SETTING, this::setClusterIncludeFilters, (a, b) -> {}); isStateless = DiscoveryNode.isStateless(settings); } @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - if (indexBalanceConstraintSettings.isDeciderEnabled() == false - || isStateless == false - || clusterExcludeFilters != null && clusterExcludeFilters.hasNoFilters() == false) { + if (indexBalanceConstraintSettings.isDeciderEnabled() == false || isStateless == false || hasNoFilters() == false) { return Decision.single(Decision.Type.YES, NAME, "Decider is disabled."); } @@ -141,8 +149,21 @@ private void collectEligibleNodes(RoutingAllocation allocation, Set> filters) { + clusterRequireFilters = DiscoveryNodeFilters.trimTier(DiscoveryNodeFilters.buildFromKeyValues(AND, filters)); + } + + private void setClusterIncludeFilters(Map> filters) { + clusterIncludeFilters = DiscoveryNodeFilters.trimTier(DiscoveryNodeFilters.buildFromKeyValues(OR, filters)); + } + private void setClusterExcludeFilters(Map> filters) { 
clusterExcludeFilters = DiscoveryNodeFilters.trimTier(DiscoveryNodeFilters.buildFromKeyValues(OR, filters)); } + private boolean hasNoFilters() { + return (clusterExcludeFilters == null || clusterExcludeFilters.hasNoFilters()) + && (clusterIncludeFilters == null || clusterIncludeFilters.hasNoFilters()) + && (clusterRequireFilters == null || clusterRequireFilters.hasNoFilters()); + } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index e71646fa7a9ac..c8155cc1f5c8a 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -18,7 +18,6 @@ import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.ProjectMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.node.DiscoveryNodeFilters; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodeUtils; import org.elasticsearch.cluster.node.DiscoveryNodes; @@ -45,6 +44,9 @@ import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_CREATION_DATE; import static org.elasticsearch.cluster.routing.TestShardRouting.shardRoutingBuilder; +import static org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider.CLUSTER_ROUTING_EXCLUDE_GROUP_PREFIX; +import static org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider.CLUSTER_ROUTING_INCLUDE_GROUP_PREFIX; +import static org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider.CLUSTER_ROUTING_REQUIRE_GROUP_PREFIX; import static org.elasticsearch.common.settings.ClusterSettings.createBuiltInClusterSettings; 
public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { @@ -81,11 +83,11 @@ private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boo final Map> nodeToShardRoutings = new HashMap<>(); excessShards = allowExcessShards ? randomIntBetween(1, 5) : 0; - settings = Settings.builder() + + Settings.Builder builder = Settings.builder() .put("stateless.enabled", "true") .put("cluster.routing.allocation.index_balance_decider.enabled", "true") - .put("cluster.routing.allocation.index_balance_decider.excess_shards", excessShards) - .build(); + .put("cluster.routing.allocation.index_balance_decider.excess_shards", excessShards); numberOfPrimaryShards = randomIntBetween(10, 20); replicationFactor = 2; @@ -102,6 +104,18 @@ private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boo .build(); allNodes = List.of(indexNodeOne, indexNodeTwo, searchNodeOne, searchNodeTwo, masterNode, machineLearningNode); + if (hasDiscoveryNodeFilters) { + String setting = randomFrom( + CLUSTER_ROUTING_REQUIRE_GROUP_PREFIX, + CLUSTER_ROUTING_INCLUDE_GROUP_PREFIX, + CLUSTER_ROUTING_EXCLUDE_GROUP_PREFIX + ); + String attribute = randomFrom("_value", "name"); + String name = randomFrom(allNodes).getName(); + String ip = randomFrom("192.168.0.1", "192.168.0.1", "192.168.0.1", "10.17.0.1"); + builder.put(setting + "." + attribute, attribute.equals("name") ? 
name : ip); + } + DiscoveryNodes.Builder discoveryNodeBuilder = DiscoveryNodes.builder(); for (DiscoveryNode node : allNodes) { discoveryNodeBuilder.add(node); @@ -112,23 +126,13 @@ private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boo final ProjectMetadata.Builder projectBuilder = ProjectMetadata.builder(projectId); - Settings.Builder builder = indexSettings(IndexVersion.current(), numberOfPrimaryShards, replicationFactor).put( - SETTING_CREATION_DATE, - System.currentTimeMillis() - ); - - if (hasDiscoveryNodeFilters) { - String setting = randomFrom( - IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_SETTING, - IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING, - IndexMetadata.INDEX_ROUTING_INITIAL_RECOVERY_GROUP_SETTING - ).getKey(); - String attribute = randomFrom(DiscoveryNodeFilters.SINGLE_NODE_NAMES); - builder.put(setting + attribute, randomAlphaOfLength(randomIntBetween(2, 3))); - } - indexMetadata = IndexMetadata.builder(indexName) - .settings(builder) + .settings( + indexSettings(IndexVersion.current(), numberOfPrimaryShards, replicationFactor).put( + SETTING_CREATION_DATE, + System.currentTimeMillis() + ).build() + ) .timestampRange(IndexLongFieldRange.UNKNOWN) .eventIngestedRange(IndexLongFieldRange.UNKNOWN) .build(); @@ -217,7 +221,7 @@ private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boo routingAllocation = new RoutingAllocation(null, clusterState.getRoutingNodes(), clusterState, clusterInfo, null, System.nanoTime()); routingAllocation.setDebugMode(RoutingAllocation.DebugMode.ON); - indexBalanceAllocationDecider = new IndexBalanceAllocationDecider(settings, createBuiltInClusterSettings(settings)); + indexBalanceAllocationDecider = new IndexBalanceAllocationDecider(builder.build(), createBuiltInClusterSettings(builder.build())); indexTierShardRouting = TestShardRouting.newShardRouting( new ShardId(indexMetadata.getIndex(), 1), From 21c403c6895de5bb2b98460c7360cdddba434608 Mon Sep 17 00:00:00 2001 From: 
zhubotang-wq Date: Fri, 7 Nov 2025 17:28:19 -0800 Subject: [PATCH 55/67] Address feedback --- .../decider/IndexBalanceAllocationDeciderTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index c8155cc1f5c8a..2091708314404 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -111,8 +111,8 @@ private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boo CLUSTER_ROUTING_EXCLUDE_GROUP_PREFIX ); String attribute = randomFrom("_value", "name"); - String name = randomFrom(allNodes).getName(); - String ip = randomFrom("192.168.0.1", "192.168.0.1", "192.168.0.1", "10.17.0.1"); + String name = randomFrom("indexNodeOne", "indexNodeTwo", "searchNodeOne", "searchNodeTwo"); + String ip = randomFrom("192.168.0.1", "192.168.0.2", "192.168.7.1", "10.17.0.1"); builder.put(setting + "." + attribute, attribute.equals("name") ?
name : ip); } From de4cb7eb6a2a604b9df9e285c0729c61f2cf2126 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Mon, 10 Nov 2025 13:58:49 -0800 Subject: [PATCH 56/67] Address feedback --- .../IndexBalanceAllocationDecider.java | 4 +- .../IndexBalanceAllocationDeciderTests.java | 50 ++++--------------- 2 files changed, 11 insertions(+), 43 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index afae58ddd02f1..025fad9e8ef0f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -81,9 +81,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } assert node.node() != null; - if (node.node().getRoles().contains(INDEX_ROLE) == false && node.node().getRoles().contains(SEARCH_ROLE) == false) { - return Decision.single(Decision.Type.YES, NAME, "Node has neither index nor search roles."); - } + assert node.node().getRoles().contains(INDEX_ROLE) || node.node().getRoles().contains(SEARCH_ROLE); if (node.node().getRoles().contains(INDEX_ROLE) && shardRouting.primary() == false) { return Decision.single(Decision.Type.YES, NAME, "An index node cannot own search shards.
Decider inactive."); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index 2091708314404..6ab3dcd7fe906 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -69,7 +69,6 @@ public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { private int replicationFactor; private ClusterState clusterState; private IndexMetadata indexMetadata; - private Settings settings; private RoutingAllocation routingAllocation; private IndexBalanceAllocationDecider indexBalanceAllocationDecider; private int excessShards; @@ -78,7 +77,7 @@ public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { private List indexTier; private List searchIier; - private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boolean allowExcessShards) { + private void setup(boolean hasDiscoveryNodeFilters, boolean allowExcessShards) { final String indexName = "IndexBalanceAllocationDeciderIndex"; final Map> nodeToShardRoutings = new HashMap<>(); @@ -89,10 +88,8 @@ private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boo .put("cluster.routing.allocation.index_balance_decider.enabled", "true") .put("cluster.routing.allocation.index_balance_decider.excess_shards", excessShards); - numberOfPrimaryShards = randomIntBetween(10, 20); + numberOfPrimaryShards = randomIntBetween(2, 10) * 2; replicationFactor = 2; - if (numberOfPrimaryShards % 2 != 0 && exceedThreshold) numberOfPrimaryShards++; - if (numberOfPrimaryShards % 2 == 0 && exceedThreshold == false) numberOfPrimaryShards++; indexNodeOne = 
DiscoveryNodeUtils.builder("indexNodeOne").roles(Collections.singleton(DiscoveryNodeRole.INDEX_ROLE)).build(); indexNodeTwo = DiscoveryNodeUtils.builder("indexNodeTwo").roles(Collections.singleton(DiscoveryNodeRole.INDEX_ROLE)).build(); @@ -144,8 +141,7 @@ private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boo Metadata.Builder metadataBuilder = Metadata.builder(); ShardId[] shardIds = new ShardId[numberOfPrimaryShards]; - int shardCount = exceedThreshold ? numberOfPrimaryShards : numberOfPrimaryShards - 1; - for (int i = 0; i < shardCount; i++) { + for (int i = 0; i < numberOfPrimaryShards; i++) { shardIds[i] = new ShardId(indexMetadata.getIndex(), i); IndexShardRoutingTable.Builder indexShardRoutingBuilder = IndexShardRoutingTable.builder(shardIds[i]); @@ -174,22 +170,7 @@ private void setup(boolean exceedThreshold, boolean hasDiscoveryNodeFilters, boo routingTableBuilder.add(indexRoutingTableBuilder.build()); } - if (exceedThreshold == false) { - ShardId lastPrimaryShardId = new ShardId(indexMetadata.getIndex(), numberOfPrimaryShards - 1); - ShardRouting lastShardRouting = TestShardRouting.newShardRouting( - lastPrimaryShardId, - masterNode.getId(), - null, - true, - ShardRoutingState.STARTED - ); - IndexShardRoutingTable.Builder indexShardRoutingBuilderMLNode = IndexShardRoutingTable.builder(lastPrimaryShardId); - indexShardRoutingBuilderMLNode.addShard(lastShardRouting); - indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilderMLNode); - routingTableBuilder.add(indexRoutingTableBuilder.build()); - routingMachineLearningNode = RoutingNodesHelper.routingNode(machineLearningNode.getId(), machineLearningNode, lastShardRouting); - } - + routingMachineLearningNode = RoutingNodesHelper.routingNode(machineLearningNode.getId(), machineLearningNode); metadataBuilder.put(projectBuilder).generateClusterUuidIfNeeded(); state.nodes(discoveryNodeBuilder); state.metadata(metadataBuilder); @@ -243,8 +224,8 @@ private void setup(boolean 
exceedThreshold, boolean hasDiscoveryNodeFilters, boo searchIier = List.of(routingSearchNodeOne, routingSearchNodeTwo); } - public void testCanAllocateUnderThreshold() { - setup(false, false, randomBoolean()); + public void testCanAllocateUnderThresholdWithExcessShards() { + setup(false, true); ShardRouting newIndexShardRouting = TestShardRouting.newShardRouting( new ShardId("newIndex", "uuid", 1), indexNodeTwo.getId(), @@ -253,7 +234,8 @@ public void testCanAllocateUnderThreshold() { ShardRoutingState.STARTED ); - for (RoutingNode routingNode : List.of(routingIndexNodeOne, routingIndexNodeTwo, routingSearchNodeOne, routingSearchNodeTwo)) { + for (RoutingNode routingNode : List.of( + routingIndexNodeOne, routingIndexNodeTwo, routingSearchNodeOne, routingSearchNodeTwo, routingMachineLearningNode)) { assertDecisionMatches( "Assigning a new index to a node should succeed", indexBalanceAllocationDecider.canAllocate(newIndexShardRouting, routingNode, routingAllocation), @@ -262,13 +244,6 @@ public void testCanAllocateUnderThreshold() { ); } - assertDecisionMatches( - "Assigning a shard to a node that is not index or search node should succeed", - indexBalanceAllocationDecider.canAllocate(indexTierShardRouting, routingMachineLearningNode, routingAllocation), - Decision.Type.YES, - "Node has neither index nor search roles." 
- ); - for (RoutingNode routingNode : searchIier) { assertDecisionMatches( "Assigning a new primary shard to a search tier node should succeed", @@ -311,7 +286,7 @@ private void verifyCanAllocate() { } public void testCanAllocateExceedThreshold() { - setup(true, false, false); + setup( false, false); int ideal = numberOfPrimaryShards / 2; int current = numberOfPrimaryShards / 2; @@ -348,7 +323,7 @@ public void testCanAllocateExceedThreshold() { } public void testCanAllocateHasDiscoveryNodeFilters() { - setup(randomBoolean(), true, randomBoolean()); + setup(true, randomBoolean()); for (RoutingNode routingNode : indexTier) { assertDecisionMatches( @@ -369,9 +344,4 @@ public void testCanAllocateHasDiscoveryNodeFilters() { } } - public void testCanAllocateWithExcessShards() { - setup(randomBoolean(), false, true); - verifyCanAllocate(); - } - } From 902a8d8d96e651cb25127b411d7fc2e042ec09fc Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 10 Nov 2025 22:05:43 +0000 Subject: [PATCH 57/67] [CI] Auto commit changes from spotless --- .../decider/IndexBalanceAllocationDeciderTests.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index 6ab3dcd7fe906..e4f70099065fb 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -235,7 +235,12 @@ public void testCanAllocateUnderThresholdWithExcessShards() { ); for (RoutingNode routingNode : List.of( - routingIndexNodeOne, routingIndexNodeTwo, routingSearchNodeOne, routingSearchNodeTwo, routingMachineLearningNode)) { + routingIndexNodeOne, + routingIndexNodeTwo, + 
routingSearchNodeOne, + routingSearchNodeTwo, + routingMachineLearningNode + )) { assertDecisionMatches( "Assigning a new index to a node should succeed", indexBalanceAllocationDecider.canAllocate(newIndexShardRouting, routingNode, routingAllocation), @@ -286,7 +291,7 @@ private void verifyCanAllocate() { } public void testCanAllocateExceedThreshold() { - setup( false, false); + setup(false, false); int ideal = numberOfPrimaryShards / 2; int current = numberOfPrimaryShards / 2; From dc08221b6aac3c0f3184c9d1d59c4b4299ff650d Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Tue, 18 Nov 2025 19:01:11 -0800 Subject: [PATCH 58/67] Changes to support stateless test --- .../allocator/BalancedShardsAllocator.java | 14 +++++++++----- .../allocation/allocator/WeightFunction.java | 3 ++- .../allocator/BalancedShardsAllocatorTests.java | 2 +- .../decider/WriteLoadConstraintDeciderTests.java | 1 + 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index ed64520ce7e12..c551569a2458c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -448,7 +448,7 @@ private boolean balance() { boolean shardBalanced = false; // Balance each partition for (NodeSorter nodeSorter : nodeSorters) { - if (nodeSorter.modelNodes.length < 2) { /* skip if we only have one node */ + if (nodeSorter.getModelNodes().length < 2) { /* skip if we only have one node */ logger.trace("skipping rebalance as the partition has single node only"); continue; } @@ -477,7 +477,7 @@ private MoveDecision explainRebalanceDecision(final ProjectIndex index, final Sh Decision canRebalance = allocation.deciders().canRebalance(shard, 
allocation); sorter.reset(index); - ModelNode[] modelNodes = sorter.modelNodes; + ModelNode[] modelNodes = sorter.getModelNodes(); final String currentNodeId = shard.currentNodeId(); // find currently assigned node ModelNode currentNode = null; @@ -607,7 +607,7 @@ private MoveDecision explainRebalanceDecision(final ProjectIndex index, final Sh private boolean balanceByWeights(NodeSorter sorter) { boolean shardBalanced = false; final AllocationDeciders deciders = allocation.deciders(); - final ModelNode[] modelNodes = sorter.modelNodes; + final ModelNode[] modelNodes = sorter.getModelNodes(); final float[] weights = sorter.weights; for (var index : buildWeightOrderedIndices(sorter)) { IndexMetadata indexMetadata = indexMetadata(index); @@ -993,7 +993,7 @@ private MoveDecision decideMove( RoutingNode targetNode = null; final List nodeResults = explain ? new ArrayList<>() : null; int weightRanking = 0; - for (ModelNode currentNode : sorter.modelNodes) { + for (ModelNode currentNode : sorter.getModelNodes()) { if (currentNode != sourceNode) { RoutingNode target = currentNode.getRoutingNode(); Decision allocationDecision = decider.apply(shardRouting, target); @@ -1713,7 +1713,7 @@ public boolean containsShard(ShardRouting shard) { * * @see BalancingWeightsFactory */ - public static final class NodeSorter extends IntroSorter { + public static class NodeSorter extends IntroSorter { final ModelNode[] modelNodes; /** The nodes weights with respect to the current weight function / index */ @@ -1789,6 +1789,10 @@ public float delta() { public WeightFunction getWeightFunction() { return function; } + + public ModelNode[] getModelNodes() { + return modelNodes; + } } record ProjectIndex(ProjectId project, String indexName) { diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/WeightFunction.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/WeightFunction.java index 5b58b1d022590..74d90ff87f441 100644 --- 
a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/WeightFunction.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/WeightFunction.java @@ -60,7 +60,8 @@ public WeightFunction(float shardBalance, float indexBalance, float writeLoadBal theta3 = diskUsageBalance / sum; } - float calculateNodeWeightWithIndex( + // Visible for testing + public float calculateNodeWeightWithIndex( BalancedShardsAllocator.Balancer balancer, BalancedShardsAllocator.ModelNode node, ProjectIndex index diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java index 4afb83255da6f..4c0795b68e566 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java @@ -1240,7 +1240,7 @@ private static class NodeNameDrivenWeightFunction extends WeightFunction { } @Override - float calculateNodeWeightWithIndex( + public float calculateNodeWeightWithIndex( BalancedShardsAllocator.Balancer balancer, BalancedShardsAllocator.ModelNode node, BalancedShardsAllocator.ProjectIndex index diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java index 7fb6468d30f4d..324292a13cf8f 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java @@ -28,6 +28,7 @@ import org.elasticsearch.cluster.routing.TestShardRouting; import 
org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; From 27bcff80163d035e8534e78a7c77ca688fdb6dac Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Wed, 19 Nov 2025 15:54:48 -0800 Subject: [PATCH 59/67] Changes to support stateless test --- .../allocation/allocator/BalancedShardsAllocator.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index c551569a2458c..517a246ecd6d9 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -1548,6 +1548,11 @@ private boolean tryRelocateShard(ModelNode minNode, ModelNode maxNode, ProjectIn logger.trace("No shards of [{}] can relocate from [{}] to [{}]", idx, maxNode.getNodeId(), minNode.getNodeId()); return false; } + + // Visible for testing. 
+ public RoutingAllocation getAllocation() { + return this.allocation; + } } public static class ModelNode implements Iterable { From 350c2dbd1eb0e07f518e0133d2a4c040bb5d776a Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Wed, 19 Nov 2025 22:38:33 -0800 Subject: [PATCH 60/67] Changes to support stateless test --- .../routing/allocation/allocator/BalancedShardsAllocator.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index 517a246ecd6d9..17d4dcde53830 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -1800,7 +1800,8 @@ public ModelNode[] getModelNodes() { } } - record ProjectIndex(ProjectId project, String indexName) { + // Visible for testing. 
+ public record ProjectIndex(ProjectId project, String indexName) { ProjectIndex(RoutingAllocation allocation, ShardRouting shard) { this(allocation.metadata().projectFor(shard.index()).id(), shard.getIndexName()); } From 4d5650505fa9d83e2a4a03e36ddeacf45c86e464 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Thu, 20 Nov 2025 15:13:51 -0800 Subject: [PATCH 61/67] Changes to support stateless test --- .../routing/allocation/IndexBalanceConstraintSettings.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java index df7df4285a111..775ecce8f4774 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceConstraintSettings.java @@ -11,7 +11,6 @@ import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Setting; -import org.elasticsearch.common.util.FeatureFlag; /** * Settings definitions for the index shard count allocation decider and associated infrastructure @@ -19,11 +18,10 @@ public class IndexBalanceConstraintSettings { private static final String SETTING_PREFIX = "cluster.routing.allocation.index_balance_decider."; - private static final FeatureFlag INDEX_BALANCE_DECIDER_FEATURE_FLAG = new FeatureFlag("index_balance_decider"); public static final Setting INDEX_BALANCE_DECIDER_ENABLED_SETTING = Setting.boolSetting( SETTING_PREFIX + "enabled", - INDEX_BALANCE_DECIDER_FEATURE_FLAG.isEnabled(), + false, Setting.Property.Dynamic, Setting.Property.NodeScope ); From 68fb74bc321bde3cfc365cb084641455d35ba146 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Thu, 20 Nov 2025 15:18:45 -0800 Subject: [PATCH 62/67] Changes to support stateless test --- 
.../allocator/BalancedShardsAllocator.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index c252b814d0336..dc0990bc599f2 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -448,7 +448,7 @@ private boolean balance() { boolean shardBalanced = false; // Balance each partition for (NodeSorter nodeSorter : nodeSorters) { - if (nodeSorter.getModelNodes().length < 2) { /* skip if we only have one node */ + if (nodeSorter.modelNodes.length < 2) { /* skip if we only have one node */ logger.trace("skipping rebalance as the partition has single node only"); continue; } @@ -477,7 +477,7 @@ private MoveDecision explainRebalanceDecision(final ProjectIndex index, final Sh Decision canRebalance = allocation.deciders().canRebalance(shard, allocation); sorter.reset(index); - ModelNode[] modelNodes = sorter.getModelNodes(); + ModelNode[] modelNodes = sorter.modelNodes; final String currentNodeId = shard.currentNodeId(); // find currently assigned node ModelNode currentNode = null; @@ -607,7 +607,7 @@ private MoveDecision explainRebalanceDecision(final ProjectIndex index, final Sh private boolean balanceByWeights(NodeSorter sorter) { boolean shardBalanced = false; final AllocationDeciders deciders = allocation.deciders(); - final ModelNode[] modelNodes = sorter.getModelNodes(); + final ModelNode[] modelNodes = sorter.modelNodes; final float[] weights = sorter.weights; for (var index : buildWeightOrderedIndices(sorter)) { IndexMetadata indexMetadata = indexMetadata(index); @@ -1030,7 +1030,7 @@ private MoveDecision decideMove( RoutingNode targetNode = null; final 
List nodeResults = explain ? new ArrayList<>() : null; int weightRanking = 0; - for (ModelNode currentNode : sorter.getModelNodes()) { + for (ModelNode currentNode : sorter.modelNodes) { if (currentNode != sourceNode) { RoutingNode target = currentNode.getRoutingNode(); Decision allocationDecision = decider.apply(shardRouting, target); @@ -1755,7 +1755,7 @@ public boolean containsShard(ShardRouting shard) { * * @see BalancingWeightsFactory */ - public static class NodeSorter extends IntroSorter { + public static final class NodeSorter extends IntroSorter { final ModelNode[] modelNodes; /** The nodes weights with respect to the current weight function / index */ @@ -1831,10 +1831,6 @@ public float delta() { public WeightFunction getWeightFunction() { return function; } - - public ModelNode[] getModelNodes() { - return modelNodes; - } } // Visible for testing. From cdd2eccc8922b4ae1d1d84ea46e1777ee84d79bd Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Fri, 21 Nov 2025 11:25:49 -0800 Subject: [PATCH 63/67] Address feedback.
--- .../decider/WriteLoadConstraintDeciderIT.java | 1 - .../cluster/node/DiscoveryNodeFilters.java | 4 +-- .../IndexBalanceAllocationDecider.java | 10 ++++---- .../IndexBalanceAllocationDeciderTests.java | 25 ++++++++++++++----- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java index c6f56e6a5ea6c..5d5f317348001 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java @@ -604,7 +604,6 @@ private Settings enabledWriteLoadDeciderSettings(int utilizationThresholdPercent WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_QUEUE_LATENCY_THRESHOLD_SETTING.getKey(), TimeValue.timeValueMillis(queueLatencyThresholdMillis) ) - .put("cluster.routing.allocation.index_balance_decider.excess_shards", 1000) // Disable rebalancing so that testing can see Decider change outcomes only. 
.put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none") .build(); diff --git a/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java b/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java index 54261ea3852a9..c1bbc345633d9 100644 --- a/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java +++ b/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeFilters.java @@ -250,8 +250,8 @@ private boolean isSingleNodeFilterInternal() { || (filters.size() > 1 && opType == OpType.AND && NON_ATTRIBUTE_NAMES.containsAll(filters.keySet())); } - public boolean hasNoFilters() { - return filters.isEmpty(); + public boolean hasFilters() { + return filters.isEmpty() == false; } /** diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 025fad9e8ef0f..3a2d12a229b05 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -71,7 +71,7 @@ public IndexBalanceAllocationDecider(Settings settings, ClusterSettings clusterS @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { - if (indexBalanceConstraintSettings.isDeciderEnabled() == false || isStateless == false || hasNoFilters() == false) { + if (indexBalanceConstraintSettings.isDeciderEnabled() == false || isStateless == false || hasFilters()) { return Decision.single(Decision.Type.YES, NAME, "Decider is disabled."); } @@ -159,9 +159,9 @@ private void setClusterExcludeFilters(Map> filters) { clusterExcludeFilters = DiscoveryNodeFilters.trimTier(DiscoveryNodeFilters.buildFromKeyValues(OR, filters)); } - 
private boolean hasNoFilters() { - return (clusterExcludeFilters == null || clusterExcludeFilters.hasNoFilters()) - && (clusterIncludeFilters == null || clusterIncludeFilters.hasNoFilters()) - && (clusterRequireFilters == null || clusterRequireFilters.hasNoFilters()); + private boolean hasFilters() { + return (clusterExcludeFilters != null && clusterExcludeFilters.hasFilters()) + || (clusterIncludeFilters != null && clusterIncludeFilters.hasFilters()) + || (clusterRequireFilters != null && clusterRequireFilters.hasFilters()); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java index e4f70099065fb..aa9324bfaf1e2 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDeciderTests.java @@ -30,6 +30,7 @@ import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRoutingState; import org.elasticsearch.cluster.routing.TestShardRouting; +import org.elasticsearch.cluster.routing.allocation.IndexBalanceConstraintSettings; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexVersion; @@ -51,6 +52,9 @@ public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { + public static final String INCLUDE_DISCOVERY_NODE_FILTERS = "include.discovery.node.filters"; + public static final String ALLOW_EXCESS_SHARDS = "allow.excess.shards"; + private DiscoveryNode indexNodeOne; private DiscoveryNode indexNodeTwo; private DiscoveryNode searchNodeOne; @@ -77,7 +81,10 @@ public class IndexBalanceAllocationDeciderTests extends ESAllocationTestCase { private List 
indexTier; private List searchIier; - private void setup(boolean hasDiscoveryNodeFilters, boolean allowExcessShards) { + private void setup(Settings settings) { + boolean hasDiscoveryNodeFilters = settings.getAsBoolean(INCLUDE_DISCOVERY_NODE_FILTERS, true); + boolean allowExcessShards = settings.getAsBoolean(ALLOW_EXCESS_SHARDS, true); + final String indexName = "IndexBalanceAllocationDeciderIndex"; final Map> nodeToShardRoutings = new HashMap<>(); @@ -85,8 +92,8 @@ private void setup(boolean hasDiscoveryNodeFilters, boolean allowExcessShards) { Settings.Builder builder = Settings.builder() .put("stateless.enabled", "true") - .put("cluster.routing.allocation.index_balance_decider.enabled", "true") - .put("cluster.routing.allocation.index_balance_decider.excess_shards", excessShards); + .put(IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_ENABLED_SETTING.getKey(), "true") + .put(IndexBalanceConstraintSettings.INDEX_BALANCE_DECIDER_EXCESS_SHARDS.getKey(), excessShards); numberOfPrimaryShards = randomIntBetween(2, 10) * 2; replicationFactor = 2; @@ -225,7 +232,8 @@ private void setup(boolean hasDiscoveryNodeFilters, boolean allowExcessShards) { } public void testCanAllocateUnderThresholdWithExcessShards() { - setup(false, true); + Settings testSettings = Settings.builder().put(INCLUDE_DISCOVERY_NODE_FILTERS, false).put(ALLOW_EXCESS_SHARDS, true).build(); + setup(testSettings); ShardRouting newIndexShardRouting = TestShardRouting.newShardRouting( new ShardId("newIndex", "uuid", 1), indexNodeTwo.getId(), @@ -291,7 +299,8 @@ private void verifyCanAllocate() { } public void testCanAllocateExceedThreshold() { - setup(false, false); + Settings testSettings = Settings.builder().put(INCLUDE_DISCOVERY_NODE_FILTERS, false).put(ALLOW_EXCESS_SHARDS, false).build(); + setup(testSettings); int ideal = numberOfPrimaryShards / 2; int current = numberOfPrimaryShards / 2; @@ -328,7 +337,11 @@ public void testCanAllocateExceedThreshold() { } public void 
testCanAllocateHasDiscoveryNodeFilters() { - setup(true, randomBoolean()); + Settings testSettings = Settings.builder() + .put(INCLUDE_DISCOVERY_NODE_FILTERS, true) + .put(ALLOW_EXCESS_SHARDS, randomBoolean()) + .build(); + setup(testSettings); for (RoutingNode routingNode : indexTier) { assertDecisionMatches( From 09d8714da0438188cc47f7c8d3474c2d6dba1896 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Mon, 24 Nov 2025 16:12:19 -0800 Subject: [PATCH 64/67] address feedbacks. --- .../decider/IndexBalanceAllocationDecider.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 3a2d12a229b05..0d12dbf6abf0e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -72,23 +72,23 @@ public IndexBalanceAllocationDecider(Settings settings, ClusterSettings clusterS @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { if (indexBalanceConstraintSettings.isDeciderEnabled() == false || isStateless == false || hasFilters()) { - return Decision.single(Decision.Type.YES, NAME, "Decider is disabled."); + return allocation.decision(Decision.YES, NAME, "Decider is disabled."); } Index index = shardRouting.index(); if (node.hasIndex(index) == false) { - return Decision.single(Decision.Type.YES, NAME, "Node does not currently host this index."); + return allocation.decision(Decision.YES, NAME, "Node does not currently host this index."); } assert node.node() != null; assert node.node().getRoles().contains(INDEX_ROLE) || node.node().getRoles().contains(SEARCH_ROLE); if 
(node.node().getRoles().contains(INDEX_ROLE) && shardRouting.primary() == false) { - return Decision.single(Decision.Type.YES, NAME, "An index node cannot own search shards. Decider inactive."); + return allocation.decision(Decision.YES, NAME, "An index node cannot own search shards. Decider inactive."); } if (node.node().getRoles().contains(SEARCH_ROLE) && shardRouting.primary()) { - return Decision.single(Decision.Type.YES, NAME, "A search node cannot own primary shards. Decider inactive."); + return allocation.decision(Decision.YES, NAME, "A search node cannot own primary shards. Decider inactive."); } final ProjectId projectId = allocation.getClusterState().metadata().projectFor(index).id(); @@ -110,9 +110,14 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing } assert eligibleNodes.isEmpty() == false; + if (eligibleNodes.isEmpty()) { + return allocation.decision(Decision.YES, NAME, "There are no eligible nodes available."); + } assert totalShards > 0; - final double idealAllocation = Math.ceil((double) totalShards / eligibleNodes.size()); + + // The built-in "eligibleNodes.size() - 1" offers just enough buffer so threshold is not rounded down by integer division. + // But not too much so that threshold does not get an automatic 1 shard extra allowance. final int threshold = (totalShards + eligibleNodes.size() - 1 + indexBalanceConstraintSettings.getExcessShards()) / eligibleNodes .size(); final int currentAllocation = node.numberOfOwningShardsForIndex(index); From da55db3f7574f4b45c9a540801629c43efd41593 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Tue, 25 Nov 2025 14:26:01 -0800 Subject: [PATCH 65/67] address feedbacks. 
--- .../allocation/decider/IndexBalanceAllocationDecider.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 0d12dbf6abf0e..2aafdd994bd3d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -118,8 +118,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing // The built-in "eligibleNodes.size() - 1" offers just enough buffer so threshold is not rounded down by integer division. // But not too much so that threshold does not get an automatic 1 shard extra allowance. - final int threshold = (totalShards + eligibleNodes.size() - 1 + indexBalanceConstraintSettings.getExcessShards()) / eligibleNodes - .size(); + final int threshold = Math.ceilDiv(totalShards + indexBalanceConstraintSettings.getExcessShards(), eligibleNodes.size()); final int currentAllocation = node.numberOfOwningShardsForIndex(index); if (currentAllocation >= threshold) { From e2f576d3bae1a447058abd522483a42fc64a0721 Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Tue, 25 Nov 2025 14:29:44 -0800 Subject: [PATCH 66/67] address feedbacks. 
--- .../allocation/decider/IndexBalanceAllocationDecider.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 2aafdd994bd3d..2fed115c8f70e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -116,8 +116,9 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing assert totalShards > 0; final double idealAllocation = Math.ceil((double) totalShards / eligibleNodes.size()); - // The built-in "eligibleNodes.size() - 1" offers just enough buffer so threshold is not rounded down by integer division. - // But not too much so that threshold does not get an automatic 1 shard extra allowance. + // Adding the excess shards before division ensures that with tolerance 1 we get: + // 2 shards, 2 nodes, allow 2 on each + // 3 shards, 2 nodes, allow 2 on each (prior version 3, but there is already 1 shard wiggle room). etc. final int threshold = Math.ceilDiv(totalShards + indexBalanceConstraintSettings.getExcessShards(), eligibleNodes.size()); final int currentAllocation = node.numberOfOwningShardsForIndex(index); From 45267009344fe6c08a010050853569964a29288f Mon Sep 17 00:00:00 2001 From: zhubotang-wq Date: Tue, 25 Nov 2025 14:30:53 -0800 Subject: [PATCH 67/67] address feedbacks. 
--- .../allocation/decider/IndexBalanceAllocationDecider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java index 2fed115c8f70e..289b003c1d08e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/IndexBalanceAllocationDecider.java @@ -118,7 +118,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing // Adding the excess shards before division ensures that with tolerance 1 we get: // 2 shards, 2 nodes, allow 2 on each - // 3 shards, 2 nodes, allow 2 on each (prior version 3, but there is already 1 shard wiggle room). etc. + // 3 shards, 2 nodes, allow 2 on each etc. final int threshold = Math.ceilDiv(totalShards + indexBalanceConstraintSettings.getExcessShards(), eligibleNodes.size()); final int currentAllocation = node.numberOfOwningShardsForIndex(index);