From 2372936175166d4ebf64d9b958dfb44546be1084 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 09:20:42 +1100 Subject: [PATCH 1/3] Re-run decideMove if debug logging is turned on to populate explanation --- .../allocator/BalancedShardsAllocator.java | 39 ++++++++++++++++--- .../BalancedShardsAllocatorTests.java | 4 +- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index 325c89400010a..75414fe27ecdb 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -862,17 +862,14 @@ public boolean moveShards() { for (var storedShardMovement : bestNonPreferredShardMovementsTracker.getBestShardMovements()) { final var shardRouting = storedShardMovement.shardRouting(); final var index = projectIndex(shardRouting); - // If `shardMoved` is true, there may have been moves that have made our previous move decision - // invalid, so we must call `decideMove` again. If not, we know we haven't made any moves, and we - // can use the cached decision. - final var moveDecision = shardMoved ? decideMove(index, shardRouting) : storedShardMovement.moveDecision(); + final var moveDecision = reassessDecisionIfRequired(index, storedShardMovement, shardMoved); if (moveDecision.isDecisionTaken() && moveDecision.cannotRemainAndCanMove()) { if (notPreferredLogger.isDebugEnabled()) { notPreferredLogger.debug( - "Moving shard [{}] to [{}] from a NOT_PREFERRED allocation, explanation is [{}]", + "Moving shard [{}] to [{}] from a NOT_PREFERRED allocation: {}", shardRouting, moveDecision.getTargetNode().getName(), - moveDecision.getCanRemainDecision().getExplanation() + moveDecision.getCanRemainDecision() ); } executeMove(shardRouting, index, moveDecision, "move-non-preferred"); @@ -886,6 +883,36 @@ public boolean moveShards() { return shardMoved; } + /** + * If a shard has moved we need to re-run the decision to check that it's still valid. If {@link #notPreferredLogger} is + * set to debug, we re-run the decision with explain turned on so we can populate the explanation in our log message + * + * @param index The index of the shard + * @param storedShardMovement The existing shard movement decision + * @param shardMoved True if a shard moved in this balancing round, false otherwise + * @return The move decision to act on + */ + private MoveDecision reassessDecisionIfRequired( + ProjectIndex index, + BestShardMovementsTracker.StoredShardMovement storedShardMovement, + boolean shardMoved + ) { + final boolean needToReassess = notPreferredLogger.isDebugEnabled() || shardMoved; + if (needToReassess == false) { + return storedShardMovement.moveDecision(); + } + + final var oldDebugMode = allocation.getDebugMode(); + if (notPreferredLogger.isDebugEnabled()) { + allocation.setDebugMode(RoutingAllocation.DebugMode.EXCLUDE_YES_DECISIONS); + } + try { + return decideMove(index, storedShardMovement.shardRouting()); + } finally { + allocation.setDebugMode(oldDebugMode); + } + } + private void executeMove(ShardRouting shardRouting, ProjectIndex index, MoveDecision moveDecision, String reason) { final ModelNode sourceNode = nodes.get(shardRouting.currentNodeId()); final ModelNode targetNode = nodes.get(moveDecision.getTargetNode().getId()); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java index 246e6631a01c9..f968033aaff88 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java @@ -1027,7 +1027,7 @@ public Decision canRemain( RoutingNode node, RoutingAllocation allocation ) { - return new Decision.Single(Decision.Type.NOT_PREFERRED, "test_decider", "Always NOT_PREFERRED"); + return allocation.decision(Decision.NOT_PREFERRED, "test_decider", "Always NOT_PREFERRED"); } })), clusterState.getRoutingNodes().mutableCopy(), clusterState, ClusterInfo.EMPTY, SnapshotShardSizeInfo.EMPTY, 0L); @@ -1039,7 +1039,7 @@ public Decision canRemain( "moved a NOT_PREFERRED allocation", notPreferredLoggerName, Level.DEBUG, - "Moving shard [*] to [*] from a NOT_PREFERRED allocation, explanation is [Always NOT_PREFERRED]" + "Moving shard [*] to [*] from a NOT_PREFERRED allocation: [NOT_PREFERRED(Always NOT_PREFERRED)]" ) ); } From 45702c41cf61b261994e453ec4000336a8ae3228 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 14:57:49 +1100 Subject: [PATCH 2/3] Update server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java Co-authored-by: Dianna Hohensee --- .../routing/allocation/allocator/BalancedShardsAllocator.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index a6752eb61f909..f4e224d33a5fb 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -897,8 +897,7 @@ private MoveDecision reassessDecisionIfRequired( BestShardMovementsTracker.StoredShardMovement storedShardMovement, boolean shardMoved ) { - final boolean needToReassess = notPreferredLogger.isDebugEnabled() || shardMoved; - if (needToReassess == false) { + if (notPreferredLogger.isDebugEnabled() == false && shardMoved == false) { return storedShardMovement.moveDecision(); } From c9a8751fe21b9ff60e6ce0a868746dde2d093496 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 20 Nov 2025 15:11:41 +1100 Subject: [PATCH 3/3] Improve naming/javadoc --- .../allocator/BalancedShardsAllocator.java | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index f4e224d33a5fb..9b9af8143561c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -862,7 +862,7 @@ public boolean moveShards() { for (var storedShardMovement : bestNonPreferredShardMovementsTracker.getBestShardMovements()) { final var shardRouting = storedShardMovement.shardRouting(); final var index = projectIndex(shardRouting); - final var moveDecision = reassessDecisionIfRequired(index, storedShardMovement, shardMoved); + final var moveDecision = refreshDecisionIfRequired(index, storedShardMovement, shardMoved); if (moveDecision.isDecisionTaken() && moveDecision.cannotRemainAndCanMove()) { if (notPreferredLogger.isDebugEnabled()) { notPreferredLogger.debug( @@ -884,15 +884,26 @@ public boolean moveShards() { } /** - * If a shard has moved we need to re-run the decision to check that it's still valid. If {@link #notPreferredLogger} is - * set to debug, we re-run the decision with explain turned on so we can populate the explanation in our log message + * Re-run the allocation deciders if we need to + *

+ * Reasons to re-run the deciders include: + *

    + *
  • A shard has been moved since the decision was made, we need to + * re-run the deciders to ensure the decision is still valid + *
  • + *
  • The {@link #notPreferredLogger} is set to DEBUG, + * we need to re-run the deciders with + * {@link org.elasticsearch.cluster.routing.allocation.RoutingAllocation.DebugMode#EXCLUDE_YES_DECISIONS}, + * so the explanation(s) are populated for the log message + *
  • + *
* * @param index The index of the shard * @param storedShardMovement The existing shard movement decision * @param shardMoved True if a shard moved in this balancing round, false otherwise - * @return The move decision to act on + * @return The move decision to act on, recalculated if necessary */ - private MoveDecision reassessDecisionIfRequired( + private MoveDecision refreshDecisionIfRequired( ProjectIndex index, BestShardMovementsTracker.StoredShardMovement storedShardMovement, boolean shardMoved