From 6704f4fa16b3f82baa47637acd356a7631d245e6 Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Mon, 6 Oct 2025 10:23:36 -0700 Subject: [PATCH 01/22] allocation: add balancer round summary as metrics This commit exposes the BalancingRoundSummary as a collection of APM/OpenTelemetry metrics. The summaries are already logged. The combined summary, collected roughly every ten seconds, is stored as the current state in the telemetry metrics class (AllocationBalancingRoundMetrics). Whenever telemetry collection runs, each metric reads its value from that current view. --- .../elasticsearch/cluster/ClusterModule.java | 13 +- .../AllocationBalancingRoundMetrics.java | 267 ++++++++++++++++++ ...llocationBalancingRoundSummaryService.java | 10 + .../DesiredBalanceShardsAllocator.java | 14 +- ...nsportDeleteDesiredBalanceActionTests.java | 4 +- .../AllocationStatsServiceTests.java | 4 +- .../ClusterAllocationSimulationTests.java | 3 +- .../DesiredBalanceShardsAllocatorTests.java | 30 +- .../cluster/ESAllocationTestCase.java | 4 +- 9 files changed, 328 insertions(+), 21 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index c3f4055c8d061..13c313285d51f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -40,6 +40,7 @@ import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.ShardAllocationDecision; import org.elasticsearch.cluster.routing.allocation.WriteLoadForecaster; +import org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics; import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; import 
org.elasticsearch.cluster.routing.allocation.allocator.BalancerSettings; import org.elasticsearch.cluster.routing.allocation.allocator.BalancingWeightsFactory; @@ -140,6 +141,7 @@ public class ClusterModule extends AbstractModule { private final AllocationStatsService allocationStatsService; private final TelemetryProvider telemetryProvider; private final DesiredBalanceMetrics desiredBalanceMetrics; + private final AllocationBalancingRoundMetrics balancingRoundMetrics; public ClusterModule( Settings settings, @@ -167,6 +169,7 @@ public ClusterModule( balancingWeightsFactory ); this.desiredBalanceMetrics = new DesiredBalanceMetrics(telemetryProvider.getMeterRegistry()); + this.balancingRoundMetrics = new AllocationBalancingRoundMetrics(telemetryProvider.getMeterRegistry()); this.shardsAllocator = createShardsAllocator( settings, clusterService.getClusterSettings(), @@ -179,7 +182,8 @@ public ClusterModule( writeLoadForecaster, nodeAllocationStatsAndWeightsCalculator, this::explainShardAllocation, - desiredBalanceMetrics + desiredBalanceMetrics, + balancingRoundMetrics ); this.clusterService = clusterService; this.indexNameExpressionResolver = new IndexNameExpressionResolver(threadPool.getThreadContext(), systemIndices, projectResolver); @@ -510,7 +514,8 @@ private static ShardsAllocator createShardsAllocator( WriteLoadForecaster writeLoadForecaster, NodeAllocationStatsAndWeightsCalculator nodeAllocationStatsAndWeightsCalculator, ShardAllocationExplainer shardAllocationExplainer, - DesiredBalanceMetrics desiredBalanceMetrics + DesiredBalanceMetrics desiredBalanceMetrics, + AllocationBalancingRoundMetrics balancingRoundMetrics ) { Map> allocators = new HashMap<>(); allocators.put( @@ -527,7 +532,8 @@ private static ShardsAllocator createShardsAllocator( reconciler, nodeAllocationStatsAndWeightsCalculator, shardAllocationExplainer, - desiredBalanceMetrics + desiredBalanceMetrics, + balancingRoundMetrics ) ); @@ -572,6 +578,7 @@ protected void configure() { 
bind(AllocationStatsService.class).toInstance(allocationStatsService); bind(TelemetryProvider.class).toInstance(telemetryProvider); bind(DesiredBalanceMetrics.class).toInstance(desiredBalanceMetrics); + bind(AllocationBalancingRoundMetrics.class).toInstance(balancingRoundMetrics); bind(MetadataRolloverService.class).asEagerSingleton(); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java new file mode 100644 index 0000000000000..7f4224a2abeee --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -0,0 +1,267 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.cluster.routing.allocation.allocator; + +import org.elasticsearch.telemetry.metric.DoubleWithAttributes; +import org.elasticsearch.telemetry.metric.LongWithAttributes; +import org.elasticsearch.telemetry.metric.MeterRegistry; + +import org.elasticsearch.cluster.routing.allocation.allocator.BalancingRoundSummary.NodesWeightsChanges; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; +import java.util.function.ToLongFunction; + +/** + * A telemetry metrics sender for {@link BalancingRoundSummary.CombinedBalancingRoundSummary} + */ +public class AllocationBalancingRoundMetrics { + + public static final String NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME = "es.allocator.balancing_round.balancing_rounds"; + public static final String NUMBER_OF_SHARD_MOVES_METRIC_NAME = "es.allocator.balancing_round.shard_moves"; + + public static final String NUMBER_OF_SHARDS_METRIC_NAME = "es.allocator.balancing_round.shard_count"; + public static final String NUMBER_OF_SHARDS_DELTA_METRIC_NAME = "es.allocator.balancing_round.shard_count_delta"; + + public static final String DISK_USAGE_BYTES_METRIC_NAME = "es.allocator.balancing_round.disk_usage_bytes"; + public static final String DISK_USAGE_BYTES_DELTA_METRIC_NAME = "es.allocator.balancing_round.disk_usage_bytes_delta"; + + public static final String WRITE_LOAD_METRIC_NAME = "es.allocator.balancing_round.write_load"; + public static final String WRITE_LOAD_DELTA_METRIC_NAME = "es.allocator.balancing_round.write_load_delta"; + + public static final String TOTAL_WEIGHT_METRIC_NAME = "es.allocator.balancing_round.total_weight"; + public static final String TOTAL_WEIGHT_DELTA_METRIC_NAME = "es.allocator.balancing_round.total_weight_delta"; + + /** + * The current view of the last period's summary + */ + private final AtomicReference 
combinedSummariesRef = new AtomicReference<>(); + + public static final AllocationBalancingRoundMetrics NOOP = new AllocationBalancingRoundMetrics(MeterRegistry.NOOP); + + private final MeterRegistry meterRegistry; + + public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { + this.meterRegistry = meterRegistry; + + meterRegistry.registerLongsGauge( + NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME, + "Current number of balancing rounds", + "unit", + this::getBalancingRounds + ); + + meterRegistry.registerLongsGauge( + NUMBER_OF_SHARD_MOVES_METRIC_NAME, + "Current number of shard moves", + "{shard}", + this::getShardMoves + ); + + meterRegistry.registerLongsGauge( + NUMBER_OF_SHARDS_METRIC_NAME, + "Current number of shards", + "unit", + this::getShardCount + ); + meterRegistry.registerLongsGauge( + NUMBER_OF_SHARDS_DELTA_METRIC_NAME, + "Current number of shard moves", + "{shard}", + this::getShardCountDelta + ); + + meterRegistry.registerDoublesGauge( + DISK_USAGE_BYTES_METRIC_NAME, + "Disk usage in bytes", + "unit", + this::getDiskUsage + ); + meterRegistry.registerDoublesGauge( + DISK_USAGE_BYTES_DELTA_METRIC_NAME, + "Disk usage delta in bytes", + "{shard}", + this::getDiskUsageDelta + ); + + meterRegistry.registerDoublesGauge( + WRITE_LOAD_METRIC_NAME, + "Write load", + "1.0", + this::getWriteLoad + ); + meterRegistry.registerDoublesGauge( + WRITE_LOAD_DELTA_METRIC_NAME, + "Write load", + "1.0", + this::getWriteLoadDelta + ); + + meterRegistry.registerDoublesGauge( + TOTAL_WEIGHT_METRIC_NAME, + "Total weight", + "1.0", + this::getTotalWeight + ); + meterRegistry.registerDoublesGauge( + TOTAL_WEIGHT_DELTA_METRIC_NAME, + "Total weight delta", + "1.0", + this::getTotalWeightDelta + ); + } + + public void updateRoundMetrics(BalancingRoundSummary.CombinedBalancingRoundSummary summary) { + combinedSummariesRef.set(summary); + } + + public void clearRoundMetrics() { + combinedSummariesRef.set(null); + } + + private Map getNodeAttributes(String nodeId) { + return 
Map.of("node_id", nodeId); + } + + private List getBalancingRounds() { + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); + if (combinedSummary == null) { + return Collections.emptyList(); + } + LongWithAttributes result = new LongWithAttributes(combinedSummary.numberOfShardMoves()); + return List.of(result); + } + + private List getShardMoves() { + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); + if (combinedSummary == null) { + return Collections.emptyList(); + } + LongWithAttributes result = new LongWithAttributes(combinedSummary.numberOfShardMoves()); + return List.of(result); + } + + private List getShardCount() { + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); + if (combinedSummary == null) { + return Collections.emptyList(); + } + + Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); + for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { + metrics.add(new LongWithAttributes(nodeWeights.getValue().baseWeights().shardCount(), getNodeAttributes(nodeWeights.getKey()))); + } + return metrics; + } + + private List getShardCountDelta() { + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); + if (combinedSummary == null) { + return Collections.emptyList(); + } + + Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); + for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { + metrics.add(new LongWithAttributes(nodeWeights.getValue().weightsDiff().shardCountDiff(), getNodeAttributes(nodeWeights.getKey()))); + } + return metrics; + } + + private List getDiskUsage() { + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = 
combinedSummariesRef.get(); + if (combinedSummary == null) { + return Collections.emptyList(); + } + + Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); + for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { + metrics.add(new DoubleWithAttributes(nodeWeights.getValue().baseWeights().diskUsageInBytes(), getNodeAttributes(nodeWeights.getKey()))); + } + return metrics; + } + + private List getDiskUsageDelta() { + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); + if (combinedSummary == null) { + return Collections.emptyList(); + } + + Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); + for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { + metrics.add(new DoubleWithAttributes(nodeWeights.getValue().weightsDiff().diskUsageInBytesDiff(), getNodeAttributes(nodeWeights.getKey()))); + } + return metrics; + } + + private List getWriteLoad() { + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); + if (combinedSummary == null) { + return Collections.emptyList(); + } + + Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); + for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { + metrics.add(new DoubleWithAttributes(nodeWeights.getValue().baseWeights().writeLoad(), getNodeAttributes(nodeWeights.getKey()))); + } + return metrics; + } + + private List getWriteLoadDelta() { + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); + if (combinedSummary == null) { + return Collections.emptyList(); + } + + Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); + 
for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { + metrics.add(new DoubleWithAttributes(nodeWeights.getValue().weightsDiff().writeLoadDiff(), getNodeAttributes(nodeWeights.getKey()))); + } + return metrics; + } + + private List getTotalWeight() { + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); + if (combinedSummary == null) { + return Collections.emptyList(); + } + + Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); + for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { + metrics.add(new DoubleWithAttributes(nodeWeights.getValue().baseWeights().nodeWeight(), getNodeAttributes(nodeWeights.getKey()))); + } + return metrics; + } + + private List getTotalWeightDelta() { + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); + if (combinedSummary == null) { + return Collections.emptyList(); + } + + Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); + for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { + metrics.add(new DoubleWithAttributes(nodeWeights.getValue().weightsDiff().totalWeightDiff(), getNodeAttributes(nodeWeights.getKey()))); + } + return metrics; + } +} diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java index 554dd4ce178e5..6878b3de450b4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java @@ -58,6 +58,7 @@ public class 
AllocationBalancingRoundSummaryService { private final ThreadPool threadPool; private volatile boolean enableBalancerRoundSummaries; private volatile TimeValue summaryReportInterval; + private final AllocationBalancingRoundMetrics balancingRoundMetrics; /** * A concurrency-safe list of balancing round summaries. Balancer rounds are run and added here serially, so the queue will naturally @@ -69,11 +70,17 @@ public class AllocationBalancingRoundSummaryService { private final AtomicReference scheduledReportFuture = new AtomicReference<>(); public AllocationBalancingRoundSummaryService(ThreadPool threadPool, ClusterSettings clusterSettings) { + this(threadPool, clusterSettings, AllocationBalancingRoundMetrics.NOOP); + } + + public AllocationBalancingRoundSummaryService(ThreadPool threadPool, ClusterSettings clusterSettings, + AllocationBalancingRoundMetrics balancingRoundMetrics) { this.threadPool = threadPool; // Initialize the local setting values to avoid a null access when ClusterSettings#initializeAndWatch is called on each setting: // updating enableBalancerRoundSummaries accesses summaryReportInterval. this.enableBalancerRoundSummaries = clusterSettings.get(ENABLE_BALANCER_ROUND_SUMMARIES_SETTING); this.summaryReportInterval = clusterSettings.get(BALANCER_ROUND_SUMMARIES_LOG_INTERVAL_SETTING); + this.balancingRoundMetrics = balancingRoundMetrics; clusterSettings.initializeAndWatch(ENABLE_BALANCER_ROUND_SUMMARIES_SETTING, value -> { this.enableBalancerRoundSummaries = value; @@ -185,6 +192,8 @@ private void drainAndReportSummaries() { } logger.info("Balancing round summaries: " + combinedSummaries); + + balancingRoundMetrics.updateRoundMetrics(combinedSummaries); } /** @@ -213,6 +222,7 @@ private void updateBalancingRoundSummaryReporting() { cancelReporting(); // Clear the data structure so that we don't retain unnecessary memory. 
drainSummaries(); + balancingRoundMetrics.clearRoundMetrics(); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java index e56a8a9ed052e..ff55eb22bb80e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java @@ -90,6 +90,7 @@ public class DesiredBalanceShardsAllocator implements ShardsAllocator { private final Set processedNodeShutdowns = new HashSet<>(); private final NodeAllocationStatsAndWeightsCalculator nodeAllocationStatsAndWeightsCalculator; private final DesiredBalanceMetrics desiredBalanceMetrics; + private final AllocationBalancingRoundMetrics balancingRoundMetrics; /** * Manages balancer round results in order to report on the balancer activity in a configurable manner. 
*/ @@ -121,7 +122,8 @@ public DesiredBalanceShardsAllocator( DesiredBalanceReconcilerAction reconciler, NodeAllocationStatsAndWeightsCalculator nodeAllocationStatsAndWeightsCalculator, ShardAllocationExplainer shardAllocationExplainer, - DesiredBalanceMetrics desiredBalanceMetrics + DesiredBalanceMetrics desiredBalanceMetrics, + AllocationBalancingRoundMetrics balancingRoundMetrics ) { this( delegateAllocator, @@ -130,7 +132,8 @@ public DesiredBalanceShardsAllocator( new DesiredBalanceComputer(clusterSettings, threadPool, delegateAllocator, shardAllocationExplainer), reconciler, nodeAllocationStatsAndWeightsCalculator, - desiredBalanceMetrics + desiredBalanceMetrics, + balancingRoundMetrics ); } @@ -141,11 +144,14 @@ public DesiredBalanceShardsAllocator( DesiredBalanceComputer desiredBalanceComputer, DesiredBalanceReconcilerAction reconciler, NodeAllocationStatsAndWeightsCalculator nodeAllocationStatsAndWeightsCalculator, - DesiredBalanceMetrics desiredBalanceMetrics + DesiredBalanceMetrics desiredBalanceMetrics, + AllocationBalancingRoundMetrics balancingRoundMetrics ) { this.desiredBalanceMetrics = desiredBalanceMetrics; + this.balancingRoundMetrics = balancingRoundMetrics; this.nodeAllocationStatsAndWeightsCalculator = nodeAllocationStatsAndWeightsCalculator; - this.balancerRoundSummaryService = new AllocationBalancingRoundSummaryService(threadPool, clusterService.getClusterSettings()); + this.balancerRoundSummaryService = new AllocationBalancingRoundSummaryService(threadPool, clusterService.getClusterSettings(), + balancingRoundMetrics); this.delegateAllocator = delegateAllocator; this.threadPool = threadPool; this.reconciler = reconciler; diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java index f9dacdb1b1e1f..19f2f4df6974b 100644 --- 
a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java @@ -24,6 +24,7 @@ import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.routing.RoutingTable; import org.elasticsearch.cluster.routing.allocation.AllocationService; +import org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics; import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalance; import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceComputer; @@ -121,7 +122,8 @@ public DesiredBalance compute( computer, (state, action) -> state, EMPTY_NODE_ALLOCATION_STATS, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ); var allocationService = new MockAllocationService( randomAllocationDeciders(settings, clusterSettings), diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsServiceTests.java index 07c3d737c1878..3f67addab7fb1 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsServiceTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.cluster.routing.RoutingTable; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRoutingState; +import org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics; import org.elasticsearch.cluster.routing.allocation.allocator.BalancerSettings; import 
org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalance; import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceMetrics; @@ -177,7 +178,8 @@ public void testUndesiredShardCount() { (innerState, strategy) -> innerState, EMPTY_NODE_ALLOCATION_STATS, TEST_ONLY_EXPLAINER, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ) { @Override public DesiredBalance getDesiredBalance() { diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/ClusterAllocationSimulationTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/ClusterAllocationSimulationTests.java index 3dcf6b15ae1e1..fa0ff495c5538 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/ClusterAllocationSimulationTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/ClusterAllocationSimulationTests.java @@ -490,7 +490,8 @@ private Map.Entry createNewAllocationSer .executeWithRoutingAllocation(clusterState, "reconcile-desired-balance", routingAllocationAction), EMPTY_NODE_ALLOCATION_STATS, TEST_ONLY_EXPLAINER, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ) { @Override public void allocate(RoutingAllocation allocation, ActionListener listener) { diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java index 1f067fcdba898..a22b134a4102d 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java @@ -181,7 +181,8 @@ public ClusterState apply(ClusterState clusterState, 
RerouteStrategy routingAllo reconcileAction, EMPTY_NODE_ALLOCATION_STATS, TEST_ONLY_EXPLAINER, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ); assertValidStats(desiredBalanceShardsAllocator.getStats()); var allocationService = createAllocationService(desiredBalanceShardsAllocator, createGatewayAllocator(allocateUnassigned)); @@ -310,7 +311,8 @@ public ClusterState apply(ClusterState clusterState, RerouteStrategy routingAllo reconcileAction, EMPTY_NODE_ALLOCATION_STATS, TEST_ONLY_EXPLAINER, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ); var allocationService = new AllocationService( new AllocationDeciders(List.of()), @@ -429,7 +431,8 @@ boolean hasEnoughIterations(int currentIteration) { }, reconcileAction, EMPTY_NODE_ALLOCATION_STATS, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ); var allocationService = createAllocationService(desiredBalanceShardsAllocator, gatewayAllocator); allocationServiceRef.set(allocationService); @@ -557,7 +560,8 @@ public DesiredBalance compute( }, reconcileAction, EMPTY_NODE_ALLOCATION_STATS, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ); var allocationService = createAllocationService(desiredBalanceShardsAllocator, gatewayAllocator); allocationServiceRef.set(allocationService); @@ -661,7 +665,8 @@ public DesiredBalance compute( }, reconcileAction, EMPTY_NODE_ALLOCATION_STATS, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ); var allocationService = createAllocationService(desiredBalanceShardsAllocator, gatewayAllocator); @@ -754,7 +759,8 @@ public DesiredBalance compute( desiredBalanceComputer, (reconcilerClusterState, rerouteStrategy) -> reconcilerClusterState, EMPTY_NODE_ALLOCATION_STATS, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + 
AllocationBalancingRoundMetrics.NOOP ); var service = createAllocationService(desiredBalanceShardsAllocator, createGatewayAllocator()); @@ -864,7 +870,8 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing (reconcilerClusterState, rerouteStrategy) -> allocationServiceRef.get() .executeWithRoutingAllocation(reconcilerClusterState, "reconcile-desired-balance", rerouteStrategy), EMPTY_NODE_ALLOCATION_STATS, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ) { @Override protected void reconcile(DesiredBalance desiredBalance, RoutingAllocation allocation) { @@ -1069,7 +1076,8 @@ public void testResetDesiredBalanceOnNoLongerMaster() { desiredBalanceComputer, (reconcilerClusterState, rerouteStrategy) -> reconcilerClusterState, EMPTY_NODE_ALLOCATION_STATS, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ); var service = createAllocationService(desiredBalanceShardsAllocator, createGatewayAllocator()); @@ -1124,7 +1132,8 @@ public void testResetDesiredBalanceOnNodeShutdown() { desiredBalanceComputer, (reconcilerClusterState, rerouteStrategy) -> reconcilerClusterState, EMPTY_NODE_ALLOCATION_STATS, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ) { @Override public void resetDesiredBalance() { @@ -1220,7 +1229,8 @@ public DesiredBalance compute( }, (clusterState, rerouteStrategy) -> null, EMPTY_NODE_ALLOCATION_STATS, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ) { private ActionListener lastListener; diff --git a/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java b/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java index 7d12d0e77cb99..4ddd50e739378 100644 --- a/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java +++ 
b/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java @@ -31,6 +31,7 @@ import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.ShardAllocationDecision; import org.elasticsearch.cluster.routing.allocation.WriteLoadForecaster; +import org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics; import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; import org.elasticsearch.cluster.routing.allocation.allocator.BalancerSettings; import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalance; @@ -184,7 +185,8 @@ private static DesiredBalanceShardsAllocator createDesiredBalanceShardsAllocator null, EMPTY_NODE_ALLOCATION_STATS, TEST_ONLY_EXPLAINER, - DesiredBalanceMetrics.NOOP + DesiredBalanceMetrics.NOOP, + AllocationBalancingRoundMetrics.NOOP ) { private RoutingAllocation lastAllocation; From 99844adaa753678f4aaae001bd7e3453bce6b29c Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 6 Oct 2025 17:36:58 +0000 Subject: [PATCH 02/22] [CI] Auto commit changes from spotless --- .../AllocationBalancingRoundMetrics.java | 79 ++++++++----------- ...llocationBalancingRoundSummaryService.java | 7 +- .../DesiredBalanceShardsAllocator.java | 7 +- 3 files changed, 41 insertions(+), 52 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index 7f4224a2abeee..36caba06e5871 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -9,20 +9,16 @@ package org.elasticsearch.cluster.routing.allocation.allocator; +import 
org.elasticsearch.cluster.routing.allocation.allocator.BalancingRoundSummary.NodesWeightsChanges; import org.elasticsearch.telemetry.metric.DoubleWithAttributes; import org.elasticsearch.telemetry.metric.LongWithAttributes; import org.elasticsearch.telemetry.metric.MeterRegistry; -import org.elasticsearch.cluster.routing.allocation.allocator.BalancingRoundSummary.NodesWeightsChanges; - import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Supplier; -import java.util.function.ToLongFunction; /** * A telemetry metrics sender for {@link BalancingRoundSummary.CombinedBalancingRoundSummary} @@ -70,12 +66,7 @@ public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { this::getShardMoves ); - meterRegistry.registerLongsGauge( - NUMBER_OF_SHARDS_METRIC_NAME, - "Current number of shards", - "unit", - this::getShardCount - ); + meterRegistry.registerLongsGauge(NUMBER_OF_SHARDS_METRIC_NAME, "Current number of shards", "unit", this::getShardCount); meterRegistry.registerLongsGauge( NUMBER_OF_SHARDS_DELTA_METRIC_NAME, "Current number of shard moves", @@ -83,12 +74,7 @@ public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { this::getShardCountDelta ); - meterRegistry.registerDoublesGauge( - DISK_USAGE_BYTES_METRIC_NAME, - "Disk usage in bytes", - "unit", - this::getDiskUsage - ); + meterRegistry.registerDoublesGauge(DISK_USAGE_BYTES_METRIC_NAME, "Disk usage in bytes", "unit", this::getDiskUsage); meterRegistry.registerDoublesGauge( DISK_USAGE_BYTES_DELTA_METRIC_NAME, "Disk usage delta in bytes", @@ -96,31 +82,11 @@ public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { this::getDiskUsageDelta ); - meterRegistry.registerDoublesGauge( - WRITE_LOAD_METRIC_NAME, - "Write load", - "1.0", - this::getWriteLoad - ); - meterRegistry.registerDoublesGauge( - WRITE_LOAD_DELTA_METRIC_NAME, - 
"Write load", - "1.0", - this::getWriteLoadDelta - ); + meterRegistry.registerDoublesGauge(WRITE_LOAD_METRIC_NAME, "Write load", "1.0", this::getWriteLoad); + meterRegistry.registerDoublesGauge(WRITE_LOAD_DELTA_METRIC_NAME, "Write load", "1.0", this::getWriteLoadDelta); - meterRegistry.registerDoublesGauge( - TOTAL_WEIGHT_METRIC_NAME, - "Total weight", - "1.0", - this::getTotalWeight - ); - meterRegistry.registerDoublesGauge( - TOTAL_WEIGHT_DELTA_METRIC_NAME, - "Total weight delta", - "1.0", - this::getTotalWeightDelta - ); + meterRegistry.registerDoublesGauge(TOTAL_WEIGHT_METRIC_NAME, "Total weight", "1.0", this::getTotalWeight); + meterRegistry.registerDoublesGauge(TOTAL_WEIGHT_DELTA_METRIC_NAME, "Total weight delta", "1.0", this::getTotalWeightDelta); } public void updateRoundMetrics(BalancingRoundSummary.CombinedBalancingRoundSummary summary) { @@ -176,7 +142,9 @@ private List getShardCountDelta() { Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add(new LongWithAttributes(nodeWeights.getValue().weightsDiff().shardCountDiff(), getNodeAttributes(nodeWeights.getKey()))); + metrics.add( + new LongWithAttributes(nodeWeights.getValue().weightsDiff().shardCountDiff(), getNodeAttributes(nodeWeights.getKey())) + ); } return metrics; } @@ -190,7 +158,9 @@ private List getDiskUsage() { Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add(new DoubleWithAttributes(nodeWeights.getValue().baseWeights().diskUsageInBytes(), getNodeAttributes(nodeWeights.getKey()))); + metrics.add( + new DoubleWithAttributes(nodeWeights.getValue().baseWeights().diskUsageInBytes(), getNodeAttributes(nodeWeights.getKey())) + ); } return metrics; } @@ -204,7 +174,12 @@ private List 
getDiskUsageDelta() { Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add(new DoubleWithAttributes(nodeWeights.getValue().weightsDiff().diskUsageInBytesDiff(), getNodeAttributes(nodeWeights.getKey()))); + metrics.add( + new DoubleWithAttributes( + nodeWeights.getValue().weightsDiff().diskUsageInBytesDiff(), + getNodeAttributes(nodeWeights.getKey()) + ) + ); } return metrics; } @@ -218,7 +193,9 @@ private List getWriteLoad() { Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add(new DoubleWithAttributes(nodeWeights.getValue().baseWeights().writeLoad(), getNodeAttributes(nodeWeights.getKey()))); + metrics.add( + new DoubleWithAttributes(nodeWeights.getValue().baseWeights().writeLoad(), getNodeAttributes(nodeWeights.getKey())) + ); } return metrics; } @@ -232,7 +209,9 @@ private List getWriteLoadDelta() { Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add(new DoubleWithAttributes(nodeWeights.getValue().weightsDiff().writeLoadDiff(), getNodeAttributes(nodeWeights.getKey()))); + metrics.add( + new DoubleWithAttributes(nodeWeights.getValue().weightsDiff().writeLoadDiff(), getNodeAttributes(nodeWeights.getKey())) + ); } return metrics; } @@ -246,7 +225,9 @@ private List getTotalWeight() { Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add(new DoubleWithAttributes(nodeWeights.getValue().baseWeights().nodeWeight(), 
getNodeAttributes(nodeWeights.getKey()))); + metrics.add( + new DoubleWithAttributes(nodeWeights.getValue().baseWeights().nodeWeight(), getNodeAttributes(nodeWeights.getKey())) + ); } return metrics; } @@ -260,7 +241,9 @@ private List getTotalWeightDelta() { Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add(new DoubleWithAttributes(nodeWeights.getValue().weightsDiff().totalWeightDiff(), getNodeAttributes(nodeWeights.getKey()))); + metrics.add( + new DoubleWithAttributes(nodeWeights.getValue().weightsDiff().totalWeightDiff(), getNodeAttributes(nodeWeights.getKey())) + ); } return metrics; } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java index 6878b3de450b4..51868ed85480b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java @@ -73,8 +73,11 @@ public AllocationBalancingRoundSummaryService(ThreadPool threadPool, ClusterSett this(threadPool, clusterSettings, AllocationBalancingRoundMetrics.NOOP); } - public AllocationBalancingRoundSummaryService(ThreadPool threadPool, ClusterSettings clusterSettings, - AllocationBalancingRoundMetrics balancingRoundMetrics) { + public AllocationBalancingRoundSummaryService( + ThreadPool threadPool, + ClusterSettings clusterSettings, + AllocationBalancingRoundMetrics balancingRoundMetrics + ) { this.threadPool = threadPool; // Initialize the local setting values to avoid a null access when ClusterSettings#initializeAndWatch is called on each setting: // updating 
enableBalancerRoundSummaries accesses summaryReportInterval. diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java index ff55eb22bb80e..1374efe2b4aa0 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java @@ -150,8 +150,11 @@ public DesiredBalanceShardsAllocator( this.desiredBalanceMetrics = desiredBalanceMetrics; this.balancingRoundMetrics = balancingRoundMetrics; this.nodeAllocationStatsAndWeightsCalculator = nodeAllocationStatsAndWeightsCalculator; - this.balancerRoundSummaryService = new AllocationBalancingRoundSummaryService(threadPool, clusterService.getClusterSettings(), - balancingRoundMetrics); + this.balancerRoundSummaryService = new AllocationBalancingRoundSummaryService( + threadPool, + clusterService.getClusterSettings(), + balancingRoundMetrics + ); this.delegateAllocator = delegateAllocator; this.threadPool = threadPool; this.reconciler = reconciler; From dbce2bd60162077ffac767d32d7eb85e8859a3c4 Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Wed, 8 Oct 2025 13:15:52 -0700 Subject: [PATCH 03/22] Update docs/changelog/136043.yaml --- docs/changelog/136043.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/136043.yaml diff --git a/docs/changelog/136043.yaml b/docs/changelog/136043.yaml new file mode 100644 index 0000000000000..baee8094be783 --- /dev/null +++ b/docs/changelog/136043.yaml @@ -0,0 +1,5 @@ +pr: 136043 +summary: "Allocation: add balancer round summary as metrics" +area: Allocation +type: enhancement +issues: [] From 840b003e5d37072b3fef89924097b9f3608818ce Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Wed, 8 Oct 2025 17:53:36 -0700 Subject: 
[PATCH 04/22] Added enableSending flag in, and some renames --- .../AllocationBalancingRoundMetrics.java | 64 +++++++++++-------- ...llocationBalancingRoundSummaryService.java | 4 +- .../DesiredBalanceShardsAllocator.java | 1 + 3 files changed, 42 insertions(+), 27 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index 36caba06e5871..d3ba3fac1d377 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -26,6 +26,7 @@ public class AllocationBalancingRoundMetrics { public static final String NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME = "es.allocator.balancing_round.balancing_rounds"; + public static final String NUMBER_OF_SHARD_MOVES_METRIC_NAME = "es.allocator.balancing_round.shard_moves"; public static final String NUMBER_OF_SHARDS_METRIC_NAME = "es.allocator.balancing_round.shard_count"; @@ -45,11 +46,17 @@ public class AllocationBalancingRoundMetrics { */ private final AtomicReference combinedSummariesRef = new AtomicReference<>(); + /** + * Whether metrics sending is enabled + */ + private volatile boolean enableSending = false; + public static final AllocationBalancingRoundMetrics NOOP = new AllocationBalancingRoundMetrics(MeterRegistry.NOOP); private final MeterRegistry meterRegistry; public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { + this.combinedSummariesRef.set(BalancingRoundSummary.CombinedBalancingRoundSummary.EMPTY_RESULTS); this.meterRegistry = meterRegistry; meterRegistry.registerLongsGauge( @@ -69,7 +76,7 @@ public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { meterRegistry.registerLongsGauge(NUMBER_OF_SHARDS_METRIC_NAME, 
"Current number of shards", "unit", this::getShardCount); meterRegistry.registerLongsGauge( NUMBER_OF_SHARDS_DELTA_METRIC_NAME, - "Current number of shard moves", + "Current number of shards delta", "{shard}", this::getShardCountDelta ); @@ -83,18 +90,23 @@ public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { ); meterRegistry.registerDoublesGauge(WRITE_LOAD_METRIC_NAME, "Write load", "1.0", this::getWriteLoad); - meterRegistry.registerDoublesGauge(WRITE_LOAD_DELTA_METRIC_NAME, "Write load", "1.0", this::getWriteLoadDelta); + meterRegistry.registerDoublesGauge(WRITE_LOAD_DELTA_METRIC_NAME, "Write load delta", "1.0", this::getWriteLoadDelta); meterRegistry.registerDoublesGauge(TOTAL_WEIGHT_METRIC_NAME, "Total weight", "1.0", this::getTotalWeight); meterRegistry.registerDoublesGauge(TOTAL_WEIGHT_DELTA_METRIC_NAME, "Total weight delta", "1.0", this::getTotalWeightDelta); } - public void updateRoundMetrics(BalancingRoundSummary.CombinedBalancingRoundSummary summary) { + public void setEnableSending(boolean enableSending) { + this.enableSending = enableSending; + } + + public void updateBalancingRoundMetrics(BalancingRoundSummary.CombinedBalancingRoundSummary summary) { + assert summary != null : "balancing round metrics cannot be null"; combinedSummariesRef.set(summary); } - public void clearRoundMetrics() { - combinedSummariesRef.set(null); + public void clearBalancingRoundMetrics() { + combinedSummariesRef.set(BalancingRoundSummary.CombinedBalancingRoundSummary.EMPTY_RESULTS); } private Map getNodeAttributes(String nodeId) { @@ -102,29 +114,31 @@ private Map getNodeAttributes(String nodeId) { } private List getBalancingRounds() { - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - if (combinedSummary == null) { + if (enableSending == false) { return Collections.emptyList(); } + + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); 
LongWithAttributes result = new LongWithAttributes(combinedSummary.numberOfShardMoves()); return List.of(result); } private List getShardMoves() { - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - if (combinedSummary == null) { + if (enableSending == false) { return Collections.emptyList(); } + + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); LongWithAttributes result = new LongWithAttributes(combinedSummary.numberOfShardMoves()); return List.of(result); } private List getShardCount() { - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - if (combinedSummary == null) { + if (enableSending == false) { return Collections.emptyList(); } + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { @@ -134,11 +148,11 @@ private List getShardCount() { } private List getShardCountDelta() { - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - if (combinedSummary == null) { + if (enableSending == false) { return Collections.emptyList(); } + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { @@ -150,11 +164,11 @@ private List getShardCountDelta() { } private List getDiskUsage() { - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - if (combinedSummary == null) { + if (enableSending == false) { return 
Collections.emptyList(); } + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { @@ -166,11 +180,11 @@ private List getDiskUsage() { } private List getDiskUsageDelta() { - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - if (combinedSummary == null) { + if (enableSending == false) { return Collections.emptyList(); } + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { @@ -185,11 +199,11 @@ private List getDiskUsageDelta() { } private List getWriteLoad() { - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - if (combinedSummary == null) { + if (enableSending == false) { return Collections.emptyList(); } + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { @@ -201,11 +215,11 @@ private List getWriteLoad() { } private List getWriteLoadDelta() { - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - if (combinedSummary == null) { + if (enableSending == false) { return Collections.emptyList(); } + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); 
List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { @@ -217,11 +231,11 @@ private List getWriteLoadDelta() { } private List getTotalWeight() { - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - if (combinedSummary == null) { + if (enableSending == false) { return Collections.emptyList(); } + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { @@ -233,11 +247,11 @@ private List getTotalWeight() { } private List getTotalWeightDelta() { - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - if (combinedSummary == null) { + if (enableSending == false) { return Collections.emptyList(); } + final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java index 51868ed85480b..e8de9067f37af 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java @@ -196,7 +196,7 @@ private void drainAndReportSummaries() { logger.info("Balancing round summaries: " + 
combinedSummaries); - balancingRoundMetrics.updateRoundMetrics(combinedSummaries); + balancingRoundMetrics.updateBalancingRoundMetrics(combinedSummaries); } /** @@ -225,7 +225,7 @@ private void updateBalancingRoundSummaryReporting() { cancelReporting(); // Clear the data structure so that we don't retain unnecessary memory. drainSummaries(); - balancingRoundMetrics.clearRoundMetrics(); + balancingRoundMetrics.clearBalancingRoundMetrics(); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java index 1374efe2b4aa0..0656364ee9e0c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java @@ -250,6 +250,7 @@ public String toString() { // Only update on change, to minimise volatile writes if (event.localNodeMaster() != event.previousState().nodes().isLocalNodeElectedMaster()) { desiredBalanceMetrics.setNodeIsMaster(event.localNodeMaster()); + balancingRoundMetrics.setEnableSending(event.localNodeMaster()); } }); } From 5a946570beae10c6b50910c52a6ad46385f08273 Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Mon, 13 Oct 2025 10:20:53 -0700 Subject: [PATCH 05/22] Added metrics consolidation, correct diff calculation, and some tests. 
--- .../AllocationBalancingRoundMetrics.java | 180 ++++-------------- ...tionBalancingRoundSummaryServiceTests.java | 40 +++- 2 files changed, 69 insertions(+), 151 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index d3ba3fac1d377..e8732d992f967 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -11,6 +11,7 @@ import org.elasticsearch.cluster.routing.allocation.allocator.BalancingRoundSummary.NodesWeightsChanges; import org.elasticsearch.telemetry.metric.DoubleWithAttributes; +import org.elasticsearch.telemetry.metric.LongCounter; import org.elasticsearch.telemetry.metric.LongWithAttributes; import org.elasticsearch.telemetry.metric.MeterRegistry; @@ -25,26 +26,23 @@ */ public class AllocationBalancingRoundMetrics { + // counters that measure rounds and moves from the last balancing round public static final String NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME = "es.allocator.balancing_round.balancing_rounds"; - public static final String NUMBER_OF_SHARD_MOVES_METRIC_NAME = "es.allocator.balancing_round.shard_moves"; + // gauges that measure current utilization public static final String NUMBER_OF_SHARDS_METRIC_NAME = "es.allocator.balancing_round.shard_count"; - public static final String NUMBER_OF_SHARDS_DELTA_METRIC_NAME = "es.allocator.balancing_round.shard_count_delta"; - public static final String DISK_USAGE_BYTES_METRIC_NAME = "es.allocator.balancing_round.disk_usage_bytes"; - public static final String DISK_USAGE_BYTES_DELTA_METRIC_NAME = "es.allocator.balancing_round.disk_usage_bytes_delta"; - public static final String WRITE_LOAD_METRIC_NAME = 
"es.allocator.balancing_round.write_load"; - public static final String WRITE_LOAD_DELTA_METRIC_NAME = "es.allocator.balancing_round.write_load_delta"; - public static final String TOTAL_WEIGHT_METRIC_NAME = "es.allocator.balancing_round.total_weight"; - public static final String TOTAL_WEIGHT_DELTA_METRIC_NAME = "es.allocator.balancing_round.total_weight_delta"; + + private final LongCounter balancingRoundCounter; + private final LongCounter shardMovesCounter; /** * The current view of the last period's summary */ - private final AtomicReference combinedSummariesRef = new AtomicReference<>(); + private final AtomicReference> nodeNameToWeightChangesRef = new AtomicReference<>(); /** * Whether metrics sending is enabled @@ -56,44 +54,16 @@ public class AllocationBalancingRoundMetrics { private final MeterRegistry meterRegistry; public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { - this.combinedSummariesRef.set(BalancingRoundSummary.CombinedBalancingRoundSummary.EMPTY_RESULTS); this.meterRegistry = meterRegistry; - meterRegistry.registerLongsGauge( - NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME, - "Current number of balancing rounds", - "unit", - this::getBalancingRounds - ); - - meterRegistry.registerLongsGauge( - NUMBER_OF_SHARD_MOVES_METRIC_NAME, - "Current number of shard moves", - "{shard}", - this::getShardMoves - ); + this.balancingRoundCounter = meterRegistry.registerLongCounter(NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME, "Current number of balancing rounds", "unit"); + this.shardMovesCounter = meterRegistry.registerLongCounter(NUMBER_OF_SHARD_MOVES_METRIC_NAME, "Current number of shard moves", "{shard}"); + this.nodeNameToWeightChangesRef.set(Map.of()); meterRegistry.registerLongsGauge(NUMBER_OF_SHARDS_METRIC_NAME, "Current number of shards", "unit", this::getShardCount); - meterRegistry.registerLongsGauge( - NUMBER_OF_SHARDS_DELTA_METRIC_NAME, - "Current number of shards delta", - "{shard}", - this::getShardCountDelta - ); - 
meterRegistry.registerDoublesGauge(DISK_USAGE_BYTES_METRIC_NAME, "Disk usage in bytes", "unit", this::getDiskUsage); - meterRegistry.registerDoublesGauge( - DISK_USAGE_BYTES_DELTA_METRIC_NAME, - "Disk usage delta in bytes", - "{shard}", - this::getDiskUsageDelta - ); - meterRegistry.registerDoublesGauge(WRITE_LOAD_METRIC_NAME, "Write load", "1.0", this::getWriteLoad); - meterRegistry.registerDoublesGauge(WRITE_LOAD_DELTA_METRIC_NAME, "Write load delta", "1.0", this::getWriteLoadDelta); - meterRegistry.registerDoublesGauge(TOTAL_WEIGHT_METRIC_NAME, "Total weight", "1.0", this::getTotalWeight); - meterRegistry.registerDoublesGauge(TOTAL_WEIGHT_DELTA_METRIC_NAME, "Total weight delta", "1.0", this::getTotalWeightDelta); } public void setEnableSending(boolean enableSending) { @@ -102,63 +72,33 @@ public void setEnableSending(boolean enableSending) { public void updateBalancingRoundMetrics(BalancingRoundSummary.CombinedBalancingRoundSummary summary) { assert summary != null : "balancing round metrics cannot be null"; - combinedSummariesRef.set(summary); + + nodeNameToWeightChangesRef.set(summary.nodeNameToWeightChanges()); + if (enableSending) { + balancingRoundCounter.incrementBy(summary.numberOfBalancingRounds()); + shardMovesCounter.incrementBy(summary.numberOfShardMoves()); + } } public void clearBalancingRoundMetrics() { - combinedSummariesRef.set(BalancingRoundSummary.CombinedBalancingRoundSummary.EMPTY_RESULTS); + nodeNameToWeightChangesRef.set(Map.of()); } private Map getNodeAttributes(String nodeId) { return Map.of("node_id", nodeId); } - private List getBalancingRounds() { - if (enableSending == false) { - return Collections.emptyList(); - } - - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - LongWithAttributes result = new LongWithAttributes(combinedSummary.numberOfShardMoves()); - return List.of(result); - } - - private List getShardMoves() { - if (enableSending == false) { - return 
Collections.emptyList(); - } - - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - LongWithAttributes result = new LongWithAttributes(combinedSummary.numberOfShardMoves()); - return List.of(result); - } - private List getShardCount() { if (enableSending == false) { return Collections.emptyList(); } - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + Map nodeNameToWeightChanges = nodeNameToWeightChangesRef.get(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add(new LongWithAttributes(nodeWeights.getValue().baseWeights().shardCount(), getNodeAttributes(nodeWeights.getKey()))); - } - return metrics; - } - - private List getShardCountDelta() { - if (enableSending == false) { - return Collections.emptyList(); - } - - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); - List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); - for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add( - new LongWithAttributes(nodeWeights.getValue().weightsDiff().shardCountDiff(), getNodeAttributes(nodeWeights.getKey())) - ); + NodesWeightsChanges nodeWeightChanges = nodeWeights.getValue(); + long shardCount = nodeWeightChanges.baseWeights().shardCount() + nodeWeightChanges.weightsDiff().shardCountDiff(); + metrics.add(new LongWithAttributes(shardCount, getNodeAttributes(nodeWeights.getKey()))); } return metrics; } @@ -168,32 +108,12 @@ private List getDiskUsage() { return Collections.emptyList(); } - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - Map nodeNameToWeightChanges = 
combinedSummary.nodeNameToWeightChanges(); - List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); - for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add( - new DoubleWithAttributes(nodeWeights.getValue().baseWeights().diskUsageInBytes(), getNodeAttributes(nodeWeights.getKey())) - ); - } - return metrics; - } - - private List getDiskUsageDelta() { - if (enableSending == false) { - return Collections.emptyList(); - } - - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + Map nodeNameToWeightChanges = nodeNameToWeightChangesRef.get(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add( - new DoubleWithAttributes( - nodeWeights.getValue().weightsDiff().diskUsageInBytesDiff(), - getNodeAttributes(nodeWeights.getKey()) - ) - ); + NodesWeightsChanges nodeWeightChanges = nodeWeights.getValue(); + double diskUsage = nodeWeightChanges.baseWeights().diskUsageInBytes() + nodeWeightChanges.weightsDiff().diskUsageInBytesDiff(); + metrics.add(new DoubleWithAttributes(diskUsage, getNodeAttributes(nodeWeights.getKey()))); } return metrics; } @@ -203,29 +123,12 @@ private List getWriteLoad() { return Collections.emptyList(); } - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + Map nodeNameToWeightChanges = nodeNameToWeightChangesRef.get(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add( - new DoubleWithAttributes(nodeWeights.getValue().baseWeights().writeLoad(), getNodeAttributes(nodeWeights.getKey())) - ); - } - return metrics; - } - - private List getWriteLoadDelta() { - if (enableSending == false) { - return 
Collections.emptyList(); - } - - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); - List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); - for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add( - new DoubleWithAttributes(nodeWeights.getValue().weightsDiff().writeLoadDiff(), getNodeAttributes(nodeWeights.getKey())) - ); + NodesWeightsChanges nodeWeightChanges = nodeWeights.getValue(); + double writeLoad = nodeWeightChanges.baseWeights().writeLoad() + nodeWeightChanges.weightsDiff().writeLoadDiff(); + metrics.add(new DoubleWithAttributes(writeLoad, getNodeAttributes(nodeWeights.getKey()))); } return metrics; } @@ -235,29 +138,12 @@ private List getTotalWeight() { return Collections.emptyList(); } - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); - List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); - for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add( - new DoubleWithAttributes(nodeWeights.getValue().baseWeights().nodeWeight(), getNodeAttributes(nodeWeights.getKey())) - ); - } - return metrics; - } - - private List getTotalWeightDelta() { - if (enableSending == false) { - return Collections.emptyList(); - } - - final BalancingRoundSummary.CombinedBalancingRoundSummary combinedSummary = combinedSummariesRef.get(); - Map nodeNameToWeightChanges = combinedSummary.nodeNameToWeightChanges(); + Map nodeNameToWeightChanges = nodeNameToWeightChangesRef.get(); List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - metrics.add( - new DoubleWithAttributes(nodeWeights.getValue().weightsDiff().totalWeightDiff(), getNodeAttributes(nodeWeights.getKey())) - ); + NodesWeightsChanges 
nodeWeightChanges = nodeWeights.getValue(); + double totalWeight = nodeWeightChanges.baseWeights().nodeWeight() + nodeWeightChanges.weightsDiff().totalWeightDiff(); + metrics.add(new DoubleWithAttributes(totalWeight, getNodeAttributes(nodeWeights.getKey()))); } return metrics; } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index 8345d3261139d..61a62a91c293b 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -17,11 +17,22 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.DeterministicTaskQueue; import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.telemetry.InstrumentType; +import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.RecordingMeterRegistry; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.MockLog; import org.elasticsearch.threadpool.ThreadPool; import org.junit.Before; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_SHARD_MOVES_METRIC_NAME; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_SHARDS_METRIC_NAME; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.DISK_USAGE_BYTES_METRIC_NAME; +import static 
org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.WRITE_LOAD_METRIC_NAME; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.TOTAL_WEIGHT_METRIC_NAME; + +import java.util.List; import java.util.Map; import java.util.Set; @@ -86,7 +97,9 @@ public void setUpThreadPool() { * {@link AllocationBalancingRoundSummaryService#ENABLE_BALANCER_ROUND_SUMMARIES_SETTING} defaults to false. */ public void testServiceDisabledByDefault() { - var service = new AllocationBalancingRoundSummaryService(testThreadPool, disabledDefaultEmptyClusterSettings); + var recordingMeterRegistry = new RecordingMeterRegistry(); + var balancingRoundMetrics = new AllocationBalancingRoundMetrics(recordingMeterRegistry); + var service = new AllocationBalancingRoundSummaryService(testThreadPool, disabledDefaultEmptyClusterSettings, balancingRoundMetrics); try (var mockLog = MockLog.capture(AllocationBalancingRoundSummaryService.class)) { /** @@ -110,11 +123,16 @@ public void testServiceDisabledByDefault() { deterministicTaskQueue.runAllRunnableTasks(); mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); + List measurements = recordingMeterRegistry.getRecorder().getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); + assertEquals(measurements.size(), 0); } } public void testEnabledService() { - var service = new AllocationBalancingRoundSummaryService(testThreadPool, enabledClusterSettings); + var recordingMeterRegistry = new RecordingMeterRegistry(); + var balancingRoundMetrics = new AllocationBalancingRoundMetrics(recordingMeterRegistry); + var service = new AllocationBalancingRoundSummaryService(testThreadPool, enabledClusterSettings, balancingRoundMetrics); + balancingRoundMetrics.setEnableSending(true); try (var mockLog = MockLog.capture(AllocationBalancingRoundSummaryService.class)) { /** @@ -156,6 +174,9 @@ public void testEnabledService() { 
deterministicTaskQueue.runAllRunnableTasks(); mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); + + List measurements = recordingMeterRegistry.getRecorder().getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); + assertEquals(measurements.size(), 2); } } @@ -163,7 +184,10 @@ public void testEnabledService() { * The service should combine multiple summaries together into a single report when multiple summaries were added since the last report. */ public void testCombinedSummary() { - var service = new AllocationBalancingRoundSummaryService(testThreadPool, enabledClusterSettings); + var recordingMeterRegistry = new RecordingMeterRegistry(); + var balancingRoundMetrics = new AllocationBalancingRoundMetrics(recordingMeterRegistry); + var service = new AllocationBalancingRoundSummaryService(testThreadPool, enabledClusterSettings, balancingRoundMetrics); + balancingRoundMetrics.setEnableSending(true); try (var mockLog = MockLog.capture(AllocationBalancingRoundSummaryService.class)) { service.addBalancerRoundSummary(new BalancingRoundSummary(NODE_NAME_TO_WEIGHT_CHANGES, 50)); @@ -182,6 +206,9 @@ public void testCombinedSummary() { deterministicTaskQueue.runAllRunnableTasks(); mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); + + List measurements = recordingMeterRegistry.getRecorder().getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); + assertEquals(measurements.size(), 2); } } @@ -189,7 +216,9 @@ public void testCombinedSummary() { * The service shouldn't log anything when there haven't been any summaries added since the last report. 
*/ public void testNoSummariesToReport() { - var service = new AllocationBalancingRoundSummaryService(testThreadPool, enabledClusterSettings); + var recordingMeterRegistry = new RecordingMeterRegistry(); + var balancingRoundMetrics = new AllocationBalancingRoundMetrics(recordingMeterRegistry); + var service = new AllocationBalancingRoundSummaryService(testThreadPool, enabledClusterSettings, balancingRoundMetrics); try (var mockLog = MockLog.capture(AllocationBalancingRoundSummaryService.class)) { /** @@ -229,6 +258,9 @@ public void testNoSummariesToReport() { deterministicTaskQueue.runAllRunnableTasks(); mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); + + List measurements = recordingMeterRegistry.getRecorder().getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); + assertEquals(measurements.size(), 0); } } From c7195fc729d97351e2efe4e1b862ae50fcdb27a0 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 13 Oct 2025 17:29:43 +0000 Subject: [PATCH 06/22] [CI] Auto commit changes from spotless --- .../AllocationBalancingRoundMetrics.java | 12 +++++++-- ...tionBalancingRoundSummaryServiceTests.java | 27 ++++++++++--------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index e8732d992f967..82be1e17f650d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -56,8 +56,16 @@ public class AllocationBalancingRoundMetrics { public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { this.meterRegistry = meterRegistry; - this.balancingRoundCounter = 
meterRegistry.registerLongCounter(NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME, "Current number of balancing rounds", "unit"); - this.shardMovesCounter = meterRegistry.registerLongCounter(NUMBER_OF_SHARD_MOVES_METRIC_NAME, "Current number of shard moves", "{shard}"); + this.balancingRoundCounter = meterRegistry.registerLongCounter( + NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME, + "Current number of balancing rounds", + "unit" + ); + this.shardMovesCounter = meterRegistry.registerLongCounter( + NUMBER_OF_SHARD_MOVES_METRIC_NAME, + "Current number of shard moves", + "{shard}" + ); this.nodeNameToWeightChangesRef.set(Map.of()); meterRegistry.registerLongsGauge(NUMBER_OF_SHARDS_METRIC_NAME, "Current number of shards", "unit", this::getShardCount); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index 61a62a91c293b..224ce8d018198 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -25,17 +25,12 @@ import org.elasticsearch.threadpool.ThreadPool; import org.junit.Before; -import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME; -import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_SHARD_MOVES_METRIC_NAME; -import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_SHARDS_METRIC_NAME; -import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.DISK_USAGE_BYTES_METRIC_NAME; -import static 
org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.WRITE_LOAD_METRIC_NAME; -import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.TOTAL_WEIGHT_METRIC_NAME; - import java.util.List; import java.util.Map; import java.util.Set; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME; + public class AllocationBalancingRoundSummaryServiceTests extends ESTestCase { private static final Logger logger = LogManager.getLogger(AllocationBalancingRoundSummaryServiceTests.class); @@ -99,7 +94,11 @@ public void setUpThreadPool() { public void testServiceDisabledByDefault() { var recordingMeterRegistry = new RecordingMeterRegistry(); var balancingRoundMetrics = new AllocationBalancingRoundMetrics(recordingMeterRegistry); - var service = new AllocationBalancingRoundSummaryService(testThreadPool, disabledDefaultEmptyClusterSettings, balancingRoundMetrics); + var service = new AllocationBalancingRoundSummaryService( + testThreadPool, + disabledDefaultEmptyClusterSettings, + balancingRoundMetrics + ); try (var mockLog = MockLog.capture(AllocationBalancingRoundSummaryService.class)) { /** @@ -123,7 +122,8 @@ public void testServiceDisabledByDefault() { deterministicTaskQueue.runAllRunnableTasks(); mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); - List measurements = recordingMeterRegistry.getRecorder().getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); + List measurements = recordingMeterRegistry.getRecorder() + .getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); assertEquals(measurements.size(), 0); } } @@ -175,7 +175,8 @@ public void testEnabledService() { mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); - List measurements = 
recordingMeterRegistry.getRecorder().getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); + List measurements = recordingMeterRegistry.getRecorder() + .getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); assertEquals(measurements.size(), 2); } } @@ -207,7 +208,8 @@ public void testCombinedSummary() { mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); - List measurements = recordingMeterRegistry.getRecorder().getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); + List measurements = recordingMeterRegistry.getRecorder() + .getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); assertEquals(measurements.size(), 2); } } @@ -259,7 +261,8 @@ public void testNoSummariesToReport() { mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); - List measurements = recordingMeterRegistry.getRecorder().getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); + List measurements = recordingMeterRegistry.getRecorder() + .getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); assertEquals(measurements.size(), 0); } } From 4d1b9db55f83c57256319cfde2f5c8cd42ba6357 Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Mon, 27 Oct 2025 16:35:46 -0700 Subject: [PATCH 07/22] Changing metrics for NodeWeightsChanges and NodeWeightsDiff to use a histogram --- .../elasticsearch/cluster/ClusterModule.java | 1 - .../AllocationBalancingRoundMetrics.java | 126 ++++------------ ...llocationBalancingRoundSummaryService.java | 8 +- .../DesiredBalanceShardsAllocator.java | 1 - ...tionBalancingRoundSummaryServiceTests.java | 134 ++++++++++++++++-- .../elasticsearch/telemetry/Measurement.java | 38 +++++ 6 files changed, 186 insertions(+), 122 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java 
b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index f7b5840097beb..c3d256c37c77c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -582,7 +582,6 @@ protected void configure() { bind(AllocationStatsService.class).toInstance(allocationStatsService); bind(TelemetryProvider.class).toInstance(telemetryProvider); bind(DesiredBalanceMetrics.class).toInstance(desiredBalanceMetrics); - bind(AllocationBalancingRoundMetrics.class).toInstance(balancingRoundMetrics); bind(MetadataRolloverService.class).asEagerSingleton(); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index 82be1e17f650d..8d8b239d294c6 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -10,19 +10,15 @@ package org.elasticsearch.cluster.routing.allocation.allocator; import org.elasticsearch.cluster.routing.allocation.allocator.BalancingRoundSummary.NodesWeightsChanges; -import org.elasticsearch.telemetry.metric.DoubleWithAttributes; +import org.elasticsearch.telemetry.metric.DoubleHistogram; import org.elasticsearch.telemetry.metric.LongCounter; -import org.elasticsearch.telemetry.metric.LongWithAttributes; +import org.elasticsearch.telemetry.metric.LongHistogram; import org.elasticsearch.telemetry.metric.MeterRegistry; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; import java.util.Map; -import java.util.concurrent.atomic.AtomicReference; /** - * A telemetry metrics sender for {@link BalancingRoundSummary.CombinedBalancingRoundSummary} + * A telemetry metrics sender 
for {@link BalancingRoundSummary} */ public class AllocationBalancingRoundMetrics { @@ -30,7 +26,7 @@ public class AllocationBalancingRoundMetrics { public static final String NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME = "es.allocator.balancing_round.balancing_rounds"; public static final String NUMBER_OF_SHARD_MOVES_METRIC_NAME = "es.allocator.balancing_round.shard_moves"; - // gauges that measure current utilization + // histograms that measure current utilization public static final String NUMBER_OF_SHARDS_METRIC_NAME = "es.allocator.balancing_round.shard_count"; public static final String DISK_USAGE_BYTES_METRIC_NAME = "es.allocator.balancing_round.disk_usage_bytes"; public static final String WRITE_LOAD_METRIC_NAME = "es.allocator.balancing_round.write_load"; @@ -39,20 +35,15 @@ public class AllocationBalancingRoundMetrics { private final LongCounter balancingRoundCounter; private final LongCounter shardMovesCounter; - /** - * The current view of the last period's summary - */ - private final AtomicReference> nodeNameToWeightChangesRef = new AtomicReference<>(); - - /** - * Whether metrics sending is enabled - */ - private volatile boolean enableSending = false; - - public static final AllocationBalancingRoundMetrics NOOP = new AllocationBalancingRoundMetrics(MeterRegistry.NOOP); + private final LongHistogram shardCountHistogram; + private final DoubleHistogram diskUsageHistogram; + private final DoubleHistogram writeLoadHistogram; + private final DoubleHistogram totalWeightHistogram; private final MeterRegistry meterRegistry; + public static AllocationBalancingRoundMetrics NOOP = new AllocationBalancingRoundMetrics(MeterRegistry.NOOP); + public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { this.meterRegistry = meterRegistry; @@ -66,93 +57,32 @@ public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { "Current number of shard moves", "{shard}" ); - this.nodeNameToWeightChangesRef.set(Map.of()); - - 
meterRegistry.registerLongsGauge(NUMBER_OF_SHARDS_METRIC_NAME, "Current number of shards", "unit", this::getShardCount); - meterRegistry.registerDoublesGauge(DISK_USAGE_BYTES_METRIC_NAME, "Disk usage in bytes", "unit", this::getDiskUsage); - meterRegistry.registerDoublesGauge(WRITE_LOAD_METRIC_NAME, "Write load", "1.0", this::getWriteLoad); - meterRegistry.registerDoublesGauge(TOTAL_WEIGHT_METRIC_NAME, "Total weight", "1.0", this::getTotalWeight); - } - - public void setEnableSending(boolean enableSending) { - this.enableSending = enableSending; - } - public void updateBalancingRoundMetrics(BalancingRoundSummary.CombinedBalancingRoundSummary summary) { - assert summary != null : "balancing round metrics cannot be null"; - - nodeNameToWeightChangesRef.set(summary.nodeNameToWeightChanges()); - if (enableSending) { - balancingRoundCounter.incrementBy(summary.numberOfBalancingRounds()); - shardMovesCounter.incrementBy(summary.numberOfShardMoves()); - } + this.shardCountHistogram = meterRegistry.registerLongHistogram(NUMBER_OF_SHARDS_METRIC_NAME, "Current number of shards", "unit"); + this.diskUsageHistogram = meterRegistry.registerDoubleHistogram(DISK_USAGE_BYTES_METRIC_NAME, "Disk usage in bytes", "unit"); + this.writeLoadHistogram = meterRegistry.registerDoubleHistogram(WRITE_LOAD_METRIC_NAME, "Write load", "1.0"); + this.totalWeightHistogram = meterRegistry.registerDoubleHistogram(TOTAL_WEIGHT_METRIC_NAME, "Total weight", "1.0"); } - public void clearBalancingRoundMetrics() { - nodeNameToWeightChangesRef.set(Map.of()); - } + public void addBalancingRoundSummary(BalancingRoundSummary summary) { + balancingRoundCounter.increment(); + shardMovesCounter.incrementBy(summary.numberOfShardsToMove()); - private Map getNodeAttributes(String nodeId) { - return Map.of("node_id", nodeId); - } + for (Map.Entry changesEntry : summary.nodeNameToWeightChanges().entrySet()) { + String nodeName = changesEntry.getKey(); + NodesWeightsChanges weightChanges = changesEntry.getValue(); - 
private List getShardCount() { - if (enableSending == false) { - return Collections.emptyList(); - } + DesiredBalanceMetrics.NodeWeightStats baseWeights = weightChanges.baseWeights(); + BalancingRoundSummary.NodeWeightsDiff weightsDiff = weightChanges.weightsDiff(); - Map nodeNameToWeightChanges = nodeNameToWeightChangesRef.get(); - List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); - for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - NodesWeightsChanges nodeWeightChanges = nodeWeights.getValue(); - long shardCount = nodeWeightChanges.baseWeights().shardCount() + nodeWeightChanges.weightsDiff().shardCountDiff(); - metrics.add(new LongWithAttributes(shardCount, getNodeAttributes(nodeWeights.getKey()))); + shardCountHistogram.record(baseWeights.shardCount() + weightsDiff.shardCountDiff(), getNodeAttributes(nodeName)); + diskUsageHistogram.record(baseWeights.diskUsageInBytes() + weightsDiff.diskUsageInBytesDiff(), getNodeAttributes(nodeName)); + writeLoadHistogram.record(baseWeights.writeLoad() + weightsDiff.writeLoadDiff(), getNodeAttributes(nodeName)); + totalWeightHistogram.record(baseWeights.nodeWeight() + weightsDiff.totalWeightDiff(), getNodeAttributes(nodeName)); } - return metrics; } - private List getDiskUsage() { - if (enableSending == false) { - return Collections.emptyList(); - } - - Map nodeNameToWeightChanges = nodeNameToWeightChangesRef.get(); - List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); - for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - NodesWeightsChanges nodeWeightChanges = nodeWeights.getValue(); - double diskUsage = nodeWeightChanges.baseWeights().diskUsageInBytes() + nodeWeightChanges.weightsDiff().diskUsageInBytesDiff(); - metrics.add(new DoubleWithAttributes(diskUsage, getNodeAttributes(nodeWeights.getKey()))); - } - return metrics; - } - - private List getWriteLoad() { - if (enableSending == false) { - return Collections.emptyList(); - } - - Map nodeNameToWeightChanges = 
nodeNameToWeightChangesRef.get(); - List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); - for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - NodesWeightsChanges nodeWeightChanges = nodeWeights.getValue(); - double writeLoad = nodeWeightChanges.baseWeights().writeLoad() + nodeWeightChanges.weightsDiff().writeLoadDiff(); - metrics.add(new DoubleWithAttributes(writeLoad, getNodeAttributes(nodeWeights.getKey()))); - } - return metrics; - } - - private List getTotalWeight() { - if (enableSending == false) { - return Collections.emptyList(); - } - - Map nodeNameToWeightChanges = nodeNameToWeightChangesRef.get(); - List metrics = new ArrayList<>(nodeNameToWeightChanges.size()); - for (var nodeWeights : nodeNameToWeightChanges.entrySet()) { - NodesWeightsChanges nodeWeightChanges = nodeWeights.getValue(); - double totalWeight = nodeWeightChanges.baseWeights().nodeWeight() + nodeWeightChanges.weightsDiff().totalWeightDiff(); - metrics.add(new DoubleWithAttributes(totalWeight, getNodeAttributes(nodeWeights.getKey()))); - } - return metrics; + private Map getNodeAttributes(String nodeId) { + return Map.of("node_name", nodeId); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java index 8ff6372211c7b..f14baa0ff3ac6 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java @@ -69,10 +69,6 @@ public class AllocationBalancingRoundSummaryService { /** This reference is set when reporting is scheduled. If it is null, then reporting is inactive. 
*/ private final AtomicReference scheduledReportFuture = new AtomicReference<>(); - public AllocationBalancingRoundSummaryService(ThreadPool threadPool, ClusterSettings clusterSettings) { - this(threadPool, clusterSettings, AllocationBalancingRoundMetrics.NOOP); - } - public AllocationBalancingRoundSummaryService( ThreadPool threadPool, ClusterSettings clusterSettings, @@ -174,6 +170,7 @@ public void addBalancerRoundSummary(BalancingRoundSummary summary) { } summaries.add(summary); + balancingRoundMetrics.addBalancingRoundSummary(summary); } /** @@ -195,8 +192,6 @@ private void drainAndReportSummaries() { } logger.info("Balancing round summaries: " + combinedSummaries); - - balancingRoundMetrics.updateBalancingRoundMetrics(combinedSummaries); } /** @@ -225,7 +220,6 @@ private void updateBalancingRoundSummaryReporting() { cancelReporting(); // Clear the data structure so that we don't retain unnecessary memory. drainSummaries(); - balancingRoundMetrics.clearBalancingRoundMetrics(); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java index 0656364ee9e0c..1374efe2b4aa0 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java @@ -250,7 +250,6 @@ public String toString() { // Only update on change, to minimise volatile writes if (event.localNodeMaster() != event.previousState().nodes().isLocalNodeElectedMaster()) { desiredBalanceMetrics.setNodeIsMaster(event.localNodeMaster()); - balancingRoundMetrics.setEnableSending(event.localNodeMaster()); } }); } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java 
b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index 224ce8d018198..c014be0cbdcb0 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -19,7 +19,9 @@ import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.telemetry.InstrumentType; import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.MetricRecorder; import org.elasticsearch.telemetry.RecordingMeterRegistry; +import org.elasticsearch.telemetry.metric.Instrument; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.MockLog; import org.elasticsearch.threadpool.ThreadPool; @@ -28,8 +30,14 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Function; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.DISK_USAGE_BYTES_METRIC_NAME; import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_SHARDS_METRIC_NAME; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_SHARD_MOVES_METRIC_NAME; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.TOTAL_WEIGHT_METRIC_NAME; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.WRITE_LOAD_METRIC_NAME; public class AllocationBalancingRoundSummaryServiceTests extends ESTestCase { private static final Logger logger = LogManager.getLogger(AllocationBalancingRoundSummaryServiceTests.class); @@ 
-122,9 +130,8 @@ public void testServiceDisabledByDefault() { deterministicTaskQueue.runAllRunnableTasks(); mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); - List measurements = recordingMeterRegistry.getRecorder() - .getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); - assertEquals(measurements.size(), 0); + + assertMetricsCollected(recordingMeterRegistry, List.of(), List.of(), Map.of(), Map.of(), Map.of(), Map.of()); } } @@ -132,7 +139,6 @@ public void testEnabledService() { var recordingMeterRegistry = new RecordingMeterRegistry(); var balancingRoundMetrics = new AllocationBalancingRoundMetrics(recordingMeterRegistry); var service = new AllocationBalancingRoundSummaryService(testThreadPool, enabledClusterSettings, balancingRoundMetrics); - balancingRoundMetrics.setEnableSending(true); try (var mockLog = MockLog.capture(AllocationBalancingRoundSummaryService.class)) { /** @@ -175,9 +181,15 @@ public void testEnabledService() { mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); - List measurements = recordingMeterRegistry.getRecorder() - .getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); - assertEquals(measurements.size(), 2); + assertMetricsCollected( + recordingMeterRegistry, + List.of(1L, 1L), + List.of(50L, 200L), + Map.of("node1", List.of(2L, 2L), "node2", List.of(2L, 2L)), + Map.of("node1", List.of(4.0, 4.0), "node2", List.of(4.0, 4.0)), + Map.of("node1", List.of(6.0, 6.0), "node2", List.of(6.0, 6.0)), + Map.of("node1", List.of(8.0, 8.0), "node2", List.of(8.0, 8.0)) + ); } } @@ -188,7 +200,6 @@ public void testCombinedSummary() { var recordingMeterRegistry = new RecordingMeterRegistry(); var balancingRoundMetrics = new AllocationBalancingRoundMetrics(recordingMeterRegistry); var service = new AllocationBalancingRoundSummaryService(testThreadPool, enabledClusterSettings, balancingRoundMetrics); - 
balancingRoundMetrics.setEnableSending(true); try (var mockLog = MockLog.capture(AllocationBalancingRoundSummaryService.class)) { service.addBalancerRoundSummary(new BalancingRoundSummary(NODE_NAME_TO_WEIGHT_CHANGES, 50)); @@ -208,9 +219,15 @@ public void testCombinedSummary() { mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); - List measurements = recordingMeterRegistry.getRecorder() - .getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); - assertEquals(measurements.size(), 2); + assertMetricsCollected( + recordingMeterRegistry, + List.of(1L, 1L), + List.of(50L, 100L), + Map.of("node1", List.of(2L, 2L), "node2", List.of(2L, 2L)), + Map.of("node1", List.of(4.0, 4.0), "node2", List.of(4.0, 4.0)), + Map.of("node1", List.of(6.0, 6.0), "node2", List.of(6.0, 6.0)), + Map.of("node1", List.of(8.0, 8.0), "node2", List.of(8.0, 8.0)) + ); } } @@ -238,6 +255,16 @@ public void testNoSummariesToReport() { ) ); + assertMetricsCollected( + recordingMeterRegistry, + List.of(1L), + List.of(50L), + Map.of("node1", List.of(2L), "node2", List.of(2L)), + Map.of("node1", List.of(4.0), "node2", List.of(4.0)), + Map.of("node1", List.of(6.0), "node2", List.of(6.0)), + Map.of("node1", List.of(8.0), "node2", List.of(8.0)) + ); + deterministicTaskQueue.advanceTime(); deterministicTaskQueue.runAllRunnableTasks(); mockLog.awaitAllExpectationsMatched(); @@ -261,9 +288,15 @@ public void testNoSummariesToReport() { mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); - List measurements = recordingMeterRegistry.getRecorder() - .getMeasurements(InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME); - assertEquals(measurements.size(), 0); + assertMetricsCollected( + recordingMeterRegistry, + List.of(1L), + List.of(50L), + Map.of("node1", List.of(2L), "node2", List.of(2L)), + Map.of("node1", List.of(4.0), "node2", List.of(4.0)), + Map.of("node1", List.of(6.0), "node2", List.of(6.0)), + Map.of("node1", 
List.of(8.0), "node2", List.of(8.0)) + ); } } @@ -272,11 +305,13 @@ public void testNoSummariesToReport() { * to false. */ public void testEnableAndThenDisableService() { + var recordingMeterRegistry = new RecordingMeterRegistry(); + var balancingRoundMetrics = new AllocationBalancingRoundMetrics(recordingMeterRegistry); var disabledSettingsUpdate = Settings.builder() .put(AllocationBalancingRoundSummaryService.ENABLE_BALANCER_ROUND_SUMMARIES_SETTING.getKey(), false) .build(); ClusterSettings clusterSettings = new ClusterSettings(enabledSummariesSettings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - var service = new AllocationBalancingRoundSummaryService(testThreadPool, clusterSettings); + var service = new AllocationBalancingRoundSummaryService(testThreadPool, clusterSettings, balancingRoundMetrics); try (var mockLog = MockLog.capture(AllocationBalancingRoundSummaryService.class)) { /** @@ -290,6 +325,16 @@ public void testEnableAndThenDisableService() { clusterSettings.applySettings(disabledSettingsUpdate); service.verifyNumberOfSummaries(0); + assertMetricsCollected( + recordingMeterRegistry, + List.of(1L), + List.of(50L), + Map.of("node1", List.of(2L), "node2", List.of(2L)), + Map.of("node1", List.of(4.0), "node2", List.of(4.0)), + Map.of("node1", List.of(6.0), "node2", List.of(6.0)), + Map.of("node1", List.of(8.0), "node2", List.of(8.0)) + ); + /** * Verify that any additional summaries are not retained, since the service is disabled. 
*/ @@ -310,6 +355,16 @@ public void testEnableAndThenDisableService() { deterministicTaskQueue.runAllRunnableTasks(); mockLog.awaitAllExpectationsMatched(); service.verifyNumberOfSummaries(0); + + assertMetricsCollected( + recordingMeterRegistry, + List.of(1L), + List.of(50L), + Map.of("node1", List.of(2L), "node2", List.of(2L)), + Map.of("node1", List.of(4.0), "node2", List.of(4.0)), + Map.of("node1", List.of(6.0), "node2", List.of(6.0)), + Map.of("node1", List.of(8.0), "node2", List.of(8.0)) + ); } } @@ -482,4 +537,53 @@ public void testCreateBalancerRoundSummaryWithAddedNode() { private void assertDoublesEqual(double expected, double actual) { assertEquals(expected, actual, 0.00001); } + + private void assertMetricsCollected( + RecordingMeterRegistry recordingMeterRegistry, + List roundCounts, + List shardMoves, + Map> shardCountTelemetry, + Map> diskUsageTelemetry, + Map> writeLoadTelemetry, + Map> totalWeightTelemetry + ) { + MetricRecorder metricRecorder = recordingMeterRegistry.getRecorder(); + + List measuredRoundCounts = metricRecorder.getMeasurements( + InstrumentType.LONG_COUNTER, + NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME + ); + List measuredRoundCountValues = Measurement.getMeasurementValues(measuredRoundCounts, (measurement -> measurement.getLong())); + assertEquals(measuredRoundCountValues, roundCounts); + + List measuredShardMoves = metricRecorder.getMeasurements( + InstrumentType.LONG_COUNTER, + NUMBER_OF_SHARD_MOVES_METRIC_NAME + ); + List measuredShardMoveValues = Measurement.getMeasurementValues(measuredShardMoves, (measurement -> measurement.getLong())); + assertEquals(measuredShardMoveValues, shardMoves); + + List measuredShardCounts = metricRecorder.getMeasurements(InstrumentType.LONG_HISTOGRAM, NUMBER_OF_SHARDS_METRIC_NAME); + var shardCountsByNode = groupMeasurementsByAttribute(measuredShardCounts, (measurement -> measurement.getLong())); + assertEquals(shardCountTelemetry, shardCountsByNode); + + List measuredDiskUsage = 
metricRecorder.getMeasurements(InstrumentType.DOUBLE_HISTOGRAM, DISK_USAGE_BYTES_METRIC_NAME); + var diskUsageByNode = groupMeasurementsByAttribute(measuredDiskUsage, (measurement -> measurement.getDouble())); + assertEquals(diskUsageTelemetry, diskUsageByNode); + + List measuredWriteLoad = metricRecorder.getMeasurements(InstrumentType.DOUBLE_HISTOGRAM, WRITE_LOAD_METRIC_NAME); + var writeLoadByNode = groupMeasurementsByAttribute(measuredWriteLoad, (measurement -> measurement.getDouble())); + assertEquals(writeLoadTelemetry, writeLoadByNode); + + List measuredTotalWeight = metricRecorder.getMeasurements(InstrumentType.DOUBLE_HISTOGRAM, TOTAL_WEIGHT_METRIC_NAME); + var totalWeightByNode = groupMeasurementsByAttribute(measuredTotalWeight, (measurement -> measurement.getDouble())); + assertEquals(totalWeightTelemetry, totalWeightByNode); + } + + private Map> groupMeasurementsByAttribute( + List measurements, + Function getMeasurementValue + ) { + return Measurement.groupMeasurementsByAttribute(measurements, (attrs -> (String) attrs.get("node_name")), getMeasurementValue); + } } diff --git a/test/framework/src/main/java/org/elasticsearch/telemetry/Measurement.java b/test/framework/src/main/java/org/elasticsearch/telemetry/Measurement.java index 4b769a25b295f..d45ff79f05ab1 100644 --- a/test/framework/src/main/java/org/elasticsearch/telemetry/Measurement.java +++ b/test/framework/src/main/java/org/elasticsearch/telemetry/Measurement.java @@ -9,11 +9,13 @@ package org.elasticsearch.telemetry; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.function.Function; import java.util.stream.Collectors; /** @@ -66,4 +68,40 @@ public static List combine(List measurements) { .map(entry -> new Measurement(entry.getValue(), entry.getKey(), isDouble)) .collect(Collectors.toList()); } + + /** + * Turn a list of {@link org.elasticsearch.telemetry.Measurement} into 
a list of its Long or Double + * + * @param measurements The measurements in question + * @param getMeasurementValue The measurement -> value (Long or Double) retrieval function + */ + public static List getMeasurementValues(List measurements, Function getMeasurementValue) { + List measurementValues = new ArrayList(measurements.size()); + for (Measurement measurement : measurements) { + T measurementValue = getMeasurementValue.apply(measurement); + measurementValues.add(measurementValue); + } + return measurementValues; + } + + /** + * Groups a list of {@link org.elasticsearch.telemetry.Measurement} by their attribute values + * + * @param measurements The measurements + * @param getAttribute The attribute retrieval function. This must cast from Object to its return type + * @param getMeasurementValue The measurement -> value (Long or Double) retrieval function + */ + public static Map> groupMeasurementsByAttribute( + List measurements, + Function, Attr> getAttribute, + Function getMeasurementValue + ) { + Map> measurementsByNode = new HashMap<>(); + for (Measurement measurement : measurements) { + Attr attr = getAttribute.apply(measurement.attributes()); + List nodeMeasurements = measurementsByNode.computeIfAbsent(attr, (k -> new ArrayList<>())); + nodeMeasurements.add(getMeasurementValue.apply(measurement)); + } + return measurementsByNode; + } } From 1b149ef6d936255177f28b635c4143482b8d4951 Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Sun, 2 Nov 2025 20:51:41 -0800 Subject: [PATCH 08/22] Remove extra meter registry --- .../allocation/allocator/AllocationBalancingRoundMetrics.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index 8d8b239d294c6..62a7539177b68 100644 --- 
a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -40,13 +40,9 @@ public class AllocationBalancingRoundMetrics { private final DoubleHistogram writeLoadHistogram; private final DoubleHistogram totalWeightHistogram; - private final MeterRegistry meterRegistry; - public static AllocationBalancingRoundMetrics NOOP = new AllocationBalancingRoundMetrics(MeterRegistry.NOOP); public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { - this.meterRegistry = meterRegistry; - this.balancingRoundCounter = meterRegistry.registerLongCounter( NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME, "Current number of balancing rounds", From 41a2c72769d9a569238cf03d0ac4b095b3d04c47 Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Sun, 2 Nov 2025 20:52:07 -0800 Subject: [PATCH 09/22] Send absolute value of diff instead of last + diff --- .../AllocationBalancingRoundMetrics.java | 10 ++-- ...tionBalancingRoundSummaryServiceTests.java | 48 +++++++++---------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index 62a7539177b68..e5e5caab87178 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -67,14 +67,12 @@ public void addBalancingRoundSummary(BalancingRoundSummary summary) { for (Map.Entry changesEntry : summary.nodeNameToWeightChanges().entrySet()) { String nodeName = changesEntry.getKey(); NodesWeightsChanges weightChanges = changesEntry.getValue(); - - 
DesiredBalanceMetrics.NodeWeightStats baseWeights = weightChanges.baseWeights(); BalancingRoundSummary.NodeWeightsDiff weightsDiff = weightChanges.weightsDiff(); - shardCountHistogram.record(baseWeights.shardCount() + weightsDiff.shardCountDiff(), getNodeAttributes(nodeName)); - diskUsageHistogram.record(baseWeights.diskUsageInBytes() + weightsDiff.diskUsageInBytesDiff(), getNodeAttributes(nodeName)); - writeLoadHistogram.record(baseWeights.writeLoad() + weightsDiff.writeLoadDiff(), getNodeAttributes(nodeName)); - totalWeightHistogram.record(baseWeights.nodeWeight() + weightsDiff.totalWeightDiff(), getNodeAttributes(nodeName)); + shardCountHistogram.record(Math.abs(weightsDiff.shardCountDiff()), getNodeAttributes(nodeName)); + diskUsageHistogram.record(Math.abs(weightsDiff.diskUsageInBytesDiff()), getNodeAttributes(nodeName)); + writeLoadHistogram.record(Math.abs(weightsDiff.writeLoadDiff()), getNodeAttributes(nodeName)); + totalWeightHistogram.record(Math.abs(weightsDiff.totalWeightDiff()), getNodeAttributes(nodeName)); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index c014be0cbdcb0..6bdb901c4861e 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -185,10 +185,10 @@ public void testEnabledService() { recordingMeterRegistry, List.of(1L, 1L), List.of(50L, 200L), - Map.of("node1", List.of(2L, 2L), "node2", List.of(2L, 2L)), - Map.of("node1", List.of(4.0, 4.0), "node2", List.of(4.0, 4.0)), - Map.of("node1", List.of(6.0, 6.0), "node2", List.of(6.0, 6.0)), - Map.of("node1", List.of(8.0, 8.0), "node2", List.of(8.0, 8.0)) + 
Map.of("node1", List.of(1L, 1L), "node2", List.of(1L, 1L)), + Map.of("node1", List.of(2.0, 2.0), "node2", List.of(2.0, 2.0)), + Map.of("node1", List.of(3.0, 3.0), "node2", List.of(3.0, 3.0)), + Map.of("node1", List.of(4.0, 4.0), "node2", List.of(4.0, 4.0)) ); } } @@ -223,10 +223,10 @@ public void testCombinedSummary() { recordingMeterRegistry, List.of(1L, 1L), List.of(50L, 100L), - Map.of("node1", List.of(2L, 2L), "node2", List.of(2L, 2L)), - Map.of("node1", List.of(4.0, 4.0), "node2", List.of(4.0, 4.0)), - Map.of("node1", List.of(6.0, 6.0), "node2", List.of(6.0, 6.0)), - Map.of("node1", List.of(8.0, 8.0), "node2", List.of(8.0, 8.0)) + Map.of("node1", List.of(1L, 1L), "node2", List.of(1L, 1L)), + Map.of("node1", List.of(2.0, 2.0), "node2", List.of(2.0, 2.0)), + Map.of("node1", List.of(3.0, 3.0), "node2", List.of(3.0, 3.0)), + Map.of("node1", List.of(4.0, 4.0), "node2", List.of(4.0, 4.0)) ); } } @@ -259,10 +259,10 @@ public void testNoSummariesToReport() { recordingMeterRegistry, List.of(1L), List.of(50L), - Map.of("node1", List.of(2L), "node2", List.of(2L)), - Map.of("node1", List.of(4.0), "node2", List.of(4.0)), - Map.of("node1", List.of(6.0), "node2", List.of(6.0)), - Map.of("node1", List.of(8.0), "node2", List.of(8.0)) + Map.of("node1", List.of(1L), "node2", List.of(1L)), + Map.of("node1", List.of(2.0), "node2", List.of(2.0)), + Map.of("node1", List.of(3.0), "node2", List.of(3.0)), + Map.of("node1", List.of(4.0), "node2", List.of(4.0)) ); deterministicTaskQueue.advanceTime(); @@ -292,10 +292,10 @@ public void testNoSummariesToReport() { recordingMeterRegistry, List.of(1L), List.of(50L), - Map.of("node1", List.of(2L), "node2", List.of(2L)), - Map.of("node1", List.of(4.0), "node2", List.of(4.0)), - Map.of("node1", List.of(6.0), "node2", List.of(6.0)), - Map.of("node1", List.of(8.0), "node2", List.of(8.0)) + Map.of("node1", List.of(1L), "node2", List.of(1L)), + Map.of("node1", List.of(2.0), "node2", List.of(2.0)), + Map.of("node1", List.of(3.0), "node2", 
List.of(3.0)), + Map.of("node1", List.of(4.0), "node2", List.of(4.0)) ); } } @@ -329,10 +329,10 @@ public void testEnableAndThenDisableService() { recordingMeterRegistry, List.of(1L), List.of(50L), - Map.of("node1", List.of(2L), "node2", List.of(2L)), - Map.of("node1", List.of(4.0), "node2", List.of(4.0)), - Map.of("node1", List.of(6.0), "node2", List.of(6.0)), - Map.of("node1", List.of(8.0), "node2", List.of(8.0)) + Map.of("node1", List.of(1L), "node2", List.of(1L)), + Map.of("node1", List.of(2.0), "node2", List.of(2.0)), + Map.of("node1", List.of(3.0), "node2", List.of(3.0)), + Map.of("node1", List.of(4.0), "node2", List.of(4.0)) ); /** @@ -360,10 +360,10 @@ public void testEnableAndThenDisableService() { recordingMeterRegistry, List.of(1L), List.of(50L), - Map.of("node1", List.of(2L), "node2", List.of(2L)), - Map.of("node1", List.of(4.0), "node2", List.of(4.0)), - Map.of("node1", List.of(6.0), "node2", List.of(6.0)), - Map.of("node1", List.of(8.0), "node2", List.of(8.0)) + Map.of("node1", List.of(1L), "node2", List.of(1L)), + Map.of("node1", List.of(2.0), "node2", List.of(2.0)), + Map.of("node1", List.of(3.0), "node2", List.of(3.0)), + Map.of("node1", List.of(4.0), "node2", List.of(4.0)) ); } } From 26f9c14e05278e58450a3c2461096e55247cf6a5 Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Thu, 6 Nov 2025 20:23:21 -0800 Subject: [PATCH 10/22] Adding shard moves histogram --- .../allocator/AllocationBalancingRoundMetrics.java | 5 +++++ .../AllocationBalancingRoundSummaryServiceTests.java | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index e5e5caab87178..22375ed850782 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ 
b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -25,6 +25,7 @@ public class AllocationBalancingRoundMetrics { // counters that measure rounds and moves from the last balancing round public static final String NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME = "es.allocator.balancing_round.balancing_rounds"; public static final String NUMBER_OF_SHARD_MOVES_METRIC_NAME = "es.allocator.balancing_round.shard_moves"; + public static final String NUMBER_OF_SHARD_MOVES_HISTOGRAM_METRIC_NAME = "es.allocator.balancing_round.shard_moves_histogram"; // histograms that measure current utilization public static final String NUMBER_OF_SHARDS_METRIC_NAME = "es.allocator.balancing_round.shard_count"; @@ -34,6 +35,7 @@ public class AllocationBalancingRoundMetrics { private final LongCounter balancingRoundCounter; private final LongCounter shardMovesCounter; + private final LongHistogram shardMovesHistogram; private final LongHistogram shardCountHistogram; private final DoubleHistogram diskUsageHistogram; @@ -54,6 +56,8 @@ public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { "{shard}" ); + this.shardMovesHistogram = meterRegistry.registerLongHistogram(NUMBER_OF_SHARD_MOVES_HISTOGRAM_METRIC_NAME, + "Histogram of shard moves", "unit"); this.shardCountHistogram = meterRegistry.registerLongHistogram(NUMBER_OF_SHARDS_METRIC_NAME, "Current number of shards", "unit"); this.diskUsageHistogram = meterRegistry.registerDoubleHistogram(DISK_USAGE_BYTES_METRIC_NAME, "Disk usage in bytes", "unit"); this.writeLoadHistogram = meterRegistry.registerDoubleHistogram(WRITE_LOAD_METRIC_NAME, "Write load", "1.0"); @@ -63,6 +67,7 @@ public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { public void addBalancingRoundSummary(BalancingRoundSummary summary) { balancingRoundCounter.increment(); shardMovesCounter.incrementBy(summary.numberOfShardsToMove()); + shardMovesHistogram.record(summary.numberOfShardsToMove()); 
for (Map.Entry changesEntry : summary.nodeNameToWeightChanges().entrySet()) { String nodeName = changesEntry.getKey(); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index 6bdb901c4861e..6988c6e910737 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -35,6 +35,7 @@ import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.DISK_USAGE_BYTES_METRIC_NAME; import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME; import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_SHARDS_METRIC_NAME; +import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_SHARD_MOVES_HISTOGRAM_METRIC_NAME; import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.NUMBER_OF_SHARD_MOVES_METRIC_NAME; import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.TOTAL_WEIGHT_METRIC_NAME; import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationBalancingRoundMetrics.WRITE_LOAD_METRIC_NAME; @@ -563,6 +564,10 @@ private void assertMetricsCollected( List measuredShardMoveValues = Measurement.getMeasurementValues(measuredShardMoves, (measurement -> measurement.getLong())); assertEquals(measuredShardMoveValues, shardMoves); + List measuredShardMoveHistogram = metricRecorder.getMeasurements(InstrumentType.LONG_HISTOGRAM, 
NUMBER_OF_SHARD_MOVES_HISTOGRAM_METRIC_NAME); + List measuredShardMoveHistogramValues = Measurement.getMeasurementValues(measuredShardMoveHistogram, (measurement -> measurement.getLong())); + assertEquals(measuredShardMoveHistogramValues, shardMoves); + List measuredShardCounts = metricRecorder.getMeasurements(InstrumentType.LONG_HISTOGRAM, NUMBER_OF_SHARDS_METRIC_NAME); var shardCountsByNode = groupMeasurementsByAttribute(measuredShardCounts, (measurement -> measurement.getLong())); assertEquals(shardCountTelemetry, shardCountsByNode); From 40bf700f982af81f2f5dcf2d5f833ebfcafc4873 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 7 Nov 2025 04:42:09 +0000 Subject: [PATCH 11/22] [CI] Auto commit changes from spotless --- .../allocator/AllocationBalancingRoundMetrics.java | 7 +++++-- .../AllocationBalancingRoundSummaryServiceTests.java | 10 ++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index 22375ed850782..185951311e60c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -56,8 +56,11 @@ public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { "{shard}" ); - this.shardMovesHistogram = meterRegistry.registerLongHistogram(NUMBER_OF_SHARD_MOVES_HISTOGRAM_METRIC_NAME, - "Histogram of shard moves", "unit"); + this.shardMovesHistogram = meterRegistry.registerLongHistogram( + NUMBER_OF_SHARD_MOVES_HISTOGRAM_METRIC_NAME, + "Histogram of shard moves", + "unit" + ); this.shardCountHistogram = meterRegistry.registerLongHistogram(NUMBER_OF_SHARDS_METRIC_NAME, "Current number of shards", "unit"); 
this.diskUsageHistogram = meterRegistry.registerDoubleHistogram(DISK_USAGE_BYTES_METRIC_NAME, "Disk usage in bytes", "unit"); this.writeLoadHistogram = meterRegistry.registerDoubleHistogram(WRITE_LOAD_METRIC_NAME, "Write load", "1.0"); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index 6988c6e910737..f639363463b62 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -564,8 +564,14 @@ private void assertMetricsCollected( List measuredShardMoveValues = Measurement.getMeasurementValues(measuredShardMoves, (measurement -> measurement.getLong())); assertEquals(measuredShardMoveValues, shardMoves); - List measuredShardMoveHistogram = metricRecorder.getMeasurements(InstrumentType.LONG_HISTOGRAM, NUMBER_OF_SHARD_MOVES_HISTOGRAM_METRIC_NAME); - List measuredShardMoveHistogramValues = Measurement.getMeasurementValues(measuredShardMoveHistogram, (measurement -> measurement.getLong())); + List measuredShardMoveHistogram = metricRecorder.getMeasurements( + InstrumentType.LONG_HISTOGRAM, + NUMBER_OF_SHARD_MOVES_HISTOGRAM_METRIC_NAME + ); + List measuredShardMoveHistogramValues = Measurement.getMeasurementValues( + measuredShardMoveHistogram, + (measurement -> measurement.getLong()) + ); assertEquals(measuredShardMoveHistogramValues, shardMoves); List measuredShardCounts = metricRecorder.getMeasurements(InstrumentType.LONG_HISTOGRAM, NUMBER_OF_SHARDS_METRIC_NAME); From dad8264bc5a3385403c3362df9cb0286b360bc95 Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Mon, 10 Nov 2025 10:24:21 -0800 Subject: [PATCH 12/22] Fixes to metrics names and summaries 
--- .../AllocationBalancingRoundMetrics.java | 48 ++++++++++++------- ...tionBalancingRoundSummaryServiceTests.java | 9 ++-- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index 185951311e60c..a52e8418106fe 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -23,15 +23,15 @@ public class AllocationBalancingRoundMetrics { // counters that measure rounds and moves from the last balancing round - public static final String NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME = "es.allocator.balancing_round.balancing_rounds"; - public static final String NUMBER_OF_SHARD_MOVES_METRIC_NAME = "es.allocator.balancing_round.shard_moves"; - public static final String NUMBER_OF_SHARD_MOVES_HISTOGRAM_METRIC_NAME = "es.allocator.balancing_round.shard_moves_histogram"; + public static final String NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME = "es.allocator.balancing_round.balancing_rounds.total"; + public static final String NUMBER_OF_SHARD_MOVES_METRIC_NAME = "es.allocator.balancing_round.shard_moves.total"; + public static final String NUMBER_OF_SHARD_MOVES_HISTOGRAM_METRIC_NAME = "es.allocator.balancing_round.shard_moves.histogram"; // histograms that measure current utilization - public static final String NUMBER_OF_SHARDS_METRIC_NAME = "es.allocator.balancing_round.shard_count"; - public static final String DISK_USAGE_BYTES_METRIC_NAME = "es.allocator.balancing_round.disk_usage_bytes"; - public static final String WRITE_LOAD_METRIC_NAME = "es.allocator.balancing_round.write_load"; - public static final String TOTAL_WEIGHT_METRIC_NAME = 
"es.allocator.balancing_round.total_weight"; + public static final String NUMBER_OF_SHARDS_METRIC_NAME = "es.allocator.balancing_round.shard_count.histogram"; + public static final String DISK_USAGE_BYTES_METRIC_NAME = "es.allocator.balancing_round.disk_usage_bytes.histogram"; + public static final String WRITE_LOAD_METRIC_NAME = "es.allocator.balancing_round.write_load.histogram"; + public static final String TOTAL_WEIGHT_METRIC_NAME = "es.allocator.balancing_round.total_weight.histogram"; private final LongCounter balancingRoundCounter; private final LongCounter shardMovesCounter; @@ -47,24 +47,40 @@ public class AllocationBalancingRoundMetrics { public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { this.balancingRoundCounter = meterRegistry.registerLongCounter( NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME, - "Current number of balancing rounds", + "Total number of balancing rounds", "unit" ); this.shardMovesCounter = meterRegistry.registerLongCounter( NUMBER_OF_SHARD_MOVES_METRIC_NAME, - "Current number of shard moves", - "{shard}" + "Total number of shard moves", + "unit" ); this.shardMovesHistogram = meterRegistry.registerLongHistogram( NUMBER_OF_SHARD_MOVES_HISTOGRAM_METRIC_NAME, - "Histogram of shard moves", + "Number of shard movements executed in a balancing round", + "unit" + ); + this.shardCountHistogram = meterRegistry.registerLongHistogram( + NUMBER_OF_SHARDS_METRIC_NAME, + "change in node shard count per balancing round", "unit" ); - this.shardCountHistogram = meterRegistry.registerLongHistogram(NUMBER_OF_SHARDS_METRIC_NAME, "Current number of shards", "unit"); - this.diskUsageHistogram = meterRegistry.registerDoubleHistogram(DISK_USAGE_BYTES_METRIC_NAME, "Disk usage in bytes", "unit"); - this.writeLoadHistogram = meterRegistry.registerDoubleHistogram(WRITE_LOAD_METRIC_NAME, "Write load", "1.0"); - this.totalWeightHistogram = meterRegistry.registerDoubleHistogram(TOTAL_WEIGHT_METRIC_NAME, "Total weight", "1.0"); + this.diskUsageHistogram = 
meterRegistry.registerDoubleHistogram( + DISK_USAGE_BYTES_METRIC_NAME, + "change in disk usage in bytes per balancing round", + "unit" + ); + this.writeLoadHistogram = meterRegistry.registerDoubleHistogram( + WRITE_LOAD_METRIC_NAME, + "change in write load per balancing round", + "1.0" + ); + this.totalWeightHistogram = meterRegistry.registerDoubleHistogram( + TOTAL_WEIGHT_METRIC_NAME, + "change in total weight per balancing round", + "1.0" + ); } public void addBalancingRoundSummary(BalancingRoundSummary summary) { @@ -85,6 +101,6 @@ public void addBalancingRoundSummary(BalancingRoundSummary summary) { } private Map getNodeAttributes(String nodeId) { - return Map.of("node_name", nodeId); + return Map.of("balancing_node_name", nodeId); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index f639363463b62..22bed40079bbf 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -554,7 +554,8 @@ private void assertMetricsCollected( InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME ); - List measuredRoundCountValues = Measurement.getMeasurementValues(measuredRoundCounts, (measurement -> measurement.getLong())); + List measuredRoundCountValues = Measurement.getMeasurementValues(measuredRoundCounts, + (measurement -> measurement.getLong())); assertEquals(measuredRoundCountValues, roundCounts); List measuredShardMoves = metricRecorder.getMeasurements( @@ -574,7 +575,8 @@ private void assertMetricsCollected( ); assertEquals(measuredShardMoveHistogramValues, shardMoves); - List measuredShardCounts = 
metricRecorder.getMeasurements(InstrumentType.LONG_HISTOGRAM, NUMBER_OF_SHARDS_METRIC_NAME); + List measuredShardCounts = metricRecorder.getMeasurements(InstrumentType.LONG_HISTOGRAM, + NUMBER_OF_SHARDS_METRIC_NAME); var shardCountsByNode = groupMeasurementsByAttribute(measuredShardCounts, (measurement -> measurement.getLong())); assertEquals(shardCountTelemetry, shardCountsByNode); @@ -595,6 +597,7 @@ private Map> groupMeasurementsByAttribute( List measurements, Function getMeasurementValue ) { - return Measurement.groupMeasurementsByAttribute(measurements, (attrs -> (String) attrs.get("node_name")), getMeasurementValue); + return Measurement.groupMeasurementsByAttribute(measurements, (attrs -> (String) attrs.get("balancing_node_name")), + getMeasurementValue); } } From 28dbf909aaeccb744e6172697d5472e6ae60dedd Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Mon, 10 Nov 2025 10:24:28 -0800 Subject: [PATCH 13/22] Move of DiscoveryNode as key in BalancingRoundSummary.nodeToWeightChanges --- .../AllocationBalancingRoundMetrics.java | 18 ++++++----- ...llocationBalancingRoundSummaryService.java | 11 ++++--- .../allocator/BalancingRoundSummary.java | 10 +++--- ...tionBalancingRoundSummaryServiceTests.java | 31 ++++++++++--------- .../allocator/BalancingRoundSummaryTests.java | 6 ++-- 5 files changed, 43 insertions(+), 33 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index a52e8418106fe..f98692d173fb1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -9,6 +9,7 @@ package org.elasticsearch.cluster.routing.allocation.allocator; +import 
org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.allocation.allocator.BalancingRoundSummary.NodesWeightsChanges; import org.elasticsearch.telemetry.metric.DoubleHistogram; import org.elasticsearch.telemetry.metric.LongCounter; @@ -88,19 +89,20 @@ public void addBalancingRoundSummary(BalancingRoundSummary summary) { shardMovesCounter.incrementBy(summary.numberOfShardsToMove()); shardMovesHistogram.record(summary.numberOfShardsToMove()); - for (Map.Entry changesEntry : summary.nodeNameToWeightChanges().entrySet()) { - String nodeName = changesEntry.getKey(); + for (Map.Entry changesEntry : summary.nodeNameToWeightChanges().entrySet()) { + DiscoveryNode node = changesEntry.getKey(); NodesWeightsChanges weightChanges = changesEntry.getValue(); BalancingRoundSummary.NodeWeightsDiff weightsDiff = weightChanges.weightsDiff(); - shardCountHistogram.record(Math.abs(weightsDiff.shardCountDiff()), getNodeAttributes(nodeName)); - diskUsageHistogram.record(Math.abs(weightsDiff.diskUsageInBytesDiff()), getNodeAttributes(nodeName)); - writeLoadHistogram.record(Math.abs(weightsDiff.writeLoadDiff()), getNodeAttributes(nodeName)); - totalWeightHistogram.record(Math.abs(weightsDiff.totalWeightDiff()), getNodeAttributes(nodeName)); + shardCountHistogram.record(Math.abs(weightsDiff.shardCountDiff()), getNodeAttributes(node)); + diskUsageHistogram.record(Math.abs(weightsDiff.diskUsageInBytesDiff()), getNodeAttributes(node)); + writeLoadHistogram.record(Math.abs(weightsDiff.writeLoadDiff()), getNodeAttributes(node)); + totalWeightHistogram.record(Math.abs(weightsDiff.totalWeightDiff()), getNodeAttributes(node)); } } - private Map getNodeAttributes(String nodeId) { - return Map.of("balancing_node_name", nodeId); + private Map getNodeAttributes(DiscoveryNode node) { + return Map.of("node_name", node.getName(), + "node_id", node.getId()); } } diff --git 
a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java index f14baa0ff3ac6..c3a2853daa1dc 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryService.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.core.TimeValue; @@ -105,14 +106,14 @@ public static BalancingRoundSummary createBalancerRoundSummary(DesiredBalance ol * Creates a summary of the node weight changes from {@code oldDesiredBalance} to {@code newDesiredBalance}. * See {@link BalancingRoundSummary.NodesWeightsChanges} for content details. 
*/ - private static Map createWeightsSummary( + private static Map createWeightsSummary( DesiredBalance oldDesiredBalance, DesiredBalance newDesiredBalance ) { var oldWeightsPerNode = oldDesiredBalance.weightsPerNode(); var newWeightsPerNode = newDesiredBalance.weightsPerNode(); - Map nodeNameToWeightInfo = new HashMap<>(oldWeightsPerNode.size()); + Map nodeNameToWeightInfo = new HashMap<>(oldWeightsPerNode.size()); for (var nodeAndWeights : oldWeightsPerNode.entrySet()) { var discoveryNode = nodeAndWeights.getKey(); var oldNodeWeightStats = nodeAndWeights.getValue(); @@ -122,7 +123,7 @@ private static Map createWeig var newNodeWeightStats = newWeightsPerNode.getOrDefault(discoveryNode, DesiredBalanceMetrics.NodeWeightStats.ZERO); nodeNameToWeightInfo.put( - discoveryNode.getName(), + discoveryNode, new BalancingRoundSummary.NodesWeightsChanges( oldNodeWeightStats, BalancingRoundSummary.NodeWeightsDiff.create(oldNodeWeightStats, newNodeWeightStats) @@ -134,11 +135,11 @@ private static Map createWeig // the new DesiredBalance to check. for (var nodeAndWeights : newWeightsPerNode.entrySet()) { var discoveryNode = nodeAndWeights.getKey(); - if (nodeNameToWeightInfo.containsKey(discoveryNode.getName()) == false) { + if (nodeNameToWeightInfo.containsKey(discoveryNode) == false) { // This node is new in the new DesiredBalance, there was no entry added during iteration of the nodes in the old // DesiredBalance. So we'll make a new entry with a base of zero value weights and a weights diff of the new node's weights. 
nodeNameToWeightInfo.put( - discoveryNode.getName(), + discoveryNode, new BalancingRoundSummary.NodesWeightsChanges( DesiredBalanceMetrics.NodeWeightStats.ZERO, BalancingRoundSummary.NodeWeightsDiff.create(DesiredBalanceMetrics.NodeWeightStats.ZERO, nodeAndWeights.getValue()) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java index 62331019937bb..73f44bd2a966a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java @@ -9,6 +9,8 @@ package org.elasticsearch.cluster.routing.allocation.allocator; +import org.elasticsearch.cluster.node.DiscoveryNode; + import java.util.HashMap; import java.util.List; import java.util.Map; @@ -16,12 +18,12 @@ /** * Summarizes the impact to the cluster as a result of a rebalancing round. * - * @param nodeNameToWeightChanges The shard balance weight changes for each node (by name), comparing a previous DesiredBalance shard + * @param nodeNameToWeightChanges The shard balance weight changes for each DiscoveryNode, comparing a previous DesiredBalance shard * allocation to a new DesiredBalance allocation. * @param numberOfShardsToMove The number of shard moves required to move from the previous desired balance to the new one. Does not include * new (index creation) or removed (index deletion) shard assignements. 
*/ -public record BalancingRoundSummary(Map nodeNameToWeightChanges, long numberOfShardsToMove) { +public record BalancingRoundSummary(Map nodeNameToWeightChanges, long numberOfShardsToMove) { /** * Represents the change in weights for a node going from an old DesiredBalance to a new DesiredBalance @@ -98,7 +100,7 @@ public String toString() { */ public record CombinedBalancingRoundSummary( int numberOfBalancingRounds, - Map nodeNameToWeightChanges, + Map nodeNameToWeightChanges, long numberOfShardMoves ) { @@ -113,7 +115,7 @@ public static CombinedBalancingRoundSummary combine(List } // We will loop through the summaries and sum the weight diffs for each node entry. - Map combinedNodeNameToWeightChanges = new HashMap<>(); + Map combinedNodeNameToWeightChanges = new HashMap<>(); // Number of shards moves are simply summed across summaries. Each new balancing round is built upon the last one, so it is // possible that a shard is reassigned back to a node before it even moves away, and that will still be counted as 2 moves here. 
diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index 22bed40079bbf..746f678be4365 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -45,24 +45,27 @@ public class AllocationBalancingRoundSummaryServiceTests extends ESTestCase { private static final String BALANCING_SUMMARY_MSG_PREFIX = "Balancing round summaries:*"; - private static final Map NODE_NAME_TO_WEIGHT_CHANGES = Map.of( - "node1", + final static DiscoveryNode NODE_1 = new DiscoveryNode("node1", "node1_id", "eph-NODE_1", "abc", "abc", null, Map.of(), Set.of(), null); + final static DiscoveryNode NODE_2 = new DiscoveryNode("node2", "node2_id", "eph-NODE_2", "abc", "abc", null, Map.of(), Set.of(), null); + + private static final Map NODE_NAME_TO_WEIGHT_CHANGES = Map.of( + NODE_1, new BalancingRoundSummary.NodesWeightsChanges( new DesiredBalanceMetrics.NodeWeightStats(1L, 2, 3, 4), new BalancingRoundSummary.NodeWeightsDiff(1, 2, 3, 4) ), - "node2", + NODE_2, new BalancingRoundSummary.NodesWeightsChanges( new DesiredBalanceMetrics.NodeWeightStats(1L, 2, 3, 4), new BalancingRoundSummary.NodeWeightsDiff(1, 2, 3, 4) ) ); - final DiscoveryNode DUMMY_NODE = new DiscoveryNode("node1Name", "node1Id", "eph-node1", "abc", "abc", null, Map.of(), Set.of(), null); + final DiscoveryNode DUMMY_NODE = new DiscoveryNode("dummy1Name", "dummy1Id", "eph-dummy1", "abc", "abc", null, Map.of(), Set.of(), null); final DiscoveryNode SECOND_DUMMY_NODE = new DiscoveryNode( - "node2Name", - "node2Id", - "eph-node2", + "dummy2Name", + "dummy2Id", + "eph-dummy2", "def", "def", null, @@ -403,7 +406,7 @@ 
public void testCreateBalancerRoundSummary() { assertEquals(2, firstSummary.numberOfShardsToMove()); assertEquals(1, firstSummary.nodeNameToWeightChanges().size()); - var firstSummaryWeights = firstSummary.nodeNameToWeightChanges().get(DUMMY_NODE.getName()); + var firstSummaryWeights = firstSummary.nodeNameToWeightChanges().get(DUMMY_NODE); assertEquals(10, firstSummaryWeights.baseWeights().shardCount()); assertDoublesEqual(20, firstSummaryWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(30, firstSummaryWeights.baseWeights().writeLoad()); @@ -415,7 +418,7 @@ public void testCreateBalancerRoundSummary() { assertEquals(1, secondSummary.numberOfShardsToMove()); assertEquals(1, secondSummary.nodeNameToWeightChanges().size()); - var secondSummaryWeights = secondSummary.nodeNameToWeightChanges().get(DUMMY_NODE.getName()); + var secondSummaryWeights = secondSummary.nodeNameToWeightChanges().get(DUMMY_NODE); assertEquals(20, secondSummaryWeights.baseWeights().shardCount()); assertDoublesEqual(40, secondSummaryWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(60, secondSummaryWeights.baseWeights().writeLoad()); @@ -458,7 +461,7 @@ public void testCreateBalancerRoundSummaryWithRemovedNode() { assertEquals(0, summary.numberOfShardsToMove()); assertEquals(2, summary.nodeNameToWeightChanges().size()); - var summaryDummyNodeWeights = summary.nodeNameToWeightChanges().get(DUMMY_NODE.getName()); + var summaryDummyNodeWeights = summary.nodeNameToWeightChanges().get(DUMMY_NODE); assertEquals(10, summaryDummyNodeWeights.baseWeights().shardCount()); assertDoublesEqual(20, summaryDummyNodeWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(30, summaryDummyNodeWeights.baseWeights().writeLoad()); @@ -468,7 +471,7 @@ public void testCreateBalancerRoundSummaryWithRemovedNode() { assertDoublesEqual(30, summaryDummyNodeWeights.weightsDiff().writeLoadDiff()); assertDoublesEqual(40, summaryDummyNodeWeights.weightsDiff().totalWeightDiff()); - var 
summarySecondDummyNodeWeights = summary.nodeNameToWeightChanges().get(SECOND_DUMMY_NODE.getName()); + var summarySecondDummyNodeWeights = summary.nodeNameToWeightChanges().get(SECOND_DUMMY_NODE); assertEquals(5, summarySecondDummyNodeWeights.baseWeights().shardCount()); assertDoublesEqual(15, summarySecondDummyNodeWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(25, summarySecondDummyNodeWeights.baseWeights().writeLoad()); @@ -511,7 +514,7 @@ public void testCreateBalancerRoundSummaryWithAddedNode() { assertEquals(1, summary.numberOfShardsToMove()); assertEquals(2, summary.nodeNameToWeightChanges().size()); - var summaryDummyNodeWeights = summary.nodeNameToWeightChanges().get(DUMMY_NODE.getName()); + var summaryDummyNodeWeights = summary.nodeNameToWeightChanges().get(DUMMY_NODE); assertEquals(10, summaryDummyNodeWeights.baseWeights().shardCount()); assertDoublesEqual(20, summaryDummyNodeWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(30, summaryDummyNodeWeights.baseWeights().writeLoad()); @@ -521,7 +524,7 @@ public void testCreateBalancerRoundSummaryWithAddedNode() { assertDoublesEqual(30, summaryDummyNodeWeights.weightsDiff().writeLoadDiff()); assertDoublesEqual(40, summaryDummyNodeWeights.weightsDiff().totalWeightDiff()); - var summarySecondDummyNodeWeights = summary.nodeNameToWeightChanges().get(SECOND_DUMMY_NODE.getName()); + var summarySecondDummyNodeWeights = summary.nodeNameToWeightChanges().get(SECOND_DUMMY_NODE); assertEquals(0, summarySecondDummyNodeWeights.baseWeights().shardCount()); assertDoublesEqual(0, summarySecondDummyNodeWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(0, summarySecondDummyNodeWeights.baseWeights().writeLoad()); @@ -597,7 +600,7 @@ private Map> groupMeasurementsByAttribute( List measurements, Function getMeasurementValue ) { - return Measurement.groupMeasurementsByAttribute(measurements, (attrs -> (String) attrs.get("balancing_node_name")), + return 
Measurement.groupMeasurementsByAttribute(measurements, (attrs -> (String) attrs.get("node_name")), getMeasurementValue); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java index 6291c629281dc..ab79dbf9ebbd1 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java @@ -9,12 +9,14 @@ package org.elasticsearch.cluster.routing.allocation.allocator; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.allocation.allocator.BalancingRoundSummary.CombinedBalancingRoundSummary; import org.elasticsearch.test.ESTestCase; import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Set; public class BalancingRoundSummaryTests extends ESTestCase { @@ -22,8 +24,8 @@ public class BalancingRoundSummaryTests extends ESTestCase { * Tests the {@link BalancingRoundSummary.CombinedBalancingRoundSummary#combine(List)} method. 
*/ public void testCombine() { - final String NODE_1 = "node1"; - final String NODE_2 = "node2"; + final DiscoveryNode NODE_1 = new DiscoveryNode("node1", "node1Id", "eph-node1", "abc", "abc", null, Map.of(), Set.of(), null); + final DiscoveryNode NODE_2 = new DiscoveryNode("node2", "node2Id", "eph-node2", "abc", "abc", null, Map.of(), Set.of(), null); final var node1BaseWeights = new DesiredBalanceMetrics.NodeWeightStats(10, 20, 30, 40); final var node2BaseWeights = new DesiredBalanceMetrics.NodeWeightStats(100, 200, 300, 400); final var commonDiff = new BalancingRoundSummary.NodeWeightsDiff(1, 2, 3, 4); From d89a03ce098cb3ab910e76230bc194102f9a25bf Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Mon, 10 Nov 2025 10:24:31 -0800 Subject: [PATCH 14/22] Renaming nodeNameToWeightChanges to nodeToWeightChanges --- .../AllocationBalancingRoundMetrics.java | 2 +- .../allocator/BalancingRoundSummary.java | 14 ++++++------- ...tionBalancingRoundSummaryServiceTests.java | 20 +++++++++---------- .../allocator/BalancingRoundSummaryTests.java | 6 +++--- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index f98692d173fb1..fc7a8d20f380f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -89,7 +89,7 @@ public void addBalancingRoundSummary(BalancingRoundSummary summary) { shardMovesCounter.incrementBy(summary.numberOfShardsToMove()); shardMovesHistogram.record(summary.numberOfShardsToMove()); - for (Map.Entry changesEntry : summary.nodeNameToWeightChanges().entrySet()) { + for (Map.Entry changesEntry : summary.nodeToWeightChanges().entrySet()) { 
DiscoveryNode node = changesEntry.getKey(); NodesWeightsChanges weightChanges = changesEntry.getValue(); BalancingRoundSummary.NodeWeightsDiff weightsDiff = weightChanges.weightsDiff(); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java index 73f44bd2a966a..45844dbed016e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java @@ -18,12 +18,12 @@ /** * Summarizes the impact to the cluster as a result of a rebalancing round. * - * @param nodeNameToWeightChanges The shard balance weight changes for each DiscoveryNode, comparing a previous DesiredBalance shard + * @param nodeToWeightChanges The shard balance weight changes for each DiscoveryNode, comparing a previous DesiredBalance shard * allocation to a new DesiredBalance allocation. * @param numberOfShardsToMove The number of shard moves required to move from the previous desired balance to the new one. Does not include * new (index creation) or removed (index deletion) shard assignements. */ -public record BalancingRoundSummary(Map nodeNameToWeightChanges, long numberOfShardsToMove) { +public record BalancingRoundSummary(Map nodeToWeightChanges, long numberOfShardsToMove) { /** * Represents the change in weights for a node going from an old DesiredBalance to a new DesiredBalance @@ -77,8 +77,8 @@ public NodeWeightsDiff combine(NodeWeightsDiff otherDiff) { @Override public String toString() { return "BalancingRoundSummary{" - + "nodeNameToWeightChanges" - + nodeNameToWeightChanges + + "nodeToWeightChanges" + + nodeToWeightChanges + ", numberOfShardsToMove=" + numberOfShardsToMove + '}'; @@ -95,12 +95,12 @@ public String toString() { * latest desired balance. 
* * @param numberOfBalancingRounds How many balancing round summaries are combined in this report. - * @param nodeNameToWeightChanges + * @param nodeToWeightChanges * @param numberOfShardMoves The sum of shard moves for each balancing round being combined into a single summary. */ public record CombinedBalancingRoundSummary( int numberOfBalancingRounds, - Map nodeNameToWeightChanges, + Map nodeToWeightChanges, long numberOfShardMoves ) { @@ -130,7 +130,7 @@ public static CombinedBalancingRoundSummary combine(List // We'll build the weight changes by keeping the node weight base from the first summary in which a node appears and then // summing the weight diffs in each summary to get total weight diffs across summaries. - for (var nodeNameAndWeights : summary.nodeNameToWeightChanges.entrySet()) { + for (var nodeNameAndWeights : summary.nodeToWeightChanges.entrySet()) { var combined = combinedNodeNameToWeightChanges.get(nodeNameAndWeights.getKey()); if (combined == null) { // Either this is the first summary, and combinedNodeNameToWeightChanges hasn't been initialized yet for this node; diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index 746f678be4365..a8be870940cd7 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -405,8 +405,8 @@ public void testCreateBalancerRoundSummary() { var secondSummary = AllocationBalancingRoundSummaryService.createBalancerRoundSummary(secondDesiredBalance, thirdDesiredBalance); assertEquals(2, firstSummary.numberOfShardsToMove()); - assertEquals(1, firstSummary.nodeNameToWeightChanges().size()); - var 
firstSummaryWeights = firstSummary.nodeNameToWeightChanges().get(DUMMY_NODE); + assertEquals(1, firstSummary.nodeToWeightChanges().size()); + var firstSummaryWeights = firstSummary.nodeToWeightChanges().get(DUMMY_NODE); assertEquals(10, firstSummaryWeights.baseWeights().shardCount()); assertDoublesEqual(20, firstSummaryWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(30, firstSummaryWeights.baseWeights().writeLoad()); @@ -417,8 +417,8 @@ public void testCreateBalancerRoundSummary() { assertDoublesEqual(40, firstSummaryWeights.weightsDiff().totalWeightDiff()); assertEquals(1, secondSummary.numberOfShardsToMove()); - assertEquals(1, secondSummary.nodeNameToWeightChanges().size()); - var secondSummaryWeights = secondSummary.nodeNameToWeightChanges().get(DUMMY_NODE); + assertEquals(1, secondSummary.nodeToWeightChanges().size()); + var secondSummaryWeights = secondSummary.nodeToWeightChanges().get(DUMMY_NODE); assertEquals(20, secondSummaryWeights.baseWeights().shardCount()); assertDoublesEqual(40, secondSummaryWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(60, secondSummaryWeights.baseWeights().writeLoad()); @@ -459,9 +459,9 @@ public void testCreateBalancerRoundSummaryWithRemovedNode() { var summary = AllocationBalancingRoundSummaryService.createBalancerRoundSummary(firstDesiredBalance, secondDesiredBalance); assertEquals(0, summary.numberOfShardsToMove()); - assertEquals(2, summary.nodeNameToWeightChanges().size()); + assertEquals(2, summary.nodeToWeightChanges().size()); - var summaryDummyNodeWeights = summary.nodeNameToWeightChanges().get(DUMMY_NODE); + var summaryDummyNodeWeights = summary.nodeToWeightChanges().get(DUMMY_NODE); assertEquals(10, summaryDummyNodeWeights.baseWeights().shardCount()); assertDoublesEqual(20, summaryDummyNodeWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(30, summaryDummyNodeWeights.baseWeights().writeLoad()); @@ -471,7 +471,7 @@ public void testCreateBalancerRoundSummaryWithRemovedNode() { 
assertDoublesEqual(30, summaryDummyNodeWeights.weightsDiff().writeLoadDiff()); assertDoublesEqual(40, summaryDummyNodeWeights.weightsDiff().totalWeightDiff()); - var summarySecondDummyNodeWeights = summary.nodeNameToWeightChanges().get(SECOND_DUMMY_NODE); + var summarySecondDummyNodeWeights = summary.nodeToWeightChanges().get(SECOND_DUMMY_NODE); assertEquals(5, summarySecondDummyNodeWeights.baseWeights().shardCount()); assertDoublesEqual(15, summarySecondDummyNodeWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(25, summarySecondDummyNodeWeights.baseWeights().writeLoad()); @@ -512,9 +512,9 @@ public void testCreateBalancerRoundSummaryWithAddedNode() { var summary = AllocationBalancingRoundSummaryService.createBalancerRoundSummary(firstDesiredBalance, secondDesiredBalance); assertEquals(1, summary.numberOfShardsToMove()); - assertEquals(2, summary.nodeNameToWeightChanges().size()); + assertEquals(2, summary.nodeToWeightChanges().size()); - var summaryDummyNodeWeights = summary.nodeNameToWeightChanges().get(DUMMY_NODE); + var summaryDummyNodeWeights = summary.nodeToWeightChanges().get(DUMMY_NODE); assertEquals(10, summaryDummyNodeWeights.baseWeights().shardCount()); assertDoublesEqual(20, summaryDummyNodeWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(30, summaryDummyNodeWeights.baseWeights().writeLoad()); @@ -524,7 +524,7 @@ public void testCreateBalancerRoundSummaryWithAddedNode() { assertDoublesEqual(30, summaryDummyNodeWeights.weightsDiff().writeLoadDiff()); assertDoublesEqual(40, summaryDummyNodeWeights.weightsDiff().totalWeightDiff()); - var summarySecondDummyNodeWeights = summary.nodeNameToWeightChanges().get(SECOND_DUMMY_NODE); + var summarySecondDummyNodeWeights = summary.nodeToWeightChanges().get(SECOND_DUMMY_NODE); assertEquals(0, summarySecondDummyNodeWeights.baseWeights().shardCount()); assertDoublesEqual(0, summarySecondDummyNodeWeights.baseWeights().diskUsageInBytes()); assertDoublesEqual(0, 
summarySecondDummyNodeWeights.baseWeights().writeLoad()); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java index ab79dbf9ebbd1..fd2e8b41aac79 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java @@ -80,10 +80,10 @@ public void testCombine() { assertEquals(2, combined.numberOfBalancingRounds()); assertEquals(shardMovesSummary1 + shardMovesSummary2, combined.numberOfShardMoves()); - assertEquals(2, combined.nodeNameToWeightChanges().size()); + assertEquals(2, combined.nodeToWeightChanges().size()); - var combinedNode1WeightsChanges = combined.nodeNameToWeightChanges().get(NODE_1); - var combinedNode2WeightsChanges = combined.nodeNameToWeightChanges().get(NODE_2); + var combinedNode1WeightsChanges = combined.nodeToWeightChanges().get(NODE_1); + var combinedNode2WeightsChanges = combined.nodeToWeightChanges().get(NODE_2); // The base weights for each node should match the first BalancingRoundSummary's base weight values. The diff weights will be summed // across all BalancingRoundSummary entries (in this case, there are two BalancingRoundSummary entries). 
From ba786de89dfc35b8b41d82bef933af981a0b9a16 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 10 Nov 2025 18:35:28 +0000 Subject: [PATCH 15/22] [CI] Auto commit changes from spotless --- .../AllocationBalancingRoundMetrics.java | 3 +-- ...tionBalancingRoundSummaryServiceTests.java | 21 ++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index fc7a8d20f380f..4e93486d35605 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -102,7 +102,6 @@ public void addBalancingRoundSummary(BalancingRoundSummary summary) { } private Map getNodeAttributes(DiscoveryNode node) { - return Map.of("node_name", node.getName(), - "node_id", node.getId()); + return Map.of("node_name", node.getName(), "node_id", node.getId()); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index a8be870940cd7..6c6e81a646ce0 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -61,7 +61,17 @@ public class AllocationBalancingRoundSummaryServiceTests extends ESTestCase { ) ); - final DiscoveryNode DUMMY_NODE = new DiscoveryNode("dummy1Name", "dummy1Id", "eph-dummy1", "abc", "abc", null, Map.of(), Set.of(), null); + 
final DiscoveryNode DUMMY_NODE = new DiscoveryNode( + "dummy1Name", + "dummy1Id", + "eph-dummy1", + "abc", + "abc", + null, + Map.of(), + Set.of(), + null + ); final DiscoveryNode SECOND_DUMMY_NODE = new DiscoveryNode( "dummy2Name", "dummy2Id", @@ -557,8 +567,7 @@ private void assertMetricsCollected( InstrumentType.LONG_COUNTER, NUMBER_OF_BALANCING_ROUNDS_METRIC_NAME ); - List measuredRoundCountValues = Measurement.getMeasurementValues(measuredRoundCounts, - (measurement -> measurement.getLong())); + List measuredRoundCountValues = Measurement.getMeasurementValues(measuredRoundCounts, (measurement -> measurement.getLong())); assertEquals(measuredRoundCountValues, roundCounts); List measuredShardMoves = metricRecorder.getMeasurements( @@ -578,8 +587,7 @@ private void assertMetricsCollected( ); assertEquals(measuredShardMoveHistogramValues, shardMoves); - List measuredShardCounts = metricRecorder.getMeasurements(InstrumentType.LONG_HISTOGRAM, - NUMBER_OF_SHARDS_METRIC_NAME); + List measuredShardCounts = metricRecorder.getMeasurements(InstrumentType.LONG_HISTOGRAM, NUMBER_OF_SHARDS_METRIC_NAME); var shardCountsByNode = groupMeasurementsByAttribute(measuredShardCounts, (measurement -> measurement.getLong())); assertEquals(shardCountTelemetry, shardCountsByNode); @@ -600,7 +608,6 @@ private Map> groupMeasurementsByAttribute( List measurements, Function getMeasurementValue ) { - return Measurement.groupMeasurementsByAttribute(measurements, (attrs -> (String) attrs.get("node_name")), - getMeasurementValue); + return Measurement.groupMeasurementsByAttribute(measurements, (attrs -> (String) attrs.get("node_name")), getMeasurementValue); } } From 85e4919ae99659d6a34691a56bfc5bac3e1f3dfb Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Mon, 10 Nov 2025 16:35:03 -0800 Subject: [PATCH 16/22] CombinedBalancingRoundSummary prints DiscoveryNode name instead of entire object --- .../allocator/BalancingRoundSummary.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff 
--git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java index 45844dbed016e..93e7930eb00ed 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java @@ -10,6 +10,7 @@ package org.elasticsearch.cluster.routing.allocation.allocator; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.core.Strings; import java.util.HashMap; import java.util.List; @@ -106,6 +107,23 @@ public record CombinedBalancingRoundSummary( public static final CombinedBalancingRoundSummary EMPTY_RESULTS = new CombinedBalancingRoundSummary(0, new HashMap<>(), 0); + /** + * Serialize the CombinedBalancingRoundSummary to a compact log representation, where {@link DiscoveryNode#getName()} is used instead + * of the entire {@link DiscoveryNode#toString()} method. + */ + @Override + public String toString() { + Map nodeNameToWeightChanges = new HashMap<>(nodeToWeightChanges.size()); + nodeToWeightChanges.forEach((node, nodesWeightChanges) -> nodeNameToWeightChanges.put(node.getName(), nodesWeightChanges)); + + return Strings.format("CombinedBalancingRoundSummary[numberOfBalancingRounds=%d, nodeToWeightChange=%s, " + + "numberOfShardMoves=%d]", + numberOfBalancingRounds, + nodeNameToWeightChanges, + numberOfShardMoves + ); + } + /** * Merges multiple {@link BalancingRoundSummary} summaries into a single {@link CombinedBalancingRoundSummary}. */ From 52c1ce736c62bd2dbd697ef16a212cc93be5819d Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Mon, 10 Nov 2025 16:58:24 -0800 Subject: [PATCH 17/22] Math.abs on a long is forbidden. 
Use *= -1 instead --- .../allocator/AllocationBalancingRoundMetrics.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index 4e93486d35605..44e3f32eaa674 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -94,7 +94,13 @@ public void addBalancingRoundSummary(BalancingRoundSummary summary) { NodesWeightsChanges weightChanges = changesEntry.getValue(); BalancingRoundSummary.NodeWeightsDiff weightsDiff = weightChanges.weightsDiff(); - shardCountHistogram.record(Math.abs(weightsDiff.shardCountDiff()), getNodeAttributes(node)); + // Math.abs on a long value does not have a corresponding positive value for long.MIN_VALUE. 
+ // This is impossible here, so multiply by negative one if negative + long shardCountDiff = weightsDiff.shardCountDiff(); + if (shardCountDiff < 0) { + shardCountDiff *= -1; + } + shardCountHistogram.record(shardCountDiff, getNodeAttributes(node)); diskUsageHistogram.record(Math.abs(weightsDiff.diskUsageInBytesDiff()), getNodeAttributes(node)); writeLoadHistogram.record(Math.abs(weightsDiff.writeLoadDiff()), getNodeAttributes(node)); totalWeightHistogram.record(Math.abs(weightsDiff.totalWeightDiff()), getNodeAttributes(node)); From cb6a19611fdf468a9c1e90c39db3ec514c279119 Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Mon, 10 Nov 2025 16:58:41 -0800 Subject: [PATCH 18/22] Style fixes --- .../allocation/allocator/BalancingRoundSummary.java | 8 ++++---- .../AllocationBalancingRoundSummaryServiceTests.java | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java index 93e7930eb00ed..1ba7359e6fe4d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java @@ -108,16 +108,16 @@ public record CombinedBalancingRoundSummary( public static final CombinedBalancingRoundSummary EMPTY_RESULTS = new CombinedBalancingRoundSummary(0, new HashMap<>(), 0); /** - * Serialize the CombinedBalancingRoundSummary to a compact log representation, where {@link DiscoveryNode#getName()} is used instead - * of the entire {@link DiscoveryNode#toString()} method. + * Serialize the CombinedBalancingRoundSummary to a compact log representation, where {@link DiscoveryNode#getName()} is used + * instead of the entire {@link DiscoveryNode#toString()} method. 
*/ @Override public String toString() { Map nodeNameToWeightChanges = new HashMap<>(nodeToWeightChanges.size()); nodeToWeightChanges.forEach((node, nodesWeightChanges) -> nodeNameToWeightChanges.put(node.getName(), nodesWeightChanges)); - return Strings.format("CombinedBalancingRoundSummary[numberOfBalancingRounds=%d, nodeToWeightChange=%s, " - + "numberOfShardMoves=%d]", + return Strings.format( + "CombinedBalancingRoundSummary[numberOfBalancingRounds=%d, nodeToWeightChange=%s, " + "numberOfShardMoves=%d]", numberOfBalancingRounds, nodeNameToWeightChanges, numberOfShardMoves diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index 6c6e81a646ce0..8539a77d38599 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -45,8 +45,8 @@ public class AllocationBalancingRoundSummaryServiceTests extends ESTestCase { private static final String BALANCING_SUMMARY_MSG_PREFIX = "Balancing round summaries:*"; - final static DiscoveryNode NODE_1 = new DiscoveryNode("node1", "node1_id", "eph-NODE_1", "abc", "abc", null, Map.of(), Set.of(), null); - final static DiscoveryNode NODE_2 = new DiscoveryNode("node2", "node2_id", "eph-NODE_2", "abc", "abc", null, Map.of(), Set.of(), null); + static final DiscoveryNode NODE_1 = new DiscoveryNode("node1", "node1_id", "eph-NODE_1", "abc", "abc", null, Map.of(), Set.of(), null); + static final DiscoveryNode NODE_2 = new DiscoveryNode("node2", "node2_id", "eph-NODE_2", "abc", "abc", null, Map.of(), Set.of(), null); private static final Map NODE_NAME_TO_WEIGHT_CHANGES = Map.of( NODE_1, From 
d0bb2d3d133ea710f4a147a557f37caa721a784f Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Tue, 11 Nov 2025 15:25:29 -0800 Subject: [PATCH 19/22] Move of Math.abs on long into suppressed method --- .../allocator/AllocationBalancingRoundMetrics.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index 44e3f32eaa674..e550de45762c6 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -11,6 +11,7 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.allocation.allocator.BalancingRoundSummary.NodesWeightsChanges; +import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.telemetry.metric.DoubleHistogram; import org.elasticsearch.telemetry.metric.LongCounter; import org.elasticsearch.telemetry.metric.LongHistogram; @@ -84,6 +85,11 @@ public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { ); } + @SuppressForbidden(reason = "ForbiddenAPIs bans Math.abs on longs, but long.MIN_VALUE is impossible here") + private long longAbsNegativeSafe(long input) { + return Math.abs(input); + } + public void addBalancingRoundSummary(BalancingRoundSummary summary) { balancingRoundCounter.increment(); shardMovesCounter.incrementBy(summary.numberOfShardsToMove()); @@ -94,13 +100,7 @@ public void addBalancingRoundSummary(BalancingRoundSummary summary) { NodesWeightsChanges weightChanges = changesEntry.getValue(); BalancingRoundSummary.NodeWeightsDiff weightsDiff = weightChanges.weightsDiff(); - // Math.abs on a long value does not have a corresponding positive value for 
long.MIN_VALUE. - // This is impossible here, so multiply by negative one if negative - long shardCountDiff = weightsDiff.shardCountDiff(); - if (shardCountDiff < 0) { - shardCountDiff *= -1; - } - shardCountHistogram.record(shardCountDiff, getNodeAttributes(node)); + shardCountHistogram.record(longAbsNegativeSafe(weightsDiff.shardCountDiff()), getNodeAttributes(node)); diskUsageHistogram.record(Math.abs(weightsDiff.diskUsageInBytesDiff()), getNodeAttributes(node)); writeLoadHistogram.record(Math.abs(weightsDiff.writeLoadDiff()), getNodeAttributes(node)); totalWeightHistogram.record(Math.abs(weightsDiff.totalWeightDiff()), getNodeAttributes(node)); From a650365c61e721c40972c0bf9799f01b7962aa5c Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Tue, 11 Nov 2025 15:25:54 -0800 Subject: [PATCH 20/22] Use DiscoveryNodeUtils to make a node --- ...tionBalancingRoundSummaryServiceTests.java | 29 ++++--------------- .../allocator/BalancingRoundSummaryTests.java | 6 ++-- 2 files changed, 8 insertions(+), 27 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java index 8539a77d38599..af5956414e508 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundSummaryServiceTests.java @@ -13,6 +13,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeUtils; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.DeterministicTaskQueue; @@ -45,8 
+46,8 @@ public class AllocationBalancingRoundSummaryServiceTests extends ESTestCase { private static final String BALANCING_SUMMARY_MSG_PREFIX = "Balancing round summaries:*"; - static final DiscoveryNode NODE_1 = new DiscoveryNode("node1", "node1_id", "eph-NODE_1", "abc", "abc", null, Map.of(), Set.of(), null); - static final DiscoveryNode NODE_2 = new DiscoveryNode("node2", "node2_id", "eph-NODE_2", "abc", "abc", null, Map.of(), Set.of(), null); + static final DiscoveryNode NODE_1 = DiscoveryNodeUtils.create("node1", "node1_id"); + static final DiscoveryNode NODE_2 = DiscoveryNodeUtils.create("node2", "node2_id"); private static final Map NODE_NAME_TO_WEIGHT_CHANGES = Map.of( NODE_1, @@ -61,28 +62,8 @@ public class AllocationBalancingRoundSummaryServiceTests extends ESTestCase { ) ); - final DiscoveryNode DUMMY_NODE = new DiscoveryNode( - "dummy1Name", - "dummy1Id", - "eph-dummy1", - "abc", - "abc", - null, - Map.of(), - Set.of(), - null - ); - final DiscoveryNode SECOND_DUMMY_NODE = new DiscoveryNode( - "dummy2Name", - "dummy2Id", - "eph-dummy2", - "def", - "def", - null, - Map.of(), - Set.of(), - null - ); + final DiscoveryNode DUMMY_NODE = DiscoveryNodeUtils.create("dummy1Name", "dummy1Id"); + final DiscoveryNode SECOND_DUMMY_NODE = DiscoveryNodeUtils.create("dummy2Name", "dummy2Id"); final String INDEX_NAME = "index"; final String INDEX_UUID = "_indexUUID_"; diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java index fd2e8b41aac79..56764ac31afe4 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummaryTests.java @@ -10,13 +10,13 @@ package org.elasticsearch.cluster.routing.allocation.allocator; import 
org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeUtils; import org.elasticsearch.cluster.routing.allocation.allocator.BalancingRoundSummary.CombinedBalancingRoundSummary; import org.elasticsearch.test.ESTestCase; import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.Set; public class BalancingRoundSummaryTests extends ESTestCase { @@ -24,8 +24,8 @@ public class BalancingRoundSummaryTests extends ESTestCase { * Tests the {@link BalancingRoundSummary.CombinedBalancingRoundSummary#combine(List)} method. */ public void testCombine() { - final DiscoveryNode NODE_1 = new DiscoveryNode("node1", "node1Id", "eph-node1", "abc", "abc", null, Map.of(), Set.of(), null); - final DiscoveryNode NODE_2 = new DiscoveryNode("node2", "node2Id", "eph-node2", "abc", "abc", null, Map.of(), Set.of(), null); + final DiscoveryNode NODE_1 = DiscoveryNodeUtils.create("node1", "node1Id"); + final DiscoveryNode NODE_2 = DiscoveryNodeUtils.create("node2", "node2Id"); final var node1BaseWeights = new DesiredBalanceMetrics.NodeWeightStats(10, 20, 30, 40); final var node2BaseWeights = new DesiredBalanceMetrics.NodeWeightStats(100, 200, 300, 400); final var commonDiff = new BalancingRoundSummary.NodeWeightsDiff(1, 2, 3, 4); From ac9ee89d01202cb0e5a0427f89202fb3daa07857 Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Tue, 11 Nov 2025 15:26:01 -0800 Subject: [PATCH 21/22] Formatting fixes --- .../routing/allocation/allocator/BalancingRoundSummary.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java index 1ba7359e6fe4d..22ad18a330f59 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java +++ 
b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancingRoundSummary.java @@ -117,7 +117,7 @@ public String toString() { nodeToWeightChanges.forEach((node, nodesWeightChanges) -> nodeNameToWeightChanges.put(node.getName(), nodesWeightChanges)); return Strings.format( - "CombinedBalancingRoundSummary[numberOfBalancingRounds=%d, nodeToWeightChange=%s, " + "numberOfShardMoves=%d]", + "CombinedBalancingRoundSummary[numberOfBalancingRounds=%d, nodeToWeightChange=%s, numberOfShardMoves=%d]", numberOfBalancingRounds, nodeNameToWeightChanges, numberOfShardMoves From adb174152e407703951838032c14995107490e6b Mon Sep 17 00:00:00 2001 From: Simon Chase Date: Tue, 11 Nov 2025 17:06:15 -0800 Subject: [PATCH 22/22] Adding assertion for Long.MIN_VALUE, better comments and names --- .../allocator/AllocationBalancingRoundMetrics.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java index e550de45762c6..3e1bd01048874 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/AllocationBalancingRoundMetrics.java @@ -85,9 +85,10 @@ public AllocationBalancingRoundMetrics(MeterRegistry meterRegistry) { ); } - @SuppressForbidden(reason = "ForbiddenAPIs bans Math.abs on longs, but long.MIN_VALUE is impossible here") - private long longAbsNegativeSafe(long input) { - return Math.abs(input); + @SuppressForbidden(reason = "ForbiddenAPIs bans Math.abs(long) because of overflow on Long.MIN_VALUE, but this is impossible here") + private long longAbsNegativeSafe(long value) { + assert value != Long.MIN_VALUE : "value must not be Long.MIN_VALUE"; + return Math.abs(value); } 
public void addBalancingRoundSummary(BalancingRoundSummary summary) {