elastic · elasticsearchmachine · Aug 1, 2025 · Jul 11, 2025 · Jul 17, 2025 · Jul 17, 2025
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterInfoServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterInfoServiceIT.java
@@ -13,6 +13,7 @@
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.ActionRequest;
 import org.elasticsearch.action.admin.cluster.node.stats.TransportNodesStatsAction;
+import org.elasticsearch.action.admin.cluster.node.usage.TransportNodeUsageStatsForThreadPoolsAction;
 import org.elasticsearch.action.admin.indices.stats.IndicesStatsAction;
 import org.elasticsearch.action.support.ActionFilter;
 import org.elasticsearch.action.support.ActionFilters;
@@ -21,6 +22,7 @@
 import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
 import org.elasticsearch.cluster.routing.RoutingTable;
 import org.elasticsearch.cluster.routing.ShardRouting;
+import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings;
 import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.Strings;
@@ -39,16 +41,19 @@
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.InternalTestCluster;
 import org.elasticsearch.test.transport.MockTransportService;
+import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
 import org.hamcrest.Matchers;
 
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import static java.util.Collections.emptySet;
@@ -334,4 +339,65 @@ public void testClusterInfoServiceInformationClearOnError() {
             );
         }
     }
+
+    public void testClusterInfoIncludesNodeUsageStatsForThreadPools() {
+        var settings = Settings.builder()
+            .put(
+                WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(),
+                WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED
+            )
+            .build();
+        var masterName = internalCluster().startMasterOnlyNode(settings);
+        var dataNodeName = internalCluster().startDataOnlyNode(settings);
+        ensureStableCluster(2);
+        assertEquals(masterName, internalCluster().getMasterName());
+        assertNotEquals(dataNodeName, internalCluster().getMasterName());
+        logger.info("---> master node: " + masterName + ", data node: " + dataNodeName);
+
+        // Track when the data node receives a poll from the master for the write thread pool's stats.
+        final MockTransportService dataNodeMockTransportService = MockTransportService.getInstance(dataNodeName);
+        final CountDownLatch nodeThreadPoolStatsPolledByMaster = new CountDownLatch(1);
+        dataNodeMockTransportService.addRequestHandlingBehavior(
+            TransportNodeUsageStatsForThreadPoolsAction.NAME + "[n]",
+            (handler, request, channel, task) -> {
+                handler.messageReceived(request, channel, task);
+                // Only log the first poll; counting down an already-released latch on later polls is a no-op.
+                if (nodeThreadPoolStatsPolledByMaster.getCount() > 0) {
+                    logger.info("---> Data node received a request for thread pool stats");
+                }
+                nodeThreadPoolStatsPolledByMaster.countDown();
+            }
+        );
+
+        // Index a random number of docs (drawn once, not re-rolled on every loop check) to create write thread pool activity.
+        final String indexName = randomIdentifier();
+        for (int i = 0, numDocs = randomIntBetween(1, 1000); i < numDocs; i++) {
+            index(indexName, Integer.toString(i), Collections.singletonMap("foo", "bar"));
+        }
+
+        // Force a refresh of the ClusterInfo state to collect fresh info from the data nodes.
+        final InternalClusterInfoService masterClusterInfoService = asInstanceOf(
+            InternalClusterInfoService.class,
+            internalCluster().getCurrentMasterNodeInstance(ClusterInfoService.class)
+        );
+        final ClusterInfo clusterInfo = ClusterInfoServiceUtils.refresh(masterClusterInfoService);
+
+        // Verify that the data node received a request for thread pool stats.
+        safeAwait(nodeThreadPoolStatsPolledByMaster);
+
+        final Map<String, NodeUsageStatsForThreadPools> usageStatsForThreadPools = clusterInfo.getNodeUsageStatsForThreadPools();
+        logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools);
+        assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collected
+        var dataNodeId = getNodeId(dataNodeName);
+        var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId);
+        assertNotNull(nodeUsageStatsForThreadPool);
+        logger.info("---> Data node's thread pool stats: " + nodeUsageStatsForThreadPool);
+
+        assertEquals(dataNodeId, nodeUsageStatsForThreadPool.nodeId());
+        var writeThreadPoolStats = nodeUsageStatsForThreadPool.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE);
+        assertNotNull("Expected to find stats for the WRITE thread pool", writeThreadPoolStats);
+        assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThan(0));
+        assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThan(0f));
+        assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), greaterThanOrEqualTo(0L));
+    }
 }
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/shard/IndexShardIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/shard/IndexShardIT.java
@@ -25,7 +25,6 @@
 import org.elasticsearch.cluster.EstimatedHeapUsageCollector;
 import org.elasticsearch.cluster.InternalClusterInfoService;
 import org.elasticsearch.cluster.NodeUsageStatsForThreadPools;
-import org.elasticsearch.cluster.NodeUsageStatsForThreadPoolsCollector;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.cluster.metadata.ProjectId;
 import org.elasticsearch.cluster.node.DiscoveryNode;
@@ -92,7 +91,6 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -135,11 +133,7 @@ public class IndexShardIT extends ESSingleNodeTestCase {
 
     @Override
     protected Collection<Class<? extends Plugin>> getPlugins() {
-        return pluginList(
-            InternalSettingsPlugin.class,
-            BogusEstimatedHeapUsagePlugin.class,
-            BogusNodeUsageStatsForThreadPoolsCollectorPlugin.class
-        );
+        return pluginList(InternalSettingsPlugin.class, BogusEstimatedHeapUsagePlugin.class);
     }
 
     public void testLockTryingToDelete() throws Exception {
@@ -334,8 +328,7 @@ public void testNodeWriteLoadsArePresent() {
             ClusterInfoServiceUtils.refresh(clusterInfoService);
             nodeThreadPoolStats = clusterInfoService.getClusterInfo().getNodeUsageStatsForThreadPools();
 
-            /** Verify that each node has usage stats reported. The test {@link BogusNodeUsageStatsForThreadPoolsCollector} implementation
-             * generates random usage values */
+            // Verify that each node has usage stats reported.
             ClusterState state = getInstanceFromNode(ClusterService.class).state();
             assertEquals(state.nodes().size(), nodeThreadPoolStats.size());
             for (DiscoveryNode node : state.nodes()) {
@@ -348,7 +341,7 @@ public void testNodeWriteLoadsArePresent() {
                 assertNotNull(writeThreadPoolStats);
                 assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThanOrEqualTo(0));
                 assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThanOrEqualTo(0.0f));
-                assertThat(writeThreadPoolStats.averageThreadPoolQueueLatencyMillis(), greaterThanOrEqualTo(0L));
+                assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), greaterThanOrEqualTo(0L));
             }
         } finally {
             updateClusterSettings(
@@ -993,61 +986,4 @@ public ClusterService getClusterService() {
             return clusterService.get();
         }
     }
-
-    /**
-     * A simple {@link NodeUsageStatsForThreadPoolsCollector} implementation that creates and returns random
-     * {@link NodeUsageStatsForThreadPools} for each node in the cluster.
-     * <p>
-     * Note: there's an 'org.elasticsearch.cluster.NodeUsageStatsForThreadPoolsCollector' file that declares this implementation so that the
-     * plugin system can pick it up and use it for the test set-up.
-     */
-    public static class BogusNodeUsageStatsForThreadPoolsCollector implements NodeUsageStatsForThreadPoolsCollector {
-
-        private final BogusNodeUsageStatsForThreadPoolsCollectorPlugin plugin;
-
-        public BogusNodeUsageStatsForThreadPoolsCollector(BogusNodeUsageStatsForThreadPoolsCollectorPlugin plugin) {
-            this.plugin = plugin;
-        }
-
-        @Override
-        public void collectUsageStats(ActionListener<Map<String, NodeUsageStatsForThreadPools>> listener) {
-            ActionListener.completeWith(
-                listener,
-                () -> plugin.getClusterService()
-                    .state()
-                    .nodes()
-                    .stream()
-                    .collect(Collectors.toUnmodifiableMap(DiscoveryNode::getId, node -> makeRandomNodeUsageStats(node.getId())))
-            );
-        }
-
-        private NodeUsageStatsForThreadPools makeRandomNodeUsageStats(String nodeId) {
-            NodeUsageStatsForThreadPools.ThreadPoolUsageStats writeThreadPoolStats = new NodeUsageStatsForThreadPools.ThreadPoolUsageStats(
-                randomNonNegativeInt(),
-                randomFloat(),
-                randomNonNegativeLong()
-            );
-            Map<String, NodeUsageStatsForThreadPools.ThreadPoolUsageStats> statsForThreadPools = new HashMap<>();
-            statsForThreadPools.put(ThreadPool.Names.WRITE, writeThreadPoolStats);
-            return new NodeUsageStatsForThreadPools(nodeId, statsForThreadPools);
-        }
-    }
-
-    /**
-     * Make a plugin to gain access to the {@link ClusterService} instance.
-     */
-    public static class BogusNodeUsageStatsForThreadPoolsCollectorPlugin extends Plugin implements ClusterPlugin {
-
-        private final SetOnce<ClusterService> clusterService = new SetOnce<>();
-
-        @Override
-        public Collection<?> createComponents(PluginServices services) {
-            clusterService.set(services.clusterService());
-            return List.of();
-        }
-
-        public ClusterService getClusterService() {
-            return clusterService.get();
-        }
-    }
 }
diff --git a/...sources/META-INF/services/org.elasticsearch.cluster.NodeUsageStatsForThreadPoolsCollector b/...sources/META-INF/services/org.elasticsearch.cluster.NodeUsageStatsForThreadPoolsCollector
diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java
@@ -357,6 +357,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion COMPONENT_TEMPLATE_TRACKING_INFO = def(9_132_0_00);
     public static final TransportVersion TO_CHILD_BLOCK_JOIN_QUERY = def(9_133_0_00);
     public static final TransportVersion ML_INFERENCE_AI21_COMPLETION_ADDED = def(9_134_0_00);
+    public static final TransportVersion TRANSPORT_NODE_USAGE_STATS_FOR_THREAD_POOLS_ACTION = def(9_135_0_00);
 
     /*
      * STOP! READ THIS FIRST! No, really,

diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java b/server/src/main/java/org/elasticsearch/action/ActionModule.java
@@ -38,6 +38,7 @@
 import org.elasticsearch.action.admin.cluster.node.tasks.cancel.TransportCancelTasksAction;
 import org.elasticsearch.action.admin.cluster.node.tasks.get.TransportGetTaskAction;
 import org.elasticsearch.action.admin.cluster.node.tasks.list.TransportListTasksAction;
+import org.elasticsearch.action.admin.cluster.node.usage.TransportNodeUsageStatsForThreadPoolsAction;
 import org.elasticsearch.action.admin.cluster.node.usage.TransportNodesUsageAction;
 import org.elasticsearch.action.admin.cluster.remote.RemoteClusterNodesAction;
 import org.elasticsearch.action.admin.cluster.remote.TransportRemoteInfoAction;
@@ -629,6 +630,7 @@ public <Request extends ActionRequest, Response extends ActionResponse> void reg
         ActionRegistry actions = new ActionRegistry();
 
         actions.register(TransportNodesInfoAction.TYPE, TransportNodesInfoAction.class);
+        actions.register(TransportNodeUsageStatsForThreadPoolsAction.TYPE, TransportNodeUsageStatsForThreadPoolsAction.class);
         actions.register(TransportRemoteInfoAction.TYPE, TransportRemoteInfoAction.class);
         actions.register(TransportNodesCapabilitiesAction.TYPE, TransportNodesCapabilitiesAction.class);
         actions.register(TransportNodesFeaturesAction.TYPE, TransportNodesFeaturesAction.class);

diff --git a/...org/elasticsearch/action/admin/cluster/node/usage/NodeUsageStatsForThreadPoolsAction.java b/...org/elasticsearch/action/admin/cluster/node/usage/NodeUsageStatsForThreadPoolsAction.java
@@ -0,0 +1,145 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.action.admin.cluster.node.usage;
+
+import org.elasticsearch.action.FailedNodeException;
+import org.elasticsearch.action.support.nodes.BaseNodeResponse;
+import org.elasticsearch.action.support.nodes.BaseNodesRequest;
+import org.elasticsearch.action.support.nodes.BaseNodesResponse;
+import org.elasticsearch.cluster.ClusterName;
+import org.elasticsearch.cluster.NodeUsageStatsForThreadPools;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.transport.AbstractTransportRequest;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Defines the request/response types for {@link TransportNodeUsageStatsForThreadPoolsAction}.
+ */
+public class NodeUsageStatsForThreadPoolsAction {
+    /**
+     * The sender-side request type, which is resolved into individual {@link NodeRequest} requests, one for each targeted node.
+     */
+    public static class Request extends BaseNodesRequest {
+        /**
+         * @param nodeIds The IDs of the nodes to send individual requests to and collect responses from. If the array is null, all
+         *                nodes in the cluster will be sent a request.
+         */
+        public Request(String[] nodeIds) {
+            super(nodeIds);
+        }
+    }
+
+    /**
+     * Request sent to and received by a cluster node. There are no parameters needed in the node-specific request.
+     */
+    public static class NodeRequest extends AbstractTransportRequest {
+        public NodeRequest(StreamInput in) throws IOException {
+            super(in);
+        }
+
+        public NodeRequest() {}
+    }
+
+    /**
+     * The collected per-node {@link NodeResponse}s, together with any node failures, gathered for a single {@link Request}.
+     */
+    public static class Response extends BaseNodesResponse<NodeUsageStatsForThreadPoolsAction.NodeResponse> {
+
+        protected Response(StreamInput in) throws IOException {
+            super(in);
+        }
+
+        public Response(
+            ClusterName clusterName,
+            List<NodeUsageStatsForThreadPoolsAction.NodeResponse> nodeResponses,
+            List<FailedNodeException> nodeFailures
+        ) {
+            super(clusterName, nodeResponses, nodeFailures);
+        }
+
+        /**
+         * Builds a new map of the per-node stats, keyed by the node ID carried inside each {@link NodeUsageStatsForThreadPools}.
+         */
+        public Map<String, NodeUsageStatsForThreadPools> getAllNodeUsageStatsForThreadPools() {
+            Map<String, NodeUsageStatsForThreadPools> allNodeUsageStatsForThreadPools = new HashMap<>();
+            for (NodeUsageStatsForThreadPoolsAction.NodeResponse nodeResponse : getNodes()) {
+                allNodeUsageStatsForThreadPools.put(
+                    nodeResponse.getNodeUsageStatsForThreadPools().nodeId(),
+                    nodeResponse.getNodeUsageStatsForThreadPools()
+                );
+            }
+            return allNodeUsageStatsForThreadPools;
+        }
+
+        @Override
+        protected void writeNodesTo(StreamOutput out, List<NodeResponse> nodeResponses) throws IOException {
+            out.writeCollection(nodeResponses);
+        }
+
+        @Override
+        protected List<NodeResponse> readNodesFrom(StreamInput in) throws IOException {
+            return in.readCollectionAsList(NodeUsageStatsForThreadPoolsAction.NodeResponse::new);
+        }
+
+        @Override
+        public String toString() {
+            return "NodeUsageStatsForThreadPoolsAction.Response{" + getNodes() + "}";
+        }
+    }
+
+    /**
+     * A single cluster node's response; its {@link NodeUsageStatsForThreadPools} is serialized after the base node response.
+     */
+    public static class NodeResponse extends BaseNodeResponse {
+        private final NodeUsageStatsForThreadPools nodeUsageStatsForThreadPools;
+
+        protected NodeResponse(StreamInput in, DiscoveryNode node) throws IOException {
+            super(in, node);
+            this.nodeUsageStatsForThreadPools = new NodeUsageStatsForThreadPools(in);
+        }
+
+        public NodeResponse(DiscoveryNode node, NodeUsageStatsForThreadPools nodeUsageStatsForThreadPools) {
+            super(node);
+            this.nodeUsageStatsForThreadPools = nodeUsageStatsForThreadPools;
+        }
+
+        public NodeResponse(StreamInput in) throws IOException {
+            super(in);
+            this.nodeUsageStatsForThreadPools = new NodeUsageStatsForThreadPools(in);
+        }
+
+        public NodeUsageStatsForThreadPools getNodeUsageStatsForThreadPools() {
+            return nodeUsageStatsForThreadPools;
+        }
+
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            super.writeTo(out);
+            nodeUsageStatsForThreadPools.writeTo(out);
+        }
+
+        @Override
+        public String toString() {
+            return "NodeUsageStatsForThreadPoolsAction.NodeResponse{"
+                + "nodeId="
+                + getNode().getId()
+                + ", nodeUsageStatsForThreadPools="
+                + nodeUsageStatsForThreadPools
+                + "}";
+        }
+    }
+
+}