From 3a1650a1d5b22d50a45b125d6fcd41965b3b98a2 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Wed, 3 Sep 2025 14:45:05 +0200 Subject: [PATCH] [ML] Fix double-counting of inference memory in the assignment rebalancer (#133919) The private static method TrainedModelAssignmentRebalancer.getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference() was used to subtract load.getAssignedNativeInferenceMemory() from load.getFreeMemoryExcludingPerNodeOverhead(). However, in NodeLoad.getFreeMemoryExcludingPerNodeOverhead(), native inference memory was already subtracted as part of the getAssignedJobMemoryExcludingPerNodeOverhead() calculation. This led to double-counting of the native inference memory. Avoiding this double-counting allows us to remove the private static method getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference() entirely. --- docs/changelog/133919.yaml | 5 +++++ .../assignment/TrainedModelAssignmentRebalancer.java | 8 +------- 2 files changed, 6 insertions(+), 7 deletions(-) create mode 100644 docs/changelog/133919.yaml diff --git a/docs/changelog/133919.yaml b/docs/changelog/133919.yaml new file mode 100644 index 0000000000000..34c3ecd3ebe57 --- /dev/null +++ b/docs/changelog/133919.yaml @@ -0,0 +1,5 @@ +pr: 133919 +summary: Fix double-counting of inference memory in the assignment rebalancer +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java index f523b4b086f35..90f86dbc243f0 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java @@ -298,9 +298,7 @@ private Map, List>
createNodesByZoneMap() { nodes.add( new AssignmentPlan.Node( discoveryNode.getId(), - // We subtract native inference memory as the planner expects available memory for - // native inference including current assignments. - getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(load), + load.getFreeMemoryExcludingPerNodeOverhead(), MlProcessors.get(discoveryNode, allocatedProcessorsScale).roundUp() ) ); @@ -317,10 +315,6 @@ private Map, List> createNodesByZoneMap() { })); } - private static long getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(NodeLoad load) { - return load.getFreeMemoryExcludingPerNodeOverhead() - load.getAssignedNativeInferenceMemory(); - } - private TrainedModelAssignmentMetadata.Builder buildAssignmentsFromPlan(AssignmentPlan assignmentPlan) { TrainedModelAssignmentMetadata.Builder builder = TrainedModelAssignmentMetadata.Builder.empty(); for (AssignmentPlan.Deployment deployment : assignmentPlan.deployments()) {