Skip to content

Commit

Permalink
[ML] Fix incorrect assumption about minimum ML node size (#91694) (#9…
Browse files Browse the repository at this point in the history
…1697)

The ML autoscaling code was making an assumption that all ML
nodes in Cloud will be at least 1GB. This is not correct. After
allowing for logging and metrics collection it is possible for
ML nodes to be smaller.

This PR updates the assumption to 0.5GB.
  • Loading branch information
droberts195 committed Nov 18, 2022
1 parent b06be47 commit 5c387f8
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public final class NativeMemoryCalculator {
// Maximum permitted JVM heap size when auto-configured.
// Must match the value used in MachineDependentHeap.MachineNodeRole.ML_ONLY.
public static final long STATIC_JVM_UPPER_THRESHOLD = ByteSizeValue.ofGb(31).getBytes();
public static final long MINIMUM_AUTOMATIC_NODE_SIZE = ByteSizeValue.ofGb(1).getBytes();
public static final long MINIMUM_AUTOMATIC_NODE_SIZE = ByteSizeValue.ofMb(512).getBytes();
private static final long OS_OVERHEAD = ByteSizeValue.ofMb(200).getBytes();
// Memory size beyond which the JVM is given 10% of memory instead of 40%.
// Must match the value used in MachineDependentHeap.MachineNodeRole.ML_ONLY.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1217,8 +1217,8 @@ public void testScale_WithNoMlNodesButWaitingAnalytics() {
"requesting scale up as number of jobs in queues exceeded configured limit and there are no machine learning nodes"
)
);
assertThat(result.nodeSize(), equalTo(ByteSizeValue.ofGb(1)));
assertThat(result.tierSize(), equalTo(ByteSizeValue.ofGb(1)));
assertThat(result.nodeSize(), equalTo(ByteSizeValue.ofMb(714)));
assertThat(result.tierSize(), equalTo(ByteSizeValue.ofMb(714)));
}

private MlMemoryAutoscalingDecider buildDecider() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,15 @@ public void testConsistencyInAutoCalculation() {
NativeMemoryCapacity nativeMemoryCapacity = new NativeMemoryCapacity(bytesForML, bytesForML, jvmSize);

MlMemoryAutoscalingCapacity capacity = nativeMemoryCapacity.autoscalingCapacity(30, true, Long.MAX_VALUE, 1).build();
// We don't allow node sizes below 1GB, so we will always be at least that large
// We don't allow node sizes below 0.5GB, so we will always be at least that large
// Also, allow 1 byte off for weird rounding issues
assertThat(
capacity.nodeSize().getBytes(),
greaterThanOrEqualTo(Math.max(nodeSize, ByteSizeValue.ofGb(1).getBytes()) - 1L)
greaterThanOrEqualTo(Math.max(nodeSize, ByteSizeValue.ofMb(512).getBytes()) - 1L)
);
assertThat(
capacity.tierSize().getBytes(),
greaterThanOrEqualTo(Math.max(nodeSize, ByteSizeValue.ofGb(1).getBytes()) - 1L)
greaterThanOrEqualTo(Math.max(nodeSize, ByteSizeValue.ofMb(512).getBytes()) - 1L)
);
}
}
Expand Down

0 comments on commit 5c387f8

Please sign in to comment.