diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go
index d2e412a..637185d 100644
--- a/pkg/controller/controller.go
+++ b/pkg/controller/controller.go
@@ -237,6 +237,13 @@ func (c *Controller) scaleNodeGroup(nodegroup string, nodeGroup *NodeGroupState)
 	// Filter into untainted and tainted nodes
 	untaintedNodes, taintedNodes, forceTaintedNodes, cordonedNodes := c.filterNodes(nodeGroup, allNodes)
 
+	// Determine which nodes count toward capacity for the utilisation calculation
+	capacityNodes := untaintedNodes
+	if nodeGroup.Opts.IncludeTaintedInCapacity {
+		capacityNodes = append(append([]*v1.Node{}, untaintedNodes...), taintedNodes...)
+		log.WithField("nodegroup", nodegroup).Infof("Including %v tainted nodes in capacity calculation (total capacity nodes: %v)", len(taintedNodes), len(capacityNodes))
+	}
+
 	// Metrics and Logs
 	log.WithField("nodegroup", nodegroup).Infof("pods total: %v", len(pods))
 	log.WithField("nodegroup", nodegroup).Infof("nodes remaining total: %v", len(allNodes))
@@ -274,14 +281,14 @@ func (c *Controller) scaleNodeGroup(nodegroup string, nodeGroup *NodeGroupState)
 	// for working out which pods are on which nodes
 	nodeGroup.NodeInfoMap = k8s.CreateNodeNameToInfoMap(pods, allNodes)
 
-	// Calc capacity for untainted nodes
+	// Calc capacity for nodes included in the utilisation calculation
 	podRequests, err := k8s.CalculatePodsRequestedUsage(pods)
 	if err != nil {
 		log.Errorf("Failed to calculate requests: %v", err)
 		return 0, err
 	}
 
-	nodeCapacity, err := k8s.CalculateNodesCapacity(untaintedNodes, pods)
+	nodeCapacity, err := k8s.CalculateNodesCapacity(capacityNodes, pods)
 	if err != nil {
 		log.Errorf("Failed to calculate capacity: %v", err)
 		return 0, err
@@ -319,14 +326,14 @@ func (c *Controller) scaleNodeGroup(nodegroup string, nodeGroup *NodeGroupState)
 	}
 
 	// Calc %
-	// both cpu and memory capacity are based on number of untainted nodes
-	// pass number of untainted nodes in to help make decision if it's a scaling-up-from-0
+	// both cpu and memory capacity are based on the number of capacity nodes
+	// pass the number of capacity nodes in to help decide if it's a scaling-up-from-0
 	cpuPercent, memPercent, err := calcPercentUsage(
 		*podRequests.Total.GetCPUQuantity(),
 		*podRequests.Total.GetMemoryQuantity(),
 		*nodeCapacity.Total.GetCPUQuantity(),
 		*nodeCapacity.Total.GetMemoryQuantity(),
-		int64(len(untaintedNodes)))
+		int64(len(capacityNodes)))
 	if err != nil {
 		log.Errorf("Failed to calculate percentages: %v", err)
 		return 0, err
@@ -374,7 +381,7 @@ func (c *Controller) scaleNodeGroup(nodegroup string, nodeGroup *NodeGroupState)
 	// we want to add enough nodes such that the maxPercentage cluster util
 	// drops back below ScaleUpThresholdPercent
 	nodesDelta, err = calcScaleUpDelta(
-		untaintedNodes,
+		capacityNodes,
 		cpuPercent,
 		memPercent,
 		*podRequests.Total.GetCPUQuantity(),
diff --git a/pkg/controller/controller_scale_node_group_test.go b/pkg/controller/controller_scale_node_group_test.go
index 5280311..5dc1ec2 100644
--- a/pkg/controller/controller_scale_node_group_test.go
+++ b/pkg/controller/controller_scale_node_group_test.go
@@ -1497,3 +1497,221 @@ func TestScaleNodeGroupNodeMaxAge(t *testing.T) {
 		})
 	}
 }
+
+func TestIncludeTaintedInCapacity(t *testing.T) {
+	t.Run("including tainted nodes in the denominator prevents an artificial utilisation spike", func(t *testing.T) {
+		nodeGroup := NodeGroupOptions{
+			Name:                               "default",
+			CloudProviderGroupName:             "default",
+			MinNodes:                           2,
+			MaxNodes:                           20,
+			TaintLowerCapacityThresholdPercent: 40,
+			TaintUpperCapacityThresholdPercent: 60,
+			ScaleUpThresholdPercent:            80,
+			FastNodeRemovalRate:                3,
+			SlowNodeRemovalRate:                1,
+			SoftDeleteGracePeriod:              "5m",
+			HardDeleteGracePeriod:              "10m",
+			ScaleUpCoolDownPeriod:              "1m",
+			TaintEffect:                        "NoSchedule",
+			IncludeTaintedInCapacity:           true,
+		}
+		nodeGroups := []NodeGroupOptions{nodeGroup}
+
+		// 5 untainted nodes + 5 tainted nodes = 10 total
+		untaintedNodes := test.BuildTestNodes(5, test.NodeOpts{
+			CPU: 1000,
+			Mem: 1000,
+		})
+		taintedNodes := test.BuildTestNodes(5, test.NodeOpts{
+			CPU:     1000,
+			Mem:     1000,
+			Tainted: true,
+		})
+		allNodes := append(untaintedNodes, taintedNodes...)
+
+		pods := test.BuildTestPods(9, test.PodOpts{
+			CPU: []int64{500},
+			Mem: []int64{500},
+		})
+
+		client, opts, err := buildTestClient(allNodes, pods, nodeGroups, ListerOptions{})
+		require.NoError(t, err)
+
+		testCloudProvider := test.NewCloudProvider(1)
+		testNodeGroup := test.NewNodeGroup(
+			nodeGroup.CloudProviderGroupName,
+			nodeGroup.Name,
+			int64(nodeGroup.MinNodes),
+			int64(nodeGroup.MaxNodes),
+			int64(len(allNodes)),
+		)
+		testCloudProvider.RegisterNodeGroup(testNodeGroup)
+
+		nodeGroupsState := BuildNodeGroupsState(nodeGroupsStateOpts{
+			nodeGroups: nodeGroups,
+			client:     *client,
+		})
+
+		controller := &Controller{
+			Client:        client,
+			Opts:          opts,
+			stopChan:      nil,
+			nodeGroups:    nodeGroupsState,
+			cloudProvider: testCloudProvider,
+		}
+
+		nodesDelta, err := controller.scaleNodeGroup(nodeGroup.Name, nodeGroupsState[nodeGroup.Name])
+		require.NoError(t, err)
+
+		// With flag enabled, utilisation = 45% (not 90%), so no scale up
+		assert.LessOrEqual(t, nodesDelta, 0)
+	})
+
+	t.Run("flag disabled preserves existing behaviour", func(t *testing.T) {
+		nodeGroup := NodeGroupOptions{
+			Name:                               "default",
+			CloudProviderGroupName:             "default",
+			MinNodes:                           2,
+			MaxNodes:                           20,
+			TaintLowerCapacityThresholdPercent: 40,
+			TaintUpperCapacityThresholdPercent: 60,
+			ScaleUpThresholdPercent:            80,
+			FastNodeRemovalRate:                3,
+			SlowNodeRemovalRate:                1,
+			SoftDeleteGracePeriod:              "5m",
+			HardDeleteGracePeriod:              "10m",
+			ScaleUpCoolDownPeriod:              "1m",
+			TaintEffect:                        "NoSchedule",
+			IncludeTaintedInCapacity:           false, // Default behaviour
+		}
+		nodeGroups := []NodeGroupOptions{nodeGroup}
+
+		// 5 untainted nodes + 5 tainted nodes = 10 total
+		untaintedNodes := test.BuildTestNodes(5, test.NodeOpts{
+			CPU: 1000,
+			Mem: 1000,
+		})
+		taintedNodes := test.BuildTestNodes(5, test.NodeOpts{
+			CPU:     1000,
+			Mem:     1000,
+			Tainted: true,
+		})
+		allNodes := append(untaintedNodes, taintedNodes...)
+
+		pods := test.BuildTestPods(9, test.PodOpts{
+			CPU: []int64{500},
+			Mem: []int64{500},
+		})
+
+		client, opts, err := buildTestClient(allNodes, pods, nodeGroups, ListerOptions{})
+		require.NoError(t, err)
+
+		testCloudProvider := test.NewCloudProvider(1)
+		testNodeGroup := test.NewNodeGroup(
+			nodeGroup.CloudProviderGroupName,
+			nodeGroup.Name,
+			int64(nodeGroup.MinNodes),
+			int64(nodeGroup.MaxNodes),
+			int64(len(allNodes)),
+		)
+		testCloudProvider.RegisterNodeGroup(testNodeGroup)
+
+		nodeGroupsState := BuildNodeGroupsState(nodeGroupsStateOpts{
+			nodeGroups: nodeGroups,
+			client:     *client,
+		})
+
+		controller := &Controller{
+			Client:        client,
+			Opts:          opts,
+			stopChan:      nil,
+			nodeGroups:    nodeGroupsState,
+			cloudProvider: testCloudProvider,
+		}
+
+		nodesDelta, err := controller.scaleNodeGroup(nodeGroup.Name, nodeGroupsState[nodeGroup.Name])
+		require.NoError(t, err)
+
+		// With flag disabled, utilisation = 90% > 80%, triggers scale up
+		assert.Greater(t, nodesDelta, 0)
+	})
+
+	t.Run("force-tainted and cordoned nodes are never included", func(t *testing.T) {
+		nodeGroup := NodeGroupOptions{
+			Name:                               "default",
+			CloudProviderGroupName:             "default",
+			MinNodes:                           2,
+			MaxNodes:                           20,
+			TaintLowerCapacityThresholdPercent: 40,
+			TaintUpperCapacityThresholdPercent: 60,
+			ScaleUpThresholdPercent:            80,
+			FastNodeRemovalRate:                3,
+			SlowNodeRemovalRate:                1,
+			SoftDeleteGracePeriod:              "5m",
+			HardDeleteGracePeriod:              "10m",
+			ScaleUpCoolDownPeriod:              "1m",
+			TaintEffect:                        "NoSchedule",
+			IncludeTaintedInCapacity:           true,
+		}
+		nodeGroups := []NodeGroupOptions{nodeGroup}
+
+		untaintedNodes := test.BuildTestNodes(3, test.NodeOpts{
+			CPU: 1000,
+			Mem: 1000,
+		})
+		taintedNodes := test.BuildTestNodes(2, test.NodeOpts{
+			CPU:     1000,
+			Mem:     1000,
+			Tainted: true,
+		})
+		forceTaintedNodes := test.BuildTestNodes(2, test.NodeOpts{
+			CPU:          1000,
+			Mem:          1000,
+			ForceTainted: true,
+		})
+		cordonedNodes := test.BuildTestNodes(2, test.NodeOpts{
+			CPU:           1000,
+			Mem:           1000,
+			Unschedulable: true,
+		})
+		allNodes := append(append(append(untaintedNodes, taintedNodes...), forceTaintedNodes...), cordonedNodes...)
+
+		pods := test.BuildTestPods(5, test.PodOpts{
+			CPU: []int64{500},
+			Mem: []int64{500},
+		})
+
+		client, opts, err := buildTestClient(allNodes, pods, nodeGroups, ListerOptions{})
+		require.NoError(t, err)
+
+		testCloudProvider := test.NewCloudProvider(1)
+		testNodeGroup := test.NewNodeGroup(
+			nodeGroup.CloudProviderGroupName,
+			nodeGroup.Name,
+			int64(nodeGroup.MinNodes),
+			int64(nodeGroup.MaxNodes),
+			int64(len(allNodes)),
+		)
+		testCloudProvider.RegisterNodeGroup(testNodeGroup)
+
+		nodeGroupsState := BuildNodeGroupsState(nodeGroupsStateOpts{
+			nodeGroups: nodeGroups,
+			client:     *client,
+		})
+
+		controller := &Controller{
+			Client:        client,
+			Opts:          opts,
+			stopChan:      nil,
+			nodeGroups:    nodeGroupsState,
+			cloudProvider: testCloudProvider,
+		}
+
+		nodesDelta, err := controller.scaleNodeGroup(nodeGroup.Name, nodeGroupsState[nodeGroup.Name])
+		require.NoError(t, err)
+
+		// Capacity = 5 nodes (3 untainted + 2 tainted), utilisation = 50%
+		assert.Equal(t, -nodeGroup.SlowNodeRemovalRate, nodesDelta)
+	})
+}
diff --git a/pkg/controller/node_group.go b/pkg/controller/node_group.go
index 97b9f06..ab11d3b 100644
--- a/pkg/controller/node_group.go
+++ b/pkg/controller/node_group.go
@@ -63,6 +63,10 @@ type NodeGroupOptions struct {
 	// allowed in the nodegroup at any given time.
 	MaxUnhealthyNodesPercent int `json:"max_unhealthy_nodes_percent,omitempty" yaml:"max_unhealthy_nodes_percent,omitempty"`
 
+	// IncludeTaintedInCapacity includes tainted nodes in the capacity denominator
+	// for utilisation calculations, preventing artificial spikes when nodes are tainted.
+	IncludeTaintedInCapacity bool `json:"include_tainted_in_capacity,omitempty" yaml:"include_tainted_in_capacity,omitempty"`
+
 	// Private variables for storing the parsed duration from the string
 	softDeleteGracePeriodDuration time.Duration
 	hardDeleteGracePeriodDuration time.Duration
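
Reviewer note: the percentages asserted in the first two subtests can be sanity-checked with back-of-the-envelope arithmetic. The standalone sketch below is illustrative only (percentUsage is a stand-in helper, not the package's calcPercentUsage); it uses the test fixtures' numbers of 9 pods requesting 500m CPU each against 1000m-CPU nodes:

package main

import "fmt"

// percentUsage mirrors the ratio the controller computes:
// total pod requests divided by total node capacity.
func percentUsage(requestedMilli, capacityMilli int64) float64 {
	return float64(requestedMilli) / float64(capacityMilli) * 100
}

func main() {
	requested := int64(9) * 500 // 9 pods x 500m = 4500m of CPU requests

	// Flag off: only the 5 untainted nodes form the denominator.
	fmt.Printf("untainted only: %.0f%%\n", percentUsage(requested, 5*1000)) // 90%, above the 80% scale-up threshold

	// Flag on: 5 untainted + 5 tainted nodes form the denominator.
	fmt.Printf("with tainted:   %.0f%%\n", percentUsage(requested, 10*1000)) // 45%, no scale up
}

The third subtest follows the same arithmetic: 5 pods at 500m against 5 capacity nodes (3 untainted + 2 tainted) at 1000m gives 2500/5000 = 50%, which is below the 60% upper taint threshold, hence the slow removal rate the assertion expects.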
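One design note on the controller change: capacityNodes is built by copying untaintedNodes before appending, rather than append(untaintedNodes, taintedNodes...). If the slice returned by filterNodes has spare capacity, a direct append would write tainted entries into its backing array and silently corrupt the untainted list. A toy demonstration of that hazard, with strings standing in for *v1.Node values:

package main

import "fmt"

func main() {
	// A filtered slice often has spare capacity left in its backing array.
	backing := make([]string, 2, 4)
	backing[0], backing[1] = "untainted-1", "untainted-2"
	untainted := backing[:2]

	// A direct append lands in the shared backing array...
	_ = append(untainted, "tainted-1")
	fmt.Println(backing[:3]) // [untainted-1 untainted-2 tainted-1]

	// ...whereas the diff's copy-then-append leaves the original untouched.
	capacityNodes := append(append([]string{}, untainted...), "tainted-2")
	fmt.Println(capacityNodes) // [untainted-1 untainted-2 tainted-2]
}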
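Going by the struct tags on the new NodeGroupOptions field, operators would toggle the option per nodegroup with the include_tainted_in_capacity key, defaulting to false when omitted. A minimal sketch of that mapping (nodeGroupOpts below is a hypothetical two-field mirror for illustration; the real NodeGroupOptions carries many more fields, and the surrounding config schema is not shown in this diff):

package main

import (
	"fmt"

	yaml "gopkg.in/yaml.v2"
)

// nodeGroupOpts mirrors just enough fields to show the new YAML key.
type nodeGroupOpts struct {
	Name                     string `yaml:"name"`
	IncludeTaintedInCapacity bool   `yaml:"include_tainted_in_capacity,omitempty"`
}

func main() {
	doc := []byte("name: default\ninclude_tainted_in_capacity: true\n")

	var opts nodeGroupOpts
	if err := yaml.Unmarshal(doc, &opts); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", opts) // {Name:default IncludeTaintedInCapacity:true}
}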