Skip to content

Commit

Permalink
eni: Fix Cilium overallocating network interfaces
Browse files Browse the repository at this point in the history
This fixes a bug where Cilium wrongly overestimated the amount of available
ENI IP addresses. This bug was introduced when we removed the primary
ENI IP address from the IPAM pool, but forgot to adjust the number of
addresses used to compare with the AWS instance limits.

This led to the operator overestimating the number of available IP
addresses by one. This in turn could lead to the operator first failing
to allocate more IPs (because it exceeded the limit) and then
unnecessarily creating a new ENI to fulfill the allocation request.

Fixes: 7c1bb35 ("aws/ec2: Exclude primary ENI IP from IPAM pool")
Fixes: #15877

Signed-off-by: Sebastian Wicki <sebastian@isovalent.com>
  • Loading branch information
gandro authored and ti-mo committed May 4, 2021
1 parent 9cf13c0 commit 119cd19
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 42 deletions.
14 changes: 11 additions & 3 deletions pkg/aws/eni/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,9 @@ func (n *Node) PrepareIPAllocation(scopedLog *logrus.Entry) (a *ipam.AllocationA
continue
}

availableOnENI := math.IntMax(limits.IPv4-len(e.Addresses), 0)
// The limits include the primary IP, so we need to take it into account
// when computing the amount of available addresses on the ENI.
availableOnENI := math.IntMax(limits.IPv4-len(e.Addresses)-1, 0)
if availableOnENI <= 0 {
continue
} else {
Expand Down Expand Up @@ -551,8 +553,11 @@ func (n *Node) GetMaximumAllocatableIPv4() int {
return 0
}

// limits.IPv4 contains the primary IP which is not available for allocation
maxPerInterface := math.IntMax(limits.IPv4-1, 0)

// Return the maximum amount of IP addresses allocatable on the instance
return (limits.Adapters - firstInterfaceIndex) * limits.IPv4
return (limits.Adapters - firstInterfaceIndex) * maxPerInterface
}

var adviseOperatorFlagOnce sync.Once
Expand Down Expand Up @@ -609,5 +614,8 @@ func (n *Node) GetMinimumAllocatableIPv4() int {
return 0
}

return math.IntMin(minimum, (limits.Adapters-index)*limits.IPv4)
// limits.IPv4 contains the primary IP which is not available for allocation
maxPerInterface := math.IntMax(limits.IPv4-1, 0)

return math.IntMin(minimum, (limits.Adapters-index)*maxPerInterface)
}
63 changes: 32 additions & 31 deletions pkg/aws/eni/node_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ func reachedAddressesNeeded(mngr *ipam.NodeManager, nodeName string, needed int)

// TestNodeManagerDefaultAllocation tests allocation with default parameters
//
// - m5.large (3x ENIs, 2x10 IPs)
// - m5.large (3x ENIs, 2x10-2 IPs)
// - MinAllocate 0
// - MaxAllocate 0
// - PreAllocate 8
Expand Down Expand Up @@ -278,7 +278,7 @@ func (e *ENISuite) TestNodeManagerDefaultAllocation(c *check.C) {

// TestNodeManagerENIWithSGTags tests ENI allocation + association with a SG based on tags
//
// - m5.large (3x ENIs, 2x10 IPs)
// - m5.large (3x ENIs, 2x10-2 IPs)
// - MinAllocate 0
// - MaxAllocate 0
// - PreAllocate 8
Expand Down Expand Up @@ -333,7 +333,7 @@ func (e *ENISuite) TestNodeManagerENIWithSGTags(c *check.C) {

// TestNodeManagerMinAllocate20 tests MinAllocate without PreAllocate
//
// - m5.4xlarge (8x ENIs, 7x30 IPs)
// - m5.4xlarge (8x ENIs, 7x30-7 IPs)
// - MinAllocate 10
// - MaxAllocate 0
// - PreAllocate -1
Expand Down Expand Up @@ -383,7 +383,7 @@ func (e *ENISuite) TestNodeManagerMinAllocate20(c *check.C) {

// TestNodeManagerMinAllocateAndPreallocate tests MinAllocate in combination with PreAllocate
//
// - m3.large (3x ENIs, 2x10 IPs)
// - m3.large (3x ENIs, 2x10-2 IPs)
// - MinAllocate 10
// - MaxAllocate 0
// - PreAllocate 1
Expand Down Expand Up @@ -439,17 +439,17 @@ func (e *ENISuite) TestNodeManagerMinAllocateAndPreallocate(c *check.C) {
// TestNodeManagerReleaseAddress tests PreAllocate, MinAllocate and MaxAboveWatermark
// when release excess IP is enabled
//
// - m4.large (4x ENIs, 3x15 IPs)
// - m4.large (4x ENIs, 3x15-3 IPs)
// - MinAllocate 10
// - MaxAllocate 0
// - PreAllocate 4
// - MaxAboveWatermark 4
// - PreAllocate 2
// - MaxAboveWatermark 3
// - FirstInterfaceIndex 0
func (e *ENISuite) TestNodeManagerReleaseAddress(c *check.C) {
ec2api := ec2mock.NewAPI([]*ipamTypes.Subnet{testSubnet}, []*ipamTypes.VirtualNetwork{testVpc}, testSecurityGroups)
instances := NewInstancesManager(ec2api)
c.Assert(instances, check.Not(check.IsNil))
eniID1, _, err := ec2api.CreateNetworkInterface(context.TODO(), 1, "s-1", "desc", []string{"sg1", "sg2"})
eniID1, _, err := ec2api.CreateNetworkInterface(context.TODO(), 0, "s-1", "desc", []string{"sg1", "sg2"})
c.Assert(err, check.IsNil)
_, err = ec2api.AttachNetworkInterface(context.TODO(), 0, "i-testNodeManagerReleaseAddress-1", eniID1)
c.Assert(err, check.IsNil)
Expand All @@ -459,46 +459,46 @@ func (e *ENISuite) TestNodeManagerReleaseAddress(c *check.C) {
c.Assert(mngr, check.Not(check.IsNil))

// Announce node, wait for IPs to become available
cn := newCiliumNode("node3", "i-testNodeManagerReleaseAddress-1", "m4.xlarge", "us-west-1", "vpc-1", 0, 4, 10, 0)
cn.Spec.IPAM.MaxAboveWatermark = 4
cn := newCiliumNode("node3", "i-testNodeManagerReleaseAddress-1", "m4.xlarge", "us-west-1", "vpc-1", 0, 2, 10, 0)
cn.Spec.IPAM.MaxAboveWatermark = 3
mngr.Update(cn)
c.Assert(testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 5*time.Second), check.IsNil)

// 10 min-allocate + 4 max-above-watermark => 14 IPs must become
// available as 14 < 15 (interface limit)
// 10 min-allocate + 3 max-above-watermark => 13 IPs must become
// available as 13 < 14 (interface limit)
node := mngr.Get("node3")
c.Assert(node, check.Not(check.IsNil))
c.Assert(node.Stats().AvailableIPs, check.Equals, 14)
c.Assert(node.Stats().AvailableIPs, check.Equals, 13)
c.Assert(node.Stats().UsedIPs, check.Equals, 0)

// Use 11 out of 14 IPs, no additional IPs should be allocated
mngr.Update(updateCiliumNode(cn, 14, 11))
// Use 11 out of 13 IPs, no additional IPs should be allocated
mngr.Update(updateCiliumNode(cn, 13, 11))
c.Assert(testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 5*time.Second), check.IsNil)
node = mngr.Get("node3")
c.Assert(node, check.Not(check.IsNil))
c.Assert(node.Stats().AvailableIPs, check.Equals, 15)
c.Assert(node.Stats().AvailableIPs, check.Equals, 13)
c.Assert(node.Stats().UsedIPs, check.Equals, 11)

// Use 14 out of 15 IPs, PreAllocate 4 + MaxAboveWatermark must kick in
// and allocate 8 additional IPs
mngr.Update(updateCiliumNode(cn, 15, 14))
// Use 13 out of 13 IPs, PreAllocate 2 + MaxAboveWatermark 3 must kick in
// and allocate 5 additional IPs
mngr.Update(updateCiliumNode(cn, 13, 13))
c.Assert(testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 5*time.Second), check.IsNil)
node = mngr.Get("node3")
c.Assert(node, check.Not(check.IsNil))
c.Assert(node.Stats().AvailableIPs, check.Equals, 22)
c.Assert(node.Stats().UsedIPs, check.Equals, 14)
c.Assert(node.Stats().AvailableIPs, check.Equals, 18)
c.Assert(node.Stats().UsedIPs, check.Equals, 13)

// Reduce used IPs to 10, this leads to 15 excess IPs but release
// Reduce used IPs to 10, this leads to 8 excess IPs but release
// occurs at interval based resync, so expect timeout at first
mngr.Update(updateCiliumNode(cn, 22, 10))
mngr.Update(updateCiliumNode(cn, 18, 10))
c.Assert(testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 2*time.Second), check.Not(check.IsNil))
node = mngr.Get("node3")
c.Assert(node, check.Not(check.IsNil))
c.Assert(node.Stats().AvailableIPs, check.Equals, 22)
c.Assert(node.Stats().AvailableIPs, check.Equals, 18)
c.Assert(node.Stats().UsedIPs, check.Equals, 10)

// Trigger resync manually, excess IPs should be released
// 10 used + 4 pre-allocate + 4 max-above-watermark => 18
// 10 used + 3 pre-allocate + 2 max-above-watermark => 15
node = mngr.Get("node3")
eniNode, castOK := node.Ops().(*Node)
c.Assert(castOK, check.Equals, true)
Expand All @@ -512,13 +512,13 @@ func (e *ENISuite) TestNodeManagerReleaseAddress(c *check.C) {
c.Assert(testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 5*time.Second), check.IsNil)
node = mngr.Get("node3")
c.Assert(node, check.Not(check.IsNil))
c.Assert(node.Stats().AvailableIPs, check.Equals, 18)
c.Assert(node.Stats().AvailableIPs, check.Equals, 15)
c.Assert(node.Stats().UsedIPs, check.Equals, 10)
}

// TestNodeManagerExceedENICapacity tests exceeding ENI capacity
//
// - t2.xlarge (3x ENIs, 3x15 IPs)
// - t2.xlarge (3x ENIs, 3x15-3 IPs)
// - MinAllocate 20
// - MaxAllocate 0
// - PreAllocate 8
Expand Down Expand Up @@ -547,13 +547,14 @@ func (e *ENISuite) TestNodeManagerExceedENICapacity(c *check.C) {
c.Assert(node.Stats().UsedIPs, check.Equals, 0)

// Use 40 out of 42 available IPs, we should reach 0 address needed once we
// assigned the remaining 3 that the t2.xlarge instance type supports (45 max)
// assigned the remaining 3 that the t2.xlarge instance type supports
// (3x15 - 3 = 42 max)
mngr.Update(updateCiliumNode(cn, 42, 40))
c.Assert(testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node2", 0) }, 5*time.Second), check.IsNil)

node = mngr.Get("node2")
c.Assert(node, check.Not(check.IsNil))
c.Assert(node.Stats().AvailableIPs, check.Equals, 45)
c.Assert(node.Stats().AvailableIPs, check.Equals, 42)
c.Assert(node.Stats().UsedIPs, check.Equals, 40)
}

Expand All @@ -565,7 +566,7 @@ type nodeState struct {

// TestNodeManagerManyNodes tests IP allocation of 100 nodes across 3 subnets
//
// - c3.xlarge (4x ENIs, 4x15 IPs)
// - c3.xlarge (4x ENIs, 4x15-4 IPs)
// - MinAllocate 10
// - MaxAllocate 0
// - PreAllocate 1
Expand Down Expand Up @@ -684,7 +685,7 @@ func (e *ENISuite) TestNodeManagerInstanceNotRunning(c *check.C) {
// TestInstanceBeenDeleted verifies that instance deletion is correctly detected
// and no further action is taken
//
// - m4.large (2x ENIs, 2x10 IPs)
// - m4.large (2x ENIs, 2x10-2 IPs)
// - FirstInterfaceIndex 0
func (e *ENISuite) TestInstanceBeenDeleted(c *check.C) {
ec2api := ec2mock.NewAPI([]*ipamTypes.Subnet{testSubnet}, []*ipamTypes.VirtualNetwork{testVpc}, testSecurityGroups)
Expand Down
8 changes: 4 additions & 4 deletions pkg/aws/eni/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ func (e *ENISuite) TestGetMaximumAllocatableIPv4(c *check.C) {
// With no k8sObj defined, it should return 0
c.Assert(n.GetMaximumAllocatableIPv4(), check.Equals, 0)

// With instance-type = m5.large and first-interface-index = 0, we should be able to allocate up to 30 addresses
// With instance-type = m5.large and first-interface-index = 0, we should be able to allocate up to 3x10-3 addresses
n.k8sObj = newCiliumNode("node", "i-testnode", "m5.large", "eu-west-1", "test-vpc", 0, 0, 0, 0)
c.Assert(n.GetMaximumAllocatableIPv4(), check.Equals, 30)
c.Assert(n.GetMaximumAllocatableIPv4(), check.Equals, 27)

// With instance-type = m5.large and first-interface-index = 1, we should be able to allocate up to 20 addresses
// With instance-type = m5.large and first-interface-index = 1, we should be able to allocate up to 2x10-2 addresses
n.k8sObj = newCiliumNode("node", "i-testnode", "m5.large", "eu-west-1", "test-vpc", 1, 0, 0, 0)
c.Assert(n.GetMaximumAllocatableIPv4(), check.Equals, 20)
c.Assert(n.GetMaximumAllocatableIPv4(), check.Equals, 18)

// With instance-type = m5.large and first-interface-index = 4, we should return 0 as there is only 3 interfaces
n.k8sObj = newCiliumNode("node", "i-testnode", "m5.large", "eu-west-1", "test-vpc", 4, 0, 0, 0)
Expand Down
3 changes: 1 addition & 2 deletions pkg/aws/eni/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,7 @@ type ENI struct {
// +optional
VPC AwsVPC `json:"vpc,omitempty"`

// Addresses is the list of all IPs associated with the ENI, including
// all secondary addresses
// Addresses is the list of all secondary IPs associated with the ENI
//
// +optional
Addresses []string `json:"addresses,omitempty"`
Expand Down
4 changes: 2 additions & 2 deletions pkg/k8s/apis/cilium.io/client/crds/v2/ciliumnodes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -431,8 +431,8 @@ spec:
\n More details: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-eni.html"
properties:
addresses:
description: Addresses is the list of all IPs associated
with the ENI, including all secondary addresses
description: Addresses is the list of all secondary IPs
associated with the ENI
items:
type: string
type: array
Expand Down

0 comments on commit 119cd19

Please sign in to comment.