ipam: move podCIDRPool to separate file
It will be used by the IPAM pools allocator in a subsequent
commit. This commit contains no functional changes.

Signed-off-by: Sebastian Wicki <sebastian@isovalent.com>
Signed-off-by: Tobias Klauser <tobias@cilium.io>
gandro committed Mar 8, 2023
1 parent a8ea043 commit f4bae37
Showing 2 changed files with 352 additions and 336 deletions.
336 changes: 0 additions & 336 deletions pkg/ipam/clusterpool.go
@@ -12,7 +12,6 @@ import (
"sync"
"time"

"github.com/cilium/ipam/service/ipallocator"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"go.uber.org/multierr"
@@ -21,7 +20,6 @@ import (

"github.com/cilium/cilium/pkg/cidr"
"github.com/cilium/cilium/pkg/controller"
"github.com/cilium/cilium/pkg/defaults"
"github.com/cilium/cilium/pkg/inctimer"
"github.com/cilium/cilium/pkg/ipam/types"
"github.com/cilium/cilium/pkg/k8s"
@@ -30,7 +28,6 @@ import (
"github.com/cilium/cilium/pkg/k8s/watchers/subscriber"
"github.com/cilium/cilium/pkg/lock"
"github.com/cilium/cilium/pkg/logging/logfields"
"github.com/cilium/cilium/pkg/option"
"github.com/cilium/cilium/pkg/trigger"
)

@@ -39,339 +36,6 @@ const (
clusterPoolStatusTriggerName = "sync-clusterpool-status-trigger"
)

// A podCIDRPool manages the allocation of IPs in multiple pod CIDRs.
// It maintains one IP allocator for each pod CIDR in the pool.
// Unused pod CIDRs which have been marked as released, but not yet deleted
// from the local CiliumNode CRD by the operator are put into the released set.
// Once the operator removes a released pod CIDR from the CiliumNode CRD spec,
// it is also deleted from the released set.
// Pod CIDRs which have been erroneously deleted from the CiliumNode CRD spec
// (either by a buggy operator or by manual/human changes to the CRD) are marked
// in the removed map. If IP addresses have been allocated from such a pod CIDR,
// its allocator is kept around, but no new IPs will be allocated from this
// pod CIDR. By keeping removed CIDRs in the CiliumNode CRD status, we indicate
// to the operator that we would like to regain ownership over that pod CIDR.
type podCIDRPool struct {
mutex lock.Mutex
ipAllocators []*ipallocator.Range
released map[string]struct{}
removed map[string]struct{}
allocationThreshold int
releaseThreshold int
}

// newPodCIDRPool creates a new pod CIDR pool with the parameters used
// to manage the pod CIDR status:
// - allocationThreshold defines the minimum number of free IPs in this pool
// before all used CIDRs are marked as depleted (causing the operator to
// allocate a new one)
// - releaseThreshold defines the maximum number of free IPs in this pool
// before unused CIDRs are marked for release.
// - previouslyReleasedCIDRs contains a list of pod CIDRs which were allocated
// to this node, but have been released before the agent was restarted. We
// keep track of them to avoid accidental use-after-free after an agent restart.
func newPodCIDRPool(allocationThreshold, releaseThreshold int, previouslyReleasedCIDRs []string) *podCIDRPool {
if allocationThreshold <= 0 {
allocationThreshold = defaults.IPAMPodCIDRAllocationThreshold
}

if releaseThreshold <= 0 {
releaseThreshold = defaults.IPAMPodCIDRReleaseThreshold
}

released := make(map[string]struct{}, len(previouslyReleasedCIDRs))
for _, releasedCIDR := range previouslyReleasedCIDRs {
released[releasedCIDR] = struct{}{}
}

return &podCIDRPool{
released: released,
removed: map[string]struct{}{},
allocationThreshold: allocationThreshold,
releaseThreshold: releaseThreshold,
}
}

func (p *podCIDRPool) allocate(ip net.IP) error {
p.mutex.Lock()
defer p.mutex.Unlock()

for _, ipAllocator := range p.ipAllocators {
cidrNet := ipAllocator.CIDR()
if cidrNet.Contains(ip) {
return ipAllocator.Allocate(ip)
}
}

return fmt.Errorf("IP %s not in range of any pod CIDR", ip)
}

func (p *podCIDRPool) allocateNext() (net.IP, error) {
p.mutex.Lock()
defer p.mutex.Unlock()

// When allocating a random IP, we try the pod CIDRs in the order they are
// listed in the CRD. This avoids internal fragmentation.
for _, ipAllocator := range p.ipAllocators {
cidrNet := ipAllocator.CIDR()
cidrStr := cidrNet.String()
if _, removed := p.removed[cidrStr]; removed {
continue
}
if ipAllocator.Free() == 0 {
continue
}
return ipAllocator.AllocateNext()
}

return nil, errors.New("all pod CIDR ranges are exhausted")
}

func (p *podCIDRPool) release(ip net.IP) error {
p.mutex.Lock()
defer p.mutex.Unlock()

for _, ipAllocator := range p.ipAllocators {
cidrNet := ipAllocator.CIDR()
if cidrNet.Contains(ip) {
return ipAllocator.Release(ip)
}
}

return nil
}

func (p *podCIDRPool) hasAvailableIPs() bool {
p.mutex.Lock()
defer p.mutex.Unlock()

for _, ipAllocator := range p.ipAllocators {
cidrNet := ipAllocator.CIDR()
cidrStr := cidrNet.String()
if _, removed := p.removed[cidrStr]; removed {
continue
}
if ipAllocator.Free() > 0 {
return true
}
}

return false
}

func (p *podCIDRPool) inUsePodCIDRsLocked() []string {
podCIDRs := make([]string, 0, len(p.ipAllocators))
for _, ipAllocator := range p.ipAllocators {
ipnet := ipAllocator.CIDR()
podCIDRs = append(podCIDRs, ipnet.String())
}
return podCIDRs
}

func (p *podCIDRPool) dump() (ipToOwner map[string]string, usedIPs, freeIPs, numPodCIDRs int, err error) {
// TODO(gandro): Use the Snapshot interface to avoid locking during dump
p.mutex.Lock()
defer p.mutex.Unlock()

ipToOwner = map[string]string{}
for _, ipAllocator := range p.ipAllocators {
cidrNet := ipAllocator.CIDR()
cidrStr := cidrNet.String()
usedIPs += ipAllocator.Used()
if _, removed := p.removed[cidrStr]; !removed {
freeIPs += ipAllocator.Free()
}
ipAllocator.ForEach(func(ip net.IP) {
ipToOwner[ip.String()] = ""
})
}
numPodCIDRs = len(p.ipAllocators)

return
}

func (p *podCIDRPool) status() types.PodCIDRMap {
p.mutex.Lock()
defer p.mutex.Unlock()

result := types.PodCIDRMap{}

// Mark all released pod CIDRs as released.
for cidrStr := range p.released {
result[cidrStr] = types.PodCIDRMapEntry{
Status: types.PodCIDRStatusReleased,
}
}

// Compute the total number of free and used IPs for all non-released pod
// CIDRs.
totalUsed := 0
totalFree := 0
for _, r := range p.ipAllocators {
cidrNet := r.CIDR()
cidrStr := cidrNet.String()
if _, released := p.released[cidrStr]; released {
continue
}
totalUsed += r.Used()
if _, removed := p.removed[cidrStr]; !removed {
totalFree += r.Free()
}
}

if totalFree < p.allocationThreshold {
// If the total number of free IPs is below the allocation threshold,
// then mark all pod CIDRs as depleted, unless they have already been
// released.
for _, ipAllocator := range p.ipAllocators {
cidrNet := ipAllocator.CIDR()
cidrStr := cidrNet.String()
if _, released := p.released[cidrStr]; released {
continue
}
result[cidrStr] = types.PodCIDRMapEntry{
Status: types.PodCIDRStatusDepleted,
}
}
} else {
// Iterate over pod CIDRs in reverse order so we prioritize releasing
// later pod CIDRs.
for i := len(p.ipAllocators) - 1; i >= 0; i-- {
ipAllocator := p.ipAllocators[i]
cidrNet := ipAllocator.CIDR()
cidrStr := cidrNet.String()
if _, released := p.released[cidrStr]; released {
continue
}
var status types.PodCIDRStatus
if ipAllocator.Used() > 0 {
// If a pod CIDR is used, then mark it as in-use or depleted.
if ipAllocator.Free() == 0 {
status = types.PodCIDRStatusDepleted
} else {
status = types.PodCIDRStatusInUse
}
} else if _, removed := p.removed[cidrStr]; removed {
// Otherwise, if the pod CIDR has been removed, then mark it as released.
p.released[cidrStr] = struct{}{}
delete(p.removed, cidrStr)
status = types.PodCIDRStatusReleased
log.WithField(logfields.CIDR, cidrStr).Debug("releasing removed pod CIDR")
} else if free := ipAllocator.Free(); totalFree-free >= p.releaseThreshold {
// Otherwise, if the pod CIDR is not used and releasing it would
// not take us below the release threshold, then release it and
// mark it as released.
p.released[cidrStr] = struct{}{}
totalFree -= free
status = types.PodCIDRStatusReleased
log.WithField(logfields.CIDR, cidrStr).Debug("releasing pod CIDR")
} else {
// Otherwise, mark the pod CIDR as in-use.
status = types.PodCIDRStatusInUse
}
result[cidrStr] = types.PodCIDRMapEntry{
Status: status,
}
}
}

return result
}

func (p *podCIDRPool) updatePool(podCIDRs []string) {
p.mutex.Lock()
defer p.mutex.Unlock()

if option.Config.Debug {
log.WithFields(logrus.Fields{
logfields.NewCIDR: podCIDRs,
logfields.OldCIDR: p.inUsePodCIDRsLocked(),
}).Debug("Updating IPAM pool")
}

// Parse the pod CIDRs, ignoring invalid CIDRs, and de-duplicating them.
cidrNets := make([]*net.IPNet, 0, len(podCIDRs))
cidrStrSet := make(map[string]struct{}, len(podCIDRs))
for _, podCIDR := range podCIDRs {
_, cidr, err := net.ParseCIDR(podCIDR)
if err != nil {
log.WithError(err).WithField(logfields.CIDR, podCIDR).Error("ignoring invalid pod CIDR")
continue
}
if _, ok := cidrStrSet[cidr.String()]; ok {
log.WithField(logfields.CIDR, podCIDR).Error("ignoring duplicate pod CIDR")
continue
}
cidrNets = append(cidrNets, cidr)
cidrStrSet[cidr.String()] = struct{}{}
}

// Forget any released pod CIDRs no longer present in the CRD.
for cidrStr := range p.released {
if _, ok := cidrStrSet[cidrStr]; !ok {
log.WithField(logfields.CIDR, cidrStr).Debug("removing released pod CIDR")
delete(p.released, cidrStr)
}

if option.Config.EnableUnreachableRoutes {
if err := cleanupUnreachableRoutes(cidrStr); err != nil {
log.WithFields(logrus.Fields{
logfields.CIDR: cidrStr,
logrus.ErrorKey: err,
}).Warning("failed to remove unreachable routes for pod cidr")
}
}
}

// newIPAllocators is the new slice of IP allocators.
newIPAllocators := make([]*ipallocator.Range, 0, len(podCIDRs))

// addedCIDRs is the set of pod CIDRs that have been added to newIPAllocators.
addedCIDRs := make(map[string]struct{}, len(p.ipAllocators))

// Add existing IP allocators to newIPAllocators in order.
for _, ipAllocator := range p.ipAllocators {
cidrNet := ipAllocator.CIDR()
cidrStr := cidrNet.String()
if _, ok := cidrStrSet[cidrStr]; !ok {
if ipAllocator.Used() == 0 {
continue
}
log.WithField(logfields.CIDR, cidrStr).Error("in-use pod CIDR was removed from spec")
p.removed[cidrStr] = struct{}{}
}
newIPAllocators = append(newIPAllocators, ipAllocator)
addedCIDRs[cidrStr] = struct{}{}
}

// Create and add new IP allocators to newIPAllocators.
for _, cidrNet := range cidrNets {
cidrStr := cidrNet.String()
if _, ok := addedCIDRs[cidrStr]; ok {
continue
}
ipAllocator, err := ipallocator.NewCIDRRange(cidrNet)
if err != nil {
log.WithError(err).WithField(logfields.CIDR, cidrStr).Error("cannot create *ipallocator.Range")
continue
}
if ipAllocator.Free() == 0 {
log.WithField(logfields.CIDR, cidrNet.String()).Error("skipping too-small pod CIDR")
p.released[cidrNet.String()] = struct{}{}
continue
}
log.WithField(logfields.CIDR, cidrStr).Debug("created new pod CIDR allocator")
newIPAllocators = append(newIPAllocators, ipAllocator)
addedCIDRs[cidrStr] = struct{}{} // Protect against duplicate CIDRs.
}

if len(p.ipAllocators) > 0 && len(newIPAllocators) == 0 {
log.Warning("Removed last pod CIDR allocator")
}

p.ipAllocators = newIPAllocators
}

// containsCIDR checks if the outer IPNet contains the inner IPNet
func containsCIDR(outer, inner *net.IPNet) bool {
outerMask, _ := outer.Mask.Size()
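For orientation, below is a minimal sketch of how the moved podCIDRPool is typically driven, based only on the functions visible in this diff (newPodCIDRPool, updatePool, allocateNext, release, status). Because the type and its constructor are unexported, such code would have to live inside package ipam (for example in a test); the threshold values and example CIDRs are illustrative assumptions, not Cilium defaults.

package ipam

import (
	"fmt"
	"net"
)

// examplePodCIDRPoolUsage is an illustrative, package-internal sketch of
// driving podCIDRPool. The thresholds and CIDRs below are made-up example
// values, not Cilium defaults.
func examplePodCIDRPoolUsage() (net.IP, error) {
	// allocationThreshold=8, releaseThreshold=16, no previously released CIDRs.
	pool := newPodCIDRPool(8, 16, nil)

	// The agent feeds the pod CIDRs from the CiliumNode CRD spec into the
	// pool; updatePool ignores invalid and duplicate CIDRs.
	pool.updatePool([]string{"10.0.0.0/27", "10.0.1.0/27"})

	// Allocate the next free IP, trying the pod CIDRs in CRD order.
	ip, err := pool.allocateNext()
	if err != nil {
		return nil, fmt.Errorf("all pod CIDRs exhausted: %w", err)
	}

	// status() reports per-CIDR state (in-use, depleted, released), which the
	// agent writes back to the CiliumNode CRD status for the operator.
	_ = pool.status()

	// Release the IP back to whichever pod CIDR contains it.
	return ip, pool.release(ip)
}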
