Skip to content

Commit

Permalink
fqdn: Limit the number of zombies per host
Browse files Browse the repository at this point in the history
Commit f6ce522 ("FQDN: Added garbage collector functions.")
introduced a per-host limit on the number of IPs to be associated in the
DNS cache, but at the time we did not support keeping FQDN entries alive
beyond DNS TTL ("zombie entries"). These were later added in commit
f629372 ("fqdn: Add and use DNSZombieMappings in Endpoint"), but at
that time no such per-host limit was imposed on these zombie entries.

Commit 5923daf ("fqdn: keep IPs alive if their name is alive")
later adjusted the zombie garbage collection to allow zombies to stay
alive as long as any IP that shares the same FQDN is marked as alive.
Unfortunately, this lead to situations where a very high number of DNS
cache entries remain in the cache beyond the DNS TTL, simply because one
IP for the given name continues to be used.

In the case of something like Amazon S3, where DNS TTLs are known to be
low, and IP recycling high, if an app constantly made requests via
ToFQDNs policy towards names hosted by this service, this could lead to
thousands of stale FQDN mappings accumulating in the cache. For each of
these mappings, Cilium would allocate corresponding identities, and when
this is combined with a permissive pod policy, this could lead to
policymaps becoming full, and error messages in the logs like:

    msg="Failed to add PolicyMap key" ...
    error="Unable to update element for map with file descriptor 67: argument list too long"

This could also prevent new pods from being scheduled on nodes, as
Cilium would be unable to implement the full requested policy for the
new endpoints.

In order to mitigate this situation, extend the per-host limit
configuration to apply separately also to zombie entries. This allows up
to 'ToFQDNsMaxIPsPerHost' FQDN entries that are alive (ie below DNS TTL)
in addition to a further 'ToFQDNsMaxIPsPerHost' zombie entries
corresponding to connections which remain alive beyond the DNS TTL.

Signed-off-by: Joe Stringer <joe@cilium.io>
  • Loading branch information
joestringer committed Apr 22, 2022
1 parent aafc70b commit ac93cb4
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 30 deletions.
2 changes: 1 addition & 1 deletion pkg/endpoint/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ func createEndpoint(owner regeneration.Owner, policyGetter policyRepoGetter, nam
OpLabels: labels.NewOpLabels(),
DNSRules: nil,
DNSHistory: fqdn.NewDNSCacheWithLimit(option.Config.ToFQDNsMinTTL, option.Config.ToFQDNsMaxIPsPerHost),
DNSZombies: fqdn.NewDNSZombieMappings(option.Config.ToFQDNsMaxDeferredConnectionDeletes),
DNSZombies: fqdn.NewDNSZombieMappings(option.Config.ToFQDNsMaxDeferredConnectionDeletes, option.Config.ToFQDNsMaxIPsPerHost),
state: "",
status: NewEndpointStatus(),
hasBPFProgram: make(chan struct{}, 0),
Expand Down
2 changes: 1 addition & 1 deletion pkg/endpoint/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ func (ep *Endpoint) UnmarshalJSON(raw []byte) error {
restoredEp := &serializableEndpoint{
OpLabels: labels.NewOpLabels(),
DNSHistory: fqdn.NewDNSCacheWithLimit(option.Config.ToFQDNsMinTTL, option.Config.ToFQDNsMaxIPsPerHost),
DNSZombies: fqdn.NewDNSZombieMappings(option.Config.ToFQDNsMaxDeferredConnectionDeletes),
DNSZombies: fqdn.NewDNSZombieMappings(option.Config.ToFQDNsMaxDeferredConnectionDeletes, option.Config.ToFQDNsMaxIPsPerHost),
}
if err := json.Unmarshal(raw, restoredEp); err != nil {
return fmt.Errorf("error unmarshaling serializableEndpoint from base64 representation: %s", err)
Expand Down
104 changes: 83 additions & 21 deletions pkg/fqdn/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,10 @@ import (
"time"
"unsafe"

"github.com/sirupsen/logrus"

"github.com/cilium/cilium/pkg/fqdn/matchpattern"
"github.com/cilium/cilium/pkg/ip"
"github.com/cilium/cilium/pkg/lock"
"github.com/cilium/cilium/pkg/logging/logfields"
"github.com/cilium/cilium/pkg/option"
)

// cacheEntry objects hold data passed in via DNSCache.Update, nominally
Expand Down Expand Up @@ -716,13 +714,17 @@ type DNSZombieMappings struct {
deletes map[string]*DNSZombieMapping // map[ip]toDelete
lastCTGCUpdate time.Time
max int // max allowed zombies

// perHostLimit is the number of maximum number of IP per host.
perHostLimit int
}

// NewDNSZombieMappings constructs a DNSZombieMappings that is read to use
func NewDNSZombieMappings(max int) *DNSZombieMappings {
func NewDNSZombieMappings(max, perHostLimit int) *DNSZombieMappings {
return &DNSZombieMappings{
deletes: make(map[string]*DNSZombieMapping),
max: max,
deletes: make(map[string]*DNSZombieMapping),
max: max,
perHostLimit: perHostLimit,
}
}

Expand Down Expand Up @@ -762,12 +764,16 @@ func (zombies *DNSZombieMappings) isConnectionAlive(zombie *DNSZombieMapping) bo

// getAliveNames returns all the names that are alive
// a name is alive if at least one of the IPs that resolve to it is alive
func (zombies *DNSZombieMappings) getAliveNames() map[string]struct{} {
var aliveNames map[string]struct{} = map[string]struct{}{}
func (zombies *DNSZombieMappings) getAliveNames() map[string][]*DNSZombieMapping {
aliveNames := make(map[string][]*DNSZombieMapping)

for _, z := range zombies.deletes {
if zombies.isConnectionAlive(z) {
for _, name := range z.Names {
aliveNames[name] = struct{}{}
if _, ok := aliveNames[name]; !ok {
aliveNames[name] = make([]*DNSZombieMapping, 0, 5)
}
aliveNames[name] = append(aliveNames[name], z)
}
}
}
Expand All @@ -780,22 +786,27 @@ func (zombies *DNSZombieMappings) getAliveNames() map[string]struct{} {
// A zombie is alive if its connection is alive or if one of its names is
// alive. The function takes an argument that contains the aliveNames (can be
// obtained via getAliveNames())
func (zombies *DNSZombieMappings) isZombieAlive(zombie *DNSZombieMapping, aliveNames map[string]struct{}) bool {
func (zombies *DNSZombieMappings) isZombieAlive(zombie *DNSZombieMapping, aliveNames map[string][]*DNSZombieMapping) (alive, overLimit bool) {
if zombies.isConnectionAlive(zombie) {
return true
alive = true
if zombies.perHostLimit == 0 {
return alive, overLimit
}
}

for _, name := range zombie.Names {
if _, ok := aliveNames[name]; ok {
log.WithFields(logrus.Fields{
logfields.DNSName: name,
logfields.IPAddr: zombie.IP,
}).Debug("FQDN has multiple IPs. One IP has an expired TTL.")
return true
if z, ok := aliveNames[name]; ok {
alive = true
if zombies.perHostLimit == 0 {
return alive, overLimit
} else if len(z) > zombies.perHostLimit {
overLimit = true
return alive, overLimit
}
}
}

return false
return alive, overLimit
}

// sortZombieMappingSlice sorts the provided slice by whether the connection is
Expand Down Expand Up @@ -823,18 +834,69 @@ func (zombies *DNSZombieMappings) GC() (alive, dead []*DNSZombieMapping) {
zombies.Lock()
defer zombies.Unlock()

var aliveNames map[string]struct{} = zombies.getAliveNames()
aliveNames := zombies.getAliveNames()

// Collect zombies we can delete
for _, zombie := range zombies.deletes {
if zombies.isZombieAlive(zombie, aliveNames) {
zombieAlive, overLimit := zombies.isZombieAlive(zombie, aliveNames)
if overLimit {
// No-op: This zombie is part of a name in 'aliveNames'
// that needs to impose a per-host IP limit. Decide
// whether to add to alive or dead in the next loop.
} else if zombieAlive {
alive = append(alive, zombie.DeepCopy())
} else {
// Emit the actual object here since we will no longer update it
dead = append(dead, zombie)
}
}

if zombies.perHostLimit > 0 {
warnActiveDNSEntries := false
deadIdx := len(dead)

// Find names which have too many IPs associated mark them dead.
//
// Multiple names can refer to the same IP, so if we expire the
// zombie by IP then we need to ensure that it doesn't get
// added to both 'alive' and 'dead'.
//
// 1) Assemble all of the 'dead', starting from 'deadIdx'.
// Assemble alive candidates in 'possibleAlive'.
// 2) Ensure that 'possibleAlive' doesn't contain any of the
// entries in 'dead[deadIdx:]'.
// 3) Add the remaining 'possibleAlive' to 'alive'.
possibleAlive := make(map[*DNSZombieMapping]struct{})
for _, aliveIPsForName := range aliveNames {
if len(aliveIPsForName) <= zombies.perHostLimit {
// Already handled in the loop above.
continue
}
overLimit := len(aliveIPsForName) - zombies.perHostLimit
sortZombieMappingSlice(aliveIPsForName)
dead = append(dead, aliveIPsForName[:overLimit]...)
for _, z := range aliveIPsForName[overLimit:] {
possibleAlive[z] = struct{}{}
}
if dead[len(dead)-1].AliveAt.IsZero() {
warnActiveDNSEntries = true
}
}
if warnActiveDNSEntries {
log.Warningf("Evicting expired DNS cache entries that may be in-use due to per-host limits. This may cause recently created connections to be disconnected. Raise %s to mitigate this.", option.ToFQDNsMaxIPsPerHost)
}

for _, dead := range dead[deadIdx:] {
if _, ok := possibleAlive[dead]; ok {
delete(possibleAlive, dead)
}
}

for zombie := range possibleAlive {
alive = append(alive, zombie.DeepCopy())
}
}

// Limit alive zombies to max. This is messy, and will break some existing
// connections. We sort by whether the connection is marked alive or not, the
// oldest created connections, and tie-break by the number of DNS names for
Expand Down Expand Up @@ -977,7 +1039,7 @@ func (zombies *DNSZombieMappings) DumpAlive(cidrMatcher CIDRMatcherFunc) (alive

aliveNames := zombies.getAliveNames()
for _, zombie := range zombies.deletes {
if !zombies.isZombieAlive(zombie, aliveNames) {
if alive, _ := zombies.isZombieAlive(zombie, aliveNames); !alive {
continue
}
// only proceed if zombie is alive and the IP matches the CIDR selector
Expand Down
38 changes: 31 additions & 7 deletions pkg/fqdn/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ func assertZombiesContain(c *C, zombies []*DNSZombieMapping, mappings map[string

func (ds *DNSCacheTestSuite) TestZombiesSiblingsGC(c *C) {
now := time.Now()
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes)
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes, defaults.ToFQDNsMaxIPsPerHost)

// Siblings are IPs that resolve to the same name.
zombies.Upsert(now, "1.1.1.1", "test.com")
Expand All @@ -640,7 +640,7 @@ func (ds *DNSCacheTestSuite) TestZombiesSiblingsGC(c *C) {

func (ds *DNSCacheTestSuite) TestZombiesGC(c *C) {
now := time.Now()
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes)
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes, defaults.ToFQDNsMaxIPsPerHost)

zombies.Upsert(now, "1.1.1.1", "test.com")
zombies.Upsert(now, "2.2.2.2", "somethingelse.com")
Expand Down Expand Up @@ -719,9 +719,33 @@ func (ds *DNSCacheTestSuite) TestZombiesGC(c *C) {
})
}

func (ds *DNSCacheTestSuite) TestZombiesGCOverLimit(c *C) {
now := time.Now()
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes, 1)

// Limit the total number of IPs to be associated with a specific host
// to 1, but associate 'test.com' with multiple IPs.
zombies.Upsert(now, "1.1.1.1", "test.com")
zombies.Upsert(now, "2.2.2.2", "somethingelse.com", "test.com")
zombies.Upsert(now, "3.3.3.3", "anothertest.com")

// Based on the zombie liveness sorting, the '2.2.2.2' entry is more
// important (as it could potentially impact multiple apps connecting
// to different domains), so it should be kept alive when sweeping to
// enforce the max per-host IP limit for names.
alive, dead := zombies.GC()
assertZombiesContain(c, dead, map[string][]string{
"1.1.1.1": {"test.com"},
})
assertZombiesContain(c, alive, map[string][]string{
"2.2.2.2": {"somethingelse.com", "test.com"},
"3.3.3.3": {"anothertest.com"},
})
}

func (ds *DNSCacheTestSuite) TestZombiesGCDeferredDeletes(c *C) {
now := time.Now()
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes)
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes, defaults.ToFQDNsMaxIPsPerHost)

zombies.Upsert(now.Add(0*time.Second), "1.1.1.1", "test.com")
zombies.Upsert(now.Add(1*time.Second), "2.2.2.2", "somethingelse.com")
Expand All @@ -736,7 +760,7 @@ func (ds *DNSCacheTestSuite) TestZombiesGCDeferredDeletes(c *C) {
"3.3.3.3": {"onemorething.com"},
})

zombies = NewDNSZombieMappings(2)
zombies = NewDNSZombieMappings(2, defaults.ToFQDNsMaxIPsPerHost)
zombies.Upsert(now.Add(0*time.Second), "1.1.1.1", "test.com")

// No zombies should be evicted because we are below the limit
Expand Down Expand Up @@ -779,7 +803,7 @@ func (ds *DNSCacheTestSuite) TestZombiesGCDeferredDeletes(c *C) {

func (ds *DNSCacheTestSuite) TestZombiesForceExpire(c *C) {
now := time.Now()
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes)
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes, defaults.ToFQDNsMaxIPsPerHost)

zombies.Upsert(now, "1.1.1.1", "test.com", "anothertest.com")
zombies.Upsert(now, "2.2.2.2", "somethingelse.com")
Expand Down Expand Up @@ -854,7 +878,7 @@ func (ds *DNSCacheTestSuite) TestZombiesForceExpire(c *C) {
func (ds *DNSCacheTestSuite) TestCacheToZombiesGCCascade(c *C) {
now := time.Now()
cache := NewDNSCache(0)
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes)
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes, defaults.ToFQDNsMaxIPsPerHost)

// Add entries that should expire at different times
cache.Update(now, "test.com", []net.IP{net.ParseIP("1.1.1.1"), net.ParseIP("2.2.2.2")}, 3)
Expand Down Expand Up @@ -885,7 +909,7 @@ func (ds *DNSCacheTestSuite) TestCacheToZombiesGCCascade(c *C) {

func (ds *DNSCacheTestSuite) TestZombiesDumpAlive(c *C) {
now := time.Now()
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes)
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes, defaults.ToFQDNsMaxIPsPerHost)

alive := zombies.DumpAlive(nil)
c.Assert(alive, HasLen, 0)
Expand Down

0 comments on commit ac93cb4

Please sign in to comment.