Skip to content

Commit

Permalink
fqdn: avoid expensive sort/unique of names during GC
Browse files Browse the repository at this point in the history
Return the names affected by GC in a set instead of a slice. This avoids
having to repeatedly sort and unique the slice, which is a potentially
expensive operation if there are many FQDN names to garbage collect. The
consumer in `(*NameManager).GC` expects a `sets.Set` already anyway.

Signed-off-by: Tobias Klauser <tobias@cilium.io>
  • Loading branch information
tklauser committed Mar 13, 2024
1 parent 9fbe73c commit 16caf0e
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 44 deletions.
41 changes: 21 additions & 20 deletions pkg/fqdn/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,25 +240,26 @@ func (c *DNSCache) addNameToCleanup(entry *cacheEntry) {
// cleanups begin from that time.
// It returns the set of names that have expired data and a map of removed DNS
// cache entries, keyed by IP.
func (c *DNSCache) cleanupExpiredEntries(expires time.Time) (affectedNames []string, removed map[netip.Addr][]*cacheEntry) {
func (c *DNSCache) cleanupExpiredEntries(expires time.Time) (affectedNames sets.Set[string], removed map[netip.Addr][]*cacheEntry) {
if c.lastCleanup.IsZero() {
return nil, nil
}

var toCleanNames []string
toCleanNames := sets.New[string]()
for c.lastCleanup.Before(expires) {
key := c.lastCleanup.Unix()
if entries, exists := c.cleanup[key]; exists {
toCleanNames = append(toCleanNames, entries...)
toCleanNames.Insert(entries...)
delete(c.cleanup, key)
}
c.lastCleanup = c.lastCleanup.Add(time.Second).Truncate(time.Second)
}

affectedNames = sets.New[string]()
removed = make(map[netip.Addr][]*cacheEntry)
for _, name := range slices.Unique(toCleanNames) {
for name := range toCleanNames {
if entries, exists := c.forward[name]; exists {
affectedNames = append(affectedNames, name)
affectedNames.Insert(name)
for ip, entry := range c.removeExpired(entries, c.lastCleanup, time.Time{}) {
removed[ip] = append(removed[ip], entry)
}
Expand All @@ -271,18 +272,20 @@ func (c *DNSCache) cleanupExpiredEntries(expires time.Time) (affectedNames []str
// cleanupOverLimitEntries returns the names that have reached the max number of
// IPs per host. Internally the function sorts the entries by their expiration
// time.
func (c *DNSCache) cleanupOverLimitEntries() (affectedNames []string, removed map[netip.Addr][]*cacheEntry) {
func (c *DNSCache) cleanupOverLimitEntries() (affectedNames sets.Set[string], removed map[netip.Addr][]*cacheEntry) {
type IPEntry struct {
ip netip.Addr
entry *cacheEntry
}
removed = make(map[netip.Addr][]*cacheEntry)

// For the global cache the limit may not be used at all.
if c.perHostLimit == 0 {
return affectedNames, nil
return nil, nil
}

affectedNames = sets.New[string]()
removed = make(map[netip.Addr][]*cacheEntry)

for dnsName := range c.overLimit {
entries, ok := c.forward[dnsName]
if !ok {
Expand All @@ -307,7 +310,7 @@ func (c *DNSCache) cleanupOverLimitEntries() (affectedNames []string, removed ma
c.remove(key.ip, key.entry)
removed[key.ip] = append(removed[key.ip], key.entry)
}
affectedNames = append(affectedNames, dnsName)
affectedNames.Insert(dnsName)
}
c.overLimit = map[string]bool{}
return affectedNames, removed
Expand All @@ -319,7 +322,7 @@ func (c *DNSCache) cleanupOverLimitEntries() (affectedNames []string, removed ma
// other management of zombies is left to the caller.
// Note: zombies use the original lookup's ExpirationTime for DeletePendingAt,
// not the now parameter. This allows better ordering in zombie GC.
func (c *DNSCache) GC(now time.Time, zombies *DNSZombieMappings) (affectedNames []string) {
func (c *DNSCache) GC(now time.Time, zombies *DNSZombieMappings) (affectedNames sets.Set[string]) {
c.Lock()
expiredNames, expiredEntries := c.cleanupExpiredEntries(now)
overLimitNames, overLimitEntries := c.cleanupOverLimitEntries()
Expand Down Expand Up @@ -349,7 +352,7 @@ func (c *DNSCache) GC(now time.Time, zombies *DNSZombieMappings) (affectedNames
}
}

return slices.Unique(append(expiredNames, overLimitNames...))
return expiredNames.Union(overLimitNames)
}

// UpdateFromCache is a utility function that allows updating a DNSCache
Expand Down Expand Up @@ -605,10 +608,12 @@ func (c *DNSCache) GetIPs() sets.Set[netip.Addr] {
// expireLookupsBefore requires a lookup to have a LookupTime before it in
// order to remove it.
// nameMatch will remove any DNS names that match.
func (c *DNSCache) ForceExpire(expireLookupsBefore time.Time, nameMatch *regexp.Regexp) (namesAffected []string) {
func (c *DNSCache) ForceExpire(expireLookupsBefore time.Time, nameMatch *regexp.Regexp) (namesAffected sets.Set[string]) {
c.Lock()
defer c.Unlock()

namesAffected = sets.New[string]()

for name, entries := range c.forward {
// If nameMatch was passed in, we must match it. Otherwise, "match all".
if nameMatch != nil && !nameMatch.MatchString(name) {
Expand All @@ -619,14 +624,14 @@ func (c *DNSCache) ForceExpire(expireLookupsBefore time.Time, nameMatch *regexp.
// The second expireLookupsBefore actually matches lookup times, and will
// delete the entries completely.
for _, entry := range c.removeExpired(entries, expireLookupsBefore, expireLookupsBefore) {
namesAffected = append(namesAffected, entry.Name)
namesAffected.Insert(entry.Name)
}
}

return slices.Unique(namesAffected)
return namesAffected
}

func (c *DNSCache) forceExpireByNames(expireLookupsBefore time.Time, names []string) (namesAffected []string) {
func (c *DNSCache) forceExpireByNames(expireLookupsBefore time.Time, names []string) {
for _, name := range names {
entries, exists := c.forward[name]
if !exists {
Expand All @@ -637,12 +642,8 @@ func (c *DNSCache) forceExpireByNames(expireLookupsBefore time.Time, names []str
// because LookupTime must be before ExpirationTime.
// The second expireLookupsBefore actually matches lookup times, and will
// delete the entries completely.
for _, entry := range c.removeExpired(entries, expireLookupsBefore, expireLookupsBefore) {
namesAffected = append(namesAffected, entry.Name)
}
c.removeExpired(entries, expireLookupsBefore, expireLookupsBefore)
}

return namesAffected
}

// Dump returns unexpired cache entries in the cache. They are deduplicated,
Expand Down
33 changes: 14 additions & 19 deletions pkg/fqdn/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"time"

. "github.com/cilium/checkmate"
"k8s.io/apimachinery/pkg/util/sets"

"github.com/cilium/cilium/pkg/checker"
"github.com/cilium/cilium/pkg/defaults"
Expand Down Expand Up @@ -104,7 +105,7 @@ func (ds *DNSCacheTestSuite) TestDelete(c *C) {
nameMatch, err := regexp.Compile("^notatest.com$")
c.Assert(err, IsNil)
namesAffected := cache.ForceExpire(now, nameMatch)
c.Assert(len(namesAffected), Equals, 0, Commentf("Incorrect count of names removed %v", namesAffected))
c.Assert(namesAffected.Len(), Equals, 0, Commentf("Incorrect count of names removed %v", namesAffected))
for _, name := range []string{"test1.com", "test2.com", "test3.com"} {
ips := cache.lookupByTime(now, name)
c.Assert(len(ips), Equals, 2, Commentf("Wrong count of IPs returned (%v) for non-deleted name '%s'", ips, name))
Expand All @@ -120,8 +121,8 @@ func (ds *DNSCacheTestSuite) TestDelete(c *C) {
nameMatch, err = regexp.Compile("^test1.com$")
c.Assert(err, IsNil)
namesAffected = cache.ForceExpire(now, nameMatch)
c.Assert(len(namesAffected), Equals, 1, Commentf("Incorrect count of names removed %v", namesAffected))
c.Assert(namesAffected[0], Equals, "test1.com", Commentf("Incorrect affected name returned on forced expire: %s", namesAffected))
c.Assert(namesAffected.Len(), Equals, 1, Commentf("Incorrect count of names removed %v", namesAffected))
c.Assert(namesAffected.Has("test1.com"), Equals, true, Commentf("Incorrect affected name returned on forced expire: %s", namesAffected))
ips := cache.lookupByTime(now, "test1.com")
c.Assert(len(ips), Equals, 0, Commentf("IPs returned (%v) for deleted name 'test1.com'", ips))
c.Assert(cache.forward, Not(checker.HasKey), "test1.com", Commentf("Expired name 'test1.com' not deleted from forward"))
Expand All @@ -139,10 +140,9 @@ func (ds *DNSCacheTestSuite) TestDelete(c *C) {

// Delete the whole cache. This should leave no data.
namesAffected = cache.ForceExpire(now, nil)
sort.Strings(namesAffected) // simplify the checks below
c.Assert(len(namesAffected), Equals, 2, Commentf("Incorrect count of names removed %v", namesAffected))
for i, name := range []string{"test2.com", "test3.com"} {
c.Assert(namesAffected[i], Equals, name, Commentf("Incorrect affected name returned on forced expire"))
c.Assert(namesAffected.Len(), Equals, 2, Commentf("Incorrect count of names removed %v", namesAffected))
for _, name := range []string{"test2.com", "test3.com"} {
c.Assert(namesAffected.Has(name), Equals, true, Commentf("Incorrect affected name returned on forced expire"))
}
for name := range names {
ips = cache.lookupByTime(now, name)
Expand All @@ -166,13 +166,8 @@ func (ds *DNSCacheTestSuite) Test_forceExpiredByNames(c *C) {
}

c.Assert(cache.forward, HasLen, 3)
result := cache.forceExpireByNames(time.Now(), names)
c.Assert(result, checker.DeepEquals, names)
c.Assert(result, HasLen, 2)
cache.forceExpireByNames(time.Now(), names)
c.Assert(cache.forward["test3.com"], Not(IsNil))

invalidName := cache.forceExpireByNames(now, []string{"invalid.name"})
c.Assert(invalidName, HasLen, 0)
}

func (ds *DNSCacheTestSuite) TestReverseUpdateLookup(c *C) {
Expand Down Expand Up @@ -562,7 +557,7 @@ func (ds *DNSCacheTestSuite) TestOverlimitEntriesWithValidLimit(c *C) {
cache.Update(now, "test.com", []netip.Addr{netip.MustParseAddr(fmt.Sprintf("1.1.1.%d", i))}, i)
}
affectedNames, _ := cache.cleanupOverLimitEntries()
c.Assert(affectedNames, checker.DeepEquals, []string{"test.com"})
c.Assert(affectedNames, checker.DeepEquals, sets.New[string]("test.com"))

c.Assert(cache.Lookup("test.com"), HasLen, limit)
c.Assert(cache.LookupIP(netip.MustParseAddr("1.1.1.1")), checker.DeepEquals, []string{"foo.bar"})
Expand Down Expand Up @@ -597,7 +592,7 @@ func (ds *DNSCacheTestSuite) TestGCOverlimitAfterTTLCleanup(c *C) {
c.Assert(cache.overLimit, HasLen, 1)

result, _ := cache.cleanupExpiredEntries(time.Now().Add(5 * time.Second))
c.Assert(result, checker.DeepEquals, []string{"test.com"})
c.Assert(result, checker.DeepEquals, sets.New[string]("test.com"))

// Because all entries are deleted on TTL, the overlimit cleanup should return 0 entries.
affectedNames, _ := cache.cleanupOverLimitEntries()
Expand Down Expand Up @@ -956,7 +951,7 @@ func (ds *DNSCacheTestSuite) TestCacheToZombiesGCCascade(c *C) {
// Cascade expirations from cache to zombies. The 3.3.3.3 lookup has not expired
now = now.Add(4 * time.Second)
expired := cache.GC(now, zombies)
c.Assert(expired, HasLen, 1) // test.com
c.Assert(expired.Len(), Equals, 1) // test.com
// Not all IPs expired (3.3.3.3 still alive) so we expect test.com to be
// present in the cache.
c.Assert(cache.forward, checker.HasKey, "test.com")
Expand All @@ -974,7 +969,7 @@ func (ds *DNSCacheTestSuite) TestCacheToZombiesGCCascade(c *C) {
// but the older zombies are still alive.
now = now.Add(4 * time.Second)
expired = cache.GC(now, zombies)
c.Assert(expired, HasLen, 1) // test.com
c.Assert(expired.Len(), Equals, 1) // test.com
// Now all IPs expired so we expect test.com to be removed from the cache.
c.Assert(cache.forward, Not(checker.HasKey), "test.com")
c.Assert(cache.forward, HasLen, 0)
Expand Down Expand Up @@ -1106,8 +1101,8 @@ func (ds *DNSCacheTestSuite) TestOverlimitPreferNewerEntries(c *C) {

affected := cache.GC(time.Now(), zombies)

c.Assert(affected, HasLen, 1)
c.Assert(affected[0], Equals, name)
c.Assert(affected.Len(), Equals, 1)
c.Assert(affected.Has(name), Equals, true)

// No entries have expired, but no more than toFQDNsMaxIPsPerHost can be
// kept in the cache.
Expand Down
9 changes: 4 additions & 5 deletions pkg/fqdn/name_manager_gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func (n *NameManager) GC(ctx context.Context) error {
}
}
affectedNames := ep.DNSHistory.GC(GCStart, ep.DNSZombies)
namesToClean.Insert(affectedNames...)
namesToClean = namesToClean.Union(affectedNames)

alive, dead := ep.DNSZombies.GC()
if metrics.FQDNAliveZombieConnections.IsEnabled() {
Expand Down Expand Up @@ -108,7 +108,7 @@ func (n *NameManager) GC(ctx context.Context) error {
}
}

if len(namesToClean) == 0 {
if namesToClean.Len() == 0 {
return nil
}

Expand Down Expand Up @@ -169,15 +169,14 @@ func (n *NameManager) DeleteDNSLookups(expireLookupsBefore time.Time, matchPatte
}

maybeStaleIPs := n.cache.GetIPs()
namesToRegen := sets.Set[string]{}

// Clear any to-delete entries globally
// Clear any to-delete entries in each endpoint, then update globally to
// insert any entries that now should be in the global cache (because they
// provide an IP at the latest expiration time).
namesToRegen.Insert(n.cache.ForceExpire(expireLookupsBefore, nameMatcher)...)
namesToRegen := n.cache.ForceExpire(expireLookupsBefore, nameMatcher)
for _, ep := range n.config.GetEndpointsDNSInfo() {
namesToRegen.Insert(ep.DNSHistory.ForceExpire(expireLookupsBefore, nameMatcher)...)
namesToRegen = namesToRegen.Union(ep.DNSHistory.ForceExpire(expireLookupsBefore, nameMatcher))
n.cache.UpdateFromCache(ep.DNSHistory, nil)

namesToRegen.Insert(ep.DNSZombies.ForceExpire(expireLookupsBefore, nameMatcher)...)
Expand Down

0 comments on commit 16caf0e

Please sign in to comment.