fqdn: return names affected by GC in set instead of slice to avoid expensive sort

This avoids having to repeatedly sort and de-duplicate the slice, which
can be expensive when there are many FQDN names to garbage collect. The
consumer in `(*NameManager).GC` expects a `sets.Set` anyway.

TODO: benchmarks

Signed-off-by: Tobias Klauser <tobias@cilium.io>
tklauser committed Feb 22, 2024
1 parent 00e45d7 commit 71c48e8
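
The commit message leaves benchmarks as a TODO. As a rough, hypothetical sketch of how the trade-off could be measured (not part of this commit; `genNames`, the 10,000-name workload, and the benchmark names are invented for illustration, and the standard library's `slices.Sort`/`slices.Compact` stand in for Cilium's own dedup helper), the old slice-plus-dedup approach can be compared against direct insertion into a `sets.Set[string]`:

```go
package fqdn_test

import (
	"fmt"
	"slices"
	"testing"

	"k8s.io/apimachinery/pkg/util/sets"
)

// genNames produces n synthetic FQDNs containing duplicates, standing in
// for the names collected during a single GC cycle.
func genNames(n int) []string {
	out := make([]string, 0, n)
	for i := 0; i < n; i++ {
		out = append(out, fmt.Sprintf("svc-%d.example.com", i%(n/2+1)))
	}
	return out
}

// BenchmarkSliceUnique models the old approach: append all names into a
// slice, then sort and de-duplicate it once at the end.
func BenchmarkSliceUnique(b *testing.B) {
	names := genNames(10000)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		acc := make([]string, 0, len(names))
		acc = append(acc, names...)
		slices.Sort(acc)
		_ = slices.Compact(acc)
	}
}

// BenchmarkSetInsert models the new approach: insert names directly into a
// sets.Set[string], which de-duplicates as it goes.
func BenchmarkSetInsert(b *testing.B) {
	names := genNames(10000)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		acc := sets.New[string]()
		acc.Insert(names...)
	}
}
```

Running with `go test -bench 'SliceUnique|SetInsert' -benchmem` would also show the allocation difference between the two approaches.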
Showing 3 changed files with 39 additions and 44 deletions.
41 changes: 21 additions & 20 deletions pkg/fqdn/cache.go
@@ -240,25 +240,26 @@ func (c *DNSCache) addNameToCleanup(entry *cacheEntry) {
// cleanups begin from that time.
// It returns the list of names that have expired data and a map of removed DNS
// cache entries, keyed by IP.
func (c *DNSCache) cleanupExpiredEntries(expires time.Time) (affectedNames []string, removed map[netip.Addr][]*cacheEntry) {
func (c *DNSCache) cleanupExpiredEntries(expires time.Time) (affectedNames sets.Set[string], removed map[netip.Addr][]*cacheEntry) {
if c.lastCleanup.IsZero() {
return nil, nil
}

var toCleanNames []string
toCleanNames := sets.New[string]()
for c.lastCleanup.Before(expires) {
key := c.lastCleanup.Unix()
if entries, exists := c.cleanup[key]; exists {
toCleanNames = append(toCleanNames, entries...)
toCleanNames.Insert(entries...)
delete(c.cleanup, key)
}
c.lastCleanup = c.lastCleanup.Add(time.Second).Truncate(time.Second)
}

affectedNames = sets.New[string]()
removed = make(map[netip.Addr][]*cacheEntry)
for _, name := range slices.Unique(toCleanNames) {
for name := range toCleanNames {
if entries, exists := c.forward[name]; exists {
affectedNames = append(affectedNames, name)
affectedNames.Insert(name)
for ip, entry := range c.removeExpired(entries, c.lastCleanup, time.Time{}) {
removed[ip] = append(removed[ip], entry)
}
@@ -271,18 +272,20 @@ func (c *DNSCache) cleanupExpiredEntries(expires time.Time) (affectedNames []str
// cleanupOverLimitEntries returns the names that has reached the max number of
// IP per host. Internally the function sort the entries by the expiration
// time.
func (c *DNSCache) cleanupOverLimitEntries() (affectedNames []string, removed map[netip.Addr][]*cacheEntry) {
func (c *DNSCache) cleanupOverLimitEntries() (affectedNames sets.Set[string], removed map[netip.Addr][]*cacheEntry) {
type IPEntry struct {
ip netip.Addr
entry *cacheEntry
}
removed = make(map[netip.Addr][]*cacheEntry)

// For global cache the limit maybe is not used at all.
if c.perHostLimit == 0 {
return affectedNames, nil
return nil, nil
}

affectedNames = sets.New[string]()
removed = make(map[netip.Addr][]*cacheEntry)

for dnsName := range c.overLimit {
entries, ok := c.forward[dnsName]
if !ok {
@@ -307,7 +310,7 @@ func (c *DNSCache) cleanupOverLimitEntries() (affectedNames []string, removed ma
c.remove(key.ip, key.entry)
removed[key.ip] = append(removed[key.ip], key.entry)
}
affectedNames = append(affectedNames, dnsName)
affectedNames.Insert(dnsName)
}
c.overLimit = map[string]bool{}
return affectedNames, removed
@@ -319,7 +322,7 @@ func (c *DNSCache) cleanupOverLimitEntries() (affectedNames []string, removed ma
// other management of zombies is left to the caller.
// Note: zombies use the original lookup's ExpirationTime for DeletePendingAt,
// not the now parameter. This allows better ordering in zombie GC.
func (c *DNSCache) GC(now time.Time, zombies *DNSZombieMappings) (affectedNames []string) {
func (c *DNSCache) GC(now time.Time, zombies *DNSZombieMappings) (affectedNames sets.Set[string]) {
c.Lock()
expiredNames, expiredEntries := c.cleanupExpiredEntries(now)
overLimitNames, overLimitEntries := c.cleanupOverLimitEntries()
@@ -339,7 +342,7 @@ func (c *DNSCache) GC(now time.Time, zombies *DNSZombieMappings) (affectedNames
}
}

return slices.Unique(append(expiredNames, overLimitNames...))
return expiredNames.Union(overLimitNames)
}

// UpdateFromCache is a utility function that allows updating a DNSCache
@@ -595,10 +598,12 @@ func (c *DNSCache) GetIPs() sets.Set[netip.Addr] {
// expireLookupsBefore requires a lookup to have a LookupTime before it in
// order to remove it.
// nameMatch will remove any DNS names that match.
func (c *DNSCache) ForceExpire(expireLookupsBefore time.Time, nameMatch *regexp.Regexp) (namesAffected []string) {
func (c *DNSCache) ForceExpire(expireLookupsBefore time.Time, nameMatch *regexp.Regexp) (namesAffected sets.Set[string]) {
c.Lock()
defer c.Unlock()

namesAffected = sets.New[string]()

for name, entries := range c.forward {
// If nameMatch was passed in, we must match it. Otherwise, "match all".
if nameMatch != nil && !nameMatch.MatchString(name) {
@@ -609,14 +614,14 @@ func (c *DNSCache) ForceExpire(expireLookupsBefore time.Time, nameMatch *regexp.
// The second expireLookupsBefore actually matches lookup times, and will
// delete the entries completely.
for _, entry := range c.removeExpired(entries, expireLookupsBefore, expireLookupsBefore) {
namesAffected = append(namesAffected, entry.Name)
namesAffected.Insert(entry.Name)
}
}

return slices.Unique(namesAffected)
return namesAffected
}

func (c *DNSCache) forceExpireByNames(expireLookupsBefore time.Time, names []string) (namesAffected []string) {
func (c *DNSCache) forceExpireByNames(expireLookupsBefore time.Time, names []string) {
for _, name := range names {
entries, exists := c.forward[name]
if !exists {
@@ -627,12 +632,8 @@ func (c *DNSCache) forceExpireByNames(expireLookupsBefore time.Time, names []str
// because LookupTime must be before ExpirationTime.
// The second expireLookupsBefore actually matches lookup times, and will
// delete the entries completely.
for _, entry := range c.removeExpired(entries, expireLookupsBefore, expireLookupsBefore) {
namesAffected = append(namesAffected, entry.Name)
}
c.removeExpired(entries, expireLookupsBefore, expireLookupsBefore)
}

return namesAffected
}

// Dump returns unexpired cache entries in the cache. They are deduplicated,
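
For reference (not part of the diff), a minimal standalone sketch of the `k8s.io/apimachinery/pkg/util/sets` operations the updated `cache.go` relies on; the domain names are placeholders:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

func main() {
	// Insert de-duplicates on write, so repeated names are cheap map writes
	// rather than a sort-and-compact pass at the end.
	expired := sets.New[string]("test1.com", "test2.com")
	expired.Insert("test2.com") // already present; the set is unchanged

	overLimit := sets.New[string]("test3.com")

	// Union replaces the old append + slices.Unique combination in GC.
	affected := expired.Union(overLimit)

	fmt.Println(affected.Len())            // 3
	fmt.Println(affected.Has("test3.com")) // true
	fmt.Println(sets.List(affected))       // sorted slice, produced only on request
}
```

Note that `Union` returns a new set rather than mutating its receiver, which is why the callers below assign its result instead of invoking it purely for side effects.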
33 changes: 14 additions & 19 deletions pkg/fqdn/cache_test.go
@@ -15,6 +15,7 @@ import (
"time"

. "github.com/cilium/checkmate"
"k8s.io/apimachinery/pkg/util/sets"

"github.com/cilium/cilium/pkg/checker"
"github.com/cilium/cilium/pkg/defaults"
@@ -104,7 +105,7 @@ func (ds *DNSCacheTestSuite) TestDelete(c *C) {
nameMatch, err := regexp.Compile("^notatest.com$")
c.Assert(err, IsNil)
namesAffected := cache.ForceExpire(now, nameMatch)
c.Assert(len(namesAffected), Equals, 0, Commentf("Incorrect count of names removed %v", namesAffected))
c.Assert(namesAffected.Len(), Equals, 0, Commentf("Incorrect count of names removed %v", namesAffected))
for _, name := range []string{"test1.com", "test2.com", "test3.com"} {
ips := cache.lookupByTime(now, name)
c.Assert(len(ips), Equals, 2, Commentf("Wrong count of IPs returned (%v) for non-deleted name '%s'", ips, name))
@@ -120,8 +121,8 @@ func (ds *DNSCacheTestSuite) TestDelete(c *C) {
nameMatch, err = regexp.Compile("^test1.com$")
c.Assert(err, IsNil)
namesAffected = cache.ForceExpire(now, nameMatch)
c.Assert(len(namesAffected), Equals, 1, Commentf("Incorrect count of names removed %v", namesAffected))
c.Assert(namesAffected[0], Equals, "test1.com", Commentf("Incorrect affected name returned on forced expire: %s", namesAffected))
c.Assert(namesAffected.Len(), Equals, 1, Commentf("Incorrect count of names removed %v", namesAffected))
c.Assert(namesAffected.Has("test1.com"), Equals, true, Commentf("Incorrect affected name returned on forced expire: %s", namesAffected))
ips := cache.lookupByTime(now, "test1.com")
c.Assert(len(ips), Equals, 0, Commentf("IPs returned (%v) for deleted name 'test1.com'", ips))
c.Assert(cache.forward, Not(checker.HasKey), "test1.com", Commentf("Expired name 'test1.com' not deleted from forward"))
@@ -139,10 +140,9 @@ func (ds *DNSCacheTestSuite) TestDelete(c *C) {

// Delete the whole cache. This should leave no data.
namesAffected = cache.ForceExpire(now, nil)
sort.Strings(namesAffected) // simplify the checks below
c.Assert(len(namesAffected), Equals, 2, Commentf("Incorrect count of names removed %v", namesAffected))
for i, name := range []string{"test2.com", "test3.com"} {
c.Assert(namesAffected[i], Equals, name, Commentf("Incorrect affected name returned on forced expire"))
c.Assert(namesAffected.Len(), Equals, 2, Commentf("Incorrect count of names removed %v", namesAffected))
for _, name := range []string{"test2.com", "test3.com"} {
c.Assert(namesAffected.Has(name), Equals, true, Commentf("Incorrect affected name returned on forced expire"))
}
for name := range names {
ips = cache.lookupByTime(now, name)
@@ -166,13 +166,8 @@ func (ds *DNSCacheTestSuite) Test_forceExpiredByNames(c *C) {
}

c.Assert(cache.forward, HasLen, 3)
result := cache.forceExpireByNames(time.Now(), names)
c.Assert(result, checker.DeepEquals, names)
c.Assert(result, HasLen, 2)
cache.forceExpireByNames(time.Now(), names)
c.Assert(cache.forward["test3.com"], Not(IsNil))

invalidName := cache.forceExpireByNames(now, []string{"invalid.name"})
c.Assert(invalidName, HasLen, 0)
}

func (ds *DNSCacheTestSuite) TestReverseUpdateLookup(c *C) {
@@ -562,7 +557,7 @@ func (ds *DNSCacheTestSuite) TestOverlimitEntriesWithValidLimit(c *C) {
cache.Update(now, "test.com", []netip.Addr{netip.MustParseAddr(fmt.Sprintf("1.1.1.%d", i))}, i)
}
affectedNames, _ := cache.cleanupOverLimitEntries()
c.Assert(affectedNames, checker.DeepEquals, []string{"test.com"})
c.Assert(affectedNames, checker.DeepEquals, sets.New[string]("test.com"))

c.Assert(cache.Lookup("test.com"), HasLen, limit)
c.Assert(cache.LookupIP(netip.MustParseAddr("1.1.1.1")), checker.DeepEquals, []string{"foo.bar"})
@@ -597,7 +592,7 @@ func (ds *DNSCacheTestSuite) TestGCOverlimitAfterTTLCleanup(c *C) {
c.Assert(cache.overLimit, HasLen, 1)

result, _ := cache.cleanupExpiredEntries(time.Now().Add(5 * time.Second))
c.Assert(result, checker.DeepEquals, []string{"test.com"})
c.Assert(result, checker.DeepEquals, sets.New[string]("test.com"))

// Due all entries are deleted on TTL, the overlimit should return 0 entries.
affectedNames, _ := cache.cleanupOverLimitEntries()
@@ -939,7 +934,7 @@ func (ds *DNSCacheTestSuite) TestCacheToZombiesGCCascade(c *C) {
// Cascade expirations from cache to zombies. The 3.3.3.3 lookup has not expired
now = now.Add(4 * time.Second)
expired := cache.GC(now, zombies)
c.Assert(expired, HasLen, 1) // test.com
c.Assert(expired.Len(), Equals, 1) // test.com
// Not all IPs expired (3.3.3.3 still alive) so we expect test.com to be
// present in the cache.
c.Assert(cache.forward, checker.HasKey, "test.com")
@@ -957,7 +952,7 @@ func (ds *DNSCacheTestSuite) TestCacheToZombiesGCCascade(c *C) {
// but the older zombies are still alive.
now = now.Add(4 * time.Second)
expired = cache.GC(now, zombies)
c.Assert(expired, HasLen, 1) // test.com
c.Assert(expired.Len(), Equals, 1) // test.com
// Now all IPs expired so we expect test.com to be removed from the cache.
c.Assert(cache.forward, Not(checker.HasKey), "test.com")
c.Assert(cache.forward, HasLen, 0)
@@ -1077,8 +1072,8 @@ func (ds *DNSCacheTestSuite) TestOverlimitPreferNewerEntries(c *C) {

affected := cache.GC(time.Now(), zombies)

c.Assert(affected, HasLen, 1)
c.Assert(affected[0], Equals, name)
c.Assert(affected.Len(), Equals, 1)
c.Assert(affected.Has(name), Equals, true)

// No entries have expired, but no more than toFQDNsMaxIPsPerHost can be
// kept in the cache.
9 changes: 4 additions & 5 deletions pkg/fqdn/name_manager_gc.go
@@ -73,7 +73,7 @@ func (n *NameManager) GC(ctx context.Context) error {
}
}
affectedNames := ep.DNSHistory.GC(GCStart, ep.DNSZombies)
namesToClean.Insert(affectedNames...)
namesToClean = namesToClean.Union(affectedNames)

alive, dead := ep.DNSZombies.GC()
if metrics.FQDNAliveZombieConnections.IsEnabled() {
@@ -108,7 +108,7 @@ func (n *NameManager) GC(ctx context.Context) error {
}
}

if len(namesToClean) == 0 {
if namesToClean.Len() == 0 {
return nil
}

@@ -169,15 +169,14 @@ func (n *NameManager) DeleteDNSLookups(expireLookupsBefore time.Time, matchPatte
}

maybeStaleIPs := n.cache.GetIPs()
namesToRegen := sets.Set[string]{}

// Clear any to-delete entries globally
// Clear any to-delete entries in each endpoint, then update globally to
// insert any entries that now should be in the global cache (because they
// provide an IP at the latest expiration time).
namesToRegen.Insert(n.cache.ForceExpire(expireLookupsBefore, nameMatcher)...)
namesToRegen := n.cache.ForceExpire(expireLookupsBefore, nameMatcher)
for _, ep := range n.config.GetEndpointsDNSInfo() {
namesToRegen.Insert(ep.DNSHistory.ForceExpire(expireLookupsBefore, nameMatcher)...)
namesToRegen = namesToRegen.Union(ep.DNSHistory.ForceExpire(expireLookupsBefore, nameMatcher))
n.cache.UpdateFromCache(ep.DNSHistory, nil)

namesToRegen.Insert(ep.DNSZombies.ForceExpire(expireLookupsBefore, nameMatcher)...)
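
Because `Union` allocates a fresh set on every call, the per-endpoint loop in `(*NameManager).GC` now builds a new accumulator on each iteration. A hypothetical alternative (not what the commit does; `gcEndpoint` and the loop below are invented for illustration) would be to insert the returned keys into a single accumulator:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

// gcEndpoint stands in for one endpoint's DNSHistory.GC result.
func gcEndpoint(i int) sets.Set[string] {
	return sets.New[string](fmt.Sprintf("svc-%d.example.com", i), "shared.example.com")
}

func main() {
	// Pattern used in the diff: Union returns a fresh set each iteration.
	union := sets.New[string]()
	for i := 0; i < 3; i++ {
		union = union.Union(gcEndpoint(i))
	}

	// Alternative: insert the returned keys into one accumulator, avoiding a
	// new set allocation per endpoint (at the cost of a temporary key slice).
	acc := sets.New[string]()
	for i := 0; i < 3; i++ {
		acc.Insert(gcEndpoint(i).UnsortedList()...)
	}

	fmt.Println(union.Equal(acc), acc.Len()) // true 4
}
```

Whether that difference matters in practice is exactly what the TODO'd benchmarks would show; for a handful of endpoints it is likely negligible.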
