Skip to content

Commit

Permalink
fqdn: Report zombie expiry time by CT GC
Browse files Browse the repository at this point in the history
Report the expiry time for entries kept alive based on connection state
as the next time that connection tracking garbage collection runs. This
way, the expiration time should be closer to the actual expiry time
rather than simply reporting the zero timestamp.

Signed-off-by: Joe Stringer <joe@cilium.io>
  • Loading branch information
joestringer committed May 6, 2024
1 parent 29f47d4 commit 4c98d60
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 18 deletions.
4 changes: 2 additions & 2 deletions pkg/endpoint/fqdn.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ func (e *Endpoint) MarkDNSCTEntry(dstIP netip.Addr, now time.Time) {
// deleted.
// The DNS garbage collector is in daemon/fqdn.go and the CT GC is in
// pkg/maps/ctmap/gc/gc.go
func (e *Endpoint) MarkCTGCTime(now time.Time) {
e.DNSZombies.SetCTGCTime(now)
func (e *Endpoint) MarkCTGCTime(prev, next time.Time) {
e.DNSZombies.SetCTGCTime(prev, next)
}
4 changes: 3 additions & 1 deletion pkg/fqdn/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,7 @@ type DNSZombieMappings struct {
lock.Mutex
deletes map[netip.Addr]*DNSZombieMapping
lastCTGCUpdate time.Time
nextCTGCUpdate time.Time // estimated
// ctGCRevision is a serial number tracking the number of conntrack
// garbage collection runs. It is used to ensure that entries
// are not reaped until CT GC has run at least twice.
Expand Down Expand Up @@ -1059,9 +1060,10 @@ func (zombies *DNSZombieMappings) MarkAlive(now time.Time, ip netip.Addr) {
// When 'ctGCStart' is later than an alive timestamp, set with MarkAlive, the zombie is
// no longer alive. Thus, this call acts as a gating function for what data is
// returned by GC.
func (zombies *DNSZombieMappings) SetCTGCTime(ctGCStart time.Time) {
func (zombies *DNSZombieMappings) SetCTGCTime(ctGCStart, estNext time.Time) {
zombies.Lock()
zombies.lastCTGCUpdate = ctGCStart
zombies.nextCTGCUpdate = estNext
zombies.ctGCRevision++
zombies.Unlock()
}
Expand Down
32 changes: 20 additions & 12 deletions pkg/fqdn/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -637,10 +637,11 @@ func TestZombiesSiblingsGC(t *testing.T) {
// Mark 1.1.1.2 alive which should also keep 1.1.1.1 alive since they
// have the same name
now = now.Add(5 * time.Minute)
zombies.SetCTGCTime(now)
next := now.Add(5 * time.Minute)
zombies.SetCTGCTime(now, next)
now = now.Add(time.Second)
zombies.MarkAlive(now.Add(time.Second), netip.MustParseAddr("1.1.1.2"))
zombies.SetCTGCTime(now)
zombies.SetCTGCTime(now, next)

alive, dead := zombies.GC()
assertZombiesContain(t, alive, map[string][]string{
Expand Down Expand Up @@ -680,7 +681,8 @@ func TestZombiesGC(t *testing.T) {
// Even when not marking alive, running CT GC the first time is ignored;
// we must always complete 2 GC cycles before allowing a name to be dead
now = now.Add(5 * time.Minute)
zombies.SetCTGCTime(now)
next := now.Add(5 * time.Minute)
zombies.SetCTGCTime(now, next)
alive, dead = zombies.GC()
require.Len(t, dead, 0)
assertZombiesContain(t, alive, map[string][]string{
Expand All @@ -691,9 +693,10 @@ func TestZombiesGC(t *testing.T) {
// Cause 1.1.1.1 to die by not marking it alive before the second GC
//zombies.MarkAlive(now, netip.MustParseAddr("1.1.1.1"))
now = now.Add(5 * time.Minute)
next = now.Add(5 * time.Minute)
// Mark 2.2.2.2 alive with 1 second grace period
zombies.MarkAlive(now.Add(time.Second), netip.MustParseAddr("2.2.2.2"))
zombies.SetCTGCTime(now)
zombies.SetCTGCTime(now, next)

// alive should contain 2.2.2.2 -> somethingelse.com
// dead should contain 1.1.1.1 -> anotherthing.com, test.com
Expand Down Expand Up @@ -724,10 +727,12 @@ func TestZombiesGC(t *testing.T) {

// Cause all zombies but 2.2.2.2 to die
now = now.Add(5 * time.Minute)
zombies.SetCTGCTime(now)
next = now.Add(5 * time.Minute)
zombies.SetCTGCTime(now, next)
now = now.Add(5 * time.Minute)
next = now.Add(5 * time.Minute)
zombies.MarkAlive(now.Add(time.Second), netip.MustParseAddr("2.2.2.2"))
zombies.SetCTGCTime(now)
zombies.SetCTGCTime(now, next)
alive, dead = zombies.GC()
require.Len(t, alive, 1)
assertZombiesContain(t, alive, map[string][]string{
Expand All @@ -739,7 +744,7 @@ func TestZombiesGC(t *testing.T) {

// Cause all zombies to die
now = now.Add(2 * time.Second)
zombies.SetCTGCTime(now)
zombies.SetCTGCTime(now, next)
alive, dead = zombies.GC()
require.Len(t, alive, 0)
assertZombiesContain(t, dead, map[string][]string{
Expand Down Expand Up @@ -776,7 +781,7 @@ func TestZombiesGCOverLimitWithCTGC(t *testing.T) {
afterNow := now.Add(1 * time.Nanosecond)
maxConnections := 3
zombies := NewDNSZombieMappings(defaults.ToFQDNsMaxDeferredConnectionDeletes, maxConnections)
zombies.SetCTGCTime(now)
zombies.SetCTGCTime(now, afterNow)

// Limit the number of IPs per hostname, but associate 'test.com' with
// more IPs.
Expand All @@ -790,7 +795,7 @@ func TestZombiesGCOverLimitWithCTGC(t *testing.T) {
for i := 0; i < maxConnections; i++ {
zombies.MarkAlive(afterNow, netip.MustParseAddr(fmt.Sprintf("1.1.1.%d", i+1)))
}
zombies.SetCTGCTime(afterNow)
zombies.SetCTGCTime(afterNow, afterNow.Add(5*time.Minute))

// Garbage collection should now impose the maxConnections limit on
// the name, prioritizing to keep the active IPs live and then marking
Expand Down Expand Up @@ -850,9 +855,10 @@ func TestZombiesGCDeferredDeletes(t *testing.T) {
// latest insert time.
zombies.Upsert(now.Add(0*time.Second), netip.MustParseAddr("1.1.1.1"), "test.com")
gcTime := now.Add(4 * time.Second)
next := now.Add(4 * time.Second)
zombies.MarkAlive(gcTime, netip.MustParseAddr("1.1.1.1"))
zombies.MarkAlive(gcTime, netip.MustParseAddr("2.2.2.2"))
zombies.SetCTGCTime(gcTime)
zombies.SetCTGCTime(gcTime, next)

alive, dead = zombies.GC()
assertZombiesContain(t, dead, map[string][]string{
Expand Down Expand Up @@ -1005,7 +1011,8 @@ func TestZombiesDumpAlive(t *testing.T) {
// Ensure that two GC runs must progress before
// marking zombies dead.
now = now.Add(time.Second)
zombies.SetCTGCTime(now)
next := now.Add(5 * time.Minute)
zombies.SetCTGCTime(now, next)
alive = zombies.DumpAlive(nil)
assertZombiesContain(t, alive, map[string][]string{
"1.1.1.1": {"test.com"},
Expand All @@ -1014,9 +1021,10 @@ func TestZombiesDumpAlive(t *testing.T) {
})

now = now.Add(5 * time.Minute) // Need to step the clock 5 minutes ahead here, to account for the grace period
next = now.Add(5 * time.Minute)
zombies.MarkAlive(now, netip.MustParseAddr("1.1.1.1"))
zombies.MarkAlive(now, netip.MustParseAddr("2.2.2.2"))
zombies.SetCTGCTime(now)
zombies.SetCTGCTime(now, next)

alive = zombies.DumpAlive(nil)
assertZombiesContain(t, alive, map[string][]string{
Expand Down
2 changes: 1 addition & 1 deletion pkg/fqdn/name_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ func (n *NameManager) GetDNSHistoryModel(endpointID string, prefixMatcher Prefix
Ips: []string{delete.IP.String()},
LookupTime: strfmt.DateTime(delete.AliveAt),
TTL: 0,
ExpirationTime: strfmt.DateTime(delete.AliveAt),
ExpirationTime: strfmt.DateTime(ep.DNSZombies.nextCTGCUpdate),
EndpointID: int64(ep.ID64),
Source: DNSSourceConnection,
})
Expand Down
5 changes: 3 additions & 2 deletions pkg/maps/ctmap/gc/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,10 @@ func (gc *GC) Enable() {
}

// Mark the CT GC as over in each EP DNSZombies instance, if we did a *full* GC run
interval := ctmap.GetInterval(gcInterval, maxDeleteRatio)
if success && ipv4 == gc.ipv4 && ipv6 == gc.ipv6 {
for _, e := range eps {
e.MarkCTGCTime(gcStart)
e.MarkCTGCTime(gcStart, time.Now().Add(interval))
}
}

Expand Down Expand Up @@ -198,7 +199,7 @@ func (gc *GC) Enable() {
ipv6 = true
}
}
case <-ctTimer.After(ctmap.GetInterval(gcInterval, maxDeleteRatio)):
case <-ctTimer.After(interval):
gc.signalHandler.MuteSignals()
ipv4 = gc.ipv4
ipv6 = gc.ipv6
Expand Down

0 comments on commit 4c98d60

Please sign in to comment.