Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auth Map: Initial Garbage Collection #25754

Merged
merged 3 commits into from
Jun 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-agent.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-agent_hive.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-agent_hive_dot-graph.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkg/auth/authmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
// authMap is the package-internal abstraction over the auth map storage.
// Entries are addressed by an authKey and carry an authInfo value.
type authMap interface {
// Update stores info under key.
// NOTE(review): presumably overwrites an existing entry for the same key — confirm against the implementations.
Update(key authKey, info authInfo) error
// Delete removes the entry stored under key.
Delete(key authKey) error
// DeleteIf removes every entry for which predicate returns true.
// The predicate is called with the key and the info of each candidate entry.
DeleteIf(predicate func(key authKey, info authInfo) bool) error
// Get returns the info stored under key.
Get(key authKey) (authInfo, error)
// All returns all entries as a map from key to info.
All() (map[authKey]authInfo, error)
}
Expand Down
31 changes: 30 additions & 1 deletion pkg/auth/authmap_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@
package auth

import (
"errors"
"fmt"

"github.com/cilium/ebpf"
"golang.org/x/exp/maps"

"github.com/cilium/cilium/pkg/lock"
)

Expand All @@ -26,7 +30,11 @@ func (r *authMapCache) All() (map[authKey]authInfo, error) {
r.cacheEntriesMutex.RLock()
defer r.cacheEntriesMutex.RUnlock()

return r.cacheEntries, nil
result := make(map[authKey]authInfo)
mhofstetter marked this conversation as resolved.
Show resolved Hide resolved
for k, v := range r.cacheEntries {
result[k] = v
}
return maps.Clone(result), nil
}

func (r *authMapCache) Get(key authKey) (authInfo, error) {
Expand Down Expand Up @@ -66,6 +74,27 @@ func (r *authMapCache) Delete(key authKey) error {
return nil
}

// DeleteIf removes every cached entry for which predicate returns true, both
// from the underlying auth map and from the cache.
//
// Entries are deleted one at a time so that, if the underlying map reports an
// error, the cache still reflects exactly the entries removed so far. An entry
// that is already missing from the underlying map (ebpf.ErrKeyNotExist) is not
// treated as an error: it is dropped from the cache as well, so the cache does
// not keep an entry the underlying map no longer has. Any other error aborts the
// iteration and is returned wrapped.
func (r *authMapCache) DeleteIf(predicate func(key authKey, info authInfo) bool) error {
	r.cacheEntriesMutex.Lock()
	defer r.cacheEntriesMutex.Unlock()

	for k, v := range r.cacheEntries {
		if !predicate(k, v) {
			continue
		}

		// delete every entry individually to keep the cache in sync in case of an error
		if err := r.authmap.Delete(k); err != nil {
			if !errors.Is(err, ebpf.ErrKeyNotExist) {
				return fmt.Errorf("failed to delete auth entry from map: %w", err)
			}
			// The entry is already gone from the underlying map; fall through
			// so the stale cache entry is removed too.
			log.Debugf("auth: failed to delete auth entry with key %s: entry already deleted", k)
		}
		// Deleting the current key while ranging over a map is safe in Go.
		delete(r.cacheEntries, k)
	}

	return nil
}

func (r *authMapCache) restoreCache() error {
log.Debug("auth: starting cache restore")

Expand Down
42 changes: 42 additions & 0 deletions pkg/auth/authmap_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,46 @@ func Test_authMapCache_restoreCache(t *testing.T) {
assert.NoError(t, err)

assert.Len(t, am.cacheEntries, 1)

val, err := am.Get(authKey{
localIdentity: 1,
remoteIdentity: 2,
remoteNodeID: 10,
authType: policy.AuthTypeDisabled,
})
assert.NoError(t, err)
assert.NotNil(t, val)
}

func Test_authMapCache_allReturnsCopy(t *testing.T) {
am := authMapCache{
authmap: &fakeAuthMap{
entries: map[authKey]authInfo{},
},
cacheEntries: map[authKey]authInfo{
{
localIdentity: 1,
remoteIdentity: 2,
remoteNodeID: 10,
authType: policy.AuthTypeDisabled,
}: {
expiration: time.Now().Add(10 * time.Minute),
},
},
}

all, err := am.All()
assert.NoError(t, err)
assert.Len(t, all, 1)

all[authKey{
localIdentity: 10,
remoteIdentity: 20,
remoteNodeID: 100,
authType: policy.AuthTypeDisabled,
}] = authInfo{
expiration: time.Now().Add(10 * time.Minute),
}
assert.Len(t, all, 2)
assert.Len(t, am.cacheEntries, 1)
}
173 changes: 173 additions & 0 deletions pkg/auth/authmap_gc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package auth

import (
"context"
"fmt"
"net"
"time"

"github.com/cilium/cilium/pkg/identity"
ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
"github.com/cilium/cilium/pkg/k8s/resource"
"github.com/cilium/cilium/pkg/node/addressing"
)

// authMapGarbageCollector removes auth map entries that refer to nodes or
// identities that no longer exist, as well as entries that have expired.
type authMapGarbageCollector struct {
// authmap is the auth map that entries are deleted from.
authmap authMap
// ipCache is used to resolve node IP addresses to node IDs.
ipCache ipCache

// discoveredCiliumNodeIDs is the set of node IDs seen in CiliumNode upsert
// events. The node event handler only records into it while it is non-nil
// and sets it to nil once the node sync event has been handled.
discoveredCiliumNodeIDs map[uint16]struct{}
// discoveredCiliumIdentities is the set of identities seen in
// CiliumIdentity upsert events. The identity event handler only records
// into it while it is non-nil.
discoveredCiliumIdentities map[identity.NumericIdentity]struct{}
}

// newAuthMapGC creates a garbage collector operating on the given auth map,
// using ipCache to resolve node IDs. The set of discovered node IDs starts
// out containing node ID 0; the set of discovered identities starts empty.
func newAuthMapGC(authmap authMap, ipCache ipCache) *authMapGarbageCollector {
	// Local node 0 is always available
	nodeIDs := map[uint16]struct{}{0: {}}
	identities := map[identity.NumericIdentity]struct{}{}

	gc := &authMapGarbageCollector{
		authmap:                    authmap,
		ipCache:                    ipCache,
		discoveredCiliumNodeIDs:    nodeIDs,
		discoveredCiliumIdentities: identities,
	}
	return gc
}

// handleCiliumNodeEvent processes a single CiliumNode resource event.
//
//   - Upsert: while discovery is still tracked (the set is non-nil), the
//     node IDs of the node are recorded as discovered.
//   - Sync: entries whose remote node ID was not discovered are deleted and
//     discovery tracking is switched off by setting the set to nil.
//   - Delete: entries referring to the deleted node are removed.
//
// The event is always marked done with the error returned by this handler.
func (r *authMapGarbageCollector) handleCiliumNodeEvent(_ context.Context, e resource.Event[*ciliumv2.CiliumNode]) (err error) {
	// err is the named result, so e.Done observes the final return value.
	defer func() { e.Done(err) }()

	switch e.Kind {
	case resource.Upsert:
		if r.discoveredCiliumNodeIDs == nil {
			// Discovery tracking has already been switched off.
			break
		}
		log.Debug("auth: nodes discovered - getting node id")
		for _, nodeID := range r.remoteNodeIDs(e.Object) {
			r.discoveredCiliumNodeIDs[nodeID] = struct{}{}
		}

	case resource.Sync:
		log.Debug("auth: nodes synced - cleaning up missing nodes")
		if cleanupErr := r.cleanupMissingNodes(); cleanupErr != nil {
			return fmt.Errorf("failed to cleanup missing nodes: %w", cleanupErr)
		}
		r.discoveredCiliumNodeIDs = nil

	case resource.Delete:
		log.Debugf("auth: node deleted - cleaning up: %s", e.Key.Name)
		if cleanupErr := r.cleanupDeletedNode(e.Object); cleanupErr != nil {
			return fmt.Errorf("failed to cleanup deleted node: %w", cleanupErr)
		}
	}

	return nil
}

func (r *authMapGarbageCollector) handleCiliumIdentityEvent(_ context.Context, e resource.Event[*ciliumv2.CiliumIdentity]) (err error) {
defer func() { e.Done(err) }()

switch e.Kind {
case resource.Upsert:
if r.discoveredCiliumIdentities != nil {
log.Debug("auth: identities discovered")
var id identity.NumericIdentity
id, err = identity.ParseNumericIdentity(e.Object.Name)
if err != nil {
return fmt.Errorf("failed to parse identity: %w", err)
}
joamaki marked this conversation as resolved.
Show resolved Hide resolved
r.discoveredCiliumIdentities[id] = struct{}{}
}
case resource.Sync:
log.Debug("auth: identities synced - cleaning up missing identities")
if err = r.cleanupMissingIdentities(); err != nil {
return fmt.Errorf("failed to cleanup missing identities: %w", err)
}
case resource.Delete:
log.Debugf("auth: identity deleted - cleaning up: %s", e.Key.Name)
if err = r.cleanupDeletedIdentity(e.Object); err != nil {
return fmt.Errorf("failed to cleanup deleted identity: %w", err)
}
r.discoveredCiliumIdentities = nil
}
return nil
}

// cleanupMissingNodes deletes every auth map entry whose remote node ID is
// not contained in the set of discovered node IDs.
func (r *authMapGarbageCollector) cleanupMissingNodes() error {
	nodeIsMissing := func(key authKey, _ authInfo) bool {
		_, discovered := r.discoveredCiliumNodeIDs[key.remoteNodeID]
		if discovered {
			return false
		}
		log.Debugf("auth: deleting entry due to removed remote node: %d", key.remoteNodeID)
		return true
	}

	return r.authmap.DeleteIf(nodeIsMissing)
}

// cleanupMissingIdentities deletes every auth map entry whose local or remote
// identity is not contained in the set of discovered identities. The local
// identity is checked first.
func (r *authMapGarbageCollector) cleanupMissingIdentities() error {
	identityIsMissing := func(key authKey, _ authInfo) bool {
		if _, discovered := r.discoveredCiliumIdentities[key.localIdentity]; !discovered {
			log.Debugf("auth: deleting entry due to removed local identity: %d", key.localIdentity)
			return true
		}

		_, discovered := r.discoveredCiliumIdentities[key.remoteIdentity]
		if !discovered {
			log.Debugf("auth: deleting entry due to removed remote identity: %d", key.remoteIdentity)
		}
		return !discovered
	}

	return r.authmap.DeleteIf(identityIsMissing)
}

// cleanupDeletedNode deletes every auth map entry whose remote node ID is one
// of the node IDs resolved for the given (deleted) node.
func (r *authMapGarbageCollector) cleanupDeletedNode(node *ciliumv2.CiliumNode) error {
	// Resolve the node IDs once, before walking the map.
	deletedIDs := make(map[uint16]struct{})
	for _, id := range r.remoteNodeIDs(node) {
		deletedIDs[id] = struct{}{}
	}

	belongsToNode := func(key authKey, _ authInfo) bool {
		if _, deleted := deletedIDs[key.remoteNodeID]; !deleted {
			return false
		}
		log.Debugf("auth: deleting entry due to removed node: %d", key.remoteNodeID)
		return true
	}

	return r.authmap.DeleteIf(belongsToNode)
}

// cleanupDeletedIdentity deletes every auth map entry that uses the given
// (deleted) identity as either its local or its remote identity. It returns
// an error if the identity name cannot be parsed as a numeric identity.
func (r *authMapGarbageCollector) cleanupDeletedIdentity(id *ciliumv2.CiliumIdentity) error {
	deleted, parseErr := identity.ParseNumericIdentity(id.Name)
	if parseErr != nil {
		return fmt.Errorf("failed to parse deleted identity: %w", parseErr)
	}

	usesIdentity := func(key authKey, _ authInfo) bool {
		if key.localIdentity != deleted && key.remoteIdentity != deleted {
			return false
		}
		log.Debugf("auth: deleting entry due to removed identity: %d", deleted)
		return true
	}

	return r.authmap.DeleteIf(usesIdentity)
}

// CleanupExpiredEntries deletes every auth map entry whose expiration lies
// before the time at which the cleanup started. The context argument is
// unused.
func (r *authMapGarbageCollector) CleanupExpiredEntries(_ context.Context) error {
	log.Debug("auth: cleaning up expired entries")

	// Take the reference time once so all entries are judged against the same instant.
	startedAt := time.Now()
	isExpired := func(_ authKey, info authInfo) bool {
		if !startedAt.After(info.expiration) {
			return false
		}
		log.Debugf("auth: deleting entry due to expiration: %s", info.expiration)
		return true
	}

	if err := r.authmap.DeleteIf(isExpired); err != nil {
		return fmt.Errorf("failed to cleanup expired entries: %w", err)
	}
	return nil
}

// remoteNodeIDs returns the node IDs obtained from the IP cache for every
// address of the node that has type NodeInternalIP. Addresses of any other
// type are ignored; the result is nil if no address matches.
func (r *authMapGarbageCollector) remoteNodeIDs(node *ciliumv2.CiliumNode) []uint16 {
	var ids []uint16

	for _, address := range node.Spec.Addresses {
		if address.Type != addressing.NodeInternalIP {
			continue
		}
		nodeIP := net.ParseIP(address.IP)
		ids = append(ids, r.ipCache.AllocateNodeID(nodeIP))
	}

	return ids
}