From b3d043c558b50526fef1c7e65c8d89db8f984d28 Mon Sep 17 00:00:00 2001 From: Nadav Strahilevitz Date: Tue, 18 Jul 2023 14:59:12 +0000 Subject: [PATCH] fix (cgroups): already dead edge case Certain kubernetes version make use of short lived cgroups for various tasks (for example log rotation). These cgroups will generate events and a very quick cgroup_rmdir event. As such, many related events to the cgroup will attempt to query its directory through the recursive path and not find it. Commit adds a "Dead" field to the cgroup info to indicate a cgroup which has already been removed. Various logical sections can refer to it if its relevant to them, and more importantly, additional queries will not be attempted. Bonus: optimize away additional Stat syscall in containers by returning the directory ctime in cgroup.GetCgroupPath. --- pkg/cgroup/cgroup.go | 16 ++++++---- pkg/containers/containers.go | 49 ++++++++++++++++++----------- pkg/ebpf/controlplane/controller.go | 2 +- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/pkg/cgroup/cgroup.go b/pkg/cgroup/cgroup.go index 76389f11213..a8264bac3c4 100644 --- a/pkg/cgroup/cgroup.go +++ b/pkg/cgroup/cgroup.go @@ -8,6 +8,7 @@ import ( "strconv" "strings" "syscall" + "time" "github.com/aquasecurity/tracee/pkg/errfmt" "github.com/aquasecurity/tracee/pkg/logger" @@ -389,10 +390,12 @@ func GetCgroupControllerHierarchy(subsys string) (int, error) { // given cgroupId and subPath (related to cgroup fs root dir). If subPath is // empty, then all directories from cgroup fs will be searched for the given // cgroupId. -func GetCgroupPath(rootDir string, cgroupId uint64, subPath string) (string, error) { +// +// Returns found cgroup path, its ctime, and an error if relevant +func GetCgroupPath(rootDir string, cgroupId uint64, subPath string) (string, time.Time, error) { entries, err := os.ReadDir(rootDir) if err != nil { - return "", errfmt.WrapError(err) + return "", time.Time{}, errfmt.WrapError(err) } for _, entry := range entries { @@ -407,17 +410,18 @@ func GetCgroupPath(rootDir string, cgroupId uint64, subPath string) (string, err if err := syscall.Stat(entryPath, &stat); err == nil { // Check if this cgroup path belongs to cgroupId if (stat.Ino & 0xFFFFFFFF) == (cgroupId & 0xFFFFFFFF) { - return entryPath, nil + ctime := time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec) + return entryPath, ctime, nil } } } // No match at this dir level: continue recursively - path, err := GetCgroupPath(entryPath, cgroupId, subPath) + path, ctime, err := GetCgroupPath(entryPath, cgroupId, subPath) if err == nil { - return path, nil + return path, ctime, nil } } - return "", fs.ErrNotExist + return "", time.Time{}, fs.ErrNotExist } diff --git a/pkg/containers/containers.go b/pkg/containers/containers.go index 97009adde64..72f5e36cce8 100644 --- a/pkg/containers/containers.go +++ b/pkg/containers/containers.go @@ -3,6 +3,7 @@ package containers import ( "bufio" "context" + "errors" "fmt" "io/fs" "os" @@ -39,6 +40,7 @@ type CgroupInfo struct { Runtime cruntime.RuntimeId ContainerRoot bool // is the cgroup directory the root of its container Ctime time.Time + Dead bool // is the cgroup deleted expiresAt time.Time } @@ -149,7 +151,7 @@ func (c *Containers) populate() error { inodeNumber := stat.Ino statusChange := time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec) - _, err = c.cgroupUpdate(inodeNumber, path, statusChange) + _, err = c.cgroupUpdate(inodeNumber, path, statusChange, false) return errfmt.WrapError(err) } @@ -162,7 +164,7 @@ func (c *Containers) populate() error { // NOTE: ALL given cgroup dir paths are stored in CgroupInfo map. // NOTE: not thread-safe, lock should be placed in the external calling function, depending // on the transaction length. -func (c *Containers) cgroupUpdate(cgroupId uint64, path string, ctime time.Time) (CgroupInfo, error) { +func (c *Containers) cgroupUpdate(cgroupId uint64, path string, ctime time.Time, dead bool) (CgroupInfo, error) { // Cgroup paths should be stored and evaluated relative to the mountpoint, // trim it from the path. path = strings.TrimPrefix(path, c.cgroups.GetDefaultCgroup().GetMountPoint()) @@ -177,6 +179,7 @@ func (c *Containers) cgroupUpdate(cgroupId uint64, path string, ctime time.Time) Runtime: containerRuntime, ContainerRoot: isRoot, Ctime: ctime, + Dead: dead, } c.cgroupsMap[uint32(cgroupId)] = info @@ -207,6 +210,10 @@ func (c *Containers) EnrichCgroupInfo(cgroupId uint64) (cruntime.ContainerMetada return metadata, errfmt.Errorf("no containerId") } + if info.Dead { + return metadata, errfmt.Errorf("container already deleted") + } + if info.Container.Image != "" { // If already enriched (from control plane) - short circuit and return return info.Container, nil @@ -293,9 +300,10 @@ func getContainerIdFromCgroup(cgroupPath string) (string, cruntime.RuntimeId, bo } // CgroupRemove removes cgroupInfo of deleted cgroup dir from Containers struct. -// NOTE: Expiration logic of 5 seconds to avoid race conditions (if cgroup dir +// NOTE: Expiration logic of 30 seconds to avoid race conditions (if cgroup dir // event arrives too fast and its cgroupInfo data is still needed). func (c *Containers) CgroupRemove(cgroupId uint64, hierarchyID uint32) { + const expiryTime = 30 * time.Second // cgroupv1: no need to check other controllers than the default switch c.cgroups.GetDefaultCgroup().(type) { case *cgroup.CgroupV1: @@ -321,7 +329,8 @@ func (c *Containers) CgroupRemove(cgroupId uint64, hierarchyID uint32) { c.deleted = deleted if info, ok := c.cgroupsMap[uint32(cgroupId)]; ok { - info.expiresAt = now.Add(5 * time.Second) + info.expiresAt = now.Add(expiryTime) + info.Dead = true c.cgroupsMap[uint32(cgroupId)] = info c.deleted = append(c.deleted, cgroupId) } @@ -341,19 +350,16 @@ func (c *Containers) CgroupMkdir(cgroupId uint64, subPath string, hierarchyID ui c.cgroupsMutex.Lock() defer c.cgroupsMutex.Unlock() curTime := time.Now() - path, err := cgroup.GetCgroupPath(c.cgroups.GetDefaultCgroup().GetMountPoint(), cgroupId, subPath) + path, ctime, err := cgroup.GetCgroupPath(c.cgroups.GetDefaultCgroup().GetMountPoint(), cgroupId, subPath) if err == nil { - var stat syscall.Stat_t - if err := syscall.Stat(path, &stat); err == nil { - // Add cgroupInfo to Containers struct w/ found path (and its last modification time) - return c.cgroupUpdate(cgroupId, path, time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec)) - } + // Add cgroupInfo to Containers struct w/ found path (and its last modification time) + return c.cgroupUpdate(cgroupId, path, ctime, false) } // No entry found: container may have already exited. // Add cgroupInfo to Containers struct with existing data. // In this case, ctime is just an estimation (current time). - return c.cgroupUpdate(cgroupId, subPath, curTime) + return c.cgroupUpdate(cgroupId, subPath, curTime, true) } // FindContainerCgroupID32LSB returns the 32 LSB of the Cgroup ID for a given container ID @@ -385,14 +391,21 @@ func (c *Containers) GetCgroupInfo(cgroupId uint64) CgroupInfo { c.cgroupsMutex.Lock() defer c.cgroupsMutex.Unlock() - path, err := cgroup.GetCgroupPath(c.cgroups.GetDefaultCgroup().GetMountPoint(), cgroupId, "") + path, ctime, err := cgroup.GetCgroupPath(c.cgroups.GetDefaultCgroup().GetMountPoint(), cgroupId, "") if err == nil { - var stat syscall.Stat_t - if err = syscall.Stat(path, &stat); err == nil { - cgroupInfo, err = c.cgroupUpdate(cgroupId, path, time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec)) - if err != nil { - logger.Errorw("cgroupUpdate", "error", err) - } + cgroupInfo, err = c.cgroupUpdate(cgroupId, path, ctime, false) + if err != nil { + logger.Errorw("cgroupUpdate", "error", err) + } + } + + // No entry found: cgroup may have already been deleted. + // Add cgroupInfo to Containers struct with existing data. + // In this case, ctime is just an estimation (current time). + if errors.Is(err, fs.ErrNotExist) { + cgroupInfo, err = c.cgroupUpdate(cgroupId, path, time.Now(), true) + if err != nil { + logger.Errorw("cgroupUpdate", "error", err) } } diff --git a/pkg/ebpf/controlplane/controller.go b/pkg/ebpf/controlplane/controller.go index a141ff77767..0d6800fdcab 100644 --- a/pkg/ebpf/controlplane/controller.go +++ b/pkg/ebpf/controlplane/controller.go @@ -137,7 +137,7 @@ func (p *Controller) processCgroupMkdir(args []trace.Argument) error { if err != nil { return errfmt.WrapError(err) } - if info.Container.ContainerId == "" { + if info.Container.ContainerId == "" && !info.Dead { // If cgroupId is from a regular cgroup directory, and not the // container base directory (from known runtimes), it should be // removed from the containers bpf map.