Skip to content

Commit

Permalink
fix (cgroups): already dead edge case
Browse files Browse the repository at this point in the history
Certain kubernetes version make use of short lived cgroups for various
tasks (for example log rotation). These cgroups will generate events and
a very quick cgroup_rmdir event.
As such, many related events to the cgroup will attempt to query its
directory through the recursive path and not find it.

Commit adds a "Dead" field to the cgroup info to indicate a cgroup which
has already been removed. Various logical sections can refer to it if
its relevant to them, and more importantly, additional queries will not
be attempted.

Bonus: optimize away additional Stat syscall in containers by returning
the directory ctime in cgroup.GetCgroupPath.
  • Loading branch information
NDStrahilevitz committed Jul 18, 2023
1 parent b0b9029 commit b3d043c
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 25 deletions.
16 changes: 10 additions & 6 deletions pkg/cgroup/cgroup.go
Expand Up @@ -8,6 +8,7 @@ import (
"strconv"
"strings"
"syscall"
"time"

"github.com/aquasecurity/tracee/pkg/errfmt"
"github.com/aquasecurity/tracee/pkg/logger"
Expand Down Expand Up @@ -389,10 +390,12 @@ func GetCgroupControllerHierarchy(subsys string) (int, error) {
// given cgroupId and subPath (related to cgroup fs root dir). If subPath is
// empty, then all directories from cgroup fs will be searched for the given
// cgroupId.
func GetCgroupPath(rootDir string, cgroupId uint64, subPath string) (string, error) {
//
// Returns found cgroup path, its ctime, and an error if relevant
func GetCgroupPath(rootDir string, cgroupId uint64, subPath string) (string, time.Time, error) {
entries, err := os.ReadDir(rootDir)
if err != nil {
return "", errfmt.WrapError(err)
return "", time.Time{}, errfmt.WrapError(err)
}

for _, entry := range entries {
Expand All @@ -407,17 +410,18 @@ func GetCgroupPath(rootDir string, cgroupId uint64, subPath string) (string, err
if err := syscall.Stat(entryPath, &stat); err == nil {
// Check if this cgroup path belongs to cgroupId
if (stat.Ino & 0xFFFFFFFF) == (cgroupId & 0xFFFFFFFF) {
return entryPath, nil
ctime := time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec)
return entryPath, ctime, nil
}
}
}

// No match at this dir level: continue recursively
path, err := GetCgroupPath(entryPath, cgroupId, subPath)
path, ctime, err := GetCgroupPath(entryPath, cgroupId, subPath)
if err == nil {
return path, nil
return path, ctime, nil
}
}

return "", fs.ErrNotExist
return "", time.Time{}, fs.ErrNotExist
}
49 changes: 31 additions & 18 deletions pkg/containers/containers.go
Expand Up @@ -3,6 +3,7 @@ package containers
import (
"bufio"
"context"
"errors"
"fmt"
"io/fs"
"os"
Expand Down Expand Up @@ -39,6 +40,7 @@ type CgroupInfo struct {
Runtime cruntime.RuntimeId
ContainerRoot bool // is the cgroup directory the root of its container
Ctime time.Time
Dead bool // is the cgroup deleted
expiresAt time.Time
}

Expand Down Expand Up @@ -149,7 +151,7 @@ func (c *Containers) populate() error {

inodeNumber := stat.Ino
statusChange := time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec)
_, err = c.cgroupUpdate(inodeNumber, path, statusChange)
_, err = c.cgroupUpdate(inodeNumber, path, statusChange, false)

return errfmt.WrapError(err)
}
Expand All @@ -162,7 +164,7 @@ func (c *Containers) populate() error {
// NOTE: ALL given cgroup dir paths are stored in CgroupInfo map.
// NOTE: not thread-safe, lock should be placed in the external calling function, depending
// on the transaction length.
func (c *Containers) cgroupUpdate(cgroupId uint64, path string, ctime time.Time) (CgroupInfo, error) {
func (c *Containers) cgroupUpdate(cgroupId uint64, path string, ctime time.Time, dead bool) (CgroupInfo, error) {
// Cgroup paths should be stored and evaluated relative to the mountpoint,
// trim it from the path.
path = strings.TrimPrefix(path, c.cgroups.GetDefaultCgroup().GetMountPoint())
Expand All @@ -177,6 +179,7 @@ func (c *Containers) cgroupUpdate(cgroupId uint64, path string, ctime time.Time)
Runtime: containerRuntime,
ContainerRoot: isRoot,
Ctime: ctime,
Dead: dead,
}

c.cgroupsMap[uint32(cgroupId)] = info
Expand Down Expand Up @@ -207,6 +210,10 @@ func (c *Containers) EnrichCgroupInfo(cgroupId uint64) (cruntime.ContainerMetada
return metadata, errfmt.Errorf("no containerId")
}

if info.Dead {
return metadata, errfmt.Errorf("container already deleted")
}

if info.Container.Image != "" {
// If already enriched (from control plane) - short circuit and return
return info.Container, nil
Expand Down Expand Up @@ -293,9 +300,10 @@ func getContainerIdFromCgroup(cgroupPath string) (string, cruntime.RuntimeId, bo
}

// CgroupRemove removes cgroupInfo of deleted cgroup dir from Containers struct.
// NOTE: Expiration logic of 5 seconds to avoid race conditions (if cgroup dir
// NOTE: Expiration logic of 30 seconds to avoid race conditions (if cgroup dir
// event arrives too fast and its cgroupInfo data is still needed).
func (c *Containers) CgroupRemove(cgroupId uint64, hierarchyID uint32) {
const expiryTime = 30 * time.Second
// cgroupv1: no need to check other controllers than the default
switch c.cgroups.GetDefaultCgroup().(type) {
case *cgroup.CgroupV1:
Expand All @@ -321,7 +329,8 @@ func (c *Containers) CgroupRemove(cgroupId uint64, hierarchyID uint32) {
c.deleted = deleted

if info, ok := c.cgroupsMap[uint32(cgroupId)]; ok {
info.expiresAt = now.Add(5 * time.Second)
info.expiresAt = now.Add(expiryTime)
info.Dead = true
c.cgroupsMap[uint32(cgroupId)] = info
c.deleted = append(c.deleted, cgroupId)
}
Expand All @@ -341,19 +350,16 @@ func (c *Containers) CgroupMkdir(cgroupId uint64, subPath string, hierarchyID ui
c.cgroupsMutex.Lock()
defer c.cgroupsMutex.Unlock()
curTime := time.Now()
path, err := cgroup.GetCgroupPath(c.cgroups.GetDefaultCgroup().GetMountPoint(), cgroupId, subPath)
path, ctime, err := cgroup.GetCgroupPath(c.cgroups.GetDefaultCgroup().GetMountPoint(), cgroupId, subPath)
if err == nil {
var stat syscall.Stat_t
if err := syscall.Stat(path, &stat); err == nil {
// Add cgroupInfo to Containers struct w/ found path (and its last modification time)
return c.cgroupUpdate(cgroupId, path, time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec))
}
// Add cgroupInfo to Containers struct w/ found path (and its last modification time)
return c.cgroupUpdate(cgroupId, path, ctime, false)
}

// No entry found: container may have already exited.
// Add cgroupInfo to Containers struct with existing data.
// In this case, ctime is just an estimation (current time).
return c.cgroupUpdate(cgroupId, subPath, curTime)
return c.cgroupUpdate(cgroupId, subPath, curTime, true)
}

// FindContainerCgroupID32LSB returns the 32 LSB of the Cgroup ID for a given container ID
Expand Down Expand Up @@ -385,14 +391,21 @@ func (c *Containers) GetCgroupInfo(cgroupId uint64) CgroupInfo {
c.cgroupsMutex.Lock()
defer c.cgroupsMutex.Unlock()

path, err := cgroup.GetCgroupPath(c.cgroups.GetDefaultCgroup().GetMountPoint(), cgroupId, "")
path, ctime, err := cgroup.GetCgroupPath(c.cgroups.GetDefaultCgroup().GetMountPoint(), cgroupId, "")
if err == nil {
var stat syscall.Stat_t
if err = syscall.Stat(path, &stat); err == nil {
cgroupInfo, err = c.cgroupUpdate(cgroupId, path, time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec))
if err != nil {
logger.Errorw("cgroupUpdate", "error", err)
}
cgroupInfo, err = c.cgroupUpdate(cgroupId, path, ctime, false)
if err != nil {
logger.Errorw("cgroupUpdate", "error", err)
}
}

// No entry found: cgroup may have already been deleted.
// Add cgroupInfo to Containers struct with existing data.
// In this case, ctime is just an estimation (current time).
if errors.Is(err, fs.ErrNotExist) {
cgroupInfo, err = c.cgroupUpdate(cgroupId, path, time.Now(), true)
if err != nil {
logger.Errorw("cgroupUpdate", "error", err)
}
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/ebpf/controlplane/controller.go
Expand Up @@ -137,7 +137,7 @@ func (p *Controller) processCgroupMkdir(args []trace.Argument) error {
if err != nil {
return errfmt.WrapError(err)
}
if info.Container.ContainerId == "" {
if info.Container.ContainerId == "" && !info.Dead {
// If cgroupId is from a regular cgroup directory, and not the
// container base directory (from known runtimes), it should be
// removed from the containers bpf map.
Expand Down

0 comments on commit b3d043c

Please sign in to comment.