Skip to content

Commit

Permalink
Use persist dir for exit and oom file
Browse files Browse the repository at this point in the history
Conmon writes the exit file and oom file (if container
was oom killed) to the persist directory. This directory
is retained across reboots as well.
Update podman to create a persist-dir/ctr-id for the exit
and oom files for each container to be written to. The exit
and oom state of container is set after reading the files
from the persist-dir/ctr-id directory.

Signed-off-by: Urvashi Mohnani <umohnani@redhat.com>
  • Loading branch information
umohnani8 committed Feb 7, 2024
1 parent f4f96a2 commit 0d1383f
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 26 deletions.
14 changes: 8 additions & 6 deletions libpod/container_exec.go
Expand Up @@ -885,9 +885,11 @@ func (c *Container) execAttachSocketPath(sessionID string) (string, error) {
return c.ociRuntime.ExecAttachSocketPath(c, sessionID)
}

// execExitFileDir gets the path to the container's exit file
func (c *Container) execExitFileDir(sessionID string) string {
return filepath.Join(c.execBundlePath(sessionID), "exit")
// execPersistDir gets the path to the container's persist directory
// The persist directory container the exit file and oom file (if oomkilled)
// of a container
func (c *Container) execPersistDir(sessionID string) string {
return filepath.Join(c.execBundlePath(sessionID), "persist", c.ID())
}

// execOCILog returns the file path for the exec sessions oci log
Expand All @@ -911,10 +913,10 @@ func (c *Container) createExecBundle(sessionID string) (retErr error) {
}
}
}()
if err := os.MkdirAll(c.execExitFileDir(sessionID), execDirPermission); err != nil {
if err := os.MkdirAll(c.execPersistDir(sessionID), execDirPermission); err != nil {
// The directory is allowed to exist
if !os.IsExist(err) {
return fmt.Errorf("creating OCI runtime exit file path %s: %w", c.execExitFileDir(sessionID), err)
return fmt.Errorf("creating OCI runtime persist directory path %s: %w", c.execPersistDir(sessionID), err)
}
}
return nil
Expand All @@ -923,7 +925,7 @@ func (c *Container) createExecBundle(sessionID string) (retErr error) {
// readExecExitCode reads the exit file for an exec session and returns
// the exit code
func (c *Container) readExecExitCode(sessionID string) (int, error) {
exitFile := filepath.Join(c.execExitFileDir(sessionID), c.ID())
exitFile := filepath.Join(c.execPersistDir(sessionID), "exit")
chWait := make(chan error)
defer close(chWait)

Expand Down
24 changes: 18 additions & 6 deletions libpod/container_internal.go
Expand Up @@ -145,6 +145,10 @@ func (c *Container) exitFilePath() (string, error) {
return c.ociRuntime.ExitFilePath(c)
}

func (c *Container) oomFilePath() (string, error) {
return c.ociRuntime.OOMFilePath(c)
}

// Wait for the container's exit file to appear.
// When it does, update our state based on it.
func (c *Container) waitForExitFileAndSync() error {
Expand Down Expand Up @@ -181,6 +185,7 @@ func (c *Container) waitForExitFileAndSync() error {
// Handle the container exit file.
// The exit file is used to supply container exit time and exit code.
// This assumes the exit file already exists.
// Also check for an oom file to determine if the container was oom killed or not.
func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
c.state.FinishedTime = ctime.Created(fi)
statusCodeStr, err := os.ReadFile(exitFile)
Expand All @@ -194,7 +199,10 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
}
c.state.ExitCode = int32(statusCode)

oomFilePath := filepath.Join(c.bundlePath(), "oom")
oomFilePath, err := c.oomFilePath()
if err != nil {
return err
}
if _, err = os.Stat(oomFilePath); err == nil {
c.state.OOMKilled = true
}
Expand Down Expand Up @@ -742,11 +750,6 @@ func (c *Container) removeConmonFiles() error {
return fmt.Errorf("removing container %s winsz file: %w", c.ID(), err)
}

oomFile := filepath.Join(c.bundlePath(), "oom")
if err := os.Remove(oomFile); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("removing container %s OOM file: %w", c.ID(), err)
}

// Remove the exit file so we don't leak memory in tmpfs
exitFile, err := c.exitFilePath()
if err != nil {
Expand All @@ -756,6 +759,15 @@ func (c *Container) removeConmonFiles() error {
return fmt.Errorf("removing container %s exit file: %w", c.ID(), err)
}

// Remove the oom file
oomFile, err := c.oomFilePath()
if err != nil {
return err
}
if err := os.Remove(oomFile); err != nil && !errors.Is(err, fs.ErrNotExist) {
return fmt.Errorf("removing container %s oom file: %w", c.ID(), err)
}

return nil
}

Expand Down
6 changes: 6 additions & 0 deletions libpod/oci.go
Expand Up @@ -149,6 +149,12 @@ type OCIRuntime interface { //nolint:interfacebloat
// This is the path to that file for a given container.
ExitFilePath(ctr *Container) (string, error)

// OOMFilePath is the path to a container's oom file if it was oom killed.
// An oom file is only created when the container is oom killed. The existence
// of this file means that the container was oom killed.
// This is the path to that file for a given container.
OOMFilePath(ctr *Container) (string, error)

// RuntimeInfo returns verbose information about the runtime.
RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error)

Expand Down
42 changes: 33 additions & 9 deletions libpod/oci_conmon_common.go
Expand Up @@ -55,7 +55,6 @@ type ConmonOCIRuntime struct {
conmonPath string
conmonEnv []string
tmpDir string
exitsDir string
logSizeMax int64
noPivot bool
reservePorts bool
Expand All @@ -64,6 +63,7 @@ type ConmonOCIRuntime struct {
supportsKVM bool
supportsNoCgroups bool
enableKeyring bool
persistDir string
}

// Make a new Conmon-based OCI runtime with the given options.
Expand Down Expand Up @@ -142,13 +142,14 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime
return nil, fmt.Errorf("no valid executable found for OCI runtime %s: %w", name, define.ErrInvalidArg)
}

runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
// The persist-dir is where conmon writes the exit file and oom file (if oom killed), we join the container ID to this path later on
runtime.persistDir = filepath.Join(runtime.tmpDir, "persist")

// Create the exit files and attach sockets directories
if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
if err := os.MkdirAll(runtime.persistDir, 0750); err != nil {
// The directory is allowed to exist
if !os.IsExist(err) {
return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err)
return nil, fmt.Errorf("creating OCI runtime persist directory: %w", err)
}
}
return runtime, nil
Expand Down Expand Up @@ -937,7 +938,14 @@ func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
if ctr == nil {
return "", fmt.Errorf("must provide a valid container to get exit file path: %w", define.ErrInvalidArg)
}
return filepath.Join(r.exitsDir, ctr.ID()), nil
return filepath.Join(r.persistDir, ctr.ID(), "exit"), nil
}

func (r *ConmonOCIRuntime) OOMFilePath(ctr *Container) (string, error) {
if ctr == nil {
return "", fmt.Errorf("must provide a valid container to get oom file path: %w", define.ErrInvalidArg)
}
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
}

// RuntimeInfo provides information on the runtime.
Expand Down Expand Up @@ -1107,7 +1115,11 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
}

args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
persistDir := filepath.Join(r.persistDir, ctr.ID())
args, err := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), persistDir, ociLog, ctr.LogDriver(), logTag)
if err != nil {
return 0, err
}

if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" {
args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket))
Expand Down Expand Up @@ -1363,7 +1375,19 @@ func (r *ConmonOCIRuntime) configureConmonEnv() ([]string, error) {
}

// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string {
// func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
// Make the persists directory for the container after the ctr ID is appended to it in the caller
// This is needed as conmon writes the exit and oom file in the given persist directory path as just "exit" and "oom"
// So creating a directory with the container ID under the persist dir will help keep track of which container the
// exit and oom files belong to.
if err := os.MkdirAll(persistDir, 0750); err != nil {
// The directory is allowed to exist
if !os.IsExist(err) {
return nil, fmt.Errorf("creating OCI runtime oom files directory for ctr %q: %w", ctr.ID(), err)
}
}

// set the conmon API version to be able to use the correct sync struct keys
args := []string{
"--api-version", "1",
Expand All @@ -1373,7 +1397,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
"-b", bundlePath,
"-p", pidPath,
"-n", ctr.Name(),
"--exit-dir", exitDir,
"--persist-dir", persistDir,
"--full-attach",
}
if len(r.runtimeFlags) > 0 {
Expand Down Expand Up @@ -1436,7 +1460,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
logrus.Debugf("Running with no Cgroups")
args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
}
return args
return args, nil
}

// newPipe creates a unix socket pair for communication.
Expand Down
5 changes: 4 additions & 1 deletion libpod/oci_conmon_exec_common.go
Expand Up @@ -387,7 +387,10 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex
}
defer processFile.Close()

args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
args, err := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execPersistDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
if err != nil {
return nil, nil, err
}

preserveFDs, filesToClose, extraFiles, err := getPreserveFdExtraFiles(options.PreserveFD, options.PreserveFDs)
if err != nil {
Expand Down
17 changes: 13 additions & 4 deletions libpod/oci_missing.go
Expand Up @@ -27,8 +27,8 @@ var (
type MissingRuntime struct {
// Name is the name of the missing runtime. Will be used in errors.
name string
// exitsDir is the directory for exit files.
exitsDir string
// persistDir is the directory for exit and oom files.
persistDir string
}

// Get a new MissingRuntime for the given name.
Expand All @@ -51,7 +51,7 @@ func getMissingRuntime(name string, r *Runtime) OCIRuntime {

newRuntime := new(MissingRuntime)
newRuntime.name = name
newRuntime.exitsDir = filepath.Join(r.config.Engine.TmpDir, "exits")
newRuntime.persistDir = filepath.Join(r.config.Engine.TmpDir, "persist")

missingRuntimes[name] = newRuntime

Expand Down Expand Up @@ -219,7 +219,16 @@ func (r *MissingRuntime) ExitFilePath(ctr *Container) (string, error) {
if ctr == nil {
return "", fmt.Errorf("must provide a valid container to get exit file path: %w", define.ErrInvalidArg)
}
return filepath.Join(r.exitsDir, ctr.ID()), nil
return filepath.Join(r.persistDir, ctr.ID(), "exit"), nil
}

// OOMFilePath returns the oom file path for a container.
// The oom file will only exist if the container was oom killed.
func (r *MissingRuntime) OOMFilePath(ctr *Container) (string, error) {
if ctr == nil {
return "", fmt.Errorf("must provide a valid container to get oom file path: %w", define.ErrInvalidArg)
}
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
}

// RuntimeInfo returns information on the missing runtime
Expand Down
35 changes: 35 additions & 0 deletions test/e2e/run_memory_test.go
Expand Up @@ -70,4 +70,39 @@ var _ = Describe("Podman run memory", func() {
Expect(session.OutputToString()).To(Equal(limit))
})
}

It("podman run memory test on oomkilled container", func() {
mem := SystemExec("cat", []string{"/proc/sys/vm/overcommit_memory"})
mem.WaitWithDefaultTimeout()
if mem.OutputToString() != "0" {
Skip("overcommit memory is not set to 0")
}

ctrName := "oomkilled-ctr"
// create a container that gets oomkilled
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--read-only", "--memory-swap=20m", "--memory=20m", "--oom-score-adj=1000", ALPINE, "sort", "/dev/urandom"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitWithError())

inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(ExitCleanly())
// Check oomkilled and exit code values
Expect(inspect.OutputToString()).Should(ContainSubstring("true"))
Expect(inspect.OutputToString()).Should(ContainSubstring("137"))
})

It("podman run memory test on successfully exited container", func() {
ctrName := "success-ctr"
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--memory=40m", ALPINE, "echo", "hello"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())

inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(ExitCleanly())
// Check oomkilled and exit code values
Expect(inspect.OutputToString()).Should(ContainSubstring("false"))
Expect(inspect.OutputToString()).Should(ContainSubstring("0"))
})
})

0 comments on commit 0d1383f

Please sign in to comment.