Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use persist dir for oom file #21523

Merged
merged 1 commit into from Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 10 additions & 0 deletions libpod/container_exec.go
Expand Up @@ -890,6 +890,13 @@ func (c *Container) execExitFileDir(sessionID string) string {
return filepath.Join(c.execBundlePath(sessionID), "exit")
}

// execPersistDir gets the path to the container's persist directory
// The persist directory container the exit file and oom file (if oomkilled)
// of a container
func (c *Container) execPersistDir(sessionID string) string {
return filepath.Join(c.execBundlePath(sessionID), "persist", c.ID())
}

// execOCILog returns the file path for the exec sessions oci log
func (c *Container) execOCILog(sessionID string) string {
if !c.ociRuntime.SupportsJSONErrors() {
Expand Down Expand Up @@ -917,6 +924,9 @@ func (c *Container) createExecBundle(sessionID string) (retErr error) {
return fmt.Errorf("creating OCI runtime exit file path %s: %w", c.execExitFileDir(sessionID), err)
}
}
if err := os.MkdirAll(c.execPersistDir(sessionID), execDirPermission); err != nil {
return fmt.Errorf("creating OCI runtime persist directory path %s: %w", c.execPersistDir(sessionID), err)
}
return nil
}

Expand Down
25 changes: 19 additions & 6 deletions libpod/container_internal.go
Expand Up @@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"strconv"
Expand Down Expand Up @@ -145,6 +146,10 @@ func (c *Container) exitFilePath() (string, error) {
return c.ociRuntime.ExitFilePath(c)
}

func (c *Container) oomFilePath() (string, error) {
return c.ociRuntime.OOMFilePath(c)
}

// Wait for the container's exit file to appear.
// When it does, update our state based on it.
func (c *Container) waitForExitFileAndSync() error {
Expand Down Expand Up @@ -181,6 +186,7 @@ func (c *Container) waitForExitFileAndSync() error {
// Handle the container exit file.
// The exit file is used to supply container exit time and exit code.
// This assumes the exit file already exists.
// Also check for an oom file to determine if the container was oom killed or not.
func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
c.state.FinishedTime = ctime.Created(fi)
statusCodeStr, err := os.ReadFile(exitFile)
Expand All @@ -194,7 +200,10 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
}
c.state.ExitCode = int32(statusCode)

oomFilePath := filepath.Join(c.bundlePath(), "oom")
oomFilePath, err := c.oomFilePath()
if err != nil {
return err
}
if _, err = os.Stat(oomFilePath); err == nil {
c.state.OOMKilled = true
}
Expand Down Expand Up @@ -758,11 +767,6 @@ func (c *Container) removeConmonFiles() error {
return fmt.Errorf("removing container %s winsz file: %w", c.ID(), err)
}

oomFile := filepath.Join(c.bundlePath(), "oom")
if err := os.Remove(oomFile); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("removing container %s OOM file: %w", c.ID(), err)
}

// Remove the exit file so we don't leak memory in tmpfs
exitFile, err := c.exitFilePath()
if err != nil {
Expand All @@ -772,6 +776,15 @@ func (c *Container) removeConmonFiles() error {
return fmt.Errorf("removing container %s exit file: %w", c.ID(), err)
}

// Remove the oom file
oomFile, err := c.oomFilePath()
if err != nil {
return err
}
if err := os.Remove(oomFile); err != nil && !errors.Is(err, fs.ErrNotExist) {
return fmt.Errorf("removing container %s oom file: %w", c.ID(), err)
}

return nil
}

Expand Down
6 changes: 6 additions & 0 deletions libpod/oci.go
Expand Up @@ -149,6 +149,12 @@ type OCIRuntime interface { //nolint:interfacebloat
// This is the path to that file for a given container.
ExitFilePath(ctr *Container) (string, error)

// OOMFilePath is the path to a container's oom file if it was oom killed.
// An oom file is only created when the container is oom killed. The existence
// of this file means that the container was oom killed.
// This is the path to that file for a given container.
OOMFilePath(ctr *Container) (string, error)

// RuntimeInfo returns verbose information about the runtime.
RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error)

Expand Down
37 changes: 30 additions & 7 deletions libpod/oci_conmon_common.go
Expand Up @@ -64,6 +64,7 @@ type ConmonOCIRuntime struct {
supportsKVM bool
supportsNoCgroups bool
enableKeyring bool
persistDir string
}

// Make a new Conmon-based OCI runtime with the given options.
Expand Down Expand Up @@ -143,13 +144,15 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime
}

runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
// The persist-dir is where conmon writes the exit file and oom file (if oom killed), we join the container ID to this path later on
runtime.persistDir = filepath.Join(runtime.tmpDir, "persist")

// Create the exit files and attach sockets directories
if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
// The directory is allowed to exist
if !os.IsExist(err) {
return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err)
}
return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err)
}
if err := os.MkdirAll(runtime.persistDir, 0750); err != nil {
return nil, fmt.Errorf("creating OCI runtime persist directory: %w", err)
}
return runtime, nil
}
Expand Down Expand Up @@ -940,6 +943,12 @@ func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
return filepath.Join(r.exitsDir, ctr.ID()), nil
}

// OOMFilePath is the path to a container's oom file.
// The oom file will only exist if the container was oom killed.
func (r *ConmonOCIRuntime) OOMFilePath(ctr *Container) (string, error) {
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
}

// RuntimeInfo provides information on the runtime.
func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
runtimePackage := version.Package(r.path)
Expand Down Expand Up @@ -1107,7 +1116,11 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
}

args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
persistDir := filepath.Join(r.persistDir, ctr.ID())
args, err := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, persistDir, ociLog, ctr.LogDriver(), logTag)
if err != nil {
return 0, err
}

if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" {
args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket))
Expand Down Expand Up @@ -1363,7 +1376,16 @@ func (r *ConmonOCIRuntime) configureConmonEnv() ([]string, error) {
}

// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string {
// func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
// Make the persists directory for the container after the ctr ID is appended to it in the caller
// This is needed as conmon writes the exit and oom file in the given persist directory path as just "exit" and "oom"
// So creating a directory with the container ID under the persist dir will help keep track of which container the
// exit and oom files belong to.
if err := os.MkdirAll(persistDir, 0750); err != nil {
return nil, fmt.Errorf("creating OCI runtime oom files directory for ctr %q: %w", ctr.ID(), err)
}

// set the conmon API version to be able to use the correct sync struct keys
args := []string{
"--api-version", "1",
Expand All @@ -1374,6 +1396,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
"-p", pidPath,
"-n", ctr.Name(),
"--exit-dir", exitDir,
"--persist-dir", persistDir,
"--full-attach",
}
if len(r.runtimeFlags) > 0 {
Expand Down Expand Up @@ -1436,7 +1459,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
logrus.Debugf("Running with no Cgroups")
args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
}
return args
return args, nil
}

// newPipe creates a unix socket pair for communication.
Expand Down
5 changes: 4 additions & 1 deletion libpod/oci_conmon_exec_common.go
Expand Up @@ -387,7 +387,10 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex
}
defer processFile.Close()

args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
args, err := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), c.execPersistDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
if err != nil {
return nil, nil, err
}

preserveFDs, filesToClose, extraFiles, err := getPreserveFdExtraFiles(options.PreserveFD, options.PreserveFDs)
if err != nil {
Expand Down
9 changes: 9 additions & 0 deletions libpod/oci_missing.go
Expand Up @@ -29,6 +29,8 @@ type MissingRuntime struct {
name string
// exitsDir is the directory for exit files.
exitsDir string
// persistDir is the directory for exit and oom files.
persistDir string
}

// Get a new MissingRuntime for the given name.
Expand All @@ -52,6 +54,7 @@ func getMissingRuntime(name string, r *Runtime) OCIRuntime {
newRuntime := new(MissingRuntime)
newRuntime.name = name
newRuntime.exitsDir = filepath.Join(r.config.Engine.TmpDir, "exits")
newRuntime.persistDir = filepath.Join(r.config.Engine.TmpDir, "persist")

missingRuntimes[name] = newRuntime

Expand Down Expand Up @@ -222,6 +225,12 @@ func (r *MissingRuntime) ExitFilePath(ctr *Container) (string, error) {
return filepath.Join(r.exitsDir, ctr.ID()), nil
}

// OOMFilePath returns the oom file path for a container.
// The oom file will only exist if the container was oom killed.
func (r *MissingRuntime) OOMFilePath(ctr *Container) (string, error) {
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
}

// RuntimeInfo returns information on the missing runtime
func (r *MissingRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
ocirt := define.OCIRuntimeInfo{
Expand Down
35 changes: 35 additions & 0 deletions test/e2e/run_memory_test.go
Expand Up @@ -70,4 +70,39 @@ var _ = Describe("Podman run memory", func() {
Expect(session.OutputToString()).To(Equal(limit))
})
}

It("podman run memory test on oomkilled container", func() {
mem := SystemExec("cat", []string{"/proc/sys/vm/overcommit_memory"})
mem.WaitWithDefaultTimeout()
if mem.OutputToString() != "0" {
Skip("overcommit memory is not set to 0")
}

ctrName := "oomkilled-ctr"
// create a container that gets oomkilled
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--read-only", "--memory-swap=20m", "--memory=20m", "--oom-score-adj=1000", ALPINE, "sort", "/dev/urandom"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitWithError())

inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(ExitCleanly())
// Check oomkilled and exit code values
Expect(inspect.OutputToString()).Should(ContainSubstring("true"))
Expect(inspect.OutputToString()).Should(ContainSubstring("137"))
})

It("podman run memory test on successfully exited container", func() {
ctrName := "success-ctr"
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--memory=40m", ALPINE, "echo", "hello"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())

inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(ExitCleanly())
// Check oomkilled and exit code values
Expect(inspect.OutputToString()).Should(ContainSubstring("false"))
Expect(inspect.OutputToString()).Should(ContainSubstring("0"))
})
})