Skip to content

Commit

Permalink
Merge pull request #5837 from donpenney/exec-cgroup
Browse files Browse the repository at this point in the history
Exec cgroup
  • Loading branch information
openshift-merge-robot committed May 10, 2022
2 parents 895030c + 3d516c5 commit 0ba47c9
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 5 deletions.
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ dependencies:
match: version

- name: conmon
version: v2.0.26
version: v2.0.27
refPaths:
- path: scripts/versions
match: conmon
Expand Down
23 changes: 23 additions & 0 deletions internal/config/cgmgr/cgmgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

"github.com/containers/podman/v3/pkg/cgroups"
"github.com/cri-o/cri-o/internal/config/node"
libctr "github.com/opencontainers/runc/libcontainer/cgroups"
rspec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -150,3 +151,25 @@ func createSandboxCgroup(sbParent, containerID string, mgr CgroupManager) error
_, err = cgroups.New(path, &rspec.LinuxResources{})
return err
}

// MoveProcessToContainerCgroup moves process to the container cgroup
func MoveProcessToContainerCgroup(containerPid, commandPid int) error {
parentCgroupFile := fmt.Sprintf("/proc/%d/cgroup", containerPid)
cgmap, err := libctr.ParseCgroupFile(parentCgroupFile)
if err != nil {
return err
}

var dir string
for controller, path := range cgmap {
// For cgroups V2, controller will be an empty string
dir = filepath.Join("/sys/fs/cgroup", controller, path)

if libctr.PathExists(dir) {
if err := libctr.WriteCgroupProc(dir, commandPid); err != nil {
return err
}
}
}
return nil
}
86 changes: 83 additions & 3 deletions internal/oci/runtime_oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/containernetworking/plugins/pkg/ns"
conmonconfig "github.com/containers/conmon/runner/config"
"github.com/containers/storage/pkg/pools"
"github.com/cri-o/cri-o/internal/config/cgmgr"
"github.com/cri-o/cri-o/internal/log"
"github.com/cri-o/cri-o/pkg/config"
"github.com/cri-o/cri-o/server/metrics"
Expand Down Expand Up @@ -440,7 +441,17 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
Err: err,
}
}

childStartPipe, parentStartPipe, err := newPipe()
if err != nil {
return nil, &ExecSyncError{
ExitCode: -1,
Err: err,
}
}

defer parentPipe.Close()
defer parentStartPipe.Close()
defer func() {
if e := os.Remove(pidFile); e != nil {
log.Warnf(ctx, "Could not remove temporary PID file %s", pidFile)
Expand Down Expand Up @@ -495,22 +506,40 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
"--exec-process-spec", processFile,
"--runtime-arg", fmt.Sprintf("%s=%s", rootFlag, r.root))

cmd := cmdrunner.Command(r.handler.MonitorPath, args...) // nolint: gosec
var cmd *exec.Cmd

if r.handler.MonitorExecCgroup == config.MonitorExecCgroupDefault || r.config.InfraCtrCPUSet == "" { // nolint: gocritic
cmd = cmdrunner.Command(r.handler.MonitorPath, args...) // nolint: gosec
} else if r.handler.MonitorExecCgroup == config.MonitorExecCgroupContainer {
cmd = exec.Command(r.handler.MonitorPath, args...) // nolint: gosec
} else {
msg := fmt.Sprintf("Unsupported monitor_exec_cgroup value: %s", r.handler.MonitorExecCgroup)
return &types.ExecSyncResponse{
Stderr: []byte(msg),
ExitCode: -1,
}, nil
}

var stdoutBuf, stderrBuf bytes.Buffer
cmd.Stdout = &stdoutBuf
cmd.Stderr = &stderrBuf
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe)

cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe)
// 0, 1 and 2 are stdin, stdout and stderr
cmd.Env = r.handler.MonitorEnv
cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3))
cmd.Env = append(cmd.Env,
fmt.Sprintf("_OCI_SYNCPIPE=%d", 3),
fmt.Sprintf("_OCI_STARTPIPE=%d", 4))

if v, found := os.LookupEnv("XDG_RUNTIME_DIR"); found {
cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", v))
}

err = cmd.Start()
if err != nil {
childPipe.Close()
childStartPipe.Close()

return nil, &ExecSyncError{
Stdout: stdoutBuf,
Stderr: stderrBuf,
Expand All @@ -521,6 +550,57 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman

// We don't need childPipe on the parent side
childPipe.Close()
childStartPipe.Close()

// Create new scope to reduce cleanup code.
if err := func() (retErr error) {
defer func() {
if retErr != nil {
// We need to always kill and wait on this process.
// Failing to do so will cause us to leak a zombie.
killErr := cmd.Process.Kill()
waitErr := cmd.Wait()
if killErr != nil {
retErr = errors.Wrapf(retErr, "failed to kill %+v after failing with", killErr)
}
// Per https://pkg.go.dev/os#ProcessState.ExitCode, the exit code is -1 when the process died because
// of a signal. We expect this in this case, as we've just killed it with a signal. Don't append the
// error in this case to reduce noise.
if exitErr, ok := waitErr.(*exec.ExitError); !ok || exitErr.ExitCode() != -1 {
retErr = errors.Wrapf(retErr, "failed to wait %+v after failing with", waitErr)
}
}
}()

if r.handler.MonitorExecCgroup == config.MonitorExecCgroupContainer && r.config.InfraCtrCPUSet != "" {
// Update the exec's cgroup
containerPid, err := c.pid()
if err != nil {
return err
}

err = cgmgr.MoveProcessToContainerCgroup(containerPid, cmd.Process.Pid)
if err != nil {
return err
}
}

// Unblock children
someData := []byte{0}
_, err = parentStartPipe.Write(someData)
if err != nil {
return err
}

return nil
}(); err != nil {
return nil, &ExecSyncError{
Stdout: stdoutBuf,
Stderr: stderrBuf,
ExitCode: -1,
Err: err,
}
}

// first, wait till the command is done
waitErr := cmd.Wait()
Expand Down
5 changes: 5 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ const (
RuntimeTypeVMBinaryPattern = "containerd-shim-([a-zA-Z0-9\\-\\+])+-v2"
tasksetBinary = "taskset"
defaultMonitorCgroup = "system.slice"
MonitorExecCgroupDefault = ""
MonitorExecCgroupContainer = "container"
)

// Config represents the entire set of configuration values that can be set for
Expand Down Expand Up @@ -205,6 +207,9 @@ type RuntimeHandler struct {
MonitorPath string `toml:"monitor_path,omitempty"`
MonitorCgroup string `toml:"monitor_cgroup,omitempty"`
MonitorEnv []string `toml:"monitor_env,omitempty"`

// MonitorExecCgroup indicates whether to move exec probes to the container's cgroup.
MonitorExecCgroup string `toml:"monitor_exec_cgroup,omitempty"`
}

// Multiple runtime Handlers in a map
Expand Down
3 changes: 3 additions & 0 deletions pkg/config/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -1093,6 +1093,8 @@ const templateStringCrioRuntimeRuntimesRuntimeHandler = `# The "crio.runtime.run
# "io.kubernetes.cri-o.UnifiedCgroup.$CTR_NAME" for configuring the cgroup v2 unified block for a container.
# "io.containers.trace-syscall" for tracing syscalls via the OCI seccomp BPF hook.
# "io.kubernetes.cri.rdt-class" for setting the RDT class of a container
# - monitor_exec_cgroup (optional, string): if set to "container", indicates exec probes
# should be moved to the container's cgroup
{{ range $runtime_name, $runtime_handler := .Runtimes }}
{{ $.Comment }}[crio.runtime.runtimes.{{ $runtime_name }}]
Expand All @@ -1113,6 +1115,7 @@ const templateStringCrioRuntimeRuntimesRuntimeHandler = `# The "crio.runtime.run
{{ range $opt := $runtime_handler.MonitorEnv }}{{ $.Comment }}{{ printf "\t%q,\n" $opt }}{{ end }}{{ $.Comment }}]
{{ $.Comment }}{{ end }}
{{ $.Comment }}monitor_cgroup = "{{ $runtime_handler.MonitorCgroup }}"
{{ $.Comment }}monitor_exec_cgroup = "{{ $runtime_handler.MonitorExecCgroup }}"
{{ $.Comment }}{{ end }}
# crun is a fast and lightweight fully featured OCI runtime and C library for
Expand Down
2 changes: 1 addition & 1 deletion scripts/versions
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -euo pipefail
# Versions to be used
declare -A VERSIONS=(
["cni-plugins"]=v1.1.1
["conmon"]=v2.0.26
["conmon"]=v2.0.27
["cri-tools"]=v1.23.0
["runc"]=v1.0.2
["crun"]=0.20.1
Expand Down

0 comments on commit 0ba47c9

Please sign in to comment.