Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-1.26] OCPBUGS-33174: oci: keep track of exec PIDs and stop them on container stop #8105

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions internal/oci/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ type Container struct {
restore bool
restoreArchive string
restoreIsOCIImage bool
execPIDs map[int]bool
}

func (c *Container) CRIAttributes() *types.ContainerAttributes {
Expand Down Expand Up @@ -155,6 +156,7 @@ func NewContainer(id, name, bundlePath, logPath string, labels, crioAnnotations,
stopSignal: stopSignal,
stopTimeoutChan: make(chan int64, 10),
stopWatchers: []chan struct{}{},
execPIDs: map[int]bool{},
}
return c, nil
}
Expand Down Expand Up @@ -753,3 +755,58 @@ func (c *Container) RestoreIsOCIImage() bool {
func (c *Container) SetRestoreIsOCIImage(restoreIsOCIImage bool) {
c.restoreIsOCIImage = restoreIsOCIImage
}

// AddExecPID registers a PID associated with an exec session.
// It is tracked so exec sessions can be cancelled when the container is being stopped.
// If the PID is conmon, shouldKill should be false, as we should not call SIGKILL on conmon.
// If it is an exec session, shouldKill should be true, as we can't guarantee the exec process
// will have a SIGINT handler.
func (c *Container) AddExecPID(pid int, shouldKill bool) error {
c.stopLock.Lock()
defer c.stopLock.Unlock()

logrus.Debugf("Starting to track exec PID %d for container %s (should kill = %t) ...", pid, c.ID(), shouldKill)
if c.stopping {
return errors.New("cannot register an exec PID: container is stopping")
}
c.execPIDs[pid] = shouldKill
return nil
}

// DeleteExecPID is for deregistering a pid after it has exited.
func (c *Container) DeleteExecPID(pid int) {
c.stopLock.Lock()
defer c.stopLock.Unlock()
delete(c.execPIDs, pid)
}

// KillExecPIDs loops through the saved execPIDs and sends a signal to them.
// If shouldKill is true, the signal is SIGKILL. Otherwise, SIGINT.
func (c *Container) KillExecPIDs() {
c.stopLock.Lock()
toKill := c.execPIDs
c.stopLock.Unlock()

for len(toKill) != 0 {
unkilled := map[int]bool{}
for pid, shouldKill := range toKill {
if pid == 0 {
// The caller may accidentally register `0` (for instance if the PID of the cmd has already exited)
// and killing 0 is the way to ask the kernel to kill the whole process group of the calling process.
// We definitely don't want to kill the CRI-O process group, so add this check just in case.
continue
}
sig := syscall.SIGINT
if shouldKill {
sig = syscall.SIGKILL
}

logrus.Debugf("Stopping exec PID %d for container %s with signal %s ...", pid, c.ID(), unix.SignalName(sig))
if err := syscall.Kill(pid, sig); err != nil && !errors.Is(err, syscall.ESRCH) {
unkilled[pid] = shouldKill
}
}
toKill = unkilled
time.Sleep(stopProcessWatchSleep)
}
}
10 changes: 8 additions & 2 deletions internal/oci/oci_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,22 @@ func setSize(fd uintptr, size remotecommand.TerminalSize) error {
return unix.IoctlSetWinsize(int(fd), unix.TIOCSWINSZ, winsize)
}

func ttyCmd(execCmd *exec.Cmd, stdin io.Reader, stdout io.WriteCloser, resize <-chan remotecommand.TerminalSize) error {
func ttyCmd(execCmd *exec.Cmd, stdin io.Reader, stdout io.WriteCloser, resize <-chan remotecommand.TerminalSize, c *Container) error {
p, err := pty.Start(execCmd)
if err != nil {
return err
}
defer p.Close()

// make sure to close the stdout stream
defer stdout.Close()

pid := execCmd.Process.Pid
if err := c.AddExecPID(pid, true); err != nil {
return err
}

defer c.DeleteExecPID(pid)

kubecontainer.HandleResizing(resize, func(size remotecommand.TerminalSize) {
if err := setSize(p.Fd(), size); err != nil {
logrus.Warnf("Unable to set terminal size: %v", err)
Expand Down
21 changes: 20 additions & 1 deletion internal/oci/runtime_oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ func (r *runtimeOCI) ExecContainer(ctx context.Context, c *Container, cmd []stri
}
var cmdErr, copyError error
if tty {
cmdErr = ttyCmd(execCmd, stdin, stdout, resize)
cmdErr = ttyCmd(execCmd, stdin, stdout, resize, c)
} else {
var r, w *os.File
if stdin != nil {
Expand Down Expand Up @@ -474,6 +474,12 @@ func (r *runtimeOCI) ExecContainer(ctx context.Context, c *Container, cmd []stri
return err
}

pid := execCmd.Process.Pid
if err := c.AddExecPID(pid, true); err != nil {
return err
}
defer c.DeleteExecPID(pid)

// The read side of the pipe should be closed after the container process has been started.
if r != nil {
if err := r.Close(); err != nil {
Expand Down Expand Up @@ -649,6 +655,12 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
}
}()

// A neat trick we can do is register the exec PID before we send info down the start pipe.
// Doing so guarantees we can short circuit the exec process if the container is stopping already.
if err := c.AddExecPID(cmd.Process.Pid, false); err != nil {
return err
}

if r.handler.MonitorExecCgroup == config.MonitorExecCgroupContainer && r.config.InfraCtrCPUSet != "" {
// Update the exec's cgroup
containerPid, _, err := c.pid()
Expand Down Expand Up @@ -679,9 +691,14 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
}
}

// defer in case the Pid is changed after Wait()
pid := cmd.Process.Pid

// first, wait till the command is done
waitErr := cmd.Wait()

c.DeleteExecPID(pid)

// regardless of what is in waitErr
// we should attempt to decode the output of the parent pipe
// this allows us to catch TimedOutMessage, which will cause waitErr to not be nil
Expand Down Expand Up @@ -855,6 +872,8 @@ func (r *runtimeOCI) StopLoopForContainer(c *Container, bm kwait.BackoffManager)

startTime := time.Now()

go c.KillExecPIDs()

// Allow for SIGINT to correctly interrupt the stop loop, especially
// when CRI-O is run directly in the foreground in the terminal.
ctx, stop := signal.NotifyContext(ctx, os.Interrupt)
Expand Down
15 changes: 15 additions & 0 deletions test/ctr.bats
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,21 @@ function check_oci_annotation() {
[[ $(crictl exec --sync "$ctr_id" /bin/sh -c "for i in $(seq 1 50000000); do echo -n 'a'; done" | wc -c) -le 16777216 ]]
}

@test "ctr exec{,sync} should be cancelled when container is stopped" {
start_crio
ctr_id=$(crictl run "$TESTDATA"/container_sleep.json "$TESTDATA"/sandbox_config.json)

crictl exec --sync "$ctr_id" /bin/bash -c 'while true; do echo XXXXXXXXXXXXXXXXXXXXXXXX; done' &
pid1=$!
crictl exec "$ctr_id" /bin/bash -c 'while true; do echo XXXXXXXXXXXXXXXXXXXXXXXX; done' || true &
pid2=$!

sleep 1s

crictl stop "$ctr_id"
wait "$pid1" "$pid2"
}

@test "ctr device add" {
# In an user namespace we can only bind mount devices from the host, not mknod
# https://github.com/opencontainers/runc/blob/master/libcontainer/rootfs_linux.go#L480-L481
Expand Down
Loading