Skip to content

Commit

Permalink
Merge pull request #18507 from mheon/fix_rm_depends
Browse files Browse the repository at this point in the history
Fix `podman rm -fa` with dependencies
  • Loading branch information
openshift-merge-robot committed Jun 12, 2023
2 parents 1e1efd8 + 2ebc900 commit 3cae574
Show file tree
Hide file tree
Showing 23 changed files with 511 additions and 250 deletions.
10 changes: 9 additions & 1 deletion cmd/podman/pods/rm.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,10 @@ func removePods(namesOrIDs []string, rmOptions entities.PodRmOptions, printIDs b
return nil
}
setExitCode(err)
return append(errs, err)
errs = append(errs, err)
if !strings.Contains(err.Error(), define.ErrRemovingCtrs.Error()) {
return errs
}
}

// in the cli, first we print out all the successful attempts
Expand All @@ -128,6 +131,11 @@ func removePods(namesOrIDs []string, rmOptions entities.PodRmOptions, printIDs b
}
setExitCode(r.Err)
errs = append(errs, r.Err)
for ctr, err := range r.RemovedCtrs {
if err != nil {
errs = append(errs, fmt.Errorf("error removing container %s from pod %s: %w", ctr, r.Id, err))
}
}
}
}
return errs
Expand Down
8 changes: 7 additions & 1 deletion libpod/container_graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,13 @@ func removeNode(ctx context.Context, node *containerNode, pod *Pod, force bool,
}

if !ctrErrored {
if err := node.container.runtime.removeContainer(ctx, node.container, force, false, true, false, timeout); err != nil {
opts := ctrRmOpts{
Force: force,
RemovePod: true,
Timeout: timeout,
}

if _, _, err := node.container.runtime.removeContainer(ctx, node.container, opts); err != nil {
ctrErrored = true
ctrErrors[node.id] = err
}
Expand Down
4 changes: 4 additions & 0 deletions libpod/define/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,4 +211,8 @@ var (
// ErrConmonVersionFormat is used when the expected version format of conmon
// has changed.
ErrConmonVersionFormat = "conmon version changed format"

// ErrRemovingCtrs indicates that there was an error removing all
// containers from a pod.
ErrRemovingCtrs = errors.New("removing pod containers")
)
8 changes: 8 additions & 0 deletions libpod/lock/shm/shm_lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,14 @@ shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code) {
goto CLEANUP_UNMAP;
}

// Ensure that recursive locking of a mutex by the same OS thread (which may
// refer to numerous goroutines) blocks.
ret_code = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL);
if (ret_code != 0) {
*error_code = -1 * ret_code;
goto CLEANUP_FREEATTR;
}

// Set mutexes to pshared - multiprocess-safe
ret_code = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
if (ret_code != 0) {
Expand Down
106 changes: 76 additions & 30 deletions libpod/oci_conmon_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,10 +356,20 @@ func generateResourceFile(res *spec.LinuxResources) (string, []string, error) {
// If all is set, send to all PIDs in the container.
// All is only supported if the container created cgroups.
func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) error {
if _, err := r.killContainer(ctr, signal, all, false); err != nil {
return err
}

return nil
}

// If captureStderr is requested, OCI runtime STDERR will be captured as a
// *bytes.buffer and returned; otherwise, it is set to os.Stderr.
func (r *ConmonOCIRuntime) killContainer(ctr *Container, signal uint, all, captureStderr bool) (*bytes.Buffer, error) {
logrus.Debugf("Sending signal %d to container %s", signal, ctr.ID())
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return err
return nil, err
}
env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
var args []string
Expand All @@ -369,19 +379,27 @@ func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool)
} else {
args = append(args, "kill", ctr.ID(), fmt.Sprintf("%d", signal))
}
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil {
var (
stderr io.Writer = os.Stderr
stderrBuffer *bytes.Buffer
)
if captureStderr {
stderrBuffer = new(bytes.Buffer)
stderr = stderrBuffer
}
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, stderr, env, r.path, args...); err != nil {
// Update container state - there's a chance we failed because
// the container exited in the meantime.
if err2 := r.UpdateContainerStatus(ctr); err2 != nil {
logrus.Infof("Error updating status for container %s: %v", ctr.ID(), err2)
}
if ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
return fmt.Errorf("%w: %s", define.ErrCtrStateInvalid, ctr.state.State)
return stderrBuffer, fmt.Errorf("%w: %s", define.ErrCtrStateInvalid, ctr.state.State)
}
return fmt.Errorf("sending signal to container %s: %w", ctr.ID(), err)
return stderrBuffer, fmt.Errorf("sending signal to container %s: %w", ctr.ID(), err)
}

return nil
return stderrBuffer, nil
}

// StopContainer stops a container, first using its given stop signal (or
Expand All @@ -400,23 +418,65 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
return nil
}

if timeout > 0 {
stopSignal := ctr.config.StopSignal
if stopSignal == 0 {
stopSignal = uint(syscall.SIGTERM)
killCtr := func(signal uint) (bool, error) {
stderr, err := r.killContainer(ctr, signal, all, true)

// Before handling error from KillContainer, convert STDERR to a []string
// (one string per line of output) and print it, ignoring known OCI runtime
// errors that we don't care about
stderrLines := strings.Split(stderr.String(), "\n")
for _, line := range stderrLines {
if line == "" {
continue
}
if strings.Contains(line, "container not running") || strings.Contains(line, "open pidfd: No such process") {
logrus.Debugf("Failure to kill container (already stopped?): logged %s", line)
continue
}
fmt.Fprintf(os.Stderr, "%s\n", line)
}
if err := r.KillContainer(ctr, stopSignal, all); err != nil {

if err != nil {
// There's an inherent race with the cleanup process (see
// #16142, #17142). If the container has already been marked as
// stopped or exited by the cleanup process, we can return
// immediately.
if errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
return true, nil
}

// If the PID is 0, then the container is already stopped.
if ctr.state.PID == 0 {
return true, nil
}

// Is the container gone?
// If so, it probably died between the first check and
// our sending the signal
// The container is stopped, so exit cleanly
err := unix.Kill(ctr.state.PID, 0)
if err == unix.ESRCH {
return nil
return true, nil
}

return false, err
}
return false, nil
}

if timeout > 0 {
stopSignal := ctr.config.StopSignal
if stopSignal == 0 {
stopSignal = uint(syscall.SIGTERM)
}

stopped, err := killCtr(stopSignal)
if err != nil {
return err
}
if stopped {
return nil
}

if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil {
logrus.Debugf("Timed out stopping container %s with %s, resorting to SIGKILL: %v", ctr.ID(), unix.SignalName(syscall.Signal(stopSignal)), err)
Expand All @@ -427,27 +487,13 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
}
}

// If the timeout was set to 0 or if stopping the container with the
// specified signal did not work, use the big hammer with SIGKILL.
if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil {
// There's an inherent race with the cleanup process (see
// #16142, #17142). If the container has already been marked as
// stopped or exited by the cleanup process, we can return
// immediately.
if errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
return nil
}

// If the PID is 0, then the container is already stopped.
if ctr.state.PID == 0 {
return nil
}
// Again, check if the container is gone. If it is, exit cleanly.
if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) {
return nil
}
stopped, err := killCtr(uint(unix.SIGKILL))
if err != nil {
return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err)
}
if stopped {
return nil
}

// Give runtime a few seconds to make it happen
if err := waitContainerStop(ctr, killContainerTimeout); err != nil {
Expand Down
7 changes: 6 additions & 1 deletion libpod/pod_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@ func (p *Pod) startInitContainers(ctx context.Context) error {
icLock := initCon.lock
icLock.Lock()
var time *uint
if err := p.runtime.removeContainer(ctx, initCon, false, false, true, false, time); err != nil {
opts := ctrRmOpts{
RemovePod: true,
Timeout: time,
}

if _, _, err := p.runtime.removeContainer(ctx, initCon, opts); err != nil {
icLock.Unlock()
return fmt.Errorf("failed to remove once init container %s: %w", initCon.ID(), err)
}
Expand Down
7 changes: 6 additions & 1 deletion libpod/reset.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,15 @@ func (r *Runtime) reset(ctx context.Context) error {
return err
}
for _, p := range pods {
if err := r.RemovePod(ctx, p, true, true, timeout); err != nil {
if ctrs, err := r.RemovePod(ctx, p, true, true, timeout); err != nil {
if errors.Is(err, define.ErrNoSuchPod) {
continue
}
for ctr, err := range ctrs {
if err != nil {
logrus.Errorf("Error removing pod %s container %s: %v", p.ID(), ctr, err)
}
}
logrus.Errorf("Removing Pod %s: %v", p.ID(), err)
}
}
Expand Down

0 comments on commit 3cae574

Please sign in to comment.