From f03faf99aa50865a23f4fae91bab9d3ef506acc9 Mon Sep 17 00:00:00 2001 From: Sascha Grunert Date: Fri, 10 May 2024 10:21:42 +0200 Subject: [PATCH] Kill exec PIDs after main container exited Before applying this patch we killed the exec PIDs right away on container stop which leads into the failing e2e test: ``` [sig-node] [NodeFeature:SidecarContainers] Containers Lifecycle should terminate sidecars simultaneously if prestop doesn't exit ``` This regression is now fixed by killing the exec PIDs after the main container as well as in the same thread. Fixes https://github.com/kubernetes/kubernetes/issues/124743 Follow-up on https://github.com/cri-o/cri-o/pull/7937 Needs a cherry-pick since the enhancement got already backported into supported release branches. Signed-off-by: Sascha Grunert --- internal/oci/runtime_oci.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/oci/runtime_oci.go b/internal/oci/runtime_oci.go index 4237e1861d8..4e5d45af744 100644 --- a/internal/oci/runtime_oci.go +++ b/internal/oci/runtime_oci.go @@ -869,8 +869,6 @@ func (r *runtimeOCI) StopLoopForContainer(c *Container, bm kwait.BackoffManager) startTime := time.Now() - go c.KillExecPIDs() - // Allow for SIGINT to correctly interrupt the stop loop, especially // when CRI-O is run directly in the foreground in the terminal. ctx, stop := signal.NotifyContext(ctx, os.Interrupt) @@ -955,6 +953,10 @@ func (r *runtimeOCI) StopLoopForContainer(c *Container, bm kwait.BackoffManager) } }, bm, true, ctx.Done()) + // Kill the exec PIDs after the main container to avoid pod lifecycle regressions: + // Ref: https://github.com/kubernetes/kubernetes/issues/124743 + c.KillExecPIDs() + c.state.Finished = time.Now() c.opLock.Unlock() c.SetAsDoneStopping()