From 1df7878255fe2e31693994da6dbb65722e80e460 Mon Sep 17 00:00:00 2001 From: Mrudul Harwani Date: Mon, 31 Jul 2023 23:48:04 -0700 Subject: [PATCH] add retry attempts in logger to wait for task start Signed-off-by: Mrudul Harwani --- cmd/nerdctl/container_logs_test.go | 7 +++++ pkg/logging/logging.go | 46 +++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/cmd/nerdctl/container_logs_test.go b/cmd/nerdctl/container_logs_test.go index 47e451b505a..d7334dc9298 100644 --- a/cmd/nerdctl/container_logs_test.go +++ b/cmd/nerdctl/container_logs_test.go @@ -18,6 +18,7 @@ package main import ( "fmt" + "runtime" "strings" "testing" "time" @@ -160,6 +161,9 @@ func TestLogsWithRunningContainer(t *testing.T) { } func TestLogsWithoutNewlineOrEOF(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("FIXME: test does not work on Windows yet because containerd doesn't send an exit event appropriately after task exit on Windows") + } t.Parallel() base := testutil.NewBase(t) containerName := testutil.Identifier(t) @@ -172,6 +176,9 @@ func TestLogsWithoutNewlineOrEOF(t *testing.T) { } func TestLogsAfterRestartingContainer(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("FIXME: test does not work on Windows yet. Restarting a container fails with: failed to create shim task: hcs::CreateComputeSystem : The requested operation for attach namespace failed.: unknown") + } t.Parallel() base := testutil.NewBase(t) containerName := testutil.Identifier(t) diff --git a/pkg/logging/logging.go b/pkg/logging/logging.go index 0d4d496ec74..460455eaa14 100644 --- a/pkg/logging/logging.go +++ b/pkg/logging/logging.go @@ -27,6 +27,7 @@ import ( "path/filepath" "sort" "sync" + "time" "github.com/containerd/containerd" "github.com/containerd/containerd/errdefs" @@ -164,21 +165,38 @@ func getContainerWait(ctx context.Context, hostAddress string, config *logging.C if err != nil { return nil, err } + task, err := con.Task(ctx, nil) - if err != nil { + if err == nil { + return task.Wait(ctx) + } + if !errdefs.IsNotFound(err) { return nil, err } - return task.Wait(ctx) + + // If task was not found, it's possible that the container runtime is still being created. + // Retry every 100ms. + for { + select { + case <-ctx.Done(): + return nil, fmt.Errorf("timed out waiting for container task to start") + case <-time.Tick(100 * time.Millisecond): + task, err = con.Task(ctx, nil) + if err != nil { + if errdefs.IsNotFound(err) { + continue + } + return nil, err + } + return task.Wait(ctx) + } + } } func loggingProcessAdapter(ctx context.Context, driver Driver, dataStore, hostAddress string, config *logging.Config) error { if err := driver.PreProcess(dataStore, config); err != nil { return err } - exitCh, err := getContainerWait(ctx, hostAddress, config) - if err != nil { - return err - } // initialize goroutines to copy stdout and stderr streams to a closable pipe stdoutR, stdoutW := io.Pipe() @@ -220,9 +238,15 @@ func loggingProcessAdapter(ctx context.Context, driver Driver, dataStore, hostAd }() go func() { // close stdout and stderr upon container exit + defer stdoutW.Close() + defer stderrW.Close() + + exitCh, err := getContainerWait(ctx, hostAddress, config) + if err != nil { + logrus.Errorf("failed to get container task wait channel: %v", err) + return + } <-exitCh - stdoutW.Close() - stderrW.Close() }() wg.Wait() return driver.PostProcess() @@ -247,8 +271,8 @@ func loggerFunc(dataStore string) (logging.LoggerFunc, error) { return err } - lockFile := getLockPath(dataStore, config.Namespace, config.ID) - f, err := os.Create(lockFile) + loggerLock := getLockPath(dataStore, config.Namespace, config.ID) + f, err := os.Create(loggerLock) if err != nil { return err } @@ -257,7 +281,7 @@ func loggerFunc(dataStore string) (logging.LoggerFunc, error) { // the logger will obtain an exclusive lock on a file until the container is // stopped and the driver has finished processing all output, // so that waiting log viewers can be signalled when the process is complete. - return lockutil.WithDirLock(lockFile, func() error { + return lockutil.WithDirLock(loggerLock, func() error { if err := ready(); err != nil { return err }