diff --git a/CHANGELOG.md b/CHANGELOG.md index f841d7a8..070cd904 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ ## UNRELEASED +FEATURES +* Add a `app-entrypoint` subcommand which can be used to delay application + shutdown after receing a TERM signal to support graceful shutdown in ECS. + [[GH-48](https://github.com/hashicorp/consul-ecs/pull/48)] + ## 0.2.0 (November 16, 2021) BREAKING CHANGES diff --git a/commands.go b/commands.go index d84a44ab..22226e46 100644 --- a/commands.go +++ b/commands.go @@ -4,6 +4,7 @@ import ( "os" cmdController "github.com/hashicorp/consul-ecs/subcommand/acl-controller" + cmdAppEntrypoint "github.com/hashicorp/consul-ecs/subcommand/app-entrypoint" cmdEnvoyEntrypoint "github.com/hashicorp/consul-ecs/subcommand/envoy-entrypoint" cmdHealthSync "github.com/hashicorp/consul-ecs/subcommand/health-sync" cmdMeshInit "github.com/hashicorp/consul-ecs/subcommand/mesh-init" @@ -34,6 +35,9 @@ func init() { "envoy-entrypoint": func() (cli.Command, error) { return &cmdEnvoyEntrypoint.Command{UI: ui}, nil }, + "app-entrypoint": func() (cli.Command, error) { + return &cmdAppEntrypoint.Command{UI: ui}, nil + }, } } diff --git a/subcommand/envoy-entrypoint/envoy.go b/entrypoint/cmd.go similarity index 65% rename from subcommand/envoy-entrypoint/envoy.go rename to entrypoint/cmd.go index 552a65ad..e657d5e6 100644 --- a/subcommand/envoy-entrypoint/envoy.go +++ b/entrypoint/cmd.go @@ -1,7 +1,7 @@ //go:build !windows // +build !windows -package envoyentrypoint +package entrypoint import ( "os" @@ -11,7 +11,11 @@ import ( "github.com/hashicorp/go-hclog" ) -type EnvoyCmd struct { +// Cmd runs a command in a subprocess (asynchronously). +// Call `go cmd.Run()` to run the command asynchronously. +// Use the Started() channel to wait for the command to start. +// Use the Done() channel to wait for the command to complete. +type Cmd struct { *exec.Cmd log hclog.Logger @@ -19,7 +23,7 @@ type EnvoyCmd struct { startedCh chan struct{} } -func NewEnvoyCmd(log hclog.Logger, args []string) *EnvoyCmd { +func NewCmd(log hclog.Logger, args []string) *Cmd { cmd := exec.Command(args[0], args[1:]...) cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout @@ -29,7 +33,7 @@ func NewEnvoyCmd(log hclog.Logger, args []string) *EnvoyCmd { Setpgid: true, } - return &EnvoyCmd{ + return &Cmd{ Cmd: cmd, log: log, doneCh: make(chan struct{}, 1), @@ -39,12 +43,12 @@ func NewEnvoyCmd(log hclog.Logger, args []string) *EnvoyCmd { // Run the command. The Started() and Done() functions can be used // to wait for the process to start and exit, respectively. -func (e *EnvoyCmd) Run() { +func (e *Cmd) Run() { defer close(e.doneCh) defer close(e.startedCh) if err := e.Cmd.Start(); err != nil { - e.log.Error("starting Envoy process", "error", err.Error()) + e.log.Error("starting process", "error", err.Error()) // Closed channels (in defers) indicate the command failed to start. return } @@ -53,16 +57,16 @@ func (e *EnvoyCmd) Run() { if err := e.Cmd.Wait(); err != nil { if _, ok := err.(*exec.ExitError); !ok { // Do not log if it is only a non-zero exit code. - e.log.Error("waiting for Envoy process to finish", "error", err.Error()) + e.log.Error("waiting for process to finish", "error", err.Error()) } } e.doneCh <- struct{}{} } -func (e *EnvoyCmd) Started() chan struct{} { +func (e *Cmd) Started() chan struct{} { return e.startedCh } -func (e *EnvoyCmd) Done() chan struct{} { +func (e *Cmd) Done() chan struct{} { return e.doneCh } diff --git a/subcommand/app-entrypoint/command_common.go b/subcommand/app-entrypoint/command_common.go new file mode 100644 index 00000000..14bc84a9 --- /dev/null +++ b/subcommand/app-entrypoint/command_common.go @@ -0,0 +1,9 @@ +package appentrypoint + +func (c *Command) Help() string { + return "" +} + +func (c *Command) Synopsis() string { + return "Entrypoint for running a command in ECS" +} diff --git a/subcommand/app-entrypoint/command_unix.go b/subcommand/app-entrypoint/command_unix.go new file mode 100644 index 00000000..c06c9810 --- /dev/null +++ b/subcommand/app-entrypoint/command_unix.go @@ -0,0 +1,153 @@ +//go:build !windows +// +build !windows + +package appentrypoint + +import ( + "flag" + "fmt" + "os" + "os/signal" + "sync" + "syscall" + "time" + + "github.com/hashicorp/consul-ecs/entrypoint" + "github.com/hashicorp/go-hclog" + "github.com/mitchellh/cli" +) + +const ( + flagShutdownDelay = "shutdown-delay" +) + +type Command struct { + UI cli.Ui + log hclog.Logger + once sync.Once + flagSet *flag.FlagSet + + sigs chan os.Signal + appCmd *entrypoint.Cmd + shutdownDelay time.Duration +} + +func (c *Command) init() { + c.log = hclog.New(&hclog.LoggerOptions{Name: "consul-ecs"}) + c.flagSet = flag.NewFlagSet("", flag.ContinueOnError) + c.flagSet.DurationVar(&c.shutdownDelay, flagShutdownDelay, 0, + `Continue running for this long after receiving SIGTERM. Must be a duration (e.g. "10s").`) + c.log = hclog.New(nil) + +} + +func (c *Command) Run(args []string) int { + c.once.Do(c.init) + + // Flag parsing stops just before the first non-flag argument ("-" is a non-flag argument) + // or after the terminator "--" + err := c.flagSet.Parse(args) + if err != nil { + c.UI.Error(fmt.Sprint(err)) + return 1 + } + + // Remaining args for the application command, after parsing our flags + args = c.flagSet.Args() + + if len(args) == 0 { + c.UI.Error("command is required") + return 1 + } + + c.sigs = make(chan os.Signal, 1) + c.appCmd = entrypoint.NewCmd(c.log, args) + + return c.realRun() +} + +func (c *Command) realRun() int { + signal.Notify(c.sigs) + defer c.cleanup() + + go c.appCmd.Run() + if _, ok := <-c.appCmd.Started(); !ok { + return 1 + } + + if exitCode, exited := c.waitForSigterm(); exited { + return exitCode + } + if c.shutdownDelay > 0 { + c.log.Info(fmt.Sprintf("consul-ecs: received sigterm. waiting %s before terminating application.", c.shutdownDelay)) + if exitCode, exited := c.waitForShutdownDelay(); exited { + return exitCode + } + } + // We've signaled for the process to exit, so wait until it does. + c.waitForAppExit() + return c.appCmd.ProcessState.ExitCode() +} + +// waitForSigterm waits until c.appCmd has exited, or until a sigterm is received. +// It returns (exitCode, exited), where if exited=true, then c.appCmd has exited. +func (c *Command) waitForSigterm() (int, bool) { + for { + select { + case <-c.appCmd.Done(): + return c.appCmd.ProcessState.ExitCode(), true + case sig := <-c.sigs: + if sig == syscall.SIGTERM { + return -1, false + } + c.forwardSignal(sig) + } + } +} + +// waitForShutdownDelay waits for c.appCmd to exit for `delay` seconds. +// After the delay has passed, it sends a sigterm to c.appCmd. +// It returns (exitCode, exited), where if exited=true, then c.appCmd has exited. +func (c *Command) waitForShutdownDelay() (int, bool) { + timer := time.After(c.shutdownDelay) + for { + select { + case <-c.appCmd.Done(): + return c.appCmd.ProcessState.ExitCode(), true + case sig := <-c.sigs: + c.forwardSignal(sig) + case <-timer: + if err := syscall.Kill(-c.appCmd.Process.Pid, syscall.SIGTERM); err != nil { + c.log.Warn("error sending sigterm to application", "error", err.Error()) + } + } + } + +} + +func (c *Command) waitForAppExit() { + for { + select { + case <-c.appCmd.Done(): + return + case sig := <-c.sigs: + c.forwardSignal(sig) + } + } +} + +func (c *Command) forwardSignal(sig os.Signal) { + switch sig { + case syscall.SIGCHLD, syscall.SIGURG: + return + default: + if err := c.appCmd.Process.Signal(sig); err != nil { + c.log.Warn("forwarding signal", "err", err.Error()) + } + } +} + +func (c *Command) cleanup() { + signal.Stop(c.sigs) + <-c.appCmd.Done() +} diff --git a/subcommand/app-entrypoint/command_unix_test.go b/subcommand/app-entrypoint/command_unix_test.go new file mode 100644 index 00000000..cd9d53e2 --- /dev/null +++ b/subcommand/app-entrypoint/command_unix_test.go @@ -0,0 +1,210 @@ +//go:build !windows +// +build !windows + +package appentrypoint + +import ( + "fmt" + "os" + "os/signal" + "syscall" + "testing" + "time" + + "github.com/hashicorp/consul/sdk/testutil/retry" + "github.com/mitchellh/cli" + "github.com/stretchr/testify/require" +) + +func fakeAppScript(sleepSeconds int) string { + return fmt.Sprintf(`sleep %d & +export SLEEP_PID=$! +trap "{ echo 'target command was interrupted'; kill $SLEEP_PID; exit 42; }" INT +trap "{ echo 'target command was terminated'; kill $SLEEP_PID; exit 55; }" TERM +wait $SLEEP_PID +`, sleepSeconds) +} + +func TestFlagValidation(t *testing.T) { + cases := map[string]struct { + args []string + code int + error string + shutdownDelay time.Duration + }{ + "no-args": { + args: nil, + code: 1, + error: "command is required", + }, + "invalid-delay": { + args: []string{"--shutdown-delay", "asdf"}, + code: 1, + error: `invalid value "asdf" for flag -shutdown-delay`, + }, + "delay-without-app-command": { + args: []string{"--shutdown-delay", "10s"}, + code: 1, + error: "command is required", + shutdownDelay: 10 * time.Second, + }, + "app-command-with-flag-collision": { + // What if the app command uses a flag that collides with one of our flags? + args: []string{"/bin/sh", "-c", "echo", "--shutdown-delay", "asdf"}, + code: 0, + }, + "delay-with-app-command": { + args: []string{"--shutdown-delay", "5s", "/bin/sh", "-c", "exit 0"}, + code: 0, + shutdownDelay: 5 * time.Second, + }, + "delay-with-app-command-and-double-dash": { + // "--" terminates flag parsing, to separate consul-ecs from application args + args: []string{"--shutdown-delay", "5s", "--", "/bin/sh", "-c", "exit 0"}, + code: 0, + shutdownDelay: 5 * time.Second, + }, + } + for name, c := range cases { + t.Run(name, func(t *testing.T) { + ui := cli.NewMockUi() + cmd := Command{UI: ui} + code := cmd.Run(c.args) + require.Equal(t, c.code, code) + require.Contains(t, ui.ErrorWriter.String(), c.error) + require.Equal(t, cmd.shutdownDelay, c.shutdownDelay) + }) + } +} + +func TestRun(t *testing.T) { + cases := map[string]struct { + targetCommand string + sendSigterm bool + sendSigint bool + shutdownDelay time.Duration + exitCode int + }{ + "app-exit-before-sigterm": { + targetCommand: "exit 0", + }, + "app-exit-after-sigterm": { + // T0 : app start + // T1 : entrypoint receives sigterm (ignored) + // T2 : app exits on its own + targetCommand: fakeAppScript(2), + sendSigterm: true, + }, + "app-exit-before-shutdown-delay": { + // T0 : app start + // T1 : entrypoint receives sigterm (ignored) + // T2 : entrypoint waits for the shutdown delay + // T3 : app exits on its own + targetCommand: fakeAppScript(2), + sendSigterm: true, + shutdownDelay: 10 * time.Second, + }, + "app-exit-after-shutdown-delay": { + // T0 : app start + // T1 : entrypoint receives sigterm (ignored) + // T2 : entrypoint waits for the shutdown delay + // T3 : entrypoint sends sigterm to app after shutdown delay + // T4 : app exits due to sigterm + targetCommand: fakeAppScript(10), + sendSigterm: true, + shutdownDelay: 1 * time.Second, + // Our test script exits with 55 when receiving sigterm. + exitCode: 55, + }, + "sigint-is-forwarded": { + targetCommand: fakeAppScript(10), + sendSigint: true, + exitCode: 42, + }, + "sigint-is-forwarded-after-sigterm": { + targetCommand: fakeAppScript(10), + sendSigterm: true, + sendSigint: true, + exitCode: 42, + }, + "sigint-is-forwarded-during-shutdown-delay": { + targetCommand: fakeAppScript(10), + sendSigterm: true, + sendSigint: true, + shutdownDelay: 10 * time.Second, + exitCode: 42, + }, + } + + for name, c := range cases { + t.Run(name, func(t *testing.T) { + ui := cli.NewMockUi() + cmd := Command{UI: ui} + + // Start the target command asynchronously. + exitCodeChan := make(chan int, 1) + go func() { + defer close(exitCodeChan) + var args []string + if c.shutdownDelay > 0 { + args = append(args, "-shutdown-delay", c.shutdownDelay.String()) + } + args = append(args, "/bin/sh", "-c", c.targetCommand) + exitCodeChan <- cmd.Run(args) + }() + + t.Logf("Wait for fake app process to start") + retry.RunWith(&retry.Timer{Timeout: 1 * time.Second, Wait: 100 * time.Millisecond}, t, func(r *retry.R) { + require.NotNil(r, cmd.appCmd) + require.NotNil(r, cmd.appCmd.Process) + require.Greater(r, cmd.appCmd.Process.Pid, 0) + }) + appPid := cmd.appCmd.Process.Pid + t.Logf("Fake app process started pid=%v", appPid) + + // Testing signal handling requires signaling the entrypoint process. + // This is awkward since that is the CURRENT process running this test. + // To avoid accidentally terminating the test run, intercept signals here as well. + // This is okay. Go supports multiple registered channels for signal notification. + sigs := make(chan os.Signal, 2) + signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT) + t.Cleanup(func() { + signal.Stop(sigs) + }) + + if c.sendSigterm { + t.Logf("Sending sigterm to the entrypoint") + err := syscall.Kill(os.Getpid(), syscall.SIGTERM) + require.NoError(t, err) + time.Sleep(500 * time.Millisecond) // Give it time to react + + t.Logf("Check the fake app process is still running") + proc, err := os.FindProcess(appPid) + require.NoError(t, err, "Failed to find fake app process") + // A zero-signal lets us check the process is still valid/running. + require.NoError(t, proc.Signal(syscall.Signal(0)), + "Sigterm was not ignored by the entrypoint") + } + + if c.sendSigint { + t.Logf("Send sigint to entrypoint") + err := syscall.Kill(os.Getpid(), syscall.SIGINT) + require.NoError(t, err) + + t.Logf("Check the fake app process has exited") + retry.RunWith(&retry.Timer{Timeout: 2 * time.Second, Wait: 100 * time.Millisecond}, t, func(r *retry.R) { + proc, err := os.FindProcess(appPid) + require.NoError(r, err, "Failed to find fake app process") + err = proc.Signal(syscall.Signal(0)) + require.Error(r, err, "Sigint was not forwarded to fake app process") + require.Equal(r, os.ErrProcessDone, err) + }) + } + + // If !ok, then the channel was closed without actually sending the exit code. + exitCode, ok := <-exitCodeChan + require.True(t, ok) + require.Equal(t, c.exitCode, exitCode) + }) + } +} diff --git a/subcommand/app-entrypoint/command_windows.go b/subcommand/app-entrypoint/command_windows.go new file mode 100644 index 00000000..cb72b008 --- /dev/null +++ b/subcommand/app-entrypoint/command_windows.go @@ -0,0 +1,23 @@ +//go:build windows +// +build windows + +// Not implemented for Windows. +// Our Unix implementation doesn't compile on Windows, and we only need to support +// Linux since this is an entrypoint to a Docker container. + +package appentrypoint + +import ( + "github.com/hashicorp/go-hclog" + "github.com/mitchellh/cli" +) + +type Command struct { + UI cli.Ui + log hclog.Logger +} + +func (c *Command) Run(args []string) int { + c.UI.Error("not implemented on Windows") + return 1 +} diff --git a/subcommand/envoy-entrypoint/command_common.go b/subcommand/envoy-entrypoint/command_common.go index 90393538..8cd954e2 100644 --- a/subcommand/envoy-entrypoint/command_common.go +++ b/subcommand/envoy-entrypoint/command_common.go @@ -6,14 +6,6 @@ // * Monitor task metadata to terminate Envoy after application container(s) stop package envoyentrypoint -import ( - "github.com/hashicorp/go-hclog" -) - -func (c *Command) init() { - c.log = hclog.New(&hclog.LoggerOptions{Name: "consul-ecs"}) -} - func (c *Command) Help() string { return "" } diff --git a/subcommand/envoy-entrypoint/command_unix.go b/subcommand/envoy-entrypoint/command_unix.go index 57deef71..ef3fc134 100644 --- a/subcommand/envoy-entrypoint/command_unix.go +++ b/subcommand/envoy-entrypoint/command_unix.go @@ -10,6 +10,7 @@ import ( "sync" "syscall" + "github.com/hashicorp/consul-ecs/entrypoint" "github.com/hashicorp/go-hclog" "github.com/mitchellh/cli" ) @@ -22,10 +23,14 @@ type Command struct { sigs chan os.Signal ctx context.Context cancel context.CancelFunc - envoyCmd *EnvoyCmd + envoyCmd *entrypoint.Cmd appMonitor *AppContainerMonitor } +func (c *Command) init() { + c.log = hclog.New(&hclog.LoggerOptions{Name: "consul-ecs"}) +} + func (c *Command) Run(args []string) int { c.once.Do(c.init) @@ -36,7 +41,7 @@ func (c *Command) Run(args []string) int { c.sigs = make(chan os.Signal, 1) c.ctx, c.cancel = context.WithCancel(context.Background()) - c.envoyCmd = NewEnvoyCmd(c.log, args) + c.envoyCmd = entrypoint.NewCmd(c.log, args) c.appMonitor = NewAppContainerMonitor(c.log, c.ctx) return c.realRun() diff --git a/subcommand/envoy-entrypoint/command_unix_test.go b/subcommand/envoy-entrypoint/command_unix_test.go index 9fe5e990..f88ebbc4 100644 --- a/subcommand/envoy-entrypoint/command_unix_test.go +++ b/subcommand/envoy-entrypoint/command_unix_test.go @@ -120,7 +120,7 @@ func TestRun(t *testing.T) { meta := makeTaskMeta( "some-app-container", "consul-client", - "health-sync", + "consul-ecs-health-sync", "consul-ecs-mesh-init", "sidecar-proxy", ) diff --git a/subcommand/envoy-entrypoint/task-monitor.go b/subcommand/envoy-entrypoint/task-monitor.go index 00db99ce..e675234b 100644 --- a/subcommand/envoy-entrypoint/task-monitor.go +++ b/subcommand/envoy-entrypoint/task-monitor.go @@ -17,10 +17,10 @@ import ( var ( nonAppContainers = map[string]struct{}{ - "consul-client": {}, - "sidecar-proxy": {}, - "health-sync": {}, - "consul-ecs-mesh-init": {}, + "consul-client": {}, + "sidecar-proxy": {}, + "consul-ecs-health-sync": {}, + "consul-ecs-mesh-init": {}, } )