Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package cli

import (
"encoding/json"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"

"github.com/coder/boundary/config"
Expand Down Expand Up @@ -241,7 +243,19 @@ func BaseCommand(version string) *serpent.Command {
}
logger.Debug("Application config", "config", appConfigInJSON)

return run.Run(inv.Context(), logger, appConfig)
err = run.Run(inv.Context(), logger, appConfig)

// If the child process exited with a non-zero code, exit
// with the same code directly. All cleanup (proxy, etc.)
// has already happened inside Run(). Exiting here ensures
// the correct code is propagated regardless of how the
// calling framework handles errors (standalone binary or
// embedded as a coder subcommand).
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
os.Exit(exitErr.ExitCode())
}
return err
Comment on lines +247 to +258
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a convenient solution, but it makes me nervous to have an os.Exit that is more than a single level of indirection from the entrypoint like this. Looking at this bit of code here, we don't know what cleanup would have happened on the return path between here and the entry point.

I think the proper solution is to do the error checking near the entry point both here and in the coder subcommand.

In practice, the risk is low and it's easy to patch later, so I don't think this blocks the PR. It's worth a mention though.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, it's not ideal. The problem is the embedded mode (coder boundary ...): we don't control coder's entrypoint, and serpent wraps our returned error in RunCommandError, so coder's main() just does os.Exit(1), losing the actual code.

To do it "properly" we'd need changes in coder/coder or serpent. This is a conscious tradeoff: all cleanup (proxy, iptables) already ran inside Run() via defers before the error returns, so the os.Exit is safe.

Let me know your thoughts!

},
}
}
9 changes: 3 additions & 6 deletions landjail/child.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,12 @@ func RunChild(logger *slog.Logger, config config.AppConfig) error {
// Run the command - this will block until it completes
err = cmd.Run()
if err != nil {
// Check if this is a normal exit with non-zero status code
if exitError, ok := err.(*exec.ExitError); ok {
exitCode := exitError.ExitCode()
logger.Debug("Command exited with non-zero status", "exit_code", exitCode)
return fmt.Errorf("command exited with code %d", exitCode)
logger.Debug("Command exited with non-zero status", "exit_code", exitError.ExitCode())
return fmt.Errorf("command exited with code %d: %w", exitError.ExitCode(), err)
}
// This is an unexpected error
logger.Error("Command execution failed", "error", err)
return fmt.Errorf("command execution failed: %v", err)
return fmt.Errorf("command execution failed: %w", err)
}

logger.Debug("Command completed successfully")
Expand Down
19 changes: 14 additions & 5 deletions landjail/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ func (b *LandJail) Run(ctx context.Context) error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()

// childErr receives the result of RunChildProcess so we can
// propagate the child's exit code to our caller.
childErr := make(chan error, 1)
go func() {
defer cancel()
err := b.RunChildProcess(os.Args)
if err != nil {
b.logger.Error("Failed to run child process", "error", err)
}
childErr <- b.RunChildProcess(os.Args)
}()

// Setup signal handling BEFORE any setup
Expand All @@ -89,7 +89,16 @@ func (b *LandJail) Run(ctx context.Context) error {
b.logger.Info("Command completed, shutting down...")
}

return nil
// Drain the child result if available. In the ctx.Done path the
// error is already buffered. In the signal path the child may still
// be running; return nil so deferred cleanup (iptables, proxy) can
// proceed before the process exits.
select {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Asking for clarity:

why do we need a second select here instead of a three case select above?

select {
case sig := <-sigChan:
// ...
case err := <-childErr:
// ...
case <-ctx.Done():
// ...
}

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the child finishes, the goroutine sends on childErr AND calls defer cancel(), which closes ctx.Done(). So both channels are ready at roughly the same time. Go picks randomly between ready cases - if we land in ctx.Done() instead of childErr, we lose the exit code error and return nil.

Two selects avoid that: first one waits for signal or context cancellation, second one (non-blocking) drains the child result. In the ctx.Done path the error is already buffered so we always get it. In the signal path the child may still be running, so default: return nil lets deferred cleanup proceed.

case err := <-childErr:
return err
default:
return nil
}
}

func (b *LandJail) RunChildProcess(command []string) error {
Expand Down
13 changes: 3 additions & 10 deletions nsjail_manager/child.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,19 +92,12 @@ func RunChild(logger *slog.Logger, cfg config.AppConfig) error {
}
err = cmd.Run()
if err != nil {
// Check if this is a normal exit with non-zero status code
if exitError, ok := err.(*exec.ExitError); ok {
exitCode := exitError.ExitCode()
// Log at debug level for non-zero exits (normal behavior)
logger.Debug("Command exited with non-zero status", "exit_code", exitCode)
// Exit with the same code as the command - don't log as error
// This is normal behavior (commands can exit with any code)
os.Exit(exitCode)
logger.Debug("Command exited with non-zero status", "exit_code", exitError.ExitCode())
return fmt.Errorf("command exited with code %d: %w", exitError.ExitCode(), err)
}
// This is an unexpected error (not just a non-zero exit)
// Only log actual errors like "command not found" or "permission denied"
logger.Error("Command execution failed", "error", err)
return err
return fmt.Errorf("command execution failed: %w", err)
}

// Command exited successfully
Expand Down
25 changes: 19 additions & 6 deletions nsjail_manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,12 @@ func (b *NSJailManager) Run(ctx context.Context) error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()

// childErr receives the result of RunChildProcess so we can
// propagate the child's exit code to our caller.
childErr := make(chan error, 1)
go func() {
defer cancel()
b.RunChildProcess(os.Args)
childErr <- b.RunChildProcess(os.Args)
}()

// Setup signal handling BEFORE any setup
Expand All @@ -90,23 +93,32 @@ func (b *NSJailManager) Run(ctx context.Context) error {
b.logger.Info("Command completed, shutting down...")
}

return nil
// Drain the child result if available. In the ctx.Done path the
// error is already buffered. In the signal path the child may still
// be running; return nil so deferred cleanup (iptables, proxy) can
// proceed before the process exits.
select {
case err := <-childErr:
return err
default:
return nil
}
}

func (b *NSJailManager) RunChildProcess(command []string) {
func (b *NSJailManager) RunChildProcess(command []string) error {
cmd := b.jailer.Command(command)

b.logger.Debug("Executing command in boundary", "command", strings.Join(os.Args, " "))
err := cmd.Start()
if err != nil {
b.logger.Error("Command failed to start", "error", err)
return
return err
}

err = b.jailer.ConfigureHostNsCommunication(cmd.Process.Pid)
if err != nil {
b.logger.Error("configuration after command execution failed", "error", err)
return
return err
}

b.logger.Debug("waiting on a child process to finish")
Expand All @@ -121,9 +133,10 @@ func (b *NSJailManager) RunChildProcess(command []string) {
// This is an unexpected error (not just a non-zero exit)
b.logger.Error("Command execution failed", "error", err)
}
return
return err
}
b.logger.Debug("Command completed successfully")
return nil
}

func (b *NSJailManager) setupHostAndStartProxy() error {
Expand Down
Loading