Skip to content

Commit

Permalink
config: set recover_stopped to default to false (#260)
Browse files Browse the repository at this point in the history
The use of recover_stopped may cause the Nomad agent to hang on startup,
as the plugin tries to start an exited podman task. Podman itself will
hang forever in this state, and the http client on the Nomad side is also
unable to time out in this case. The result is a permanently hung Nomad
agent, until someone force kills either Nomad or Podman.

Also emit a log warning that recover_stopped should not be used. We leave
it in place for compatibility.

Fixes #229
  • Loading branch information
shoenig committed Jun 14, 2023
1 parent 6465caa commit 0b3a626
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
10 changes: 9 additions & 1 deletion config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package main

import (
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/helper/pluginutils/hclutils"
"github.com/hashicorp/nomad/plugins/shared/hclspec"
)
Expand Down Expand Up @@ -33,7 +34,7 @@ var (
// allow TaskRecover to start a still existing, stopped, container during client/driver restart
"recover_stopped": hclspec.NewDefault(
hclspec.NewAttr("recover_stopped", "bool", false),
hclspec.NewLiteral("true"),
hclspec.NewLiteral("false"),
),
// optional extra_labels to append to all tasks for observability. Globs supported
"extra_labels": hclspec.NewAttr("extra_labels", "list(string)", false),
Expand Down Expand Up @@ -130,6 +131,13 @@ type PluginConfig struct {
ExtraLabels []string `codec:"extra_labels"`
}

// LogWarnings reports plugin settings that are known to cause trouble.
//
// The only setting checked here is RecoverStopped: when it is enabled, one
// error-level message is written to logger. Otherwise nothing is logged.
func (c *PluginConfig) LogWarnings(logger hclog.Logger) {
	// Nothing to report unless the problematic option is switched on.
	if !c.RecoverStopped {
		return
	}

	const msg = "WARNING - use of recover_stopped may cause Nomad agent to not start on system restarts"
	logger.Error(msg)
}

// TaskConfig is the driver configuration of a task within a job
type TaskConfig struct {
ApparmorProfile string `codec:"apparmor_profile"`
Expand Down
3 changes: 3 additions & 0 deletions driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ func (d *Driver) Capabilities() (*drivers.Capabilities, error) {
// then send periodic updates at an interval that is appropriate for the driver
// until the context is canceled.
func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
// emit warnings about known bad configs
d.config.LogWarnings(d.logger)

err := shelpers.Init()
if err != nil {
d.logger.Error("Could not init stats helper", "error", err)
Expand Down

0 comments on commit 0b3a626

Please sign in to comment.