Skip to content

Commit

Permalink
vault: support allowing tokens to expire without refresh
Browse files Browse the repository at this point in the history
Some users with batch workloads or short-lived prestart tasks want to derive a
Vault token, use it, and then allow it to expire without requiring a constant
refresh. Add the `vault.allow_token_expiration` field.

When set to true, this disables the client's renewal loop in the
`vault_hook`. When Vault revokes the token lease, the token will no longer be
valid.

Note this should only be used when a secret is requested from Vault once at the
start of a task or in a short-lived prestart task. Long-running tasks should
never set `allow_token_expiration=true` if they obtain Vault secrets via
`template` blocks, as the Vault token will expire and the template runner will
continue to make failing requests to Vault until the `vault_retry` attempts are
exhausted.

Fixes: #8690
  • Loading branch information
tgross committed Jan 9, 2024
1 parent c875f3e commit df02d81
Show file tree
Hide file tree
Showing 8 changed files with 145 additions and 61 deletions.
20 changes: 12 additions & 8 deletions api/tasks.go
Expand Up @@ -937,14 +937,15 @@ func (tmpl *Template) Canonicalize() {
}

type Vault struct {
Policies []string `hcl:"policies,optional"`
Role string `hcl:"role,optional"`
Namespace *string `mapstructure:"namespace" hcl:"namespace,optional"`
Cluster string `hcl:"cluster,optional"`
Env *bool `hcl:"env,optional"`
DisableFile *bool `mapstructure:"disable_file" hcl:"disable_file,optional"`
ChangeMode *string `mapstructure:"change_mode" hcl:"change_mode,optional"`
ChangeSignal *string `mapstructure:"change_signal" hcl:"change_signal,optional"`
Policies []string `hcl:"policies,optional"`
Role string `hcl:"role,optional"`
Namespace *string `mapstructure:"namespace" hcl:"namespace,optional"`
Cluster string `hcl:"cluster,optional"`
Env *bool `hcl:"env,optional"`
DisableFile *bool `mapstructure:"disable_file" hcl:"disable_file,optional"`
ChangeMode *string `mapstructure:"change_mode" hcl:"change_mode,optional"`
ChangeSignal *string `mapstructure:"change_signal" hcl:"change_signal,optional"`
AllowTokenExpiration *bool `mapstructure:"allow_token_expiration" hcl:"allow_token_expiration,optional"`
}

func (v *Vault) Canonicalize() {
Expand All @@ -966,6 +967,9 @@ func (v *Vault) Canonicalize() {
if v.ChangeSignal == nil {
v.ChangeSignal = pointerOf("SIGHUP")
}
if v.AllowTokenExpiration == nil {
v.AllowTokenExpiration = pointerOf(false)
}
}

// NewTask creates and initializes a new Task.
Expand Down
9 changes: 9 additions & 0 deletions client/allocrunner/taskrunner/vault_hook.go
Expand Up @@ -237,6 +237,9 @@ func (h *vaultHook) Shutdown() {
func (h *vaultHook) run(token string) {
// Helper for stopping token renewal
stopRenewal := func() {
if h.vaultBlock.AllowTokenExpiration {
return
}
if err := h.client.StopRenewToken(h.future.Get()); err != nil {
h.logger.Warn("failed to stop token renewal", "error", err)
}
Expand Down Expand Up @@ -280,6 +283,12 @@ OUTER:
}
}

if h.vaultBlock.AllowTokenExpiration {
h.future.Set(token)
h.logger.Debug("Vault token will not renew")
return
}

// Start the renewal process.
//
// This is the initial renew of the token which we derived from the
Expand Down
40 changes: 33 additions & 7 deletions client/allocrunner/taskrunner/vault_hook_test.go
Expand Up @@ -112,11 +112,12 @@ func TestTaskRunner_VaultHook(t *testing.T) {
ci.Parallel(t)

testCases := []struct {
name string
task *structs.Task
configs map[string]*sconfig.VaultConfig
expectRole string
expectLegacy bool
name string
task *structs.Task
configs map[string]*sconfig.VaultConfig
expectRole string
expectLegacy bool
expectNoRenew bool
}{
{
name: "legacy flow",
Expand Down Expand Up @@ -205,6 +206,19 @@ func TestTaskRunner_VaultHook(t *testing.T) {
},
},
},
{
name: "no renewal",
task: &structs.Task{
Vault: &structs.Vault{
Cluster: structs.VaultDefaultCluster,
AllowTokenExpiration: true,
},
Identities: []*structs.WorkloadIdentity{
{Name: "vault_default"},
},
},
expectNoRenew: true,
},
}

for _, tc := range testCases {
Expand Down Expand Up @@ -293,8 +307,12 @@ func TestTaskRunner_VaultHook(t *testing.T) {
}

// Token must be set for renewal.
must.MapLen(t, 1, client.RenewTokens())
must.NotNil(t, client.RenewTokens()[updater.currentToken])
if tc.expectNoRenew {
must.MapEmpty(t, client.RenewTokens())
} else {
must.MapLen(t, 1, client.RenewTokens())
must.NotNil(t, client.RenewTokens()[updater.currentToken])
}

// PrestartDone must be false so we can recover tokens.
// firstRun is used to prevent multiple executions.
Expand All @@ -307,6 +325,14 @@ func TestTaskRunner_VaultHook(t *testing.T) {
must.Wait(t, wait.InitialSuccess(
wait.ErrorFunc(func() error {
tokens := client.StoppedTokens()

if tc.expectNoRenew {
if len(tokens) != 0 {
return fmt.Errorf("expected no stopped tokens when renewal is disabled, got %d", len(tokens))
}
return nil
}

if len(tokens) != 1 {
return fmt.Errorf("expected stopped tokens to be %d, got %d", 1, len(tokens))
}
Expand Down
17 changes: 9 additions & 8 deletions command/agent/job_endpoint.go
Expand Up @@ -1350,14 +1350,15 @@ func ApiTaskToStructsTask(job *structs.Job, group *structs.TaskGroup,

if apiTask.Vault != nil {
structsTask.Vault = &structs.Vault{
Role: apiTask.Vault.Role,
Policies: apiTask.Vault.Policies,
Namespace: *apiTask.Vault.Namespace,
Cluster: apiTask.Vault.Cluster,
Env: *apiTask.Vault.Env,
DisableFile: *apiTask.Vault.DisableFile,
ChangeMode: *apiTask.Vault.ChangeMode,
ChangeSignal: *apiTask.Vault.ChangeSignal,
Role: apiTask.Vault.Role,
Policies: apiTask.Vault.Policies,
Namespace: *apiTask.Vault.Namespace,
Cluster: apiTask.Vault.Cluster,
Env: *apiTask.Vault.Env,
DisableFile: *apiTask.Vault.DisableFile,
ChangeMode: *apiTask.Vault.ChangeMode,
ChangeSignal: *apiTask.Vault.ChangeSignal,
AllowTokenExpiration: *apiTask.Vault.AllowTokenExpiration,
}
}

Expand Down
17 changes: 9 additions & 8 deletions command/agent/job_endpoint_test.go
Expand Up @@ -3362,14 +3362,15 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
},
},
Vault: &structs.Vault{
Role: "nomad-task",
Namespace: "ns1",
Cluster: structs.VaultDefaultCluster,
Policies: []string{"a", "b", "c"},
Env: true,
DisableFile: false,
ChangeMode: "c",
ChangeSignal: "sighup",
Role: "nomad-task",
Namespace: "ns1",
Cluster: structs.VaultDefaultCluster,
Policies: []string{"a", "b", "c"},
Env: true,
DisableFile: false,
ChangeMode: "c",
ChangeSignal: "sighup",
AllowTokenExpiration: false,
},
Templates: []*structs.Template{
{
Expand Down
88 changes: 58 additions & 30 deletions nomad/structs/diff_test.go
Expand Up @@ -7579,6 +7579,12 @@ func TestTaskDiff(t *testing.T) {
Old: "",
New: "SIGUSR1",
},
{
Type: DiffTypeAdded,
Name: "AllowTokenExpiration",
Old: "",
New: "false",
},
{
Type: DiffTypeAdded,
Name: "DisableFile",
Expand Down Expand Up @@ -7653,6 +7659,12 @@ func TestTaskDiff(t *testing.T) {
Old: "SIGUSR1",
New: "",
},
{
Type: DiffTypeDeleted,
Name: "AllowTokenExpiration",
Old: "false",
New: "",
},
{
Type: DiffTypeDeleted,
Name: "DisableFile",
Expand Down Expand Up @@ -7694,24 +7706,26 @@ func TestTaskDiff(t *testing.T) {
Name: "Vault edited",
Old: &Task{
Vault: &Vault{
Role: "nomad-task",
Namespace: "ns1",
Policies: []string{"foo", "bar"},
Env: true,
DisableFile: true,
ChangeMode: "signal",
ChangeSignal: "SIGUSR1",
Role: "nomad-task",
Namespace: "ns1",
Policies: []string{"foo", "bar"},
Env: true,
DisableFile: true,
ChangeMode: "signal",
ChangeSignal: "SIGUSR1",
AllowTokenExpiration: false,
},
},
New: &Task{
Vault: &Vault{
Role: "nomad-task-2",
Namespace: "ns2",
Policies: []string{"bar", "baz"},
Env: false,
DisableFile: false,
ChangeMode: "restart",
ChangeSignal: "foo",
Role: "nomad-task-2",
Namespace: "ns2",
Policies: []string{"bar", "baz"},
Env: false,
DisableFile: false,
ChangeMode: "restart",
ChangeSignal: "foo",
AllowTokenExpiration: true,
},
},
Expected: &TaskDiff{
Expand All @@ -7733,6 +7747,12 @@ func TestTaskDiff(t *testing.T) {
Old: "SIGUSR1",
New: "foo",
},
{
Type: DiffTypeEdited,
Name: "AllowTokenExpiration",
Old: "false",
New: "true",
},
{
Type: DiffTypeEdited,
Name: "DisableFile",
Expand Down Expand Up @@ -7787,26 +7807,28 @@ func TestTaskDiff(t *testing.T) {
Contextual: true,
Old: &Task{
Vault: &Vault{
Role: "nomad-task",
Namespace: "ns1",
Cluster: VaultDefaultCluster,
Policies: []string{"foo", "bar"},
Env: true,
DisableFile: true,
ChangeMode: "signal",
ChangeSignal: "SIGUSR1",
Role: "nomad-task",
Namespace: "ns1",
Cluster: VaultDefaultCluster,
Policies: []string{"foo", "bar"},
Env: true,
DisableFile: true,
ChangeMode: "signal",
ChangeSignal: "SIGUSR1",
AllowTokenExpiration: true,
},
},
New: &Task{
Vault: &Vault{
Role: "nomad-task",
Namespace: "ns1",
Cluster: VaultDefaultCluster,
Policies: []string{"bar", "baz"},
Env: true,
DisableFile: true,
ChangeMode: "signal",
ChangeSignal: "SIGUSR1",
Role: "nomad-task",
Namespace: "ns1",
Cluster: VaultDefaultCluster,
Policies: []string{"bar", "baz"},
Env: true,
DisableFile: true,
ChangeMode: "signal",
ChangeSignal: "SIGUSR1",
AllowTokenExpiration: true,
},
},
Expected: &TaskDiff{
Expand Down Expand Up @@ -7834,6 +7856,12 @@ func TestTaskDiff(t *testing.T) {
Old: VaultDefaultCluster,
New: VaultDefaultCluster,
},
{
Type: DiffTypeNone,
Name: "AllowTokenExpiration",
Old: "true",
New: "true",
},
{
Type: DiffTypeNone,
Name: "DisableFile",
Expand Down
5 changes: 5 additions & 0 deletions nomad/structs/structs.go
Expand Up @@ -9963,6 +9963,9 @@ type Vault struct {
// ChangeSignal is the signal sent to the task when a new token is
// retrieved. This is only valid when using the signal change mode.
ChangeSignal string

// AllowTokenExpiration disables the Vault token refresh loop on the client
AllowTokenExpiration bool
}

// IdentityName returns the name of the workload identity to be used to access
Expand Down Expand Up @@ -9992,6 +9995,8 @@ func (v *Vault) Equal(o *Vault) bool {
return false
case v.ChangeSignal != o.ChangeSignal:
return false
case v.AllowTokenExpiration != o.AllowTokenExpiration:
return false
}
return true
}
Expand Down
10 changes: 10 additions & 0 deletions website/content/docs/job-specification/vault.mdx
Expand Up @@ -58,6 +58,15 @@ with Vault as well.

## `vault` Parameters

- `allow_token_expiration` `(bool: false)` - Specifies that Nomad clients should
not attempt to renew a task's Vault token, allowing it to expire. This should
only be used when a secret is requested from Vault once at the start of a task
or in a short-lived prestart task. Long-running tasks should never set
`allow_token_expiration=true` if they obtain Vault secrets via `template`
blocks, as the Vault token will expire and the template runner will continue
to make failing requests to Vault until its [`vault_retry`][] attempts are
exhausted, at which point the task will fail.

- `change_mode` `(string: "restart")` - Specifies the behavior Nomad should take
if the Vault token changes. The possible values are:

Expand Down Expand Up @@ -208,3 +217,4 @@ vault {
[template]: /nomad/docs/job-specification/template "Nomad template Job Specification"
[vault]: https://www.vaultproject.io/ "Vault by HashiCorp"
[`vault.name`]: /nomad/docs/configuration/vault#name
[`vault_retry`]: /nomad/docs/configuration/client#vault_retry

0 comments on commit df02d81

Please sign in to comment.