Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vault: add new nomad setup vault -check command #19720

Merged
merged 4 commits into from Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/19720.txt
@@ -0,0 +1,3 @@
```release-note:improvement
cli: Add new option `nomad setup vault -check` to help cluster operators migrate to workload identities for Vault
```
60 changes: 60 additions & 0 deletions api/operator.go
Expand Up @@ -411,3 +411,63 @@ type LeadershipTransferResponse struct {

WriteMeta
}

// VaultWorkloadIdentityUpgradeCheck is the result of verifying if the cluster
// is ready to switch to workload identities for Vault.
//
// The Ready method reports true only when every list below is empty.
type VaultWorkloadIdentityUpgradeCheck struct {
// JobsWithoutVaultIdentity is the list of jobs that have a `vault` block
// but do not have an `identity` for Vault.
JobsWithoutVaultIdentity []*JobListStub

// OutdatedNodes is the list of nodes running a version of Nomad that does
// not support workload identities for Vault.
OutdatedNodes []*NodeListStub

// VaultTokens is the list of Vault ACL token accessors that Nomad created
// and will no longer manage after the cluster is migrated to workload
// identities.
VaultTokens []*VaultAccessor
}

// Ready reports whether the cluster can be migrated to workload identities
// with Vault. It is true only when the check found no Vault tokens, no
// outdated nodes, and no jobs without a Vault identity. A nil receiver is
// never ready.
func (v *VaultWorkloadIdentityUpgradeCheck) Ready() bool {
	if v == nil {
		return false
	}

	// Any non-empty list means there is still something blocking migration.
	blockers := len(v.JobsWithoutVaultIdentity) +
		len(v.OutdatedNodes) +
		len(v.VaultTokens)
	return blockers == 0
}

// VaultAccessor is a Vault ACL token created by Nomad for a task to access
// Vault using the legacy authentication flow.
type VaultAccessor struct {
// AllocID is the ID of the allocation that requested this token.
AllocID string

// Task is the name of the task that requested this token.
Task string

// NodeID is the ID of the node running the allocation that requested this
// token.
NodeID string

// Accessor is the Vault ACL token accessor ID.
Accessor string

// CreationTTL is the TTL set when the token was created.
// NOTE(review): the unit is not visible here; presumably seconds, confirm
// against the server-side definition.
CreationTTL int

// CreateIndex is the Raft index when the token was created.
CreateIndex uint64
}

// UpgradeCheckVaultWorkloadIdentity queries the operator upgrade-check
// endpoint and returns the cluster's status for migrating to workload
// identities with Vault, along with the query metadata. On failure both the
// result and the metadata are nil.
func (op *Operator) UpgradeCheckVaultWorkloadIdentity(q *QueryOptions) (*VaultWorkloadIdentityUpgradeCheck, *QueryMeta, error) {
	const endpoint = "/v1/operator/upgrade-check/vault-workload-identity"

	check := new(VaultWorkloadIdentityUpgradeCheck)
	meta, err := op.c.query(endpoint, check, q)
	if err != nil {
		return nil, nil, err
	}
	return check, meta, nil
}
1 change: 1 addition & 0 deletions command/agent/http.go
Expand Up @@ -487,6 +487,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {
s.mux.HandleFunc("/v1/operator/autopilot/configuration", s.wrap(s.OperatorAutopilotConfiguration))
s.mux.HandleFunc("/v1/operator/autopilot/health", s.wrap(s.OperatorServerHealth))
s.mux.HandleFunc("/v1/operator/snapshot", s.wrap(s.SnapshotRequest))
s.mux.HandleFunc("/v1/operator/upgrade-check/", s.wrap(s.UpgradeCheckRequest))

s.mux.HandleFunc("/v1/system/gc", s.wrap(s.GarbageCollectRequest))
s.mux.HandleFunc("/v1/system/reconcile/summaries", s.wrap(s.ReconcileJobSummaries))
Expand Down
29 changes: 29 additions & 0 deletions command/agent/operator_endpoint.go
Expand Up @@ -521,3 +521,32 @@ func (s *HTTPServer) snapshotRestoreRequest(resp http.ResponseWriter, req *http.

return nil, codedErr
}

// UpgradeCheckRequest dispatches requests under /v1/operator/upgrade-check to
// the handler for the specific check named by the rest of the path. Paths that
// do not match a known check produce a 404 coded error.
func (s *HTTPServer) UpgradeCheckRequest(resp http.ResponseWriter, req *http.Request) (any, error) {
	subPath := strings.TrimPrefix(req.URL.Path, "/v1/operator/upgrade-check")

	if strings.HasSuffix(subPath, "/vault-workload-identity") {
		return s.upgradeCheckVaultWorkloadIdentity(resp, req)
	}

	notFound := fmt.Sprintf("Path %s not found", req.URL.Path)
	return nil, CodedError(http.StatusNotFound, notFound)
}

// upgradeCheckVaultWorkloadIdentity serves the Vault workload identity upgrade
// check. Only GET is accepted; other methods yield a 405 coded error. The
// request is forwarded to the Operator.UpgradeCheckVaultWorkloadIdentity RPC
// and the RPC response is returned after its query metadata has been applied
// to the HTTP response.
func (s *HTTPServer) upgradeCheckVaultWorkloadIdentity(resp http.ResponseWriter, req *http.Request) (any, error) {
	if req.Method != http.MethodGet {
		return nil, CodedError(http.StatusMethodNotAllowed, ErrInvalidMethod)
	}

	var (
		args  structs.UpgradeCheckVaultWorkloadIdentityRequest
		reply structs.UpgradeCheckVaultWorkloadIdentityResponse
	)

	// parse reporting true means the request must not proceed; nothing more
	// is returned from here in that case.
	if stop := s.parse(resp, req, &args.Region, &args.QueryOptions); stop {
		return nil, nil
	}

	err := s.agent.RPC("Operator.UpgradeCheckVaultWorkloadIdentity", &args, &reply)
	if err != nil {
		return nil, err
	}

	setMeta(resp, &reply.QueryMeta)
	return reply, nil
}
38 changes: 38 additions & 0 deletions command/agent/operator_endpoint_test.go
Expand Up @@ -661,3 +661,41 @@ func TestOperator_SnapshotRequests(t *testing.T) {
require.True(t, jobExists())
})
}

func TestOperator_UpgradeCheckRequest_VaultWorkloadIdentity(t *testing.T) {
ci.Parallel(t)
httpTest(t, func(c *Config) {
c.Vaults[0].Enabled = pointer.Of(true)
c.Vaults[0].Name = "default"
}, func(s *TestAgent) {
// Create a test job with a Vault block but without an identity.
job := mock.Job()
job.TaskGroups[0].Tasks[0].Vault = &structs.Vault{
Cluster: "default",
Policies: []string{"test"},
}

args := structs.JobRegisterRequest{
Job: job,
WriteRequest: structs.WriteRequest{Region: "global"},
}
var resp structs.JobRegisterResponse
err := s.Agent.RPC("Job.Register", &args, &resp)
lgfa29 marked this conversation as resolved.
Show resolved Hide resolved

// Make HTTP request to retrieve
req, err := http.NewRequest(http.MethodGet, "/v1/operator/upgrade-check/vault-workload-identity", nil)
must.NoError(t, err)
respW := httptest.NewRecorder()

obj, err := s.Server.UpgradeCheckRequest(respW, req)
must.NoError(t, err)
must.NotEq(t, "", respW.Header().Get("X-Nomad-Index"))
must.NotEq(t, "", respW.Header().Get("X-Nomad-LastContact"))
must.Eq(t, "true", respW.Header().Get("X-Nomad-KnownLeader"))

upgradeCheck := obj.(structs.UpgradeCheckVaultWorkloadIdentityResponse)
must.Len(t, 1, upgradeCheck.JobsWithoutVaultIdentity)
must.Len(t, 0, upgradeCheck.VaultTokens)
must.Eq(t, job.ID, upgradeCheck.JobsWithoutVaultIdentity[0].ID)
})
}
180 changes: 178 additions & 2 deletions command/setup_vault.go
Expand Up @@ -12,6 +12,7 @@ import (
"slices"
"strings"

"github.com/dustin/go-humanize/english"
"github.com/hashicorp/vault/api"
"github.com/mitchellh/cli"
"github.com/posener/complete"
Expand Down Expand Up @@ -48,6 +49,12 @@ type SetupVaultCommand struct {

destroy bool
autoYes bool

// Options for -check.
check bool
json bool
tmpl string
verbose bool
}

// Help satisfies the cli.Command Help function.
Expand All @@ -62,6 +69,10 @@ Usage: nomad setup vault [options]
VAULT_TOKEN, VAULT_ADDR, and other Vault-related environment variables
as documented in https://developer.hashicorp.com/vault/docs/commands#environment-variables.

The -check option can be used to verify if the Nomad cluster is ready to
migrate to use Workload Identities with Vault. This option requires
operator:read permission for Nomad.

WARNING: This command is an experimental feature and may change its behavior
in future versions of Nomad.

Expand All @@ -79,7 +90,22 @@ Setup Vault options:
Automatically answers "yes" to all the questions, making the setup
non-interactive. Defaults to "false".

`
-check
Verify if the Nomad cluster is ready to migrate to Workload Identities.

Setup Vault options when using -check:

-json
Output migration status information in its JSON format.

-t
Format and display migration status information using a Go template.

-verbose
Display full information.

` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace)

return strings.TrimSpace(helpText)
}

Expand All @@ -89,6 +115,12 @@ func (s *SetupVaultCommand) AutocompleteFlags() complete.Flags {
"-jwks-url": complete.PredictAnything,
"-destroy": complete.PredictSet("true", "false"),
"-y": complete.PredictSet("true", "false"),

// Options for -check.
"-check": complete.PredictSet("true", "false"),
"-json": complete.PredictSet("true", "false"),
"-verbose": complete.PredictSet("true", "false"),
"-t": complete.PredictAnything,
})
}

Expand All @@ -110,6 +142,13 @@ func (s *SetupVaultCommand) Run(args []string) int {
flags.BoolVar(&s.destroy, "destroy", false, "")
flags.BoolVar(&s.autoYes, "y", false, "")
flags.StringVar(&s.jwksURL, "jwks-url", "http://localhost:4646/.well-known/jwks.json", "")

// Options for -check.
flags.BoolVar(&s.check, "check", false, "")
flags.BoolVar(&s.json, "json", false, "")
flags.BoolVar(&s.verbose, "verbose", false, "")
flags.StringVar(&s.tmpl, "t", "", "")

if err := flags.Parse(args); err != nil {
return 1
}
Expand All @@ -121,6 +160,32 @@ func (s *SetupVaultCommand) Run(args []string) int {
return 1
}

if s.check {
return s.checkUpgrade()
} else {
// Verify that -check flags are not set.
var invalid []string
if s.json {
invalid = append(invalid, "-json")
}
if s.verbose {
invalid = append(invalid, "-verbose")
}
if s.tmpl != "" {
invalid = append(invalid, "-t")
}

if len(invalid) > 0 {
s.Ui.Error(fmt.Sprintf(
"The %s %s can only be used with -check",
english.OxfordWordSeries(invalid, "and"),
english.PluralWord(len(invalid), "option", "options"),
))
s.Ui.Error(commandErrorText(s))
return 1
}
}

if !isTty() && !s.autoYes {
s.Ui.Error("This command requires -y option when running in non-interactive mode")
return 1
Expand Down Expand Up @@ -216,7 +281,7 @@ a namespace %q and create all configuration within that namespace.
*/
s.Ui.Output(`
We will now enable the JWT credential backend and create a JWT auth method that
Nomad workloads will use.
Nomad workloads will use.
`)

if s.authMethodExists() {
Expand Down Expand Up @@ -606,6 +671,117 @@ func (s *SetupVaultCommand) removeConfiguredComponents() int {
return exitCode
}

// checkUpgrade implements the -check option. It queries the Nomad API for the
// Vault workload identity upgrade check and prints the result: formatted as
// JSON or through a Go template when -json or -t is set, otherwise as
// human-readable tables of the jobs, nodes, and Vault tokens reported by the
// check.
//
// It returns 1 when the API client cannot be created, the query fails, or the
// output cannot be formatted. It returns 0 otherwise, including when the
// cluster is not yet ready.
func (s *SetupVaultCommand) checkUpgrade() int {
	// IDs are truncated in the tables unless -verbose is set.
	length := shortId
	if s.verbose {
		length = fullId
	}

	client, err := s.Meta.Client()
	if err != nil {
		s.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	resp, _, err := client.Operator().UpgradeCheckVaultWorkloadIdentity(nil)
	if err != nil {
		s.Ui.Error(fmt.Sprintf("Error querying Vault workload identity upgrade check: %s", err))
		return 1
	}

	// Output formatted option if requested.
	if s.json || s.tmpl != "" {
		out, err := Format(s.json, s.tmpl, resp)
		if err != nil {
			s.Ui.Error(err.Error())
			return 1
		}

		s.Ui.Output(out)
		return 0
	}

	if resp.Ready() {
		s.Ui.Output("Nomad cluster is ready to use workload identities with Vault.")
		return 0
	}

	// Each section below is printed only when its list is non-empty.
	if len(resp.JobsWithoutVaultIdentity) != 0 {
		s.Ui.Output(s.Colorize().Color(`
[bold]Jobs Without Workload Identity for Vault[reset]
The following jobs access Vault but are not configured for workload identity.

You should redeploy them before fully migrating to workload identities with
Vault to prevent unexpected errors if their tokens need to be recreated.

Refer to https://developer.hashicorp.com/nomad/s/vault-workload-identity-migration
for more information.
`))
		// Row 0 is the table header; one row per job follows.
		out := make([]string, len(resp.JobsWithoutVaultIdentity)+1)
		out[0] = "ID|Namespace|Type|Status"
		for i, job := range resp.JobsWithoutVaultIdentity {
			out[i+1] = fmt.Sprintf("%s|%s|%s|%s",
				limit(job.ID, length),
				job.Namespace,
				job.Type,
				job.Status,
			)
		}
		s.Ui.Output(formatList(out))
	}

	if len(resp.OutdatedNodes) != 0 {
		s.Ui.Output(s.Colorize().Color(`
[bold]Outdated Nodes[reset]
The following nodes are running a version of Nomad that does not support using
workload identities with Vault.

You should upgrade them to Nomad 1.7 before fully migrating to workload
identities with Vault to prevent unexpected errors if they receive allocations
for jobs that use Vault.

Refer to https://developer.hashicorp.com/nomad/s/vault-workload-identity-migration
for more information.
`))
		out := make([]string, len(resp.OutdatedNodes)+1)
		out[0] = "ID|Name|Address|Version|Drain|Eligibility|Status"
		for i, node := range resp.OutdatedNodes {
			out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s|%s",
				limit(node.ID, length),
				node.Name,
				node.Address,
				node.Version,
				node.Drain,
				node.SchedulingEligibility,
				node.Status,
			)
		}
		s.Ui.Output(formatList(out))
	}

	if len(resp.VaultTokens) != 0 {
		s.Ui.Output(s.Colorize().Color(`
[bold]Vault Tokens[reset]
The following Vault ACL tokens were created by Nomad but will not be
automatically revoked after migrating to workload identities. They will expire
once their TTL reaches zero.
`))
		out := make([]string, len(resp.VaultTokens)+1)
		out[0] = "Accessor ID|Allocation ID|Node ID|Configured TTL"
		for i, token := range resp.VaultTokens {
			out[i+1] = fmt.Sprintf("%s|%s|%s|%d",
				token.Accessor,
				limit(token.AllocID, length),
				limit(token.NodeID, length),
				token.CreationTTL,
			)
		}
		s.Ui.Output(formatList(out))
	}

	return 0
}

func printMapOfStrings(m map[string]string) string {
var output string

Expand Down