Skip to content

Commit

Permalink
keyring: support prepublishing keys
Browse files Browse the repository at this point in the history
When a root key is rotated, the servers immediately start signing Workload
Identities with the new active key. But workloads may be using those WI tokens
to sign into external services, which may not have had time to fetch the new
public key and which might try to fetch new keys as needed.

Add support for prepublishing keys. Prepublished keys will be visible in the
JWKS endpoint but will not be used for signing or encryption until their
`PublishTime`. Update the periodic key rotation to prepublish keys at half the
`root_key_rotation_threshold` window, and promote prepublished keys to active
after the `PublishTime`.

This changeset also fixes three bugs. The first two, in periodic root key
rotation and garbage collection, can't be safely fixed without implementing
prepublishing:

* Periodic root key rotation would never happen because the default
  `root_key_rotation_threshold` of 720h exceeds the 72h maximum window of the FSM
  time table. We now compare the `CreateTime` against the wall clock time instead
  of the time table. (We expect to remove the time table in future work, ref
  #16359)
* Root key garbage collection could GC keys that were used to sign
  identities. We now wait until `root_key_rotation_threshold` +
  `root_key_gc_threshold` before GC'ing a key.
* When rekeying a root key, the core job did not mark the key as inactive after
  the rekey was complete.

Ref: https://hashicorp.atlassian.net/browse/NET-10398
Ref: https://hashicorp.atlassian.net/browse/NET-10280
Fixes: #19669
Fixes: #23528
Fixes: #19368
  • Loading branch information
tgross committed Jul 18, 2024
1 parent c970d22 commit 719b42e
Show file tree
Hide file tree
Showing 16 changed files with 540 additions and 169 deletions.
15 changes: 15 additions & 0 deletions .changelog/23577.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
```release-note:improvement
keyring: Added support for prepublishing keys
```

```release-note:bug
keyring: Fixed a bug where periodic key rotation would not occur
```

```release-note:bug
keyring: Fixed a bug where keys could be garbage collected before workload identities expire
```

```release-note:bug
keyring: Fixed a bug where keys would never exit the "rekeying" state after a rotation with the `-full` flag
```
18 changes: 12 additions & 6 deletions api/keyring.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,18 @@ type RootKeyMeta struct {
CreateIndex uint64
ModifyIndex uint64
State RootKeyState
PublishTime int64
}

// RootKeyState enum describes the lifecycle of a root key.
type RootKeyState string

const (
RootKeyStateInactive RootKeyState = "inactive"
RootKeyStateActive = "active"
RootKeyStateRekeying = "rekeying"
RootKeyStateDeprecated = "deprecated"
RootKeyStateInactive RootKeyState = "inactive"
RootKeyStateActive = "active"
RootKeyStateRekeying = "rekeying"
RootKeyStateDeprecated = "deprecated"
RootKeyStatePrepublished = "prepublished"
)

// List lists all the keyring metadata
Expand Down Expand Up @@ -78,6 +80,9 @@ func (k *Keyring) Rotate(opts *KeyringRotateOptions, w *WriteOptions) (*RootKeyM
if opts.Full {
qp.Set("full", "true")
}
if opts.PublishTime > 0 {
qp.Set("publish_time", fmt.Sprintf("%d", opts.PublishTime))
}
}
resp := &struct{ Key *RootKeyMeta }{}
wm, err := k.client.put("/v1/operator/keyring/rotate?"+qp.Encode(), nil, resp, w)
Expand All @@ -86,6 +91,7 @@ func (k *Keyring) Rotate(opts *KeyringRotateOptions, w *WriteOptions) (*RootKeyM

// KeyringRotateOptions are parameters for the Rotate API
type KeyringRotateOptions struct {
Full bool
Algorithm EncryptionAlgorithm
Full bool
Algorithm EncryptionAlgorithm
PublishTime int64
}
10 changes: 10 additions & 0 deletions command/agent/keyring_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package agent
import (
"fmt"
"net/http"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -167,6 +168,15 @@ func (s *HTTPServer) keyringRotateRequest(resp http.ResponseWriter, req *http.Re
args.Full = true
}

ptRaw := query.Get("publish_time")
if ptRaw != "" {
publishTime, err := strconv.ParseInt(ptRaw, 10, 64)
if err != nil {
return nil, fmt.Errorf("invalid publish_time: %w", err)
}
args.PublishTime = publishTime
}

var out structs.KeyringRotateRootKeyResponse
if err := s.agent.RPC("Keyring.Rotate", &args, &out); err != nil {
return nil, err
Expand Down
82 changes: 54 additions & 28 deletions command/agent/keyring_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package agent

import (
"fmt"
"net/http"
"net/http/httptest"
"strconv"
Expand All @@ -13,7 +14,6 @@ import (

"github.com/go-jose/go-jose/v3"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"

"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/nomad/structs"
Expand All @@ -29,57 +29,83 @@ func TestHTTP_Keyring_CRUD(t *testing.T) {
// List (get bootstrap key)

req, err := http.NewRequest(http.MethodGet, "/v1/operator/keyring/keys", nil)
require.NoError(t, err)
must.NoError(t, err)
obj, err := s.Server.KeyringRequest(respW, req)
require.NoError(t, err)
must.NoError(t, err)
listResp := obj.([]*structs.RootKeyMeta)
require.Len(t, listResp, 1)
oldKeyID := listResp[0].KeyID
must.Len(t, 1, listResp)
key0 := listResp[0].KeyID

// Rotate

req, err = http.NewRequest(http.MethodPut, "/v1/operator/keyring/rotate", nil)
require.NoError(t, err)
must.NoError(t, err)
obj, err = s.Server.KeyringRequest(respW, req)
require.NoError(t, err)
require.NotZero(t, respW.HeaderMap.Get("X-Nomad-Index"))
must.NoError(t, err)
must.NotEq(t, "", respW.HeaderMap.Get("X-Nomad-Index"))
rotateResp := obj.(structs.KeyringRotateRootKeyResponse)
require.NotNil(t, rotateResp.Key)
require.True(t, rotateResp.Key.Active())
newID1 := rotateResp.Key.KeyID
must.NotNil(t, rotateResp.Key)
must.True(t, rotateResp.Key.Active())
key1 := rotateResp.Key.KeyID

// Rotate with prepublish

publishTime := time.Now().Add(24 * time.Hour).UnixNano()
req, err = http.NewRequest(http.MethodPut,
fmt.Sprintf("/v1/operator/keyring/rotate?publish_time=%d", publishTime), nil)
must.NoError(t, err)
obj, err = s.Server.KeyringRequest(respW, req)
must.NoError(t, err)
must.NotEq(t, "", respW.HeaderMap.Get("X-Nomad-Index"))
rotateResp = obj.(structs.KeyringRotateRootKeyResponse)
must.NotNil(t, rotateResp.Key)
must.True(t, rotateResp.Key.Prepublished())
key2 := rotateResp.Key.KeyID

// List

req, err = http.NewRequest(http.MethodGet, "/v1/operator/keyring/keys", nil)
require.NoError(t, err)
must.NoError(t, err)
obj, err = s.Server.KeyringRequest(respW, req)
require.NoError(t, err)
must.NoError(t, err)
listResp = obj.([]*structs.RootKeyMeta)
require.Len(t, listResp, 2)
must.Len(t, 3, listResp)
for _, key := range listResp {
if key.KeyID == newID1 {
require.True(t, key.Active(), "new key should be active")
} else {
require.False(t, key.Active(), "initial key should be inactive")
switch key.KeyID {
case key0:
must.True(t, key.Inactive(), must.Sprint("initial key should be inactive"))
case key1:
must.True(t, key.Active(), must.Sprint("new key should be active"))
case key2:
must.True(t, key.Prepublished(),
must.Sprint("prepublished key should not be active"))
}
}

// Delete the old key and verify its gone
// Delete the original key and verify it's gone

req, err = http.NewRequest(http.MethodDelete, "/v1/operator/keyring/key/"+oldKeyID, nil)
require.NoError(t, err)
req, err = http.NewRequest(http.MethodDelete, "/v1/operator/keyring/key/"+key0, nil)
must.NoError(t, err)
obj, err = s.Server.KeyringRequest(respW, req)
require.NoError(t, err)
must.NoError(t, err)

req, err = http.NewRequest(http.MethodGet, "/v1/operator/keyring/keys", nil)
require.NoError(t, err)
must.NoError(t, err)
obj, err = s.Server.KeyringRequest(respW, req)
require.NoError(t, err)
must.NoError(t, err)
listResp = obj.([]*structs.RootKeyMeta)
require.Len(t, listResp, 1)
require.Equal(t, newID1, listResp[0].KeyID)
require.True(t, listResp[0].Active())
require.Len(t, listResp, 1)
must.Len(t, 2, listResp)
for _, key := range listResp {
switch key.KeyID {
case key0:
t.Fatalf("initial key should have been deleted")
case key1:
must.True(t, key.Active(), must.Sprint("new key should be active"))
case key2:
must.True(t, key.Prepublished(),
must.Sprint("prepublished key should not be active"))
}
}
})
}

Expand Down
10 changes: 7 additions & 3 deletions command/operator_root_keyring.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,15 @@ func renderVariablesKeysResponse(keys []*api.RootKeyMeta, verbose bool) string {
length = 8
}
out := make([]string, len(keys)+1)
out[0] = "Key|State|Create Time"
out[0] = "Key|State|Create Time|Publish Time"
i := 1
for _, k := range keys {
out[i] = fmt.Sprintf("%s|%v|%s",
k.KeyID[:length], k.State, formatUnixNanoTime(k.CreateTime))
publishTime := ""
if k.PublishTime > 0 {
publishTime = formatUnixNanoTime(k.PublishTime)
}
out[i] = fmt.Sprintf("%s|%v|%s|%s",
k.KeyID[:length], k.State, formatUnixNanoTime(k.CreateTime), publishTime)
i = i + 1
}
return formatList(out)
Expand Down
24 changes: 21 additions & 3 deletions command/operator_root_keyring_rotate.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package command
import (
"fmt"
"strings"
"time"

"github.com/hashicorp/nomad/api"
"github.com/posener/complete"
Expand Down Expand Up @@ -36,6 +37,12 @@ Keyring Options:
will immediately return and the re-encryption process will run
asynchronously on the leader.
-prepublish
Set a duration for which to prepublish the new key (ex. "1h"). The currently
active key will be unchanged but the new public key will be available in the
JWKS endpoint. Multiple keys can be prepublished and they will be promoted to
active in order of publish time, at most once every root_key_gc_interval.
-verbose
Show full information.
`
Expand All @@ -50,8 +57,9 @@ func (c *OperatorRootKeyringRotateCommand) Synopsis() string {
func (c *OperatorRootKeyringRotateCommand) AutocompleteFlags() complete.Flags {
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
complete.Flags{
"-full": complete.PredictNothing,
"-verbose": complete.PredictNothing,
"-full": complete.PredictNothing,
"-prepublish": complete.PredictNothing,
"-verbose": complete.PredictNothing,
})
}

Expand All @@ -65,11 +73,13 @@ func (c *OperatorRootKeyringRotateCommand) Name() string {

func (c *OperatorRootKeyringRotateCommand) Run(args []string) int {
var rotateFull, verbose bool
var prepublishDuration time.Duration

flags := c.Meta.FlagSet("root keyring rotate", FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
flags.BoolVar(&rotateFull, "full", false, "full key rotation")
flags.BoolVar(&verbose, "verbose", false, "")
flags.DurationVar(&prepublishDuration, "prepublish", 0, "prepublish key")

if err := flags.Parse(args); err != nil {
return 1
Expand All @@ -88,8 +98,16 @@ func (c *OperatorRootKeyringRotateCommand) Run(args []string) int {
return 1
}

publishTime := int64(0)
if prepublishDuration > 0 {
publishTime = time.Now().UnixNano() + prepublishDuration.Nanoseconds()
}

resp, _, err := client.Keyring().Rotate(
&api.KeyringRotateOptions{Full: rotateFull}, nil)
&api.KeyringRotateOptions{
Full: rotateFull,
PublishTime: publishTime,
}, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("error: %s", err))
return 1
Expand Down
Loading

0 comments on commit 719b42e

Please sign in to comment.