Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a periodic test of the autoseal to detect loss of connectivity. #13078

Merged
merged 17 commits into from
Nov 10, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changelog/13078.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
core: Periodically test the health of connectivity to auto-seal backends
```
8 changes: 8 additions & 0 deletions vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -2140,6 +2140,10 @@ func (c *Core) postUnseal(ctx context.Context, ctxCancelFunc context.CancelFunc,
if err := seal.UpgradeKeys(c.activeContext); err != nil {
c.logger.Warn("post-unseal upgrade seal keys failed", "error", err)
}

// Start a periodic but infrequent heartbeat to detect auto-seal backend outages at runtime rather than being
// surprised by this at the next need to unseal.
seal.StartHealthCheck()
}

c.metricsCh = make(chan struct{})
Expand Down Expand Up @@ -2226,6 +2230,10 @@ func (c *Core) preSeal() error {
c.autoRotateCancel = nil
}

if seal, ok := c.seal.(*autoSeal); ok {
seal.StopHealthCheck()
}

preSealPhysical(c)

c.logger.Info("pre-seal teardown complete")
Expand Down
82 changes: 78 additions & 4 deletions vault/seal_autoseal.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package vault

import (
"bytes"
"context"
"crypto/subtle"
"encoding/json"
"fmt"
mathrand "math/rand"
"sync/atomic"
"time"

proto "github.com/golang/protobuf/proto"
log "github.com/hashicorp/go-hclog"
Expand All @@ -18,16 +21,23 @@ import (
// applicable in the OSS side
var barrierTypeUpgradeCheck = func(_ string, _ *SealConfig) {}

// Periods for the ticker that drives the auto-seal health check started by
// StartHealthCheck.
// NOTE(review): "Heath" in these names looks like a typo for "Health"; a rename
// would have to touch every reference, so it is only flagged here.
const (
// sealHeathTestIntervalNominal is the ticker period used when the health
// check is started and after a check succeeds following a failure.
sealHeathTestIntervalNominal = 10 * time.Minute
// sealHeathTestIntervalUnhealthy is the shorter period the ticker is reset
// to after the first failed check, so that recovery is noticed sooner.
sealHeathTestIntervalUnhealthy = 1 * time.Minute
)

// autoSeal is a Seal implementation that contains logic for encrypting and
// decrypting stored keys via an underlying AutoSealAccess implementation, as
// well as logic related to recovery keys and barrier config.
type autoSeal struct {
*seal.Access

barrierConfig atomic.Value
recoveryConfig atomic.Value
core *Core
logger log.Logger
barrierConfig atomic.Value
recoveryConfig atomic.Value
core *Core
logger log.Logger
healthCheck *time.Ticker
ncabatoff marked this conversation as resolved.
Show resolved Hide resolved
healthCheckStop chan struct{}
}

// Ensure we are implementing the Seal interface
Expand Down Expand Up @@ -499,3 +509,67 @@ func (d *autoSeal) migrateRecoveryConfig(ctx context.Context) error {

return nil
}

// StartHealthCheck starts a goroutine that tests the health of the auto-unseal backend once every 10 minutes.
// If unhealthy, logs a warning on the condition and begins testing every one minute until healthy again.
func (d *autoSeal) StartHealthCheck() {
d.healthCheck = time.NewTicker(sealHeathTestIntervalNominal)
ncabatoff marked this conversation as resolved.
Show resolved Hide resolved
d.healthCheckStop = make(chan struct{})
ncabatoff marked this conversation as resolved.
Show resolved Hide resolved
go func() {
lastTestOk := true
ncabatoff marked this conversation as resolved.
Show resolved Hide resolved
lastSeenOk := time.Now()
for {
select {
case <-d.healthCheckStop:
if d.healthCheck != nil {
d.healthCheck.Stop()
}
d.healthCheck = nil
d.healthCheckStop = nil
return
case t := <-d.healthCheck.C:
testVal := fmt.Sprintf("Heartbeat %d", mathrand.Intn(1000))
ciphertext, err := d.Wrapper.Encrypt(d.core.activeContext, []byte(testVal), nil)
ncabatoff marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
d.logger.Warn("failed to encrypt seal health test value, seal backend may be unreachable", "error", err)
if lastTestOk {
d.healthCheck.Reset(sealHeathTestIntervalUnhealthy)
}
lastTestOk = false
} else {
plaintext, err := d.Wrapper.Decrypt(d.core.activeContext, ciphertext, nil)
if err != nil {
d.logger.Warn("failed to decrypt seal health test value, seal backend may be unreachable", "error", err)
if lastTestOk {
d.healthCheck.Reset(sealHeathTestIntervalUnhealthy)
}
lastTestOk = false
}
if !bytes.Equal([]byte(testVal), plaintext) {
d.logger.Warn("seal health test value failed to decrypt to expected value")
if lastTestOk {
d.healthCheck.Reset(sealHeathTestIntervalUnhealthy)
}
lastTestOk = false
} else {
d.logger.Debug("seal health test passed")
if !lastTestOk {
d.logger.Info("seal backend is now healthy again", "downtime", t.Sub(lastSeenOk).String())
}
if !lastTestOk {
d.healthCheck.Reset(sealHeathTestIntervalNominal)
}
lastTestOk = true
lastSeenOk = t
}
}
}
}
}()
}

func (d *autoSeal) StopHealthCheck() {
if d.healthCheckStop != nil {
close(d.healthCheckStop)
ncabatoff marked this conversation as resolved.
Show resolved Hide resolved
}
}