Skip to content

Commit

Permalink
Prevent panics in expiration invalidation, and make some changes for …
Browse files Browse the repository at this point in the history
…testing (#18401)
  • Loading branch information
ncabatoff authored and AnPucel committed Feb 3, 2023
1 parent cc1f64e commit d201f1b
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 30 deletions.
3 changes: 3 additions & 0 deletions changelog/18401.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
expiration: Prevent panics on perf standbys when an irrevocable release gets deleted.
```
4 changes: 4 additions & 0 deletions vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@ type Core struct {
rollbackPeriod time.Duration

pendingRemovalMountsAllowed bool
expirationRevokeRetryBase time.Duration
}

func (c *Core) HAState() consts.HAState {
Expand Down Expand Up @@ -790,6 +791,8 @@ type CoreConfig struct {
RollbackPeriod time.Duration

PendingRemovalMountsAllowed bool

ExpirationRevokeRetryBase time.Duration
}

// GetServiceRegistration returns the config's ServiceRegistration, or nil if it does
Expand Down Expand Up @@ -944,6 +947,7 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
effectiveSDKVersion: effectiveSDKVersion,
userFailedLoginInfo: make(map[FailedLoginUser]*FailedLoginInfo),
pendingRemovalMountsAllowed: conf.PendingRemovalMountsAllowed,
expirationRevokeRetryBase: conf.ExpirationRevokeRetryBase,
}

c.standbyStopCh.Store(make(chan struct{}))
Expand Down
36 changes: 22 additions & 14 deletions vault/expiration.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@ type ExpirationManager struct {
// request. This value should only be set by tests.
testRegisterAuthFailure uberAtomic.Bool

jobManager *fairshare.JobManager
jobManager *fairshare.JobManager
revokeRetryBase time.Duration
}

type ExpireLeaseStrategy func(context.Context, *ExpirationManager, string, *namespace.Namespace)
Expand Down Expand Up @@ -234,7 +235,6 @@ func (r *revocationJob) Execute() error {

func (r *revocationJob) OnFailure(err error) {
r.m.core.metricSink.IncrCounterWithLabels([]string{"expire", "lease_expiration", "error"}, 1, []metrics.Label{metricsutil.NamespaceLabel(r.ns)})
r.m.logger.Error("failed to revoke lease", "lease_id", r.leaseID, "error", err)

r.m.pendingLock.Lock()
defer r.m.pendingLock.Unlock()
Expand All @@ -246,12 +246,15 @@ func (r *revocationJob) OnFailure(err error) {

pending := pendingRaw.(pendingInfo)
pending.revokesAttempted++
newTimer := r.revokeExponentialBackoff(pending.revokesAttempted)

if pending.revokesAttempted >= maxRevokeAttempts || errIsUnrecoverable(err) {
r.m.logger.Trace("marking lease as irrevocable", "lease_id", r.leaseID, "error", err)
reason := "unrecoverable error"
if pending.revokesAttempted >= maxRevokeAttempts {
r.m.logger.Trace("lease has consumed all retry attempts", "lease_id", r.leaseID)
reason = "lease has consumed all retry attempts"
err = fmt.Errorf("%v: %w", outOfRetriesMessage, err)
}
r.m.logger.Trace("failed to revoke lease, marking lease as irrevocable", "lease_id", r.leaseID, "error", err, "reason", reason)

le, loadErr := r.m.loadEntry(r.nsCtx, r.leaseID)
if loadErr != nil {
Expand All @@ -265,9 +268,12 @@ func (r *revocationJob) OnFailure(err error) {

r.m.markLeaseIrrevocable(r.nsCtx, le, err)
return
} else {
r.m.logger.Error("failed to revoke lease", "lease_id", r.leaseID, "error", err,
"attempts", pending.revokesAttempted, "next_attempt", newTimer)
}

pending.timer.Reset(revokeExponentialBackoff(pending.revokesAttempted))
pending.timer.Reset(newTimer)
r.m.pending.Store(r.leaseID, pending)
}

Expand All @@ -285,8 +291,8 @@ func expireLeaseStrategyFairsharing(ctx context.Context, m *ExpirationManager, l
m.jobManager.AddJob(job, mountAccessor)
}

func revokeExponentialBackoff(attempt uint8) time.Duration {
exp := (1 << attempt) * revokeRetryBase
func (r *revocationJob) revokeExponentialBackoff(attempt uint8) time.Duration {
exp := (1 << attempt) * r.m.revokeRetryBase
randomDelta := 0.5 * float64(exp)

// Allow backoff time to be a random value between exp +/- (0.5*exp)
Expand Down Expand Up @@ -351,7 +357,11 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
logLeaseExpirations: os.Getenv("VAULT_SKIP_LOGGING_LEASE_EXPIRATIONS") == "",
expireFunc: e,

jobManager: jobManager,
jobManager: jobManager,
revokeRetryBase: c.expirationRevokeRetryBase,
}
if exp.revokeRetryBase == 0 {
exp.revokeRetryBase = revokeRetryBase
}
*exp.restoreMode = 1

Expand Down Expand Up @@ -495,16 +505,14 @@ func (m *ExpirationManager) invalidate(key string) {
m.nonexpiring.Delete(leaseID)

if info, ok := m.irrevocable.Load(leaseID); ok {
irrevocable := info.(pendingInfo)
ile := info.(*leaseEntry)
m.irrevocable.Delete(leaseID)
m.irrevocableLeaseCount--

m.leaseCount--
// Avoid nil pointer dereference. Without cachedLeaseInfo we do not have enough information to
// accurately update quota lease information.
// Note that cachedLeaseInfo should never be nil under normal operation.
if irrevocable.cachedLeaseInfo != nil {
leaseInfo := &quotas.QuotaLeaseInformation{LeaseId: leaseID, Role: irrevocable.cachedLeaseInfo.LoginRole}
// Note that the leaseEntry should never be nil under normal operation.
if ile != nil {
leaseInfo := &quotas.QuotaLeaseInformation{LeaseId: leaseID, Role: ile.LoginRole}
if err := m.core.quotasHandleLeases(ctx, quotas.LeaseActionDeleted, []*quotas.QuotaLeaseInformation{leaseInfo}); err != nil {
m.logger.Error("failed to update quota on lease invalidation", "error", err)
return
Expand Down
23 changes: 18 additions & 5 deletions vault/logical_passthrough.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,32 @@ import (
// PassthroughBackendFactory returns a PassthroughBackend
// with leases switched off
func PassthroughBackendFactory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) {
return LeaseSwitchedPassthroughBackend(ctx, conf, false)
return LeaseSwitchedPassthroughBackend(ctx, conf, nil)
}

// LeasedPassthroughBackendFactory returns a PassthroughBackend
// with leases switched on
func LeasedPassthroughBackendFactory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) {
return LeaseSwitchedPassthroughBackend(ctx, conf, true)
return LeaseSwitchedPassthroughBackend(ctx, conf, func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
return nil, nil
})
}

type revokeFunc func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error)

// LeaseSwitchedPassthroughBackend returns a PassthroughBackend
// with leases switched on or off
func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendConfig, leases bool) (logical.Backend, error) {
func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendConfig, revoke revokeFunc) (logical.Backend, error) {
var b PassthroughBackend
b.generateLeases = leases
if revoke == nil {
// We probably don't need this, since we should never have to handle revoke requests, but just in case...
b.revoke = func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
return nil, nil
}
} else {
b.generateLeases = true
b.revoke = revoke
}
b.Backend = &framework.Backend{
Help: strings.TrimSpace(passthroughHelp),

Expand Down Expand Up @@ -65,7 +77,7 @@ func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendC
Type: "kv",

Renew: b.handleRead,
Revoke: b.handleRevoke,
Revoke: b.revoke,
},
}

Expand All @@ -84,6 +96,7 @@ func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendC
type PassthroughBackend struct {
*framework.Backend
generateLeases bool
revoke func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error)
}

func (b *PassthroughBackend) handleRevoke(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
Expand Down
13 changes: 8 additions & 5 deletions vault/logical_passthrough_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ func TestPassthroughBackend_List(t *testing.T) {
}

func TestPassthroughBackend_Revoke(t *testing.T) {
test := func(b logical.Backend) {
test := func(t *testing.T, b logical.Backend) {
t.Helper()
req := logical.TestRequest(t, logical.RevokeOperation, "kv")
req.Secret = &logical.Secret{
InternalData: map[string]interface{}{
Expand All @@ -209,10 +210,12 @@ func TestPassthroughBackend_Revoke(t *testing.T) {
t.Fatalf("err: %v", err)
}
}
b := testPassthroughBackend()
test(b)
b = testPassthroughLeasedBackend()
test(b)
t.Run("passthrough", func(t *testing.T) {
test(t, testPassthroughBackend())
})
t.Run("passthrough-leased", func(t *testing.T) {
test(t, testPassthroughLeasedBackend())
})
}

func testPassthroughBackend() logical.Backend {
Expand Down
7 changes: 1 addition & 6 deletions vault/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -1701,20 +1701,15 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
}

coreConfig.ClusterCipherSuites = base.ClusterCipherSuites

coreConfig.DisableCache = base.DisableCache

coreConfig.DevToken = base.DevToken
coreConfig.RecoveryMode = base.RecoveryMode

coreConfig.ActivityLogConfig = base.ActivityLogConfig
coreConfig.EnableResponseHeaderHostname = base.EnableResponseHeaderHostname
coreConfig.EnableResponseHeaderRaftNodeID = base.EnableResponseHeaderRaftNodeID

coreConfig.RollbackPeriod = base.RollbackPeriod

coreConfig.PendingRemovalMountsAllowed = base.PendingRemovalMountsAllowed

coreConfig.ExpirationRevokeRetryBase = base.ExpirationRevokeRetryBase
testApplyEntBaseConfig(coreConfig, base)
}
if coreConfig.ClusterName == "" {
Expand Down

0 comments on commit d201f1b

Please sign in to comment.