Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent panics in expiration invalidation, and make some changes for testing #18401

Merged
merged 3 commits into from
Dec 15, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@ type Core struct {
rollbackPeriod time.Duration

pendingRemovalMountsAllowed bool
expirationRevokeRetryBase time.Duration
}

func (c *Core) HAState() consts.HAState {
Expand Down Expand Up @@ -790,6 +791,8 @@ type CoreConfig struct {
RollbackPeriod time.Duration

PendingRemovalMountsAllowed bool

ExpirationRevokeRetryBase time.Duration
}

// GetServiceRegistration returns the config's ServiceRegistration, or nil if it does
Expand Down Expand Up @@ -944,6 +947,7 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
effectiveSDKVersion: effectiveSDKVersion,
userFailedLoginInfo: make(map[FailedLoginUser]*FailedLoginInfo),
pendingRemovalMountsAllowed: conf.PendingRemovalMountsAllowed,
expirationRevokeRetryBase: conf.ExpirationRevokeRetryBase,
}

c.standbyStopCh.Store(make(chan struct{}))
Expand Down
36 changes: 22 additions & 14 deletions vault/expiration.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@ type ExpirationManager struct {
// request. This value should only be set by tests.
testRegisterAuthFailure uberAtomic.Bool

jobManager *fairshare.JobManager
jobManager *fairshare.JobManager
revokeRetryBase time.Duration
}

type ExpireLeaseStrategy func(context.Context, *ExpirationManager, string, *namespace.Namespace)
Expand Down Expand Up @@ -234,7 +235,6 @@ func (r *revocationJob) Execute() error {

func (r *revocationJob) OnFailure(err error) {
r.m.core.metricSink.IncrCounterWithLabels([]string{"expire", "lease_expiration", "error"}, 1, []metrics.Label{metricsutil.NamespaceLabel(r.ns)})
r.m.logger.Error("failed to revoke lease", "lease_id", r.leaseID, "error", err)

r.m.pendingLock.Lock()
defer r.m.pendingLock.Unlock()
Expand All @@ -246,12 +246,15 @@ func (r *revocationJob) OnFailure(err error) {

pending := pendingRaw.(pendingInfo)
pending.revokesAttempted++
newTimer := r.revokeExponentialBackoff(pending.revokesAttempted)

if pending.revokesAttempted >= maxRevokeAttempts || errIsUnrecoverable(err) {
r.m.logger.Trace("marking lease as irrevocable", "lease_id", r.leaseID, "error", err)
reason := "unrecoverable error"
if pending.revokesAttempted >= maxRevokeAttempts {
r.m.logger.Trace("lease has consumed all retry attempts", "lease_id", r.leaseID)
reason = "lease has consumed all retry attempts"
err = fmt.Errorf("%v: %w", outOfRetriesMessage, err)
}
r.m.logger.Trace("failed to revoke lease, marking lease as irrevocable", "lease_id", r.leaseID, "error", err, "reason", reason)

le, loadErr := r.m.loadEntry(r.nsCtx, r.leaseID)
if loadErr != nil {
Expand All @@ -265,9 +268,12 @@ func (r *revocationJob) OnFailure(err error) {

r.m.markLeaseIrrevocable(r.nsCtx, le, err)
return
} else {
r.m.logger.Error("failed to revoke lease", "lease_id", r.leaseID, "error", err,
"attempts", pending.revokesAttempted, "next_attempt", newTimer)
}

pending.timer.Reset(revokeExponentialBackoff(pending.revokesAttempted))
pending.timer.Reset(newTimer)
r.m.pending.Store(r.leaseID, pending)
}

Expand All @@ -285,8 +291,8 @@ func expireLeaseStrategyFairsharing(ctx context.Context, m *ExpirationManager, l
m.jobManager.AddJob(job, mountAccessor)
}

func revokeExponentialBackoff(attempt uint8) time.Duration {
exp := (1 << attempt) * revokeRetryBase
func (r *revocationJob) revokeExponentialBackoff(attempt uint8) time.Duration {
exp := (1 << attempt) * r.m.revokeRetryBase
randomDelta := 0.5 * float64(exp)

// Allow backoff time to be a random value between exp +/- (0.5*exp)
Expand Down Expand Up @@ -351,7 +357,11 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
logLeaseExpirations: os.Getenv("VAULT_SKIP_LOGGING_LEASE_EXPIRATIONS") == "",
expireFunc: e,

jobManager: jobManager,
jobManager: jobManager,
revokeRetryBase: c.expirationRevokeRetryBase,
}
if exp.revokeRetryBase == 0 {
exp.revokeRetryBase = revokeRetryBase
}
*exp.restoreMode = 1

Expand Down Expand Up @@ -495,16 +505,14 @@ func (m *ExpirationManager) invalidate(key string) {
m.nonexpiring.Delete(leaseID)

if info, ok := m.irrevocable.Load(leaseID); ok {
irrevocable := info.(pendingInfo)
ile := info.(*leaseEntry)
m.irrevocable.Delete(leaseID)
m.irrevocableLeaseCount--

m.leaseCount--
// Avoid nil pointer dereference. Without cachedLeaseInfo we do not have enough information to
// accurately update quota lease information.
// Note that cachedLeaseInfo should never be nil under normal operation.
if irrevocable.cachedLeaseInfo != nil {
leaseInfo := &quotas.QuotaLeaseInformation{LeaseId: leaseID, Role: irrevocable.cachedLeaseInfo.LoginRole}
// Note that the leaseEntry should never be nil under normal operation.
if ile != nil {
leaseInfo := &quotas.QuotaLeaseInformation{LeaseId: leaseID, Role: ile.LoginRole}
if err := m.core.quotasHandleLeases(ctx, quotas.LeaseActionDeleted, []*quotas.QuotaLeaseInformation{leaseInfo}); err != nil {
m.logger.Error("failed to update quota on lease invalidation", "error", err)
return
Expand Down
23 changes: 18 additions & 5 deletions vault/logical_passthrough.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,32 @@ import (
// PassthroughBackendFactory returns a PassthroughBackend
// with leases switched off
func PassthroughBackendFactory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) {
return LeaseSwitchedPassthroughBackend(ctx, conf, false)
return LeaseSwitchedPassthroughBackend(ctx, conf, nil)
}

// LeasedPassthroughBackendFactory returns a PassthroughBackend
// with leases switched on
func LeasedPassthroughBackendFactory(ctx context.Context, conf *logical.BackendConfig) (logical.Backend, error) {
return LeaseSwitchedPassthroughBackend(ctx, conf, true)
return LeaseSwitchedPassthroughBackend(ctx, conf, func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
return nil, nil
})
}

type revokeFunc func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error)

// LeaseSwitchedPassthroughBackend returns a PassthroughBackend
// with leases switched on or off
func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendConfig, leases bool) (logical.Backend, error) {
func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendConfig, revoke revokeFunc) (logical.Backend, error) {
var b PassthroughBackend
b.generateLeases = leases
if revoke == nil {
// We probably don't need this, since we should never have to handle revoke requests, but just in case...
b.revoke = func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
return nil, nil
}
} else {
b.generateLeases = true
}
b.revoke = revoke
b.Backend = &framework.Backend{
Help: strings.TrimSpace(passthroughHelp),

Expand Down Expand Up @@ -65,7 +77,7 @@ func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendC
Type: "kv",

Renew: b.handleRead,
Revoke: b.handleRevoke,
Revoke: b.revoke,
},
}

Expand All @@ -84,6 +96,7 @@ func LeaseSwitchedPassthroughBackend(ctx context.Context, conf *logical.BackendC
type PassthroughBackend struct {
*framework.Backend
generateLeases bool
revoke func(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error)
}

func (b *PassthroughBackend) handleRevoke(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
Expand Down
7 changes: 1 addition & 6 deletions vault/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -1701,20 +1701,15 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
}

coreConfig.ClusterCipherSuites = base.ClusterCipherSuites

coreConfig.DisableCache = base.DisableCache

coreConfig.DevToken = base.DevToken
coreConfig.RecoveryMode = base.RecoveryMode

coreConfig.ActivityLogConfig = base.ActivityLogConfig
coreConfig.EnableResponseHeaderHostname = base.EnableResponseHeaderHostname
coreConfig.EnableResponseHeaderRaftNodeID = base.EnableResponseHeaderRaftNodeID

coreConfig.RollbackPeriod = base.RollbackPeriod

coreConfig.PendingRemovalMountsAllowed = base.PendingRemovalMountsAllowed

coreConfig.ExpirationRevokeRetryBase = base.ExpirationRevokeRetryBase
testApplyEntBaseConfig(coreConfig, base)
}
if coreConfig.ClusterName == "" {
Expand Down