Skip to content

Commit

Permalink
Prevent long delays in ExpirationManager.Stop due to blanking a large…
Browse files Browse the repository at this point in the history
… pending map (#23282)
  • Loading branch information
ncabatoff committed Sep 27, 2023
1 parent d7e4447 commit 547bff7
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 23 deletions.
3 changes: 3 additions & 0 deletions changelog/23282.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
expiration: Prevent large lease loads from delaying state changes, e.g. becoming active or standby.
```
56 changes: 33 additions & 23 deletions vault/expiration.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ type ExpirationManager struct {
leaseCheckCounter *uint32

logLeaseExpirations bool
expireFunc ExpireLeaseStrategy
expireFunc atomic.Pointer[ExpireLeaseStrategy]

// testRegisterAuthFailure, if set to true, triggers an explicit failure on
// RegisterAuth to simulate a partial failure during a token creation
Expand Down Expand Up @@ -360,11 +360,11 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
leaseCheckCounter: new(uint32),

logLeaseExpirations: os.Getenv("VAULT_SKIP_LOGGING_LEASE_EXPIRATIONS") == "",
expireFunc: e,

jobManager: jobManager,
revokeRetryBase: c.expirationRevokeRetryBase,
}
exp.expireFunc.Store(&e)
if exp.revokeRetryBase == 0 {
exp.revokeRetryBase = revokeRetryBase
}
Expand Down Expand Up @@ -846,6 +846,8 @@ func (m *ExpirationManager) processRestore(ctx context.Context, leaseID string)
return nil
}

func expireNoop(ctx context.Context, manager *ExpirationManager, s string, n *namespace.Namespace) {}

// Stop is used to prevent further automatic revocations.
// This must be called before sealing the view.
func (m *ExpirationManager) Stop() error {
Expand All @@ -860,27 +862,24 @@ func (m *ExpirationManager) Stop() error {
close(m.quitCh)

m.pendingLock.Lock()
// Replacing the entire map would cause a race with
// a simultaneous WalkTokens, which doesn't hold pendingLock.
m.pending.Range(func(key, value interface{}) bool {
info := value.(pendingInfo)
info.timer.Stop()
m.pending.Delete(key)
return true
})
// We don't want any lease timers that fire to do anything; they can wait
// for the next ExpirationManager to handle them.
newStrategy := ExpireLeaseStrategy(expireNoop)
m.expireFunc.Store(&newStrategy)
oldPending := m.pending
m.pending, m.nonexpiring, m.irrevocable = sync.Map{}, sync.Map{}, sync.Map{}
m.leaseCount = 0
m.nonexpiring.Range(func(key, value interface{}) bool {
m.nonexpiring.Delete(key)
return true
})
m.uniquePolicies = make(map[string][]string)
m.irrevocable.Range(func(key, _ interface{}) bool {
m.irrevocable.Delete(key)
return true
})
m.irrevocableLeaseCount = 0
m.pendingLock.Unlock()

go oldPending.Range(func(key, value interface{}) bool {
info := value.(pendingInfo)
// We need to stop the timers to prevent memory leaks.
info.timer.Stop()
return true
})

if m.inRestoreMode() {
for {
if !m.inRestoreMode() {
Expand Down Expand Up @@ -1881,7 +1880,8 @@ func (m *ExpirationManager) updatePendingInternal(le *leaseEntry) {
leaseID, namespace := le.LeaseID, le.namespace
// Extend the timer by the lease total
timer := time.AfterFunc(leaseTotal, func() {
m.expireFunc(m.quitContext, m, leaseID, namespace)
expFn := *m.expireFunc.Load() // Load and deref the pointer
expFn(m.quitContext, m, leaseID, namespace)
})
pending = pendingInfo{
timer: timer,
Expand Down Expand Up @@ -2471,8 +2471,13 @@ func (m *ExpirationManager) WalkTokens(walkFn ExpirationWalkFunction) error {
return true
}

m.pending.Range(callback)
m.nonexpiring.Range(callback)
m.pendingLock.RLock()
toWalk := []sync.Map{m.pending, m.nonexpiring}
m.pendingLock.RUnlock()

for _, m := range toWalk {
m.Range(callback)
}

return nil
}
Expand All @@ -2495,8 +2500,13 @@ func (m *ExpirationManager) walkLeases(walkFn leaseWalkFunction) error {
return walkFn(key.(string), expireTime)
}

m.pending.Range(callback)
m.nonexpiring.Range(callback)
m.pendingLock.RLock()
toWalk := []sync.Map{m.pending, m.nonexpiring}
m.pendingLock.RUnlock()

for _, m := range toWalk {
m.Range(callback)
}

return nil
}
Expand Down

0 comments on commit 547bff7

Please sign in to comment.