Skip to content

Commit

Permalink
add core state lock deadlock detection config option v2 (#18604)
Browse files Browse the repository at this point in the history
* add core state lockd eadlock detection config option v2

* add changelog

* split out NewTestCluster function to maintain build flag

* replace long func with constant

* remove line

* rename file, and move where detect deadlock flag is set
  • Loading branch information
elliesterner authored and AnPucel committed Feb 3, 2023
1 parent 72e77e9 commit 06e83f3
Show file tree
Hide file tree
Showing 14 changed files with 131 additions and 30 deletions.
3 changes: 3 additions & 0 deletions changelog/18604.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
core: add `detect_deadlocks` config to optionally detect core state deadlocks
```
1 change: 1 addition & 0 deletions command/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -2619,6 +2619,7 @@ func createCoreConfig(c *ServerCommand, config *server.Config, backend physical.
CredentialBackends: c.CredentialBackends,
LogicalBackends: c.LogicalBackends,
Logger: c.logger,
DetectDeadlocks: config.DetectDeadlocks,
DisableSentinelTrace: config.DisableSentinelTrace,
DisableCache: config.DisableCache,
DisableMlock: config.DisableMlock,
Expand Down
9 changes: 9 additions & 0 deletions command/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ type Config struct {
LogRequestsLevel string `hcl:"-"`
LogRequestsLevelRaw interface{} `hcl:"log_requests_level"`

DetectDeadlocks string `hcl:"detect_deadlocks"`

EnableResponseHeaderRaftNodeID bool `hcl:"-"`
EnableResponseHeaderRaftNodeIDRaw interface{} `hcl:"enable_response_header_raft_node_id"`

Expand Down Expand Up @@ -389,6 +391,11 @@ func (c *Config) Merge(c2 *Config) *Config {
result.LogRequestsLevel = c2.LogRequestsLevel
}

result.DetectDeadlocks = c.DetectDeadlocks
if c2.DetectDeadlocks != "" {
result.DetectDeadlocks = c2.DetectDeadlocks
}

result.EnableResponseHeaderRaftNodeID = c.EnableResponseHeaderRaftNodeID
if c2.EnableResponseHeaderRaftNodeID {
result.EnableResponseHeaderRaftNodeID = c2.EnableResponseHeaderRaftNodeID
Expand Down Expand Up @@ -1025,6 +1032,8 @@ func (c *Config) Sanitized() map[string]interface{} {
"enable_response_header_raft_node_id": c.EnableResponseHeaderRaftNodeID,

"log_requests_level": c.LogRequestsLevel,

"detect_deadlocks": c.DetectDeadlocks,
}
for k, v := range sharedResult {
result[k] = v
Expand Down
1 change: 1 addition & 0 deletions command/server/config_test_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,7 @@ func testConfig_Sanitized(t *testing.T) {
"raw_storage_endpoint": true,
"introspection_endpoint": false,
"disable_sentinel_trace": true,
"detect_deadlocks": "",
"enable_ui": true,
"enable_response_header_hostname": false,
"enable_response_header_raft_node_id": false,
Expand Down
21 changes: 0 additions & 21 deletions helper/locking/deadlock.go

This file was deleted.

39 changes: 33 additions & 6 deletions helper/locking/lock.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,46 @@
//go:build !deadlock

package locking

import (
"sync"

"github.com/sasha-s/go-deadlock"
)

// DeadlockMutex is just a sync.Mutex when the build tag `deadlock` is absent.
// See its other definition in the corresponding deadlock-build-tag-constrained
// file for more details.
// Common mutex interface to allow either built-in or imported deadlock use
type Mutex interface {
Lock()
Unlock()
}

// Common r/w mutex interface to allow either built-in or imported deadlock use
type RWMutex interface {
Lock()
RLock()
RLocker() sync.Locker
RUnlock()
Unlock()
}

// DeadlockMutex (used when requested via config option `detact_deadlocks`),
// behaves like a sync.Mutex but does periodic checking to see if outstanding
// locks and requests look like a deadlock. If it finds a deadlock candidate it
// will output it prefixed with "POTENTIAL DEADLOCK", as described at
// https://github.com/sasha-s/go-deadlock
type DeadlockMutex struct {
sync.Mutex
deadlock.Mutex
}

// DeadlockRWMutex is the RW version of DeadlockMutex.
type DeadlockRWMutex struct {
deadlock.RWMutex
}

// Regular sync/mutex.
type SyncMutex struct {
sync.Mutex
}

// DeadlockRWMutex is the RW version of SyncMutex.
type SyncRWMutex struct {
sync.RWMutex
}
1 change: 1 addition & 0 deletions http/sys_config_state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ func TestSysConfigState_Sanitized(t *testing.T) {
"disable_printable_check": false,
"disable_sealwrap": false,
"raw_storage_endpoint": false,
"detect_deadlocks": "",
"introspection_endpoint": false,
"disable_sentinel_trace": false,
"enable_ui": false,
Expand Down
14 changes: 13 additions & 1 deletion vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ type Core struct {
auditBackends map[string]audit.Factory

// stateLock protects mutable state
stateLock locking.DeadlockRWMutex
stateLock locking.RWMutex
sealed *uint32

standby bool
Expand Down Expand Up @@ -732,6 +732,9 @@ type CoreConfig struct {

Logger log.Logger

// Use the deadlocks library to detect deadlocks
DetectDeadlocks string

// Disables the trace display for Sentinel checks
DisableSentinelTrace bool

Expand Down Expand Up @@ -904,6 +907,14 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
conf.NumExpirationWorkers = numExpirationWorkersDefault
}

// Use imported logging deadlock if requested
var stateLock locking.RWMutex
if strings.Contains(conf.DetectDeadlocks, "statelock") {
stateLock = &locking.DeadlockRWMutex{}
} else {
stateLock = &locking.SyncRWMutex{}
}

effectiveSDKVersion := conf.EffectiveSDKVersion
if effectiveSDKVersion == "" {
effectiveSDKVersion = version.GetVersion().Version
Expand All @@ -922,6 +933,7 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
clusterListener: new(atomic.Value),
customListenerHeader: new(atomic.Value),
seal: conf.Seal,
stateLock: stateLock,
router: NewRouter(),
sealed: new(uint32),
sealMigrationDone: new(uint32),
Expand Down
50 changes: 50 additions & 0 deletions vault/core_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"reflect"
"strings"
"sync"
"sync/atomic"
"testing"
"time"

Expand All @@ -21,6 +22,7 @@ import (
"github.com/hashicorp/vault/sdk/physical"
"github.com/hashicorp/vault/sdk/physical/inmem"
"github.com/hashicorp/vault/version"
"github.com/sasha-s/go-deadlock"
)

// invalidKey is used to test Unseal
Expand Down Expand Up @@ -2836,3 +2838,51 @@ func TestCore_ServiceRegistration(t *testing.T) {
t.Fatal(diff)
}
}

func TestDetectedDeadlock(t *testing.T) {
testCore, _, _ := TestCoreUnsealedWithConfig(t, &CoreConfig{DetectDeadlocks: "statelock"})
InduceDeadlock(t, testCore, 1)
}

func TestDefaultDeadlock(t *testing.T) {
testCore, _, _ := TestCoreUnsealed(t)
InduceDeadlock(t, testCore, 0)
}

func RestoreDeadlockOpts() func() {
opts := deadlock.Opts
return func() {
deadlock.Opts = opts
}
}

func InduceDeadlock(t *testing.T, vaultcore *Core, expected uint32) {
defer RestoreDeadlockOpts()()
var deadlocks uint32
deadlock.Opts.OnPotentialDeadlock = func() {
atomic.AddUint32(&deadlocks, 1)
}
var mtx deadlock.Mutex
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
vaultcore.expiration.coreStateLock.Lock()
mtx.Lock()
mtx.Unlock()
vaultcore.expiration.coreStateLock.Unlock()
}()
wg.Wait()
wg.Add(1)
go func() {
defer wg.Done()
mtx.Lock()
vaultcore.expiration.coreStateLock.RLock()
vaultcore.expiration.coreStateLock.RUnlock()
mtx.Unlock()
}()
wg.Wait()
if atomic.LoadUint32(&deadlocks) != expected {
t.Fatalf("expected 1 deadlock, detected %d", deadlocks)
}
}
4 changes: 2 additions & 2 deletions vault/expiration.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ type ExpirationManager struct {
quitCh chan struct{}

// do not hold coreStateLock in any API handler code - it is already held
coreStateLock *locking.DeadlockRWMutex
coreStateLock locking.RWMutex
quitContext context.Context
leaseCheckCounter *uint32

Expand Down Expand Up @@ -350,7 +350,7 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
restoreLocks: locksutil.CreateLocks(),
quitCh: make(chan struct{}),

coreStateLock: &c.stateLock,
coreStateLock: c.stateLock,
quitContext: c.activeContext,
leaseCheckCounter: new(uint32),

Expand Down
5 changes: 5 additions & 0 deletions vault/test_cluster_detect_deadlock.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//go:build deadlock

package vault

const TestDeadlockDetection = "statelock"
5 changes: 5 additions & 0 deletions vault/test_cluster_do_not_detect_deadlock.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//go:build !deadlock

package vault

const TestDeadlockDetection = ""
3 changes: 3 additions & 0 deletions vault/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ func TestCoreWithSealAndUINoCleanup(t testing.T, opts *CoreConfig) *Core {
conf.EnableResponseHeaderHostname = opts.EnableResponseHeaderHostname
conf.DisableSSCTokens = opts.DisableSSCTokens
conf.PluginDirectory = opts.PluginDirectory
conf.DetectDeadlocks = opts.DetectDeadlocks

if opts.Logger != nil {
conf.Logger = opts.Logger
Expand Down Expand Up @@ -1382,6 +1383,7 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
var baseAddr *net.TCPAddr
if opts != nil && opts.BaseListenAddress != "" {
baseAddr, err = net.ResolveTCPAddr("tcp", opts.BaseListenAddress)

if err != nil {
t.Fatal("could not parse given base IP")
}
Expand Down Expand Up @@ -1633,6 +1635,7 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
}

if base != nil {
coreConfig.DetectDeadlocks = TestDeadlockDetection
coreConfig.RawConfig = base.RawConfig
coreConfig.DisableCache = base.DisableCache
coreConfig.EnableUI = base.EnableUI
Expand Down
5 changes: 5 additions & 0 deletions website/content/docs/configuration/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,11 @@ to specify where the configuration is.
maximum request duration allowed before Vault cancels the request. This can
be overridden per listener via the `max_request_duration` value.

- `detect_deadlocks` `(string: "")` - Specifies the internal mutex locks that should be monitored for
potential deadlocks. Currently supported value is `statelock`, which will cause "POTENTIAL DEADLOCK:"
to be logged when an attempt at a core state lock appears to be deadlocked. Enabling this can have
a negative effect on performance due to the tracking of each lock attempt.

- `raw_storage_endpoint` `(bool: false)` – Enables the `sys/raw` endpoint which
allows the decryption/encryption of raw data into and out of the security
barrier. This is a highly privileged endpoint.
Expand Down

0 comments on commit 06e83f3

Please sign in to comment.