Skip to content

Commit

Permalink
storage: assert local single delete safety
Browse files Browse the repository at this point in the history
During intent resolution, examine internal keys when intending to delete an
intent using a single delete tombstone to validate that it is indeed safe
within the local Engine. This is a safety measure to validate that we do not
violate the invariants that make singe deletion deterministic causing replica
divergence. False negatives are possible, since this safety mechanism only
examines the local engine state and only the state visible to the open lock
table iterator at the time of proposal evaluation.

Release note: none
Epic: none
Informs cockroachdb#114492.
Informs cockroachdb#114421.
  • Loading branch information
jbowens committed Nov 22, 2023
1 parent fdbbbb8 commit 0f14f44
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 7 deletions.
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/spanset/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,12 @@ func (i *EngineIterator) Stats() storage.IteratorStats {
return i.i.Stats()
}

// CanDeterministicallySingleDelete is part of the storage.EngineIterator
// interface.
func (i *EngineIterator) CanDeterministicallySingleDelete() (bool, error) {
return i.i.CanDeterministicallySingleDelete()
}

type spanSetReader struct {
r storage.Reader
spans *SpanSet
Expand Down
16 changes: 16 additions & 0 deletions pkg/storage/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,22 @@ type EngineIterator interface {
PrevEngineKeyWithLimit(limit roachpb.Key) (state pebble.IterValidityState, err error)
// Stats returns statistics about the iterator.
Stats() IteratorStats
// CanDeterministicallySingleDelete is a specific purpose-built method for
// determining whether the current key (UnsafeRawEngineKey()/EngineKey())
// may be deterministically deleted through a single delete key on the local
// engine state. The determination is completely to local to the Engine, and
// a true return value does not mean that clearing the key with a single
// delete will be deterministic on other replicas on other Engines.
//
// CanDeterministicallySingleDelete does not change the iterator position
// (all subsequent iterator operations should behave as if
// CanDeterministicallySingleDelete was never invoked), although it DOES
// invalidate the memory associated with the current iterator position's
// value.
//
// CanDeterministicallySingleDelete may only be called when oriented in the
// forward direction and only once at a given iterator position.
CanDeterministicallySingleDelete() (ok bool, err error)
}

// CloneContext is an opaque type encapsulating sufficient context to construct
Expand Down
5 changes: 5 additions & 0 deletions pkg/storage/lock_table_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,11 @@ func (i *LockTableIterator) Stats() IteratorStats {
return i.iter.Stats()
}

// CanDeterministicallySingleDelete implements the EngineIterator interface.
func (i *LockTableIterator) CanDeterministicallySingleDelete() (ok bool, err error) {
return i.iter.CanDeterministicallySingleDelete()
}

//gcassert:inline
func isLockTableKey(key roachpb.Key) bool {
return bytes.HasPrefix(key, keys.LocalRangeLockTablePrefix)
Expand Down
80 changes: 74 additions & 6 deletions pkg/storage/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,16 @@ func getMaxConcurrentCompactions() int {
var l0SubLevelCompactionConcurrency = envutil.EnvOrDefaultInt(
"COCKROACH_L0_SUB_LEVEL_CONCURRENCY", 2)

// assertSingleDeleteSafety configures whether or not to perform verification
// that SINGLEDELs are deterministic when applied to the local Engine during
// intent resolution. See call sites of CanDeterministicallySingleDelete. See
// cockroach#114421 for the motivation.
//
// An environment variable is provided for disabling these assertions in case
// conditions under which false positives or severe peformance regressions are
// possible.
var assertSingleDeleteSafety = envutil.EnvOrDefaultBool("COCKROACH_SINGLEDEL_ASSERT", true)

// MakeValue returns the inline value.
func MakeValue(meta enginepb.MVCCMetadata) roachpb.Value {
return roachpb.Value{RawBytes: meta.RawBytes}
Expand Down Expand Up @@ -4971,7 +4981,38 @@ func MVCCResolveWriteIntent(
if err != nil {
return false, 0, nil, false, err
}
ok = ok || outcome != lockNoop
switch outcome {
case lockNoop:
// Do nothing.
case lockClearedBySingleDelete:
ok = true
if !assertSingleDeleteSafety {
continue
}

// Single deletes rely on subtle invariants and logic. We can detect
// misuse if the local store's internal state could result in
// nondeterministic behavior if we write a single delete. This
// doesn't guard against writes to the key committed to the engine
// after we opened ltIter but before the single delete is applied,
// and it also doesn't guarantee the single delete will be okay on
// other replicas' engines.
if isDeterministic, err := ltIter.CanDeterministicallySingleDelete(); err != nil {
return false, 0, nil, false, errors.Wrap(err, "validating single delete invariant")
} else if !isDeterministic {
err := errors.AssertionFailedf("deleting by single delete is unsafe")
if key, keyErr := ltIter.EngineKey(); keyErr != nil {
err = errors.WithSecondaryError(err, keyErr)
} else {
err = errors.Wrapf(err, "resolving lock key %s", key)
}
log.Fatalf(ctx, "intent resolution: %v", err)
}
case lockClearedByDelete, lockOverwritten:
ok = true
default:
panic("unreachable")
}
}
numBytes = int64(rw.BufferedSize() - beforeBytes)
return ok, numBytes, nil, replLocksReleased, nil
Expand Down Expand Up @@ -5719,11 +5760,38 @@ func MVCCResolveWriteIntentRange(
if err != nil {
log.Warningf(ctx, "failed to resolve intent for key %q: %+v", lastResolvedKey, err)
}
if outcome != lockNoop && !lastResolvedKeyOk {
// We only count the first successfully resolved lock/intent on a
// given key towards the returned key count and key limit.
lastResolvedKeyOk = true
numKeys++

switch outcome {
case lockNoop:
// Do nothing.
case lockClearedBySingleDelete:
if assertSingleDeleteSafety {
// Single deletes rely on subtle invariants and logic. We can
// detect misuse if the local store's internal state could
// result in nondeterministic behavior if we write a single
// delete. This doesn't guard against writes to the key
// committed to the engine after we opened ltIter but before the
// single delete is applied, and it also doesn't guarantee the
// single delete will be okay on other replicas' engines.
if ok, err := ltIter.CanDeterministicallySingleDelete(); err != nil {
return 0, 0, nil, 0, false, errors.Wrap(err, "validating single delete invariant")
} else if !ok {
log.Fatalf(ctx, "resolving lock key %s: %+v", ltKey,
errors.AssertionFailedf("deleting by single delete is unsafe"))
}
}

// Fallthrough to update numKeys and lastResolvedKeyOk if necessary.
fallthrough
case lockClearedByDelete, lockOverwritten:
if !lastResolvedKeyOk {
// We only count the first successfully resolved lock/intent on a
// given key towards the returned key count and key limit.
lastResolvedKeyOk = true
numKeys++
}
default:
panic("unreachable")
}
numBytes += int64(rw.BufferedSize() - beforeBytes)
}
Expand Down
7 changes: 6 additions & 1 deletion pkg/storage/pebble_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ func (p *pebbleIterator) Next() {
p.iter.Next()
}

// NextEngineKey implements the Engineterator interface.
// NextEngineKey implements the EngineIterator interface.
func (p *pebbleIterator) NextEngineKey() (valid bool, err error) {
ok := p.iter.Next()
// NB: A Pebble Iterator always returns ok==false when an error is
Expand Down Expand Up @@ -947,6 +947,11 @@ func (p *pebbleIterator) CloneContext() CloneContext {
return CloneContext{rawIter: p.iter, engine: p.parent}
}

// CanDeterministicallySingleDelete implements the EngineIterator interface.
func (p *pebbleIterator) CanDeterministicallySingleDelete() (ok bool, err error) {
return pebbleiter.CanDeterministicallySingleDelete(p.iter)
}

func (p *pebbleIterator) getBlockPropertyFilterMask() pebble.BlockPropertyFilterMask {
return &p.maskFilter
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/storage/pebbleiter/crdb_test_off.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,9 @@ type Iterator = *pebble.Iterator
func MaybeWrap(iter *pebble.Iterator) Iterator {
return iter
}

// CanDeterministicallySingleDelete wraps
// pebble.CanDeterministicallySingleDelete.
func CanDeterministicallySingleDelete(it Iterator) (bool, error) {
return pebble.CanDeterministicallySingleDelete(it)
}
6 changes: 6 additions & 0 deletions pkg/storage/pebbleiter/crdb_test_on.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ func MaybeWrap(iter *pebble.Iterator) Iterator {
return &assertionIter{Iterator: iter, closedCh: make(chan struct{})}
}

// CanDeterministicallySingleDelete wraps
// pebble.CanDeterministicallySingleDelete.
func CanDeterministicallySingleDelete(it Iterator) (bool, error) {
return pebble.CanDeterministicallySingleDelete(it.Iterator)
}

// assertionIter wraps a *pebble.Iterator with assertion checking.
type assertionIter struct {
*pebble.Iterator
Expand Down

0 comments on commit 0f14f44

Please sign in to comment.