Skip to content

Commit 0a5046f

Browse files
author
Ibrahim Jarif
authored
perf(GC): Remove move keys (#1539)
Move keys were used to fix value pointers that became invalid when value log (vlog) GC ran. This PR removes the move keys. To ensure we still return correct results for Get calls, we now search all the levels of the LSM tree for the requested version of the key. Previously, the search returned the first entry found whose version was less than or equal to the required version.
1 parent 4f6bd10 commit 0a5046f

File tree

7 files changed

+210
-280
lines changed

7 files changed

+210
-280
lines changed

badger/cmd/write_bench.go

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,6 @@ func writeBench(cmd *cobra.Command, args []string) error {
292292
func showKeysStats(db *badger.DB) {
293293
var (
294294
internalKeyCount uint32
295-
moveKeyCount uint32
296295
invalidKeyCount uint32
297296
validKeyCount uint32
298297
)
@@ -311,18 +310,14 @@ func showKeysStats(db *badger.DB) {
311310
if bytes.HasPrefix(i.Key(), []byte("!badger!")) {
312311
internalKeyCount++
313312
}
314-
if bytes.HasPrefix(i.Key(), []byte("!badger!Move")) {
315-
moveKeyCount++
316-
}
317313
if i.IsDeletedOrExpired() {
318314
invalidKeyCount++
319315
} else {
320316
validKeyCount++
321317
}
322318
}
323-
fmt.Printf("Valid Keys: %d Invalid Keys: %d Move Keys:"+
324-
" %d Internal Keys: %d\n", validKeyCount, invalidKeyCount,
325-
moveKeyCount, internalKeyCount)
319+
fmt.Printf("Valid Keys: %d Invalid Keys: %d Internal Keys: %d\n",
320+
validKeyCount, invalidKeyCount, internalKeyCount)
326321
}
327322

328323
func reportStats(c *z.Closer, db *badger.DB) {

db.go

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ var (
4646
badgerPrefix = []byte("!badger!") // Prefix for internal keys used by badger.
4747
head = []byte("!badger!head") // For storing value offset for replay.
4848
txnKey = []byte("!badger!txn") // For indicating end of entries in txn.
49-
badgerMove = []byte("!badger!move") // For key-value pairs which got moved during GC.
5049
lfDiscardStatsKey = []byte("!badger!discard") // For storing lfDiscardStats
5150
)
5251

@@ -704,29 +703,20 @@ func (db *DB) getMemTables() ([]*skl.Skiplist, func()) {
704703
// that all versions of a key are always present in the same table from level 1, because compaction
705704
// can push any table down.
706705
//
707-
// Update (Sep 22, 2018): To maintain the above invariant, and to allow keys to be moved from one
708-
// value log to another (while reclaiming space during value log GC), we have logically moved this
709-
// need to write "old versions after new versions" to the badgerMove keyspace. Thus, for normal
710-
// gets, we can stop going down the LSM tree once we find any version of the key (note however that
711-
// we will ALWAYS skip versions with ts greater than the key version). However, if that key has
712-
// been moved, then for the corresponding movekey, we'll look through all the levels of the tree
713-
// to ensure that we pick the highest version of the movekey present.
706+
// Update(23/09/2020) - We have dropped the move key implementation. Earlier we
707+
// were inserting move keys to fix the invalid value pointers but we no longer
708+
// do that. For every get("fooX") call where X is the version, we will search
709+
// for "fooX" in all the levels of the LSM tree. This is expensive but it
710+
// removes the overhead of handling move keys completely.
714711
func (db *DB) get(key []byte) (y.ValueStruct, error) {
715712
if db.IsClosed() {
716713
return y.ValueStruct{}, ErrDBClosed
717714
}
718715
tables, decr := db.getMemTables() // Lock should be released.
719716
defer decr()
720717

721-
var maxVs *y.ValueStruct
722-
var version uint64
723-
if bytes.HasPrefix(key, badgerMove) {
724-
// If we are checking badgerMove key, we should look into all the
725-
// levels, so we can pick up the newer versions, which might have been
726-
// compacted down the tree.
727-
maxVs = &y.ValueStruct{}
728-
version = y.ParseTs(key)
729-
}
718+
var maxVs y.ValueStruct
719+
version := y.ParseTs(key)
730720

731721
y.NumGets.Add(1)
732722
for i := 0; i < len(tables); i++ {
@@ -735,13 +725,12 @@ func (db *DB) get(key []byte) (y.ValueStruct, error) {
735725
if vs.Meta == 0 && vs.Value == nil {
736726
continue
737727
}
738-
// Found a version of the key. For user keyspace, return immediately. For move keyspace,
739-
// continue iterating, unless we found a version == given key version.
740-
if maxVs == nil || vs.Version == version {
728+
// Found the required version of the key, return immediately.
729+
if vs.Version == version {
741730
return vs, nil
742731
}
743732
if maxVs.Version < vs.Version {
744-
*maxVs = vs
733+
maxVs = vs
745734
}
746735
}
747736
return db.lc.get(key, maxVs, 0)
@@ -1307,7 +1296,7 @@ func (db *DB) RunValueLogGC(discardRatio float64) error {
13071296
// Find head on disk
13081297
headKey := y.KeyWithTs(head, math.MaxUint64)
13091298
// Need to pass with timestamp, lsm get removes the last 8 bytes and compares key
1310-
val, err := db.lc.get(headKey, nil, startLevel)
1299+
val, err := db.lc.get(headKey, y.ValueStruct{}, startLevel)
13111300
if err != nil {
13121301
return errors.Wrap(err, "Retrieving head from on-disk LSM")
13131302
}

errors.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,6 @@ var (
5555
// reserved for internal usage.
5656
ErrInvalidKey = errors.New("Key is using a reserved !badger! prefix")
5757

58-
// ErrRetry is returned when a log file containing the value is not found.
59-
// This usually indicates that it may have been garbage collected, and the
60-
// operation needs to be retried.
61-
ErrRetry = errors.New("Unable to find log file. Please retry")
62-
6358
// ErrThresholdZero is returned if threshold is set to zero, and value log GC is called.
6459
// In such a case, GC can't be run.
6560
ErrThresholdZero = errors.New(

iterator.go

Lines changed: 18 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -151,66 +151,29 @@ func (item *Item) DiscardEarlierVersions() bool {
151151

152152
func (item *Item) yieldItemValue() ([]byte, func(), error) {
153153
key := item.Key() // No need to copy.
154-
for {
155-
if !item.hasValue() {
156-
return nil, nil, nil
157-
}
154+
if !item.hasValue() {
155+
return nil, nil, nil
156+
}
158157

159-
if item.slice == nil {
160-
item.slice = new(y.Slice)
161-
}
158+
if item.slice == nil {
159+
item.slice = new(y.Slice)
160+
}
162161

163-
if (item.meta & bitValuePointer) == 0 {
164-
val := item.slice.Resize(len(item.vptr))
165-
copy(val, item.vptr)
166-
return val, nil, nil
167-
}
162+
if (item.meta & bitValuePointer) == 0 {
163+
val := item.slice.Resize(len(item.vptr))
164+
copy(val, item.vptr)
165+
return val, nil, nil
166+
}
168167

169-
var vp valuePointer
170-
vp.Decode(item.vptr)
171-
db := item.txn.db
172-
result, cb, err := db.vlog.Read(vp, item.slice)
173-
if err != ErrRetry {
174-
if err != nil {
175-
db.opt.Logger.Errorf(`Unable to read: Key: %v, Version : %v,
168+
var vp valuePointer
169+
vp.Decode(item.vptr)
170+
db := item.txn.db
171+
result, cb, err := db.vlog.Read(vp, item.slice)
172+
if err != nil {
173+
db.opt.Logger.Errorf(`Unable to read: Key: %v, Version : %v,
176174
meta: %v, userMeta: %v`, key, item.version, item.meta, item.userMeta)
177-
}
178-
return result, cb, err
179-
}
180-
if bytes.HasPrefix(key, badgerMove) {
181-
// err == ErrRetry
182-
// Error is retry even after checking the move keyspace. So, let's
183-
// just assume that value is not present.
184-
return nil, cb, nil
185-
}
186-
187-
// The value pointer is pointing to a deleted value log. Look for the
188-
// move key and read that instead.
189-
runCallback(cb)
190-
// Do not put badgerMove on the left in append. It seems to cause some sort of manipulation.
191-
keyTs := y.KeyWithTs(item.Key(), item.Version())
192-
key = make([]byte, len(badgerMove)+len(keyTs))
193-
n := copy(key, badgerMove)
194-
copy(key[n:], keyTs)
195-
// Note that we can't set item.key to move key, because that would
196-
// change the key user sees before and after this call. Also, this move
197-
// logic is internal logic and should not impact the external behavior
198-
// of the retrieval.
199-
vs, err := item.txn.db.get(key)
200-
if err != nil {
201-
return nil, nil, err
202-
}
203-
if vs.Version != item.Version() {
204-
return nil, nil, nil
205-
}
206-
// Bug fix: Always copy the vs.Value into vptr here. Otherwise, when item is reused this
207-
// slice gets overwritten.
208-
item.vptr = y.SafeCopy(item.vptr, vs.Value)
209-
item.meta &^= bitValuePointer // Clear the value pointer bit.
210-
if vs.Meta&bitValuePointer > 0 {
211-
item.meta |= bitValuePointer // This meta would only be about value pointer.
212-
}
213175
}
176+
return result, cb, err
214177
}
215178

216179
func runCallback(cb func()) {

levels.go

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -269,21 +269,13 @@ func (s *levelsController) dropTree() (int, error) {
269269
}
270270

271271
// dropPrefix runs a L0->L1 compaction, and then runs same level compaction on the rest of the
272-
// levels. For L0->L1 compaction, it runs compactions normally, but skips over all the keys with the
273-
// provided prefix and also the internal move keys for the same prefix.
272+
// levels. For L0->L1 compaction, it runs compactions normally, but skips over
273+
// all the keys with the provided prefix.
274274
// For Li->Li compactions, it picks up the tables which would have the prefix. The
275275
// tables which only have keys with this prefix are quickly dropped. The ones which have other keys
276276
// are run through MergeIterator and compacted to create new tables. All the mechanisms of
277277
// compactions apply, i.e. level sizes and MANIFEST are updated as in the normal flow.
278278
func (s *levelsController) dropPrefixes(prefixes [][]byte) error {
279-
// Internal move keys related to the given prefix should also be skipped.
280-
for _, prefix := range prefixes {
281-
key := make([]byte, 0, len(badgerMove)+len(prefix))
282-
key = append(key, badgerMove...)
283-
key = append(key, prefix...)
284-
prefixes = append(prefixes, key)
285-
}
286-
287279
opt := s.kv.opt
288280
// Iterate levels in the reverse order because if we were to iterate from
289281
// lower level (say level 0) to a higher level (say level 3) we could have
@@ -1078,8 +1070,10 @@ func (s *levelsController) close() error {
10781070
return errors.Wrap(err, "levelsController.Close")
10791071
}
10801072

1081-
// get returns the found value if any. If not found, we return nil.
1082-
func (s *levelsController) get(key []byte, maxVs *y.ValueStruct, startLevel int) (
1073+
// get searches for a given key in all the levels of the LSM tree. It returns
1074+
// key version <= the expected version (maxVs). If not found, it returns an empty
1075+
// y.ValueStruct.
1076+
func (s *levelsController) get(key []byte, maxVs y.ValueStruct, startLevel int) (
10831077
y.ValueStruct, error) {
10841078
if s.kv.IsClosed() {
10851079
return y.ValueStruct{}, ErrDBClosed
@@ -1102,17 +1096,14 @@ func (s *levelsController) get(key []byte, maxVs *y.ValueStruct, startLevel int)
11021096
if vs.Value == nil && vs.Meta == 0 {
11031097
continue
11041098
}
1105-
if maxVs == nil || vs.Version == version {
1099+
if vs.Version == version {
11061100
return vs, nil
11071101
}
11081102
if maxVs.Version < vs.Version {
1109-
*maxVs = vs
1103+
maxVs = vs
11101104
}
11111105
}
1112-
if maxVs != nil {
1113-
return *maxVs, nil
1114-
}
1115-
return y.ValueStruct{}, nil
1106+
return maxVs, nil
11161107
}
11171108

11181109
func appendIteratorsReversed(out []y.Iterator, th []*table.Table, opt int) []y.Iterator {

0 commit comments

Comments
 (0)