@@ -274,9 +274,25 @@ func (s *levelsController) dropTree() (int, error) {
274274// tables who only have keys with this prefix are quickly dropped. The ones which have other keys
275275// are run through MergeIterator and compacted to create new tables. All the mechanisms of
276276// compactions apply, i.e. level sizes and MANIFEST are updated as in the normal flow.
277- func (s * levelsController ) dropPrefix (prefix []byte ) error {
277+ func (s * levelsController ) dropPrefixes (prefixes [][]byte ) error {
278+ // Internal move keys related to the given prefix should also be skipped.
279+ for _ , prefix := range prefixes {
280+ key := make ([]byte , 0 , len (badgerMove )+ len (prefix ))
281+ key = append (key , badgerMove ... )
282+ key = append (key , prefix ... )
283+ prefixes = append (prefixes , key )
284+ }
285+
278286 opt := s .kv .opt
279- for _ , l := range s .levels {
287+ // Iterate levels in the reverse order because if we were to iterate from
288+ // a lower level (say level 0) to a higher level (say level 3) we could have
289+ // a state in which level 0 is compacted while an older version of a key still exists in a higher level.
290+ // At this point, if someone creates an iterator, they would see an old
291+ // value for a key from the higher levels. Iterating in reverse order ensures we
292+ // drop the oldest data first so that lookups never return stale data.
293+ for i := len (s .levels ) - 1 ; i >= 0 ; i -- {
294+ l := s .levels [i ]
295+
280296 l .RLock ()
281297 if l .level == 0 {
282298 size := len (l .tables )
@@ -288,7 +304,7 @@ func (s *levelsController) dropPrefix(prefix []byte) error {
288304 score : 1.74 ,
289305 // A unique number greater than 1.0 does two things. Helps identify this
290306 // function in logs, and forces a compaction.
291- dropPrefix : prefix ,
307+ dropPrefixes : prefixes ,
292308 }
293309 if err := s .doCompact (cp ); err != nil {
294310 opt .Warningf ("While compacting level 0: %v" , err )
@@ -298,39 +314,49 @@ func (s *levelsController) dropPrefix(prefix []byte) error {
298314 continue
299315 }
300316
301- var tables []* table.Table
302- // Internal move keys related to the given prefix should also be skipped.
303- moveKeyForPrefix := append (badgerMove , prefix ... )
304- prefixesToSkip := [][]byte {prefix , moveKeyForPrefix }
305- for _ , table := range l .tables {
306- var absent bool
307- switch {
308- case hasAnyPrefixes (table .Smallest (), prefixesToSkip ):
309- case hasAnyPrefixes (table .Biggest (), prefixesToSkip ):
310- case containsAnyPrefixes (table .Smallest (), table .Biggest (), prefixesToSkip ):
311- default :
312- absent = true
317+ // Build a list of compaction tableGroups affecting all the prefixes we
318+ // need to drop. We need to build tableGroups that satisfy the invariant that
319+ // bottom tables are consecutive.
320+ // tableGroup contains groups of consecutive tables.
321+ var tableGroups [][]* table.Table
322+ var tableGroup []* table.Table
323+
324+ finishGroup := func () {
325+ if len (tableGroup ) > 0 {
326+ tableGroups = append (tableGroups , tableGroup )
327+ tableGroup = nil
313328 }
314- if ! absent {
315- tables = append (tables , table )
329+ }
330+
331+ for _ , table := range l .tables {
332+ if containsAnyPrefixes (table .Smallest (), table .Biggest (), prefixes ) {
333+ tableGroup = append (tableGroup , table )
334+ } else {
335+ finishGroup ()
316336 }
317337 }
338+ finishGroup ()
339+
318340 l .RUnlock ()
319- if len (tables ) == 0 {
341+
342+ if len (tableGroups ) == 0 {
320343 continue
321344 }
322345
323- cd := compactDef {
324- elog : trace .New (fmt .Sprintf ("Badger.L%d" , l .level ), "Compact" ),
325- thisLevel : l ,
326- nextLevel : l ,
327- top : []* table.Table {},
328- bot : tables ,
329- dropPrefix : prefix ,
330- }
331- if err := s .runCompactDef (l .level , cd ); err != nil {
332- opt .Warningf ("While running compact def: %+v. Error: %v" , cd , err )
333- return err
346+ opt .Infof ("Dropping prefix at level %d (%d tableGroups)" , l .level , len (tableGroups ))
347+ for _ , operation := range tableGroups {
348+ cd := compactDef {
349+ elog : trace .New (fmt .Sprintf ("Badger.L%d" , l .level ), "Compact" ),
350+ thisLevel : l ,
351+ nextLevel : l ,
352+ top : nil ,
353+ bot : operation ,
354+ dropPrefixes : prefixes ,
355+ }
356+ if err := s .runCompactDef (l .level , cd ); err != nil {
357+ opt .Warningf ("While running compact def: %+v. Error: %v" , cd , err )
358+ return err
359+ }
334360 }
335361 }
336362 return nil
@@ -395,9 +421,9 @@ func (l *levelHandler) isCompactable(delSize int64) bool {
395421}
396422
// compactionPriority describes a compaction request handed to doCompact.
// This diff hunk shows both versions of the struct: the "-" lines are the old
// fields (a single dropPrefix) and the "+" lines are the new fields (a list
// of dropPrefixes).
397423type compactionPriority struct {
398- level int
399- score float64
400- dropPrefix []byte
// level: presumably the level to compact — confirm against doCompact.
424+ level int
// score: dropPrefixes sets this to a unique value greater than 1.0 (1.74) to
// identify itself in logs and to force a compaction.
425+ score float64
// dropPrefixes: key prefixes to drop; doCompact copies this into
// compactDef.dropPrefixes.
426+ dropPrefixes [] []byte
401427}
402428
403429// pickCompactLevel determines which level to compact.
@@ -491,13 +517,19 @@ func (s *levelsController) compactBuildTables(
491517
492518 // Next level has level>=1 and we can use ConcatIterator as key ranges do not overlap.
493519 var valid []* table.Table
520+
521+ nextTable:
494522 for _ , table := range botTables {
495- if len (cd .dropPrefix ) > 0 &&
496- bytes .HasPrefix (table .Smallest (), cd .dropPrefix ) &&
497- bytes .HasPrefix (table .Biggest (), cd .dropPrefix ) {
498- // All the keys in this table have the dropPrefix. So, this table does not need to be
499- // in the iterator and can be dropped immediately.
500- continue
523+ if len (cd .dropPrefixes ) > 0 {
524+ for _ , prefix := range cd .dropPrefixes {
525+ if bytes .HasPrefix (table .Smallest (), prefix ) &&
526+ bytes .HasPrefix (table .Biggest (), prefix ) {
527+ // All the keys in this table have the dropPrefix. So, this
528+ // table does not need to be in the iterator and can be
529+ // dropped immediately.
530+ continue nextTable
531+ }
532+ }
501533 }
502534 valid = append (valid , table )
503535 }
@@ -535,12 +567,9 @@ func (s *levelsController) compactBuildTables(
535567 bopts .BfCache = s .kv .bfCache
536568 builder := table .NewTableBuilder (bopts )
537569 var numKeys , numSkips uint64
538- // Internal move keys related to the given prefix should also be skipped.
539- moveKeyForPrefix := append (badgerMove , cd .dropPrefix ... )
540- prefixesToSkip := [][]byte {cd .dropPrefix , moveKeyForPrefix }
541570 for ; it .Valid (); it .Next () {
542571 // See if we need to skip the prefix.
543- if len (cd .dropPrefix ) > 0 && hasAnyPrefixes (it .Key (), prefixesToSkip ) {
572+ if len (cd .dropPrefixes ) > 0 && hasAnyPrefixes (it .Key (), cd . dropPrefixes ) {
544573 numSkips ++
545574 updateStats (it .Value ())
546575 continue
@@ -719,10 +748,24 @@ func hasAnyPrefixes(s []byte, listOfPrefixes [][]byte) bool {
719748 return false
720749}
721750
// containsPrefix reports whether a key range bounded by smallValue and
// largeValue (inclusive) can hold keys that start with prefix.
//
// It returns true when either bound itself starts with prefix, or when prefix
// sorts strictly between the two bounds in bytewise order; otherwise it
// returns false.
func containsPrefix(smallValue, largeValue, prefix []byte) bool {
	// A bound that carries the prefix proves the range touches it.
	if bytes.HasPrefix(smallValue, prefix) || bytes.HasPrefix(largeValue, prefix) {
		return true
	}

	// Otherwise the prefix must fall strictly inside the range.
	afterSmallest := bytes.Compare(smallValue, prefix) < 0
	beforeLargest := bytes.Compare(largeValue, prefix) > 0
	return afterSmallest && beforeLargest
}
765+
722766func containsAnyPrefixes (smallValue , largeValue []byte , listOfPrefixes [][]byte ) bool {
723767 for _ , prefix := range listOfPrefixes {
724- if bytes .Compare (prefix , smallValue ) > 0 &&
725- bytes .Compare (prefix , largeValue ) < 0 {
768+ if containsPrefix (smallValue , largeValue , prefix ) {
726769 return true
727770 }
728771 }
@@ -744,7 +787,7 @@ type compactDef struct {
744787
745788 thisSize int64
746789
747- dropPrefix []byte
790+ dropPrefixes [] []byte
748791}
749792
750793func (cd * compactDef ) lockLevels () {
@@ -918,10 +961,10 @@ func (s *levelsController) doCompact(p compactionPriority) error {
918961 y .AssertTrue (l + 1 < s .kv .opt .MaxLevels ) // Sanity check.
919962
920963 cd := compactDef {
921- elog : trace .New (fmt .Sprintf ("Badger.L%d" , l ), "Compact" ),
922- thisLevel : s .levels [l ],
923- nextLevel : s .levels [l + 1 ],
924- dropPrefix : p .dropPrefix ,
964+ elog : trace .New (fmt .Sprintf ("Badger.L%d" , l ), "Compact" ),
965+ thisLevel : s .levels [l ],
966+ nextLevel : s .levels [l + 1 ],
967+ dropPrefixes : p .dropPrefixes ,
925968 }
926969 cd .elog .SetMaxEvents (100 )
927970 defer cd .elog .Finish ()
0 commit comments