@@ -410,69 +410,67 @@ func mergeIntervals(
410410}
411411
412412// addL0Files incrementally builds a new l0Sublevels for when the only change
413- // since the receiver l0Sublevels was an addition of the specified files , with
413+ // since the receiver l0Sublevels was an addition of the specified tables , with
414414// no L0 deletions. The common case of this is an ingestion or a flush. These
415415// files can "sit on top" of existing sublevels, creating at most one new
416416// sublevel for a flush (and possibly multiple for an ingestion), and at most
417417// 2*len(files) additions to s.orderedIntervals. No files must have been deleted
418418// from L0, and the added files must all be newer in sequence numbers than
419- // existing files in l0Sublevels. The files parameter must be sorted in seqnum
420- // order. The levelMetadata parameter corresponds to the new L0 post addition of
421- // files. This method is meant to be significantly more performant than
422- // newL0Sublevels.
419+ // existing files in l0Sublevels. The levelMetadata parameter corresponds to the
420+ // new L0 post addition of files. This method is meant to be significantly more
421+ // performant than newL0Sublevels.
423422//
424423// Note that this function can only be called once on a given receiver; it
425424// appends to some slices in s which is only safe when done once. This is okay,
426425// as the common case (generating a new l0Sublevels after a flush/ingestion) is
427426// only going to necessitate one call of this method on a given receiver. The
428427// returned value, if non-nil, can then have [*l0Sublevels.addL0Files] called on
429428// it again, and so on. If [errInvalidL0SublevelsOpt] is returned as an error,
430- // it likely means the optimization could not be applied (i.e. files added were
431- // older than files already in the sublevels, which is possible around
432- // ingestions and in tests). Eg. it can happen when an ingested file was
433- // ingested without queueing a flush since it did not actually overlap with any
434- // keys in the memtable. Later on the memtable was flushed, and the memtable had
435- // keys spanning around the ingested file, producing a flushed file that
436- // overlapped with the ingested file in file bounds but not in keys. It's
437- // possible for that flushed file to have a lower LargestSeqNum than the
438- // ingested file if all the additions after the ingestion were to another
439- // flushed file that was split into a separate sstable during flush. Any other
440- // non-nil error means [l0Sublevels] generation failed in the same way as
441- // [newL0Sublevels] would likely fail.
429+ // it means the optimization could not be applied (i.e. files added were older
430+ // than files already in the sublevels, which is possible around ingestions and
431+ // in tests). Eg. it can happen when an ingested file was ingested without
432+ // queueing a flush since it did not actually overlap with any keys in the
433+ // memtable. Later on the memtable was flushed, and the memtable had keys
434+ // spanning around the ingested file, producing a flushed file that overlapped
435+ // with the ingested file in file bounds but not in keys. It's possible for that
436+ // flushed file to have a lower LargestSeqNum than the ingested file if all the
437+ // additions after the ingestion were to another flushed file that was split
438+ // into a separate sstable during flush. Any other non-nil error means
439+ // [l0Sublevels] generation failed in the same way as [newL0Sublevels] would
440+ // likely fail.
442441func (s * l0Sublevels ) addL0Files (
443- files []* TableMetadata , flushSplitMaxBytes int64 , levelMetadata * LevelMetadata ,
442+ addedTables map [base.FileNum ]* TableMetadata ,
443+ flushSplitMaxBytes int64 ,
444+ levelMetadata * LevelMetadata ,
444445) (* l0Sublevels , error ) {
445446 if s .addL0FilesCalled {
446447 if invariants .Enabled {
447448 panic ("addL0Files called twice on the same receiver" )
448449 }
449450 return nil , errInvalidL0SublevelsOpt
450451 }
451- if s .levelMetadata .Len ()+ len (files ) != levelMetadata .Len () {
452+ if s .levelMetadata .Len ()+ len (addedTables ) != levelMetadata .Len () {
452453 if invariants .Enabled {
453454 panic ("levelMetadata mismatch" )
454455 }
455456 return nil , errInvalidL0SublevelsOpt
456457 }
458+
457459 // addL0Files only works when the files we are adding match exactly the last
458- // files in the new levelMetadata (and the previous s.levelMetadata matches a
459- // prefix of the new levelMetadata).
460- {
461- iterOld , iterNew := s .levelMetadata .Iter (), levelMetadata .Iter ()
462- tOld , tNew := iterOld .First (), iterNew .First ()
463- for i := 0 ; i < s .levelMetadata .Len (); i ++ {
464- if tOld != tNew {
465- return nil , errInvalidL0SublevelsOpt
466- }
467- tOld , tNew = iterOld .Next (), iterNew .Next ()
468- }
469- for i := range files {
470- if files [i ] != tNew {
471- return nil , errInvalidL0SublevelsOpt
472- }
473- tNew = iterNew .Next ()
460+ // files in the new levelMetadata (this is the case usually, but not always).
461+ files := make ([]* TableMetadata , len (addedTables ))
462+ iter := levelMetadata .Iter ()
463+ t := iter .Last ()
464+ for i := len (addedTables ) - 1 ; i >= 0 ; i -- {
465+ if addedTables [t .FileNum ] == nil {
466+ // t is an existing table that sorts after some of the new tables
467+ // (specifically the ones we haven't yet seen).
468+ return nil , errInvalidL0SublevelsOpt
474469 }
470+ files [i ] = t
471+ t = iter .Prev ()
475472 }
473+ // Note: files now contains addedTables in increasing seqnum order.
476474 s .addL0FilesCalled = true
477475
478476 // Start with a shallow copy of s.
@@ -2077,15 +2075,16 @@ func (s *l0Sublevels) extendCandidateToRectangle(
20772075// L0Organizer keeps track of L0 state, including the subdivision into
20782076// sublevels.
20792077//
2080- // It is designed to be used as a singleton (per store ) which gets updated as
2078+ // It is designed to be used as a singleton (per DB ) which gets updated as
20812079// the version changes. It is used to initialize L0-related Version fields.
20822080//
20832081// The level 0 sstables are organized in a series of sublevels. Similar to the
2084- // seqnum invariant in normal levels, there is no internal key in a higher level
2085- // table that has both the same user key and a higher sequence number. Within a
2086- // sublevel, tables are sorted by their internal key range and any two tables at
2087- // the same sublevel do not overlap. Unlike the normal levels, sublevel n
2088- // contains older tables (lower sequence numbers) than sublevel n+1.
2082+ // seqnum invariant in normal levels, there is no internal key in a lower
2083+ // sublevel table that has both the same user key and a higher sequence number.
2084+ // Within a sublevel, tables are sorted by their internal key range and any two
2085+ // tables at the same sublevel do not overlap. Unlike the normal levels,
2086+ // sublevel n contains older tables (lower sequence numbers) than sublevel n+1
2087+ // (this is because the number of sublevels is variable).
20892088type L0Organizer struct {
20902089 cmp base.Compare
20912090 formatKey base.FormatKey
@@ -2125,7 +2124,9 @@ func NewL0Organizer(comparer *base.Comparer, flushSplitBytes int64) *L0Organizer
21252124 return o
21262125}
21272126
2128- // SublevelFiles returns the sublevels as LevelSlices.
2127+ // SublevelFiles returns the sublevels as LevelSlices. The returned value (both
2128+ // the slice and each LevelSlice) is immutable. The L0Organizer creates new
2129+ // slices every time L0 changes.
21292130func (o * L0Organizer ) SublevelFiles () []LevelSlice {
21302131 return o .l0Sublevels .Levels
21312132}
@@ -2146,45 +2147,31 @@ func (o *L0Organizer) Update(
21462147 }
21472148 // If we only added tables, try to use addL0Files.
21482149 if len (deletedL0Tables ) == 0 {
2149- // Construct the file slice needed by addL0Files.
2150- // TODO(radu): change addL0Files to do this internally.
2151- files := make ([]* TableMetadata , 0 , len (addedL0Tables ))
2152- iter := newLevelMeta .Iter ()
2153- for t := iter .Last (); len (files ) < len (addedL0Tables ); t = iter .Prev () {
2154- if t == nil || addedL0Tables [t .FileNum ] == nil {
2155- break
2156- }
2157- files = append (files , t )
2158- }
2159- if len (files ) == len (addedL0Tables ) {
2160- slices .Reverse (files )
2161- newSublevels , err := o .l0Sublevels .addL0Files (files , o .flushSplitBytes , newLevelMeta )
2162-
2163- if err == nil {
2164- // In invariants mode, sometimes rebuild from scratch to verify that
2165- // AddL0Files did the right thing. Note that NewL0Sublevels updates
2166- // fields in TableMetadata like L0Index, so we don't want to do this
2167- // every time.
2168- if invariants .Enabled && invariants .Sometimes (10 ) {
2169- expectedSublevels , err := newL0Sublevels (newLevelMeta , o .cmp , o .formatKey , o .flushSplitBytes )
2170- if err != nil {
2171- panic (fmt .Sprintf ("error when regenerating sublevels: %s" , err ))
2172- }
2173- s1 := describeSublevels (o .formatKey , false /* verbose */ , expectedSublevels .Levels )
2174- s2 := describeSublevels (o .formatKey , false /* verbose */ , newSublevels .Levels )
2175- if s1 != s2 {
2176- // Add verbosity.
2177- s1 := describeSublevels (o .formatKey , true /* verbose */ , expectedSublevels .Levels )
2178- s2 := describeSublevels (o .formatKey , true /* verbose */ , newSublevels .Levels )
2179- panic (fmt .Sprintf ("incremental L0 sublevel generation produced different output than regeneration: %s != %s" , s1 , s2 ))
2180- }
2150+ newSublevels , err := o .l0Sublevels .addL0Files (addedL0Tables , o .flushSplitBytes , newLevelMeta )
2151+ if err == nil {
2152+ // In invariants mode, sometimes rebuild from scratch to verify that
2153+ // AddL0Files did the right thing. Note that NewL0Sublevels updates
2154+ // fields in TableMetadata like L0Index, so we don't want to do this
2155+ // every time.
2156+ if invariants .Enabled && invariants .Sometimes (10 ) {
2157+ expectedSublevels , err := newL0Sublevels (newLevelMeta , o .cmp , o .formatKey , o .flushSplitBytes )
2158+ if err != nil {
2159+ panic (fmt .Sprintf ("error when regenerating sublevels: %s" , err ))
2160+ }
2161+ s1 := describeSublevels (o .formatKey , false /* verbose */ , expectedSublevels .Levels )
2162+ s2 := describeSublevels (o .formatKey , false /* verbose */ , newSublevels .Levels )
2163+ if s1 != s2 {
2164+ // Add verbosity.
2165+ s1 := describeSublevels (o .formatKey , true /* verbose */ , expectedSublevels .Levels )
2166+ s2 := describeSublevels (o .formatKey , true /* verbose */ , newSublevels .Levels )
2167+ panic (fmt .Sprintf ("incremental L0 sublevel generation produced different output than regeneration: %s != %s" , s1 , s2 ))
21812168 }
2182- o .l0Sublevels = newSublevels
2183- return
2184- }
2185- if ! errors .Is (err , errInvalidL0SublevelsOpt ) {
2186- panic (errors .AssertionFailedf ("error generating L0Sublevels: %s" , err ))
21872169 }
2170+ o .l0Sublevels = newSublevels
2171+ return
2172+ }
2173+ if ! errors .Is (err , errInvalidL0SublevelsOpt ) {
2174+ panic (errors .AssertionFailedf ("error generating L0Sublevels: %s" , err ))
21882175 }
21892176 }
21902177 var err error
@@ -2194,8 +2181,8 @@ func (o *L0Organizer) Update(
21942181 }
21952182}
21962183
2197- // Reset the L0Organizer to reflect a given L0 level. Used for testing .
2198- func (o * L0Organizer ) Reset (levelMetadata * LevelMetadata ) {
2184+ // ResetForTesting reinitializes the L0Organizer to reflect a given L0 level.
2185+ func (o * L0Organizer ) ResetForTesting (levelMetadata * LevelMetadata ) {
21992186 o .levelMetadata = * levelMetadata
22002187 var err error
22012188 o .l0Sublevels , err = newL0Sublevels (levelMetadata , o .cmp , o .formatKey , o .flushSplitBytes )
0 commit comments