From 29acf19103de48e8309eb70810caa8d2d0799b90 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Wed, 18 Sep 2019 20:21:38 +0530 Subject: [PATCH 01/23] Introduce fast merge iterator This commit introduces fast merge iterator which is faster than normal merge iterator if there are less than 18 tables. --- iterator.go | 3 +- levels.go | 2 +- y/iterator.go | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 2 deletions(-) diff --git a/iterator.go b/iterator.go index 78ff7f655..3ebed3877 100644 --- a/iterator.go +++ b/iterator.go @@ -466,9 +466,10 @@ func (txn *Txn) NewIterator(opt IteratorOptions) *Iterator { iters = append(iters, tables[i].NewUniIterator(opt.Reverse)) } iters = txn.db.lc.appendIterators(iters, &opt) // This will increment references. + res := &Iterator{ txn: txn, - iitr: y.NewMergeIterator(iters, opt.Reverse), + iitr: y.GetMergeIterator(iters, opt.Reverse), opt: opt, readTs: txn.readTs, } diff --git a/levels.go b/levels.go index f23ec8c1b..6a94d2367 100644 --- a/levels.go +++ b/levels.go @@ -478,7 +478,7 @@ func (s *levelsController) compactBuildTables( valid = append(valid, table) } iters = append(iters, table.NewConcatIterator(valid, false)) - it := y.NewMergeIterator(iters, false) + it := y.GetMergeIterator(iters, false) defer it.Close() // Important to close the iterator to do ref counting. it.Rewind() diff --git a/y/iterator.go b/y/iterator.go index 13f0bb2ce..044a480f6 100644 --- a/y/iterator.go +++ b/y/iterator.go @@ -262,3 +262,115 @@ func (s *MergeIterator) Close() error { } return nil } + +// FastMergeIterator is a specialized MergeIterator that works better than the merge Iterator for +// small number of iterators. We found out that if we have less than 18 iterators in merge iterator, +// the fast merge iterator performs better. +type FastMergeIterator struct { + second Iterator + smaller Iterator + bigger Iterator + reverse bool +} + +func (mt *FastMergeIterator) fix() { + if !mt.bigger.Valid() { + return + } + var cmp int + for mt.smaller.Valid() { + cmp = CompareKeys(mt.smaller.Key(), mt.bigger.Key()) + if cmp == 0 { + mt.second.Next() + if !mt.second.Valid() { + return + } + continue + } + if mt.reverse { + if cmp < 0 { + mt.smaller, mt.bigger = mt.bigger, mt.smaller + } + } else { + if cmp > 0 { + mt.smaller, mt.bigger = mt.bigger, mt.smaller + } + } + return + } + mt.smaller, mt.bigger = mt.bigger, mt.smaller +} + +// Next returns the next element. If it is the same as the current key, ignore it. +func (mt *FastMergeIterator) Next() { + mt.smaller.Next() + mt.fix() +} + +// Rewind seeks to first element (or last element for reverse iterator). +func (mt *FastMergeIterator) Rewind() { + mt.smaller.Rewind() + mt.bigger.Rewind() + mt.fix() +} + +// Seek brings us to element with key >= given key. +func (mt *FastMergeIterator) Seek(key []byte) { + mt.smaller.Seek(key) + mt.bigger.Seek(key) + mt.fix() +} + +// Valid returns whether the FastMergeIterator is at a valid element. +func (mt *FastMergeIterator) Valid() bool { + return mt.smaller.Valid() +} + +// Key returns the key associated with the current iterator +func (mt *FastMergeIterator) Key() []byte { + return mt.smaller.Key() +} + +// Value returns the value associated with the iterator. +func (mt *FastMergeIterator) Value() ValueStruct { + return mt.smaller.Value() +} + +// Close implements y.Iterator +func (mt *FastMergeIterator) Close() error { + err1 := mt.smaller.Close() + err2 := mt.bigger.Close() + if err1 != nil { + return errors.Wrap(err1, "FastMergeIterator") + } + return errors.Wrap(err2, "FastMergeIterator") +} + +// NewFastMergeIterator creates a merge iterator +func NewFastMergeIterator(iters []Iterator, reverse bool) Iterator { + if len(iters) == 1 { + return iters[0] + } else if len(iters) == 2 { + return &FastMergeIterator{ + smaller: iters[0], + bigger: iters[1], + second: iters[1], + reverse: reverse} + } + mid := len(iters) / 2 + return NewFastMergeIterator( + []Iterator{ + NewFastMergeIterator(iters[:mid], reverse), + NewFastMergeIterator(iters[mid:], reverse), + }, reverse) +} + +// GetMergeIterator returns a new merge Iterator based on the number of iters provided. +func GetMergeIterator(iters []Iterator, reverse bool) Iterator { + // We found out that if there are less than 18 tables in the merge iterator, + // the "FastMergeIterator" is faster than the "MergeIterator". + if len(iters) < 18 { + return NewFastMergeIterator(iters, reverse) + } + return NewMergeIterator(iters, reverse) +} From bc05e40d015ac8497db8159ee58568d31f04f915 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Wed, 18 Sep 2019 20:29:28 +0530 Subject: [PATCH 02/23] fixup --- iterator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iterator.go b/iterator.go index 3ebed3877..79c9e1711 100644 --- a/iterator.go +++ b/iterator.go @@ -422,7 +422,7 @@ var DefaultIteratorOptions = IteratorOptions{ // Iterator helps iterating over the KV pairs in a lexicographically sorted order. type Iterator struct { - iitr *y.MergeIterator + iitr y.Iterator txn *Txn readTs uint64 From 3d232457a35b8ff236b4452617a2a5fdab78b5f6 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Thu, 19 Sep 2019 19:52:53 +0530 Subject: [PATCH 03/23] More improvements to merge iterator --- iterator.go | 2 +- levels.go | 2 +- table/merge_iterator.go | 173 +++++++++++++++++++++++++ table/table_test.go | 10 +- y/iterator.go | 279 ---------------------------------------- 5 files changed, 180 insertions(+), 286 deletions(-) create mode 100644 table/merge_iterator.go diff --git a/iterator.go b/iterator.go index 79c9e1711..7b719e126 100644 --- a/iterator.go +++ b/iterator.go @@ -469,7 +469,7 @@ func (txn *Txn) NewIterator(opt IteratorOptions) *Iterator { res := &Iterator{ txn: txn, - iitr: y.GetMergeIterator(iters, opt.Reverse), + iitr: table.NewMergeIterator(iters, opt.Reverse), opt: opt, readTs: txn.readTs, } diff --git a/levels.go b/levels.go index 6a94d2367..dbc4f4e9c 100644 --- a/levels.go +++ b/levels.go @@ -478,7 +478,7 @@ func (s *levelsController) compactBuildTables( valid = append(valid, table) } iters = append(iters, table.NewConcatIterator(valid, false)) - it := y.GetMergeIterator(iters, false) + it := table.NewMergeIterator(iters, false) defer it.Close() // Important to close the iterator to do ref counting. it.Rewind() diff --git a/table/merge_iterator.go b/table/merge_iterator.go new file mode 100644 index 000000000..48ffdb41e --- /dev/null +++ b/table/merge_iterator.go @@ -0,0 +1,173 @@ +package table + +import ( + "github.com/dgraph-io/badger/y" + "github.com/pkg/errors" +) + +// MergeIterator merges multiple iterators. +// NOTE: MergeIterator owns the array of iterators and is responsible for closing them. +type MergeIterator struct { + smaller mergeIteratorChild + bigger mergeIteratorChild + + // when the two iterators has the same value, the value in the second iterator is ignored. + second y.Iterator + reverse bool +} + +type mergeIteratorChild struct { + valid bool + key []byte + iter y.Iterator + + // The two iterators are type asserted from `y.Iterator`, used to inline more function calls. + // Calling functions on concrete types is much faster (about 30%) than calling the + // iterface's function. + merge *MergeIterator + concat *ConcatIterator +} + +func (child *mergeIteratorChild) setIterator(iter y.Iterator) { + child.iter = iter + child.merge, _ = iter.(*MergeIterator) + child.concat, _ = iter.(*ConcatIterator) +} + +func (child *mergeIteratorChild) reset() { + if child.merge != nil { + child.valid = child.merge.smaller.valid + if child.valid { + child.key = child.merge.smaller.key + } + } else if child.concat != nil { + child.valid = child.concat.Valid() + if child.valid { + child.key = child.concat.Key() + } + } else { + child.valid = child.iter.Valid() + if child.valid { + child.key = child.iter.Key() + } + } +} + +func (mt *MergeIterator) fix() { + if !mt.bigger.valid { + return + } + for mt.smaller.valid { + cmp := y.CompareKeys(mt.smaller.key, mt.bigger.key) + if cmp == 0 { + // Ignore the value in second iterator. + mt.second.Next() + var secondValid bool + if mt.second == mt.smaller.iter { + mt.smaller.reset() + secondValid = mt.smaller.valid + } else { + mt.bigger.reset() + secondValid = mt.bigger.valid + } + if !secondValid { + return + } + continue + } + if mt.reverse { + if cmp < 0 { + mt.swap() + } + } else { + if cmp > 0 { + mt.swap() + } + } + return + } + mt.swap() +} + +func (mt *MergeIterator) swap() { + mt.smaller, mt.bigger = mt.bigger, mt.smaller +} + +// Next returns the next element. If it is the same as the current key, ignore it. +func (mt *MergeIterator) Next() { + if mt.smaller.merge != nil { + mt.smaller.merge.Next() + } else if mt.smaller.concat != nil { + mt.smaller.concat.Next() + } else { + mt.smaller.iter.Next() + } + mt.smaller.reset() + mt.fix() +} + +// Rewind seeks to first element (or last element for reverse iterator). +func (mt *MergeIterator) Rewind() { + mt.smaller.iter.Rewind() + mt.smaller.reset() + mt.bigger.iter.Rewind() + mt.bigger.reset() + mt.fix() +} + +// Seek brings us to element with key >= given key. +func (mt *MergeIterator) Seek(key []byte) { + mt.smaller.iter.Seek(key) + mt.smaller.reset() + mt.bigger.iter.Seek(key) + mt.bigger.reset() + mt.fix() +} + +// Valid returns whether the MergeIterator is at a valid element. +func (mt *MergeIterator) Valid() bool { + return mt.smaller.valid +} + +// Key returns the key associated with the current iterator +func (mt *MergeIterator) Key() []byte { + return mt.smaller.key +} + +// Value returns the value associated with the iterator. +func (mt *MergeIterator) Value() y.ValueStruct { + return mt.smaller.iter.Value() +} + +// Close implements y.Iterator +func (mt *MergeIterator) Close() error { + err1 := mt.smaller.iter.Close() + err2 := mt.bigger.iter.Close() + if err1 != nil { + return errors.Wrap(err1, "MergeIterator") + } + return errors.Wrap(err2, "MergeIterator") +} + +// NewMergeIterator creates a merge iterator +func NewMergeIterator(iters []y.Iterator, reverse bool) y.Iterator { + if len(iters) == 0 { + return nil + } else if len(iters) == 1 { + return iters[0] + } else if len(iters) == 2 { + mi := &MergeIterator{ + second: iters[1], + reverse: reverse, + } + mi.smaller.setIterator(iters[0]) + mi.bigger.setIterator(iters[1]) + return mi + } + mid := len(iters) / 2 + return NewMergeIterator( + []y.Iterator{ + NewMergeIterator(iters[:mid], reverse), + NewMergeIterator(iters[mid:], reverse), + }, reverse) +} diff --git a/table/table_test.go b/table/table_test.go index c78855474..83edd9873 100644 --- a/table/table_test.go +++ b/table/table_test.go @@ -502,7 +502,7 @@ func TestMergingIterator(t *testing.T) { defer tbl2.DecrRef() it1 := tbl1.NewIterator(false) it2 := NewConcatIterator([]*Table{tbl2}, false) - it := y.NewMergeIterator([]y.Iterator{it1, it2}, false) + it := NewMergeIterator([]y.Iterator{it1, it2}, false) defer it.Close() it.Rewind() @@ -543,7 +543,7 @@ func TestMergingIteratorReversed(t *testing.T) { defer tbl2.DecrRef() it1 := tbl1.NewIterator(true) it2 := NewConcatIterator([]*Table{tbl2}, true) - it := y.NewMergeIterator([]y.Iterator{it1, it2}, true) + it := NewMergeIterator([]y.Iterator{it1, it2}, true) defer it.Close() it.Rewind() @@ -584,7 +584,7 @@ func TestMergingIteratorTakeOne(t *testing.T) { it1 := NewConcatIterator([]*Table{t1}, false) it2 := NewConcatIterator([]*Table{t2}, false) - it := y.NewMergeIterator([]y.Iterator{it1, it2}, false) + it := NewMergeIterator([]y.Iterator{it1, it2}, false) defer it.Close() it.Rewind() @@ -625,7 +625,7 @@ func TestMergingIteratorTakeTwo(t *testing.T) { it1 := NewConcatIterator([]*Table{t1}, false) it2 := NewConcatIterator([]*Table{t2}, false) - it := y.NewMergeIterator([]y.Iterator{it1, it2}, false) + it := NewMergeIterator([]y.Iterator{it1, it2}, false) defer it.Close() it.Rewind() @@ -779,7 +779,7 @@ func BenchmarkReadMerged(b *testing.B) { for _, tbl := range tables { iters = append(iters, tbl.NewIterator(false)) } - it := y.NewMergeIterator(iters, false) + it := NewMergeIterator(iters, false) defer it.Close() for it.Rewind(); it.Valid(); it.Next() { } diff --git a/y/iterator.go b/y/iterator.go index 044a480f6..6d0f677c0 100644 --- a/y/iterator.go +++ b/y/iterator.go @@ -18,10 +18,7 @@ package y import ( "bytes" - "container/heap" "encoding/binary" - - "github.com/pkg/errors" ) // ValueStruct represents the value info that can be associated with a key, but also the internal @@ -98,279 +95,3 @@ type Iterator interface { // All iterators should be closed so that file garbage collection works. Close() error } - -type elem struct { - itr Iterator - nice int - reversed bool -} - -type elemHeap []*elem - -func (eh elemHeap) Len() int { return len(eh) } -func (eh elemHeap) Swap(i, j int) { eh[i], eh[j] = eh[j], eh[i] } -func (eh *elemHeap) Push(x interface{}) { *eh = append(*eh, x.(*elem)) } -func (eh *elemHeap) Pop() interface{} { - // Remove the last element, because Go has already swapped 0th elem <-> last. - old := *eh - n := len(old) - x := old[n-1] - *eh = old[0 : n-1] - return x -} -func (eh elemHeap) Less(i, j int) bool { - cmp := CompareKeys(eh[i].itr.Key(), eh[j].itr.Key()) - if cmp < 0 { - return !eh[i].reversed - } - if cmp > 0 { - return eh[i].reversed - } - // The keys are equal. In this case, lower nice take precedence. This is important. - return eh[i].nice < eh[j].nice -} - -// MergeIterator merges multiple iterators. -// NOTE: MergeIterator owns the array of iterators and is responsible for closing them. -type MergeIterator struct { - h elemHeap - curKey []byte - reversed bool - - all []Iterator -} - -// NewMergeIterator returns a new MergeIterator from a list of Iterators. -func NewMergeIterator(iters []Iterator, reversed bool) *MergeIterator { - m := &MergeIterator{all: iters, reversed: reversed} - m.h = make(elemHeap, 0, len(iters)) - m.initHeap() - return m -} - -func (s *MergeIterator) storeKey(smallest Iterator) { - if cap(s.curKey) < len(smallest.Key()) { - s.curKey = make([]byte, 2*len(smallest.Key())) - } - s.curKey = s.curKey[:len(smallest.Key())] - copy(s.curKey, smallest.Key()) -} - -// initHeap checks all iterators and initializes our heap and array of keys. -// Whenever we reverse direction, we need to run this. -func (s *MergeIterator) initHeap() { - s.h = s.h[:0] - for idx, itr := range s.all { - if !itr.Valid() { - continue - } - e := &elem{itr: itr, nice: idx, reversed: s.reversed} - s.h = append(s.h, e) - } - heap.Init(&s.h) - for len(s.h) > 0 { - it := s.h[0].itr - if it == nil || !it.Valid() { - heap.Pop(&s.h) - continue - } - s.storeKey(s.h[0].itr) - break - } -} - -// Valid returns whether the MergeIterator is at a valid element. -func (s *MergeIterator) Valid() bool { - if s == nil { - return false - } - if len(s.h) == 0 { - return false - } - return s.h[0].itr.Valid() -} - -// Key returns the key associated with the current iterator -func (s *MergeIterator) Key() []byte { - if len(s.h) == 0 { - return nil - } - return s.h[0].itr.Key() -} - -// Value returns the value associated with the iterator. -func (s *MergeIterator) Value() ValueStruct { - if len(s.h) == 0 { - return ValueStruct{} - } - return s.h[0].itr.Value() -} - -// Next returns the next element. If it is the same as the current key, ignore it. -func (s *MergeIterator) Next() { - if len(s.h) == 0 { - return - } - - smallest := s.h[0].itr - smallest.Next() - - for len(s.h) > 0 { - smallest = s.h[0].itr - if !smallest.Valid() { - heap.Pop(&s.h) - continue - } - - heap.Fix(&s.h, 0) - smallest = s.h[0].itr - if smallest.Valid() { - if !bytes.Equal(smallest.Key(), s.curKey) { - break - } - smallest.Next() - } - } - if !smallest.Valid() { - return - } - s.storeKey(smallest) -} - -// Rewind seeks to first element (or last element for reverse iterator). -func (s *MergeIterator) Rewind() { - for _, itr := range s.all { - itr.Rewind() - } - s.initHeap() -} - -// Seek brings us to element with key >= given key. -func (s *MergeIterator) Seek(key []byte) { - for _, itr := range s.all { - itr.Seek(key) - } - s.initHeap() -} - -// Close implements y.Iterator -func (s *MergeIterator) Close() error { - for _, itr := range s.all { - if err := itr.Close(); err != nil { - return errors.Wrap(err, "MergeIterator") - } - } - return nil -} - -// FastMergeIterator is a specialized MergeIterator that works better than the merge Iterator for -// small number of iterators. We found out that if we have less than 18 iterators in merge iterator, -// the fast merge iterator performs better. -type FastMergeIterator struct { - second Iterator - smaller Iterator - bigger Iterator - reverse bool -} - -func (mt *FastMergeIterator) fix() { - if !mt.bigger.Valid() { - return - } - var cmp int - for mt.smaller.Valid() { - cmp = CompareKeys(mt.smaller.Key(), mt.bigger.Key()) - if cmp == 0 { - mt.second.Next() - if !mt.second.Valid() { - return - } - continue - } - if mt.reverse { - if cmp < 0 { - mt.smaller, mt.bigger = mt.bigger, mt.smaller - } - } else { - if cmp > 0 { - mt.smaller, mt.bigger = mt.bigger, mt.smaller - } - } - return - } - mt.smaller, mt.bigger = mt.bigger, mt.smaller -} - -// Next returns the next element. If it is the same as the current key, ignore it. -func (mt *FastMergeIterator) Next() { - mt.smaller.Next() - mt.fix() -} - -// Rewind seeks to first element (or last element for reverse iterator). -func (mt *FastMergeIterator) Rewind() { - mt.smaller.Rewind() - mt.bigger.Rewind() - mt.fix() -} - -// Seek brings us to element with key >= given key. -func (mt *FastMergeIterator) Seek(key []byte) { - mt.smaller.Seek(key) - mt.bigger.Seek(key) - mt.fix() -} - -// Valid returns whether the FastMergeIterator is at a valid element. -func (mt *FastMergeIterator) Valid() bool { - return mt.smaller.Valid() -} - -// Key returns the key associated with the current iterator -func (mt *FastMergeIterator) Key() []byte { - return mt.smaller.Key() -} - -// Value returns the value associated with the iterator. -func (mt *FastMergeIterator) Value() ValueStruct { - return mt.smaller.Value() -} - -// Close implements y.Iterator -func (mt *FastMergeIterator) Close() error { - err1 := mt.smaller.Close() - err2 := mt.bigger.Close() - if err1 != nil { - return errors.Wrap(err1, "FastMergeIterator") - } - return errors.Wrap(err2, "FastMergeIterator") -} - -// NewFastMergeIterator creates a merge iterator -func NewFastMergeIterator(iters []Iterator, reverse bool) Iterator { - if len(iters) == 1 { - return iters[0] - } else if len(iters) == 2 { - return &FastMergeIterator{ - smaller: iters[0], - bigger: iters[1], - second: iters[1], - reverse: reverse} - } - mid := len(iters) / 2 - return NewFastMergeIterator( - []Iterator{ - NewFastMergeIterator(iters[:mid], reverse), - NewFastMergeIterator(iters[mid:], reverse), - }, reverse) -} - -// GetMergeIterator returns a new merge Iterator based on the number of iters provided. -func GetMergeIterator(iters []Iterator, reverse bool) Iterator { - // We found out that if there are less than 18 tables in the merge iterator, - // the "FastMergeIterator" is faster than the "MergeIterator". - if len(iters) < 18 { - return NewFastMergeIterator(iters, reverse) - } - return NewMergeIterator(iters, reverse) -} From cb233425aebf85e24f961537006d6c83b1464013 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Fri, 20 Sep 2019 10:32:17 +0530 Subject: [PATCH 04/23] Move iterator_test to table package --- .../merge_iterator_test.go | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) rename y/iterator_test.go => table/merge_iterator_test.go (85%) diff --git a/y/iterator_test.go b/table/merge_iterator_test.go similarity index 85% rename from y/iterator_test.go rename to table/merge_iterator_test.go index cff88be0a..d036e41f8 100644 --- a/y/iterator_test.go +++ b/table/merge_iterator_test.go @@ -14,12 +14,13 @@ * limitations under the License. */ -package y +package table import ( "sort" "testing" + "github.com/dgraph-io/badger/y" "github.com/stretchr/testify/require" ) @@ -53,22 +54,22 @@ func (s *SimpleIterator) Rewind() { } func (s *SimpleIterator) Seek(key []byte) { - key = KeyWithTs(key, 0) + key = y.KeyWithTs(key, 0) if !s.reversed { s.idx = sort.Search(len(s.keys), func(i int) bool { - return CompareKeys(s.keys[i], key) >= 0 + return y.CompareKeys(s.keys[i], key) >= 0 }) } else { n := len(s.keys) s.idx = n - 1 - sort.Search(n, func(i int) bool { - return CompareKeys(s.keys[n-1-i], key) <= 0 + return y.CompareKeys(s.keys[n-1-i], key) <= 0 }) } } func (s *SimpleIterator) Key() []byte { return s.keys[s.idx] } -func (s *SimpleIterator) Value() ValueStruct { - return ValueStruct{ +func (s *SimpleIterator) Value() y.ValueStruct { + return y.ValueStruct{ Value: s.vals[s.idx], UserMeta: 55, Meta: 0, @@ -78,12 +79,14 @@ func (s *SimpleIterator) Valid() bool { return s.idx >= 0 && s.idx < len(s.keys) } +var _ y.Iterator = &SimpleIterator{} + func newSimpleIterator(keys []string, vals []string, reversed bool) *SimpleIterator { k := make([][]byte, len(keys)) v := make([][]byte, len(vals)) - AssertTrue(len(keys) == len(vals)) + y.AssertTrue(len(keys) == len(vals)) for i := 0; i < len(keys); i++ { - k[i] = KeyWithTs([]byte(keys[i]), 0) + k[i] = y.KeyWithTs([]byte(keys[i]), 0) v[i] = []byte(vals[i]) } return &SimpleIterator{ @@ -94,18 +97,18 @@ func newSimpleIterator(keys []string, vals []string, reversed bool) *SimpleItera } } -func getAll(it Iterator) ([]string, []string) { +func getAll(it y.Iterator) ([]string, []string) { var keys, vals []string for ; it.Valid(); it.Next() { k := it.Key() - keys = append(keys, string(ParseKey(k))) + keys = append(keys, string(y.ParseKey(k))) v := it.Value() vals = append(vals, string(v.Value)) } return keys, vals } -func closeAndCheck(t *testing.T, it Iterator, expected int) { +func closeAndCheck(t *testing.T, it y.Iterator, expected int) { closeCount = 0 it.Close() require.EqualValues(t, expected, closeCount) @@ -135,7 +138,7 @@ func TestMergeSingle(t *testing.T) { keys := []string{"1", "2", "3"} vals := []string{"v1", "v2", "v3"} it := newSimpleIterator(keys, vals, false) - mergeIt := NewMergeIterator([]Iterator{it}, false) + mergeIt := NewMergeIterator([]y.Iterator{it}, false) mergeIt.Rewind() k, v := getAll(mergeIt) require.EqualValues(t, keys, k) @@ -147,7 +150,7 @@ func TestMergeSingleReversed(t *testing.T) { keys := []string{"1", "2", "3"} vals := []string{"v1", "v2", "v3"} it := newSimpleIterator(keys, vals, true) - mergeIt := NewMergeIterator([]Iterator{it}, true) + mergeIt := NewMergeIterator([]y.Iterator{it}, true) mergeIt.Rewind() k, v := getAll(mergeIt) require.EqualValues(t, reversed(keys), k) @@ -161,7 +164,7 @@ func TestMergeMore(t *testing.T) { it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false) - mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, false) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, false) expectedKeys := []string{"1", "2", "3", "5", "7", "9"} expectedVals := []string{"a1", "b2", "a3", "b5", "a7", "d9"} mergeIt.Rewind() @@ -176,8 +179,8 @@ func TestMergeIteratorNested(t *testing.T) { keys := []string{"1", "2", "3"} vals := []string{"v1", "v2", "v3"} it := newSimpleIterator(keys, vals, false) - mergeIt := NewMergeIterator([]Iterator{it}, false) - mergeIt2 := NewMergeIterator([]Iterator{mergeIt}, false) + mergeIt := NewMergeIterator([]y.Iterator{it}, false) + mergeIt2 := NewMergeIterator([]y.Iterator{mergeIt}, false) mergeIt2.Rewind() k, v := getAll(mergeIt2) require.EqualValues(t, keys, k) @@ -190,7 +193,7 @@ func TestMergeIteratorSeek(t *testing.T) { it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, false) it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false) - mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, false) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, false) mergeIt.Seek([]byte("4")) k, v := getAll(mergeIt) require.EqualValues(t, []string{"5", "7", "9"}, k) @@ -203,7 +206,7 @@ func TestMergeIteratorSeekReversed(t *testing.T) { it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, true) it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, true) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, true) - mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, true) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, true) mergeIt.Seek([]byte("5")) k, v := getAll(mergeIt) require.EqualValues(t, []string{"5", "3", "2", "1"}, k) @@ -216,7 +219,7 @@ func TestMergeIteratorSeekInvalid(t *testing.T) { it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, false) it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false) - mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, false) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, false) mergeIt.Seek([]byte("f")) require.False(t, mergeIt.Valid()) closeAndCheck(t, mergeIt, 4) @@ -227,7 +230,7 @@ func TestMergeIteratorSeekInvalidReversed(t *testing.T) { it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, true) it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, true) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, true) - mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, true) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, true) mergeIt.Seek([]byte("0")) require.False(t, mergeIt.Valid()) closeAndCheck(t, mergeIt, 4) From 8ae39f4fc8f45cc858e17421f463e1550b812dbf Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Mon, 23 Sep 2019 15:45:52 +0530 Subject: [PATCH 05/23] Add missing licence --- table/merge_iterator.go | 16 ++++++++++++++++ table/merge_iterator_test.go | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index 48ffdb41e..0ad3ef3fd 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -1,3 +1,19 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package table import ( diff --git a/table/merge_iterator_test.go b/table/merge_iterator_test.go index d036e41f8..1cba9164a 100644 --- a/table/merge_iterator_test.go +++ b/table/merge_iterator_test.go @@ -1,5 +1,5 @@ /* - * Copyright 2017 Dgraph Labs, Inc. and Contributors + * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From d90afb44914782307fbfcf7aacdd1779aaa4fc97 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Tue, 1 Oct 2019 18:42:13 +0530 Subject: [PATCH 06/23] fix bug in merge iterator --- table/merge_iterator.go | 35 +++++--- .../merge_iterator_test.go | 88 +++++++++++++----- table/table_test.go | 90 +++++++++++-------- 3 files changed, 140 insertions(+), 73 deletions(-) rename y/iterator_test.go => table/merge_iterator_test.go (70%) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index 48ffdb41e..cafcbb823 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -11,7 +11,10 @@ type MergeIterator struct { smaller mergeIteratorChild bigger mergeIteratorChild - // when the two iterators has the same value, the value in the second iterator is ignored. + // When the two iterators has the same value, the value in the second iterator is ignored. + // The iterators are swapped when bigger iterator < small iterator. Second always points to + // bigger iterator and that allows us to pick the value from smaller iterator (in the intial + // state) when both the iterators have the same value. second y.Iterator reverse bool } @@ -22,7 +25,7 @@ type mergeIteratorChild struct { iter y.Iterator // The two iterators are type asserted from `y.Iterator`, used to inline more function calls. - // Calling functions on concrete types is much faster (about 30%) than calling the + // Calling functions on concrete types is much faster (about 25-30%) than calling the // iterface's function. merge *MergeIterator concat *ConcatIterator @@ -34,7 +37,7 @@ func (child *mergeIteratorChild) setIterator(iter y.Iterator) { child.concat, _ = iter.(*ConcatIterator) } -func (child *mergeIteratorChild) reset() { +func (child *mergeIteratorChild) setKey() { if child.merge != nil { child.valid = child.merge.smaller.valid if child.valid { @@ -53,24 +56,28 @@ func (child *mergeIteratorChild) reset() { } } -func (mt *MergeIterator) fix() { +func (mt *MergeIterator) fixSmallerBigger() { if !mt.bigger.valid { return } for mt.smaller.valid { cmp := y.CompareKeys(mt.smaller.key, mt.bigger.key) + // Both the keys are equal. if cmp == 0 { // Ignore the value in second iterator. mt.second.Next() var secondValid bool if mt.second == mt.smaller.iter { - mt.smaller.reset() + mt.smaller.setKey() secondValid = mt.smaller.valid } else { - mt.bigger.reset() + mt.bigger.setKey() secondValid = mt.bigger.valid } if !secondValid { + if mt.second == mt.smaller.iter && mt.bigger.valid { + mt.swap() + } return } continue @@ -102,26 +109,26 @@ func (mt *MergeIterator) Next() { } else { mt.smaller.iter.Next() } - mt.smaller.reset() - mt.fix() + mt.smaller.setKey() + mt.fixSmallerBigger() } // Rewind seeks to first element (or last element for reverse iterator). func (mt *MergeIterator) Rewind() { mt.smaller.iter.Rewind() - mt.smaller.reset() + mt.smaller.setKey() mt.bigger.iter.Rewind() - mt.bigger.reset() - mt.fix() + mt.bigger.setKey() + mt.fixSmallerBigger() } // Seek brings us to element with key >= given key. func (mt *MergeIterator) Seek(key []byte) { mt.smaller.iter.Seek(key) - mt.smaller.reset() + mt.smaller.setKey() mt.bigger.iter.Seek(key) - mt.bigger.reset() - mt.fix() + mt.bigger.setKey() + mt.fixSmallerBigger() } // Valid returns whether the MergeIterator is at a valid element. diff --git a/y/iterator_test.go b/table/merge_iterator_test.go similarity index 70% rename from y/iterator_test.go rename to table/merge_iterator_test.go index cff88be0a..717937dc6 100644 --- a/y/iterator_test.go +++ b/table/merge_iterator_test.go @@ -1,5 +1,5 @@ /* - * Copyright 2017 Dgraph Labs, Inc. and Contributors + * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,12 +14,13 @@ * limitations under the License. */ -package y +package table import ( "sort" "testing" + "github.com/dgraph-io/badger/y" "github.com/stretchr/testify/require" ) @@ -53,22 +54,22 @@ func (s *SimpleIterator) Rewind() { } func (s *SimpleIterator) Seek(key []byte) { - key = KeyWithTs(key, 0) + key = y.KeyWithTs(key, 0) if !s.reversed { s.idx = sort.Search(len(s.keys), func(i int) bool { - return CompareKeys(s.keys[i], key) >= 0 + return y.CompareKeys(s.keys[i], key) >= 0 }) } else { n := len(s.keys) s.idx = n - 1 - sort.Search(n, func(i int) bool { - return CompareKeys(s.keys[n-1-i], key) <= 0 + return y.CompareKeys(s.keys[n-1-i], key) <= 0 }) } } func (s *SimpleIterator) Key() []byte { return s.keys[s.idx] } -func (s *SimpleIterator) Value() ValueStruct { - return ValueStruct{ +func (s *SimpleIterator) Value() y.ValueStruct { + return y.ValueStruct{ Value: s.vals[s.idx], UserMeta: 55, Meta: 0, @@ -81,9 +82,9 @@ func (s *SimpleIterator) Valid() bool { func newSimpleIterator(keys []string, vals []string, reversed bool) *SimpleIterator { k := make([][]byte, len(keys)) v := make([][]byte, len(vals)) - AssertTrue(len(keys) == len(vals)) + y.AssertTrue(len(keys) == len(vals)) for i := 0; i < len(keys); i++ { - k[i] = KeyWithTs([]byte(keys[i]), 0) + k[i] = y.KeyWithTs([]byte(keys[i]), 0) v[i] = []byte(vals[i]) } return &SimpleIterator{ @@ -94,18 +95,18 @@ func newSimpleIterator(keys []string, vals []string, reversed bool) *SimpleItera } } -func getAll(it Iterator) ([]string, []string) { +func getAll(it y.Iterator) ([]string, []string) { var keys, vals []string for ; it.Valid(); it.Next() { k := it.Key() - keys = append(keys, string(ParseKey(k))) + keys = append(keys, string(y.ParseKey(k))) v := it.Value() vals = append(vals, string(v.Value)) } return keys, vals } -func closeAndCheck(t *testing.T, it Iterator, expected int) { +func closeAndCheck(t *testing.T, it y.Iterator, expected int) { closeCount = 0 it.Close() require.EqualValues(t, expected, closeCount) @@ -135,7 +136,7 @@ func TestMergeSingle(t *testing.T) { keys := []string{"1", "2", "3"} vals := []string{"v1", "v2", "v3"} it := newSimpleIterator(keys, vals, false) - mergeIt := NewMergeIterator([]Iterator{it}, false) + mergeIt := NewMergeIterator([]y.Iterator{it}, false) mergeIt.Rewind() k, v := getAll(mergeIt) require.EqualValues(t, keys, k) @@ -147,7 +148,7 @@ func TestMergeSingleReversed(t *testing.T) { keys := []string{"1", "2", "3"} vals := []string{"v1", "v2", "v3"} it := newSimpleIterator(keys, vals, true) - mergeIt := NewMergeIterator([]Iterator{it}, true) + mergeIt := NewMergeIterator([]y.Iterator{it}, true) mergeIt.Rewind() k, v := getAll(mergeIt) require.EqualValues(t, reversed(keys), k) @@ -161,7 +162,7 @@ func TestMergeMore(t *testing.T) { it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false) - mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, false) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, false) expectedKeys := []string{"1", "2", "3", "5", "7", "9"} expectedVals := []string{"a1", "b2", "a3", "b5", "a7", "d9"} mergeIt.Rewind() @@ -176,8 +177,8 @@ func TestMergeIteratorNested(t *testing.T) { keys := []string{"1", "2", "3"} vals := []string{"v1", "v2", "v3"} it := newSimpleIterator(keys, vals, false) - mergeIt := NewMergeIterator([]Iterator{it}, false) - mergeIt2 := NewMergeIterator([]Iterator{mergeIt}, false) + mergeIt := NewMergeIterator([]y.Iterator{it}, false) + mergeIt2 := NewMergeIterator([]y.Iterator{mergeIt}, false) mergeIt2.Rewind() k, v := getAll(mergeIt2) require.EqualValues(t, keys, k) @@ -190,7 +191,7 @@ func TestMergeIteratorSeek(t *testing.T) { it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, false) it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false) - mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, false) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, false) mergeIt.Seek([]byte("4")) k, v := getAll(mergeIt) require.EqualValues(t, []string{"5", "7", "9"}, k) @@ -203,7 +204,7 @@ func TestMergeIteratorSeekReversed(t *testing.T) { it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, true) it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, true) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, true) - mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, true) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, true) mergeIt.Seek([]byte("5")) k, v := getAll(mergeIt) require.EqualValues(t, []string{"5", "3", "2", "1"}, k) @@ -216,7 +217,7 @@ func TestMergeIteratorSeekInvalid(t *testing.T) { it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, false) it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false) - mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, false) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, false) mergeIt.Seek([]byte("f")) require.False(t, mergeIt.Valid()) closeAndCheck(t, mergeIt, 4) @@ -227,8 +228,53 @@ func TestMergeIteratorSeekInvalidReversed(t *testing.T) { it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, true) it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, true) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, true) - mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, true) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, true) mergeIt.Seek([]byte("0")) require.False(t, mergeIt.Valid()) closeAndCheck(t, mergeIt, 4) } + +func TestMergeIteratorDuplicate(t *testing.T) { + it1 := newSimpleIterator([]string{"0", "1", "2"}, []string{"a0", "a1", "a2"}, false) + it2 := newSimpleIterator([]string{"1", "3"}, []string{"b1", "b3"}, false) + it3 := newSimpleIterator([]string{"0", "1", "2"}, []string{"c0", "c1", "c2"}, false) + t.Run("forward", func(t *testing.T) { + it := NewMergeIterator([]y.Iterator{it3, it2, it1}, false) + + expectedKeys := []string{"0", "1", "2", "3"} + expectedVals := []string{"c0", "c1", "c2", "b3"} + it.Rewind() + k, v := getAll(it) + require.Equal(t, expectedKeys, k) + require.Equal(t, expectedVals, v) + }) + + t.Run("reverse", func(t *testing.T) { + it1.reversed = true + it2.reversed = true + it3.reversed = true + + it := NewMergeIterator([]y.Iterator{it3, it2, it1}, true) + + expectedKeys := []string{"3", "2", "1", "0"} + expectedVals := []string{"b3", "c2", "c1", "c0"} + it.Rewind() + k, v := getAll(it) + require.Equal(t, expectedKeys, k) + require.Equal(t, expectedVals, v) + }) + + t.Run("edge case", func(t *testing.T) { + it1 := newSimpleIterator([]string{"0", "1", "2"}, []string{"0", "1", "2"}, false) + it2 := newSimpleIterator([]string{"1"}, []string{"1"}, false) + it3 := newSimpleIterator([]string{"2"}, []string{"2"}, false) + it := NewMergeIterator([]y.Iterator{it3, it2, it1}, false) + + var cnt int + for it.Rewind(); it.Valid(); it.Next() { + require.EqualValues(t, cnt+48, it.Key()[0]) + cnt++ + } + require.Equal(t, 3, cnt) + }) +} diff --git a/table/table_test.go b/table/table_test.go index 83edd9873..3446d3b73 100644 --- a/table/table_test.go +++ b/table/table_test.go @@ -487,12 +487,25 @@ func TestConcatIterator(t *testing.T) { func TestMergingIterator(t *testing.T) { f1 := buildTable(t, [][]string{ {"k1", "a1"}, - {"k2", "a2"}, + {"k4", "a4"}, + {"k5", "a5"}, }) f2 := buildTable(t, [][]string{ - {"k1", "b1"}, {"k2", "b2"}, + {"k3", "b3"}, + {"k4", "b4"}, }) + + expected := []struct { + key string + value string + }{ + {"k1", "a1"}, + {"k2", "b2"}, + {"k3", "b3"}, + {"k4", "a4"}, + {"k5", "a5"}, + } opts := Options{LoadingMode: options.LoadToRAM, ChkMode: options.OnTableAndBlockRead} tbl1, err := OpenTable(f1, opts) require.NoError(t, err) @@ -505,35 +518,42 @@ func TestMergingIterator(t *testing.T) { it := NewMergeIterator([]y.Iterator{it1, it2}, false) defer it.Close() - it.Rewind() - require.True(t, it.Valid()) - k := it.Key() - require.EqualValues(t, "k1", string(y.ParseKey(k))) - vs := it.Value() - require.EqualValues(t, "a1", string(vs.Value)) - require.EqualValues(t, 'A', vs.Meta) - it.Next() - - require.True(t, it.Valid()) - k = it.Key() - require.EqualValues(t, "k2", string(y.ParseKey(k))) - vs = it.Value() - require.EqualValues(t, "a2", string(vs.Value)) - require.EqualValues(t, 'A', vs.Meta) - it.Next() - + var i int + for it.Rewind(); it.Valid(); it.Next() { + k := it.Key() + vs := it.Value() + require.EqualValues(t, expected[i].key, string(y.ParseKey(k))) + require.EqualValues(t, expected[i].value, string(vs.Value)) + require.EqualValues(t, 'A', vs.Meta) + i++ + } + require.Equal(t, i, len(expected)) require.False(t, it.Valid()) } func TestMergingIteratorReversed(t *testing.T) { f1 := buildTable(t, [][]string{ {"k1", "a1"}, - {"k2", "a2"}, + {"k4", "a4"}, + {"k5", "a5"}, }) f2 := buildTable(t, [][]string{ - {"k1", "b1"}, - {"k2", "b2"}, + {"k1", "b2"}, + {"k3", "b3"}, + {"k4", "b4"}, + {"k5", "b5"}, }) + + expected := []struct { + key string + value string + }{ + {"k5", "a5"}, + {"k4", "a4"}, + {"k3", "b3"}, + // {"k2", "b2"}, + {"k1", "a1"}, + } opts := Options{LoadingMode: options.LoadToRAM, ChkMode: options.OnTableAndBlockRead} tbl1, err := OpenTable(f1, opts) require.NoError(t, err) @@ -546,23 +566,17 @@ func TestMergingIteratorReversed(t *testing.T) { it := NewMergeIterator([]y.Iterator{it1, it2}, true) defer it.Close() - it.Rewind() - require.True(t, it.Valid()) - k := it.Key() - require.EqualValues(t, "k2", string(y.ParseKey(k))) - vs := it.Value() - require.EqualValues(t, "a2", string(vs.Value)) - require.EqualValues(t, 'A', vs.Meta) - it.Next() - - require.True(t, it.Valid()) - k = it.Key() - require.EqualValues(t, "k1", string(y.ParseKey(k))) - vs = it.Value() - require.EqualValues(t, "a1", string(vs.Value)) - require.EqualValues(t, 'A', vs.Meta) - it.Next() + var i int + for it.Rewind(); it.Valid(); it.Next() { + k := it.Key() + vs := it.Value() + require.EqualValues(t, expected[i].key, string(y.ParseKey(k))) + require.EqualValues(t, expected[i].value, string(vs.Value)) + require.EqualValues(t, 'A', vs.Meta) + i++ + } + require.Equal(t, i, len(expected)) require.False(t, it.Valid()) } From 3f24d4351c96e45f7b01bbbafa784218f623baf7 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Thu, 3 Oct 2019 16:40:29 +0530 Subject: [PATCH 07/23] Add comment and fix typo --- table/merge_iterator.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index 8f58ea70f..8ba8f4d63 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -42,7 +42,7 @@ type mergeIteratorChild struct { // The two iterators are type asserted from `y.Iterator`, used to inline more function calls. // Calling functions on concrete types is much faster (about 25-30%) than calling the - // iterface's function. + // interface's function. merge *MergeIterator concat *ConcatIterator } @@ -91,6 +91,7 @@ func (mt *MergeIterator) fixSmallerBigger() { secondValid = mt.bigger.valid } if !secondValid { + // Swap smaller and bigger only if second points to the smaller one and the bigger is valid. if mt.second == mt.smaller.iter && mt.bigger.valid { mt.swap() } From a03683a771637ad5cca71c0cbb8e8eb0e4d29b9e Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Thu, 3 Oct 2019 18:50:56 +0530 Subject: [PATCH 08/23] Update comment --- table/merge_iterator.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index 8ba8f4d63..8b7343c69 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -27,10 +27,7 @@ type MergeIterator struct { smaller mergeIteratorChild bigger mergeIteratorChild - // When the two iterators has the same value, the value in the second iterator is ignored. - // The iterators are swapped when bigger iterator < small iterator. Second always points to - // bigger iterator and that allows us to pick the value from smaller iterator (in the intial - // state) when both the iterators have the same value. + // When the two iterators have the same value, the value in the second iterator is ignored. second y.Iterator reverse bool } From 28df20ec783b9c510f3d76af6ff4c9da7b5d06c6 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Thu, 3 Oct 2019 19:08:56 +0530 Subject: [PATCH 09/23] Address review comments --- table/merge_iterator.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index 8b7343c69..aa9f4f42b 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -88,7 +88,8 @@ func (mt *MergeIterator) fixSmallerBigger() { secondValid = mt.bigger.valid } if !secondValid { - // Swap smaller and bigger only if second points to the smaller one and the bigger is valid. + // Swap smaller and bigger only if second points to + // the smaller one and the bigger is valid. if mt.second == mt.smaller.iter && mt.bigger.valid { mt.swap() } @@ -150,7 +151,7 @@ func (mt *MergeIterator) Valid() bool { return mt.smaller.valid } -// Key returns the key associated with the current iterator +// Key returns the key associated with the current iterator. func (mt *MergeIterator) Key() []byte { return mt.smaller.key } @@ -160,7 +161,7 @@ func (mt *MergeIterator) Value() y.ValueStruct { return mt.smaller.iter.Value() } -// Close implements y.Iterator +// Close implements y.Iterator. func (mt *MergeIterator) Close() error { err1 := mt.smaller.iter.Close() err2 := mt.bigger.iter.Close() @@ -170,7 +171,7 @@ func (mt *MergeIterator) Close() error { return errors.Wrap(err2, "MergeIterator") } -// NewMergeIterator creates a merge iterator +// NewMergeIterator creates a merge iterator. func NewMergeIterator(iters []y.Iterator, reverse bool) y.Iterator { if len(iters) == 0 { return nil From ff7427e8840c8705231630c7a0b4ed8484c06575 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Thu, 10 Oct 2019 14:31:31 +0530 Subject: [PATCH 10/23] Rename smaller/bigger to small and big --- table/merge_iterator.go | 74 ++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index aa9f4f42b..5ae1dbbbf 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -24,8 +24,8 @@ import ( // MergeIterator merges multiple iterators. // NOTE: MergeIterator owns the array of iterators and is responsible for closing them. type MergeIterator struct { - smaller mergeIteratorChild - bigger mergeIteratorChild + small mergeIteratorChild + big mergeIteratorChild // When the two iterators have the same value, the value in the second iterator is ignored. second y.Iterator @@ -52,9 +52,9 @@ func (child *mergeIteratorChild) setIterator(iter y.Iterator) { func (child *mergeIteratorChild) setKey() { if child.merge != nil { - child.valid = child.merge.smaller.valid + child.valid = child.merge.small.valid if child.valid { - child.key = child.merge.smaller.key + child.key = child.merge.small.key } } else if child.concat != nil { child.valid = child.concat.Valid() @@ -70,27 +70,27 @@ func (child *mergeIteratorChild) setKey() { } func (mt *MergeIterator) fixSmallerBigger() { - if !mt.bigger.valid { + if !mt.big.valid { return } - for mt.smaller.valid { - cmp := y.CompareKeys(mt.smaller.key, mt.bigger.key) + for mt.small.valid { + cmp := y.CompareKeys(mt.small.key, mt.big.key) // Both the keys are equal. if cmp == 0 { // Ignore the value in second iterator. mt.second.Next() var secondValid bool - if mt.second == mt.smaller.iter { - mt.smaller.setKey() - secondValid = mt.smaller.valid + if mt.second == mt.small.iter { + mt.small.setKey() + secondValid = mt.small.valid } else { - mt.bigger.setKey() - secondValid = mt.bigger.valid + mt.big.setKey() + secondValid = mt.big.valid } if !secondValid { - // Swap smaller and bigger only if second points to - // the smaller one and the bigger is valid. - if mt.second == mt.smaller.iter && mt.bigger.valid { + // Swap small and big only if second points to + // the small one and the big is valid. + if mt.second == mt.small.iter && mt.big.valid { mt.swap() } return @@ -112,59 +112,59 @@ func (mt *MergeIterator) fixSmallerBigger() { } func (mt *MergeIterator) swap() { - mt.smaller, mt.bigger = mt.bigger, mt.smaller + mt.small, mt.big = mt.big, mt.small } // Next returns the next element. If it is the same as the current key, ignore it. func (mt *MergeIterator) Next() { - if mt.smaller.merge != nil { - mt.smaller.merge.Next() - } else if mt.smaller.concat != nil { - mt.smaller.concat.Next() + if mt.small.merge != nil { + mt.small.merge.Next() + } else if mt.small.concat != nil { + mt.small.concat.Next() } else { - mt.smaller.iter.Next() + mt.small.iter.Next() } - mt.smaller.setKey() + mt.small.setKey() mt.fixSmallerBigger() } // Rewind seeks to first element (or last element for reverse iterator). func (mt *MergeIterator) Rewind() { - mt.smaller.iter.Rewind() - mt.smaller.setKey() - mt.bigger.iter.Rewind() - mt.bigger.setKey() + mt.small.iter.Rewind() + mt.small.setKey() + mt.big.iter.Rewind() + mt.big.setKey() mt.fixSmallerBigger() } // Seek brings us to element with key >= given key. func (mt *MergeIterator) Seek(key []byte) { - mt.smaller.iter.Seek(key) - mt.smaller.setKey() - mt.bigger.iter.Seek(key) - mt.bigger.setKey() + mt.small.iter.Seek(key) + mt.small.setKey() + mt.big.iter.Seek(key) + mt.big.setKey() mt.fixSmallerBigger() } // Valid returns whether the MergeIterator is at a valid element. func (mt *MergeIterator) Valid() bool { - return mt.smaller.valid + return mt.small.valid } // Key returns the key associated with the current iterator. func (mt *MergeIterator) Key() []byte { - return mt.smaller.key + return mt.small.key } // Value returns the value associated with the iterator. func (mt *MergeIterator) Value() y.ValueStruct { - return mt.smaller.iter.Value() + return mt.small.iter.Value() } // Close implements y.Iterator. func (mt *MergeIterator) Close() error { - err1 := mt.smaller.iter.Close() - err2 := mt.bigger.iter.Close() + err1 := mt.small.iter.Close() + err2 := mt.big.iter.Close() if err1 != nil { return errors.Wrap(err1, "MergeIterator") } @@ -182,8 +182,8 @@ func NewMergeIterator(iters []y.Iterator, reverse bool) y.Iterator { second: iters[1], reverse: reverse, } - mi.smaller.setIterator(iters[0]) - mi.bigger.setIterator(iters[1]) + mi.small.setIterator(iters[0]) + mi.big.setIterator(iters[1]) return mi } mid := len(iters) / 2 From 37c3827a8524eb0540dbc395c14a6e9deee96df8 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Thu, 10 Oct 2019 14:32:28 +0530 Subject: [PATCH 11/23] Rename mergeIteratorChild to node --- table/merge_iterator.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index 5ae1dbbbf..ff1083323 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -24,15 +24,15 @@ import ( // MergeIterator merges multiple iterators. // NOTE: MergeIterator owns the array of iterators and is responsible for closing them. type MergeIterator struct { - small mergeIteratorChild - big mergeIteratorChild + small node + big node // When the two iterators have the same value, the value in the second iterator is ignored. second y.Iterator reverse bool } -type mergeIteratorChild struct { +type node struct { valid bool key []byte iter y.Iterator @@ -44,13 +44,13 @@ type mergeIteratorChild struct { concat *ConcatIterator } -func (child *mergeIteratorChild) setIterator(iter y.Iterator) { +func (child *node) setIterator(iter y.Iterator) { child.iter = iter child.merge, _ = iter.(*MergeIterator) child.concat, _ = iter.(*ConcatIterator) } -func (child *mergeIteratorChild) setKey() { +func (child *node) setKey() { if child.merge != nil { child.valid = child.merge.small.valid if child.valid { From 4476a2fc2b064aa9f959a3c9eb0b0f8de8e390b0 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Thu, 10 Oct 2019 14:50:17 +0530 Subject: [PATCH 12/23] Add comments --- table/merge_iterator.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index ff1083323..409cbf915 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -28,6 +28,9 @@ type MergeIterator struct { big node // When the two iterators have the same value, the value in the second iterator is ignored. + // On level 0, we can have multiple iterators with the same key. In this case we want to + // use value of the iterator that was added first to the merge iterator. Second keeps track of the + // iterator that was added second so that we can resolve the same key conflict. second y.Iterator reverse bool } @@ -77,7 +80,7 @@ func (mt *MergeIterator) fixSmallerBigger() { cmp := y.CompareKeys(mt.small.key, mt.big.key) // Both the keys are equal. if cmp == 0 { - // Ignore the value in second iterator. + // Key conflict. Ignore the value in second iterator. mt.second.Next() var secondValid bool if mt.second == mt.small.iter { From 762ce181184ba20a8d55bc2ba983f5c67c49ac74 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Thu, 10 Oct 2019 15:32:17 +0530 Subject: [PATCH 13/23] Rename fixSmallerBigger to fix --- table/merge_iterator.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index 409cbf915..7c6ce7b46 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -72,7 +72,7 @@ func (child *node) setKey() { } } -func (mt *MergeIterator) fixSmallerBigger() { +func (mt *MergeIterator) fix() { if !mt.big.valid { return } @@ -128,7 +128,7 @@ func (mt *MergeIterator) Next() { mt.small.iter.Next() } mt.small.setKey() - mt.fixSmallerBigger() + mt.fix() } // Rewind seeks to first element (or last element for reverse iterator). @@ -137,7 +137,7 @@ func (mt *MergeIterator) Rewind() { mt.small.setKey() mt.big.iter.Rewind() mt.big.setKey() - mt.fixSmallerBigger() + mt.fix() } // Seek brings us to element with key >= given key. @@ -146,7 +146,7 @@ func (mt *MergeIterator) Seek(key []byte) { mt.small.setKey() mt.big.iter.Seek(key) mt.big.setKey() - mt.fixSmallerBigger() + mt.fix() } // Valid returns whether the MergeIterator is at a valid element. From 8b489257f79f9eac614df0fe79247599aa8c051a Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Fri, 11 Oct 2019 16:05:33 +0530 Subject: [PATCH 14/23] Doesn't work --- table/merge_iterator.go | 81 ++++++++++++++++++++++++------------ table/merge_iterator_test.go | 7 +++- 2 files changed, 59 insertions(+), 29 deletions(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index 7c6ce7b46..451c9f08e 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -17,6 +17,8 @@ package table import ( + "fmt" + "github.com/dgraph-io/badger/y" "github.com/pkg/errors" ) @@ -24,8 +26,10 @@ import ( // MergeIterator merges multiple iterators. // NOTE: MergeIterator owns the array of iterators and is responsible for closing them. type MergeIterator struct { - small node - big node + left node + right node + + small *node // When the two iterators have the same value, the value in the second iterator is ignored. // On level 0, we can have multiple iterators with the same key. In this case we want to @@ -55,9 +59,9 @@ func (child *node) setIterator(iter y.Iterator) { func (child *node) setKey() { if child.merge != nil { - child.valid = child.merge.small.valid + child.valid = child.merge.left.valid if child.valid { - child.key = child.merge.small.key + child.key = child.merge.left.key } } else if child.concat != nil { child.valid = child.concat.Valid() @@ -73,11 +77,11 @@ func (child *node) setKey() { } func (mt *MergeIterator) fix() { - if !mt.big.valid { + if !mt.bigger().valid { return } for mt.small.valid { - cmp := y.CompareKeys(mt.small.key, mt.big.key) + cmp := y.CompareKeys(mt.small.key, mt.bigger().key) // Both the keys are equal. if cmp == 0 { // Key conflict. Ignore the value in second iterator. @@ -86,15 +90,19 @@ func (mt *MergeIterator) fix() { if mt.second == mt.small.iter { mt.small.setKey() secondValid = mt.small.valid + } else if mt.second == mt.bigger().iter { + mt.bigger().setKey() + secondValid = mt.bigger().valid } else { - mt.big.setKey() - secondValid = mt.big.valid + fmt.Println(mt.second) + panic("////") } if !secondValid { // Swap small and big only if second points to // the small one and the big is valid. - if mt.second == mt.small.iter && mt.big.valid { - mt.swap() + if mt.second == mt.small.iter && mt.bigger().valid { + // mt.small = &mt.right + mt.swapSmall() } return } @@ -102,20 +110,37 @@ func (mt *MergeIterator) fix() { } if mt.reverse { if cmp < 0 { - mt.swap() + mt.swapSmall() } } else { if cmp > 0 { - mt.swap() + mt.swapSmall() } } return } - mt.swap() + mt.swapSmall() } -func (mt *MergeIterator) swap() { - mt.small, mt.big = mt.big, mt.small +func (mt *MergeIterator) bigger() *node { + if mt.small == &mt.left { + return &mt.right + } + return &mt.left +} + +func (mt *MergeIterator) swapSmall() { + if mt.small == &mt.left { + mt.small = &mt.right + return + } + if mt.small == &mt.right { + mt.small = &mt.left + return + } + fmt.Println("mt.small is nil ", mt.small == nil) + panic(".....") + // mt.left, mt.right = mt.right, mt.left } // Next returns the next element. If it is the same as the current key, ignore it. @@ -133,19 +158,19 @@ func (mt *MergeIterator) Next() { // Rewind seeks to first element (or last element for reverse iterator). func (mt *MergeIterator) Rewind() { - mt.small.iter.Rewind() - mt.small.setKey() - mt.big.iter.Rewind() - mt.big.setKey() + mt.left.iter.Rewind() + mt.left.setKey() + mt.right.iter.Rewind() + mt.right.setKey() mt.fix() } // Seek brings us to element with key >= given key. func (mt *MergeIterator) Seek(key []byte) { - mt.small.iter.Seek(key) - mt.small.setKey() - mt.big.iter.Seek(key) - mt.big.setKey() + mt.left.iter.Seek(key) + mt.left.setKey() + mt.right.iter.Seek(key) + mt.right.setKey() mt.fix() } @@ -166,8 +191,8 @@ func (mt *MergeIterator) Value() y.ValueStruct { // Close implements y.Iterator. func (mt *MergeIterator) Close() error { - err1 := mt.small.iter.Close() - err2 := mt.big.iter.Close() + err1 := mt.left.iter.Close() + err2 := mt.right.iter.Close() if err1 != nil { return errors.Wrap(err1, "MergeIterator") } @@ -185,8 +210,10 @@ func NewMergeIterator(iters []y.Iterator, reverse bool) y.Iterator { second: iters[1], reverse: reverse, } - mi.small.setIterator(iters[0]) - mi.big.setIterator(iters[1]) + mi.left.setIterator(iters[0]) + mi.right.setIterator(iters[1]) + // mi.small.setIterator(iters[0]) + mi.small = &mi.left return mi } mid := len(iters) / 2 diff --git a/table/merge_iterator_test.go b/table/merge_iterator_test.go index d3a5d27c7..9e4b935d9 100644 --- a/table/merge_iterator_test.go +++ b/table/merge_iterator_test.go @@ -17,6 +17,7 @@ package table import ( + "fmt" "sort" "testing" @@ -102,8 +103,10 @@ func getAll(it y.Iterator) ([]string, []string) { for ; it.Valid(); it.Next() { k := it.Key() keys = append(keys, string(y.ParseKey(k))) + fmt.Println(keys) v := it.Value() vals = append(vals, string(v.Value)) + fmt.Println(vals) } return keys, vals } @@ -161,10 +164,10 @@ func TestMergeSingleReversed(t *testing.T) { func TestMergeMore(t *testing.T) { it := newSimpleIterator([]string{"1", "3", "7"}, []string{"a1", "a3", "a7"}, false) it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, false) - it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false) + // it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false) - mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, false) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it4}, false) expectedKeys := []string{"1", "2", "3", "5", "7", "9"} expectedVals := []string{"a1", "b2", "a3", "b5", "a7", "d9"} mergeIt.Rewind() From 89e4c79f671bbb29d4da0d2a12a160462f8d44c2 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Mon, 14 Oct 2019 18:59:42 +0530 Subject: [PATCH 15/23] Clean up --- table/merge_iterator.go | 165 ++++++++++++++++++----------------- table/merge_iterator_test.go | 7 +- 2 files changed, 86 insertions(+), 86 deletions(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index 451c9f08e..50a045078 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -17,8 +17,6 @@ package table import ( - "fmt" - "github.com/dgraph-io/badger/y" "github.com/pkg/errors" ) @@ -51,148 +49,153 @@ type node struct { concat *ConcatIterator } -func (child *node) setIterator(iter y.Iterator) { - child.iter = iter - child.merge, _ = iter.(*MergeIterator) - child.concat, _ = iter.(*ConcatIterator) +func (n *node) setIterator(iter y.Iterator) { + n.iter = iter + n.merge, _ = iter.(*MergeIterator) + n.concat, _ = iter.(*ConcatIterator) } -func (child *node) setKey() { - if child.merge != nil { - child.valid = child.merge.left.valid - if child.valid { - child.key = child.merge.left.key +func (n *node) setKey() { + if n.merge != nil { + n.valid = n.merge.small.valid + if n.valid { + n.key = n.merge.small.key } - } else if child.concat != nil { - child.valid = child.concat.Valid() - if child.valid { - child.key = child.concat.Key() + } else if n.concat != nil { + n.valid = n.concat.Valid() + if n.valid { + n.key = n.concat.Key() } } else { - child.valid = child.iter.Valid() - if child.valid { - child.key = child.iter.Key() + n.valid = n.iter.Valid() + if n.valid { + n.key = n.iter.Key() } } } -func (mt *MergeIterator) fix() { - if !mt.bigger().valid { +func (n *node) next() { + if n.merge != nil { + n.merge.Next() + } else if n.concat != nil { + n.concat.Next() + } else { + n.iter.Next() + } + n.setKey() +} + +func (n *node) rewind() { + n.iter.Rewind() + n.setKey() +} + +func (n *node) seek(key []byte) { + n.iter.Seek(key) + n.setKey() +} + +func (mi *MergeIterator) fix() { + if !mi.bigger().valid { return } - for mt.small.valid { - cmp := y.CompareKeys(mt.small.key, mt.bigger().key) + for mi.small.valid { + cmp := y.CompareKeys(mi.small.key, mi.bigger().key) // Both the keys are equal. if cmp == 0 { // Key conflict. Ignore the value in second iterator. - mt.second.Next() + mi.second.Next() var secondValid bool - if mt.second == mt.small.iter { - mt.small.setKey() - secondValid = mt.small.valid - } else if mt.second == mt.bigger().iter { - mt.bigger().setKey() - secondValid = mt.bigger().valid + if mi.second == mi.small.iter { + mi.small.setKey() + secondValid = mi.small.valid + } else if mi.second == mi.bigger().iter { + mi.bigger().setKey() + secondValid = mi.bigger().valid } else { - fmt.Println(mt.second) - panic("////") + panic("mt.second invalid") } if !secondValid { // Swap small and big only if second points to // the small one and the big is valid. - if mt.second == mt.small.iter && mt.bigger().valid { - // mt.small = &mt.right - mt.swapSmall() + if mi.second == mi.small.iter && mi.bigger().valid { + mi.swapSmall() } return } continue } - if mt.reverse { + if mi.reverse { if cmp < 0 { - mt.swapSmall() + mi.swapSmall() } } else { if cmp > 0 { - mt.swapSmall() + mi.swapSmall() } } return } - mt.swapSmall() + mi.swapSmall() } -func (mt *MergeIterator) bigger() *node { - if mt.small == &mt.left { - return &mt.right +func (mi *MergeIterator) bigger() *node { + if mi.small == &mi.left { + return &mi.right } - return &mt.left + return &mi.left } -func (mt *MergeIterator) swapSmall() { - if mt.small == &mt.left { - mt.small = &mt.right +func (mi *MergeIterator) swapSmall() { + if mi.small == &mi.left { + mi.small = &mi.right return } - if mt.small == &mt.right { - mt.small = &mt.left + if mi.small == &mi.right { + mi.small = &mi.left return } - fmt.Println("mt.small is nil ", mt.small == nil) - panic(".....") - // mt.left, mt.right = mt.right, mt.left } // Next returns the next element. If it is the same as the current key, ignore it. -func (mt *MergeIterator) Next() { - if mt.small.merge != nil { - mt.small.merge.Next() - } else if mt.small.concat != nil { - mt.small.concat.Next() - } else { - mt.small.iter.Next() - } - mt.small.setKey() - mt.fix() +func (mi *MergeIterator) Next() { + mi.small.next() + mi.fix() } // Rewind seeks to first element (or last element for reverse iterator). -func (mt *MergeIterator) Rewind() { - mt.left.iter.Rewind() - mt.left.setKey() - mt.right.iter.Rewind() - mt.right.setKey() - mt.fix() +func (mi *MergeIterator) Rewind() { + mi.left.rewind() + mi.right.rewind() + mi.fix() } // Seek brings us to element with key >= given key. -func (mt *MergeIterator) Seek(key []byte) { - mt.left.iter.Seek(key) - mt.left.setKey() - mt.right.iter.Seek(key) - mt.right.setKey() - mt.fix() +func (mi *MergeIterator) Seek(key []byte) { + mi.left.seek(key) + mi.right.seek(key) + mi.fix() } // Valid returns whether the MergeIterator is at a valid element. -func (mt *MergeIterator) Valid() bool { - return mt.small.valid +func (mi *MergeIterator) Valid() bool { + return mi.small.valid } // Key returns the key associated with the current iterator. -func (mt *MergeIterator) Key() []byte { - return mt.small.key +func (mi *MergeIterator) Key() []byte { + return mi.small.key } // Value returns the value associated with the iterator. -func (mt *MergeIterator) Value() y.ValueStruct { - return mt.small.iter.Value() +func (mi *MergeIterator) Value() y.ValueStruct { + return mi.small.iter.Value() } // Close implements y.Iterator. -func (mt *MergeIterator) Close() error { - err1 := mt.left.iter.Close() - err2 := mt.right.iter.Close() +func (mi *MergeIterator) Close() error { + err1 := mi.left.iter.Close() + err2 := mi.right.iter.Close() if err1 != nil { return errors.Wrap(err1, "MergeIterator") } diff --git a/table/merge_iterator_test.go b/table/merge_iterator_test.go index 9e4b935d9..d3a5d27c7 100644 --- a/table/merge_iterator_test.go +++ b/table/merge_iterator_test.go @@ -17,7 +17,6 @@ package table import ( - "fmt" "sort" "testing" @@ -103,10 +102,8 @@ func getAll(it y.Iterator) ([]string, []string) { for ; it.Valid(); it.Next() { k := it.Key() keys = append(keys, string(y.ParseKey(k))) - fmt.Println(keys) v := it.Value() vals = append(vals, string(v.Value)) - fmt.Println(vals) } return keys, vals } @@ -164,10 +161,10 @@ func TestMergeSingleReversed(t *testing.T) { func TestMergeMore(t *testing.T) { it := newSimpleIterator([]string{"1", "3", "7"}, []string{"a1", "a3", "a7"}, false) it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, false) - // it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false) + it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false) - mergeIt := NewMergeIterator([]y.Iterator{it, it2, it4}, false) + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, false) expectedKeys := []string{"1", "2", "3", "5", "7", "9"} expectedVals := []string{"a1", "b2", "a3", "b5", "a7", "d9"} mergeIt.Rewind() From ad4c19cc64268f3b655d40b6b46c52bbe6859cab Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Mon, 14 Oct 2019 19:01:01 +0530 Subject: [PATCH 16/23] Fix up --- table/merge_iterator.go | 1 - 1 file changed, 1 deletion(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index 50a045078..abc70f7a4 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -215,7 +215,6 @@ func NewMergeIterator(iters []y.Iterator, reverse bool) y.Iterator { } mi.left.setIterator(iters[0]) mi.right.setIterator(iters[1]) - // mi.small.setIterator(iters[0]) mi.small = &mi.left return mi } From c154e93dd7a68d6957880c26b863fec6a3e352bb Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Tue, 15 Oct 2019 13:42:18 +0530 Subject: [PATCH 17/23] Remove second iterator --- table/merge_iterator.go | 38 ++++++-------------------------------- 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index abc70f7a4..f78cb8176 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -24,16 +24,9 @@ import ( // MergeIterator merges multiple iterators. // NOTE: MergeIterator owns the array of iterators and is responsible for closing them. type MergeIterator struct { - left node - right node - - small *node - - // When the two iterators have the same value, the value in the second iterator is ignored. - // On level 0, we can have multiple iterators with the same key. In this case we want to - // use value of the iterator that was added first to the merge iterator. Second keeps track of the - // iterator that was added second so that we can resolve the same key conflict. - second y.Iterator + left node + right node + small *node reverse bool } @@ -103,27 +96,9 @@ func (mi *MergeIterator) fix() { cmp := y.CompareKeys(mi.small.key, mi.bigger().key) // Both the keys are equal. if cmp == 0 { - // Key conflict. Ignore the value in second iterator. - mi.second.Next() - var secondValid bool - if mi.second == mi.small.iter { - mi.small.setKey() - secondValid = mi.small.valid - } else if mi.second == mi.bigger().iter { - mi.bigger().setKey() - secondValid = mi.bigger().valid - } else { - panic("mt.second invalid") - } - if !secondValid { - // Swap small and big only if second points to - // the small one and the big is valid. - if mi.second == mi.small.iter && mi.bigger().valid { - mi.swapSmall() - } - return - } - continue + mi.right.next() + mi.fix() + return } if mi.reverse { if cmp < 0 { @@ -210,7 +185,6 @@ func NewMergeIterator(iters []y.Iterator, reverse bool) y.Iterator { return iters[0] } else if len(iters) == 2 { mi := &MergeIterator{ - second: iters[1], reverse: reverse, } mi.left.setIterator(iters[0]) From 76e7a0abc5f416e660c9e9fc8773431c7ed681e0 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Tue, 15 Oct 2019 19:50:46 +0530 Subject: [PATCH 18/23] refactor fix method --- table/merge_iterator.go | 36 +++++++++++++++++++----------------- table/merge_iterator_test.go | 33 ++++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index f78cb8176..a48c0b338 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -92,26 +92,28 @@ func (mi *MergeIterator) fix() { if !mi.bigger().valid { return } - for mi.small.valid { - cmp := y.CompareKeys(mi.small.key, mi.bigger().key) - // Both the keys are equal. - if cmp == 0 { - mi.right.next() - mi.fix() - return + if !mi.small.valid { + mi.swapSmall() + return + } + cmp := y.CompareKeys(mi.small.key, mi.bigger().key) + // Both the keys are equal. + if cmp == 0 { + // In case of same keys, move the right iterator ahead. + mi.right.next() + if &mi.right == mi.small { + mi.swapSmall() + } + } + if mi.reverse { + if cmp < 0 { + mi.swapSmall() } - if mi.reverse { - if cmp < 0 { - mi.swapSmall() - } - } else { - if cmp > 0 { - mi.swapSmall() - } + } else { + if cmp > 0 { + mi.swapSmall() } - return } - mi.swapSmall() } func (mi *MergeIterator) bigger() *node { diff --git a/table/merge_iterator_test.go b/table/merge_iterator_test.go index d3a5d27c7..97fe4ef01 100644 --- a/table/merge_iterator_test.go +++ b/table/merge_iterator_test.go @@ -163,15 +163,30 @@ func TestMergeMore(t *testing.T) { it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, false) it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false) it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false) - - mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, false) - expectedKeys := []string{"1", "2", "3", "5", "7", "9"} - expectedVals := []string{"a1", "b2", "a3", "b5", "a7", "d9"} - mergeIt.Rewind() - k, v := getAll(mergeIt) - require.EqualValues(t, expectedKeys, k) - require.EqualValues(t, expectedVals, v) - closeAndCheck(t, mergeIt, 4) + t.Run("forward", func(t *testing.T) { + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, false) + expectedKeys := []string{"1", "2", "3", "5", "7", "9"} + expectedVals := []string{"a1", "b2", "a3", "b5", "a7", "d9"} + mergeIt.Rewind() + k, v := getAll(mergeIt) + require.EqualValues(t, expectedKeys, k) + require.EqualValues(t, expectedVals, v) + closeAndCheck(t, mergeIt, 4) + }) + t.Run("reverse", func(t *testing.T) { + it.reversed = true + it2.reversed = true + it3.reversed = true + it4.reversed = true + mergeIt := NewMergeIterator([]y.Iterator{it, it2, it3, it4}, true) + expectedKeys := []string{"9", "7", "5", "3", "2", "1"} + expectedVals := []string{"d9", "a7", "b5", "a3", "b2", "a1"} + mergeIt.Rewind() + k, v := getAll(mergeIt) + require.EqualValues(t, expectedKeys, k) + require.EqualValues(t, expectedVals, v) + closeAndCheck(t, mergeIt, 4) + }) } // Ensure MergeIterator satisfies the Iterator interface From 9f2541e285510a818a9823ea0768091d26b212a4 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Tue, 15 Oct 2019 19:53:22 +0530 Subject: [PATCH 19/23] refactor test --- table/merge_iterator_test.go | 44 ++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/table/merge_iterator_test.go b/table/merge_iterator_test.go index 97fe4ef01..31819570a 100644 --- a/table/merge_iterator_test.go +++ b/table/merge_iterator_test.go @@ -256,14 +256,28 @@ func TestMergeIteratorDuplicate(t *testing.T) { it2 := newSimpleIterator([]string{"1", "3"}, []string{"b1", "b3"}, false) it3 := newSimpleIterator([]string{"0", "1", "2"}, []string{"c0", "c1", "c2"}, false) t.Run("forward", func(t *testing.T) { - it := NewMergeIterator([]y.Iterator{it3, it2, it1}, false) - - expectedKeys := []string{"0", "1", "2", "3"} - expectedVals := []string{"c0", "c1", "c2", "b3"} - it.Rewind() - k, v := getAll(it) - require.Equal(t, expectedKeys, k) - require.Equal(t, expectedVals, v) + t.Run("one", func(t *testing.T) { + it := NewMergeIterator([]y.Iterator{it3, it2, it1}, false) + expectedKeys := []string{"0", "1", "2", "3"} + expectedVals := []string{"c0", "c1", "c2", "b3"} + it.Rewind() + k, v := getAll(it) + require.Equal(t, expectedKeys, k) + require.Equal(t, expectedVals, v) + }) + t.Run("two", func(t *testing.T) { + it1 := newSimpleIterator([]string{"0", "1", "2"}, []string{"0", "1", "2"}, false) + it2 := newSimpleIterator([]string{"1"}, []string{"1"}, false) + it3 := newSimpleIterator([]string{"2"}, []string{"2"}, false) + it := NewMergeIterator([]y.Iterator{it3, it2, it1}, false) + + var cnt int + for it.Rewind(); it.Valid(); it.Next() { + require.EqualValues(t, cnt+48, it.Key()[0]) + cnt++ + } + require.Equal(t, 3, cnt) + }) }) t.Run("reverse", func(t *testing.T) { @@ -280,18 +294,4 @@ func TestMergeIteratorDuplicate(t *testing.T) { require.Equal(t, expectedKeys, k) require.Equal(t, expectedVals, v) }) - - t.Run("edge case", func(t *testing.T) { - it1 := newSimpleIterator([]string{"0", "1", "2"}, []string{"0", "1", "2"}, false) - it2 := newSimpleIterator([]string{"1"}, []string{"1"}, false) - it3 := newSimpleIterator([]string{"2"}, []string{"2"}, false) - it := NewMergeIterator([]y.Iterator{it3, it2, it1}, false) - - var cnt int - for it.Rewind(); it.Valid(); it.Next() { - require.EqualValues(t, cnt+48, it.Key()[0]) - cnt++ - } - require.Equal(t, 3, cnt) - }) } From 2429ef3811e9f6902e51a455e9169cae38f16000 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Wed, 16 Oct 2019 15:52:57 +0530 Subject: [PATCH 20/23] Add another test --- table/merge_iterator_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/table/merge_iterator_test.go b/table/merge_iterator_test.go index 31819570a..9733e2392 100644 --- a/table/merge_iterator_test.go +++ b/table/merge_iterator_test.go @@ -256,6 +256,15 @@ func TestMergeIteratorDuplicate(t *testing.T) { it2 := newSimpleIterator([]string{"1", "3"}, []string{"b1", "b3"}, false) it3 := newSimpleIterator([]string{"0", "1", "2"}, []string{"c0", "c1", "c2"}, false) t.Run("forward", func(t *testing.T) { + t.Run("only duplicates", func(t *testing.T) { + it := NewMergeIterator([]y.Iterator{it1, it3}, false) + expectedKeys := []string{"0", "1", "2"} + expectedVals := []string{"a0", "a1", "a2"} + it.Rewind() + k, v := getAll(it) + require.Equal(t, expectedKeys, k) + require.Equal(t, expectedVals, v) + }) t.Run("one", func(t *testing.T) { it := NewMergeIterator([]y.Iterator{it3, it2, it1}, false) expectedKeys := []string{"0", "1", "2", "3"} From cc868958712f14d6c0e16bce5387dbeea06c841b Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Mon, 21 Oct 2019 13:03:13 +0530 Subject: [PATCH 21/23] Address review comments --- table/merge_iterator.go | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index a48c0b338..294109723 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -44,6 +44,8 @@ type node struct { func (n *node) setIterator(iter y.Iterator) { n.iter = iter + // It's okay if the conversion below fails. We handle the nil values of + // merge and concat in all the methods. n.merge, _ = iter.(*MergeIterator) n.concat, _ = iter.(*ConcatIterator) } @@ -104,15 +106,22 @@ func (mi *MergeIterator) fix() { if &mi.right == mi.small { mi.swapSmall() } - } - if mi.reverse { - if cmp < 0 { + return + } else if cmp < 0 { // Small is less than bigger(). + if mi.reverse { mi.swapSmall() + } else { + // we don't need to do anything. Small already points to the smallest. } - } else { - if cmp > 0 { + return + } else { // bigger() is less than small. + if mi.reverse { + // Do nothing since we're iterating in reverse. Small currently points to + // the bigger key and that's okay in reverse iteration. + } else { mi.swapSmall() } + return } } @@ -191,6 +200,7 @@ func NewMergeIterator(iters []y.Iterator, reverse bool) y.Iterator { } mi.left.setIterator(iters[0]) mi.right.setIterator(iters[1]) + // Assign left iterator randomly. This will be fixed when user calls rewind/seek. mi.small = &mi.left return mi } From 59542eaf76a1af0609ee0a3ede1a31a4f940739b Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Mon, 21 Oct 2019 13:30:06 +0530 Subject: [PATCH 22/23] Reword comment --- table/merge_iterator.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/table/merge_iterator.go b/table/merge_iterator.go index 294109723..579b61408 100644 --- a/table/merge_iterator.go +++ b/table/merge_iterator.go @@ -44,8 +44,8 @@ type node struct { func (n *node) setIterator(iter y.Iterator) { n.iter = iter - // It's okay if the conversion below fails. We handle the nil values of - // merge and concat in all the methods. + // It's okay if the type assertion below fails and n.merge/n.concat are set to nil. + // We handle the nil values of merge and concat in all the methods. n.merge, _ = iter.(*MergeIterator) n.concat, _ = iter.(*ConcatIterator) } From fcced512414494aea57c6a9c5a1c7e095d2f1685 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Mon, 21 Oct 2019 15:12:14 +0530 Subject: [PATCH 23/23] uncomment --- table/table_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/table/table_test.go b/table/table_test.go index 82e1640cb..f2bd63012 100644 --- a/table/table_test.go +++ b/table/table_test.go @@ -550,6 +550,7 @@ func TestMergingIteratorReversed(t *testing.T) { opts := getTestTableOptions() f1 := buildTable(t, [][]string{ {"k1", "a1"}, + {"k2", "a2"}, {"k4", "a4"}, {"k5", "a5"}, }, opts) @@ -567,7 +568,7 @@ func TestMergingIteratorReversed(t *testing.T) { {"k5", "a5"}, {"k4", "a4"}, {"k3", "b3"}, - // {"k2", "b2"}, + {"k2", "a2"}, {"k1", "a1"}, } tbl1, err := OpenTable(f1, opts)