Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merging iterator to avoid child iterator reseek for some cases #5286

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
### Performance Improvements
* Reduce binary search when iterator reseek into the same data block.
* DBIter::Next() can skip user key checking if previous entry's seqnum is 0.
* Merging iterator avoids reseeking child iterators in some cases.

## 6.2.0 (4/30/2019)
### New Features
Expand Down
69 changes: 69 additions & 0 deletions db/db_iterator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2548,6 +2548,75 @@ TEST_P(DBIteratorTest, AvoidReseekLevelIterator) {
SyncPoint::GetInstance()->DisableProcessing();
}

// Checks how many IteratorWrapper::Seek() calls a DB iterator Seek() triggers,
// using the "IteratorWrapper::Seek:0" sync point as a counter. A lower count
// after a forward (increasing) reseek means some child iterator was not
// re-sought.
TEST_P(DBIteratorTest, AvoidReseekChildIterator) {
Options options = CurrentOptions();
options.compression = CompressionType::kNoCompression;
BlockBasedTableOptions table_options;
table_options.block_size = 800;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
Reopen(options);

Random rnd(301);
std::string random_str = RandomString(&rnd, 180);

// First flushed batch: keys 1, 2, 3, 4, 8, 9.
ASSERT_OK(Put("1", random_str));
ASSERT_OK(Put("2", random_str));
ASSERT_OK(Put("3", random_str));
ASSERT_OK(Put("4", random_str));
ASSERT_OK(Put("8", random_str));
ASSERT_OK(Put("9", random_str));
ASSERT_OK(Flush());
// Second flushed batch: keys 5, 6, 7 (falls inside the gap of the first one).
ASSERT_OK(Put("5", random_str));
ASSERT_OK(Put("6", random_str));
ASSERT_OK(Put("7", random_str));
ASSERT_OK(Flush());

// These two keys will be kept in memtable.
ASSERT_OK(Put("0", random_str));
ASSERT_OK(Put("8", random_str));

// Incremented once every time the sync point inside IteratorWrapper::Seek()
// is hit; reset to 0 before each Seek() under test.
int num_iter_wrapper_seek = 0;
SyncPoint::GetInstance()->SetCallBack(
"IteratorWrapper::Seek:0",
[&](void* /*arg*/) { num_iter_wrapper_seek++; });
SyncPoint::GetInstance()->EnableProcessing();
{
std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
iter->Seek("1");
ASSERT_TRUE(iter->Valid());
// DBIter always wraps internal iterator with IteratorWrapper,
// and in merging iterator each child iterator will be wrapped
// with IteratorWrapper.
// Expected 4 = 1 (DBIter's wrapper) + 3 children -- presumably the memtable
// plus the two flushed files; confirm against the LSM layout.
ASSERT_EQ(4, num_iter_wrapper_seek);

// child position: 1 and 5
// Increasing reseek: the child already at "5" (>= target "2") is expected
// to be skipped, hence one Seek fewer.
num_iter_wrapper_seek = 0;
iter->Seek("2");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(3, num_iter_wrapper_seek);

// child position: 2 and 5
// Both file children are positioned before the target "6", so nothing can
// be skipped.
num_iter_wrapper_seek = 0;
iter->Seek("6");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(4, num_iter_wrapper_seek);

// child position: 8 and 6
// The child at "8" is already past the target "7" and is expected to be
// skipped.
num_iter_wrapper_seek = 0;
iter->Seek("7");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(3, num_iter_wrapper_seek);

// child position: 8 and 7
// Target "5" is smaller than the current key, so this is not an increasing
// reseek and every child is expected to be re-sought.
num_iter_wrapper_seek = 0;
iter->Seek("5");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(4, num_iter_wrapper_seek);
}

SyncPoint::GetInstance()->DisableProcessing();
}

INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest,
testing::Values(true, false));

Expand Down
3 changes: 2 additions & 1 deletion db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,8 @@ class LevelIterator final : public InternalIterator {
bool skip_filters, int level, RangeDelAggregator* range_del_agg,
const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries =
nullptr)
: table_cache_(table_cache),
: InternalIterator(false),
table_cache_(table_cache),
read_options_(read_options),
env_options_(env_options),
icomparator_(icomparator),
Expand Down
3 changes: 2 additions & 1 deletion table/block_based_table_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,8 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
bool key_includes_seq = true,
bool index_key_is_full = true,
bool for_compaction = false)
: table_(table),
: InternalIteratorBase<TValue>(false),
table_(table),
read_options_(read_options),
icomp_(icomp),
user_comparator_(icomp.user_comparator()),
Expand Down
5 changes: 4 additions & 1 deletion table/internal_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ class PinnedIteratorsManager;
template <class TValue>
class InternalIteratorBase : public Cleanable {
public:
InternalIteratorBase() {}
// Default construction marks the iterator as mutable (is_mutable_ == true).
InternalIteratorBase() : is_mutable_(true) {}
// Constructs the base with a caller-chosen mutability flag; the value is
// stored in is_mutable_ and reported by is_mutable().
InternalIteratorBase(bool _is_mutable) : is_mutable_(_is_mutable) {}
virtual ~InternalIteratorBase() {}

// An iterator is either positioned at a key/value pair, or
Expand Down Expand Up @@ -119,6 +120,7 @@ class InternalIteratorBase : public Cleanable {
virtual Status GetProperty(std::string /*prop_name*/, std::string* /*prop*/) {
return Status::NotSupported("");
}
// Returns the mutability flag chosen at construction time (true unless the
// subclass passed false to the constructor).
bool is_mutable() const { return is_mutable_; }

protected:
void SeekForPrevImpl(const Slice& target, const Comparator* cmp) {
Expand All @@ -130,6 +132,7 @@ class InternalIteratorBase : public Cleanable {
Prev();
}
}
bool is_mutable_;

private:
// No copying allowed
Expand Down
7 changes: 6 additions & 1 deletion table/iterator_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,12 @@ class IteratorWrapperBase {
assert(!valid_ || iter_->status().ok());
}
void Prev() { assert(iter_); iter_->Prev(); Update(); }
void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); }
// Forwards Seek(k) to the wrapped iterator and then calls Update().
// Requires that an iterator has been set (asserted).
void Seek(const Slice& k) {
// Test hook: fires on every call so tests can count wrapper-level seeks.
TEST_SYNC_POINT("IteratorWrapper::Seek:0");
assert(iter_);
iter_->Seek(k);
Update();
}
void SeekForPrev(const Slice& k) {
assert(iter_);
iter_->SeekForPrev(k);
Expand Down
19 changes: 17 additions & 2 deletions table/merging_iterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,14 +127,29 @@ class MergingIterator : public InternalIterator {
}

void Seek(const Slice& target) override {
bool is_increasing_reseek = false;
if (current_ != nullptr && direction_ == kForward && status_.ok() &&
comparator_->Compare(target, key()) >= 0) {
is_increasing_reseek = true;
}
ClearHeaps();
status_ = Status::OK();
for (auto& child : children_) {
{
// If upper bound never changes, we can skip Seek() for
// the !Valid() case too, but people do hack the code to change
// upper bound between Seek(), so it's not a good idea to break
// the API.
// If DBIter is used on top of merging iterator, we probably
// can skip mutable child iterators if they are invalid too,
// but it's a less clean API. We can optimize for it later if
// needed.
if (!is_increasing_reseek || !child.Valid() ||
comparator_->Compare(target, child.key()) > 0 ||
child.iter()->is_mutable()) {
PERF_TIMER_GUARD(seek_child_seek_time);
child.Seek(target);
PERF_COUNTER_ADD(seek_child_seek_count, 1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be interesting to have a stat for how many were skipped.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean adding a new counter just for this? But reseek might not be common in most use cases. Is it worth it?

}
PERF_COUNTER_ADD(seek_child_seek_count, 1);

if (child.Valid()) {
assert(child.status().ok());
Expand Down