From 4b031b79874063e8385bea4a25a7510d5a700d99 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 28 Dec 2018 21:07:14 -0800 Subject: [PATCH 1/3] Fix point lookup on range tombstone sentinel endpoint --- HISTORY.md | 1 + db/db_range_del_test.cc | 5 +++++ db/version_set.cc | 7 +++++++ 3 files changed, 13 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index 113381d8c02..a631532da07 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -14,6 +14,7 @@ * Fix a memory leak when files with range tombstones are read in mmap mode and block cache is enabled * Fix handling of corrupt range tombstone blocks such that corruptions cannot cause deleted keys to reappear * Lock free MultiGet +* Fix incorrect `NotFound` point lookup result when querying the endpoint of a file that has been extended by a range tombstone. ## 5.18.0 (11/30/2018) ### New Features diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index 508149a340c..73bc4d27550 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -1041,11 +1041,16 @@ TEST_F(DBRangeDelTest, RangeTombstoneEndKeyAsSstableUpperBound) { // L2: // [key000000#1,1, key000000#1,1] // [key000002#6,1, key000004#72057594037927935,15] + // + // At the same time, verify the compaction does not cause the key at the + // endpoint (key000002#6,1) to disappear. + ASSERT_EQ(value, Get(Key(2))); auto begin_str = Key(3); const rocksdb::Slice begin = begin_str; dbfull()->TEST_CompactRange(1, &begin, nullptr); ASSERT_EQ(1, NumTableFilesAtLevel(1)); ASSERT_EQ(2, NumTableFilesAtLevel(2)); + ASSERT_EQ(value, Get(Key(2))); } { diff --git a/db/version_set.cc b/db/version_set.cc index 8de8c86b555..e7f018bb204 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -200,6 +200,13 @@ class FilePicker { break; } } + if (cmp_largest == 0 && + GetInternalKeySeqno(f->largest_key) == kMaxSequenceNumber) { + assert(ExtractValueType(f->largest_key) == kTypeRangeDeletion); + // Key falls at the range tombstone sentinel endpoint. Proceed to + // next level. + break; + } } #ifndef NDEBUG // Sanity check to make sure that the files are correctly sorted From 61b8571bf3dffdb4898c2dc95bbd542715b2cc84 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 2 Jan 2019 16:32:01 -0800 Subject: [PATCH 2/3] skip files before lookup key in internal key ordering --- db/version_set.cc | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index e7f018bb204..973f02f0045 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -200,13 +200,6 @@ class FilePicker { break; } } - if (cmp_largest == 0 && - GetInternalKeySeqno(f->largest_key) == kMaxSequenceNumber) { - assert(ExtractValueType(f->largest_key) == kTypeRangeDeletion); - // Key falls at the range tombstone sentinel endpoint. Proceed to - // next level. - break; - } } #ifndef NDEBUG // Sanity check to make sure that the files are correctly sorted @@ -315,10 +308,23 @@ class FilePicker { search_right_bound_ = static_cast(curr_file_level_->num_files) - 1; } + // `search_right_bound_` is an inclusive upper-bound, but since it was + // determined based on user key, it is still possible the lookup key + // falls to the right of `search_right_bound_`'s corresponding file. + // So, pass a limit one higher, which allows us to detect this case. start_index = FindFileInRange(*internal_comparator_, *curr_file_level_, ikey_, static_cast(search_left_bound_), - static_cast(search_right_bound_)); + static_cast(search_right_bound_) + 1); + if (start_index == search_right_bound_ + 1) { + // `ikey_` comes after `search_right_bound_`. The lookup key does + // not exist on this level, so let's skip this level and do a full + // binary search on the next level. + search_left_bound_ = 0; + search_right_bound_ = FileIndexer::kLevelMaxIndex; + curr_level_++; + continue; + } } else { // search_left_bound > search_right_bound, key does not exist in // this level. Since no comparison is done in this level, it will From bdfb0225dfc030cc45fe750e3d2ee55f8c24ac70 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 2 Jan 2019 18:38:34 -0800 Subject: [PATCH 3/3] unify the case where file range has size one --- db/version_set.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 973f02f0045..fb7542eb454 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -301,9 +301,7 @@ class FilePicker { // On Level-n (n>=1), files are sorted. Binary search to find the // earliest file whose largest key >= ikey. Search left bound and // right bound are used to narrow the range. - if (search_left_bound_ == search_right_bound_) { - start_index = search_left_bound_; - } else if (search_left_bound_ < search_right_bound_) { + if (search_left_bound_ <= search_right_bound_) { if (search_right_bound_ == FileIndexer::kLevelMaxIndex) { search_right_bound_ = static_cast(curr_file_level_->num_files) - 1;