Skip to content

Commit

Permalink
Fix wrong smallest key of delete range tombstones
Browse files Browse the repository at this point in the history
Summary:
Since tombstones are not stored in order, we may get a wrong smallest key if we only consider the first added tombstone.
Check #2752 for more details.
Closes #2799

Differential Revision: D5728217

Pulled By: ajkr

fbshipit-source-id: 4a53edb0ca80d2a9fcf10749e52d47d57d6417d3
  • Loading branch information
huachaohuang authored and facebook-github-bot committed Aug 30, 2017
1 parent b767972 commit 0980dc6
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 2 deletions.
34 changes: 34 additions & 0 deletions db/db_range_del_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,40 @@ TEST_F(DBRangeDelTest, CompactionTreatsSplitInputLevelDeletionAtomically) {
}
}

TEST_F(DBRangeDelTest, UnorderedTombstones) {
// Regression test for #2752. Range delete tombstones between
// different snapshot stripes are not stored in order, so the first
// tombstone of each snapshot stripe should be checked as a smallest
// candidate.
Options options = CurrentOptions();
DestroyAndReopen(options);

auto cf = db_->DefaultColumnFamily();

ASSERT_OK(db_->Put(WriteOptions(), cf, "a", "a"));
ASSERT_OK(db_->Flush(FlushOptions(), cf));
ASSERT_EQ(1, NumTableFilesAtLevel(0));
ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
ASSERT_EQ(1, NumTableFilesAtLevel(1));

ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "b", "c"));
// Hold a snapshot to separate these two delete ranges.
auto snapshot = db_->GetSnapshot();
ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "a", "b"));
ASSERT_OK(db_->Flush(FlushOptions(), cf));
db_->ReleaseSnapshot(snapshot);

std::vector<std::vector<FileMetaData>> files;
dbfull()->TEST_GetFilesMetaData(cf, &files);
ASSERT_EQ(1, files[0].size());
ASSERT_EQ("a", files[0][0].smallest.user_key());
ASSERT_EQ("c", files[0][0].largest.user_key());

std::string v;
auto s = db_->Get(ReadOptions(), "a", &v);
ASSERT_TRUE(s.IsNotFound());
}

#endif // ROCKSDB_LITE

} // namespace rocksdb
Expand Down
4 changes: 2 additions & 2 deletions db/range_del_aggregator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -413,8 +413,8 @@ void RangeDelAggregator::AddToBuilder(

// Note the order in which tombstones are stored is insignificant since we
// insert them into a std::map on the read path.
bool first_added = false;
while (stripe_map_iter != rep_->stripe_map_.end()) {
bool first_added = false;
for (auto tombstone_map_iter = stripe_map_iter->second.raw_map.begin();
tombstone_map_iter != stripe_map_iter->second.raw_map.end();
++tombstone_map_iter) {
Expand Down Expand Up @@ -453,7 +453,7 @@ void RangeDelAggregator::AddToBuilder(
builder->Add(ikey_and_end_key.first.Encode(), ikey_and_end_key.second);
if (!first_added) {
first_added = true;
InternalKey smallest_candidate = std::move(ikey_and_end_key.first);;
InternalKey smallest_candidate = std::move(ikey_and_end_key.first);
if (lower_bound != nullptr &&
icmp_.user_comparator()->Compare(smallest_candidate.user_key(),
*lower_bound) <= 0) {
Expand Down

0 comments on commit 0980dc6

Please sign in to comment.