Skip to content

Commit

Permalink
Fix bug in partition filters with format_version=4 (#4381)
Browse files Browse the repository at this point in the history
Summary:
Value delta encoding in format_version 4 requires the differences between the size of two consecutive handles to be sent to BlockBuilder::Add. This applies not only to indexes on blocks but also the indexes on indexes and filters in partitioned indexes and filters respectively. The patch fixes a bug where the partitioned filters would encode the entire size of the handle rather than the difference of the size with the last size.
Pull Request resolved: #4381

Differential Revision: D9879505

Pulled By: maysamyabandeh

fbshipit-source-id: 27a22e49b482b927fbd5629dc310c46d63d4b6d1
  • Loading branch information
Maysam Yabandeh authored and facebook-github-bot committed Sep 18, 2018
1 parent 1626f6a commit 65ac72e
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 26 deletions.
37 changes: 31 additions & 6 deletions db/db_bloom_filter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@ class DBBloomFilterTest : public DBTestBase {

class DBBloomFilterTestWithParam
: public DBTestBase,
public testing::WithParamInterface<std::tuple<bool, bool>> {
public testing::WithParamInterface<std::tuple<bool, bool, uint32_t>> {
// public testing::WithParamInterface<bool> {
protected:
bool use_block_based_filter_;
bool partition_filters_;
uint32_t format_version_;

public:
DBBloomFilterTestWithParam() : DBTestBase("/db_bloom_filter_tests") {}
Expand All @@ -36,9 +37,12 @@ class DBBloomFilterTestWithParam
void SetUp() override {
use_block_based_filter_ = std::get<0>(GetParam());
partition_filters_ = std::get<1>(GetParam());
format_version_ = std::get<2>(GetParam());
}
};

class DBBloomFilterTestDefFormatVersion : public DBBloomFilterTestWithParam {};

class SliceTransformLimitedDomainGeneric : public SliceTransform {
const char* Name() const override {
return "SliceTransformLimitedDomainGeneric";
Expand All @@ -62,7 +66,7 @@ class SliceTransformLimitedDomainGeneric : public SliceTransform {
// KeyMayExist can lead to a few false positives, but not false negatives.
// To make test deterministic, use a much larger number of bits per key-20 than
// bits in the key, so that false positives are eliminated
TEST_P(DBBloomFilterTestWithParam, KeyMayExist) {
TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
do {
ReadOptions ropts;
std::string value;
Expand Down Expand Up @@ -401,6 +405,11 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
table_options.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
}
table_options.format_version = format_version_;
if (format_version_ >= 4) {
// value delta encoding challenged more with index interval > 1
table_options.index_block_restart_interval = 8;
}
table_options.metadata_block_size = 32;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));

Expand Down Expand Up @@ -456,10 +465,26 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
} while (ChangeCompactOptions());
}

INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithParam, DBBloomFilterTestWithParam,
::testing::Values(std::make_tuple(true, false),
std::make_tuple(false, true),
std::make_tuple(false, false)));
INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestDefFormatVersion,
::testing::Values(std::make_tuple(true, false, test::kDefaultFormatVersion),
std::make_tuple(false, true, test::kDefaultFormatVersion),
std::make_tuple(false, false,
test::kDefaultFormatVersion)));

INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestWithParam,
::testing::Values(std::make_tuple(true, false, test::kDefaultFormatVersion),
std::make_tuple(false, true, test::kDefaultFormatVersion),
std::make_tuple(false, false,
test::kDefaultFormatVersion)));

INSTANTIATE_TEST_CASE_P(
FormatLatest, DBBloomFilterTestWithParam,
::testing::Values(std::make_tuple(true, false, test::kLatestFormatVersion),
std::make_tuple(false, true, test::kLatestFormatVersion),
std::make_tuple(false, false,
test::kLatestFormatVersion)));

TEST_F(DBBloomFilterTest, BloomFilterRate) {
while (ChangeFilterOptions()) {
Expand Down
5 changes: 3 additions & 2 deletions db/db_test_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -451,13 +451,14 @@ Options DBTestBase::GetOptions(
}
case kBlockBasedTableWithPartitionedIndexFormat4: {
table_options.format_version = 4;
// Format 3 changes the binary index format. Since partitioned index is a
// Format 4 changes the binary index format. Since partitioned index is a
// super-set of simple indexes, we are also using kTwoLevelIndexSearch to
// test this format.
table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
// The top-level index in partition filters are also affected by format 3.
// The top-level index in partition filters are also affected by format 4.
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
table_options.partition_filters = true;
table_options.index_block_restart_interval = 8;
break;
}
case kBlockBasedTableWithIndexRestartInterval: {
Expand Down
2 changes: 0 additions & 2 deletions db/db_test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,6 @@ struct OptionsOverride {
// These will be used only if filter_policy is set
bool partition_filters = false;
uint64_t metadata_block_size = 1024;
BlockBasedTableOptions::IndexType index_type =
BlockBasedTableOptions::IndexType::kBinarySearch;

// Used as a bit mask of individual enums in which to skip an XF test point
int skip_policy = 0;
Expand Down
7 changes: 0 additions & 7 deletions table/format.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,6 @@ void BlockHandle::EncodeTo(std::string* dst) const {
PutVarint64Varint64(dst, offset_, size_);
}

void BlockHandle::EncodeSizeTo(std::string* dst) const {
// Sanity check that all fields have been set
assert(offset_ != ~static_cast<uint64_t>(0));
assert(size_ != ~static_cast<uint64_t>(0));
PutVarint64(dst, size_);
}

Status BlockHandle::DecodeFrom(Slice* input) {
if (GetVarint64(input, &offset_) &&
GetVarint64(input, &size_)) {
Expand Down
1 change: 0 additions & 1 deletion table/format.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ class BlockHandle {
void EncodeTo(std::string* dst) const;
Status DecodeFrom(Slice* input);
Status DecodeSizeFrom(uint64_t offset, Slice* input);
void EncodeSizeTo(std::string* dst) const;

// Return a string that contains the copy of handle.
std::string ToString(bool hex = true) const;
Expand Down
5 changes: 4 additions & 1 deletion table/partitioned_filter_block.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ Slice PartitionedFilterBlockBuilder::Finish(
std::string handle_encoding;
last_partition_block_handle.EncodeTo(&handle_encoding);
std::string handle_delta_encoding;
last_partition_block_handle.EncodeSizeTo(&handle_delta_encoding);
PutVarsignedint64(
&handle_delta_encoding,
last_partition_block_handle.size() - last_encoded_handle_.size());
last_encoded_handle_ = last_partition_block_handle;
const Slice handle_delta_encoding_slice(handle_delta_encoding);
index_on_filter_block_builder_.Add(last_entry.key, handle_encoding,
&handle_delta_encoding_slice);
Expand Down
1 change: 1 addition & 0 deletions table/partitioned_filter_block.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
uint32_t filters_in_partition_;
// Number of keys added
size_t num_added_;
BlockHandle last_encoded_handle_;
};

class PartitionedFilterBlockReader : public FilterBlockReader,
Expand Down
23 changes: 16 additions & 7 deletions table/partitioned_filter_block_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ class MockedBlockBasedTable : public BlockBasedTable {
}
};

class PartitionedFilterBlockTest : public testing::Test {
class PartitionedFilterBlockTest
: public testing::Test,
virtual public ::testing::WithParamInterface<uint32_t> {
public:
BlockBasedTableOptions table_options_;
InternalKeyComparator icomp = InternalKeyComparator(BytewiseComparator());
Expand All @@ -60,6 +62,8 @@ class PartitionedFilterBlockTest : public testing::Test {
table_options_.no_block_cache = true; // Otherwise BlockBasedTable::Close
// will access variable that are not
// initialized in our mocked version
table_options_.format_version = GetParam();
table_options_.index_block_restart_interval = 3;
}

std::shared_ptr<Cache> cache_;
Expand Down Expand Up @@ -279,22 +283,27 @@ class PartitionedFilterBlockTest : public testing::Test {
}
};

TEST_F(PartitionedFilterBlockTest, EmptyBuilder) {
INSTANTIATE_TEST_CASE_P(FormatDef, PartitionedFilterBlockTest,
testing::Values(test::kDefaultFormatVersion));
INSTANTIATE_TEST_CASE_P(FormatLatest, PartitionedFilterBlockTest,
testing::Values(test::kLatestFormatVersion));

TEST_P(PartitionedFilterBlockTest, EmptyBuilder) {
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
std::unique_ptr<PartitionedFilterBlockBuilder> builder(NewBuilder(pib.get()));
const bool empty = true;
VerifyReader(builder.get(), pib.get(), empty);
}

TEST_F(PartitionedFilterBlockTest, OneBlock) {
TEST_P(PartitionedFilterBlockTest, OneBlock) {
uint64_t max_index_size = MaxIndexSize();
for (uint64_t i = 1; i < max_index_size + 1; i++) {
table_options_.metadata_block_size = i;
TestBlockPerAllKeys();
}
}

TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {
TEST_P(PartitionedFilterBlockTest, TwoBlocksPerKey) {
uint64_t max_index_size = MaxIndexSize();
for (uint64_t i = 1; i < max_index_size + 1; i++) {
table_options_.metadata_block_size = i;
Expand All @@ -304,7 +313,7 @@ TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {

// This reproduces the bug that a prefix is the same among multiple consecutive
// blocks but the bug would add it only to the first block.
TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
// some small number to cause partition cuts
table_options_.metadata_block_size = 1;
std::unique_ptr<const SliceTransform> prefix_extractor
Expand All @@ -330,15 +339,15 @@ TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
}
}

TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
TEST_P(PartitionedFilterBlockTest, OneBlockPerKey) {
uint64_t max_index_size = MaxIndexSize();
for (uint64_t i = 1; i < max_index_size + 1; i++) {
table_options_.metadata_block_size = i;
TestBlockPerKey();
}
}

TEST_F(PartitionedFilterBlockTest, PartitionCount) {
TEST_P(PartitionedFilterBlockTest, PartitionCount) {
int num_keys = sizeof(keys) / sizeof(*keys);
table_options_.metadata_block_size =
std::max(MaxIndexSize(), MaxFilterSize());
Expand Down

0 comments on commit 65ac72e

Please sign in to comment.