Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FullBloomTest.CorruptFilters,RawSchema #5834

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion util/bloom.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ class FullFilterBitsReader : public FilterBitsReader {
if (num_lines_at_curr_cache_size == 0) {
// The cache line size seems not a power of two. It's not supported
// and indicates a corruption so disable using this filter.
assert(false);
// Removed for unit testing corruption: assert(false);
num_lines_ = 0;
num_probes_ = 0;
break;
Expand Down
185 changes: 185 additions & 0 deletions util/bloom_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,10 @@ class FullBloomTest : public testing::Test {
bits_builder_->AddKey(s);
}

void OpenRaw(const Slice& s) {
bits_reader_.reset(policy_->GetFilterBitsReader(s));
}

void Build() {
Slice filter = bits_builder_->Finish(&buf_);
bits_reader_.reset(policy_->GetFilterBitsReader(filter));
Expand All @@ -298,6 +302,17 @@ class FullBloomTest : public testing::Test {
return bits_reader_->MayMatch(s);
}

uint64_t PackedMatches() {
char buffer[sizeof(int)];
uint64_t result = 0;
for (int i = 0; i < 64; i++) {
if (Matches(Key(i + 12345, buffer))) {
result |= 1 << i;
}
}
return result;
}

double FalsePositiveRate() {
char buffer[sizeof(int)];
int result = 0;
Expand Down Expand Up @@ -458,6 +473,176 @@ TEST_F(FullBloomTest, Schema) {
ResetPolicy();
}

// A helper class for testing custom or corrupt filter bits as read by
// FullFilterBitsReader.
struct RawFilterTester {
pdillinger marked this conversation as resolved.
Show resolved Hide resolved
// Buffer, from which we always return a tail Slice, so the
// last five bytes are always the metadata bytes.
std::array<char, 3000> data_;
// Points five bytes from the end
char* metadata_ptr_;

RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {}

Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines,
uint32_t num_probes) {
metadata_ptr_[0] = static_cast<char>(num_probes);
EncodeFixed32(metadata_ptr_ + 1, num_lines);
pdillinger marked this conversation as resolved.
Show resolved Hide resolved
uint32_t len = len_without_metadata + /*metadata*/ 5;
assert(len <= data_.size());
return Slice(metadata_ptr_ - len_without_metadata, len);
}

Slice Reset(uint32_t len_without_metadata, uint32_t num_lines,
uint32_t num_probes, bool fill_ones) {
data_.fill(fill_ones ? 0xff : 0);
return ResetNoFill(len_without_metadata, num_lines, num_probes);
}

Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines,
uint32_t num_probes) {
for (uint32_t i = 0; i < data_.size(); ++i) {
data_[i] = static_cast<char>(0x7b7b >> (i % 7));
}
return ResetNoFill(len_without_metadata, num_lines, num_probes);
}
};

TEST_F(FullBloomTest, RawSchema) {
RawFilterTester cft;
// Two probes, about 3/4 bits set: ~50% "FP" rate
// One 256-byte cache line.
OpenRaw(cft.ResetWeirdFill(256, 1, 2));
ASSERT_EQ(18446744073675927543ULL, PackedMatches());

// Two 128-byte cache lines.
OpenRaw(cft.ResetWeirdFill(256, 2, 2));
ASSERT_EQ(18446744073407559151ULL, PackedMatches());

// Four 64-byte cache lines.
OpenRaw(cft.ResetWeirdFill(256, 4, 2));
ASSERT_EQ(18446744073441107966ULL, PackedMatches());
}

TEST_F(FullBloomTest, CorruptFilters) {
RawFilterTester cft;

for (bool fill : {false, true}) {
// Good filter bits - returns same as fill
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));

// Good filter bits - returns same as fill
OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));

// Good filter bits - returns same as fill
// 256 is unusual but legal cache line size
OpenRaw(cft.Reset(256 * 3, 3, 6, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));

// Good filter bits - returns same as fill
// 30 should be max num_probes
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));

// Good filter bits - returns same as fill
// 1 should be min num_probes
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));

// Type 1 trivial filter bits - returns true as if FP by zero probes
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));

// Type 2 trivial filter bits - returns false as if built from zero keys
OpenRaw(cft.Reset(0, 0, 6, fill));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));

// Type 2 trivial filter bits - returns false as if built from zero keys
OpenRaw(cft.Reset(0, 37, 6, fill));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));

// Type 2 trivial filter bits - returns false as 0 size trumps 0 probes
OpenRaw(cft.Reset(0, 0, 0, fill));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));

// Bad filter bits - returns true for safety
// No solution to 0 * x == CACHE_LINE_SIZE
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));

// Bad filter bits - returns true for safety
// Can't have 3 * x == 4 for integer x
OpenRaw(cft.Reset(4, 3, 6, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));

// Bad filter bits - returns true for safety
// 97 bytes is not a power of two, so not a legal cache line size
OpenRaw(cft.Reset(97 * 3, 3, 6, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));

// Bad filter bits
// 65 bytes is not a power of two, so not a legal cache line size
OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
// ASSERT_TRUE(Matches("hello"));
// ASSERT_TRUE(Matches("world"));
// NB: NOT PROPERLY CHECKED in implementation
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));

// Bad filter bits - returns false as if built from zero keys
// < 5 bytes overall means missing even metadata
OpenRaw(cft.Reset(-1, 3, 6, fill));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));

OpenRaw(cft.Reset(-5, 3, 6, fill));
ASSERT_FALSE(Matches("hello"));
ASSERT_FALSE(Matches("world"));

// Dubious filter bits - returns same as fill (for now)
// 31 is not a useful num_probes, nor generated by RocksDB unless directly
// using filter bits API without BloomFilterPolicy.
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));

// Dubious filter bits - returns same as fill (for now)
// Similar, with 127, largest positive char
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill));
ASSERT_EQ(fill, Matches("hello"));
ASSERT_EQ(fill, Matches("world"));

// Dubious filter bits - returns true (for now)
// num_probes set to 128 / -128, lowest negative char
// NB: Bug in implementation interprets this as negative and has same
// effect as zero probes, but effectively reserves negative char values
// for future use.
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));

// Dubious filter bits - returns true (for now)
// Similar, with 255 / -1
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 255, fill));
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
}
}

} // namespace rocksdb

int main(int argc, char** argv) {
Expand Down