Skip to content
Permalink
Browse files

xxHash

Summary:
Originally: https://github.com/facebook/rocksdb/pull/87/files

I'm taking over to apply some finishing touches

Test Plan: will add tests

Reviewers: dhruba, haobo, sdong, yhchiang, ljin

Reviewed By: yhchiang

CC: leveldb

Differential Revision: https://reviews.facebook.net/D18315
  • Loading branch information
igorcanadi committed May 1, 2014
1 parent 61955a0 commit 0afc8bc29a5800e3212388c327c750d32e31f3d6
@@ -4,12 +4,11 @@

### Public API changes
* Added _LEVEL to all InfoLogLevel enums
* Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
### New Features
* Column family support
### Public API changes
* Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
* Added an option to use different checksum functions in BlockBasedTableOptions
## 2.8.0 (04/04/2014)
@@ -36,6 +36,7 @@
#include "util/string_util.h"
#include "util/statistics.h"
#include "util/testutil.h"
#include "util/xxhash.h"
#include "hdfs/env_hdfs.h"
#include "utilities/merge_operators.h"

@@ -64,6 +65,7 @@ DEFINE_string(benchmarks,
"randomwithverify,"
"fill100K,"
"crc32c,"
"xxhash,"
"compress,"
"uncompress,"
"acquireload,",
@@ -107,6 +109,7 @@ DEFINE_string(benchmarks,
"\tseekrandom -- N random seeks\n"
"\tseekrandom -- 1 writer, N threads doing random seeks\n"
"\tcrc32c -- repeated crc32c of 4K of data\n"
"\txxhash -- repeated xxHash of 4K of data\n"
"\tacquireload -- load N*1000 times\n"
"Meta operations:\n"
"\tcompact -- Compact the entire DB\n"
@@ -1234,6 +1237,8 @@ class Benchmark {
method = &Benchmark::Compact;
} else if (name == Slice("crc32c")) {
method = &Benchmark::Crc32c;
} else if (name == Slice("xxhash")) {
method = &Benchmark::xxHash;
} else if (name == Slice("acquireload")) {
method = &Benchmark::AcquireLoad;
} else if (name == Slice("compress")) {
@@ -1382,6 +1387,25 @@ class Benchmark {
thread->stats.AddMessage(label);
}

// Benchmark: repeatedly computes the 32-bit xxHash of one 4K buffer until
// about 500MB of data have been hashed, recording one finished op per hash.
void xxHash(ThreadState* thread) {
  const int kChunkSize = 4096;
  const int64_t kTotalBytes = 500 * 1048576;
  const char* const kLabel = "(4K per op)";
  const std::string buffer(kChunkSize, 'x');

  unsigned int last_hash = 0;
  int64_t processed = 0;
  for (; processed < kTotalBytes; processed += kChunkSize) {
    // Seed 0 on every iteration; only the most recent hash is kept.
    last_hash = XXH32(buffer.data(), kChunkSize, 0);
    thread->stats.FinishedSingleOp(nullptr);
  }

  // Print the hash so that the computed result is actually used (not dead).
  fprintf(stderr, "... xxh32=0x%x\r", last_hash);

  thread->stats.AddBytes(processed);
  thread->stats.AddMessage(kLabel);
}

void AcquireLoad(ThreadState* thread) {
int dummy;
port::AtomicPointer ap(&dummy);
@@ -316,6 +316,7 @@ class DBTest {
kUniversalCompaction,
kCompressedBlockCache,
kInfiniteMaxOpenFiles,
kxxHashChecksum,
kEnd
};
int option_config_;
@@ -496,6 +497,12 @@ class DBTest {
case kInfiniteMaxOpenFiles:
options.max_open_files = -1;
break;
case kxxHashChecksum: {
BlockBasedTableOptions table_options;
table_options.checksum = kxxHash;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
break;
}
case kBlockBasedTableWithPrefixHashIndex: {
BlockBasedTableOptions table_options;
table_options.index_type = BlockBasedTableOptions::kHashSearch;
@@ -6778,7 +6785,40 @@ TEST(DBTest, TailingIteratorPrefixSeek) {
ASSERT_TRUE(!iter->Valid());
}

// Writes one table file with each checksum type, then checks that every key
// can be read back no matter which checksum type the table factory is
// currently configured with.
TEST(DBTest, ChecksumTest) {
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();

  // Phase 1: write a table protected by the CRC32c checksum.
  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(&options);
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(Flush());  // table with crc checksum

  // Phase 2: write a second table protected by the xxHash checksum.
  table_options.checksum = kxxHash;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(&options);
  ASSERT_OK(Put("e", "f"));
  ASSERT_OK(Put("g", "h"));
  ASSERT_OK(Flush());  // table with xxhash checksum

  // Phase 3: with CRC32c configured, both tables must be readable.
  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(&options);
  ASSERT_EQ("b", Get("a"));
  ASSERT_EQ("d", Get("c"));
  ASSERT_EQ("f", Get("e"));
  ASSERT_EQ("h", Get("g"));

  // Phase 4: with xxHash configured, both tables must be readable as well.
  // (This pass used to repeat kCRC32c, which made it identical to phase 3 and
  // left the xxHash-configured read path untested.)
  table_options.checksum = kxxHash;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(&options);
  ASSERT_EQ("b", Get("a"));
  ASSERT_EQ("d", Get("c"));
  ASSERT_EQ("f", Get("e"));
  ASSERT_EQ("h", Get("g"));
}
} // namespace rocksdb

int main(int argc, char** argv) {
@@ -38,6 +38,12 @@ struct Options;

using std::unique_ptr;

// Checksum algorithms selectable for table files. Each enumerator has an
// explicit one-byte value.
enum ChecksumType : char {
  kNoChecksum = 0,  // not yet supported; will fail
  kCRC32c = 1,      // CRC32c checksum
  kxxHash = 2,      // xxHash checksum
};

// For advanced user only
struct BlockBasedTableOptions {
// @flush_block_policy_factory creates the instances of flush block policy.
@@ -67,6 +73,11 @@ struct BlockBasedTableOptions {
};

IndexType index_type = kBinarySearch;

// Use the specified checksum type. Newly created table files will be
// protected with this checksum type. Old table files will still be readable,
// even if they were written with a different checksum type.
ChecksumType checksum = kCRC32c;
};

// Table Properties that are specific to block-based table properties.
@@ -37,6 +37,7 @@
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/stop_watch.h"
#include "util/xxhash.h"

namespace rocksdb {

@@ -231,12 +232,14 @@ Slice CompressBlock(const Slice& raw,
} // anonymous namespace

// kBlockBasedTableMagicNumber was picked by running
// echo http://code.google.com/p/leveldb/ | sha1sum
// echo rocksdb.table.block_based | sha1sum
// and taking the leading 64 bits.
// Please note that kBlockBasedTableMagicNumber may also be accessed by
// other .cc files, so it has to be explicitly declared with "extern".
extern const uint64_t kBlockBasedTableMagicNumber
= 0xdb4775248b80fb57ull;
extern const uint64_t kBlockBasedTableMagicNumber = 0x88e241b785f4cff7ull;
// We also support reading and writing legacy block based table format (for
// backwards compatibility)
extern const uint64_t kLegacyBlockBasedTableMagicNumber = 0xdb4775248b80fb57ull;

// A collector that collects properties of interest to block-based table.
// For now this class looks heavy-weight since we only write one additional
@@ -289,6 +292,7 @@ struct BlockBasedTableBuilder::Rep {

std::string last_key;
CompressionType compression_type;
ChecksumType checksum_type;
TableProperties props;

bool closed = false; // Either Finish() or Abandon() has been called.
@@ -303,14 +307,16 @@ struct BlockBasedTableBuilder::Rep {

Rep(const Options& opt, const InternalKeyComparator& icomparator,
WritableFile* f, FlushBlockPolicyFactory* flush_block_policy_factory,
CompressionType compression_type, IndexType index_block_type)
CompressionType compression_type, IndexType index_block_type,
ChecksumType checksum_type)
: options(opt),
internal_comparator(icomparator),
file(f),
data_block(options, &internal_comparator),
index_builder(
CreateIndexBuilder(index_block_type, &internal_comparator)),
compression_type(compression_type),
checksum_type(checksum_type),
filter_block(opt.filter_policy == nullptr
? nullptr
: new FilterBlockBuilder(opt, &internal_comparator)),
@@ -330,7 +336,8 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
: rep_(new Rep(options, internal_comparator, file,
table_options.flush_block_policy_factory.get(),
compression_type,
BlockBasedTableOptions::IndexType::kBinarySearch)) {
BlockBasedTableOptions::IndexType::kBinarySearch,
table_options.checksum)) {
if (rep_->filter_block != nullptr) {
rep_->filter_block->StartBlock(0);
}
@@ -443,9 +450,27 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
if (r->status.ok()) {
char trailer[kBlockTrailerSize];
trailer[0] = type;
uint32_t crc = crc32c::Value(block_contents.data(), block_contents.size());
crc = crc32c::Extend(crc, trailer, 1); // Extend crc to cover block type
EncodeFixed32(trailer+1, crc32c::Mask(crc));
char* trailer_without_type = trailer + 1;
switch (r->checksum_type) {
case kNoChecksum:
// we don't support no checksum yet
assert(false);
// intentional fallthrough in release binary
case kCRC32c: {
auto crc = crc32c::Value(block_contents.data(), block_contents.size());
crc = crc32c::Extend(crc, trailer, 1); // Extend to cover block type
EncodeFixed32(trailer_without_type, crc32c::Mask(crc));
break;
}
case kxxHash: {
void* xxh = XXH32_init(0);
XXH32_update(xxh, block_contents.data(), block_contents.size());
XXH32_update(xxh, trailer, 1); // Extend to cover block type
EncodeFixed32(trailer_without_type, XXH32_digest(xxh));
break;
}
}

r->status = r->file->Append(Slice(trailer, kBlockTrailerSize));
if (r->status.ok()) {
r->status = InsertBlockInCache(block_contents, type, handle);
@@ -596,9 +621,19 @@ Status BlockBasedTableBuilder::Finish() {

// Write footer
if (ok()) {
Footer footer(kBlockBasedTableMagicNumber);
// No need to write out new footer if we're using default checksum.
// We're writing the legacy magic number because we want old versions of
// RocksDB to be able to read files generated with the new release (just in
// case somebody wants to roll back after an upgrade).
// TODO(icanadi) at some point in the future, when we're absolutely sure
// nobody will roll back to RocksDB 2.x versions, retire the legacy magic
// number and always write new table files with new magic number
bool legacy = (r->checksum_type == kCRC32c);
Footer footer(legacy ? kLegacyBlockBasedTableMagicNumber
: kBlockBasedTableMagicNumber);
footer.set_metaindex_handle(metaindex_block_handle);
footer.set_index_handle(index_block_handle);
footer.set_checksum(r->checksum_type);
std::string footer_encoding;
footer.EncodeTo(&footer_encoding);
r->status = r->file->Append(footer_encoding);

0 comments on commit 0afc8bc

Please sign in to comment.
You can’t perform that action at this time.