Permalink
Browse files

xxHash

Summary:
Originally: https://github.com/facebook/rocksdb/pull/87/files

I'm taking over to apply some finishing touches

Test Plan: will add tests

Reviewers: dhruba, haobo, sdong, yhchiang, ljin

Reviewed By: yhchiang

CC: leveldb

Differential Revision: https://reviews.facebook.net/D18315
  • Loading branch information...
igorcanadi committed May 1, 2014
1 parent 61955a0 commit 0afc8bc29a5800e3212388c327c750d32e31f3d6
View
@@ -4,12 +4,11 @@
### Public API changes
* Added _LEVEL to all InfoLogLevel enums
+* Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
### New Features
* Column family support
-
-### Public API changes
-* Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
+* Added an option to use different checksum functions in BlockBasedTableOptions
## 2.8.0 (04/04/2014)
View
@@ -36,6 +36,7 @@
#include "util/string_util.h"
#include "util/statistics.h"
#include "util/testutil.h"
+#include "util/xxhash.h"
#include "hdfs/env_hdfs.h"
#include "utilities/merge_operators.h"
@@ -64,6 +65,7 @@ DEFINE_string(benchmarks,
"randomwithverify,"
"fill100K,"
"crc32c,"
+ "xxhash,"
"compress,"
"uncompress,"
"acquireload,",
@@ -107,6 +109,7 @@ DEFINE_string(benchmarks,
"\tseekrandom -- N random seeks\n"
"\tseekrandom -- 1 writer, N threads doing random seeks\n"
"\tcrc32c -- repeated crc32c of 4K of data\n"
+ "\txxhash -- repeated xxHash of 4K of data\n"
"\tacquireload -- load N*1000 times\n"
"Meta operations:\n"
"\tcompact -- Compact the entire DB\n"
@@ -1234,6 +1237,8 @@ class Benchmark {
method = &Benchmark::Compact;
} else if (name == Slice("crc32c")) {
method = &Benchmark::Crc32c;
+ } else if (name == Slice("xxhash")) {
+ method = &Benchmark::xxHash;
} else if (name == Slice("acquireload")) {
method = &Benchmark::AcquireLoad;
} else if (name == Slice("compress")) {
@@ -1382,6 +1387,25 @@ class Benchmark {
thread->stats.AddMessage(label);
}
+ // Benchmark: repeatedly hashes a 4096-byte buffer with XXH32 (seed 0) until
+ // about 500MB have been processed, reporting one finished op per hash.
+ void xxHash(ThreadState* thread) {
+   const int kBlockSize = 4096;
+   const int64_t kTotalBytes = 500 * 1048576;
+   const char* const kLabel = "(4K per op)";
+   const std::string block(kBlockSize, 'x');
+   unsigned int digest = 0;
+   int64_t hashed = 0;
+   for (; hashed < kTotalBytes; hashed += kBlockSize) {
+     digest = XXH32(block.data(), kBlockSize, 0);
+     thread->stats.FinishedSingleOp(nullptr);
+   }
+   // Emit the digest so the hashing work above is not dead code.
+   fprintf(stderr, "... xxh32=0x%x\r", digest);
+
+   thread->stats.AddBytes(hashed);
+   thread->stats.AddMessage(kLabel);
+ }
+
void AcquireLoad(ThreadState* thread) {
int dummy;
port::AtomicPointer ap(&dummy);
View
@@ -316,6 +316,7 @@ class DBTest {
kUniversalCompaction,
kCompressedBlockCache,
kInfiniteMaxOpenFiles,
+ kxxHashChecksum,
kEnd
};
int option_config_;
@@ -496,6 +497,12 @@ class DBTest {
case kInfiniteMaxOpenFiles:
options.max_open_files = -1;
break;
+ case kxxHashChecksum: {
+ BlockBasedTableOptions table_options;
+ table_options.checksum = kxxHash;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ break;
+ }
case kBlockBasedTableWithPrefixHashIndex: {
BlockBasedTableOptions table_options;
table_options.index_type = BlockBasedTableOptions::kHashSearch;
@@ -6778,7 +6785,40 @@ TEST(DBTest, TailingIteratorPrefixSeek) {
ASSERT_TRUE(!iter->Valid());
}
+// Writes one table file with the CRC32c checksum and one with the xxHash
+// checksum, then checks that every key is readable no matter which checksum
+// type the DB is currently configured to use for new files.
+TEST(DBTest, ChecksumTest) {
+  BlockBasedTableOptions table_options;
+  Options options = CurrentOptions();
+
+  table_options.checksum = kCRC32c;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  Reopen(&options);
+  ASSERT_OK(Put("a", "b"));
+  ASSERT_OK(Put("c", "d"));
+  ASSERT_OK(Flush()); // table with crc checksum
+
+  table_options.checksum = kxxHash;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  Reopen(&options);
+  ASSERT_OK(Put("e", "f"));
+  ASSERT_OK(Put("g", "h"));
+  ASSERT_OK(Flush()); // table with xxhash checksum
+
+  // Read both files back with the DB configured for CRC32c.
+  table_options.checksum = kCRC32c;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  Reopen(&options);
+  ASSERT_EQ("b", Get("a"));
+  ASSERT_EQ("d", Get("c"));
+  ASSERT_EQ("f", Get("e"));
+  ASSERT_EQ("h", Get("g"));
+
+  // Read both files back with the DB configured for xxHash. This pass
+  // previously set kCRC32c again, which only repeated the pass above.
+  table_options.checksum = kxxHash;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  Reopen(&options);
+  ASSERT_EQ("b", Get("a"));
+  ASSERT_EQ("d", Get("c"));
+  ASSERT_EQ("f", Get("e"));
+  ASSERT_EQ("h", Get("g"));
+}
} // namespace rocksdb
int main(int argc, char** argv) {
View
@@ -38,6 +38,12 @@ struct Options;
using std::unique_ptr;
+enum ChecksumType : char { // checksum algorithm selected via BlockBasedTableOptions::checksum
+ kNoChecksum = 0x0, // not yet supported. Will fail (builder asserts in debug; release falls through to CRC32c)
+ kCRC32c = 0x1, // masked CRC32c; the default for BlockBasedTableOptions::checksum
+ kxxHash = 0x2, // 32-bit xxHash (XXH32) computed with seed 0
+};
+
// For advanced user only
struct BlockBasedTableOptions {
// @flush_block_policy_factory creates the instances of flush block policy.
@@ -67,6 +73,11 @@ struct BlockBasedTableOptions {
};
IndexType index_type = kBinarySearch;
+
+ // Use the specified checksum type. Newly created table files will be
+ // protected with this checksum type. Old table files will still be readable,
+ // even if they were written with a different checksum type.
+ ChecksumType checksum = kCRC32c;
};
// Table Properties that are specific to block-based table properties.
@@ -37,6 +37,7 @@
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/stop_watch.h"
+#include "util/xxhash.h"
namespace rocksdb {
@@ -231,12 +232,14 @@ Slice CompressBlock(const Slice& raw,
} // anonymous namespace
// kBlockBasedTableMagicNumber was picked by running
-// echo http://code.google.com/p/leveldb/ | sha1sum
+// echo rocksdb.table.block_based | sha1sum
// and taking the leading 64 bits.
// Please note that kBlockBasedTableMagicNumber may also be accessed by
// other .cc files, so it has to be explicitly declared with "extern".
-extern const uint64_t kBlockBasedTableMagicNumber
- = 0xdb4775248b80fb57ull;
+extern const uint64_t kBlockBasedTableMagicNumber = 0x88e241b785f4cff7ull;
+// We also support reading and writing legacy block based table format (for
+// backwards compatibility)
+extern const uint64_t kLegacyBlockBasedTableMagicNumber = 0xdb4775248b80fb57ull;
// A collector that collects properties of interest to block-based table.
// For now this class looks heavy-weight since we only write one additional
@@ -289,6 +292,7 @@ struct BlockBasedTableBuilder::Rep {
std::string last_key;
CompressionType compression_type;
+ ChecksumType checksum_type;
TableProperties props;
bool closed = false; // Either Finish() or Abandon() has been called.
@@ -303,14 +307,16 @@ struct BlockBasedTableBuilder::Rep {
Rep(const Options& opt, const InternalKeyComparator& icomparator,
WritableFile* f, FlushBlockPolicyFactory* flush_block_policy_factory,
- CompressionType compression_type, IndexType index_block_type)
+ CompressionType compression_type, IndexType index_block_type,
+ ChecksumType checksum_type)
: options(opt),
internal_comparator(icomparator),
file(f),
data_block(options, &internal_comparator),
index_builder(
CreateIndexBuilder(index_block_type, &internal_comparator)),
compression_type(compression_type),
+ checksum_type(checksum_type),
filter_block(opt.filter_policy == nullptr
? nullptr
: new FilterBlockBuilder(opt, &internal_comparator)),
@@ -330,7 +336,8 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
: rep_(new Rep(options, internal_comparator, file,
table_options.flush_block_policy_factory.get(),
compression_type,
- BlockBasedTableOptions::IndexType::kBinarySearch)) {
+ BlockBasedTableOptions::IndexType::kBinarySearch,
+ table_options.checksum)) {
if (rep_->filter_block != nullptr) {
rep_->filter_block->StartBlock(0);
}
@@ -443,9 +450,27 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
if (r->status.ok()) {
char trailer[kBlockTrailerSize];
trailer[0] = type;
- uint32_t crc = crc32c::Value(block_contents.data(), block_contents.size());
- crc = crc32c::Extend(crc, trailer, 1); // Extend crc to cover block type
- EncodeFixed32(trailer+1, crc32c::Mask(crc));
+ char* trailer_without_type = trailer + 1;
+ switch (r->checksum_type) {
+ case kNoChecksum:
+ // we don't support no checksum yet
+ assert(false);
+ // intentional fallthrough in release binary
+ case kCRC32c: {
+ auto crc = crc32c::Value(block_contents.data(), block_contents.size());
+ crc = crc32c::Extend(crc, trailer, 1); // Extend to cover block type
+ EncodeFixed32(trailer_without_type, crc32c::Mask(crc));
+ break;
+ }
+ case kxxHash: {
+ void* xxh = XXH32_init(0);
+ XXH32_update(xxh, block_contents.data(), block_contents.size());
+ XXH32_update(xxh, trailer, 1); // Extend to cover block type
+ EncodeFixed32(trailer_without_type, XXH32_digest(xxh));
+ break;
+ }
+ }
+
r->status = r->file->Append(Slice(trailer, kBlockTrailerSize));
if (r->status.ok()) {
r->status = InsertBlockInCache(block_contents, type, handle);
@@ -596,9 +621,19 @@ Status BlockBasedTableBuilder::Finish() {
// Write footer
if (ok()) {
- Footer footer(kBlockBasedTableMagicNumber);
+ // No need to write out the new footer if we're using the default checksum.
+ // We write the legacy magic number because we want old versions of RocksDB
+ // to be able to read files generated with the new release (in case
+ // somebody wants to roll back after an upgrade).
+ // TODO(icanadi) at some point in the future, when we're absolutely sure
+ // nobody will roll back to RocksDB 2.x versions, retire the legacy magic
+ // number and always write new table files with the new magic number
+ bool legacy = (r->checksum_type == kCRC32c);
+ Footer footer(legacy ? kLegacyBlockBasedTableMagicNumber
+ : kBlockBasedTableMagicNumber);
footer.set_metaindex_handle(metaindex_block_handle);
footer.set_index_handle(index_block_handle);
+ footer.set_checksum(r->checksum_type);
std::string footer_encoding;
footer.EncodeTo(&footer_encoding);
r->status = r->file->Append(footer_encoding);
Oops, something went wrong.

0 comments on commit 0afc8bc

Please sign in to comment.