Skip to content

Commit 0afc8bc

Browse files
committed
xxHash
Summary: Originally: https://github.com/facebook/rocksdb/pull/87/files
I'm taking over to apply some finishing touches.
Test Plan: will add tests
Reviewers: dhruba, haobo, sdong, yhchiang, ljin
Reviewed By: yhchiang
CC: leveldb
Differential Revision: https://reviews.facebook.net/D18315
1 parent 61955a0 commit 0afc8bc

15 files changed

Lines changed: 1084 additions & 167 deletions

HISTORY.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,11 @@
44

55
### Public API changes
66
* Added _LEVEL to all InfoLogLevel enums
7+
* Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
78

89
### New Features
910
* Column family support
10-
11-
### Public API changes
12-
* Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
11+
* Added an option to use different checksum functions in BlockBasedTableOptions
1312

1413
## 2.8.0 (04/04/2014)
1514

db/db_bench.cc

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "util/string_util.h"
3737
#include "util/statistics.h"
3838
#include "util/testutil.h"
39+
#include "util/xxhash.h"
3940
#include "hdfs/env_hdfs.h"
4041
#include "utilities/merge_operators.h"
4142

@@ -64,6 +65,7 @@ DEFINE_string(benchmarks,
6465
"randomwithverify,"
6566
"fill100K,"
6667
"crc32c,"
68+
"xxhash,"
6769
"compress,"
6870
"uncompress,"
6971
"acquireload,",
@@ -107,6 +109,7 @@ DEFINE_string(benchmarks,
107109
"\tseekrandom -- N random seeks\n"
108110
"\tseekrandom -- 1 writer, N threads doing random seeks\n"
109111
"\tcrc32c -- repeated crc32c of 4K of data\n"
112+
"\txxhash -- repeated xxHash of 4K of data\n"
110113
"\tacquireload -- load N*1000 times\n"
111114
"Meta operations:\n"
112115
"\tcompact -- Compact the entire DB\n"
@@ -1234,6 +1237,8 @@ class Benchmark {
12341237
method = &Benchmark::Compact;
12351238
} else if (name == Slice("crc32c")) {
12361239
method = &Benchmark::Crc32c;
1240+
} else if (name == Slice("xxhash")) {
1241+
method = &Benchmark::xxHash;
12371242
} else if (name == Slice("acquireload")) {
12381243
method = &Benchmark::AcquireLoad;
12391244
} else if (name == Slice("compress")) {
@@ -1382,6 +1387,25 @@ class Benchmark {
13821387
thread->stats.AddMessage(label);
13831388
}
13841389

1390+
void xxHash(ThreadState* thread) {
1391+
// Checksum about 500MB of data total
1392+
const int size = 4096;
1393+
const char* label = "(4K per op)";
1394+
std::string data(size, 'x');
1395+
int64_t bytes = 0;
1396+
unsigned int xxh32 = 0;
1397+
while (bytes < 500 * 1048576) {
1398+
xxh32 = XXH32(data.data(), size, 0);
1399+
thread->stats.FinishedSingleOp(nullptr);
1400+
bytes += size;
1401+
}
1402+
// Print so result is not dead
1403+
fprintf(stderr, "... xxh32=0x%x\r", static_cast<unsigned int>(xxh32));
1404+
1405+
thread->stats.AddBytes(bytes);
1406+
thread->stats.AddMessage(label);
1407+
}
1408+
13851409
void AcquireLoad(ThreadState* thread) {
13861410
int dummy;
13871411
port::AtomicPointer ap(&dummy);

db/db_test.cc

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ class DBTest {
316316
kUniversalCompaction,
317317
kCompressedBlockCache,
318318
kInfiniteMaxOpenFiles,
319+
kxxHashChecksum,
319320
kEnd
320321
};
321322
int option_config_;
@@ -496,6 +497,12 @@ class DBTest {
496497
case kInfiniteMaxOpenFiles:
497498
options.max_open_files = -1;
498499
break;
500+
case kxxHashChecksum: {
501+
BlockBasedTableOptions table_options;
502+
table_options.checksum = kxxHash;
503+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
504+
break;
505+
}
499506
case kBlockBasedTableWithPrefixHashIndex: {
500507
BlockBasedTableOptions table_options;
501508
table_options.index_type = BlockBasedTableOptions::kHashSearch;
@@ -6778,7 +6785,40 @@ TEST(DBTest, TailingIteratorPrefixSeek) {
67786785
ASSERT_TRUE(!iter->Valid());
67796786
}
67806787

6788+
TEST(DBTest, ChecksumTest) {
6789+
BlockBasedTableOptions table_options;
6790+
Options options = CurrentOptions();
6791+
6792+
table_options.checksum = kCRC32c;
6793+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
6794+
Reopen(&options);
6795+
ASSERT_OK(Put("a", "b"));
6796+
ASSERT_OK(Put("c", "d"));
6797+
ASSERT_OK(Flush()); // table with crc checksum
6798+
6799+
table_options.checksum = kxxHash;
6800+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
6801+
Reopen(&options);
6802+
ASSERT_OK(Put("e", "f"));
6803+
ASSERT_OK(Put("g", "h"));
6804+
ASSERT_OK(Flush()); // table with xxhash checksum
6805+
6806+
table_options.checksum = kCRC32c;
6807+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
6808+
Reopen(&options);
6809+
ASSERT_EQ("b", Get("a"));
6810+
ASSERT_EQ("d", Get("c"));
6811+
ASSERT_EQ("f", Get("e"));
6812+
ASSERT_EQ("h", Get("g"));
67816813

6814+
table_options.checksum = kCRC32c;
6815+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
6816+
Reopen(&options);
6817+
ASSERT_EQ("b", Get("a"));
6818+
ASSERT_EQ("d", Get("c"));
6819+
ASSERT_EQ("f", Get("e"));
6820+
ASSERT_EQ("h", Get("g"));
6821+
}
67826822
} // namespace rocksdb
67836823

67846824
int main(int argc, char** argv) {

include/rocksdb/table.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ struct Options;
3838

3939
using std::unique_ptr;
4040

41+
enum ChecksumType : char {
42+
kNoChecksum = 0x0, // not yet supported. Will fail
43+
kCRC32c = 0x1,
44+
kxxHash = 0x2,
45+
};
46+
4147
// For advanced user only
4248
struct BlockBasedTableOptions {
4349
// @flush_block_policy_factory creates the instances of flush block policy.
@@ -67,6 +73,11 @@ struct BlockBasedTableOptions {
6773
};
6874

6975
IndexType index_type = kBinarySearch;
76+
77+
// Use the specified checksum type. Newly created table files will be
78+
// protected with this checksum type. Old table files will still be readable,
79+
// even if they were written with a different checksum type.
80+
ChecksumType checksum = kCRC32c;
7081
};
7182

7283
// Table Properties that are specific to block-based table properties.

table/block_based_table_builder.cc

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "util/coding.h"
3838
#include "util/crc32c.h"
3939
#include "util/stop_watch.h"
40+
#include "util/xxhash.h"
4041

4142
namespace rocksdb {
4243

@@ -231,12 +232,14 @@ Slice CompressBlock(const Slice& raw,
231232
} // anonymous namespace
232233

233234
// kBlockBasedTableMagicNumber was picked by running
234-
// echo http://code.google.com/p/leveldb/ | sha1sum
235+
// echo rocksdb.table.block_based | sha1sum
235236
// and taking the leading 64 bits.
236237
// Please note that kBlockBasedTableMagicNumber may also be accessed by
237238
// other .cc files so it has to be explicitly declared with "extern".
238-
extern const uint64_t kBlockBasedTableMagicNumber
239-
= 0xdb4775248b80fb57ull;
239+
extern const uint64_t kBlockBasedTableMagicNumber = 0x88e241b785f4cff7ull;
240+
// We also support reading and writing legacy block based table format (for
241+
// backwards compatibility)
242+
extern const uint64_t kLegacyBlockBasedTableMagicNumber = 0xdb4775248b80fb57ull;
240243

241244
// A collector that collects properties of interest to block-based table.
242245
// For now this class looks heavy-weight since we only write one additional
@@ -289,6 +292,7 @@ struct BlockBasedTableBuilder::Rep {
289292

290293
std::string last_key;
291294
CompressionType compression_type;
295+
ChecksumType checksum_type;
292296
TableProperties props;
293297

294298
bool closed = false; // Either Finish() or Abandon() has been called.
@@ -303,14 +307,16 @@ struct BlockBasedTableBuilder::Rep {
303307

304308
Rep(const Options& opt, const InternalKeyComparator& icomparator,
305309
WritableFile* f, FlushBlockPolicyFactory* flush_block_policy_factory,
306-
CompressionType compression_type, IndexType index_block_type)
310+
CompressionType compression_type, IndexType index_block_type,
311+
ChecksumType checksum_type)
307312
: options(opt),
308313
internal_comparator(icomparator),
309314
file(f),
310315
data_block(options, &internal_comparator),
311316
index_builder(
312317
CreateIndexBuilder(index_block_type, &internal_comparator)),
313318
compression_type(compression_type),
319+
checksum_type(checksum_type),
314320
filter_block(opt.filter_policy == nullptr
315321
? nullptr
316322
: new FilterBlockBuilder(opt, &internal_comparator)),
@@ -330,7 +336,8 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
330336
: rep_(new Rep(options, internal_comparator, file,
331337
table_options.flush_block_policy_factory.get(),
332338
compression_type,
333-
BlockBasedTableOptions::IndexType::kBinarySearch)) {
339+
BlockBasedTableOptions::IndexType::kBinarySearch,
340+
table_options.checksum)) {
334341
if (rep_->filter_block != nullptr) {
335342
rep_->filter_block->StartBlock(0);
336343
}
@@ -443,9 +450,27 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
443450
if (r->status.ok()) {
444451
char trailer[kBlockTrailerSize];
445452
trailer[0] = type;
446-
uint32_t crc = crc32c::Value(block_contents.data(), block_contents.size());
447-
crc = crc32c::Extend(crc, trailer, 1); // Extend crc to cover block type
448-
EncodeFixed32(trailer+1, crc32c::Mask(crc));
453+
char* trailer_without_type = trailer + 1;
454+
switch (r->checksum_type) {
455+
case kNoChecksum:
456+
// we don't support no checksum yet
457+
assert(false);
458+
// intentional fallthrough in release binary
459+
case kCRC32c: {
460+
auto crc = crc32c::Value(block_contents.data(), block_contents.size());
461+
crc = crc32c::Extend(crc, trailer, 1); // Extend to cover block type
462+
EncodeFixed32(trailer_without_type, crc32c::Mask(crc));
463+
break;
464+
}
465+
case kxxHash: {
466+
void* xxh = XXH32_init(0);
467+
XXH32_update(xxh, block_contents.data(), block_contents.size());
468+
XXH32_update(xxh, trailer, 1); // Extend to cover block type
469+
EncodeFixed32(trailer_without_type, XXH32_digest(xxh));
470+
break;
471+
}
472+
}
473+
449474
r->status = r->file->Append(Slice(trailer, kBlockTrailerSize));
450475
if (r->status.ok()) {
451476
r->status = InsertBlockInCache(block_contents, type, handle);
@@ -596,9 +621,19 @@ Status BlockBasedTableBuilder::Finish() {
596621

597622
// Write footer
598623
if (ok()) {
599-
Footer footer(kBlockBasedTableMagicNumber);
624+
// No need to write out new footer if we're using default checksum.
625+
// We're writing legacy magic number because we want old versions of RocksDB
626+
// to be able to read files generated with the new release (just in case
627+
// somebody wants to roll back after an upgrade)
628+
// TODO(icanadi) at some point in the future, when we're absolutely sure
629+
// nobody will roll back to RocksDB 2.x versions, retire the legacy magic
630+
// number and always write new table files with new magic number
631+
bool legacy = (r->checksum_type == kCRC32c);
632+
Footer footer(legacy ? kLegacyBlockBasedTableMagicNumber
633+
: kBlockBasedTableMagicNumber);
600634
footer.set_metaindex_handle(metaindex_block_handle);
601635
footer.set_index_handle(index_block_handle);
636+
footer.set_checksum(r->checksum_type);
602637
std::string footer_encoding;
603638
footer.EncodeTo(&footer_encoding);
604639
r->status = r->file->Append(footer_encoding);

0 commit comments

Comments
 (0)