Permalink
Browse files

Added CRC32 checksum to all data formats.

- Added CRC32 combine function from zlib.
- Added CRC32 checksum to all data formats, along with proper
  computation and verification of the checksum.
- Added proper handling of corrupted files during recovery and during
  compaction.
  • Loading branch information...
goossaert committed Sep 23, 2014
1 parent 43fb929 commit d3c0b5c47ac80856d09bf9eec84f9a437a7b56ad
Showing with 248 additions and 100 deletions.
  1. +25 −0 3rdparty/zlib/LICENSE
  2. +1 −0 LICENSE
  3. +5 −0 include/kingdb/byte_array.h
  4. +20 −10 include/kingdb/common.h
  5. +4 −1 interface/kingdb.cc
  6. +80 −80 storage/storage_engine.h
  7. +9 −4 util/compressor.cc
  8. +88 −3 util/crc32c.cc
  9. +16 −2 util/crc32c.h
View
@@ -0,0 +1,25 @@
+/* zlib.h -- interface of the 'zlib' general purpose compression library
+ version 1.2.8, April 28th, 2013
+
+ Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Jean-loup Gailly Mark Adler
+ jloup@gzip.org madler@alumni.caltech.edu
+
+*/
View
@@ -42,3 +42,4 @@ location of their respective LICENSE files.
- LZ4, under BSD 2-Clause License: /3rdparty/lz4/
- Murmurhash3, public domain: /3rdparty/murmurhash3/
- xxHash, under BSD 2-Clause License: /3rdparty/xxhash/
+- zlib, under zlib/libpng License: /3rdparty/zlib/
@@ -174,6 +174,10 @@ class SharedMmappedByteArray: public ByteArrayCommon {
size_ += add;
}
// Hands the given 32-bit value to crc32_ via put().
// NOTE(review): presumably this seeds the running checksum with a value
// computed elsewhere before data_chunk() streams the data bytes into it --
// confirm against the type of crc32_, which is not visible here.
+ void SetInitialCRC32(uint32_t c32) {
+ crc32_.put(c32);
+ }
+
virtual Status data_chunk(char **data_out, uint64_t *size_out) {
if (size_compressed_ == 0) { // if no compression
crc32_.stream(data_, size_);
@@ -204,6 +208,7 @@ class SharedMmappedByteArray: public ByteArrayCommon {
fprintf(stderr, "Bad CRC32 - stored:0x%08llx computed:0x%08llx\n", crc32_value_, crc32_.get());
return Status::IOError("Bad CRC32");
} else if (!s.IsOK()) {
+ fprintf(stderr, "Good CRC32 - stored:0x%08llx computed:0x%08llx\n", crc32_value_, crc32_.get());
return s;
}
View
@@ -17,6 +17,7 @@
#include "util/logger.h"
#include "util/status.h"
#include "util/coding.h"
+#include "util/crc32c.h"
#include "kingdb/byte_array_base.h"
#include "kingdb/byte_array.h"
#include "kingdb/options.h"
@@ -230,6 +231,7 @@ enum FileType {
};
struct LogFileHeader {
+ uint32_t crc32;
uint32_t version_data_format_major;
uint32_t version_data_format_minor;
uint32_t filetype;
@@ -260,24 +262,29 @@ struct LogFileHeader {
// Decodes a log file header from buffer_in into *output.
//
// Layout read by the added (+) lines, as byte offsets into buffer_in:
//    0: crc32 (32-bit)
//    4: version_data_format_major (32-bit)
//    8: version_data_format_minor (32-bit)
//   12: filetype (32-bit)
//   16: timestamp (64-bit)
//
// Returns:
//   IOError("Decoding error") if num_bytes_max is smaller than GetFixedSize(),
//   IOError("Data format version not supported") if IsFileVersionSupported()
//     is false,
//   IOError("Invalid checksum") if the stored crc32 differs from the one
//     computed here,
//   OK otherwise.
// Note that *output has already been filled in when either of the last two
// errors is returned.
static Status DecodeFrom(const char* buffer_in, uint64_t num_bytes_max, struct LogFileHeader *output) {
if (num_bytes_max < GetFixedSize()) return Status::IOError("Decoding error");
- GetFixed32(buffer_in, &(output->version_data_format_major));
- GetFixed32(buffer_in + 4, &(output->version_data_format_minor));
- GetFixed32(buffer_in + 8, &(output->filetype));
- GetFixed64(buffer_in + 12, &(output->timestamp));
+ GetFixed32(buffer_in , &(output->crc32));
+ GetFixed32(buffer_in + 4, &(output->version_data_format_major));
+ GetFixed32(buffer_in + 8, &(output->version_data_format_minor));
+ GetFixed32(buffer_in + 12, &(output->filetype));
+ GetFixed64(buffer_in + 16, &(output->timestamp));
// The checksum covers the 20 bytes that follow the crc32 field
// (4 + 4 + 4 + 8), i.e. every header field except the checksum itself.
+ uint32_t crc32_computed = crc32c::Value(buffer_in + 4, 20);
// The version check runs before the checksum comparison, so a header whose
// version fields are corrupted is reported as an unsupported version rather
// than as an invalid checksum.
if (!output->IsFileVersionSupported()) return Status::IOError("Data format version not supported");
+ if (crc32_computed != output->crc32) return Status::IOError("Invalid checksum");
return Status::OK();
}
// Serializes a log file header into buffer and returns GetFixedSize().
//
// The version fields written are the constants kVersionDataFormatMajor and
// kVersionDataFormatMinor, not the version members of *input; only
// input->filetype and input->timestamp are read from *input.
// No bounds check is performed here: the function writes bytes 0..23 of
// buffer unconditionally.
static uint32_t EncodeTo(const struct LogFileHeader *input, char* buffer) {
- EncodeFixed32(buffer, kVersionDataFormatMajor);
- EncodeFixed32(buffer + 4, kVersionDataFormatMinor);
- EncodeFixed32(buffer + 8, input->filetype);
- EncodeFixed64(buffer + 12, input->timestamp);
+ EncodeFixed32(buffer + 4, kVersionDataFormatMajor);
+ EncodeFixed32(buffer + 8, kVersionDataFormatMinor);
+ EncodeFixed32(buffer + 12, input->filetype);
+ EncodeFixed64(buffer + 16, input->timestamp);
// The fields are written first so that the checksum can be computed over
// their encoded form (the 20 bytes starting at offset 4); it is then stored
// in the first four bytes of the header.
+ uint32_t crc32 = crc32c::Value(buffer + 4, 20);
+ EncodeFixed32(buffer, crc32);
return GetFixedSize();
}
// Size of the encoded header. This commit raises it from 20 to 24 bytes:
// a 4-byte crc32 followed by the 20 bytes of version, filetype and timestamp
// fields.
static uint32_t GetFixedSize() {
- return 20; // in bytes
+ return 24; // in bytes
}
};
@@ -292,6 +299,7 @@ struct LogFileFooter {
uint64_t offset_indexes;
uint64_t num_entries;
uint64_t magic_number;
+ uint32_t crc32;
LogFileFooter() { flags = 0; }
@@ -310,6 +318,7 @@ struct LogFileFooter {
GetFixed64(buffer_in + 8, &(output->offset_indexes));
GetFixed64(buffer_in + 16, &(output->num_entries));
GetFixed64(buffer_in + 24, &(output->magic_number));
+ GetFixed32(buffer_in + 32, &(output->crc32));
return Status::OK();
}
@@ -319,11 +328,12 @@ struct LogFileFooter {
EncodeFixed64(buffer + 8, input->offset_indexes);
EncodeFixed64(buffer + 16, input->num_entries);
EncodeFixed64(buffer + 24, input->magic_number);
+ // the checksum is computed in the method that writes the footer
return GetFixedSize();
}
// Size of the encoded footer. This commit raises it from 32 to 36 bytes to
// make room for the 32-bit crc32 that DecodeFrom() reads at offset 32.
static uint32_t GetFixedSize() {
- return 32; // in bytes
+ return 36; // in bytes
}
};
View
@@ -89,7 +89,10 @@ Status KingDB::PutChunk(WriteOptions& write_options,
// Compute CRC32 checksum
uint32_t crc32 = 0;
- if (offset_chunk == 0) crc32_.ResetThreadLocalStorage();
+ if (offset_chunk == 0) {
+ crc32_.ResetThreadLocalStorage();
+ crc32_.stream(key->data(), key->size());
+ }
crc32_.stream(chunk_final->data(), chunk_final->size());
if (is_last_chunk) crc32 = crc32_.get();
Oops, something went wrong.

0 comments on commit d3c0b5c

Please sign in to comment.