Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bytes read stat for VerifyChecksum() and VerifyFileChecksums() APIs #8741

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

### New Features
* RemoteCompaction's interface now includes `db_name`, `db_id`, `session_id`, which could help the user uniquely identify compaction job between db instances and sessions.
* Added a ticker statistic, "rocksdb.verify_checksum.read.bytes", reporting how many bytes were read from file to serve `VerifyChecksum()` and `VerifyFileChecksums()` queries.

### Public API change
* Remove obsolete implementation details FullKey and ParseFullKey from public API
Expand Down
13 changes: 13 additions & 0 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4936,6 +4936,11 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {

Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
bool use_file_checksum) {
// `bytes_read` stat is enabled based on compile-time support and cannot
// be dynamically toggled. So we do not need to worry about `PerfLevel`
// here, unlike many other `IOStatsContext` / `PerfContext` stats.
uint64_t prev_bytes_read = IOSTATS(bytes_read);

Status s;

if (use_file_checksum) {
Expand Down Expand Up @@ -4990,6 +4995,9 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_,
read_options, fname);
}
RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
IOSTATS(bytes_read) - prev_bytes_read);
prev_bytes_read = IOSTATS(bytes_read);
}
}

Expand All @@ -5004,6 +5012,9 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
s = VerifyFullFileChecksum(meta->GetChecksumValue(),
meta->GetChecksumMethod(), blob_file_name,
read_options);
RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
IOSTATS(bytes_read) - prev_bytes_read);
prev_bytes_read = IOSTATS(bytes_read);
if (!s.ok()) {
break;
}
Expand Down Expand Up @@ -5035,6 +5046,8 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
cfd->UnrefAndTryDelete();
}
}
RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
IOSTATS(bytes_read) - prev_bytes_read);
return s;
}

Expand Down
49 changes: 49 additions & 0 deletions db/db_statistics_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,55 @@ TEST_F(DBStatisticsTest, ExcludeTickers) {
ASSERT_GT(options.statistics->getTickerCount(BYTES_READ), 0);
}

#ifndef ROCKSDB_LITE

TEST_F(DBStatisticsTest, VerifyChecksumReadStat) {
Options options = CurrentOptions();
options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
Reopen(options);

// Expected to be populated regardless of `PerfLevel` in user thread
SetPerfLevel(kDisable);

{
// Scenario 0: only WAL data. Not verified so require ticker to be zero.
ASSERT_OK(Put("foo", "value"));
ASSERT_OK(db_->VerifyFileChecksums(ReadOptions()));
ASSERT_OK(db_->VerifyChecksum());
ASSERT_EQ(0,
options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES));
}

// Create one SST.
ASSERT_OK(Flush());
std::unordered_map<std::string, uint64_t> table_files;
uint64_t table_files_size = 0;
GetAllDataFiles(kTableFile, &table_files, &table_files_size);

{
// Scenario 1: Table verified in `VerifyFileChecksums()`. This should read
// the whole file so we require the ticker stat exactly matches the file
// size.
ASSERT_OK(options.statistics->Reset());
ASSERT_OK(db_->VerifyFileChecksums(ReadOptions()));
ASSERT_EQ(table_files_size,
options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES));
}

{
// Scenario 2: Table verified in `VerifyChecksum()`. This opens a
// `TableReader` to verify each block. It can involve duplicate reads of the
// same data so we set a lower-bound only.
ASSERT_OK(options.statistics->Reset());
ASSERT_OK(db_->VerifyChecksum());
ASSERT_GE(options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES),
table_files_size);
}
}

#endif // !ROCKSDB_LITE

} // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
Expand Down
10 changes: 7 additions & 3 deletions include/rocksdb/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
namespace ROCKSDB_NAMESPACE {

/**
* Keep adding ticker's here.
* 1. Any ticker should be added before TICKER_ENUM_MAX.
* Keep adding tickers here.
* 1. Any ticker should be added immediately before TICKER_ENUM_MAX, taking
* over its old value.
* 2. Add a readable string in TickersNameMap below for the newly added ticker.
* 3. Add a corresponding enum value to TickerType.java in the java API
* 4. Add the enum conversions from Java and C++ to portal.h's toJavaTickerType
* and toCppTickers
* and toCppTickers
*/
enum Tickers : uint32_t {
// total block cache misses
Expand Down Expand Up @@ -404,6 +405,9 @@ enum Tickers : uint32_t {
// Secondary cache statistics
SECONDARY_CACHE_HITS,

// Bytes read by `VerifyChecksum()` and `VerifyFileChecksums()` APIs.
VERIFY_CHECKSUM_READ_BYTES,

TICKER_ENUM_MAX
};

Expand Down
26 changes: 22 additions & 4 deletions java/rocksjni/portal.h
Original file line number Diff line number Diff line change
Expand Up @@ -4876,7 +4876,7 @@ class TickerTypeJni {
case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND:
return 0x5E;
case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED:
// -0x01 to fixate the new value that incorrectly changed TICKER_ENUM_MAX.
// -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX).
return -0x01;
case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED:
return 0x60;
Expand Down Expand Up @@ -5002,8 +5002,17 @@ class TickerTypeJni {
return -0x1D;
case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS:
return -0x1E;
case ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES:
return -0x1F;
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
// 0x5F for backwards compatibility on current minor version.
// 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep
// the value the same forever.
//
// TODO: This particular case seems confusing and unnecessary to pin the
// value since it's meant to be the number of tickers, not an actual
// ticker value. But we aren't yet in a position to fix it since the
// number of tickers doesn't fit in the Java representation (jbyte).
return 0x5F;
default:
// undefined/default
Expand Down Expand Up @@ -5206,7 +5215,7 @@ class TickerTypeJni {
case 0x5E:
return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND;
case -0x01:
// -0x01 to fixate the new value that incorrectly changed TICKER_ENUM_MAX.
// -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX).
return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED;
case 0x60:
return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED;
Expand Down Expand Up @@ -5334,8 +5343,17 @@ class TickerTypeJni {
return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_GARBAGE_BYTES_AT_FLUSH;
case -0x1E:
return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS;
case -0x1F:
return ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES;
case 0x5F:
// 0x5F for backwards compatibility on current minor version.
// 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep
// the value the same forever.
//
// TODO: This particular case seems confusing and unnecessary to pin the
// value since it's meant to be the number of tickers, not an actual
// ticker value. But we aren't yet in a position to fix it since the
// number of tickers doesn't fit in the Java representation (jbyte).
return ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX;

default:
Expand Down
1 change: 1 addition & 0 deletions monitoring/statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{MEMTABLE_GARBAGE_BYTES_AT_FLUSH,
"rocksdb.memtable.garbage.bytes.at.flush"},
{SECONDARY_CACHE_HITS, "rocksdb.secondary.cache.hits"},
{VERIFY_CHECKSUM_READ_BYTES, "rocksdb.verify_checksum.read.bytes"},
};

const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
Expand Down