diff --git a/HISTORY.md b/HISTORY.md index 0b6409dbe47..18feefafce8 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -7,6 +7,7 @@ * options.keep_log_file_num will be enforced strictly all the time. File names of all log files will be tracked, which may take significantly amount of memory if options.keep_log_file_num is large and either of options.max_log_file_size or options.log_file_time_to_roll is set. * Add initial support for Get/Put with user timestamps. Users can specify timestamps via ReadOptions and WriteOptions when calling DB::Get and DB::Put. * Accessing a partition of a partitioned filter or index through a pinned reference is no longer considered a cache hit. +* The semantics of the per-block-type block read counts in the performance context now match those of the generic block_read_count. ### New Features * Add an option `snap_refresh_nanos` (default to 0.1s) to periodically refresh the snapshot list in compaction jobs. Assign to 0 to disable the feature. diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 0caea508822..a07cb8e89b5 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -80,14 +80,14 @@ Status ReadBlockFromFile( RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, const Footer& footer, const ReadOptions& options, const BlockHandle& handle, std::unique_ptr* result, const ImmutableCFOptions& ioptions, - bool do_uncompress, bool maybe_compressed, + bool do_uncompress, bool maybe_compressed, BlockType block_type, const UncompressionDict& uncompression_dict, const PersistentCacheOptions& cache_options, SequenceNumber global_seqno, size_t read_amp_bytes_per_bit, MemoryAllocator* memory_allocator) { BlockContents contents; BlockFetcher block_fetcher(file, prefetch_buffer, footer, options, handle, &contents, ioptions, do_uncompress, - maybe_compressed, uncompression_dict, + maybe_compressed, block_type, uncompression_dict, cache_options, memory_allocator); Status s = block_fetcher.ReadBlockContents(); if (s.ok()) { @@ -597,8 +597,8 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon { BlockFetcher prefixes_block_fetcher( file, prefetch_buffer, footer, ReadOptions(), prefixes_handle, &prefixes_contents, ioptions, true /*decompress*/, - true /*maybe_compressed*/, UncompressionDict::GetEmptyDict(), - cache_options, memory_allocator); + true /*maybe_compressed*/, BlockType::kHashIndexPrefixes, + UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); s = prefixes_block_fetcher.ReadBlockContents(); if (!s.ok()) { return s; @@ -607,8 +607,8 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon { BlockFetcher prefixes_meta_block_fetcher( file, prefetch_buffer, footer, ReadOptions(), prefixes_meta_handle, &prefixes_meta_contents, ioptions, true /*decompress*/, - true /*maybe_compressed*/, UncompressionDict::GetEmptyDict(), - cache_options, memory_allocator); + true /*maybe_compressed*/, BlockType::kHashIndexMetadata, + UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); s = prefixes_meta_block_fetcher.ReadBlockContents(); if (!s.ok()) { // TODO: log error @@ -1366,7 +1366,8 @@ Status BlockBasedTable::ReadCompressionDictBlock( rep_->file.get(), prefetch_buffer, rep_->footer, read_options, rep_->compression_dict_handle, compression_dict_cont.get(), rep_->ioptions, false /* decompress */, false /*maybe_compressed*/, - UncompressionDict::GetEmptyDict(), cache_options); + BlockType::kCompressionDictionary, UncompressionDict::GetEmptyDict(), + cache_options); s = compression_block_fetcher.ReadBlockContents(); if (!s.ok()) { @@ -1576,7 +1577,7 @@ Status BlockBasedTable::ReadMetaBlock(FilePrefetchBuffer* prefetch_buffer, Status s = ReadBlockFromFile( rep_->file.get(), prefetch_buffer, rep_->footer, ReadOptions(), rep_->footer.metaindex_handle(), &meta, rep_->ioptions, - true /* decompress */, true /*maybe_compressed*/, + true /* decompress */, true /*maybe_compressed*/, BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options, kDisableGlobalSequenceNumber, 0 /* read_amp_bytes_per_bit */, GetMemoryAllocator(rep_->table_options)); @@ -1811,8 +1812,9 @@ FilterBlockReader* BlockBasedTable::ReadFilter( BlockFetcher block_fetcher( rep->file.get(), prefetch_buffer, rep->footer, ReadOptions(), filter_handle, &block, rep->ioptions, false /* decompress */, - false /*maybe_compressed*/, UncompressionDict::GetEmptyDict(), - rep->persistent_cache_options, GetMemoryAllocator(rep->table_options)); + false /*maybe_compressed*/, BlockType::kFilter, + UncompressionDict::GetEmptyDict(), rep->persistent_cache_options, + GetMemoryAllocator(rep->table_options)); Status s = block_fetcher.ReadBlockContents(); if (!s.ok()) { @@ -1933,7 +1935,6 @@ CachableEntry BlockBasedTable::GetFilter( ? Cache::Priority::HIGH : Cache::Priority::LOW); if (s.ok()) { - PERF_COUNTER_ADD(filter_block_read_count, 1); UpdateCacheInsertionMetrics(BlockType::kFilter, get_context, usage); } else { RecordTick(rep_->ioptions.statistics, BLOCK_CACHE_ADD_FAILURES); @@ -2014,7 +2015,6 @@ CachableEntry BlockBasedTable::GetUncompressionDict( : Cache::Priority::LOW); if (s.ok()) { - PERF_COUNTER_ADD(compression_dict_block_read_count, 1); UpdateCacheInsertionMetrics(BlockType::kCompressionDictionary, get_context, usage); dict = uncompression_dict.release(); @@ -2210,7 +2210,7 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache( rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, &raw_block_contents, rep_->ioptions, do_decompress /* do uncompress */, rep_->blocks_maybe_compressed, - uncompression_dict, rep_->persistent_cache_options, + block_type, uncompression_dict, rep_->persistent_cache_options, GetMemoryAllocator(rep_->table_options), GetMemoryAllocatorForCompressedBlock(rep_->table_options)); s = block_fetcher.ReadBlockContents(); @@ -2328,7 +2328,7 @@ Status BlockBasedTable::RetrieveBlock( s = ReadBlockFromFile( rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, &block, rep_->ioptions, rep_->blocks_maybe_compressed, - rep_->blocks_maybe_compressed, uncompression_dict, + rep_->blocks_maybe_compressed, block_type, uncompression_dict, rep_->persistent_cache_options, rep_->get_global_seqno(block_type), block_type == BlockType::kData ? rep_->table_options.read_amp_bytes_per_bit @@ -3328,7 +3328,7 @@ Status BlockBasedTable::VerifyChecksumInBlocks( BlockFetcher block_fetcher( rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer, ReadOptions(), handle, &contents, rep_->ioptions, - false /* decompress */, false /*maybe_compressed*/, + false /* decompress */, false /*maybe_compressed*/, BlockType::kData, UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { @@ -3338,6 +3338,38 @@ Status BlockBasedTable::VerifyChecksumInBlocks( return s; } +BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName( + const Slice& meta_block_name) { + if (meta_block_name.starts_with(kFilterBlockPrefix) || + meta_block_name.starts_with(kFullFilterBlockPrefix) || + meta_block_name.starts_with(kPartitionedFilterBlockPrefix)) { + return BlockType::kFilter; + } + + if (meta_block_name == kPropertiesBlock) { + return BlockType::kProperties; + } + + if (meta_block_name == kCompressionDictBlock) { + return BlockType::kCompressionDictionary; + } + + if (meta_block_name == kRangeDelBlock) { + return BlockType::kRangeDeletion; + } + + if (meta_block_name == kHashIndexPrefixesBlock) { + return BlockType::kHashIndexPrefixes; + } + + if (meta_block_name == kHashIndexPrefixesMetadataBlock) { + return BlockType::kHashIndexMetadata; + } + + assert(false); + return BlockType::kInvalid; +} + Status BlockBasedTable::VerifyChecksumInMetaBlocks( InternalIteratorBase* index_iter) { Status s; @@ -3350,13 +3382,15 @@ Status BlockBasedTable::VerifyChecksumInMetaBlocks( Slice input = index_iter->value(); s = handle.DecodeFrom(&input); BlockContents contents; + const Slice meta_block_name = index_iter->key(); BlockFetcher block_fetcher( rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer, ReadOptions(), handle, &contents, rep_->ioptions, false /* decompress */, false /*maybe_compressed*/, + GetBlockTypeForMetaBlockByName(meta_block_name), UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options); s = block_fetcher.ReadBlockContents(); - if (s.IsCorruption() && index_iter->key() == kPropertiesBlock) { + if (s.IsCorruption() && meta_block_name == kPropertiesBlock) { TableProperties* table_properties; s = TryReadPropertiesWithGlobalSeqno(nullptr /* prefetch_buffer */, index_iter->value(), @@ -3655,7 +3689,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file, rep_->file.get(), nullptr /* prefetch_buffer */, rep_->footer, ReadOptions(), handle, &block, rep_->ioptions, false /*decompress*/, false /*maybe_compressed*/, - UncompressionDict::GetEmptyDict(), + BlockType::kFilter, UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 17c4e7238c8..3c92621bdcd 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -407,6 +407,8 @@ class BlockBasedTable : public TableReader { const BlockBasedTableOptions& table_options, const int level, BlockCacheLookupContext* lookup_context); + static BlockType GetBlockTypeForMetaBlockByName(const Slice& meta_block_name); + Status VerifyChecksumInMetaBlocks(InternalIteratorBase* index_iter); Status VerifyChecksumInBlocks(InternalIteratorBase* index_iter); diff --git a/table/block_based/block_type.h b/table/block_based/block_type.h index 9b9c53946c9..a60be2e6a70 100644 --- a/table/block_based/block_type.h +++ b/table/block_based/block_type.h @@ -5,6 +5,8 @@ #pragma once +#include + namespace rocksdb { // Represents the types of blocks used in the block based table format. @@ -17,8 +19,12 @@ enum class BlockType : uint8_t { kProperties, kCompressionDictionary, kRangeDeletion, + kHashIndexPrefixes, + kHashIndexMetadata, kMetaIndex, kIndex, + // Note: keep kInvalid the last value when adding new enum values. + kInvalid }; } // namespace rocksdb diff --git a/table/block_fetcher.cc b/table/block_fetcher.cc index afcbbaee4f5..35beb79502b 100644 --- a/table/block_fetcher.cc +++ b/table/block_fetcher.cc @@ -220,6 +220,26 @@ Status BlockFetcher::ReadBlockContents() { &slice_, used_buf_); } PERF_COUNTER_ADD(block_read_count, 1); + + // TODO: introduce dedicated perf counter for range tombstones + switch (block_type_) { + case BlockType::kFilter: + PERF_COUNTER_ADD(filter_block_read_count, 1); + break; + + case BlockType::kCompressionDictionary: + PERF_COUNTER_ADD(compression_dict_block_read_count, 1); + break; + + case BlockType::kIndex: + PERF_COUNTER_ADD(index_block_read_count, 1); + break; + + // Nothing to do here as we don't have counters for the other types. + default: + break; + } + PERF_COUNTER_ADD(block_read_byte, block_size_ + kBlockTrailerSize); if (!status_.ok()) { return status_; diff --git a/table/block_fetcher.h b/table/block_fetcher.h index 6451d6d2acc..06e5d9dfa31 100644 --- a/table/block_fetcher.h +++ b/table/block_fetcher.h @@ -10,6 +10,7 @@ #pragma once #include "memory/memory_allocator.h" #include "table/block_based/block.h" +#include "table/block_based/block_type.h" #include "table/format.h" namespace rocksdb { @@ -39,7 +40,7 @@ class BlockFetcher { FilePrefetchBuffer* prefetch_buffer, const Footer& footer, const ReadOptions& read_options, const BlockHandle& handle, BlockContents* contents, const ImmutableCFOptions& ioptions, - bool do_uncompress, bool maybe_compressed, + bool do_uncompress, bool maybe_compressed, BlockType block_type, const UncompressionDict& uncompression_dict, const PersistentCacheOptions& cache_options, MemoryAllocator* memory_allocator = nullptr, @@ -53,6 +54,7 @@ class BlockFetcher { ioptions_(ioptions), do_uncompress_(do_uncompress), maybe_compressed_(maybe_compressed), + block_type_(block_type), uncompression_dict_(uncompression_dict), cache_options_(cache_options), memory_allocator_(memory_allocator), @@ -72,6 +74,7 @@ class BlockFetcher { const ImmutableCFOptions& ioptions_; bool do_uncompress_; bool maybe_compressed_; + BlockType block_type_; const UncompressionDict& uncompression_dict_; const PersistentCacheOptions& cache_options_; MemoryAllocator* memory_allocator_; diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 341a1185579..7bbbc7966de 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -216,7 +216,8 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, BlockFetcher block_fetcher( file, prefetch_buffer, footer, read_options, handle, &block_contents, ioptions, false /* decompress */, false /*maybe_compressed*/, - UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); + BlockType::kProperties, UncompressionDict::GetEmptyDict(), cache_options, + memory_allocator); s = block_fetcher.ReadBlockContents(); // property block is never compressed. Need to add uncompress logic if we are // to compress it.. @@ -375,8 +376,8 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, BlockFetcher block_fetcher( file, nullptr /* prefetch_buffer */, footer, read_options, metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, - false /*maybe_compressed*/, UncompressionDict::GetEmptyDict(), - cache_options, memory_allocator); + false /*maybe_compressed*/, BlockType::kMetaIndex, + UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { return s; @@ -446,7 +447,8 @@ Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, file, nullptr /* prefetch_buffer */, footer, read_options, metaindex_handle, &metaindex_contents, ioptions, false /* do decompression */, false /*maybe_compressed*/, - UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); + BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(), cache_options, + memory_allocator); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { return s; @@ -467,7 +469,7 @@ Status ReadMetaBlock(RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, uint64_t file_size, uint64_t table_magic_number, const ImmutableCFOptions& ioptions, - const std::string& meta_block_name, + const std::string& meta_block_name, BlockType block_type, BlockContents* contents, bool /*compression_type_missing*/, MemoryAllocator* memory_allocator) { Status status; @@ -488,6 +490,7 @@ Status ReadMetaBlock(RandomAccessFileReader* file, BlockFetcher block_fetcher(file, prefetch_buffer, footer, read_options, metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, false /*maybe_compressed*/, + BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); status = block_fetcher.ReadBlockContents(); @@ -515,7 +518,7 @@ Status ReadMetaBlock(RandomAccessFileReader* file, // Reading metablock BlockFetcher block_fetcher2( file, prefetch_buffer, footer, read_options, block_handle, contents, - ioptions, false /* decompress */, false /*maybe_compressed*/, + ioptions, false /* decompress */, false /*maybe_compressed*/, block_type, UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); return block_fetcher2.ReadBlockContents(); } diff --git a/table/meta_blocks.h b/table/meta_blocks.h index 5224c54714d..86c703f953c 100644 --- a/table/meta_blocks.h +++ b/table/meta_blocks.h @@ -16,6 +16,7 @@ #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "table/block_based/block_builder.h" +#include "table/block_based/block_type.h" #include "table/format.h" #include "util/kv_map.h" @@ -143,7 +144,7 @@ Status ReadMetaBlock(RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, uint64_t file_size, uint64_t table_magic_number, const ImmutableCFOptions& ioptions, - const std::string& meta_block_name, + const std::string& meta_block_name, BlockType block_type, BlockContents* contents, bool compression_type_missing = false, MemoryAllocator* memory_allocator = nullptr); diff --git a/table/plain/plain_table_reader.cc b/table/plain/plain_table_reader.cc index 15f7be1c253..2f8f300d871 100644 --- a/table/plain/plain_table_reader.cc +++ b/table/plain/plain_table_reader.cc @@ -299,7 +299,7 @@ Status PlainTableReader::PopulateIndex(TableProperties* props, Status s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */, file_size_, kPlainTableMagicNumber, ioptions_, PlainTableIndexBuilder::kPlainTableIndexBlock, - &index_block_contents, + BlockType::kIndex, &index_block_contents, true /* compression_type_missing */); bool index_in_file = s.ok(); @@ -310,7 +310,8 @@ Status PlainTableReader::PopulateIndex(TableProperties* props, if (index_in_file) { s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */, file_size_, kPlainTableMagicNumber, ioptions_, - BloomBlockBuilder::kBloomBlock, &bloom_block_contents, + BloomBlockBuilder::kBloomBlock, BlockType::kFilter, + &bloom_block_contents, true /* compression_type_missing */); bloom_in_file = s.ok() && bloom_block_contents.data.size() > 0; } diff --git a/table/table_test.cc b/table/table_test.cc index c59c9d8c33f..e836f89a8df 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -2268,6 +2268,8 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) { if (index_and_filter_in_cache) { // data, index and filter block ASSERT_EQ(get_perf_context()->block_read_count, 3); + ASSERT_EQ(get_perf_context()->index_block_read_count, 1); + ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); } else { // just the data block ASSERT_EQ(get_perf_context()->block_read_count, 1); @@ -2293,9 +2295,12 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) { if (bloom_filter_type == 0) { // with block-based, we read index and then the filter ASSERT_EQ(get_perf_context()->block_read_count, 2); + ASSERT_EQ(get_perf_context()->index_block_read_count, 1); + ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); } else { // with full-filter, we read filter first and then we stop ASSERT_EQ(get_perf_context()->block_read_count, 1); + ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); } } else { // filter is already in memory and it figures out that the key doesn't @@ -3565,7 +3570,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { ASSERT_OK(ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size, &footer, kBlockBasedTableMagicNumber)); - auto BlockFetchHelper = [&](const BlockHandle& handle, + auto BlockFetchHelper = [&](const BlockHandle& handle, BlockType block_type, BlockContents* contents) { ReadOptions read_options; read_options.verify_checksums = false; @@ -3574,8 +3579,8 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { BlockFetcher block_fetcher( file, nullptr /* prefetch_buffer */, footer, read_options, handle, contents, ioptions, false /* decompress */, - false /*maybe_compressed*/, UncompressionDict::GetEmptyDict(), - cache_options); + false /*maybe_compressed*/, block_type, + UncompressionDict::GetEmptyDict(), cache_options); ASSERT_OK(block_fetcher.ReadBlockContents()); }; @@ -3584,7 +3589,8 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { auto metaindex_handle = footer.metaindex_handle(); BlockContents metaindex_contents; - BlockFetchHelper(metaindex_handle, &metaindex_contents); + BlockFetchHelper(metaindex_handle, BlockType::kMetaIndex, + &metaindex_contents); Block metaindex_block(std::move(metaindex_contents), kDisableGlobalSequenceNumber); @@ -3601,7 +3607,8 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { ASSERT_OK(properties_handle.DecodeFrom(&v)); BlockContents properties_contents; - BlockFetchHelper(properties_handle, &properties_contents); + BlockFetchHelper(properties_handle, BlockType::kProperties, + &properties_contents); Block properties_block(std::move(properties_contents), kDisableGlobalSequenceNumber); @@ -3660,8 +3667,9 @@ TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) { BlockFetcher block_fetcher( table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(), metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, - false /*maybe_compressed*/, UncompressionDict::GetEmptyDict(), - pcache_opts, nullptr /*memory_allocator*/); + false /*maybe_compressed*/, BlockType::kMetaIndex, + UncompressionDict::GetEmptyDict(), pcache_opts, + nullptr /*memory_allocator*/); ASSERT_OK(block_fetcher.ReadBlockContents()); Block metaindex_block(std::move(metaindex_contents), kDisableGlobalSequenceNumber);