Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimizing ApproximateSize to create index iterator just once #5693

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions db/table_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -531,4 +531,31 @@ uint64_t TableCache::ApproximateOffsetOf(

return result;
}

uint64_t TableCache::ApproximateSize(
const Slice& start, const Slice& end, const FileDescriptor& fd,
TableReaderCaller caller, const InternalKeyComparator& internal_comparator,
const SliceTransform* prefix_extractor) {
uint64_t result = 0;
TableReader* table_reader = fd.table_reader;
Cache::Handle* table_handle = nullptr;
if (table_reader == nullptr) {
const bool for_compaction = (caller == TableReaderCaller::kCompaction);
Status s = FindTable(env_options_, internal_comparator, fd, &table_handle,
prefix_extractor, false /* no_io */,
!for_compaction /* record_read_stats */);
if (s.ok()) {
table_reader = GetTableReaderFromHandle(table_handle);
}
}

if (table_reader != nullptr) {
result = table_reader->ApproximateSize(start, end, caller);
}
if (table_handle != nullptr) {
ReleaseHandle(table_handle);
}

return result;
}
} // namespace rocksdb
7 changes: 7 additions & 0 deletions db/table_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@ class TableCache {
const InternalKeyComparator& internal_comparator,
const SliceTransform* prefix_extractor = nullptr);

// Returns approximated data size between start and end keys in a file
// represented by fd (the start key must not be greater than the end key).
uint64_t ApproximateSize(const Slice& start, const Slice& end,
const FileDescriptor& fd, TableReaderCaller caller,
const InternalKeyComparator& internal_comparator,
const SliceTransform* prefix_extractor = nullptr);

// Release the handle from a cache
void ReleaseHandle(Cache::Handle* handle);

Expand Down
55 changes: 45 additions & 10 deletions db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4957,7 +4957,7 @@ uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options,
uint64_t file_size = files_brief.files[i].fd.GetFileSize();
// The entire file falls into the range, so we can just take its size.
assert(file_size ==
ApproximateSize(v, files_brief.files[i], end, caller));
ApproximateSize(v, files_brief.files[i], start, end, caller));
total_full_size += file_size;
}

Expand Down Expand Up @@ -4991,25 +4991,23 @@ uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options,
} else {
// Estimate for all the first files, at each level
for (const auto file_ptr : first_files) {
total_full_size += ApproximateSize(v, *file_ptr, end, caller);
// subtract the bytes needed to be scanned to get to the starting key
uint64_t val = ApproximateSize(v, *file_ptr, start, caller);
assert(total_full_size >= val);
total_full_size -= val;
total_full_size += ApproximateSize(v, *file_ptr, start, end, caller);
}

// Estimate for all the last files, at each level
for (const auto file_ptr : last_files) {
total_full_size += ApproximateSize(v, *file_ptr, end, caller);
// We could use ApproximateSize here, but calling ApproximateOffsetOf
// directly is just more efficient.
total_full_size += ApproximateOffsetOf(v, *file_ptr, end, caller);
}
}

return total_full_size;
}

uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
const Slice& key,
TableReaderCaller caller) {
uint64_t VersionSet::ApproximateOffsetOf(Version* v, const FdWithKeyRange& f,
const Slice& key,
TableReaderCaller caller) {
// pre-condition
assert(v);
const auto& icmp = v->cfd_->internal_comparator();
Expand All @@ -5034,6 +5032,43 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
return result;
}

uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
const Slice& start, const Slice& end,
TableReaderCaller caller) {
// pre-condition
assert(v);
const auto& icmp = v->cfd_->internal_comparator();
assert(icmp.Compare(start, end) <= 0);

if (icmp.Compare(f.largest_key, start) <= 0 ||
icmp.Compare(f.smallest_key, end) > 0) {
// Entire file is before or after the start/end keys range
return 0;
}

if (icmp.Compare(f.smallest_key, start) >= 0) {
// Start of the range is before the file start - approximate by end offset
return ApproximateOffsetOf(v, f, end, caller);
}

if (icmp.Compare(f.largest_key, end) < 0) {
// End of the range is after the file end - approximate by subtracting
// start offset from the file size
uint64_t start_offset = ApproximateOffsetOf(v, f, start, caller);
assert(f.fd.GetFileSize() >= start_offset);
return f.fd.GetFileSize() - start_offset;
}

// The interval falls entirely in the range for this file.
TableCache* table_cache = v->cfd_->table_cache();
if (table_cache == nullptr) {
return 0;
}
return table_cache->ApproximateSize(
start, end, f.file_metadata->fd, caller, icmp,
v->GetMutableCFOptions().prefix_extractor.get());
}

void VersionSet::AddLiveFiles(std::vector<FileDescriptor>* live_list) {
// pre-calculate space requirement
int64_t total_files = 0;
Expand Down
9 changes: 8 additions & 1 deletion db/version_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -1038,8 +1038,15 @@ class VersionSet {
}
};

// Returns approximated offset of a key in a file for a given version.
uint64_t ApproximateOffsetOf(Version* v, const FdWithKeyRange& f,
const Slice& key, TableReaderCaller caller);

// Returns approximated data size between start and end keys in a file
// for a given version.
uint64_t ApproximateSize(Version* v, const FdWithKeyRange& f,
const Slice& key, TableReaderCaller caller);
const Slice& start, const Slice& end,
TableReaderCaller caller);

// Save current contents to *log
Status WriteSnapshot(log::Writer* log);
Expand Down
62 changes: 45 additions & 17 deletions table/block_based/block_based_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3912,25 +3912,16 @@ Status BlockBasedTable::CreateIndexReader(
}
}

uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key,
TableReaderCaller caller) {
BlockCacheLookupContext context(caller);
IndexBlockIter iiter_on_stack;
auto index_iter =
NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false,
/*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr,
/*lookup_context=*/&context);

index_iter->Seek(key);
uint64_t result;
if (index_iter->Valid()) {
BlockHandle handle = index_iter->value().handle;
uint64_t BlockBasedTable::ApproximateOffsetOf(
const InternalIteratorBase<IndexValue>& index_iter) const {
uint64_t result = 0;
if (index_iter.Valid()) {
BlockHandle handle = index_iter.value().handle;
result = handle.offset();
} else {
// key is past the last key in the file. If table_properties is not
// The iterator is past the last key in the file. If table_properties is not
// available, approximate the offset by returning the offset of the
// metaindex block (which is right near the end of the file).
result = 0;
if (rep_->table_properties) {
result = rep_->table_properties->data_size;
}
Expand All @@ -3940,11 +3931,48 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key,
}
}

return result;
}

uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key,
TableReaderCaller caller) {
BlockCacheLookupContext context(caller);
IndexBlockIter iiter_on_stack;
auto index_iter =
NewIndexIterator(ReadOptions(), /*disable_prefix_seek=*/false,
/*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr,
/*lookup_context=*/&context);
std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
if (index_iter != &iiter_on_stack) {
delete index_iter;
iiter_unique_ptr.reset(index_iter);
}

return result;
index_iter->Seek(key);
return ApproximateOffsetOf(*index_iter);
}

uint64_t BlockBasedTable::ApproximateSize(const Slice& start, const Slice& end,
TableReaderCaller caller) {
assert(rep_->internal_comparator.Compare(start, end) <= 0);

BlockCacheLookupContext context(caller);
IndexBlockIter iiter_on_stack;
auto index_iter =
NewIndexIterator(ReadOptions(), /*disable_prefix_seek=*/false,
/*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr,
/*lookup_context=*/&context);
std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
if (index_iter != &iiter_on_stack) {
iiter_unique_ptr.reset(index_iter);
}

index_iter->Seek(start);
uint64_t start_offset = ApproximateOffsetOf(*index_iter);
index_iter->Seek(end);
uint64_t end_offset = ApproximateOffsetOf(*index_iter);

assert(end_offset >= start_offset);
return end_offset - start_offset;
}

bool BlockBasedTable::TEST_FilterBlockInCache() const {
Expand Down
13 changes: 12 additions & 1 deletion table/block_based/block_based_table_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,13 +151,20 @@ class BlockBasedTable : public TableReader {

// Given a key, return an approximate byte offset in the file where
// the data for that key begins (or would begin if the key were
// present in the file). The returned value is in terms of file
// present in the file). The returned value is in terms of file
// bytes, and so includes effects like compression of the underlying data.
// E.g., the approximate offset of the last key in the table will
// be close to the file length.
uint64_t ApproximateOffsetOf(const Slice& key,
TableReaderCaller caller) override;

// Given start and end keys, return the approximate data size in the file
// between the keys. The returned value is in terms of file bytes, and so
// includes effects like compression of the underlying data.
// The start key must not be greater than the end key.
uint64_t ApproximateSize(const Slice& start, const Slice& end,
TableReaderCaller caller) override;

bool TEST_BlockInCache(const BlockHandle& handle) const;

// Returns true if the block for the specified key is in cache.
Expand Down Expand Up @@ -438,6 +445,10 @@ class BlockBasedTable : public TableReader {
static void GenerateCachePrefix(Cache* cc, WritableFile* file, char* buffer,
size_t* size);

// Given an iterator return its offset in file.
uint64_t ApproximateOffsetOf(
const InternalIteratorBase<IndexValue>& index_iter) const;

// Helper functions for DumpTable()
Status DumpIndexBlock(WritableFile* out_file);
Status DumpDataBlocks(WritableFile* out_file);
Expand Down
6 changes: 6 additions & 0 deletions table/cuckoo/cuckoo_table_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ class CuckooTableReader: public TableReader {
TableReaderCaller /*caller*/) override {
return 0;
}

uint64_t ApproximateSize(const Slice& /*start*/, const Slice& /*end*/,
TableReaderCaller /*caller*/) override {
return 0;
}

void SetupForCompaction() override {}
// End of methods not implemented.

Expand Down
5 changes: 5 additions & 0 deletions table/mock_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ class MockTableReader : public TableReader {
return 0;
}

uint64_t ApproximateSize(const Slice& /*start*/, const Slice& /*end*/,
TableReaderCaller /*caller*/) override {
return 0;
}

size_t ApproximateMemoryUsage() const override { return 0; }

void SetupForCompaction() override {}
Expand Down
6 changes: 6 additions & 0 deletions table/plain/plain_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,12 @@ uint64_t PlainTableReader::ApproximateOffsetOf(const Slice& /*key*/,
return 0;
}

uint64_t PlainTableReader::ApproximateSize(const Slice& /*start*/,
const Slice& /*end*/,
TableReaderCaller /*caller*/) {
return 0;
}

PlainTableIterator::PlainTableIterator(PlainTableReader* table,
bool use_prefix_seek)
: table_(table),
Expand Down
3 changes: 3 additions & 0 deletions table/plain/plain_table_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ class PlainTableReader: public TableReader {
uint64_t ApproximateOffsetOf(const Slice& key,
TableReaderCaller caller) override;

uint64_t ApproximateSize(const Slice& start, const Slice& end,
TableReaderCaller caller) override;

uint32_t GetIndexSize() const { return index_.GetIndexSize(); }
void SetupForCompaction() override;

Expand Down
6 changes: 6 additions & 0 deletions table/table_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ class TableReader {
virtual uint64_t ApproximateOffsetOf(const Slice& key,
TableReaderCaller caller) = 0;

// Given start and end keys, return the approximate data size in the file
// between the keys. The returned value is in terms of file bytes, and so
// includes effects like compression of the underlying data.
virtual uint64_t ApproximateSize(const Slice& start, const Slice& end,
TableReaderCaller caller) = 0;

// Set up the table for Compaction. Might change some parameters with
// posix_fadvise
virtual void SetupForCompaction() = 0;
Expand Down