diff --git a/HISTORY.md b/HISTORY.md index 59205341020..ace55cab404 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -21,6 +21,7 @@ * Added new APIs ExportColumnFamily() and CreateColumnFamilyWithImport() to support export and import of a Column Family. https://github.com/facebook/rocksdb/issues/3469 * ldb sometimes uses a string-append merge operator if no merge operator is passed in. This is to allow users to print keys from a DB with a merge operator. * Replaces old Registra with ObjectRegistry to allow user to create custom object from string, also add LoadEnv() to Env. +* Added a new overload of GetApproximateSizes which takes a SizeApproximationOptions object and returns a Status. The older overloads redirect their calls to this new method and no longer assert if include_flags has neither the INCLUDE_MEMTABLES nor the INCLUDE_FILES bit set. It's recommended to use the new method only, as it is more type safe and returns a meaningful status in case of errors. ### New Features * Add an option `snap_refresh_nanos` (default to 0) to periodically refresh the snapshot list in compaction jobs. Assign to 0 to disable the feature. @@ -29,6 +30,7 @@ * Add an option `failed_move_fall_back_to_copy` (default is true) for external SST ingestion. When `move_files` is true and hard link fails, ingestion falls back to copy if `failed_move_fall_back_to_copy` is true. Otherwise, ingestion reports an error. * Add argument `--secondary_path` to ldb to open the database as the secondary instance. This would keep the original DB intact. * Compression dictionary blocks are now prefetched and pinned in the cache (based on the customer's settings) the same way as index and filter blocks. +* Added DBOptions::log_readahead_size which specifies the number of bytes to prefetch when reading the log. This is mostly useful for reading a remotely located log, as it can reduce the number of round-trips. If 0 (default), then the prefetching is disabled. 
### Performance Improvements * Reduce binary search when iterator reseek into the same data block. diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 8132d5a0b38..70f32c4c765 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2779,11 +2779,13 @@ void DBImpl::GetApproximateMemTableStats(ColumnFamilyHandle* column_family, ReturnAndCleanupSuperVersion(cfd, sv); } -void DBImpl::GetApproximateSizes(ColumnFamilyHandle* column_family, - const Range* range, int n, uint64_t* sizes, - uint8_t include_flags) { - assert(include_flags & DB::SizeApproximationFlags::INCLUDE_FILES || - include_flags & DB::SizeApproximationFlags::INCLUDE_MEMTABLES); +Status DBImpl::GetApproximateSizes(const SizeApproximationOptions& options, + ColumnFamilyHandle* column_family, + const Range* range, int n, uint64_t* sizes) { + if (!options.include_memtabtles && !options.include_files) { + return Status::InvalidArgument("Invalid options"); + } + Version* v; auto cfh = reinterpret_cast(column_family); auto cfd = cfh->cfd(); @@ -2795,18 +2797,19 @@ void DBImpl::GetApproximateSizes(ColumnFamilyHandle* column_family, InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek); InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek); sizes[i] = 0; - if (include_flags & DB::SizeApproximationFlags::INCLUDE_FILES) { + if (options.include_files) { sizes[i] += versions_->ApproximateSize( v, k1.Encode(), k2.Encode(), /*start_level=*/0, /*end_level=*/-1, TableReaderCaller::kUserApproximateSize); } - if (include_flags & DB::SizeApproximationFlags::INCLUDE_MEMTABLES) { + if (options.include_memtabtles) { sizes[i] += sv->mem->ApproximateStats(k1.Encode(), k2.Encode()).size; sizes[i] += sv->imm->ApproximateStats(k1.Encode(), k2.Encode()).size; } } ReturnAndCleanupSuperVersion(cfd, sv); + return Status::OK(); } std::list::iterator diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 547e3e1d6be..fe3a2f6f20f 100644 --- a/db/db_impl/db_impl.h +++ 
b/db/db_impl/db_impl.h @@ -233,9 +233,10 @@ class DBImpl : public DB { virtual bool GetAggregatedIntProperty(const Slice& property, uint64_t* aggregated_value) override; using DB::GetApproximateSizes; - virtual void GetApproximateSizes( - ColumnFamilyHandle* column_family, const Range* range, int n, - uint64_t* sizes, uint8_t include_flags = INCLUDE_FILES) override; + virtual Status GetApproximateSizes(const SizeApproximationOptions& options, + ColumnFamilyHandle* column_family, + const Range* range, int n, + uint64_t* sizes) override; using DB::GetApproximateMemTableStats; virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family, const Range& range, diff --git a/db/db_test.cc b/db/db_test.cc index 36bdda59e21..f247ddb80fa 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -2598,13 +2598,14 @@ class ModelDB : public DB { return false; } using DB::GetApproximateSizes; - void GetApproximateSizes(ColumnFamilyHandle* /*column_family*/, - const Range* /*range*/, int n, uint64_t* sizes, - uint8_t /*include_flags*/ - = INCLUDE_FILES) override { + Status GetApproximateSizes(const SizeApproximationOptions& /*options*/, + ColumnFamilyHandle* /*column_family*/, + const Range* /*range*/, int n, + uint64_t* sizes) override { for (int i = 0; i < n; i++) { sizes[i] = 0; } + return Status::OK(); } using DB::GetApproximateMemTableStats; void GetApproximateMemTableStats(ColumnFamilyHandle* /*column_family*/, diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index d90ca900f45..1d90dc50b4b 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -808,7 +808,7 @@ class DB { // stats should be included, or file stats approximation or both enum SizeApproximationFlags : uint8_t { NONE = 0, - INCLUDE_MEMTABLES = 1, + INCLUDE_MEMTABLES = 1 << 0, INCLUDE_FILES = 1 << 1 }; @@ -818,14 +818,24 @@ class DB { // Note that the returned sizes measure file system space usage, so // if the user data compresses by a factor of ten, the returned // sizes will be one-tenth 
the size of the corresponding user data size. - // - // If include_flags defines whether the returned size should include - // the recently written data in the mem-tables (if - // the mem-table type supports it), data serialized to disk, or both. - // include_flags should be of type DB::SizeApproximationFlags + virtual Status GetApproximateSizes(const SizeApproximationOptions& options, + ColumnFamilyHandle* column_family, + const Range* range, int n, + uint64_t* sizes) = 0; + + // Simpler versions of the GetApproximateSizes() method above. + // The include_flags argument must be of type DB::SizeApproximationFlags + // and can not be NONE. virtual void GetApproximateSizes(ColumnFamilyHandle* column_family, const Range* range, int n, uint64_t* sizes, - uint8_t include_flags = INCLUDE_FILES) = 0; + uint8_t include_flags = INCLUDE_FILES) { + SizeApproximationOptions options; + options.include_memtabtles = + (include_flags & SizeApproximationFlags::INCLUDE_MEMTABLES) != 0; + options.include_files = + (include_flags & SizeApproximationFlags::INCLUDE_FILES) != 0; + GetApproximateSizes(options, column_family, range, n, sizes); + } virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes, uint8_t include_flags = INCLUDE_FILES) { GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags); diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 896beba23fc..5ae010b8f52 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1093,10 +1093,6 @@ struct DBOptions { // The number of bytes to prefetch when reading the log. This is mostly useful // for reading a remotely located log, as it can save the number of // round-trips. If 0, then the prefetching is disabled. - - // If non-zero, we perform bigger reads when reading the log. - // This is mostly useful for reading a remotely located log, as it can save - // the number of round-trips. If 0, then the prefetching is disabled. 
// // Default: 0 size_t log_readahead_size = 0; @@ -1510,4 +1506,14 @@ struct ImportColumnFamilyOptions { bool move_files = false; }; +// Options used with DB::GetApproximateSizes() +struct SizeApproximationOptions { + // Defines whether the returned size should include the recently written + // data in the mem-tables. If set to false, include_files must be true. + bool include_memtabtles = false; + // Defines whether the returned size should include data serialized to disk. + // If set to false, include_memtabtles must be true. + bool include_files = true; +}; + } // namespace rocksdb diff --git a/include/rocksdb/utilities/stackable_db.h b/include/rocksdb/utilities/stackable_db.h index a52aff5d8b1..67bf4e2fa6b 100644 --- a/include/rocksdb/utilities/stackable_db.h +++ b/include/rocksdb/utilities/stackable_db.h @@ -209,10 +209,11 @@ class StackableDB : public DB { } using DB::GetApproximateSizes; - virtual void GetApproximateSizes( - ColumnFamilyHandle* column_family, const Range* r, int n, uint64_t* sizes, - uint8_t include_flags = INCLUDE_FILES) override { - return db_->GetApproximateSizes(column_family, r, n, sizes, include_flags); + virtual Status GetApproximateSizes(const SizeApproximationOptions& options, + ColumnFamilyHandle* column_family, + const Range* r, int n, + uint64_t* sizes) override { + return db_->GetApproximateSizes(options, column_family, r, n, sizes); } using DB::GetApproximateMemTableStats;