From 82afa0181529d01978d364043bdde47045ee6fa0 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 2 Nov 2021 20:29:07 -0700 Subject: [PATCH] Some API clarifications (#9080) Summary: * Clarify that RocksDB is not exception safe on many of our callback and extension interfaces * Clarify FSRandomAccessFile::MultiRead implementations must accept non-sorted inputs (see https://github.com/facebook/rocksdb/issues/8953) * Clarify ConcurrentTaskLimiter and SstFileManager are not (currently) extensible interfaces * Mark WriteBufferManager as `final`, so it is then clearly not a callback interface, even though it smells like one * Clarify TablePropertiesCollector Status returns are mostly ignored Pull Request resolved: https://github.com/facebook/rocksdb/pull/9080 Test Plan: comments only (except WriteBufferManager final) Reviewed By: ajkr Differential Revision: D31968782 Pulled By: pdillinger fbshipit-source-id: 11b648ce3ce3c5e5bdc02d2eafc7ea4b864bd1d2 --- HISTORY.md | 2 ++ include/rocksdb/advanced_options.h | 16 ++++++++++------ include/rocksdb/cache.h | 4 ++++ include/rocksdb/compaction_filter.h | 9 ++++++++- include/rocksdb/comparator.h | 4 ++++ include/rocksdb/concurrent_task_limiter.h | 2 ++ include/rocksdb/env.h | 7 +++++++ include/rocksdb/env_encryption.h | 8 ++++++++ include/rocksdb/file_checksum.h | 12 ++++++++++++ include/rocksdb/file_system.h | 14 +++++++++----- include/rocksdb/flush_block_policy.h | 6 +++++- include/rocksdb/listener.h | 4 ++++ include/rocksdb/merge_operator.h | 3 +++ include/rocksdb/options.h | 3 +++ include/rocksdb/rate_limiter.h | 3 +++ include/rocksdb/secondary_cache.h | 4 ++++ include/rocksdb/slice_transform.h | 14 ++++++++------ include/rocksdb/sst_file_manager.h | 3 ++- include/rocksdb/sst_partitioner.h | 3 +++ include/rocksdb/statistics.h | 4 ++++ include/rocksdb/table_properties.h | 13 ++++++++++++- include/rocksdb/utilities/backup_engine.h | 4 ++++ include/rocksdb/wal_filter.h | 4 ++++ include/rocksdb/write_buffer_manager.h | 6 +++--- 24 files changed, 128 insertions(+), 24 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index af28ae9b67d..ec289775dec 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -17,6 +17,8 @@ ### Public API change * Made FileSystem extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. +* Clarified in API comments that RocksDB is not exception safe for callbacks and custom extensions. An exception propagating into RocksDB can lead to undefined behavior, including data loss, unreported corruption, deadlocks, and more. +* Marked `WriteBufferManager` as `final` because it is not intended for extension. ## 6.26.0 (2021-10-20) ### Bug Fixes diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 5abda968847..f2f4c0ed3a6 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -307,35 +307,39 @@ struct AdvancedColumnFamilyOptions { // delta_value - Delta value to be merged with the existing_value. // Stored in transaction logs. // merged_value - Set when delta is applied on the previous value. - + // // Applicable only when inplace_update_support is true, // this callback function is called at the time of updating the memtable // as part of a Put operation, lets say Put(key, delta_value). It allows the // 'delta_value' specified as part of the Put operation to be merged with // an 'existing_value' of the key in the database. - + // // If the merged value is smaller in size that the 'existing_value', // then this function can update the 'existing_value' buffer inplace and // the corresponding 'existing_value'_size pointer, if it wishes to. // The callback should return UpdateStatus::UPDATED_INPLACE. // In this case. (In this case, the snapshot-semantics of the rocksdb // Iterator is not atomic anymore). - + // // If the merged value is larger in size than the 'existing_value' or the // application does not wish to modify the 'existing_value' buffer inplace, // then the merged value should be returned via *merge_value. It is set by // merging the 'existing_value' and the Put 'delta_value'. The callback should // return UpdateStatus::UPDATED in this case. This merged value will be added // to the memtable. - + // // If merging fails or the application does not wish to take any action, // then the callback should return UpdateStatus::UPDATE_FAILED. - + // // Please remember that the original call from the application is Put(key, // delta_value). So the transaction log (if enabled) will still contain (key, // delta_value). The 'merged_value' is not stored in the transaction log. // Hence the inplace_callback function should be consistent across db reopens. - + // + // RocksDB callbacks are NOT exception-safe. A callback completing with an + // exception can lead to undefined behavior in RocksDB, including data loss, + // unreported corruption, deadlocks, and more. + // // Default: nullptr UpdateStatus (*inplace_callback)(char* existing_value, uint32_t* existing_value_size, diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index b7ab36dc037..772977df334 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -170,6 +170,10 @@ class Cache { // The SizeCallback takes a void* pointer to the object and returns the size // of the persistable data. It can be used by the secondary cache to allocate // memory if needed. + // + // RocksDB callbacks are NOT exception-safe. A callback completing with an + // exception can lead to undefined behavior in RocksDB, including data loss, + // unreported corruption, deadlocks, and more. using SizeCallback = size_t (*)(void* obj); // The SaveToCallback takes a void* object pointer and saves the persistable diff --git a/include/rocksdb/compaction_filter.h b/include/rocksdb/compaction_filter.h index 400f3388e07..3c334f4bfef 100644 --- a/include/rocksdb/compaction_filter.h +++ b/include/rocksdb/compaction_filter.h @@ -24,7 +24,10 @@ class SliceTransform; // CompactionFilter allows an application to modify/delete a key-value during // table file creation. - +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class CompactionFilter : public Customizable { public: enum ValueType { @@ -219,6 +222,10 @@ class CompactionFilter : public Customizable { // `CompactionFilter` according to `ShouldFilterTableFileCreation()`. This // allows the application to know about the different ongoing threads of work // and makes it unnecessary for `CompactionFilter` to provide thread-safety. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class CompactionFilterFactory : public Customizable { public: virtual ~CompactionFilterFactory() {} diff --git a/include/rocksdb/comparator.h b/include/rocksdb/comparator.h index fe96eb0c756..6c73a026f40 100644 --- a/include/rocksdb/comparator.h +++ b/include/rocksdb/comparator.h @@ -21,6 +21,10 @@ class Slice; // used as keys in an sstable or a database. A Comparator implementation // must be thread-safe since rocksdb may invoke its methods concurrently // from multiple threads. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class Comparator : public Customizable { public: Comparator() : timestamp_size_(0) {} diff --git a/include/rocksdb/concurrent_task_limiter.h b/include/rocksdb/concurrent_task_limiter.h index 760444e1892..9ad741f98d5 100644 --- a/include/rocksdb/concurrent_task_limiter.h +++ b/include/rocksdb/concurrent_task_limiter.h @@ -17,6 +17,8 @@ namespace ROCKSDB_NAMESPACE { +// This is NOT an extensible interface but a public interface for result of +// NewConcurrentTaskLimiter. Any derived classes must be RocksDB internal. class ConcurrentTaskLimiter { public: virtual ~ConcurrentTaskLimiter() {} diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index cd1fd670fd7..46f11ec9919 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -145,6 +145,9 @@ struct EnvOptions { RateLimiter* rate_limiter = nullptr; }; +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class Env { public: struct FileAttributes { @@ -1150,6 +1153,10 @@ enum InfoLogLevel : unsigned char { }; // An interface for writing log messages. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class Logger { public: size_t kDoNotSupportGetLogFileSize = (std::numeric_limits::max)(); diff --git a/include/rocksdb/env_encryption.h b/include/rocksdb/env_encryption.h index 6fe011188e3..282db6ed413 100644 --- a/include/rocksdb/env_encryption.h +++ b/include/rocksdb/env_encryption.h @@ -62,6 +62,10 @@ class BlockAccessCipherStream { }; // BlockCipher +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class BlockCipher : public Customizable { public: virtual ~BlockCipher(){}; @@ -105,6 +109,10 @@ class BlockCipher : public Customizable { // The encryption provider is used to create a cipher stream for a specific // file. The returned cipher stream will be used for actual // encryption/decryption actions. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class EncryptionProvider : public Customizable { public: virtual ~EncryptionProvider(){}; diff --git a/include/rocksdb/file_checksum.h b/include/rocksdb/file_checksum.h index 50b6e8fdf9e..4881dcf3822 100644 --- a/include/rocksdb/file_checksum.h +++ b/include/rocksdb/file_checksum.h @@ -43,6 +43,10 @@ struct FileChecksumGenContext { // * Finalize is called at most once during the life of the object // * All calls to Update come before Finalize // * All calls to GetChecksum come after Finalize +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class FileChecksumGenerator { public: virtual ~FileChecksumGenerator() {} @@ -64,6 +68,10 @@ class FileChecksumGenerator { }; // Create the FileChecksumGenerator object for each SST file. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class FileChecksumGenFactory : public Customizable { public: virtual ~FileChecksumGenFactory() {} @@ -86,6 +94,10 @@ class FileChecksumGenFactory : public Customizable { // the checksum information of all valid SST file of a DB instance. It can // also be used to store the checksum information of a list of SST files to // be ingested. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class FileChecksumList { public: virtual ~FileChecksumList() {} diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h index 492a0dac7f9..0c409a9888f 100644 --- a/include/rocksdb/file_system.h +++ b/include/rocksdb/file_system.h @@ -208,7 +208,10 @@ struct IODebugContext { // retryable. // NewCompositeEnv can be used to create an Env with a custom FileSystem for // DBOptions::env. - +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class FileSystem : public Customizable { public: FileSystem(); @@ -731,10 +734,11 @@ class FSRandomAccessFile { // Read a bunch of blocks as described by reqs. The blocks can // optionally be read in parallel. This is a synchronous call, i.e it // should return after all reads have completed. The reads will be - // non-overlapping. If the function return Status is not ok, status of - // individual requests will be ignored and return status will be assumed - // for all read requests. The function return status is only meant for any - // any errors that occur before even processing specific read requests + // non-overlapping but can be in any order. If the function return Status + // is not ok, status of individual requests will be ignored and return + // status will be assumed for all read requests. The function return status + // is only meant for errors that occur before processing individual read + // requests. virtual IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, const IOOptions& options, IODebugContext* dbg) { assert(reqs != nullptr); diff --git a/include/rocksdb/flush_block_policy.h b/include/rocksdb/flush_block_policy.h index 38fc633f204..7a5dd957e4e 100644 --- a/include/rocksdb/flush_block_policy.h +++ b/include/rocksdb/flush_block_policy.h @@ -18,7 +18,11 @@ struct ConfigOptions; struct Options; // FlushBlockPolicy provides a configurable way to determine when to flush a -// block in the block based tables, +// block in the block based tables. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class FlushBlockPolicy { public: // Keep track of the key/value sequences and return the boolean value to diff --git a/include/rocksdb/listener.h b/include/rocksdb/listener.h index 6ffb3fe4c96..b361f88cf0e 100644 --- a/include/rocksdb/listener.h +++ b/include/rocksdb/listener.h @@ -476,6 +476,10 @@ struct ExternalFileIngestionInfo { // the current thread holding any DB mutex. This is to prevent potential // deadlock and performance issue when using EventListener callback // in a complex way. +// +// [Exceptions] Exceptions MUST NOT propagate out of overridden functions into +// RocksDB, because RocksDB is not exception-safe. This could cause undefined +// behavior including data loss, unreported corruption, deadlocks, and more. class EventListener : public Customizable { public: static const char* Type() { return "EventListener"; } diff --git a/include/rocksdb/merge_operator.h b/include/rocksdb/merge_operator.h index 68065454aa9..bd6e336a59e 100644 --- a/include/rocksdb/merge_operator.h +++ b/include/rocksdb/merge_operator.h @@ -44,6 +44,9 @@ class Logger; // // Refer to rocksdb-merge wiki for more details and example implementations. // +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class MergeOperator : public Customizable { public: virtual ~MergeOperator() {} diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 884e80ebb57..7a1de826689 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -396,6 +396,9 @@ struct CompactionServiceJobInfo { priority(priority_) {} }; +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class CompactionService : public Customizable { public: static const char* Type() { return "CompactionService"; } diff --git a/include/rocksdb/rate_limiter.h b/include/rocksdb/rate_limiter.h index 6bc0e7a4d54..6d4ea0ea790 100644 --- a/include/rocksdb/rate_limiter.h +++ b/include/rocksdb/rate_limiter.h @@ -15,6 +15,9 @@ namespace ROCKSDB_NAMESPACE { +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class RateLimiter { public: enum class OpType { diff --git a/include/rocksdb/secondary_cache.h b/include/rocksdb/secondary_cache.h index 20632d2ffac..2bfe863dfdb 100644 --- a/include/rocksdb/secondary_cache.h +++ b/include/rocksdb/secondary_cache.h @@ -43,6 +43,10 @@ class SecondaryCacheResultHandle { // // Cache interface for caching blocks on a secondary tier (which can include // non-volatile media, or alternate forms of caching such as compressed data) +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class SecondaryCache : public Customizable { public: virtual ~SecondaryCache() {} diff --git a/include/rocksdb/slice_transform.h b/include/rocksdb/slice_transform.h index 0774ecc0242..742060969dd 100644 --- a/include/rocksdb/slice_transform.h +++ b/include/rocksdb/slice_transform.h @@ -25,12 +25,14 @@ namespace ROCKSDB_NAMESPACE { class Slice; struct ConfigOptions; -/* - * A SliceTransform is a generic pluggable way of transforming one string - * to another. Its primary use-case is in configuring rocksdb - * to store prefix blooms by setting prefix_extractor in - * ColumnFamilyOptions. - */ +// A SliceTransform is a generic pluggable way of transforming one string +// to another. Its primary use-case is in configuring rocksdb +// to store prefix blooms by setting prefix_extractor in +// ColumnFamilyOptions. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class SliceTransform : public Customizable { public: virtual ~SliceTransform(){}; diff --git a/include/rocksdb/sst_file_manager.h b/include/rocksdb/sst_file_manager.h index 5aae88dc1ed..61329215121 100644 --- a/include/rocksdb/sst_file_manager.h +++ b/include/rocksdb/sst_file_manager.h @@ -21,7 +21,8 @@ class Logger; // SstFileManager is used to track SST and blob files in the DB and control // their deletion rate. All SstFileManager public functions are thread-safe. -// SstFileManager is not extensible. +// SstFileManager is NOT an extensible interface but a public interface for +// result of NewSstFileManager. Any derived classes must be RocksDB internal. class SstFileManager { public: virtual ~SstFileManager() {} diff --git a/include/rocksdb/sst_partitioner.h b/include/rocksdb/sst_partitioner.h index c8b8c39e5f2..dd719b43e29 100644 --- a/include/rocksdb/sst_partitioner.h +++ b/include/rocksdb/sst_partitioner.h @@ -78,6 +78,9 @@ class SstPartitioner { }; }; +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class SstPartitionerFactory : public Customizable { public: virtual ~SstPartitionerFactory() {} diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 9dc2bcce9cd..795f23fe42e 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -573,6 +573,10 @@ enum StatsLevel : uint8_t { // options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED); // HistogramData hist; // options.statistics->histogramData(FLUSH_TIME, &hist); +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class Statistics : public Customizable { public: virtual ~Statistics() {} diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index ad7c7fb37f0..e6d59c8212e 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -80,7 +80,14 @@ extern const std::string kRangeDelBlock; // a collection of callback functions that will be invoked during table // building. It is constructed with TablePropertiesCollectorFactory. The methods // don't need to be thread-safe, as we will create exactly one -// TablePropertiesCollector object per table and then call it sequentially +// TablePropertiesCollector object per table and then call it sequentially. +// +// Statuses from these callbacks are currently logged when not OK, but +// otherwise ignored by RocksDB. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class TablePropertiesCollector { public: virtual ~TablePropertiesCollector() {} @@ -133,6 +140,10 @@ class TablePropertiesCollector { // Constructs TablePropertiesCollector. Internals create a new // TablePropertiesCollector for each new table +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class TablePropertiesCollectorFactory : public Customizable { public: struct Context { diff --git a/include/rocksdb/utilities/backup_engine.h b/include/rocksdb/utilities/backup_engine.h index 2c31bb705d9..6dbce3364e2 100644 --- a/include/rocksdb/utilities/backup_engine.h +++ b/include/rocksdb/utilities/backup_engine.h @@ -262,6 +262,10 @@ struct CreateBackupOptions { bool flush_before_backup = false; // Callback for reporting progress, based on callback_trigger_interval_size. + // + // RocksDB callbacks are NOT exception-safe. A callback completing with an + // exception can lead to undefined behavior in RocksDB, including data loss, + // unreported corruption, deadlocks, and more. std::function progress_callback = []() {}; // If false, background_thread_cpu_priority is ignored. diff --git a/include/rocksdb/wal_filter.h b/include/rocksdb/wal_filter.h index 109080cf0da..80ed2dfc5b5 100644 --- a/include/rocksdb/wal_filter.h +++ b/include/rocksdb/wal_filter.h @@ -19,6 +19,10 @@ struct ConfigOptions; // WALFilter allows an application to inspect write-ahead-log (WAL) // records or modify their processing on recovery. // Please see the details below. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. class WalFilter : public Customizable { public: static const char* Type() { return "WalFilter"; } diff --git a/include/rocksdb/write_buffer_manager.h b/include/rocksdb/write_buffer_manager.h index a5471aabd25..065186f5356 100644 --- a/include/rocksdb/write_buffer_manager.h +++ b/include/rocksdb/write_buffer_manager.h @@ -23,8 +23,8 @@ namespace ROCKSDB_NAMESPACE { class CacheReservationManager; -// Interface to block and signal DB instances. -// Each DB instance contains ptr to StallInterface. +// Interface to block and signal DB instances, intended for RocksDB +// internal use only. Each DB instance contains ptr to StallInterface. class StallInterface { public: virtual ~StallInterface() {} @@ -34,7 +34,7 @@ class StallInterface { virtual void Signal() = 0; }; -class WriteBufferManager { +class WriteBufferManager final { public: // Parameters: // _buffer_size: _buffer_size = 0 indicates no limit. Memory won't be capped.