diff --git a/include/paimon/defs.h b/include/paimon/defs.h index 3ea0da43c..e633dcf6c 100644 --- a/include/paimon/defs.h +++ b/include/paimon/defs.h @@ -414,6 +414,13 @@ struct PAIMON_EXPORT Options { /// "lookup.cache.high-priority-pool-ratio" - The fraction of cache memory that is reserved for /// high-priority data like index, filter. Default value is 0.25. static const char LOOKUP_CACHE_HIGH_PRIO_POOL_RATIO[]; + /// "lookup.cache-file-retention" - The cached files retention time for lookup. + /// After the file expires, if there is a need for access, it will be re-read from the DFS + /// to build an index on the local disk. Default value is 1 hour. + static const char LOOKUP_CACHE_FILE_RETENTION[]; + /// "lookup.cache-max-disk-size" - Max disk size for lookup cache, you can use this option + /// to limit the use of local disks. Default value is unlimited (INT64_MAX). + static const char LOOKUP_CACHE_MAX_DISK_SIZE[]; }; static constexpr int64_t BATCH_WRITE_COMMIT_IDENTIFIER = std::numeric_limits::max(); diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index f771438a4..08bd8eaf8 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -243,6 +243,7 @@ set(PAIMON_CORE_SRCS core/mergetree/merge_tree_writer.cpp core/mergetree/write_buffer.cpp core/mergetree/levels.cpp + core/mergetree/lookup_file.cpp core/mergetree/lookup_levels.cpp core/mergetree/lookup/remote_lookup_file_manager.cpp core/migrate/file_meta_utils.cpp @@ -479,6 +480,7 @@ if(PAIMON_BUILD_TESTS) common/utils/uuid_test.cpp common/utils/decimal_utils_test.cpp common/utils/threadsafe_queue_test.cpp + common/utils/generic_lru_cache_test.cpp STATIC_LINK_LIBS paimon_shared test_utils_static diff --git a/src/paimon/common/defs.cpp b/src/paimon/common/defs.cpp index 113d1327b..70e830085 100644 --- a/src/paimon/common/defs.cpp +++ b/src/paimon/common/defs.cpp @@ -117,5 +117,7 @@ const char Options::LOOKUP_COMPACT[] = "lookup-compact"; const char 
Options::LOOKUP_COMPACT_MAX_INTERVAL[] = "lookup-compact.max-interval"; const char Options::LOOKUP_CACHE_MAX_MEMORY_SIZE[] = "lookup.cache-max-memory-size"; const char Options::LOOKUP_CACHE_HIGH_PRIO_POOL_RATIO[] = "lookup.cache.high-priority-pool-ratio"; +const char Options::LOOKUP_CACHE_FILE_RETENTION[] = "lookup.cache-file-retention"; +const char Options::LOOKUP_CACHE_MAX_DISK_SIZE[] = "lookup.cache-max-disk-size"; } // namespace paimon diff --git a/src/paimon/common/io/cache/cache.h b/src/paimon/common/io/cache/cache.h index a6335b1f6..e53ceca57 100644 --- a/src/paimon/common/io/cache/cache.h +++ b/src/paimon/common/io/cache/cache.h @@ -39,8 +39,8 @@ class Cache { std::function>(const std::shared_ptr&)> supplier) = 0; - virtual void Put(const std::shared_ptr& key, - const std::shared_ptr& value) = 0; + virtual Status Put(const std::shared_ptr& key, + const std::shared_ptr& value) = 0; virtual void Invalidate(const std::shared_ptr& key) = 0; @@ -65,6 +65,13 @@ class CacheValue { } } + bool operator==(const CacheValue& other) const { + if (this == &other) { + return true; + } + return segment_ == other.segment_; + } + private: MemorySegment segment_; CacheCallback callback_; diff --git a/src/paimon/common/io/cache/lru_cache.cpp b/src/paimon/common/io/cache/lru_cache.cpp index 172622b1f..5eb23309b 100644 --- a/src/paimon/common/io/cache/lru_cache.cpp +++ b/src/paimon/common/io/cache/lru_cache.cpp @@ -18,128 +18,51 @@ namespace paimon { -LruCache::LruCache(int64_t max_weight) : max_weight_(max_weight), current_weight_(0) {} +LruCache::LruCache(int64_t max_weight) + : inner_cache_(InnerCache::Options{ + .max_weight = max_weight, + .expire_after_access_ms = -1, + .weigh_func = [](const std::shared_ptr& /*key*/, + const std::shared_ptr& value) -> int64_t { + return value ? 
value->GetSegment().Size() : 0; + }, + .removal_callback = + [](const std::shared_ptr& key, const std::shared_ptr& value, + auto cause) { + if (value) { + value->OnEvict(key); + } + }}) {} Result> LruCache::Get( const std::shared_ptr& key, std::function>(const std::shared_ptr&)> supplier) { - { - std::unique_lock write_lock(mutex_); - auto cached = FindAndPromote(key); - if (cached) { - return cached.value(); - } - } - // Cache miss: load via supplier (outside lock) - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr value, supplier(key)); - if (GetWeight(value) > max_weight_) { - return value; - } - - std::unique_lock write_lock(mutex_); - // Another thread may have inserted the key while we were loading - auto cached = FindAndPromote(key); - if (cached) { - return cached.value(); - } - - Insert(key, value); - EvictIfNeeded(); - return value; + return inner_cache_.Get(key, std::move(supplier)); } -void LruCache::Put(const std::shared_ptr& key, const std::shared_ptr& value) { - if (GetWeight(value) > max_weight_) { - return; - } - std::unique_lock write_lock(mutex_); - - auto it = lru_map_.find(key); - if (it != lru_map_.end()) { - // Update existing entry: adjust weight - current_weight_ -= GetWeight(it->second->second); - it->second->second = value; - current_weight_ += GetWeight(value); - lru_list_.splice(lru_list_.begin(), lru_list_, it->second); - } else { - Insert(key, value); - } - - EvictIfNeeded(); +Status LruCache::Put(const std::shared_ptr& key, + const std::shared_ptr& value) { + return inner_cache_.Put(key, value); } void LruCache::Invalidate(const std::shared_ptr& key) { - std::unique_lock write_lock(mutex_); - - auto it = lru_map_.find(key); - if (it != lru_map_.end()) { - RemoveEntry(it->second); - } + inner_cache_.Invalidate(key); } void LruCache::InvalidateAll() { - std::unique_lock write_lock(mutex_); - - while (!lru_list_.empty()) { - RemoveEntry(std::prev(lru_list_.end())); - } - current_weight_ = 0; + inner_cache_.InvalidateAll(); } size_t 
LruCache::Size() const { - std::shared_lock read_lock(mutex_); - return lru_map_.size(); + return inner_cache_.Size(); } int64_t LruCache::GetCurrentWeight() const { - std::shared_lock read_lock(mutex_); - return current_weight_; + return inner_cache_.GetCurrentWeight(); } int64_t LruCache::GetMaxWeight() const { - return max_weight_; -} - -std::optional> LruCache::FindAndPromote( - const std::shared_ptr& key) { - auto it = lru_map_.find(key); - if (it != lru_map_.end()) { - lru_list_.splice(lru_list_.begin(), lru_list_, it->second); - return it->second->second; - } - return std::nullopt; + return inner_cache_.GetMaxWeight(); } -void LruCache::Insert(const std::shared_ptr& key, - const std::shared_ptr& value) { - // Insert at front of LRU list - lru_list_.emplace_front(key, value); - lru_map_[key] = lru_list_.begin(); - current_weight_ += GetWeight(value); -} - -void LruCache::RemoveEntry(LruList::iterator list_it) { - auto entry_key = list_it->first; - auto entry_value = list_it->second; - current_weight_ -= GetWeight(entry_value); - lru_map_.erase(entry_key); - lru_list_.erase(list_it); - - if (entry_value) { - entry_value->OnEvict(entry_key); - } -} - -void LruCache::EvictIfNeeded() { - while (current_weight_ > max_weight_ && !lru_list_.empty()) { - RemoveEntry(std::prev(lru_list_.end())); - } -} - -int64_t LruCache::GetWeight(const std::shared_ptr& value) { - if (!value) { - return 0; - } - return value->GetSegment().Size(); -} } // namespace paimon diff --git a/src/paimon/common/io/cache/lru_cache.h b/src/paimon/common/io/cache/lru_cache.h index 2a4e85afc..e58175687 100644 --- a/src/paimon/common/io/cache/lru_cache.h +++ b/src/paimon/common/io/cache/lru_cache.h @@ -15,30 +15,25 @@ */ #pragma once + #include #include -#include #include -#include -#include -#include -#include -#include -#include #include "paimon/common/io/cache/cache.h" #include "paimon/common/io/cache/cache_key.h" -#include "paimon/common/memory/memory_segment.h" +#include 
"paimon/common/utils/generic_lru_cache.h" #include "paimon/result.h" namespace paimon { -/// LRU Cache implementation with weight-based eviction. -/// Uses std::list + unordered_map for O(1) get/put/evict: -/// list stores entries in LRU order (most recently used at front) -/// map stores key -> list::iterator for O(1) lookup -/// capacity is measured in bytes (sum of MemorySegment sizes) -/// when an entry is evicted, its CacheCallback is invoked to notify the upper layer -/// @note Thread-safe: all public methods are protected by mutex (read-write lock). + +/// LRU Cache implementation with weight-based eviction for block cache. +/// +/// Wraps GenericLruCache with CacheKey/CacheValue types. Capacity is measured +/// in bytes (sum of MemorySegment sizes). When an entry is evicted, its +/// CacheCallback is invoked to notify the upper layer. +/// +/// @note Thread-safe: all public methods are protected by the underlying GenericLruCache lock. class LruCache : public Cache { public: explicit LruCache(int64_t max_weight); @@ -48,8 +43,8 @@ class LruCache : public Cache { std::function>(const std::shared_ptr&)> supplier) override; - void Put(const std::shared_ptr& key, - const std::shared_ptr& value) override; + Status Put(const std::shared_ptr& key, + const std::shared_ptr& value) override; void Invalidate(const std::shared_ptr& key) override; @@ -62,24 +57,10 @@ class LruCache : public Cache { int64_t GetMaxWeight() const; private: - using LruEntry = std::pair, std::shared_ptr>; - using LruList = std::list; - using LruMap = std::unordered_map, LruList::iterator, CacheKeyHash, - CacheKeyEqual>; - - std::optional> FindAndPromote(const std::shared_ptr& key); - void Insert(const std::shared_ptr& key, const std::shared_ptr& value); - void RemoveEntry(LruList::iterator list_it); - - void EvictIfNeeded(); - - static int64_t GetWeight(const std::shared_ptr& value); + using InnerCache = GenericLruCache, std::shared_ptr, + CacheKeyHash, CacheKeyEqual>; - int64_t max_weight_; 
- int64_t current_weight_; - LruList lru_list_; - LruMap lru_map_; - mutable std::shared_mutex mutex_; + InnerCache inner_cache_; }; } // namespace paimon diff --git a/src/paimon/common/io/cache/lru_cache_test.cpp b/src/paimon/common/io/cache/lru_cache_test.cpp index 4b56a82d2..64c4c3a09 100644 --- a/src/paimon/common/io/cache/lru_cache_test.cpp +++ b/src/paimon/common/io/cache/lru_cache_test.cpp @@ -95,12 +95,12 @@ TEST_F(LruCacheTest, TestPutInsertAndUpdate) { auto value_b = MakeValue(128, 'B'); // Insert - cache.Put(key, value_a); + ASSERT_OK(cache.Put(key, value_a)); ASSERT_EQ(cache.Size(), 1); ASSERT_EQ(cache.GetCurrentWeight(), 64); // Update with larger value - cache.Put(key, value_b); + ASSERT_OK(cache.Put(key, value_b)); ASSERT_EQ(cache.Size(), 1); ASSERT_EQ(cache.GetCurrentWeight(), 128); @@ -120,13 +120,13 @@ TEST_F(LruCacheTest, TestWeightBasedEviction) { auto key2 = MakeKey(2); // Insert 2 entries of 100 bytes each (total 200, at capacity) - cache.Put(key0, MakeValue(100, 'A')); - cache.Put(key1, MakeValue(100, 'B')); + ASSERT_OK(cache.Put(key0, MakeValue(100, 'A'))); + ASSERT_OK(cache.Put(key1, MakeValue(100, 'B'))); ASSERT_EQ(cache.Size(), 2); ASSERT_EQ(cache.GetCurrentWeight(), 200); // Insert a 3rd entry: should evict key0 (LRU, inserted first) - cache.Put(key2, MakeValue(100, 'C')); + ASSERT_OK(cache.Put(key2, MakeValue(100, 'C'))); ASSERT_EQ(cache.Size(), 2); ASSERT_EQ(cache.GetCurrentWeight(), 200); @@ -156,12 +156,12 @@ TEST_F(LruCacheTest, TestEvictionCallback) { auto key1 = MakeKey(1); auto key2 = MakeKey(2); - cache.Put(key0, MakeValue(100, 'A', make_callback(0))); - cache.Put(key1, MakeValue(100, 'B', make_callback(1))); + ASSERT_OK(cache.Put(key0, MakeValue(100, 'A', make_callback(0)))); + ASSERT_OK(cache.Put(key1, MakeValue(100, 'B', make_callback(1)))); ASSERT_TRUE(evicted_positions.empty()); // Insert key2: should evict key0 and trigger its callback - cache.Put(key2, MakeValue(100, 'C', make_callback(2))); + ASSERT_OK(cache.Put(key2, 
MakeValue(100, 'C', make_callback(2)))); ASSERT_EQ(evicted_positions.size(), 1); ASSERT_EQ(evicted_positions[0], 0); } @@ -174,8 +174,8 @@ TEST_F(LruCacheTest, TestLruOrdering) { auto key1 = MakeKey(1); auto key2 = MakeKey(2); - cache.Put(key0, MakeValue(100, 'A')); - cache.Put(key1, MakeValue(100, 'B')); + ASSERT_OK(cache.Put(key0, MakeValue(100, 'A'))); + ASSERT_OK(cache.Put(key1, MakeValue(100, 'B'))); // Access key0 via Get to move it to front (most recently used) ASSERT_OK_AND_ASSIGN(auto val, cache.Get(key0, MakeSupplier(0))); @@ -188,14 +188,14 @@ TEST_F(LruCacheTest, TestLruOrdering) { // Re-insert with callbacks to track eviction cache.InvalidateAll(); - cache.Put(key0, MakeValue(100, 'A', callback0)); - cache.Put(key1, MakeValue(100, 'B', callback1)); + ASSERT_OK(cache.Put(key0, MakeValue(100, 'A', callback0))); + ASSERT_OK(cache.Put(key1, MakeValue(100, 'B', callback1))); // Access key0 to move it to front ASSERT_OK_AND_ASSIGN(val, cache.Get(key0, MakeSupplier(0))); // Insert key2: key1 should be evicted (it's at the back) - cache.Put(key2, MakeValue(100, 'C')); + ASSERT_OK(cache.Put(key2, MakeValue(100, 'C'))); ASSERT_EQ(evicted.size(), 1); ASSERT_EQ(evicted[0], 1); ASSERT_EQ(cache.Size(), 2); @@ -211,8 +211,8 @@ TEST_F(LruCacheTest, TestInvalidate) { std::vector evicted; auto callback0 = [&evicted](const std::shared_ptr&) { evicted.push_back(0); }; auto callback1 = [&evicted](const std::shared_ptr&) { evicted.push_back(1); }; - cache.Put(key0, MakeValue(100, 'A', callback0)); - cache.Put(key1, MakeValue(200, 'B', callback1)); + ASSERT_OK(cache.Put(key0, MakeValue(100, 'A', callback0))); + ASSERT_OK(cache.Put(key1, MakeValue(200, 'B', callback1))); ASSERT_EQ(cache.Size(), 2); ASSERT_EQ(cache.GetCurrentWeight(), 300); ASSERT_TRUE(evicted.empty()); @@ -239,7 +239,7 @@ TEST_F(LruCacheTest, TestInvalidateAll) { auto callback = [&evicted, id = i](const std::shared_ptr&) { evicted.push_back(id); }; - cache.Put(MakeKey(i), MakeValue(50, 'A', callback)); + 
ASSERT_OK(cache.Put(MakeKey(i), MakeValue(50, 'A', callback))); } ASSERT_EQ(cache.Size(), 5); ASSERT_EQ(cache.GetCurrentWeight(), 250); @@ -259,15 +259,15 @@ TEST_F(LruCacheTest, TestWeightTracking) { auto key1 = MakeKey(1); // Put 100 bytes - cache.Put(key0, MakeValue(100)); + ASSERT_OK(cache.Put(key0, MakeValue(100))); ASSERT_EQ(cache.GetCurrentWeight(), 100); // Put 200 bytes - cache.Put(key1, MakeValue(200)); + ASSERT_OK(cache.Put(key1, MakeValue(200))); ASSERT_EQ(cache.GetCurrentWeight(), 300); // Update key0 from 100 to 150 bytes - cache.Put(key0, MakeValue(150)); + ASSERT_OK(cache.Put(key0, MakeValue(150))); ASSERT_EQ(cache.GetCurrentWeight(), 350); // Invalidate key1 (200 bytes) @@ -275,11 +275,11 @@ TEST_F(LruCacheTest, TestWeightTracking) { ASSERT_EQ(cache.GetCurrentWeight(), 150); // Put 200 bytes - cache.Put(MakeKey(2), MakeValue(200)); + ASSERT_OK(cache.Put(MakeKey(2), MakeValue(200))); ASSERT_EQ(cache.GetCurrentWeight(), 350); // Add: total would be 550 > 500, should evict key0 (150 bytes) - cache.Put(MakeKey(3), MakeValue(200)); + ASSERT_OK(cache.Put(MakeKey(3), MakeValue(200))); ASSERT_EQ(cache.GetCurrentWeight(), 400); ASSERT_EQ(cache.Size(), 2); } @@ -347,14 +347,14 @@ TEST_F(LruCacheTest, TestPutMovesToFront) { return [&evicted, pos](const std::shared_ptr&) { evicted.push_back(pos); }; }; - cache.Put(key0, MakeValue(100, 'A', make_callback(0))); - cache.Put(key1, MakeValue(100, 'B', make_callback(1))); + ASSERT_OK(cache.Put(key0, MakeValue(100, 'A', make_callback(0)))); + ASSERT_OK(cache.Put(key1, MakeValue(100, 'B', make_callback(1)))); - // Update key0 via Put (should move it to front) - cache.Put(key0, MakeValue(100, 'A', make_callback(0))); + // Get key0 (should move it to front) + ASSERT_OK(cache.Get(key0, /*supplier=*/nullptr)); // Insert key2: should evict key1 (now at back), not key0 - cache.Put(key2, MakeValue(100, 'C', make_callback(2))); + ASSERT_OK(cache.Put(key2, MakeValue(100, 'C', make_callback(2)))); ASSERT_EQ(evicted.size(), 1); 
ASSERT_EQ(evicted[0], 1); } @@ -369,14 +369,14 @@ TEST_F(LruCacheTest, TestMultipleEvictions) { }; // Insert 3 entries of 100 bytes each - cache.Put(MakeKey(0), MakeValue(100, 'A', make_callback(0))); - cache.Put(MakeKey(1), MakeValue(100, 'B', make_callback(1))); - cache.Put(MakeKey(2), MakeValue(100, 'C', make_callback(2))); + ASSERT_OK(cache.Put(MakeKey(0), MakeValue(100, 'A', make_callback(0)))); + ASSERT_OK(cache.Put(MakeKey(1), MakeValue(100, 'B', make_callback(1)))); + ASSERT_OK(cache.Put(MakeKey(2), MakeValue(100, 'C', make_callback(2)))); ASSERT_EQ(cache.Size(), 3); ASSERT_EQ(cache.GetCurrentWeight(), 300); // Insert a 250-byte entry: should evict key0, key1 and key2. - cache.Put(MakeKey(3), MakeValue(250, 'D')); + ASSERT_OK(cache.Put(MakeKey(3), MakeValue(250, 'D'))); ASSERT_EQ(cache.Size(), 1); ASSERT_EQ(cache.GetCurrentWeight(), 250); diff --git a/src/paimon/common/memory/memory_segment.h b/src/paimon/common/memory/memory_segment.h index 03205d883..909e5fb76 100644 --- a/src/paimon/common/memory/memory_segment.h +++ b/src/paimon/common/memory/memory_segment.h @@ -48,6 +48,19 @@ class PAIMON_EXPORT MemorySegment { MemorySegment& operator=(const MemorySegment& other) = default; + bool operator==(const MemorySegment& other) const { + if (this == &other) { + return true; + } + if (heap_memory_ == other.heap_memory_) { + return true; + } + if (!heap_memory_ || !other.heap_memory_) { + return false; + } + return *heap_memory_ == *other.heap_memory_; + } + inline int32_t Size() const { return heap_memory_->size(); } diff --git a/src/paimon/common/utils/generic_lru_cache.h b/src/paimon/common/utils/generic_lru_cache.h new file mode 100644 index 000000000..9d799fcb8 --- /dev/null +++ b/src/paimon/common/utils/generic_lru_cache.h @@ -0,0 +1,326 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fmt/format.h" +#include "paimon/result.h" +#include "paimon/traits.h" + +namespace paimon { +/// A generic LRU cache with support for weight-based eviction, time-based expiration, +/// and removal callbacks. +/// +/// Uses std::list + unordered_map for O(1) get/put/evict: +/// - list stores entries in LRU order (most recently used at front) +/// - map stores key -> list::iterator for O(1) lookup +/// +/// @tparam K Key type +/// @tparam V Value type +/// @tparam Hash Hash function for K (default: std::hash) +/// @tparam KeyEqual Equality function for K (default: std::equal_to) +/// +/// @note Thread-safe: all public methods are protected by a read-write lock. +template , + typename KeyEqual = std::equal_to> +class GenericLruCache { + public: + /// Cause of a cache entry removal, passed to the removal callback. + enum class RemovalCause { + EXPLICIT, // Removed by Invalidate() or InvalidateAll() + SIZE, // Evicted because total weight exceeded max_weight + EXPIRED, // Evicted because the entry expired (expireAfterAccess) + REPLACED // Replaced by a new value for the same key via Put() + }; + + using WeighFunc = std::function; + using RemovalCallback = std::function; + + /// Configuration options for the cache. + struct Options { + /// Maximum total weight of all entries. Entries are evicted (LRU) when exceeded. + int64_t max_weight = INT64_MAX; + + /// Time in milliseconds after last access before an entry expires. 
+ /// A value < 0 disables expiration. + int64_t expire_after_access_ms = -1; + + /// Function to compute the weight of an entry. If nullptr, each entry has weight 1. + WeighFunc weigh_func = nullptr; + + /// Callback invoked when an entry is removed (evicted, invalidated, or replaced). + /// If nullptr, no callback is invoked. + RemovalCallback removal_callback = nullptr; + }; + + explicit GenericLruCache(Options options) : options_(std::move(options)) {} + + /// Look up a key in the cache. On hit, promotes the entry to the front (most recently + /// used) and updates its access time. Returns std::nullopt on miss or if the entry + /// has expired. + std::optional GetIfPresent(const K& key) { + std::unique_lock lock(mutex_); + return FindPromoteOrExpire(key); + } + + /// Look up a key. On miss, load via the supplier, insert into cache, and return. + /// If the supplier returns an error, the error is propagated and nothing is cached. + Result Get(const K& key, std::function(const K&)> supplier) { + { + std::unique_lock lock(mutex_); + auto cached = FindPromoteOrExpire(key); + if (cached.has_value()) { + return std::move(cached.value()); + } + } + + // Cache miss: load via supplier outside the lock + PAIMON_ASSIGN_OR_RAISE(V value, supplier(key)); + int64_t weight = ComputeWeight(key, value); + if (weight > options_.max_weight) { + return value; + } + + std::unique_lock lock(mutex_); + // Double-check: another thread may have inserted while we were loading + auto cached = FindPromoteOrExpire(key); + if (cached.has_value()) { + return std::move(cached.value()); + } + + InsertEntry(key, value, weight); + EvictIfNeeded(); + return value; + } + + /// Insert or update an entry. If the key already exists, the old value is replaced + /// and the REPLACED callback is invoked. Triggers eviction if needed. + /// @return Status::Invalid if the entry's weight exceeds max_weight, Status::OK otherwise. 
+ Status Put(const K& key, V value) { + int64_t weight = ComputeWeight(key, value); + if (weight > options_.max_weight) { + return Status::Invalid( + fmt::format("Entry weight {} exceeds cache max weight {}, entry will not be cached", + weight, options_.max_weight)); + } + + std::unique_lock lock(mutex_); + auto it = lru_map_.find(key); + if (it != lru_map_.end()) { + if (ValuesEqual(it->second->value, value)) { + Promote(it->second); + return Status::OK(); + } + ReplaceEntry(it->second, std::move(value), weight); + } else { + InsertEntry(key, std::move(value), weight); + } + + EvictIfNeeded(); + return Status::OK(); + } + + /// Remove a specific entry. Invokes the EXPLICIT removal callback if the key exists. + void Invalidate(const K& key) { + std::unique_lock lock(mutex_); + auto it = lru_map_.find(key); + if (it != lru_map_.end()) { + RemoveEntry(it->second, RemovalCause::EXPLICIT); + } + } + + /// Remove all entries. Each entry's EXPLICIT removal callback is invoked. + void InvalidateAll() { + std::unique_lock lock(mutex_); + while (!lru_list_.empty()) { + RemoveEntry(std::prev(lru_list_.end()), RemovalCause::EXPLICIT); + } + current_weight_ = 0; + } + + /// @return The number of entries currently in the cache. + size_t Size() const { + std::shared_lock lock(mutex_); + return lru_map_.size(); + } + + /// @return The current total weight of all entries. + int64_t GetCurrentWeight() const { + std::shared_lock lock(mutex_); + return current_weight_; + } + + /// @return The maximum weight configured for this cache. + int64_t GetMaxWeight() const { + return options_.max_weight; + } + + private: + struct CacheEntry { + K key; + V value; + int64_t weight; + std::chrono::steady_clock::time_point last_access_time; + }; + + using EntryList = std::list; + using EntryMap = std::unordered_map; + + /// Look up a key, promote if found and not expired, or remove if expired. + /// Must be called with mutex_ held. 
+ /// @return The value if found and valid, std::nullopt otherwise. + std::optional FindPromoteOrExpire(const K& key) { + auto it = lru_map_.find(key); + if (it == lru_map_.end()) { + return std::nullopt; + } + auto list_it = it->second; + if (IsExpired(list_it->last_access_time)) { + RemoveEntry(list_it, RemovalCause::EXPIRED); + return std::nullopt; + } + Promote(list_it); + return list_it->value; + } + + /// Move an entry to the front of the LRU list and update its access time. + void Promote(typename EntryList::iterator list_it) { + list_it->last_access_time = std::chrono::steady_clock::now(); + lru_list_.splice(lru_list_.begin(), lru_list_, list_it); + } + + /// Insert a new entry at the front of the LRU list. + void InsertEntry(const K& key, V value, int64_t weight) { + lru_list_.emplace_front( + CacheEntry{key, std::move(value), weight, std::chrono::steady_clock::now()}); + lru_map_[key] = lru_list_.begin(); + current_weight_ += weight; + } + + /// Compare two values for equality. For pointers, compares the underlying + /// pointer first, then dereferences and compares the pointed-to objects. + /// For other types, uses operator==. + static bool ValuesEqual(const V& lhs, const V& rhs) { + if constexpr (is_pointer::value) { + if (lhs == rhs) { + return true; + } + if (!lhs || !rhs) { + return false; + } + return *lhs == *rhs; + } else { + return lhs == rhs; + } + } + + /// Replace the value of an existing entry, invoke the REPLACED callback for the old value, + /// and promote the entry to the front. 
+ void ReplaceEntry(typename EntryList::iterator list_it, V new_value, int64_t new_weight) { + current_weight_ -= list_it->weight; + + K key = list_it->key; + V old_value = std::move(list_it->value); + list_it->value = std::move(new_value); + list_it->weight = new_weight; + list_it->last_access_time = std::chrono::steady_clock::now(); + current_weight_ += new_weight; + lru_list_.splice(lru_list_.begin(), lru_list_, list_it); + + InvokeCallback(key, old_value, RemovalCause::REPLACED); + } + + /// Remove an entry from the cache and invoke the removal callback. + void RemoveEntry(typename EntryList::iterator list_it, RemovalCause cause) { + lru_map_.erase(list_it->key); + K key = std::move(list_it->key); + V value = std::move(list_it->value); + current_weight_ -= list_it->weight; + lru_list_.erase(list_it); + InvokeCallback(key, value, cause); + } + + /// Evict expired entries from the tail, then evict by weight if still over capacity. + void EvictIfNeeded() { + EvictExpired(); + while (current_weight_ > options_.max_weight && !lru_list_.empty()) { + RemoveEntry(std::prev(lru_list_.end()), RemovalCause::SIZE); + } + } + + /// Evict expired entries from the tail of the LRU list. + /// Since the tail has the oldest access time, we can stop as soon as we find + /// a non-expired entry. + void EvictExpired() { + if (options_.expire_after_access_ms < 0) { + return; + } + auto now = std::chrono::steady_clock::now(); + while (!lru_list_.empty()) { + auto it = std::prev(lru_list_.end()); + if (!IsExpired(it->last_access_time, now)) { + break; + } + RemoveEntry(it, RemovalCause::EXPIRED); + } + } + + /// Compute the weight of an entry using the configured weigh function. + int64_t ComputeWeight(const K& key, const V& value) const { + if (options_.weigh_func) { + return options_.weigh_func(key, value); + } + return 1; + } + + /// Check if an entry has expired based on its last access time. 
+ bool IsExpired( + const std::chrono::steady_clock::time_point& last_access_time, + const std::chrono::steady_clock::time_point& now = std::chrono::steady_clock::now()) const { + if (options_.expire_after_access_ms < 0) { + return false; + } + auto elapsed = + std::chrono::duration_cast(now - last_access_time); + return elapsed.count() >= options_.expire_after_access_ms; + } + + /// Invoke the removal callback if one is configured. + void InvokeCallback(const K& key, const V& value, RemovalCause cause) { + if (options_.removal_callback) { + options_.removal_callback(key, value, cause); + } + } + + Options options_; + int64_t current_weight_ = 0; + EntryList lru_list_; + EntryMap lru_map_; + mutable std::shared_mutex mutex_; +}; + +} // namespace paimon diff --git a/src/paimon/common/utils/generic_lru_cache_test.cpp b/src/paimon/common/utils/generic_lru_cache_test.cpp new file mode 100644 index 000000000..40c8f275f --- /dev/null +++ b/src/paimon/common/utils/generic_lru_cache_test.cpp @@ -0,0 +1,890 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "paimon/common/utils/generic_lru_cache.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "paimon/testing/utils/testharness.h" + +namespace paimon::test { + +class GenericLruCacheTest : public ::testing::Test { + public: + using StringIntCache = GenericLruCache; + using StringStringCache = GenericLruCache; + using IntIntCache = GenericLruCache; + using StringSharedPtrCache = GenericLruCache>; + using RemovalCause = StringIntCache::RemovalCause; + + struct RemovalRecord { + std::string key; + std::string value; + RemovalCause cause; + }; +}; + +// ==================== Basic Operations ==================== + +TEST_F(GenericLruCacheTest, ConstructorAndDefaults) { + { + StringIntCache::Options options; + StringIntCache cache(options); + + ASSERT_EQ(cache.Size(), 0); + ASSERT_EQ(cache.GetCurrentWeight(), 0); + ASSERT_EQ(cache.GetMaxWeight(), INT64_MAX); + } + { + StringIntCache::Options options; + options.max_weight = 42; + StringIntCache cache(options); + ASSERT_EQ(cache.GetMaxWeight(), 42); + } +} + +// ==================== GetIfPresent ==================== + +TEST_F(GenericLruCacheTest, GetIfPresentMissAndHit) { + StringIntCache::Options options; + StringIntCache cache(options); + + auto result = cache.GetIfPresent("nonexistent"); + ASSERT_FALSE(result.has_value()); + + ASSERT_OK(cache.Put("key1", 100)); + result = cache.GetIfPresent("key1"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), 100); +} + +TEST_F(GenericLruCacheTest, GetIfPresentExpired) { + std::vector removals; + StringIntCache::Options options; + options.expire_after_access_ms = 50; + options.removal_callback = [&](const std::string& key, const int& value, auto cause) { + removals.push_back({key, std::to_string(value), static_cast(cause)}); + }; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("key1", 100)); + ASSERT_EQ(cache.Size(), 1); + + std::this_thread::sleep_for(std::chrono::milliseconds(80)); + 
+ auto result = cache.GetIfPresent("key1"); + ASSERT_FALSE(result.has_value()); + ASSERT_EQ(cache.Size(), 0); + + ASSERT_EQ(removals.size(), 1); + ASSERT_EQ(removals[0].key, "key1"); + ASSERT_EQ(removals[0].cause, RemovalCause::EXPIRED); +} + +TEST_F(GenericLruCacheTest, GetIfPresentPromotesEntry) { + std::vector removals; + StringIntCache::Options options; + options.max_weight = 2; + options.removal_callback = [&](const std::string& key, const int& value, auto cause) { + removals.push_back({key, std::to_string(value), static_cast(cause)}); + }; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("a", 1)); + ASSERT_OK(cache.Put("b", 2)); + + // Access "a" to promote it + auto result = cache.GetIfPresent("a"); + ASSERT_TRUE(result.has_value()); + + // Insert "c": should evict "b" (LRU), not "a" + ASSERT_OK(cache.Put("c", 3)); + ASSERT_EQ(removals.size(), 1); + ASSERT_EQ(removals[0].key, "b"); + ASSERT_EQ(removals[0].cause, RemovalCause::SIZE); +} + +// ==================== Get with supplier ==================== + +TEST_F(GenericLruCacheTest, GetCacheMissLoadsViaSupplier) { + StringIntCache::Options options; + StringIntCache cache(options); + + int supplier_calls = 0; + auto supplier = [&](const std::string& key) -> Result { + supplier_calls++; + return 42; + }; + + ASSERT_OK_AND_ASSIGN(auto value, cache.Get("key1", supplier)); + ASSERT_EQ(value, 42); + ASSERT_EQ(supplier_calls, 1); + ASSERT_EQ(cache.Size(), 1); +} + +TEST_F(GenericLruCacheTest, GetCacheHitSkipsSupplier) { + StringIntCache::Options options; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("key1", 100)); + + int supplier_calls = 0; + auto supplier = [&](const std::string& key) -> Result { + supplier_calls++; + return 999; + }; + + ASSERT_OK_AND_ASSIGN(auto value, cache.Get("key1", supplier)); + ASSERT_EQ(value, 100); + ASSERT_EQ(supplier_calls, 0); +} + +TEST_F(GenericLruCacheTest, GetSupplierError) { + StringIntCache::Options options; + StringIntCache cache(options); + + auto supplier = 
[](const std::string& key) -> Result { + return Status::IOError("load failed"); + }; + + ASSERT_NOK_WITH_MSG(cache.Get("key1", supplier), "load failed"); + ASSERT_EQ(cache.Size(), 0); +} + +TEST_F(GenericLruCacheTest, GetWeightExceedsMaxReturnsWithoutCaching) { + StringStringCache::Options options; + options.max_weight = 5; + options.weigh_func = [](const std::string& key, const std::string& value) -> int64_t { + return static_cast(value.size()); + }; + StringStringCache cache(options); + + auto supplier = [](const std::string& key) -> Result { + return std::string("this_is_a_very_long_value"); + }; + + ASSERT_OK_AND_ASSIGN(auto value, cache.Get("key1", supplier)); + ASSERT_EQ(value, "this_is_a_very_long_value"); + ASSERT_EQ(cache.Size(), 0); +} + +TEST_F(GenericLruCacheTest, GetTriggersEviction) { + std::vector removals; + StringIntCache::Options options; + options.max_weight = 2; + options.removal_callback = [&](const std::string& key, const int& value, auto cause) { + removals.push_back({key, std::to_string(value), static_cast(cause)}); + }; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("a", 1)); + ASSERT_OK(cache.Put("b", 2)); + ASSERT_EQ(cache.Size(), 2); + + auto supplier = [](const std::string& key) -> Result { return 3; }; + ASSERT_OK_AND_ASSIGN(auto value, cache.Get("c", supplier)); + ASSERT_EQ(value, 3); + ASSERT_EQ(cache.Size(), 2); + + ASSERT_EQ(removals.size(), 1); + ASSERT_EQ(removals[0].key, "a"); + ASSERT_EQ(removals[0].cause, RemovalCause::SIZE); +} + +// ==================== Put ==================== + +TEST_F(GenericLruCacheTest, PutNewEntry) { + StringIntCache::Options options; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("key1", 100)); + ASSERT_EQ(cache.Size(), 1); + ASSERT_EQ(cache.GetCurrentWeight(), 1); + + auto result = cache.GetIfPresent("key1"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), 100); +} + +TEST_F(GenericLruCacheTest, PutReplaceWithDifferentValue) { + std::vector removals; + 
StringIntCache::Options options; + options.removal_callback = [&](const std::string& key, const int& value, auto cause) { + removals.push_back({key, std::to_string(value), static_cast(cause)}); + }; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("key1", 100)); + ASSERT_OK(cache.Put("key1", 200)); + ASSERT_EQ(cache.Size(), 1); + + auto result = cache.GetIfPresent("key1"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), 200); + + ASSERT_EQ(removals.size(), 1); + ASSERT_EQ(removals[0].key, "key1"); + ASSERT_EQ(removals[0].value, "100"); + ASSERT_EQ(removals[0].cause, RemovalCause::REPLACED); +} + +TEST_F(GenericLruCacheTest, PutReplaceWithSameValuePromotes) { + std::vector removals; + StringIntCache::Options options; + options.max_weight = 2; + options.removal_callback = [&](const std::string& key, const int& value, auto cause) { + removals.push_back({key, std::to_string(value), static_cast(cause)}); + }; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("a", 1)); + ASSERT_OK(cache.Put("b", 2)); + + // Put same value for "a" — should promote without REPLACED callback + ASSERT_OK(cache.Put("a", 1)); + ASSERT_TRUE(removals.empty()); + + // Insert "c": should evict "b" (LRU after "a" was promoted) + ASSERT_OK(cache.Put("c", 3)); + ASSERT_EQ(removals.size(), 1); + ASSERT_EQ(removals[0].key, "b"); + ASSERT_EQ(removals[0].cause, RemovalCause::SIZE); +} + +TEST_F(GenericLruCacheTest, PutWeightExceedsMaxReturnsInvalid) { + StringStringCache::Options options; + options.max_weight = 5; + options.weigh_func = [](const std::string& key, const std::string& value) -> int64_t { + return static_cast(value.size()); + }; + StringStringCache cache(options); + + ASSERT_NOK_WITH_MSG(cache.Put("key1", "this_is_too_long"), + "Entry weight 16 exceeds cache max weight 5, entry will not be cached"); + ASSERT_EQ(cache.Size(), 0); +} + +TEST_F(GenericLruCacheTest, PutTriggersWeightEviction) { + std::vector removals; + StringStringCache::Options options; + 
options.max_weight = 10; + options.weigh_func = [](const std::string& key, const std::string& value) -> int64_t { + return static_cast(value.size()); + }; + options.removal_callback = [&](const std::string& key, const std::string& value, auto cause) { + removals.push_back({key, value, static_cast(cause)}); + }; + StringStringCache cache(options); + + ASSERT_OK(cache.Put("a", "aaaa")); // weight 4 + ASSERT_OK(cache.Put("b", "bbbbb")); // weight 5, total 9 + ASSERT_EQ(cache.GetCurrentWeight(), 9); + + // Insert "c" with weight 5: total would be 14 > 10, evict "a" (4), total becomes 10 + ASSERT_OK(cache.Put("c", "ccccc")); + ASSERT_EQ(cache.Size(), 2); + ASSERT_EQ(cache.GetCurrentWeight(), 10); + + ASSERT_EQ(removals.size(), 1); + ASSERT_EQ(removals[0].key, "a"); + ASSERT_EQ(removals[0].cause, RemovalCause::SIZE); +} + +TEST_F(GenericLruCacheTest, PutMultipleEvictions) { + std::vector evicted_keys; + StringStringCache::Options options; + options.max_weight = 10; + options.weigh_func = [](const std::string& key, const std::string& value) -> int64_t { + return static_cast(value.size()); + }; + options.removal_callback = [&](const std::string& key, const std::string& value, auto cause) { + evicted_keys.push_back(key); + }; + StringStringCache cache(options); + + ASSERT_OK(cache.Put("a", "aaa")); // weight 3 + ASSERT_OK(cache.Put("b", "bbb")); // weight 3 + ASSERT_OK(cache.Put("c", "ccc")); // weight 3, total 9 + + // Insert "d" with weight 9: total would be 18 > 10, evict a(3), b(3), c(3) then add d(9) + ASSERT_OK(cache.Put("d", "ddddddddd")); + ASSERT_EQ(cache.Size(), 1); + ASSERT_EQ(cache.GetCurrentWeight(), 9); + + ASSERT_EQ(evicted_keys.size(), 3); + ASSERT_EQ(evicted_keys[0], "a"); + ASSERT_EQ(evicted_keys[1], "b"); + ASSERT_EQ(evicted_keys[2], "c"); +} + +// ==================== Invalidate ==================== + +TEST_F(GenericLruCacheTest, InvalidateExistingKey) { + std::vector removals; + StringIntCache::Options options; + options.removal_callback = [&](const 
std::string& key, const int& value, auto cause) { + removals.push_back({key, std::to_string(value), static_cast(cause)}); + }; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("key1", 100)); + ASSERT_EQ(cache.Size(), 1); + + cache.Invalidate("key1"); + ASSERT_EQ(cache.Size(), 0); + ASSERT_EQ(cache.GetCurrentWeight(), 0); + + ASSERT_EQ(removals.size(), 1); + ASSERT_EQ(removals[0].key, "key1"); + ASSERT_EQ(removals[0].cause, RemovalCause::EXPLICIT); +} + +TEST_F(GenericLruCacheTest, InvalidateNonExistentKey) { + StringIntCache::Options options; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("key1", 100)); + cache.Invalidate("nonexistent"); + ASSERT_EQ(cache.Size(), 1); +} + +// ==================== InvalidateAll ==================== + +TEST_F(GenericLruCacheTest, InvalidateAllClearsEverything) { + std::vector removals; + StringIntCache::Options options; + options.removal_callback = [&](const std::string& key, const int& value, auto cause) { + removals.push_back({key, std::to_string(value), static_cast(cause)}); + }; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("a", 1)); + ASSERT_OK(cache.Put("b", 2)); + ASSERT_OK(cache.Put("c", 3)); + ASSERT_EQ(cache.Size(), 3); + + cache.InvalidateAll(); + ASSERT_EQ(cache.Size(), 0); + ASSERT_EQ(cache.GetCurrentWeight(), 0); + + ASSERT_EQ(removals.size(), 3); + for (const auto& record : removals) { + ASSERT_EQ(record.cause, RemovalCause::EXPLICIT); + } +} + +TEST_F(GenericLruCacheTest, InvalidateAllOnEmptyCache) { + StringIntCache::Options options; + StringIntCache cache(options); + + cache.InvalidateAll(); + ASSERT_EQ(cache.Size(), 0); + ASSERT_EQ(cache.GetCurrentWeight(), 0); +} + +// ==================== Weight Function ==================== + +TEST_F(GenericLruCacheTest, DefaultWeightIsOne) { + StringIntCache::Options options; + options.max_weight = 3; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("a", 1)); + ASSERT_OK(cache.Put("b", 2)); + ASSERT_OK(cache.Put("c", 3)); + 
ASSERT_EQ(cache.GetCurrentWeight(), 3); + ASSERT_EQ(cache.Size(), 3); + + // Adding one more should evict the LRU entry + ASSERT_OK(cache.Put("d", 4)); + ASSERT_EQ(cache.Size(), 3); + ASSERT_EQ(cache.GetCurrentWeight(), 3); + ASSERT_FALSE(cache.GetIfPresent("a").has_value()); +} + +TEST_F(GenericLruCacheTest, WeightUpdatedOnReplace) { + StringStringCache::Options options; + options.max_weight = 100; + options.weigh_func = [](const std::string& key, const std::string& value) -> int64_t { + return static_cast(value.size()); + }; + StringStringCache cache(options); + + ASSERT_OK(cache.Put("a", std::string(30, 'x'))); + ASSERT_EQ(cache.GetCurrentWeight(), 30); + + // Replace with larger value + ASSERT_OK(cache.Put("a", std::string(70, 'y'))); + ASSERT_EQ(cache.GetCurrentWeight(), 70); + ASSERT_EQ(cache.Size(), 1); +} + +// ==================== Expiration ==================== + +TEST_F(GenericLruCacheTest, ExpirationDisabledByDefault) { + StringIntCache::Options options; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("key1", 100)); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + + auto result = cache.GetIfPresent("key1"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), 100); +} + +TEST_F(GenericLruCacheTest, ExpirationOnGet) { + StringIntCache::Options options; + options.expire_after_access_ms = 50; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("key1", 100)); + + // Access before expiration + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + auto result = cache.GetIfPresent("key1"); + ASSERT_TRUE(result.has_value()); + + // Wait for expiration + std::this_thread::sleep_for(std::chrono::milliseconds(80)); + result = cache.GetIfPresent("key1"); + ASSERT_FALSE(result.has_value()); + ASSERT_EQ(cache.Size(), 0); +} + +TEST_F(GenericLruCacheTest, ExpirationOnGetWithSupplier) { + StringIntCache::Options options; + options.expire_after_access_ms = 50; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("key1", 
100)); + + std::this_thread::sleep_for(std::chrono::milliseconds(80)); + + int supplier_calls = 0; + auto supplier = [&](const std::string& key) -> Result { + supplier_calls++; + return 200; + }; + + ASSERT_OK_AND_ASSIGN(auto value, cache.Get("key1", supplier)); + ASSERT_EQ(value, 200); + ASSERT_EQ(supplier_calls, 1); +} + +TEST_F(GenericLruCacheTest, AccessResetsExpirationTimer) { + StringIntCache::Options options; + options.expire_after_access_ms = 100; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("key1", 100)); + + // Access at 40ms to reset the timer + std::this_thread::sleep_for(std::chrono::milliseconds(40)); + auto result = cache.GetIfPresent("key1"); + ASSERT_TRUE(result.has_value()); + + // At 80ms from last access (40ms from the GetIfPresent), should still be valid + std::this_thread::sleep_for(std::chrono::milliseconds(40)); + result = cache.GetIfPresent("key1"); + ASSERT_TRUE(result.has_value()); + + // Wait for full expiration from last access + std::this_thread::sleep_for(std::chrono::milliseconds(150)); + result = cache.GetIfPresent("key1"); + ASSERT_FALSE(result.has_value()); +} + +TEST_F(GenericLruCacheTest, ExpiredEntriesEvictedOnPut) { + std::vector removals; + StringIntCache::Options options; + options.expire_after_access_ms = 50; + options.max_weight = 100; + options.removal_callback = [&](const std::string& key, const int& value, auto cause) { + removals.push_back({key, std::to_string(value), static_cast(cause)}); + }; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("a", 1)); + ASSERT_OK(cache.Put("b", 2)); + + std::this_thread::sleep_for(std::chrono::milliseconds(80)); + + // Put triggers EvictIfNeeded which calls EvictExpired + ASSERT_OK(cache.Put("c", 3)); + + // "a" and "b" should have been expired + int expired_count = 0; + for (const auto& record : removals) { + if (record.cause == RemovalCause::EXPIRED) { + expired_count++; + } + } + ASSERT_EQ(expired_count, 2); + ASSERT_EQ(cache.Size(), 1); +} + +// 
==================== Removal Callback ==================== + +TEST_F(GenericLruCacheTest, NoCallbackConfigured) { + StringIntCache::Options options; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("key1", 100)); + cache.Invalidate("key1"); + ASSERT_EQ(cache.Size(), 0); +} + +TEST_F(GenericLruCacheTest, AllRemovalCauses) { + std::vector removals; + StringIntCache::Options options; + options.max_weight = 2; + options.expire_after_access_ms = 50; + options.removal_callback = [&](const std::string& key, const int& value, auto cause) { + removals.push_back({key, std::to_string(value), static_cast(cause)}); + }; + StringIntCache cache(options); + + // REPLACED: put same key with different value + ASSERT_OK(cache.Put("r", 1)); + ASSERT_OK(cache.Put("r", 2)); + ASSERT_EQ(removals.back().cause, RemovalCause::REPLACED); + + // EXPLICIT: invalidate + cache.Invalidate("r"); + ASSERT_EQ(removals.back().cause, RemovalCause::EXPLICIT); + + // SIZE: evict due to weight + ASSERT_OK(cache.Put("s1", 10)); + ASSERT_OK(cache.Put("s2", 20)); + ASSERT_OK(cache.Put("s3", 30)); + ASSERT_EQ(removals.back().cause, RemovalCause::SIZE); + + // EXPIRED: wait and access + cache.InvalidateAll(); + removals.clear(); + ASSERT_OK(cache.Put("e", 99)); + std::this_thread::sleep_for(std::chrono::milliseconds(80)); + ASSERT_FALSE(cache.GetIfPresent("e").has_value()); + ASSERT_EQ(removals.back().cause, RemovalCause::EXPIRED); +} + +// ==================== ValuesEqual with shared_ptr ==================== + +TEST_F(GenericLruCacheTest, SharedPtrSamePointerNoReplace) { + using Cause = StringSharedPtrCache::RemovalCause; + std::vector causes; + StringSharedPtrCache::Options options; + options.removal_callback = [&](const std::string& key, const std::shared_ptr& value, + auto cause) { causes.push_back(static_cast(cause)); }; + StringSharedPtrCache cache(options); + + auto ptr = std::make_shared(42); + ASSERT_OK(cache.Put("key1", ptr)); + + // Put same pointer — ValuesEqual returns true, should
promote without REPLACED + ASSERT_OK(cache.Put("key1", ptr)); + ASSERT_TRUE(causes.empty()); + ASSERT_EQ(cache.Size(), 1); +} + +TEST_F(GenericLruCacheTest, SharedPtrDifferentPointerSameValueNoReplace) { + using Cause = StringSharedPtrCache::RemovalCause; + std::vector causes; + StringSharedPtrCache::Options options; + options.removal_callback = [&](const std::string& key, const std::shared_ptr& value, + auto cause) { causes.push_back(static_cast(cause)); }; + StringSharedPtrCache cache(options); + + auto ptr1 = std::make_shared(42); + auto ptr2 = std::make_shared(42); + ASSERT_NE(ptr1.get(), ptr2.get()); + + ASSERT_OK(cache.Put("key1", ptr1)); + // Different pointer but same dereferenced value — ValuesEqual returns true + ASSERT_OK(cache.Put("key1", ptr2)); + ASSERT_TRUE(causes.empty()); +} + +TEST_F(GenericLruCacheTest, SharedPtrDifferentValueReplaces) { + using Cause = StringSharedPtrCache::RemovalCause; + std::vector causes; + StringSharedPtrCache::Options options; + options.removal_callback = [&](const std::string& key, const std::shared_ptr& value, + auto cause) { causes.push_back(static_cast(cause)); }; + StringSharedPtrCache cache(options); + + ASSERT_OK(cache.Put("key1", std::make_shared(1))); + ASSERT_OK(cache.Put("key1", std::make_shared(2))); + + ASSERT_EQ(causes.size(), 1); + ASSERT_EQ(causes[0], Cause::REPLACED); +} + +TEST_F(GenericLruCacheTest, SharedPtrNullptrComparison) { + using Cause = StringSharedPtrCache::RemovalCause; + std::vector causes; + StringSharedPtrCache::Options options; + options.removal_callback = [&](const std::string& key, const std::shared_ptr& value, + auto cause) { causes.push_back(static_cast(cause)); }; + StringSharedPtrCache cache(options); + + // Put nullptr + ASSERT_OK(cache.Put("key1", nullptr)); + + // Put nullptr again — same value, should not replace + ASSERT_OK(cache.Put("key1", nullptr)); + ASSERT_TRUE(causes.empty()); + + // Put non-null — different from nullptr, should replace + ASSERT_OK(cache.Put("key1", 
std::make_shared(1))); + ASSERT_EQ(causes.size(), 1); + ASSERT_EQ(causes[0], Cause::REPLACED); + + // Put nullptr again — different from non-null, should replace + causes.clear(); + ASSERT_OK(cache.Put("key1", nullptr)); + ASSERT_EQ(causes.size(), 1); + ASSERT_EQ(causes[0], Cause::REPLACED); +} + +// ==================== Custom Hash and KeyEqual ==================== + +TEST_F(GenericLruCacheTest, CustomHashAndKeyEqual) { + struct CaseInsensitiveHash { + size_t operator()(const std::string& str) const { + std::string lower = str; + for (auto& ch : lower) { + ch = static_cast(std::tolower(ch)); + } + return std::hash{}(lower); + } + }; + struct CaseInsensitiveEqual { + bool operator()(const std::string& lhs, const std::string& rhs) const { + if (lhs.size() != rhs.size()) return false; + for (size_t i = 0; i < lhs.size(); i++) { + if (std::tolower(lhs[i]) != std::tolower(rhs[i])) return false; + } + return true; + } + }; + + using CICache = GenericLruCache; + CICache::Options options; + CICache cache(options); + + ASSERT_OK(cache.Put("Hello", 1)); + ASSERT_EQ(cache.Size(), 1); + + // "hello" should match "Hello" with case-insensitive comparison + auto result = cache.GetIfPresent("hello"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), 1); + + // Put with different case should replace + ASSERT_OK(cache.Put("HELLO", 2)); + ASSERT_EQ(cache.Size(), 1); + + result = cache.GetIfPresent("Hello"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), 2); +} + +// ==================== Thread Safety ==================== + +TEST_F(GenericLruCacheTest, ConcurrentPutAndGet) { + IntIntCache::Options options; + options.max_weight = 10000; + IntIntCache cache(options); + + constexpr int32_t num_threads = 8; + constexpr int32_t ops_per_thread = 200; + + std::vector threads; + std::atomic errors{0}; + + for (int32_t t = 0; t < num_threads; t++) { + threads.emplace_back([&, t]() { + for (int32_t i = 0; i < ops_per_thread; i++) { + int32_t key = t * 
ops_per_thread + i; + auto status = cache.Put(key, key * 10); + if (!status.ok()) { + errors++; + } + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + ASSERT_EQ(errors.load(), 0); + ASSERT_EQ(static_cast(cache.Size()), num_threads * ops_per_thread); + + // Concurrent reads + threads.clear(); + for (int32_t t = 0; t < num_threads; t++) { + threads.emplace_back([&, t]() { + for (int32_t i = 0; i < ops_per_thread; i++) { + int32_t key = t * ops_per_thread + i; + auto result = cache.GetIfPresent(key); + if (!result.has_value() || result.value() != key * 10) { + errors++; + } + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + ASSERT_EQ(errors.load(), 0); +} + +TEST_F(GenericLruCacheTest, ConcurrentGetWithSupplier) { + IntIntCache::Options options; + options.max_weight = 10000; + IntIntCache cache(options); + + constexpr int32_t num_threads = 8; + constexpr int32_t ops_per_thread = 100; + + std::atomic supplier_calls{0}; + std::vector threads; + + for (int32_t t = 0; t < num_threads; t++) { + threads.emplace_back([&, t]() { + for (int32_t i = 0; i < ops_per_thread; i++) { + int32_t key = t * ops_per_thread + i; + auto supplier = [&, key](const int&) -> Result { + supplier_calls++; + return key * 10; + }; + auto result = cache.Get(key, supplier); + ASSERT_TRUE(result.ok()); + ASSERT_EQ(result.value(), key * 10); + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + ASSERT_EQ(static_cast(cache.Size()), num_threads * ops_per_thread); +} + +TEST_F(GenericLruCacheTest, ConcurrentInvalidate) { + IntIntCache::Options options; + IntIntCache cache(options); + + for (int32_t i = 0; i < 100; i++) { + ASSERT_OK(cache.Put(i, i)); + } + + std::vector threads; + for (int32_t t = 0; t < 4; t++) { + threads.emplace_back([&, t]() { + for (int32_t i = t * 25; i < (t + 1) * 25; i++) { + cache.Invalidate(i); + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + ASSERT_EQ(cache.Size(), 0); +} + +// 
==================== Edge Cases ==================== + +TEST_F(GenericLruCacheTest, PutAndGetSingleEntry) { + StringIntCache::Options options; + options.max_weight = 1; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("only", 42)); + ASSERT_EQ(cache.Size(), 1); + + auto result = cache.GetIfPresent("only"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), 42); + + // Adding another entry should evict the first + ASSERT_OK(cache.Put("new", 99)); + ASSERT_EQ(cache.Size(), 1); + ASSERT_FALSE(cache.GetIfPresent("only").has_value()); + ASSERT_TRUE(cache.GetIfPresent("new").has_value()); +} + +TEST_F(GenericLruCacheTest, ReplaceUpdatesWeight) { + StringStringCache::Options options; + options.max_weight = 100; + options.weigh_func = [](const std::string& key, const std::string& value) -> int64_t { + return static_cast(value.size()); + }; + StringStringCache cache(options); + + ASSERT_OK(cache.Put("a", std::string(50, 'x'))); + ASSERT_EQ(cache.GetCurrentWeight(), 50); + + // Replace with smaller value + ASSERT_OK(cache.Put("a", std::string(20, 'y'))); + ASSERT_EQ(cache.GetCurrentWeight(), 20); + + // Replace with larger value + ASSERT_OK(cache.Put("a", std::string(80, 'z'))); + ASSERT_EQ(cache.GetCurrentWeight(), 80); +} + +TEST_F(GenericLruCacheTest, EvictionOrderIsLru) { + std::vector evicted_keys; + StringIntCache::Options options; + options.max_weight = 3; + options.removal_callback = [&](const std::string& key, const int& value, auto cause) { + if (static_cast(cause) == RemovalCause::SIZE) { + evicted_keys.push_back(key); + } + }; + StringIntCache cache(options); + + ASSERT_OK(cache.Put("a", 1)); + ASSERT_OK(cache.Put("b", 2)); + ASSERT_OK(cache.Put("c", 3)); + + // Access "a" and "b" to make "c" the LRU + cache.GetIfPresent("a"); + cache.GetIfPresent("b"); + + // Insert "d": should evict "c" (LRU) + ASSERT_OK(cache.Put("d", 4)); + ASSERT_EQ(evicted_keys.size(), 1); + ASSERT_EQ(evicted_keys[0], "c"); +} + +} // namespace paimon::test diff --git 
a/src/paimon/core/core_options.cpp b/src/paimon/core/core_options.cpp index 3e406998a..28db124a5 100644 --- a/src/paimon/core/core_options.cpp +++ b/src/paimon/core/core_options.cpp @@ -166,7 +166,7 @@ class ConfigParser { } // Parse LookupCompactMode - Status ParseLookupCompactMode(LookupCompactMode* mode) { + Status ParseLookupCompactMode(LookupCompactMode* mode) const { auto iter = config_map_.find(Options::LOOKUP_COMPACT); if (iter != config_map_.end()) { std::string str = StringUtils::ToLowerCase(iter->second); @@ -428,311 +428,400 @@ struct CoreOptions::Impl { std::map file_compression_per_level; int64_t lookup_cache_max_memory = 256 * 1024 * 1024; double lookup_cache_high_prio_pool_ratio = 0.25; -}; - -// Parse configurations from a map and return a populated CoreOptions object -Result CoreOptions::FromMap( - const std::map& options_map, - const std::shared_ptr& specified_file_system, - const std::map& fs_scheme_to_identifier_map) { - CoreOptions options; - auto& impl = options.impl_; - impl->raw_options = options_map; - ConfigParser parser(options_map); - - // Parse basic configurations - PAIMON_RETURN_NOT_OK(parser.Parse(Options::BUCKET, &impl->bucket)); - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::MANIFEST_MERGE_MIN_COUNT, &impl->manifest_merge_min_count)); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::SCAN_SNAPSHOT_ID, &impl->scan_snapshot_id)); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::READ_BATCH_SIZE, &impl->read_batch_size)); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::WRITE_BATCH_SIZE, &impl->write_batch_size)); - PAIMON_RETURN_NOT_OK( - parser.ParseMemorySize(Options::WRITE_BUFFER_SIZE, &impl->write_buffer_size)); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::COMMIT_MAX_RETRIES, &impl->commit_max_retries)); - PAIMON_RETURN_NOT_OK(parser.ParseString(Options::FILE_COMPRESSION, &impl->file_compression)); - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::FILE_COMPRESSION_ZSTD_LEVEL, &impl->file_compression_zstd_level)); - PAIMON_RETURN_NOT_OK( - 
parser.ParseString(Options::MANIFEST_COMPRESSION, &impl->manifest_compression)); - PAIMON_RETURN_NOT_OK( - parser.ParseString(Options::PARTITION_DEFAULT_NAME, &impl->partition_default_name)); - - // Parse memory size configurations - PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::PAGE_SIZE, &impl->page_size)); - if (parser.ContainsKey(Options::TARGET_FILE_SIZE)) { - int64_t target_file_size; - PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::TARGET_FILE_SIZE, &target_file_size)); - impl->target_file_size = target_file_size; - } - if (parser.ContainsKey(Options::BLOB_TARGET_FILE_SIZE)) { - int64_t blob_target_file_size; + int64_t lookup_cache_file_retention_ms = 1 * 3600 * 1000; // 1 hour + int64_t lookup_cache_max_disk_size = INT64_MAX; + + // Parse basic table options: bucket, partition, file sizes, batch sizes, file system, etc. + Status ParseBasicOptions( + const ConfigParser& parser, const std::shared_ptr& specified_file_system, + const std::map& fs_scheme_to_identifier_map) { + // Parse bucket - bucket number, -1 for dynamic bucket mode, >0 for fixed bucket mode + PAIMON_RETURN_NOT_OK(parser.Parse(Options::BUCKET, &bucket)); + // Parse partition.default-name - default partition name for null/empty partition values + PAIMON_RETURN_NOT_OK( + parser.ParseString(Options::PARTITION_DEFAULT_NAME, &partition_default_name)); + // Parse page-size - memory page size, default 64 kb + PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::PAGE_SIZE, &page_size)); + // Parse target-file-size - target size of a data file + if (parser.ContainsKey(Options::TARGET_FILE_SIZE)) { + int64_t parsed_target_file_size; + PAIMON_RETURN_NOT_OK( + parser.ParseMemorySize(Options::TARGET_FILE_SIZE, &parsed_target_file_size)); + target_file_size = parsed_target_file_size; + } + // Parse blob.target-file-size - target size of a blob file + if (parser.ContainsKey(Options::BLOB_TARGET_FILE_SIZE)) { + int64_t parsed_blob_target_file_size; + 
PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::BLOB_TARGET_FILE_SIZE, + &parsed_blob_target_file_size)); + blob_target_file_size = parsed_blob_target_file_size; + } + // Parse source.split.target-size - target size of a source split when scanning a bucket + PAIMON_RETURN_NOT_OK( + parser.ParseMemorySize(Options::SOURCE_SPLIT_TARGET_SIZE, &source_split_target_size)); + // Parse source.split.open-file-cost - open file cost to avoid reading too many files + PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::SOURCE_SPLIT_OPEN_FILE_COST, + &source_split_open_file_cost)); + // Parse read.batch-size - read batch size for file formats + PAIMON_RETURN_NOT_OK(parser.Parse(Options::READ_BATCH_SIZE, &read_batch_size)); + // Parse write.batch-size - write batch size for file formats + PAIMON_RETURN_NOT_OK(parser.Parse(Options::WRITE_BATCH_SIZE, &write_batch_size)); + // Parse write-buffer-size - data to build up in memory before flushing to disk + PAIMON_RETURN_NOT_OK( + parser.ParseMemorySize(Options::WRITE_BUFFER_SIZE, &write_buffer_size)); + // Parse file-system - file system type, default "local" + PAIMON_RETURN_NOT_OK(parser.ParseFileSystem(fs_scheme_to_identifier_map, + specified_file_system, &file_system)); + // Parse write-only - if true, compactions and snapshot expiration will be skipped + PAIMON_RETURN_NOT_OK(parser.Parse(Options::WRITE_ONLY, &write_only)); + // Parse partition.legacy-name - use legacy ToString for partition names, default true + PAIMON_RETURN_NOT_OK(parser.Parse(Options::PARTITION_GENERATE_LEGACY_NAME, + &legacy_partition_name_enabled)); + // Only for test, parse enable-adaptive-prefetch-strategy + PAIMON_RETURN_NOT_OK(parser.Parse("test.enable-adaptive-prefetch-strategy", + &enable_adaptive_prefetch_strategy)); + // Parse data-file.external-paths - external paths for data files, comma separated + std::string parsed_external_paths; PAIMON_RETURN_NOT_OK( - parser.ParseMemorySize(Options::BLOB_TARGET_FILE_SIZE, &blob_target_file_size)); - 
impl->blob_target_file_size = blob_target_file_size; + parser.ParseString(Options::DATA_FILE_EXTERNAL_PATHS, &parsed_external_paths)); + if (!parsed_external_paths.empty()) { + data_file_external_paths = parsed_external_paths; + } + // Parse data-file.external-paths.strategy - strategy for selecting external path + PAIMON_RETURN_NOT_OK(parser.ParseExternalPathStrategy(&external_path_strategy)); + // Parse data-file.prefix - file name prefix of data files, default "data-" + PAIMON_RETURN_NOT_OK(parser.ParseString(Options::DATA_FILE_PREFIX, &data_file_prefix)); + // Parse row-tracking.enabled - whether to enable unique row id for append table + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::ROW_TRACKING_ENABLED, &row_tracking_enabled)); + // Parse data-evolution.enabled - whether to enable data evolution for row tracking + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::DATA_EVOLUTION_ENABLED, &data_evolution_enabled)); + return Status::OK(); } - PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::MANIFEST_TARGET_FILE_SIZE, - &impl->manifest_target_file_size)); - PAIMON_RETURN_NOT_OK( - parser.ParseMemorySize(Options::SOURCE_SPLIT_TARGET_SIZE, &impl->source_split_target_size)); - PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::SOURCE_SPLIT_OPEN_FILE_COST, - &impl->source_split_open_file_cost)); - PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::MANIFEST_FULL_COMPACTION_FILE_SIZE, - &impl->manifest_full_compaction_file_size)); - - // Parse file format and file system configurations - PAIMON_RETURN_NOT_OK(parser.ParseObject( - Options::FILE_FORMAT, /*default_identifier=*/"parquet", &impl->file_format)); - PAIMON_RETURN_NOT_OK(parser.ParseObject( - Options::MANIFEST_FORMAT, /*default_identifier=*/"avro", &impl->manifest_file_format)); - PAIMON_RETURN_NOT_OK(parser.ParseFileSystem(fs_scheme_to_identifier_map, specified_file_system, - &impl->file_system)); - - // Parse startup mode - PAIMON_RETURN_NOT_OK(parser.ParseStartupMode(&impl->startup_mode)); - - // Special 
handling for ExpireConfig - int32_t snapshot_num_retain_min = 10; - int32_t snapshot_num_retain_max = std::numeric_limits::max(); - int32_t snapshot_expire_limit = 10; - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::SNAPSHOT_NUM_RETAINED_MIN, &snapshot_num_retain_min)); - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::SNAPSHOT_NUM_RETAINED_MAX, &snapshot_num_retain_max)); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::SNAPSHOT_EXPIRE_LIMIT, &snapshot_expire_limit)); - std::string snapshot_time_retained_str = "1 hour"; - PAIMON_RETURN_NOT_OK( - parser.ParseString(Options::SNAPSHOT_TIME_RETAINED, &snapshot_time_retained_str)); - PAIMON_ASSIGN_OR_RAISE(int64_t snapshot_time_retained, - TimeDuration::Parse(snapshot_time_retained_str)); - bool snapshot_clean_empty_directories = false; - PAIMON_RETURN_NOT_OK(parser.Parse(Options::SNAPSHOT_CLEAN_EMPTY_DIRECTORIES, - &snapshot_clean_empty_directories)); - impl->expire_config = - ExpireConfig(snapshot_num_retain_max, snapshot_num_retain_min, snapshot_time_retained, - snapshot_expire_limit, snapshot_clean_empty_directories); - - std::string commit_timeout_str; - PAIMON_RETURN_NOT_OK(parser.ParseString(Options::COMMIT_TIMEOUT, &commit_timeout_str)); - if (!commit_timeout_str.empty()) { - PAIMON_ASSIGN_OR_RAISE(impl->commit_timeout, TimeDuration::Parse(commit_timeout_str)); + // Parse data file format, compression, and per-level format/compression configurations. 
+ Status ParseFileFormatOptions(const ConfigParser& parser) { + // Parse file.format - data file format, default "parquet" + PAIMON_RETURN_NOT_OK(parser.ParseObject( + Options::FILE_FORMAT, /*default_identifier=*/"parquet", &file_format)); + // Parse file.compression - default file compression, default "zstd" + PAIMON_RETURN_NOT_OK(parser.ParseString(Options::FILE_COMPRESSION, &file_compression)); + // Parse file.compression.zstd-level - zstd compression level, default 1 + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::FILE_COMPRESSION_ZSTD_LEVEL, &file_compression_zstd_level)); + // Parse file.format.per.level - different file format for different levels + PAIMON_RETURN_NOT_OK(parser.ParseFileFormatPerLevel(&file_format_per_level)); + // Parse file.compression.per.level - different compression for different levels + PAIMON_RETURN_NOT_OK(parser.ParseFileCompressionPerLevel(&file_compression_per_level)); + return Status::OK(); } - // Parse sequence field - PAIMON_RETURN_NOT_OK(parser.ParseList( - Options::SEQUENCE_FIELD, Options::FIELDS_SEPARATOR, &impl->sequence_field)); - PAIMON_RETURN_NOT_OK(parser.ParseSortOrder(&impl->sequence_field_sort_order)); - // Parse merge and sort engine - PAIMON_RETURN_NOT_OK(parser.ParseSortEngine(&impl->sort_engine)); - PAIMON_RETURN_NOT_OK(parser.ParseMergeEngine(&impl->merge_engine)); - // Parse ignore delete - PAIMON_RETURN_NOT_OK(parser.Parse(Options::IGNORE_DELETE, &impl->ignore_delete)); - - // Parse write-only - PAIMON_RETURN_NOT_OK(parser.Parse(Options::WRITE_ONLY, &impl->write_only)); - - // Parse default agg function - std::string field_default_func; - PAIMON_RETURN_NOT_OK(parser.ParseString(Options::FIELDS_DEFAULT_AGG_FUNC, &field_default_func)); - if (!field_default_func.empty()) { - impl->field_default_func = field_default_func; + // Parse manifest file configurations: format, compression, merge, and compaction thresholds. 
+ Status ParseManifestOptions(const ConfigParser& parser) { + // Parse manifest.format - manifest file format, default "avro" + PAIMON_RETURN_NOT_OK(parser.ParseObject( + Options::MANIFEST_FORMAT, /*default_identifier=*/"avro", &manifest_file_format)); + // Parse manifest.compression - manifest file compression, default "zstd" + PAIMON_RETURN_NOT_OK( + parser.ParseString(Options::MANIFEST_COMPRESSION, &manifest_compression)); + // Parse manifest.target-file-size - suggested manifest file size, default 8MB + PAIMON_RETURN_NOT_OK( + parser.ParseMemorySize(Options::MANIFEST_TARGET_FILE_SIZE, &manifest_target_file_size)); + // Parse manifest.merge-min-count - minimum ManifestFileMeta count to trigger merge + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::MANIFEST_MERGE_MIN_COUNT, &manifest_merge_min_count)); + // Parse manifest.full-compaction-threshold-size - size threshold for full compaction + PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::MANIFEST_FULL_COMPACTION_FILE_SIZE, + &manifest_full_compaction_file_size)); + return Status::OK(); } - // Parse deletion vectors enabled & force lookup - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::DELETION_VECTORS_ENABLED, &impl->deletion_vectors_enabled)); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::FORCE_LOOKUP, &impl->force_lookup)); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::LOOKUP_WAIT, &impl->lookup_wait)); - PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::DELETION_VECTOR_INDEX_FILE_TARGET_SIZE, - &impl->deletion_vector_target_file_size)); - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::DELETION_VECTOR_BITMAP64, &impl->deletion_vectors_bitmap64)); - - // Parse changelog producer - PAIMON_RETURN_NOT_OK(parser.ParseChangelogProducer(&impl->changelog_producer)); - - // Parse partial_update_remove_record_on_delete - PAIMON_RETURN_NOT_OK(parser.Parse(Options::PARTIAL_UPDATE_REMOVE_RECORD_ON_DELETE, - &impl->partial_update_remove_record_on_delete)); - // Parse partial-update.remove-record-on-sequence-group - 
PAIMON_RETURN_NOT_OK(parser.ParseList( - Options::PARTIAL_UPDATE_REMOVE_RECORD_ON_SEQUENCE_GROUP, Options::FIELDS_SEPARATOR, - &impl->remove_record_on_sequence_group)); - - // Parse scan.fallback-branch - std::string scan_fallback_branch; - PAIMON_RETURN_NOT_OK(parser.ParseString(Options::SCAN_FALLBACK_BRANCH, &scan_fallback_branch)); - if (!scan_fallback_branch.empty()) { - impl->scan_fallback_branch = scan_fallback_branch; + // Parse snapshot expiration and retention configurations. + Status ParseExpireOptions(const ConfigParser& parser) { + // Parse snapshot.num-retained.min - minimum completed snapshots to retain, default 10 + int32_t snapshot_num_retain_min = 10; + // Parse snapshot.num-retained.max - maximum completed snapshots to retain + int32_t snapshot_num_retain_max = std::numeric_limits::max(); + // Parse snapshot.expire.limit - maximum snapshots allowed to expire at a time, default 10 + int32_t snapshot_expire_limit = 10; + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::SNAPSHOT_NUM_RETAINED_MIN, &snapshot_num_retain_min)); + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::SNAPSHOT_NUM_RETAINED_MAX, &snapshot_num_retain_max)); + PAIMON_RETURN_NOT_OK(parser.Parse(Options::SNAPSHOT_EXPIRE_LIMIT, &snapshot_expire_limit)); + // Parse snapshot.time-retained - maximum time of completed snapshots to retain + std::string snapshot_time_retained_str = "1 hour"; + PAIMON_RETURN_NOT_OK( + parser.ParseString(Options::SNAPSHOT_TIME_RETAINED, &snapshot_time_retained_str)); + PAIMON_ASSIGN_OR_RAISE(int64_t snapshot_time_retained, + TimeDuration::Parse(snapshot_time_retained_str)); + // Parse snapshot.clean-empty-directories - whether to clean empty dirs on expiration + bool snapshot_clean_empty_directories = false; + PAIMON_RETURN_NOT_OK(parser.Parse(Options::SNAPSHOT_CLEAN_EMPTY_DIRECTORIES, + &snapshot_clean_empty_directories)); + expire_config = + ExpireConfig(snapshot_num_retain_max, snapshot_num_retain_min, snapshot_time_retained, + snapshot_expire_limit, 
snapshot_clean_empty_directories); + return Status::OK(); } - // Parse branch name - PAIMON_RETURN_NOT_OK(parser.ParseString(Options::BRANCH, &impl->branch)); - - // Parse file-index.read.enabled - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::FILE_INDEX_READ_ENABLED, &impl->file_index_read_enabled)); - // Parse data-file.external-paths - std::string data_file_external_paths; - PAIMON_RETURN_NOT_OK( - parser.ParseString(Options::DATA_FILE_EXTERNAL_PATHS, &data_file_external_paths)); - if (!data_file_external_paths.empty()) { - impl->data_file_external_paths = data_file_external_paths; + // Parse commit configurations: timeout, retries, and force-compact. + Status ParseCommitOptions(const ConfigParser& parser) { + // Parse commit.force-compact - whether to force compaction before commit, default false + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::COMMIT_FORCE_COMPACT, &commit_force_compact)); + // Parse commit.timeout - timeout duration of retry when commit failed + std::string commit_timeout_str; + PAIMON_RETURN_NOT_OK(parser.ParseString(Options::COMMIT_TIMEOUT, &commit_timeout_str)); + if (!commit_timeout_str.empty()) { + PAIMON_ASSIGN_OR_RAISE(commit_timeout, TimeDuration::Parse(commit_timeout_str)); + } + // Parse commit.max-retries - maximum retries when commit failed, default 10 + PAIMON_RETURN_NOT_OK(parser.Parse(Options::COMMIT_MAX_RETRIES, &commit_max_retries)); + return Status::OK(); } - // Parse external path strategy - PAIMON_RETURN_NOT_OK(parser.ParseExternalPathStrategy(&impl->external_path_strategy)); - // Only for test, parse enable-adaptive-prefetch-strategy - PAIMON_RETURN_NOT_OK(parser.Parse("test.enable-adaptive-prefetch-strategy", - &impl->enable_adaptive_prefetch_strategy)); - // Parse data file prefix - PAIMON_RETURN_NOT_OK(parser.ParseString(Options::DATA_FILE_PREFIX, &impl->data_file_prefix)); - - // Parse index-file-in-data-file-dir - PAIMON_RETURN_NOT_OK(parser.Parse(Options::INDEX_FILE_IN_DATA_FILE_DIR, - 
&impl->index_file_in_data_file_dir)); - // Parse row-tracking.enabled - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::ROW_TRACKING_ENABLED, &impl->row_tracking_enabled)); - // Parse data-evolution.enabled - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::DATA_EVOLUTION_ENABLED, &impl->data_evolution_enabled)); - // Parse partition.legacy-name - PAIMON_RETURN_NOT_OK(parser.Parse(Options::PARTITION_GENERATE_LEGACY_NAME, - &impl->legacy_partition_name_enabled)); - // Parse global-index.enabled - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::GLOBAL_INDEX_ENABLED, &impl->global_index_enabled)); - // Parse global_index.external-path - std::string global_index_external_path; - PAIMON_RETURN_NOT_OK( - parser.ParseString(Options::GLOBAL_INDEX_EXTERNAL_PATH, &global_index_external_path)); - if (!global_index_external_path.empty()) { - impl->global_index_external_path = global_index_external_path; - } - // Parse scan.tag-name - std::string scan_tag_name; - PAIMON_RETURN_NOT_OK(parser.ParseString(Options::SCAN_TAG_NAME, &scan_tag_name)); - if (!scan_tag_name.empty()) { - impl->scan_tag_name = scan_tag_name; + // Parse merge engine, sort engine, sequence field, changelog, and partial-update options. 
+ Status ParseMergeAndSequenceOptions(const ConfigParser& parser) { + // Parse sequence.field - field that generates sequence number for primary key table + PAIMON_RETURN_NOT_OK(parser.ParseList( + Options::SEQUENCE_FIELD, Options::FIELDS_SEPARATOR, &sequence_field)); + // Parse sequence.field.sort-order - order of sequence field, default "ascending" + PAIMON_RETURN_NOT_OK(parser.ParseSortOrder(&sequence_field_sort_order)); + // Parse sort-engine - sort engine for primary key table, default "loser-tree" + PAIMON_RETURN_NOT_OK(parser.ParseSortEngine(&sort_engine)); + // Parse merge-engine - merge engine for primary key table, default "deduplicate" + PAIMON_RETURN_NOT_OK(parser.ParseMergeEngine(&merge_engine)); + // Parse ignore-delete - whether to ignore delete records, default false + PAIMON_RETURN_NOT_OK(parser.Parse(Options::IGNORE_DELETE, &ignore_delete)); + // Parse fields.default-aggregate-function - default agg function for partial-update + std::string parsed_default_func; + PAIMON_RETURN_NOT_OK( + parser.ParseString(Options::FIELDS_DEFAULT_AGG_FUNC, &parsed_default_func)); + if (!parsed_default_func.empty()) { + field_default_func = parsed_default_func; + } + // Parse changelog-producer - whether to double write to a changelog file, default "none" + PAIMON_RETURN_NOT_OK(parser.ParseChangelogProducer(&changelog_producer)); + // Parse partial-update.remove-record-on-delete - remove whole row on delete + PAIMON_RETURN_NOT_OK(parser.Parse(Options::PARTIAL_UPDATE_REMOVE_RECORD_ON_DELETE, + &partial_update_remove_record_on_delete)); + // Parse partial-update.remove-record-on-sequence-group + PAIMON_RETURN_NOT_OK(parser.ParseList( + Options::PARTIAL_UPDATE_REMOVE_RECORD_ON_SEQUENCE_GROUP, Options::FIELDS_SEPARATOR, + &remove_record_on_sequence_group)); + return Status::OK(); } - // Parse compaction options - // Parse commit.force-compact - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::COMMIT_FORCE_COMPACT, &impl->commit_force_compact)); - - // Parse 
compaction.min.file-num - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::COMPACTION_MIN_FILE_NUM, &impl->compaction_min_file_num)); - - // Parse compaction.force-rewrite-all-files - PAIMON_RETURN_NOT_OK(parser.Parse(Options::COMPACTION_FORCE_REWRITE_ALL_FILES, - &impl->compaction_force_rewrite_all_files)); - - // Parse compaction.force-up-level-0 - PAIMON_RETURN_NOT_OK(parser.Parse(Options::COMPACTION_FORCE_UP_LEVEL_0, - &impl->compaction_force_up_level_0)); - - // Parse compaction.optimization-interval - std::string optimized_compaction_interval_str; - PAIMON_RETURN_NOT_OK(parser.ParseString(Options::COMPACTION_OPTIMIZATION_INTERVAL, - &optimized_compaction_interval_str)); - if (!optimized_compaction_interval_str.empty()) { - PAIMON_ASSIGN_OR_RAISE(impl->optimized_compaction_interval, - TimeDuration::Parse(optimized_compaction_interval_str)); - } - // Parse compaction.total-size-threshold - std::string compaction_total_size_threshold_str; - PAIMON_RETURN_NOT_OK(parser.ParseString(Options::COMPACTION_TOTAL_SIZE_THRESHOLD, - &compaction_total_size_threshold_str)); - if (!compaction_total_size_threshold_str.empty()) { - PAIMON_ASSIGN_OR_RAISE(impl->compaction_total_size_threshold, - MemorySize::ParseBytes(compaction_total_size_threshold_str)); - } - // Parse compaction.incremental-size-threshold - std::string compaction_incremental_size_threshold_str; - PAIMON_RETURN_NOT_OK(parser.ParseString(Options::COMPACTION_INCREMENTAL_SIZE_THRESHOLD, - &compaction_incremental_size_threshold_str)); - if (!compaction_incremental_size_threshold_str.empty()) { - PAIMON_ASSIGN_OR_RAISE(impl->compaction_incremental_size_threshold, - MemorySize::ParseBytes(compaction_incremental_size_threshold_str)); + // Parse deletion vector configurations. 
+ Status ParseDeletionVectorOptions(const ConfigParser& parser) { + // Parse deletion-vectors.enabled - whether to enable deletion vectors mode, default false + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::DELETION_VECTORS_ENABLED, &deletion_vectors_enabled)); + // Parse deletion-vector.index-file.target-size - target size of dv index file, default 2MB + PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::DELETION_VECTOR_INDEX_FILE_TARGET_SIZE, + &deletion_vector_target_file_size)); + // Parse deletion-vectors.bitmap64 - enable 64 bit bitmap implementation, default false + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::DELETION_VECTOR_BITMAP64, &deletion_vectors_bitmap64)); + return Status::OK(); } - // Parse compaction.offpeak.start.hour - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::COMPACT_OFFPEAK_START_HOUR, &impl->compact_off_peak_start_hour)); - // Parse compaction.offpeak.end.hour - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::COMPACT_OFFPEAK_END_HOUR, &impl->compact_off_peak_end_hour)); - // Parse compaction.offpeak-ratio - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::COMPACTION_OFFPEAK_RATIO, &impl->compact_off_peak_ratio)); - - // Parse lookup.cache.bloom.filter.enabled - PAIMON_RETURN_NOT_OK(parser.Parse(Options::LOOKUP_CACHE_BLOOM_FILTER_ENABLED, - &impl->lookup_cache_bloom_filter)); - - // Parse lookup.cache.bloom.filter.fpp - PAIMON_RETURN_NOT_OK(parser.Parse(Options::LOOKUP_CACHE_BLOOM_FILTER_FPP, - &impl->lookup_cache_bloom_filter_fpp)); - - // Parse lookup.remote-file.enabled - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::LOOKUP_REMOTE_FILE_ENABLED, &impl->lookup_remote_file_enabled)); - - // Parse lookup.remote-file.level-threshold - PAIMON_RETURN_NOT_OK(parser.Parse(Options::LOOKUP_REMOTE_LEVEL_THRESHOLD, - &impl->lookup_remote_level_threshold)); - - // Parse lookup.cache-spill-compression - std::string lookup_compress_options_compression_str; - PAIMON_RETURN_NOT_OK(parser.ParseString(Options::LOOKUP_CACHE_SPILL_COMPRESSION, - 
&lookup_compress_options_compression_str)); - if (!lookup_compress_options_compression_str.empty()) { - impl->lookup_compress_options.compress = lookup_compress_options_compression_str; + // Parse scan, branch, and tag related configurations. + Status ParseScanAndBranchOptions(const ConfigParser& parser) { + // Parse scan.snapshot-id - optional snapshot id for "from-snapshot" scan mode + PAIMON_RETURN_NOT_OK(parser.Parse(Options::SCAN_SNAPSHOT_ID, &scan_snapshot_id)); + // Parse scan.mode - scanning behavior of the source, default "default" + PAIMON_RETURN_NOT_OK(parser.ParseStartupMode(&startup_mode)); + // Parse scan.fallback-branch - fallback branch when partition not found + std::string parsed_fallback_branch; + PAIMON_RETURN_NOT_OK( + parser.ParseString(Options::SCAN_FALLBACK_BRANCH, &parsed_fallback_branch)); + if (!parsed_fallback_branch.empty()) { + scan_fallback_branch = parsed_fallback_branch; + } + // Parse branch - branch name, default "main" + PAIMON_RETURN_NOT_OK(parser.ParseString(Options::BRANCH, &branch)); + // Parse scan.tag-name - optional tag name for "from-snapshot" scan mode + std::string parsed_tag_name; + PAIMON_RETURN_NOT_OK(parser.ParseString(Options::SCAN_TAG_NAME, &parsed_tag_name)); + if (!parsed_tag_name.empty()) { + scan_tag_name = parsed_tag_name; + } + return Status::OK(); } - // Parse spill-compression.zstd-level - PAIMON_RETURN_NOT_OK(parser.Parse(Options::SPILL_COMPRESSION_ZSTD_LEVEL, - &(impl->lookup_compress_options.zstd_level))); + // Parse index-related configurations: file index, global index. 
+ Status ParseIndexOptions(const ConfigParser& parser) { + // Parse file-index.read.enabled - whether to enable reading file index, default true + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::FILE_INDEX_READ_ENABLED, &file_index_read_enabled)); + // Parse index-file-in-data-file-dir - whether index file in data file directory + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::INDEX_FILE_IN_DATA_FILE_DIR, &index_file_in_data_file_dir)); + // Parse global-index.enabled - whether to enable global index for scan, default true + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::GLOBAL_INDEX_ENABLED, &global_index_enabled)); + // Parse global-index.external-path - global index root directory + std::string parsed_global_index_external_path; + PAIMON_RETURN_NOT_OK(parser.ParseString(Options::GLOBAL_INDEX_EXTERNAL_PATH, + &parsed_global_index_external_path)); + if (!parsed_global_index_external_path.empty()) { + global_index_external_path = parsed_global_index_external_path; + } + return Status::OK(); + } - // Parse cache-page-size - PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::CACHE_PAGE_SIZE, &impl->cache_page_size)); + // Parse compaction configurations: sorted run triggers, size ratios, thresholds, off-peak. 
+ Status ParseCompactionOptions(const ConfigParser& parser) { + // Parse compaction.min.file-num - minimum file number to trigger compaction, default 5 + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::COMPACTION_MIN_FILE_NUM, &compaction_min_file_num)); + // Parse compaction.max-size-amplification-percent - size amplification percent, default 200 + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::COMPACTION_MAX_SIZE_AMPLIFICATION_PERCENT, + &compaction_max_size_amplification_percent)); + // Parse compaction.size-ratio - percentage flexibility for sorted run comparison, default 1 + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::COMPACTION_SIZE_RATIO, &compaction_size_ratio)); + // Parse num-sorted-run.compaction-trigger - sorted run number to trigger compaction + PAIMON_RETURN_NOT_OK(parser.Parse(Options::NUM_SORTED_RUNS_COMPACTION_TRIGGER, + &num_sorted_runs_compaction_trigger)); + // Parse num-sorted-run.stop-trigger - sorted run number to stop writes + PAIMON_RETURN_NOT_OK(parser.Parse(Options::NUM_SORTED_RUNS_STOP_TRIGGER, + &num_sorted_runs_stop_trigger)); + // Parse num-levels - total level number for LSM tree + PAIMON_RETURN_NOT_OK(parser.Parse(Options::NUM_LEVELS, &num_levels)); + // Parse compaction.force-rewrite-all-files - force pick all files for full compaction + PAIMON_RETURN_NOT_OK(parser.Parse(Options::COMPACTION_FORCE_REWRITE_ALL_FILES, + &compaction_force_rewrite_all_files)); + // Parse compaction.force-up-level-0 - always include all level 0 files in candidates + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::COMPACTION_FORCE_UP_LEVEL_0, &compaction_force_up_level_0)); + // Parse compaction.optimization-interval - how often to perform optimization compaction + std::string optimized_compaction_interval_str; + PAIMON_RETURN_NOT_OK(parser.ParseString(Options::COMPACTION_OPTIMIZATION_INTERVAL, + &optimized_compaction_interval_str)); + if (!optimized_compaction_interval_str.empty()) { + PAIMON_ASSIGN_OR_RAISE(optimized_compaction_interval, + 
TimeDuration::Parse(optimized_compaction_interval_str)); + } + // Parse compaction.total-size-threshold - force full compaction when total size is smaller + std::string compaction_total_size_threshold_str; + PAIMON_RETURN_NOT_OK(parser.ParseString(Options::COMPACTION_TOTAL_SIZE_THRESHOLD, + &compaction_total_size_threshold_str)); + if (!compaction_total_size_threshold_str.empty()) { + PAIMON_ASSIGN_OR_RAISE(compaction_total_size_threshold, + MemorySize::ParseBytes(compaction_total_size_threshold_str)); + } + // Parse compaction.incremental-size-threshold - force full compaction when incremental size + // is bigger + std::string compaction_incremental_size_threshold_str; + PAIMON_RETURN_NOT_OK(parser.ParseString(Options::COMPACTION_INCREMENTAL_SIZE_THRESHOLD, + &compaction_incremental_size_threshold_str)); + if (!compaction_incremental_size_threshold_str.empty()) { + PAIMON_ASSIGN_OR_RAISE( + compaction_incremental_size_threshold, + MemorySize::ParseBytes(compaction_incremental_size_threshold_str)); + } + // Parse compaction.offpeak.start.hour - start of off-peak hours (0-23), -1 to disable + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::COMPACT_OFFPEAK_START_HOUR, &compact_off_peak_start_hour)); + // Parse compaction.offpeak.end.hour - end of off-peak hours (0-23), -1 to disable + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::COMPACT_OFFPEAK_END_HOUR, &compact_off_peak_end_hour)); + // Parse compaction.offpeak-ratio - more aggressive ratio during off-peak hours, default 0 + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::COMPACTION_OFFPEAK_RATIO, &compact_off_peak_ratio)); + return Status::OK(); + } - // parse file.format.per.level - PAIMON_RETURN_NOT_OK(parser.ParseFileFormatPerLevel(&impl->file_format_per_level)); + // Parse lookup configurations: compact mode, bloom filter, remote file, cache, compression. 
+ Status ParseLookupOptions(const ConfigParser& parser) { + // Parse force-lookup - whether to force lookup for compaction, default false + PAIMON_RETURN_NOT_OK(parser.Parse(Options::FORCE_LOOKUP, &force_lookup)); + // Parse lookup-wait - commit will wait for compaction by lookup, default true + PAIMON_RETURN_NOT_OK(parser.Parse(Options::LOOKUP_WAIT, &lookup_wait)); + // Parse lookup-compact - lookup compact mode, default RADICAL + PAIMON_RETURN_NOT_OK(parser.ParseLookupCompactMode(&lookup_compact_mode)); + // Parse lookup-compact.max-interval - max interval for gentle mode lookup compaction + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::LOOKUP_COMPACT_MAX_INTERVAL, &lookup_compact_max_interval)); + // Parse lookup.cache.bloom.filter.enabled - enable bloom filter for lookup cache + PAIMON_RETURN_NOT_OK(parser.Parse(Options::LOOKUP_CACHE_BLOOM_FILTER_ENABLED, + &lookup_cache_bloom_filter)); + // Parse lookup.cache.bloom.filter.fpp - false positive probability, default 0.05 + PAIMON_RETURN_NOT_OK(parser.Parse(Options::LOOKUP_CACHE_BLOOM_FILTER_FPP, + &lookup_cache_bloom_filter_fpp)); + // Parse lookup.remote-file.enabled - whether to enable remote file for lookup + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::LOOKUP_REMOTE_FILE_ENABLED, &lookup_remote_file_enabled)); + // Parse lookup.remote-file.level-threshold - level threshold for remote lookup files + PAIMON_RETURN_NOT_OK(parser.Parse(Options::LOOKUP_REMOTE_LEVEL_THRESHOLD, + &lookup_remote_level_threshold)); + // Parse lookup.cache-spill-compression - spill compression for lookup cache, default "zstd" + std::string lookup_compress_options_compression_str; + PAIMON_RETURN_NOT_OK(parser.ParseString(Options::LOOKUP_CACHE_SPILL_COMPRESSION, + &lookup_compress_options_compression_str)); + if (!lookup_compress_options_compression_str.empty()) { + lookup_compress_options.compress = lookup_compress_options_compression_str; + } + // Parse spill-compression.zstd-level - zstd level for spill compression, default 1 + 
PAIMON_RETURN_NOT_OK(parser.Parse(Options::SPILL_COMPRESSION_ZSTD_LEVEL, + &(lookup_compress_options.zstd_level))); + // Parse cache-page-size - memory page size for caching, default 64 kb + PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::CACHE_PAGE_SIZE, &cache_page_size)); + // Parse lookup.cache-max-memory-size - max memory size for lookup cache, default 256 mb + PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::LOOKUP_CACHE_MAX_MEMORY_SIZE, + &lookup_cache_max_memory)); + // Parse lookup.cache.high-priority-pool-ratio - fraction for high-priority data, default + // 0.25 + PAIMON_RETURN_NOT_OK(parser.Parse(Options::LOOKUP_CACHE_HIGH_PRIO_POOL_RATIO, + &lookup_cache_high_prio_pool_ratio)); + if (lookup_cache_high_prio_pool_ratio < 0.0 || lookup_cache_high_prio_pool_ratio >= 1.0) { + return Status::Invalid(fmt::format( + "The high priority pool ratio should in the range [0, 1), while input is {}", + lookup_cache_high_prio_pool_ratio)); + } + // Parse lookup.cache-file-retention - cached files retention time, default "1 hour" + std::string lookup_cache_file_retention_str; + PAIMON_RETURN_NOT_OK(parser.ParseString(Options::LOOKUP_CACHE_FILE_RETENTION, + &lookup_cache_file_retention_str)); + if (!lookup_cache_file_retention_str.empty()) { + PAIMON_ASSIGN_OR_RAISE(lookup_cache_file_retention_ms, + TimeDuration::Parse(lookup_cache_file_retention_str)); + } + // Parse lookup.cache-max-disk-size - max disk size for lookup cache, default unlimited + PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::LOOKUP_CACHE_MAX_DISK_SIZE, + &lookup_cache_max_disk_size)); + return Status::OK(); + } +}; - // parse file.compression.per.level - PAIMON_RETURN_NOT_OK(parser.ParseFileCompressionPerLevel(&impl->file_compression_per_level)); +// Parse configurations from a map and return a populated CoreOptions object. 
+Result CoreOptions::FromMap( + const std::map& options_map, + const std::shared_ptr& specified_file_system, + const std::map& fs_scheme_to_identifier_map) { + CoreOptions options; + auto& impl = options.impl_; + impl->raw_options = options_map; + ConfigParser parser(options_map); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::COMPACTION_MAX_SIZE_AMPLIFICATION_PERCENT, - &impl->compaction_max_size_amplification_percent)); PAIMON_RETURN_NOT_OK( - parser.Parse(Options::COMPACTION_SIZE_RATIO, &impl->compaction_size_ratio)); + impl->ParseBasicOptions(parser, specified_file_system, fs_scheme_to_identifier_map)); + PAIMON_RETURN_NOT_OK(impl->ParseFileFormatOptions(parser)); + PAIMON_RETURN_NOT_OK(impl->ParseManifestOptions(parser)); + PAIMON_RETURN_NOT_OK(impl->ParseExpireOptions(parser)); + PAIMON_RETURN_NOT_OK(impl->ParseCommitOptions(parser)); + PAIMON_RETURN_NOT_OK(impl->ParseMergeAndSequenceOptions(parser)); + PAIMON_RETURN_NOT_OK(impl->ParseDeletionVectorOptions(parser)); + PAIMON_RETURN_NOT_OK(impl->ParseScanAndBranchOptions(parser)); + PAIMON_RETURN_NOT_OK(impl->ParseIndexOptions(parser)); + PAIMON_RETURN_NOT_OK(impl->ParseCompactionOptions(parser)); + PAIMON_RETURN_NOT_OK(impl->ParseLookupOptions(parser)); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::NUM_SORTED_RUNS_COMPACTION_TRIGGER, - &impl->num_sorted_runs_compaction_trigger)); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::NUM_SORTED_RUNS_STOP_TRIGGER, - &impl->num_sorted_runs_stop_trigger)); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::NUM_LEVELS, &impl->num_levels)); - - PAIMON_RETURN_NOT_OK(parser.ParseLookupCompactMode(&impl->lookup_compact_mode)); - PAIMON_RETURN_NOT_OK( - parser.Parse(Options::LOOKUP_COMPACT_MAX_INTERVAL, &impl->lookup_compact_max_interval)); - - // parse lookup cache - PAIMON_RETURN_NOT_OK(parser.ParseMemorySize(Options::LOOKUP_CACHE_MAX_MEMORY_SIZE, - &impl->lookup_cache_max_memory)); - PAIMON_RETURN_NOT_OK(parser.Parse(Options::LOOKUP_CACHE_HIGH_PRIO_POOL_RATIO, - 
&impl->lookup_cache_high_prio_pool_ratio)); - if (impl->lookup_cache_high_prio_pool_ratio < 0.0 || - impl->lookup_cache_high_prio_pool_ratio >= 1.0) { - return Status::Invalid(fmt::format( - "The high priority pool ratio should in the range [0, 1), while input is {}", - impl->lookup_cache_high_prio_pool_ratio)); - } return options; } @@ -1217,4 +1306,12 @@ double CoreOptions::GetLookupCacheHighPrioPoolRatio() const { return impl_->lookup_cache_high_prio_pool_ratio; } +int64_t CoreOptions::GetLookupCacheFileRetentionMs() const { + return impl_->lookup_cache_file_retention_ms; +} + +int64_t CoreOptions::GetLookupCacheMaxDiskSize() const { + return impl_->lookup_cache_max_disk_size; +} + } // namespace paimon diff --git a/src/paimon/core/core_options.h b/src/paimon/core/core_options.h index d29b7fca0..d769a080b 100644 --- a/src/paimon/core/core_options.h +++ b/src/paimon/core/core_options.h @@ -163,6 +163,9 @@ class PAIMON_EXPORT CoreOptions { int64_t GetLookupCacheMaxMemory() const; double GetLookupCacheHighPrioPoolRatio() const; + int64_t GetLookupCacheFileRetentionMs() const; + int64_t GetLookupCacheMaxDiskSize() const; + const std::map& ToMap() const; private: diff --git a/src/paimon/core/core_options_test.cpp b/src/paimon/core/core_options_test.cpp index 6a8a86855..4c930121b 100644 --- a/src/paimon/core/core_options_test.cpp +++ b/src/paimon/core/core_options_test.cpp @@ -128,6 +128,8 @@ TEST(CoreOptionsTest, TestDefaultValue) { ASSERT_EQ(10, core_options.GetLookupCompactMaxInterval()); ASSERT_EQ(256 * 1024 * 1024, core_options.GetLookupCacheMaxMemory()); ASSERT_EQ(0.25, core_options.GetLookupCacheHighPrioPoolRatio()); + ASSERT_EQ(1 * 3600 * 1000, core_options.GetLookupCacheFileRetentionMs()); + ASSERT_EQ(INT64_MAX, core_options.GetLookupCacheMaxDiskSize()); ASSERT_FALSE(core_options.LookupRemoteFileEnabled()); ASSERT_EQ(core_options.GetLookupRemoteLevelThreshold(), INT32_MIN); } @@ -217,6 +219,8 @@ TEST(CoreOptionsTest, TestFromMap) { 
{Options::FILE_COMPRESSION_PER_LEVEL, "0:lz4,3:none"}, {Options::LOOKUP_CACHE_MAX_MEMORY_SIZE, "1MB"}, {Options::LOOKUP_CACHE_HIGH_PRIO_POOL_RATIO, "0.35"}, + {Options::LOOKUP_CACHE_FILE_RETENTION, "30min"}, + {Options::LOOKUP_CACHE_MAX_DISK_SIZE, "10GB"}, {Options::LOOKUP_REMOTE_FILE_ENABLED, "True"}, {Options::LOOKUP_REMOTE_LEVEL_THRESHOLD, "2"}}; @@ -331,6 +335,8 @@ TEST(CoreOptionsTest, TestFromMap) { ASSERT_EQ(6 * 1024 * 1024, core_options.GetCachePageSize()); ASSERT_EQ(1024 * 1024, core_options.GetLookupCacheMaxMemory()); ASSERT_EQ(0.35, core_options.GetLookupCacheHighPrioPoolRatio()); + ASSERT_EQ(30 * 60 * 1000, core_options.GetLookupCacheFileRetentionMs()); + ASSERT_EQ(10L * 1024 * 1024 * 1024, core_options.GetLookupCacheMaxDiskSize()); ASSERT_TRUE(core_options.LookupRemoteFileEnabled()); ASSERT_EQ(core_options.GetLookupRemoteLevelThreshold(), 2); } diff --git a/src/paimon/core/mergetree/compact/lookup_merge_tree_compact_rewriter_test.cpp b/src/paimon/core/mergetree/compact/lookup_merge_tree_compact_rewriter_test.cpp index 45a2499e2..c55e45ea3 100644 --- a/src/paimon/core/mergetree/compact/lookup_merge_tree_compact_rewriter_test.cpp +++ b/src/paimon/core/mergetree/compact/lookup_merge_tree_compact_rewriter_test.cpp @@ -331,11 +331,14 @@ class LookupMergeTreeCompactRewriterTest : public testing::Test { LookupStoreFactory::Create(lookup_key_comparator, std::make_shared(1024 * 1024, 0.0), options)); PAIMON_ASSIGN_OR_RAISE(auto path_factory, CreateFileStorePathFactory(table_path, options)); - return LookupLevels::Create( - fs_, BinaryRow::EmptyRow(), /*bucket=*/0, options, schema_manager, - std::move(io_manager), path_factory, table_schema, std::move(levels), - DeletionVector::CreateFactory(/*dv_maintainer=*/nullptr), processor_factory, - serializer_factory, lookup_store_factory, remote_lookup_file_manager, pool_); + auto lookup_file_cache = LookupFile::CreateLookupFileCache( + options.GetLookupCacheFileRetentionMs(), options.GetLookupCacheMaxDiskSize()); + 
return LookupLevels::Create(fs_, BinaryRow::EmptyRow(), /*bucket=*/0, options, + schema_manager, std::move(io_manager), path_factory, + table_schema, std::move(levels), + DeletionVector::CreateFactory(/*dv_maintainer=*/nullptr), + processor_factory, serializer_factory, lookup_store_factory, + lookup_file_cache, remote_lookup_file_manager, pool_); } Result>> GenerateSortedRuns( diff --git a/src/paimon/core/mergetree/compact/merge_tree_compact_manager_factory.cpp b/src/paimon/core/mergetree/compact/merge_tree_compact_manager_factory.cpp index 129f8c811..ff595fc75 100644 --- a/src/paimon/core/mergetree/compact/merge_tree_compact_manager_factory.cpp +++ b/src/paimon/core/mergetree/compact/merge_tree_compact_manager_factory.cpp @@ -58,6 +58,7 @@ Result>> CreateLookupLevelsInternal( const std::shared_ptr& levels, const std::shared_ptr::Factory>& processor_factory, const std::shared_ptr& dv_maintainer, + const std::shared_ptr& lookup_file_cache, const std::shared_ptr& remote_lookup_file_manager, const std::shared_ptr& pool) { if (io_manager == nullptr) { @@ -72,10 +73,11 @@ Result>> CreateLookupLevelsInternal( LookupStoreFactory::Create(lookup_key_comparator, cache_manager, options)); auto dv_factory = DeletionVector::CreateFactory(dv_maintainer); auto serializer_factory = std::make_shared(); - return LookupLevels::Create( - options.GetFileSystem(), partition, bucket, options, schema_manager, io_manager, - file_store_path_factory, table_schema, levels, dv_factory, processor_factory, - serializer_factory, lookup_store_factory, remote_lookup_file_manager, pool); + return LookupLevels::Create(options.GetFileSystem(), partition, bucket, options, + schema_manager, io_manager, file_store_path_factory, + table_schema, levels, dv_factory, processor_factory, + serializer_factory, lookup_store_factory, lookup_file_cache, + remote_lookup_file_manager, pool); } } // namespace @@ -109,7 +111,7 @@ Result> MergeTreeCompactManagerFactory::CreateCo const BinaryRow& partition, int32_t 
bucket, const std::shared_ptr& compact_strategy, const std::shared_ptr& compact_executor, const std::shared_ptr& levels, - const std::shared_ptr& dv_maintainer) const { + const std::shared_ptr& dv_maintainer) { if (options_.WriteOnly()) { return std::make_shared(); } @@ -138,13 +140,18 @@ MergeTreeCompactManagerFactory::CreateCompactionMetricsReporter(const BinaryRow& Result> MergeTreeCompactManagerFactory::CreateRewriter( const BinaryRow& partition, int32_t bucket, const std::shared_ptr& levels, const std::shared_ptr& dv_maintainer, - const std::shared_ptr& cancellation_controller) const { + const std::shared_ptr& cancellation_controller) { auto path_factory_cache = std::make_shared(root_path_, table_schema_, options_, pool_); if (options_.GetChangelogProducer() == ChangelogProducer::FULL_COMPACTION) { return Status::NotImplemented("not support full changelog merge tree compact rewriter"); } if (options_.NeedLookup()) { + // Lazily create the global lookup file cache + if (!lookup_file_cache_) { + lookup_file_cache_ = LookupFile::CreateLookupFileCache( + options_.GetLookupCacheFileRetentionMs(), options_.GetLookupCacheMaxDiskSize()); + } int32_t max_level = options_.GetNumLevels() - 1; return CreateLookupRewriter(partition, bucket, levels, dv_maintainer, max_level, options_.GetLookupStrategy(), path_factory_cache, @@ -177,10 +184,10 @@ Result> MergeTreeCompactManagerFactory::CreateL auto processor_factory = std::make_shared(); PAIMON_ASSIGN_OR_RAISE( std::unique_ptr> lookup_levels, - CreateLookupLevelsInternal(options_, schema_manager_, io_manager_, cache_manager_, - file_store_path_factory_, table_schema_, partition, - bucket, levels, processor_factory, dv_maintainer, - remote_lookup_file_manager, pool_)); + CreateLookupLevelsInternal( + options_, schema_manager_, io_manager_, cache_manager_, file_store_path_factory_, + table_schema_, partition, bucket, levels, processor_factory, dv_maintainer, + lookup_file_cache_, remote_lookup_file_manager, pool_)); auto 
merge_function_wrapper_factory = [lookup_levels_ptr = lookup_levels.get(), ignore_delete = options_.IgnoreDelete()]( int32_t output_level) -> Result>> { @@ -229,7 +236,7 @@ MergeTreeCompactManagerFactory::CreateLookupRewriterWithDeletionVector( CreateLookupLevelsInternal( options_, schema_manager_, io_manager_, cache_manager_, file_store_path_factory_, table_schema_, partition, bucket, levels, processor_factory, dv_maintainer, - remote_lookup_file_manager, pool_)); + lookup_file_cache_, remote_lookup_file_manager, pool_)); auto merge_function_wrapper_factory = [data_schema = schema_, options = options_, trimmed_primary_keys, lookup_levels_ptr = lookup_levels.get(), lookup_strategy, @@ -263,7 +270,7 @@ MergeTreeCompactManagerFactory::CreateLookupRewriterWithDeletionVector( CreateLookupLevelsInternal( options_, schema_manager_, io_manager_, cache_manager_, file_store_path_factory_, table_schema_, partition, bucket, levels, processor_factory, dv_maintainer, - remote_lookup_file_manager, pool_)); + lookup_file_cache_, remote_lookup_file_manager, pool_)); auto merge_function_wrapper_factory = [data_schema = schema_, options = options_, trimmed_primary_keys, lookup_levels_ptr = lookup_levels.get(), lookup_strategy, @@ -303,10 +310,10 @@ MergeTreeCompactManagerFactory::CreateLookupRewriterWithoutDeletionVector( auto processor_factory = std::make_shared(schema_); PAIMON_ASSIGN_OR_RAISE( std::unique_ptr> lookup_levels, - CreateLookupLevelsInternal(options_, schema_manager_, io_manager_, cache_manager_, - file_store_path_factory_, table_schema_, partition, - bucket, levels, processor_factory, dv_maintainer, - remote_lookup_file_manager, pool_)); + CreateLookupLevelsInternal( + options_, schema_manager_, io_manager_, cache_manager_, file_store_path_factory_, + table_schema_, partition, bucket, levels, processor_factory, dv_maintainer, + lookup_file_cache_, remote_lookup_file_manager, pool_)); auto merge_function_wrapper_factory = [data_schema = schema_, options = options_, 
trimmed_primary_keys, lookup_levels_ptr = lookup_levels.get(), lookup_strategy, diff --git a/src/paimon/core/mergetree/compact/merge_tree_compact_manager_factory.h b/src/paimon/core/mergetree/compact/merge_tree_compact_manager_factory.h index d2079e29f..0ee56c55b 100644 --- a/src/paimon/core/mergetree/compact/merge_tree_compact_manager_factory.h +++ b/src/paimon/core/mergetree/compact/merge_tree_compact_manager_factory.h @@ -27,6 +27,7 @@ #include "paimon/core/mergetree/compact/compact_rewriter.h" #include "paimon/core/mergetree/compact/compact_strategy.h" #include "paimon/core/mergetree/lookup/remote_lookup_file_manager.h" +#include "paimon/core/mergetree/lookup_file.h" #include "paimon/core/operation/metrics/compaction_metrics.h" #include "paimon/result.h" namespace arrow { @@ -88,9 +89,13 @@ class MergeTreeCompactManagerFactory { const BinaryRow& partition, int32_t bucket, const std::shared_ptr& compact_strategy, const std::shared_ptr& compact_executor, const std::shared_ptr& levels, - const std::shared_ptr& dv_maintainer) const; + const std::shared_ptr& dv_maintainer); - void Close() {} + void Close() { + if (lookup_file_cache_) { + lookup_file_cache_->InvalidateAll(); + } + } private: std::shared_ptr CreateCompactionMetricsReporter( @@ -99,7 +104,7 @@ class MergeTreeCompactManagerFactory { Result> CreateRewriter( const BinaryRow& partition, int32_t bucket, const std::shared_ptr& levels, const std::shared_ptr& dv_maintainer, - const std::shared_ptr& cancellation_controller) const; + const std::shared_ptr& cancellation_controller); Result> CreateLookupRewriter( const BinaryRow& partition, int32_t bucket, const std::shared_ptr& levels, @@ -139,6 +144,7 @@ class MergeTreeCompactManagerFactory { std::shared_ptr cache_manager_; std::shared_ptr file_store_path_factory_; std::string root_path_; + std::shared_ptr lookup_file_cache_; }; } // namespace paimon diff --git a/src/paimon/core/mergetree/lookup/remote_lookup_file_manager.cpp 
b/src/paimon/core/mergetree/lookup/remote_lookup_file_manager.cpp index 4287d4a81..7d7f99ab4 100644 --- a/src/paimon/core/mergetree/lookup/remote_lookup_file_manager.cpp +++ b/src/paimon/core/mergetree/lookup/remote_lookup_file_manager.cpp @@ -64,7 +64,7 @@ Result> RemoteLookupFileManager::GenRemoteLookupFi PAIMON_RETURN_NOT_OK(CopyFromInputToOutput(std::move(input_stream), std::move(output_stream))); - lookup_levels->AddLocalFile(file, lookup_file); + PAIMON_RETURN_NOT_OK(lookup_levels->AddLocalFile(file, lookup_file)); std::vector> new_extra_files(file->extra_files); new_extra_files.push_back(remote_sst_name); diff --git a/src/paimon/core/mergetree/lookup/remote_lookup_file_manager_test.cpp b/src/paimon/core/mergetree/lookup/remote_lookup_file_manager_test.cpp index 6ef1e2a0d..52910e88e 100644 --- a/src/paimon/core/mergetree/lookup/remote_lookup_file_manager_test.cpp +++ b/src/paimon/core/mergetree/lookup/remote_lookup_file_manager_test.cpp @@ -153,11 +153,13 @@ class RemoteLookupFileManagerTest : public testing::Test { LookupStoreFactory::Create(key_comparator, std::make_shared(1024 * 1024, 0.0), options)); PAIMON_ASSIGN_OR_RAISE(auto path_factory, CreateFileStorePathFactory(table_path, options)); + auto lookup_file_cache = LookupFile::CreateLookupFileCache( + options.GetLookupCacheFileRetentionMs(), options.GetLookupCacheMaxDiskSize()); return LookupLevels::Create( fs_, BinaryRow::EmptyRow(), /*bucket=*/0, options, schema_manager, std::move(io_manager), path_factory, table_schema, levels, /*dv_factory=*/{}, processor_factory, serializer_factory, lookup_store_factory, - remote_lookup_file_manager, pool_); + lookup_file_cache, remote_lookup_file_manager, pool_); } Result> CreateRemoteLookupFileManager( diff --git a/src/paimon/core/mergetree/lookup_file.cpp b/src/paimon/core/mergetree/lookup_file.cpp new file mode 100644 index 000000000..a6b31c0d2 --- /dev/null +++ b/src/paimon/core/mergetree/lookup_file.cpp @@ -0,0 +1,102 @@ +/* + * Copyright 2026-present Alibaba 
Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "paimon/core/mergetree/lookup_file.h" + +#include "fmt/format.h" +#include "paimon/common/utils/binary_row_partition_computer.h" + +namespace paimon { +LookupFile::LookupFile(const std::shared_ptr& fs, const std::string& local_file, + int64_t file_size_bytes, int32_t level, int64_t schema_id, + const std::string& ser_version, std::unique_ptr&& reader, + Callback callback) + : fs_(fs), + local_file_(local_file), + file_size_bytes_(file_size_bytes), + level_(level), + schema_id_(schema_id), + ser_version_(ser_version), + reader_(std::move(reader)), + callback_(std::move(callback)) {} + +LookupFile::~LookupFile() { + if (!closed_) { + [[maybe_unused]] auto status = Close(); + } +} + +Result> LookupFile::GetResult(const std::shared_ptr& key) { + if (closed_) { + return Status::Invalid("GetResult failed in LookupFile, reader is closed"); + } + request_count_++; + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr res, reader_->Lookup(key)); + if (res) { + hit_count_++; + } + return res; +} + +Status LookupFile::Close() { + closed_ = true; + if (callback_) { + callback_(); + } + PAIMON_RETURN_NOT_OK(reader_->Close()); + return fs_->Delete(local_file_, /*recursive=*/false); +} + +int64_t LookupFile::FileWeigh(const std::string& /*file_name*/, + const std::shared_ptr& lookup_file) { + if (!lookup_file || lookup_file->IsClosed()) { + return 0; + } + return lookup_file->file_size_bytes_; +} + +void 
LookupFile::RemovalCallback(const std::string& /*file_name*/, + const std::shared_ptr& lookup_file, + LookupFile::LookupFileCache::RemovalCause /*cause*/) { + if (lookup_file && !lookup_file->IsClosed()) { + [[maybe_unused]] auto status = lookup_file->Close(); + } +} + +std::shared_ptr LookupFile::CreateLookupFileCache( + int64_t file_retention_ms, int64_t max_disk_size) { + LookupFile::LookupFileCache::Options options; + options.expire_after_access_ms = file_retention_ms; + options.max_weight = max_disk_size; + options.weigh_func = &LookupFile::FileWeigh; + options.removal_callback = &LookupFile::RemovalCallback; + return std::make_shared(std::move(options)); +} + +Result LookupFile::LocalFilePrefix( + const std::shared_ptr& partition_type, const BinaryRow& partition, + int32_t bucket, const std::string& remote_file_name) { + if (partition.GetFieldCount() == 0) { + return fmt::format("{}-{}", std::to_string(bucket), remote_file_name); + } else { + PAIMON_ASSIGN_OR_RAISE(std::string part_str, BinaryRowPartitionComputer::PartToSimpleString( + partition_type, partition, + /*delimiter=*/"-", /*max_length=*/20)); + return fmt::format("{}-{}-{}", part_str, bucket, remote_file_name); + } +} + +} // namespace paimon diff --git a/src/paimon/core/mergetree/lookup_file.h b/src/paimon/core/mergetree/lookup_file.h index 55108cf3c..f686533d9 100644 --- a/src/paimon/core/mergetree/lookup_file.h +++ b/src/paimon/core/mergetree/lookup_file.h @@ -15,31 +15,28 @@ */ #pragma once -#include "fmt/format.h" #include "paimon/common/data/binary_row.h" #include "paimon/common/lookup/lookup_store_factory.h" -#include "paimon/common/utils/binary_row_partition_computer.h" +#include "paimon/common/utils/generic_lru_cache.h" #include "paimon/fs/file_system.h" namespace paimon { /// Lookup file for cache remote file to local. 
class LookupFile { public: - LookupFile(const std::shared_ptr& fs, const std::string& local_file, int32_t level, - int64_t schema_id, const std::string& ser_version, - std::unique_ptr&& reader) - : fs_(fs), - local_file_(local_file), - level_(level), - schema_id_(schema_id), - ser_version_(ser_version), - reader_(std::move(reader)) {} - - ~LookupFile() { - if (!closed_) { - [[maybe_unused]] auto status = Close(); - } - } + using Callback = std::function; + /// Type alias for the global lookup file cache. + /// Key: data file name (string), Value: shared_ptr + /// Weight is measured in bytes (file size on disk). + using LookupFileCache = GenericLruCache>; + + LookupFile(const std::shared_ptr& fs, const std::string& local_file, + int64_t file_size_bytes, int32_t level, int64_t schema_id, + const std::string& ser_version, std::unique_ptr&& reader, + Callback callback); + + ~LookupFile(); + const std::string& LocalFile() const { return local_file_; } @@ -60,49 +57,47 @@ class LookupFile { return closed_; } - Result> GetResult(const std::shared_ptr& key) { - if (closed_) { - return Status::Invalid("GetResult failed in LookupFile, reader is closed"); - } - request_count_++; - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr res, reader_->Lookup(key)); - if (res) { - hit_count_++; + bool operator==(const LookupFile& other) const { + if (this == &other) { + return true; } - return res; + return local_file_ == other.local_file_; } - Status Close() { - PAIMON_RETURN_NOT_OK(reader_->Close()); - closed_ = true; - // TODO(lisizhuo.lsz): callback - return fs_->Delete(local_file_, /*recursive=*/false); - } + Result> GetResult(const std::shared_ptr& key); + + Status Close(); + + /// Create a global LookupFileCache with the given retention and max disk size (in bytes). 
+ static std::shared_ptr CreateLookupFileCache(int64_t file_retention_ms, + int64_t max_disk_size); static Result LocalFilePrefix(const std::shared_ptr& partition_type, const BinaryRow& partition, int32_t bucket, - const std::string& remote_file_name) { - if (partition.GetFieldCount() == 0) { - return fmt::format("{}-{}", std::to_string(bucket), remote_file_name); - } else { - PAIMON_ASSIGN_OR_RAISE( - std::string part_str, - BinaryRowPartitionComputer::PartToSimpleString( - partition_type, partition, /*delimiter=*/"-", /*max_length=*/20)); - return fmt::format("{}-{}-{}", part_str, bucket, remote_file_name); - } - } + const std::string& remote_file_name); + + private: + /// Compute the weight of a lookup file in bytes for cache eviction. + static int64_t FileWeigh(const std::string& file_name, + const std::shared_ptr& lookup_file); + + /// Removal callback for the global LookupFileCache. + static void RemovalCallback(const std::string& file_name, + const std::shared_ptr& lookup_file, + LookupFileCache::RemovalCause cause); private: std::shared_ptr fs_; std::string local_file_; + int64_t file_size_bytes_ = 0; int32_t level_; int64_t schema_id_; std::string ser_version_; std::unique_ptr reader_; + Callback callback_; int64_t request_count_ = 0; int64_t hit_count_ = 0; bool closed_ = false; - // TODO(lisizhuo.lsz): callback }; + } // namespace paimon diff --git a/src/paimon/core/mergetree/lookup_file_test.cpp b/src/paimon/core/mergetree/lookup_file_test.cpp index 2105aac80..220ba4868 100644 --- a/src/paimon/core/mergetree/lookup_file_test.cpp +++ b/src/paimon/core/mergetree/lookup_file_test.cpp @@ -52,8 +52,9 @@ TEST(LookupFileTest, TestSimple) { std::map kvs = {{"aa", "aa1"}, {"bb", "bb1"}}; auto lookup_file = std::make_shared( - fs, local_file, /*level=*/3, /*schema_id=*/1, - /*ser_version=*/"v1", std::make_unique(kvs, pool)); + fs, local_file, /*file_size_bytes=*/0, /*level=*/3, /*schema_id=*/1, + /*ser_version=*/"v1", std::make_unique(kvs, pool), + 
/*callback=*/nullptr); ASSERT_EQ(lookup_file->LocalFile(), local_file); ASSERT_EQ(lookup_file->Level(), 3); ASSERT_EQ(lookup_file->SchemaId(), 1); @@ -104,4 +105,119 @@ TEST(LookupFileTest, TestLocalFilePrefix) { ASSERT_EQ(ret, "3-test.orc"); } } + +TEST(LookupFileTest, TestLookupFileCacheLifecycle) { + // This test covers: cache creation, put multiple entries, replacement, + // invalidation, weight-based eviction, and verifying local files are deleted. + auto pool = GetDefaultPool(); + auto tmp_dir = UniqueTestDirectory::Create("local"); + ASSERT_TRUE(tmp_dir); + auto fs = tmp_dir->GetFileSystem(); + + class FakeLookupStoreReader : public LookupStoreReader { + public: + Result> Lookup( + const std::shared_ptr& /*key*/) const override { + return std::shared_ptr(); + } + Status Close() override { + return Status::OK(); + } + }; + + std::vector call_back_files; + // Helper to create a local file with given size and return a LookupFile + auto make_lookup_file = [&](const std::string& name, + int64_t size) -> std::shared_ptr { + std::string path = tmp_dir->Str() + "/" + name; + std::string data(size, 'x'); + EXPECT_OK(fs->WriteFile(path, data, /*overwrite=*/false)); + LookupFile::Callback callback = [&call_back_files, name = name]() { + call_back_files.push_back(name); + }; + return std::make_shared( + fs, path, size, /*level=*/1, /*schema_id=*/0, + /*ser_version=*/"v1", std::make_unique(), std::move(callback)); + }; + + // Create a cache: max_weight = 300 bytes, no expiration + auto cache = LookupFile::CreateLookupFileCache(/*file_retention_ms=*/-1, /*max_disk_size=*/300); + ASSERT_EQ(cache->Size(), 0); + ASSERT_EQ(cache->GetCurrentWeight(), 0); + + // --- Phase 1: Put multiple entries --- + auto file_a = make_lookup_file("a.sst", 100); + auto file_b = make_lookup_file("b.sst", 100); + auto file_c = make_lookup_file("c.sst", 100); + std::string path_a = file_a->LocalFile(); + std::string path_b = file_b->LocalFile(); + std::string path_c = file_c->LocalFile(); + + 
ASSERT_OK(cache->Put("a", file_a)); + ASSERT_OK(cache->Put("b", file_b)); + ASSERT_OK(cache->Put("c", file_c)); + ASSERT_EQ(cache->Size(), 3); + ASSERT_EQ(cache->GetCurrentWeight(), 300); + + // All local files should exist + ASSERT_TRUE(fs->Exists(path_a).value()); + ASSERT_TRUE(fs->Exists(path_b).value()); + ASSERT_TRUE(fs->Exists(path_c).value()); + + // --- Phase 2: Replace an entry --- + // Replace "b" with a new file; old file_b should be closed and deleted + auto file_b2 = make_lookup_file("b2.sst", 80); + std::string path_b2 = file_b2->LocalFile(); + ASSERT_OK(cache->Put("b", file_b2)); + ASSERT_EQ(cache->Size(), 3); + ASSERT_EQ(cache->GetCurrentWeight(), 280); // 100 + 80 + 100 + + // Old b.sst should be deleted by RemovalCallback (REPLACED cause) + ASSERT_FALSE(fs->Exists(path_b).value()); + ASSERT_EQ(call_back_files, std::vector({"b.sst"})); + // New b2.sst should exist + ASSERT_TRUE(fs->Exists(path_b2).value()); + + // --- Phase 3: Weight-based eviction --- + // Add a large file that pushes total over 300 bytes + auto file_d = make_lookup_file("d.sst", 150); + std::string path_d = file_d->LocalFile(); + ASSERT_OK(cache->Put("d", file_d)); + // Total would be 100 + 80 + 100 + 150 = 430 > 300 + // LRU eviction should remove "a" first (least recently used), then "c" + // After eviction: weight should be 230 + ASSERT_EQ(cache->GetCurrentWeight(), 230); + + // "a" and "c" were LRU (inserted first, never accessed again), should be evicted and file + // deleted + ASSERT_FALSE(cache->GetIfPresent("a").has_value()); + ASSERT_FALSE(cache->GetIfPresent("c").has_value()); + ASSERT_EQ(call_back_files, std::vector({"b.sst", "a.sst", "c.sst"})); + ASSERT_FALSE(fs->Exists(path_a).value()); + ASSERT_FALSE(fs->Exists(path_c).value()); + + // "d" should be in cache + ASSERT_TRUE(cache->GetIfPresent("d").has_value()); + ASSERT_TRUE(fs->Exists(path_d).value()); + + // --- Phase 4: Explicit invalidation --- + // Invalidate "b" (the replaced entry) + cache->Invalidate("b"); + 
ASSERT_FALSE(cache->GetIfPresent("b").has_value()); + // b2.sst should be deleted + ASSERT_FALSE(fs->Exists(path_b2).value()); + ASSERT_EQ(call_back_files, std::vector({"b.sst", "a.sst", "c.sst", "b2.sst"})); + + // --- Phase 5: InvalidateAll --- + cache->InvalidateAll(); + ASSERT_EQ(cache->Size(), 0); + ASSERT_EQ(cache->GetCurrentWeight(), 0); + // d.sst should be deleted + ASSERT_FALSE(fs->Exists(path_d).value()); + std::vector> file_status_list; + ASSERT_OK(fs->ListDir(tmp_dir->Str(), &file_status_list)); + ASSERT_TRUE(file_status_list.empty()); + ASSERT_EQ(call_back_files, + std::vector({"b.sst", "a.sst", "c.sst", "b2.sst", "d.sst"})); +} } // namespace paimon::test diff --git a/src/paimon/core/mergetree/lookup_levels.cpp b/src/paimon/core/mergetree/lookup_levels.cpp index a11772952..2921a5162 100644 --- a/src/paimon/core/mergetree/lookup_levels.cpp +++ b/src/paimon/core/mergetree/lookup_levels.cpp @@ -38,6 +38,7 @@ Result>> LookupLevels::Create( const std::shared_ptr::Factory>& processor_factory, const std::shared_ptr& serializer_factory, const std::shared_ptr& lookup_store_factory, + const std::shared_ptr& lookup_file_cache, const std::shared_ptr& remote_lookup_file_manager, const std::shared_ptr& pool) { PAIMON_ASSIGN_OR_RAISE(std::vector pk_fields, @@ -78,7 +79,7 @@ Result>> LookupLevels::Create( fs, partition, bucket, options, schema_manager, io_manager, std::move(key_comparator), data_file_path_factory, std::move(split_read), table_schema, partition_schema, pk_schema, levels, dv_factory, processor_factory, std::move(key_serializer), serializer_factory, - lookup_store_factory, remote_lookup_file_manager, pool)); + lookup_store_factory, lookup_file_cache, remote_lookup_file_manager, pool)); } template Result> LookupLevels::Lookup(const std::shared_ptr& key, @@ -132,6 +133,7 @@ LookupLevels::LookupLevels( std::unique_ptr&& key_serializer, const std::shared_ptr& serializer_factory, const std::shared_ptr& lookup_store_factory, + const std::shared_ptr& 
lookup_file_cache, const std::shared_ptr& remote_lookup_file_manager, const std::shared_ptr& pool) : pool_(pool), @@ -153,6 +155,7 @@ LookupLevels::LookupLevels( key_serializer_(std::move(key_serializer)), serializer_factory_(serializer_factory), lookup_store_factory_(lookup_store_factory), + lookup_file_cache_(lookup_file_cache), remote_lookup_file_manager_(remote_lookup_file_manager) { if constexpr (std::is_same_v) { // if T is FilePosition, only read key fields to create sst file is enough @@ -171,19 +174,25 @@ LookupLevels::~LookupLevels() { template void LookupLevels::NotifyDropFile(const std::string& file) { - lookup_file_cache_.erase(file); + lookup_file_cache_->Invalidate(file); } template Result> LookupLevels::Lookup(const std::shared_ptr& key, const std::shared_ptr& file) { - auto iter = lookup_file_cache_.find(file->file_name); + auto cached = lookup_file_cache_->GetIfPresent(file->file_name); std::shared_ptr lookup_file; - if (iter == lookup_file_cache_.end()) { - PAIMON_ASSIGN_OR_RAISE(lookup_file, CreateLookupFile(file)); - AddLocalFile(file, lookup_file); + bool new_created = false; + if (cached.has_value()) { + lookup_file = cached.value(); } else { - lookup_file = iter->second; + PAIMON_ASSIGN_OR_RAISE(lookup_file, CreateLookupFile(file)); + new_created = true; + } + + // Ensure newly created lookup files are always added to cache, even on lookup error + if (new_created) { + PAIMON_RETURN_NOT_OK(AddLocalFile(file, lookup_file)); } PAIMON_ASSIGN_OR_RAISE(std::shared_ptr key_bytes, @@ -200,6 +209,7 @@ Result> LookupLevels::Lookup(const std::shared_ptrReadFromDisk(key, lookup_file->Level(), value_bytes, file->file_name)); return std::optional(std::move(result)); } + template Result> LookupLevels::CreateLookupFile( const std::shared_ptr& file) { @@ -219,10 +229,19 @@ Result> LookupLevels::CreateLookupFile( PAIMON_RETURN_NOT_OK(CreateSstFileFromDataFile(file, kv_file_path)); } + // Get file size for cache weight calculation + PAIMON_ASSIGN_OR_RAISE(auto 
file_status, fs_->GetFileStatus(kv_file_path)); + int64_t file_size = file_status->GetLen(); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr reader, lookup_store_factory_->CreateReader(fs_, kv_file_path, pool_)); - return std::make_shared(fs_, kv_file_path, file->level, schema_id, file_ser_version, - std::move(reader)); + + // Callback to remove from own_cached_files_ when evicted from global cache + std::string file_name = file->file_name; + auto callback = [this, file_name = file_name]() { own_cached_files_.erase(file_name); }; + + return std::make_shared(fs_, kv_file_path, file_size, file->level, schema_id, + file_ser_version, std::move(reader), std::move(callback)); } template @@ -271,9 +290,10 @@ std::string LookupLevels::NewRemoteSst(const std::shared_ptr& f } template -void LookupLevels::AddLocalFile(const std::shared_ptr& file, - const std::shared_ptr& lookup_file) { - lookup_file_cache_[file->file_name] = lookup_file; +Status LookupLevels::AddLocalFile(const std::shared_ptr& file, + const std::shared_ptr& lookup_file) { + own_cached_files_.insert(file->file_name); + return lookup_file_cache_->Put(file->file_name, lookup_file); } template @@ -397,9 +417,15 @@ Result>> LookupLevels::GetOrCreateProcess template Status LookupLevels::Close() { - // TODO(xinyu.lxy): invalid cache levels_->RemoveDropFileCallback(this); - lookup_file_cache_.clear(); + // Move own_cached_files_ to a local copy before iterating. + // Invalidate triggers LookupFile::Close() -> callback -> own_cached_files_.erase(), + // which would invalidate iterators if we iterated over own_cached_files_ directly. 
+ auto cached_files_copy = std::move(own_cached_files_); + own_cached_files_.clear(); + for (const auto& cached_file : cached_files_copy) { + lookup_file_cache_->Invalidate(cached_file); + } return Status::OK(); } diff --git a/src/paimon/core/mergetree/lookup_levels.h b/src/paimon/core/mergetree/lookup_levels.h index 4243dbd0f..b39ade7fd 100644 --- a/src/paimon/core/mergetree/lookup_levels.h +++ b/src/paimon/core/mergetree/lookup_levels.h @@ -16,6 +16,8 @@ #pragma once #include +#include +#include #include "paimon/common/data/serializer/row_compacted_serializer.h" #include "paimon/core/io/key_value_data_file_record_reader.h" @@ -51,6 +53,7 @@ class LookupLevels : public Levels::DropFileCallback { const std::shared_ptr::Factory>& processor_factory, const std::shared_ptr& serializer_factory, const std::shared_ptr& lookup_store_factory, + const std::shared_ptr& lookup_file_cache, const std::shared_ptr& remote_lookup_file_manager, const std::shared_ptr& pool); @@ -75,8 +78,8 @@ class LookupLevels : public Levels::DropFileCallback { Result> CreateLookupFile(const std::shared_ptr& file); - void AddLocalFile(const std::shared_ptr& file, - const std::shared_ptr& lookup_file); + Status AddLocalFile(const std::shared_ptr& file, + const std::shared_ptr& lookup_file); ~LookupLevels() override; @@ -97,6 +100,7 @@ class LookupLevels : public Levels::DropFileCallback { std::unique_ptr&& key_serializer, const std::shared_ptr& serializer_factory, const std::shared_ptr& lookup_store_factory, + const std::shared_ptr& lookup_file_cache, const std::shared_ptr& remote_lookup_file_manager, const std::shared_ptr& pool); @@ -140,7 +144,8 @@ class LookupLevels : public Levels::DropFileCallback { std::shared_ptr serializer_factory_; std::shared_ptr lookup_store_factory_; - std::map> lookup_file_cache_; + std::shared_ptr lookup_file_cache_; + std::set own_cached_files_; std::map, std::shared_ptr>> schema_id_and_ser_version_to_processors_; diff --git 
a/src/paimon/core/mergetree/lookup_levels_test.cpp b/src/paimon/core/mergetree/lookup_levels_test.cpp index f03255f41..31281367c 100644 --- a/src/paimon/core/mergetree/lookup_levels_test.cpp +++ b/src/paimon/core/mergetree/lookup_levels_test.cpp @@ -16,6 +16,9 @@ #include "paimon/core/mergetree/lookup_levels.h" +#include +#include + #include "arrow/api.h" #include "arrow/c/abi.h" #include "arrow/c/bridge.h" @@ -136,7 +139,8 @@ class LookupLevelsTest : public testing::Test { } Result>> CreateLookupLevels( - const std::string& table_path, const std::shared_ptr& levels) const { + const std::string& table_path, const std::shared_ptr& levels, + std::shared_ptr lookup_file_cache = nullptr) const { auto schema_manager = std::make_shared(fs_, table_path); PAIMON_ASSIGN_OR_RAISE(auto table_schema, schema_manager->ReadSchema(0)); PAIMON_ASSIGN_OR_RAISE(CoreOptions options, CoreOptions::FromMap(table_schema->Options())); @@ -152,10 +156,15 @@ class LookupLevelsTest : public testing::Test { LookupStoreFactory::Create(key_comparator, std::make_shared(1024 * 1024, 0.0), options)); PAIMON_ASSIGN_OR_RAISE(auto path_factory, CreateFileStorePathFactory(table_path, options)); + if (!lookup_file_cache) { + lookup_file_cache = LookupFile::CreateLookupFileCache( + options.GetLookupCacheFileRetentionMs(), options.GetLookupCacheMaxDiskSize()); + } return LookupLevels::Create( fs_, BinaryRow::EmptyRow(), /*bucket=*/0, options, schema_manager, std::move(io_manager), path_factory, table_schema, levels, /*dv_factory=*/{}, processor_factory, serializer_factory, lookup_store_factory, + lookup_file_cache, /*remote_lookup_file_manager=*/nullptr, pool_); } @@ -218,7 +227,7 @@ TEST_F(LookupLevelsTest, TestMultiLevels) { /*start_level=*/1)); ASSERT_FALSE(positioned_kv); - ASSERT_EQ(lookup_levels->lookup_file_cache_.size(), 2); + ASSERT_EQ(lookup_levels->lookup_file_cache_->Size(), 2); ASSERT_EQ(lookup_levels->schema_id_and_ser_version_to_processors_.size(), 1); 
ASSERT_EQ(lookup_levels->GetLevels()->NonEmptyHighestLevel(), 2); @@ -234,7 +243,7 @@ TEST_F(LookupLevelsTest, TestMultiLevels) { ASSERT_OK(fs_->ListDir(tmp_dir_->Str(), &file_status_list)); ASSERT_TRUE(file_status_list.empty()); ASSERT_TRUE(levels->drop_file_callbacks_.empty()); - // TODO(lisizhuo.lsz): test lookuplevels close + ASSERT_EQ(lookup_levels->lookup_file_cache_->Size(), 0); } TEST_F(LookupLevelsTest, TestMultiFiles) { @@ -543,9 +552,9 @@ TEST_F(LookupLevelsTest, TestDropFileCallbackOnUpdate) { ASSERT_TRUE(positioned_kv); // Both files should be cached now. - ASSERT_EQ(lookup_levels->lookup_file_cache_.size(), 2); - ASSERT_TRUE(lookup_levels->lookup_file_cache_.count(file0->file_name)); - ASSERT_TRUE(lookup_levels->lookup_file_cache_.count(file1->file_name)); + ASSERT_EQ(lookup_levels->lookup_file_cache_->Size(), 2); + ASSERT_TRUE(lookup_levels->lookup_file_cache_->GetIfPresent(file0->file_name).has_value()); + ASSERT_TRUE(lookup_levels->lookup_file_cache_->GetIfPresent(file1->file_name).has_value()); // Update: remove file0 from level1, add a new file to level1. ASSERT_OK_AND_ASSIGN(auto new_file, NewFiles(/*level=*/1, /*last_sequence_number=*/4, @@ -553,10 +562,10 @@ TEST_F(LookupLevelsTest, TestDropFileCallbackOnUpdate) { ASSERT_OK(levels->Update(/*before=*/{file0}, /*after=*/{new_file})); // file0 was dropped, so its cache entry should be invalidated. - ASSERT_EQ(lookup_levels->lookup_file_cache_.size(), 1); - ASSERT_FALSE(lookup_levels->lookup_file_cache_.count(file0->file_name)); + ASSERT_EQ(lookup_levels->lookup_file_cache_->Size(), 1); + ASSERT_FALSE(lookup_levels->lookup_file_cache_->GetIfPresent(file0->file_name).has_value()); // file1 was not dropped, so its cache entry should still exist. 
- ASSERT_TRUE(lookup_levels->lookup_file_cache_.count(file1->file_name)); + ASSERT_TRUE(lookup_levels->lookup_file_cache_->GetIfPresent(file1->file_name).has_value()); } TEST_F(LookupLevelsTest, TestRemoteSst) { @@ -644,4 +653,280 @@ TEST_F(LookupLevelsTest, TestRemoteSst) { ASSERT_OK(lookup_levels->Close()); } +TEST_F(LookupLevelsTest, TestLookupFileCacheIntegration) { + // This single test covers multiple cache scenarios: + // 1. Cache is populated on first lookup (cache miss -> create -> put) + // 2. Subsequent lookups hit the cache (no new files created) + // 3. Two LookupLevels instances share the same global cache + // 4. Close() invalidates only own_cached_files_ from the shared cache + // 5. NotifyDropFile triggers cache invalidation and file deletion + // 6. Local lookup files are deleted when evicted from cache + + std::map options = {}; + ASSERT_OK_AND_ASSIGN(CoreOptions core_options, CoreOptions::FromMap(options)); + ASSERT_OK_AND_ASSIGN(auto table_path, CreateTable(options)); + ASSERT_OK_AND_ASSIGN(auto key_comparator, CreateKeyComparator()); + + // Create two data files at different levels + ASSERT_OK_AND_ASSIGN(auto file0, NewFiles(/*level=*/1, /*last_sequence_number=*/0, table_path, + core_options, "[[1, 11], [2, 22]]")); + ASSERT_OK_AND_ASSIGN(auto file1, NewFiles(/*level=*/2, /*last_sequence_number=*/2, table_path, + core_options, "[[3, 33], [4, 44]]")); + + // Create a shared global cache + auto shared_cache = LookupFile::CreateLookupFileCache(/*file_retention_ms=*/-1, + /*max_disk_size=*/INT64_MAX); + ASSERT_EQ(shared_cache->Size(), 0); + + // --- Scenario 1: First lookup populates the cache --- + // Instance 1: uses file0 and file1 + std::vector> files1 = {file0, file1}; + ASSERT_OK_AND_ASSIGN(std::shared_ptr levels1, + Levels::Create(key_comparator, files1, /*num_levels=*/3)); + ASSERT_OK_AND_ASSIGN(auto lookup_levels1, + CreateLookupLevels(table_path, levels1, shared_cache)); + + // Lookup key=1 -> triggers cache miss, creates lookup file for 
file0 + ASSERT_OK_AND_ASSIGN( + auto result, lookup_levels1->Lookup(BinaryRowGenerator::GenerateRowPtr({1}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 11); + ASSERT_EQ(shared_cache->Size(), 1); + + // Lookup key=3 -> triggers cache miss, creates lookup file for file1 + ASSERT_OK_AND_ASSIGN( + result, lookup_levels1->Lookup(BinaryRowGenerator::GenerateRowPtr({3}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 33); + ASSERT_EQ(shared_cache->Size(), 2); + + // --- Scenario 2: Subsequent lookup hits the cache (no new file created) --- + ASSERT_OK_AND_ASSIGN( + result, lookup_levels1->Lookup(BinaryRowGenerator::GenerateRowPtr({2}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 22); + // Cache size should not increase (file0 was already cached) + ASSERT_EQ(shared_cache->Size(), 2); + + // --- Scenario 3: Two LookupLevels share the same cache --- + // Create a second data file set that has no overlap with lookup_levels1. 
+ ASSERT_OK_AND_ASSIGN(auto file2, NewFiles(/*level=*/1, /*last_sequence_number=*/4, table_path, + core_options, "[[5, 55], [6, 66]]")); + std::vector> files2 = {file2}; + ASSERT_OK_AND_ASSIGN(std::shared_ptr levels2, + Levels::Create(key_comparator, files2, /*num_levels=*/3)); + ASSERT_OK_AND_ASSIGN(auto lookup_levels2, + CreateLookupLevels(table_path, levels2, shared_cache)); + + // Lookup key=5 in instance 2 -> cache miss, creates lookup file for file2 + ASSERT_OK_AND_ASSIGN( + result, lookup_levels2->Lookup(BinaryRowGenerator::GenerateRowPtr({5}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 55); + ASSERT_EQ(shared_cache->Size(), 3); // file0, file1, file2 + + // Collect local file paths for later verification + std::vector> tmp_files; + ASSERT_OK(fs_->ListDir(tmp_dir_->Str(), &tmp_files)); + ASSERT_EQ(tmp_files.size(), 3); + + // --- Scenario 4: Close instance 1 invalidates only its own files --- + // Instance 1 owns file0 and file1 in the cache + ASSERT_OK(lookup_levels1->Close()); + // file0 and file1 should be evicted (only owned by instance 1) + ASSERT_FALSE(shared_cache->GetIfPresent(file0->file_name).has_value()); + ASSERT_FALSE(shared_cache->GetIfPresent(file1->file_name).has_value()); + // file2 should still be in cache (owned by instance 2, not invalidated) + ASSERT_TRUE(shared_cache->GetIfPresent(file2->file_name).has_value()); + ASSERT_EQ(shared_cache->Size(), 1); + + // --- Scenario 5: Close instance 2 cleans up remaining files --- + ASSERT_OK(lookup_levels2->Close()); + ASSERT_EQ(shared_cache->Size(), 0); + + // All local lookup files should be deleted + tmp_files.clear(); + ASSERT_OK(fs_->ListDir(tmp_dir_->Str(), &tmp_files)); + ASSERT_TRUE(tmp_files.empty()); +} + +TEST_F(LookupLevelsTest, TestCacheEvictionBySmallMaxDiskSize) { + // Verify that when max_disk_size is small enough to hold only 2 lookup files, + // adding a 3rd triggers weight-based (SIZE) eviction of the LRU 
entry. + // After eviction, subsequent lookups on the evicted file still work by + // re-creating the lookup file on demand. + + std::map options = {}; + ASSERT_OK_AND_ASSIGN(CoreOptions core_options, CoreOptions::FromMap(options)); + ASSERT_OK_AND_ASSIGN(auto table_path, CreateTable(options)); + ASSERT_OK_AND_ASSIGN(auto key_comparator, CreateKeyComparator()); + + // Create 3 data files at level 1. + ASSERT_OK_AND_ASSIGN(auto file0, NewFiles(/*level=*/1, /*last_sequence_number=*/0, table_path, + core_options, "[[1, 11], [2, 22]]")); + ASSERT_OK_AND_ASSIGN(auto file1, NewFiles(/*level=*/1, /*last_sequence_number=*/2, table_path, + core_options, "[[3, 33], [4, 44]]")); + ASSERT_OK_AND_ASSIGN(auto file2, NewFiles(/*level=*/1, /*last_sequence_number=*/4, table_path, + core_options, "[[5, 55], [6, 66]]")); + + std::vector> files = {file0, file1, file2}; + + // Phase 1: Probe with unlimited cache to measure per-file weights. + ASSERT_OK_AND_ASSIGN(std::shared_ptr probe_levels_obj, + Levels::Create(key_comparator, files, /*num_levels=*/3)); + auto unlimited_cache = LookupFile::CreateLookupFileCache(/*file_retention_ms=*/-1, + /*max_disk_size=*/INT64_MAX); + ASSERT_OK_AND_ASSIGN(auto probe_levels, + CreateLookupLevels(table_path, probe_levels_obj, unlimited_cache)); + + // Trigger lookups to populate cache for all 3 files. 
+ ASSERT_OK_AND_ASSIGN(auto result, + probe_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({1}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + int64_t weight_after_file0 = unlimited_cache->GetCurrentWeight(); + + ASSERT_OK_AND_ASSIGN(result, + probe_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({3}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + int64_t weight_after_file1 = unlimited_cache->GetCurrentWeight(); + + ASSERT_OK_AND_ASSIGN(result, + probe_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({5}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + int64_t weight_after_file2 = unlimited_cache->GetCurrentWeight(); + + ASSERT_EQ(unlimited_cache->Size(), 3); + int64_t file0_weight = weight_after_file0; + int64_t file1_weight = weight_after_file1 - weight_after_file0; + int64_t file2_weight = weight_after_file2 - weight_after_file1; + ASSERT_GT(file0_weight, 0); + ASSERT_GT(file1_weight, 0); + ASSERT_GT(file2_weight, 0); + + // Set max_disk_size to exactly hold file0 + file1 but not file2. + int64_t max_disk_for_two = file0_weight + file1_weight + file2_weight - 1; + ASSERT_OK(probe_levels->Close()); + + // Phase 2: Create a new LookupLevels with the constrained cache. + ASSERT_OK_AND_ASSIGN(std::shared_ptr levels2, + Levels::Create(key_comparator, files, /*num_levels=*/3)); + auto small_cache = LookupFile::CreateLookupFileCache(/*file_retention_ms=*/-1, + /*max_disk_size=*/max_disk_for_two); + ASSERT_OK_AND_ASSIGN(auto lookup_levels, CreateLookupLevels(table_path, levels2, small_cache)); + + // Lookup file0: fits in cache. + ASSERT_OK_AND_ASSIGN(result, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({1}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 11); + ASSERT_EQ(small_cache->Size(), 1); + + // Lookup file1: still fits (file0 + file1 <= max). 
+ ASSERT_OK_AND_ASSIGN(result, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({3}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 33); + ASSERT_EQ(small_cache->Size(), 2); + ASSERT_TRUE(small_cache->GetIfPresent(file0->file_name).has_value()); + ASSERT_TRUE(small_cache->GetIfPresent(file1->file_name).has_value()); + + // Lookup file2: total weight exceeds max, should evict file0 (LRU). + ASSERT_OK_AND_ASSIGN(result, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({5}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 55); + + // file0 should have been evicted (LRU). + ASSERT_FALSE(small_cache->GetIfPresent(file0->file_name).has_value()); + ASSERT_TRUE(small_cache->GetIfPresent(file1->file_name).has_value()); + ASSERT_TRUE(small_cache->GetIfPresent(file2->file_name).has_value()); + + // Lookup key=1 again (file0 was evicted): should re-create the lookup file. + ASSERT_OK_AND_ASSIGN(result, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({1}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 11); + + // file0 is back in cache; file1 should now be evicted (it was LRU). + ASSERT_TRUE(small_cache->GetIfPresent(file0->file_name).has_value()); + ASSERT_FALSE(small_cache->GetIfPresent(file1->file_name).has_value()); + ASSERT_TRUE(small_cache->GetIfPresent(file2->file_name).has_value()); + + ASSERT_OK(lookup_levels->Close()); +} + +TEST_F(LookupLevelsTest, TestCacheEvictionByExpiration) { + // Verify that when expire_after_access_ms is very short, cached lookup files + // expire and are evicted. Subsequent lookups re-create the files. 
+ + std::map options = {}; + ASSERT_OK_AND_ASSIGN(CoreOptions core_options, CoreOptions::FromMap(options)); + ASSERT_OK_AND_ASSIGN(auto table_path, CreateTable(options)); + ASSERT_OK_AND_ASSIGN(auto key_comparator, CreateKeyComparator()); + + ASSERT_OK_AND_ASSIGN(auto file0, NewFiles(/*level=*/1, /*last_sequence_number=*/0, table_path, + core_options, "[[1, 11], [2, 22]]")); + ASSERT_OK_AND_ASSIGN(auto file1, NewFiles(/*level=*/2, /*last_sequence_number=*/2, table_path, + core_options, "[[3, 33], [4, 44]]")); + + std::vector> files = {file0, file1}; + ASSERT_OK_AND_ASSIGN(std::shared_ptr levels, + Levels::Create(key_comparator, files, /*num_levels=*/3)); + + // Create a cache with a very short expiration (50ms). + auto expiring_cache = LookupFile::CreateLookupFileCache(/*file_retention_ms=*/50, + /*max_disk_size=*/INT64_MAX); + ASSERT_OK_AND_ASSIGN(auto lookup_levels, + CreateLookupLevels(table_path, levels, expiring_cache)); + + // Lookup to populate cache. + ASSERT_OK_AND_ASSIGN(auto result, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({1}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 11); + + ASSERT_OK_AND_ASSIGN(result, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({3}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 33); + ASSERT_EQ(expiring_cache->Size(), 2); + + // Wait for entries to expire. + std::this_thread::sleep_for(std::chrono::milliseconds(80)); + + // Entries should be expired now. GetIfPresent triggers expiration check. + ASSERT_FALSE(expiring_cache->GetIfPresent(file0->file_name).has_value()); + ASSERT_FALSE(expiring_cache->GetIfPresent(file1->file_name).has_value()); + ASSERT_EQ(expiring_cache->Size(), 0); + + // Lookups should still work by re-creating the lookup files. 
+ ASSERT_OK_AND_ASSIGN(result, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({1}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 11); + ASSERT_EQ(expiring_cache->Size(), 1); + + ASSERT_OK_AND_ASSIGN(result, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({3}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(result); + ASSERT_EQ(result.value().key_value.value->GetInt(1), 33); + ASSERT_EQ(expiring_cache->Size(), 2); + + ASSERT_OK(lookup_levels->Close()); +} } // namespace paimon::test diff --git a/test/inte/pk_compaction_inte_test.cpp b/test/inte/pk_compaction_inte_test.cpp index 05222140e..3ccb0f74e 100644 --- a/test/inte/pk_compaction_inte_test.cpp +++ b/test/inte/pk_compaction_inte_test.cpp @@ -2973,6 +2973,113 @@ TEST_P(PkCompactionInteTest, TestLookupCompatibility) { } } +TEST_F(PkCompactionInteTest, PkDvAndAggWithIOException) { + // f4 is a large padding field to inflate the initial file size for DV strategy. 
+ arrow::FieldVector fields = { + arrow::field("f0", arrow::int32()), // value field (min agg) + arrow::field("f1", arrow::utf8()), // PK (schema index 1, pk index 1) + arrow::field("f2", arrow::int32()), // PK (schema index 2, pk index 0) + arrow::field("f3", arrow::float64()), // value field (min agg) + arrow::field("f4", arrow::utf8())}; // padding value field (min agg) + std::vector primary_keys = {"f2", "f1"}; + std::vector partition_keys = {}; + + std::map options = {{Options::FILE_FORMAT, "parquet"}, + {Options::BUCKET, "1"}, + {Options::FILE_SYSTEM, "local"}, + {Options::MERGE_ENGINE, "aggregation"}, + {Options::FIELDS_DEFAULT_AGG_FUNC, "min"}, + {Options::DELETION_VECTORS_ENABLED, "true"}, + {Options::LOOKUP_REMOTE_FILE_ENABLED, "true"}, + {Options::LOOKUP_REMOTE_LEVEL_THRESHOLD, "1"}}; + + auto data_type = arrow::struct_(fields); + int64_t commit_id = 0; + + bool run_complete = false; + auto io_hook = IOHook::GetInstance(); + for (size_t i = 0; i < 800; i += RandomNumber(1, 23)) { + dir_ = UniqueTestDirectory::Create("local"); + std::string table_path = TablePath(); + CreateTable(fields, partition_keys, primary_keys, options); + + // A long padding string (~2KB) to inflate the initial file size. + std::string padding(2048, 'X'); + + // Step 1: Write initial data with large padding (creates a big level-0 file). + // Dave and Eve are NOT overwritten by later batches. + { + // clang-format off + std::string json_data = R"([ +[100, "Alice", 3, 1.5, ")" + padding + R"("], +[200, "Bob", 5, 2.5, ")" + padding + R"("], +[300, "Carol", 1, 3.5, ")" + padding + R"("], +[400, "Dave", 4, 4.5, ")" + padding + R"("], +[500, "Eve", 2, 5.5, ")" + padding + R"("] +])"; + // clang-format on + auto array = + arrow::ipc::internal::json::ArrayFromJSON(data_type, json_data).ValueOrDie(); + ASSERT_OK(WriteAndCommit(table_path, {}, 0, array, commit_id++)); + } + + // Step 2: Full compact → upgrades level-0 file to max level. 
+ ASSERT_OK_AND_ASSIGN( + auto upgrade_msgs, + CompactAndCommit(table_path, {}, 0, /*full_compaction=*/true, commit_id++)); + ASSERT_TRUE(HasExtraLookupFiles(upgrade_msgs)); + // Step 3: Write batch2 with overlapping keys (first new level-0 file). + { + auto array = arrow::ipc::internal::json::ArrayFromJSON(data_type, R"([ + [50, "Alice", 3, 0.5, "a1"], + [300, "Bob", 5, 3.5, "b1"] + ])") + .ValueOrDie(); + ASSERT_OK(WriteAndCommit(table_path, {}, 0, array, commit_id++)); + } + + // Step 4: Write batch3 with overlapping keys (second new level-0 file). + { + auto array = arrow::ipc::internal::json::ArrayFromJSON(data_type, R"([ + [80, "Alice", 3, 1.0, "a2"], + [150, "Carol", 1, 1.5, "c1"] + ])") + .ValueOrDie(); + ASSERT_OK(WriteAndCommit(table_path, {}, 0, array, commit_id++)); + } + // Step 5: Non-full compact → two level-0 files merge, lookup against max-level produces DV. + ScopeGuard guard([&io_hook]() { io_hook->Clear(); }); + io_hook->Reset(i, IOHook::Mode::RETURN_ERROR); + auto dv_compact_msgs = + CompactAndCommit(table_path, {}, 0, /*full_compaction=*/false, commit_id++); + CHECK_HOOK_STATUS(dv_compact_msgs.status(), i); + io_hook->Clear(); + + // Step 6: Assert DV index files are present. + ASSERT_TRUE(HasDeletionVectorIndexFiles(dv_compact_msgs.value())) + << "Non-full compact should produce DV index files"; + ASSERT_TRUE(HasExtraLookupFiles(dv_compact_msgs.value())); + + // Step 7: ScanAndVerify after DV compact. 
+ { + std::map, std::string> expected_data; + // clang-format off + expected_data[std::make_pair("", 0)] = R"([ +[0, 500, "Eve", 2, 5.5, ")" + padding + R"("], +[0, 400, "Dave", 4, 4.5, ")" + padding + R"("], +[0, 150, "Carol", 1, 1.5, ")" + padding + R"("], +[0, 50, "Alice", 3, 0.5, ")" + padding + R"("], +[0, 200, "Bob", 5, 2.5, ")" + padding + R"("] +])"; + // clang-format on + ScanAndVerify(table_path, fields, expected_data); + } + run_complete = true; + break; + } + ASSERT_TRUE(run_complete); +} + std::vector GetTestValuesForCompactionInteTest() { std::vector values; values.emplace_back("parquet");