From daf76962f43d249eb29b3c425ba2b6e45016090f Mon Sep 17 00:00:00 2001 From: "jinli.zjw" Date: Wed, 27 May 2026 13:16:14 +0800 Subject: [PATCH 1/2] fix: add ScopeGuard to wait for async tasks before early return in Clean() When GetUsedFiles() returns an error (e.g. index manifest not supported), Clean() would return immediately while thread pool tasks submitted via Via(executor_.get(), ...) were still running. These tasks capture 'this' and access members like fs_, causing use-after-free when the OrphanFilesCleanerImpl is destroyed shortly after. Add a ScopeGuard that calls CollectAll(file_statuses_futures) to ensure all submitted async tasks complete before the function returns, preventing the intermittent segmentation fault. --- src/paimon/common/executor/future.h | 5 ++++- src/paimon/core/operation/orphan_files_cleaner_impl.cpp | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/paimon/common/executor/future.h b/src/paimon/common/executor/future.h index fe6509a21..ebee7390b 100644 --- a/src/paimon/common/executor/future.h +++ b/src/paimon/common/executor/future.h @@ -45,6 +45,9 @@ namespace paimon { /// execution. /// /// @note If `func` returns `void`, the returned future is of type `std::future`. +/// +/// TODO: Since paimon-cpp uses `Status`/`Result` for error handling throughout, the exception +/// capture logic (try/catch + set_exception) in `Via()` will be removed in the future. template auto Via(Executor* executor, Func&& func) -> std::future { using ResultType = decltype(func()); @@ -96,7 +99,7 @@ std::vector CollectAll(std::vector>& futures) { std::vector results; results.reserve(futures.size()); // Reserve space to avoid reallocation. for (auto& future : futures) { - results.push_back(future.get()); // Wait for each future and collect the result. 
+ results.push_back(future.get()); } return results; diff --git a/src/paimon/core/operation/orphan_files_cleaner_impl.cpp b/src/paimon/core/operation/orphan_files_cleaner_impl.cpp index 7eea87977..1f7896f9e 100644 --- a/src/paimon/core/operation/orphan_files_cleaner_impl.cpp +++ b/src/paimon/core/operation/orphan_files_cleaner_impl.cpp @@ -97,11 +97,14 @@ Result> OrphanFilesCleanerImpl::Clean() { } PAIMON_ASSIGN_OR_RAISE(std::set all_dirs, ListPaimonFileDirs()); std::vector>>> file_statuses_futures; + ScopeGuard file_statuses_guard( + [&file_statuses_futures]() { CollectAll(file_statuses_futures); }); for (const auto& dir : all_dirs) { file_statuses_futures.push_back( Via(executor_.get(), [this, dir] { return TryBestListingDirs(dir); })); } PAIMON_ASSIGN_OR_RAISE(std::set used_file_names, GetUsedFiles()); + file_statuses_guard.Release(); Duration duration; std::set need_to_deletes; From 0e54c34b97b26c1949476df9843423f549adf99d Mon Sep 17 00:00:00 2001 From: "jinli.zjw" Date: Thu, 28 May 2026 15:57:39 +0800 Subject: [PATCH 2/2] fix --- src/paimon/common/executor/future.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/common/executor/future.h b/src/paimon/common/executor/future.h index ebee7390b..8e9f4d87f 100644 --- a/src/paimon/common/executor/future.h +++ b/src/paimon/common/executor/future.h @@ -99,7 +99,7 @@ std::vector CollectAll(std::vector>& futures) { std::vector results; results.reserve(futures.size()); // Reserve space to avoid reallocation. for (auto& future : futures) { - results.push_back(future.get()); + results.push_back(future.get()); // Wait for each future and collect the result. } return results;