From c9e5eb4525eeb33938b394fa3c904be7fdcaac6b Mon Sep 17 00:00:00 2001 From: Michael Vandeberg Date: Tue, 24 Mar 2026 10:14:12 -0600 Subject: [PATCH] Add safe_resume to prevent TLS spoilage in frame allocator propagation Between a coroutine's await_resume (which sets TLS to the correct frame allocator) and the next child coroutine invocation (whose operator new reads TLS), arbitrary user code runs. If that code resumes a coroutine from a different chain on the same thread, the other coroutine's await_resume overwrites TLS with the wrong allocator. safe_resume saves and restores TLS around h.resume(), making TLS behave like a stack so nested resumes cannot spoil the outer value. All internal .resume() call sites now use safe_resume. The two sites that intentionally do not (symmetric_transfer and run_async_wrapper) have comments explaining why they are safe without it. --- .../pages/4.coroutines/4g.allocators.adoc | 14 ++ .../pages/7.examples/7n.custom-executor.adoc | 5 +- .../ROOT/pages/8.design/8k.Executor.adoc | 33 ++++ example/custom-executor/custom_executor.cpp | 3 +- include/boost/capy/concept/executor.hpp | 11 ++ .../capy/detail/await_suspend_helper.hpp | 3 + include/boost/capy/ex/frame_allocator.hpp | 34 ++++ include/boost/capy/ex/run_async.hpp | 4 +- src/ex/detail/strand_queue.hpp | 7 +- src/ex/thread_pool.cpp | 3 +- src/test/run_blocking.cpp | 3 +- test/unit/ex/frame_allocator.cpp | 8 + test/unit/ex/safe_resume.cpp | 175 ++++++++++++++++++ 13 files changed, 295 insertions(+), 8 deletions(-) create mode 100644 test/unit/ex/safe_resume.cpp diff --git a/doc/modules/ROOT/pages/4.coroutines/4g.allocators.adoc b/doc/modules/ROOT/pages/4.coroutines/4g.allocators.adoc index fc0c1517..4ed85d3e 100644 --- a/doc/modules/ROOT/pages/4.coroutines/4g.allocators.adoc +++ b/doc/modules/ROOT/pages/4.coroutines/4g.allocators.adoc @@ -41,6 +41,20 @@ The "window" is the interval between setting the thread-local allocator and the After the window closes (at the first suspension), the TLS allocator may be restored to a previous value. The task retains its captured allocator regardless. +== TLS Preservation + +Between a coroutine's `await_resume` (which sets TLS to the correct allocator) and the next child coroutine invocation (whose `operator new` reads TLS), arbitrary user code runs. If that code resumes a coroutine from a different chain on the same thread -- by calling `.resume()` directly, pumping a completion queue, or running nested dispatch -- the other coroutine's `await_resume` overwrites TLS with its own allocator. The original coroutine's next child would then allocate from the wrong resource. + +To prevent this, any code that calls `.resume()` on a coroutine handle must use `safe_resume` from ``: + +[source,cpp] +---- +// In your event loop or dispatch path: +capy::safe_resume(h); // saves and restores TLS around h.resume() +---- + +`safe_resume` saves the current thread-local allocator, calls `h.resume()`, then restores the saved value. This makes TLS behave like a stack: nested resumes cannot spoil the outer value. All of Capy's built-in executors (`thread_pool`, strands, `blocking_context`) use `safe_resume` internally. Custom executor event loops must do the same -- see xref:7.examples/7n.custom-executor.adoc[Custom Executor] for an example. + == The FrameAllocator Concept Custom allocators must satisfy the `FrameAllocator` concept, which is compatible with {cpp} allocator requirements: diff --git a/doc/modules/ROOT/pages/7.examples/7n.custom-executor.adoc b/doc/modules/ROOT/pages/7.examples/7n.custom-executor.adoc index c248941a..7647ddcc 100644 --- a/doc/modules/ROOT/pages/7.examples/7n.custom-executor.adoc +++ b/doc/modules/ROOT/pages/7.examples/7n.custom-executor.adoc @@ -17,6 +17,7 @@ Implementing the Executor concept with a single-threaded run loop. [source,cpp] ---- #include +#include #include #include #include @@ -56,7 +57,7 @@ public: { auto h = queue_.front(); queue_.pop(); - h.resume(); + capy::safe_resume(h); } } @@ -231,6 +232,8 @@ loop.run(); `run_async` enqueues the initial coroutine. `loop.run()` drains the queue, resuming coroutines one by one until all work completes. This is analogous to a GUI event loop or game tick loop. +Note that `run()` uses `capy::safe_resume(h)` instead of `h.resume()`. This saves and restores the thread-local frame allocator around each resumption, preventing coroutines from spoiling each other's allocator. All custom executor event loops must use `safe_resume` -- see xref:../4.coroutines/4g.allocators.adoc#_tls_preservation[TLS Preservation] for details. + == Output ---- diff --git a/doc/modules/ROOT/pages/8.design/8k.Executor.adoc b/doc/modules/ROOT/pages/8.design/8k.Executor.adoc index 92fc06a1..c931c574 100644 --- a/doc/modules/ROOT/pages/8.design/8k.Executor.adoc +++ b/doc/modules/ROOT/pages/8.design/8k.Executor.adoc @@ -326,6 +326,39 @@ ex_.post(cont_); This pattern is identical across all three Corosio backends: epoll (Linux), IOCP (Windows), and select (POSIX fallback). The executor concept and `executor_ref` provide the abstraction that makes this possible. The backend-specific code deals with I/O readiness or completion notification. The executor-specific code deals with coroutine scheduling. The two concerns are cleanly separated. +== Frame Allocator Preservation + +Capy propagates frame allocators via thread-local storage (see xref:../4.coroutines/4g.allocators.adoc#_thread_local_propagation[Thread-Local Propagation]). The TLS value is set in `await_resume` when a coroutine resumes and read in `operator new` when a child coroutine is created. Between these two points, the coroutine body executes arbitrary user code. + +If that user code resumes a coroutine from a different chain on the same thread -- by calling `.resume()` directly, pumping a dispatch queue, or running nested event loop work -- the other coroutine's `await_resume` overwrites TLS. The original coroutine's next child then allocates from the wrong resource. + +=== The Save/Restore Protocol + +The fix is to save and restore TLS around every `.resume()` call: + +[source,cpp] +---- +inline void +safe_resume(std::coroutine_handle<> h) noexcept +{ + auto* saved = get_current_frame_allocator(); + h.resume(); + set_current_frame_allocator(saved); +} +---- + +This makes TLS behave like a stack. Each nested resume pushes its own allocator; when the coroutine suspends and `.resume()` returns, the previous value is restored. The cost is two TLS accesses (one read, one write) per `.resume()` call -- negligible compared to the cost of resuming a coroutine. + +=== Where It Applies + +All executor event loops and strand dispatch loops must use `safe_resume` instead of calling `.resume()` directly. Capy's `thread_pool`, `blocking_context`, and `strand_queue` all use it internally. + +Two `.resume()` call sites intentionally do _not_ use `safe_resume`: + +* **`symmetric_transfer`** (MSVC workaround). The calling coroutine is about to suspend unconditionally. When it later resumes, `await_resume` restores TLS from the promise's stored environment. Save/restore would add overhead with no benefit. + +* **`run_async_wrapper::operator()`**. TLS is already saved in the wrapper's constructor and restored in its destructor, which bracket the entire task lifetime. + == Why Not `std::execution` (P2300) https://wg21.link/P2300[P2300] defines a sender/receiver model where execution context flows _backward_ from receiver to sender via queries after `connect()`: diff --git a/example/custom-executor/custom_executor.cpp b/example/custom-executor/custom_executor.cpp index a7ecdb89..8dec6bf0 100644 --- a/example/custom-executor/custom_executor.cpp +++ b/example/custom-executor/custom_executor.cpp @@ -16,6 +16,7 @@ // #include +#include #include #include #include @@ -56,7 +57,7 @@ class run_loop : public capy::execution_context { auto h = queue_.front(); queue_.pop(); - h.resume(); + capy::safe_resume(h); } } diff --git a/include/boost/capy/concept/executor.hpp b/include/boost/capy/concept/executor.hpp index 94608b26..d7582d9f 100644 --- a/include/boost/capy/concept/executor.hpp +++ b/include/boost/capy/concept/executor.hpp @@ -118,6 +118,17 @@ class execution_context; `post`, the continuation is enqueued and the lifetime requirement applies. + @par Frame Allocator TLS + + The library propagates a frame allocator via thread-local + storage. When a custom executor's event loop calls + `.resume()` to drain its work queue, it must use + `safe_resume()` from `` + instead of calling `h.resume()` directly. This saves and + restores the thread-local frame allocator around the call, + preventing a resumed coroutine from permanently overwriting + the caller's value. + @par Executor Validity An executor becomes invalid when the first call to diff --git a/include/boost/capy/detail/await_suspend_helper.hpp b/include/boost/capy/detail/await_suspend_helper.hpp index 4dbdfec9..3d72c7c6 100644 --- a/include/boost/capy/detail/await_suspend_helper.hpp +++ b/include/boost/capy/detail/await_suspend_helper.hpp @@ -54,6 +54,9 @@ namespace detail { #if BOOST_CAPY_WORKAROUND(_MSC_VER, >= 1) inline void symmetric_transfer(std::coroutine_handle<> h) noexcept { + // safe_resume is not needed here: the calling coroutine is + // about to suspend unconditionally. When it later resumes, + // await_resume restores TLS from the promise's environment. h.resume(); } #else diff --git a/include/boost/capy/ex/frame_allocator.hpp b/include/boost/capy/ex/frame_allocator.hpp index b912c819..8dbdb3b2 100644 --- a/include/boost/capy/ex/frame_allocator.hpp +++ b/include/boost/capy/ex/frame_allocator.hpp @@ -12,6 +12,7 @@ #include +#include #include /* Design rationale (pdimov): @@ -109,6 +110,39 @@ set_current_frame_allocator( detail::current_frame_allocator_ref() = mr; } +/** Resume a coroutine handle with frame-allocator TLS protection. + + Saves the current thread-local frame allocator before + calling `h.resume()`, then restores it after the call + returns. This prevents a resumed coroutine's + `await_resume` from permanently overwriting the caller's + allocator value. + + Between a coroutine's resumption and its next child + invocation, arbitrary user code may run. If that code + resumes a coroutine from a different chain on this + thread, the other coroutine's `await_resume` overwrites + TLS with its own allocator. Without save/restore, the + original coroutine's next child would allocate from + the wrong resource. + + Event loops, strand dispatch loops, and any code that + calls `.resume()` on a coroutine handle should use + this function instead of calling `.resume()` directly. + See the @ref Executor concept documentation for details. + + @param h The coroutine handle to resume. + + @see get_current_frame_allocator, set_current_frame_allocator +*/ +inline void +safe_resume(std::coroutine_handle<> h) noexcept +{ + auto* saved = get_current_frame_allocator(); + h.resume(); + set_current_frame_allocator(saved); +} + } // namespace capy } // namespace boost diff --git a/include/boost/capy/ex/run_async.hpp b/include/boost/capy/ex/run_async.hpp index c4627d53..068fdb85 100644 --- a/include/boost/capy/ex/run_async.hpp +++ b/include/boost/capy/ex/run_async.hpp @@ -411,7 +411,9 @@ class [[nodiscard]] run_async_wrapper p.env_ = {p.wg_.executor(), st_, p.get_resource()}; task_promise.set_environment(&p.env_); - // Start task through executor + // Start task through executor. + // safe_resume is not needed here: TLS is already saved in the + // constructor (saved_tls_) and restored in the destructor. p.task_cont_.h = task_h; p.wg_.executor().dispatch(p.task_cont_).resume(); } diff --git a/src/ex/detail/strand_queue.hpp b/src/ex/detail/strand_queue.hpp index a0461dfd..1ff84027 100644 --- a/src/ex/detail/strand_queue.hpp +++ b/src/ex/detail/strand_queue.hpp @@ -11,6 +11,7 @@ #define BOOST_CAPY_SRC_EX_DETAIL_STRAND_QUEUE_HPP #include +#include #include #include @@ -128,7 +129,7 @@ class strand_queue std::coroutine_handle target) { (void)q; - target.resume(); + safe_resume(target); co_return; } @@ -220,7 +221,7 @@ class strand_queue tail_ = nullptr; auto h = std::coroutine_handle::from_promise(*p); - h.resume(); + safe_resume(h); h.destroy(); } } @@ -265,7 +266,7 @@ class strand_queue batch.head = p->next; auto h = std::coroutine_handle::from_promise(*p); - h.resume(); + safe_resume(h); // Don't use h.destroy() - it would call operator delete which // accesses the queue's free_list_ (race with push). // Instead, manually free the frame without recycling. diff --git a/src/ex/thread_pool.cpp b/src/ex/thread_pool.cpp index b9fa0a6a..8d5d79d0 100644 --- a/src/ex/thread_pool.cpp +++ b/src/ex/thread_pool.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -226,7 +227,7 @@ class thread_pool::impl c = pop(); } if(c) - c->h.resume(); + safe_resume(c->h); } } }; diff --git a/src/test/run_blocking.cpp b/src/test/run_blocking.cpp index 5603725f..58a1e7f6 100644 --- a/src/test/run_blocking.cpp +++ b/src/test/run_blocking.cpp @@ -9,6 +9,7 @@ #include +#include #include #include #include @@ -76,7 +77,7 @@ blocking_context::run() h = impl_->queue.front(); impl_->queue.pop(); } - h.resume(); + safe_resume(h); } if(impl_->ep) std::rethrow_exception(impl_->ep); diff --git a/test/unit/ex/frame_allocator.cpp b/test/unit/ex/frame_allocator.cpp index c650accf..93c71123 100644 --- a/test/unit/ex/frame_allocator.cpp +++ b/test/unit/ex/frame_allocator.cpp @@ -78,6 +78,14 @@ TLS restoration on resume: After awaiting a child, the parent's TLS may have been changed by the child. transform_awaiter::await_resume restores parent's allocator from its promise. +Event loops must use safe_resume: + Between a coroutine's await_resume (which sets TLS) and the next child + invocation (whose operator new reads TLS), arbitrary user code runs. If + that code resumes a coroutine from a different chain on the same thread, + the other coroutine's await_resume overwrites TLS. Event loops, strand + dispatch loops, and any code that calls .resume() must use safe_resume() + to save and restore TLS around the call. + memory_resource* lifetime: When passing memory_resource* directly, the user is responsible for ensuring it outlives all tasks. This matches std::pmr conventions. diff --git a/test/unit/ex/safe_resume.cpp b/test/unit/ex/safe_resume.cpp new file mode 100644 index 00000000..b6fc36f6 --- /dev/null +++ b/test/unit/ex/safe_resume.cpp @@ -0,0 +1,175 @@ +// +// Copyright (c) 2026 Michael Vandeberg +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/capy +// + +// Test that header file is self-contained. +#include + +#include "test_suite.hpp" + +#include +#include +#include + +namespace boost { +namespace capy { + +/* +safe_resume Tests +================= + +Verifies that safe_resume saves and restores the thread-local +frame allocator around h.resume(). A plain coroutine (no +io_awaitable_promise_base) is used so that operator new does +not read TLS — we only care about the TLS value itself. +*/ + +namespace { + +// Minimal coroutine that overwrites TLS with a known value, then suspends. +struct spoiler_coro +{ + struct promise_type + { + std::pmr::memory_resource* spoil_to = nullptr; + + spoiler_coro get_return_object() + { + return spoiler_coro{std::coroutine_handle::from_promise(*this)}; + } + + std::suspend_always initial_suspend() noexcept { return {}; } + std::suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + void unhandled_exception() {} + }; + + std::coroutine_handle h_; + + ~spoiler_coro() + { + if(h_) + h_.destroy(); + } + + spoiler_coro(spoiler_coro&& o) noexcept + : h_(std::exchange(o.h_, nullptr)) + { + } + + spoiler_coro& operator=(spoiler_coro&&) = delete; + spoiler_coro(spoiler_coro const&) = delete; + +private: + explicit spoiler_coro(std::coroutine_handle h) + : h_(h) + { + } + + friend promise_type; +}; + +// Coroutine body: overwrite TLS and suspend. +spoiler_coro make_spoiler(std::pmr::memory_resource* spoil_to) +{ + set_current_frame_allocator(spoil_to); + co_return; +} + +// Coroutine body: overwrite TLS, then safe_resume another coroutine, then suspend. +spoiler_coro make_nested_spoiler( + std::pmr::memory_resource* spoil_to, + std::coroutine_handle<> inner) +{ + set_current_frame_allocator(spoil_to); + safe_resume(inner); + // After safe_resume, TLS should be spoil_to again + co_return; +} + +} // anonymous namespace + +struct safe_resume_test +{ + void + testSafeResumePreservesTLS() + { + auto* original = std::pmr::null_memory_resource(); + auto* spoil = std::pmr::new_delete_resource(); + + auto coro = make_spoiler(spoil); + + set_current_frame_allocator(original); + safe_resume(coro.h_); + BOOST_TEST(get_current_frame_allocator() == original); + + set_current_frame_allocator(nullptr); + } + + void + testSafeResumePreservesNull() + { + auto* spoil = std::pmr::new_delete_resource(); + + auto coro = make_spoiler(spoil); + + set_current_frame_allocator(nullptr); + safe_resume(coro.h_); + BOOST_TEST(get_current_frame_allocator() == nullptr); + } + + void + testRawResumeDoesNotPreserveTLS() + { + // Documents the problem that safe_resume fixes: + // raw .resume() lets the coroutine spoil TLS. + auto* original = std::pmr::null_memory_resource(); + auto* spoil = std::pmr::new_delete_resource(); + + auto coro = make_spoiler(spoil); + + set_current_frame_allocator(original); + coro.h_.resume(); + BOOST_TEST(get_current_frame_allocator() == spoil); + + set_current_frame_allocator(nullptr); + } + + void + testNestedSafeResume() + { + auto* outer_value = std::pmr::null_memory_resource(); + auto* middle_value = std::pmr::new_delete_resource(); + auto* inner_value = std::pmr::get_default_resource(); + + auto inner = make_spoiler(inner_value); + auto outer = make_nested_spoiler(middle_value, inner.h_); + + set_current_frame_allocator(outer_value); + safe_resume(outer.h_); + // Outer safe_resume should restore outer_value, + // regardless of what happened inside. + BOOST_TEST(get_current_frame_allocator() == outer_value); + + set_current_frame_allocator(nullptr); + } + + void + run() + { + testSafeResumePreservesTLS(); + testSafeResumePreservesNull(); + testRawResumeDoesNotPreserveTLS(); + testNestedSafeResume(); + } +}; + +TEST_SUITE(safe_resume_test, "capy.safe_resume"); + +} // capy +} // boost