From 21b2f653f402ba81a920eb9cbb08d7bc8750c28d Mon Sep 17 00:00:00 2001 From: Ryan Ofsky Date: Wed, 11 Jun 2025 15:01:47 -0400 Subject: [PATCH 1/4] logging: Add better logging on IPC server-side failures Motivation: when trying to add a new unit test ran into a confusing error "(remote):0: failed: remote exception: Called null capability" caused by forgetting to make a FooInterface.initThreadMap call during initialization and these logging prints would have made it easier to see where the error was coming from and debug. --- include/mp/proxy-types.h | 7 +++++++ include/mp/type-context.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/include/mp/proxy-types.h b/include/mp/proxy-types.h index 22468b1..0ea0741 100644 --- a/include/mp/proxy-types.h +++ b/include/mp/proxy-types.h @@ -726,6 +726,13 @@ kj::Promise serverInvoke(Server& server, CallContext& call_context, Fn fn) MP_LOG(*server.m_context.loop, Log::Debug) << "IPC server send response #" << req << " " << TypeName(); MP_LOG(*server.m_context.loop, Log::Trace) << "response data: " << LogEscape(call_context.getResults().toString(), server.m_context.loop->m_log_opts.max_chars); + }, [&server, req](::kj::Exception&& e) { + // Call failed for some reason. Cap'n Proto will try to send + // this error to the client as well, but it is good to log the + // failure early here and include the request number. + MP_LOG(*server.m_context.loop, Log::Error) << "IPC server error request #" << req << " " << TypeName() + << " " << kj::str("kj::Exception: ", e).cStr(); + return kj::mv(e); }); } catch (const std::exception& e) { MP_LOG(*server.m_context.loop, Log::Error) << "IPC server unhandled exception: " << e.what(); diff --git a/include/mp/type-context.h b/include/mp/type-context.h index 09ac179..e66af6e 100644 --- a/include/mp/type-context.h +++ b/include/mp/type-context.h @@ -163,6 +163,40 @@ auto PassField(Priority<1>, TypeList<>, ServerContext& server_context, const Fn& << "IPC server error request #" << req << ", missing thread to execute request"; throw std::runtime_error("invalid thread handle"); } + }, [&server, req](::kj::Exception&& e) { + // If you see the error "(remote):0: failed: remote exception: + // Called null capability" here, it probably means your Init class + // is missing a declaration like: + // + // construct @0 (threadMap: Proxy.ThreadMap) -> (threadMap :Proxy.ThreadMap); + // + // which passes a ThreadMap reference from the client to the server, + // allowing the server to create threads to run IPC calls on the + // client, and also returns a ThreadMap reference from the server to + // the client, allowing the client to create threads on the server. + // (Typically the latter ThreadMap is used more often because there + // are more client-to-server calls.) + // + // If the other side of the connection did not previously get a + // ThreadMap reference from this side of the connection, when the + // other side calls `m_thread_map.makeThreadRequest()` in + // `BuildField` above, `m_thread_map` will be null, but that call + // will not fail immediately due to Cap'n Proto's request pipelining + // and delayed execution. Instead that call will return an invalid + // Thread reference, and when that reference is passed to this side + // of the connection as `thread_client` above, the + // `getLocalServer(thread_client)` call there will be the first + // thing to overtly fail, leading to an error here. + // + // Potentially there are also other things that could cause errors + // here, but this is the most likely cause. + // + // The log statement here is not strictly necessary since the same + // exception will also be logged in serverInvoke, but this logging + // may provide extra context that could be helpful for debugging. + MP_LOG(*server.m_context.loop, Log::Info) + << "IPC server error request #" << req << " CapabilityServerSet::getLocalServer call failed, did you forget to provide a ThreadMap to the client prior to this IPC call?"; + return kj::mv(e); }) // Wait for the invocation to finish before returning to the caller. .then([invoke_wait = kj::mv(future.promise)]() mutable { return kj::mv(invoke_wait); }); From 2af6a9bc3036895ddddbd80d1f7789bec92a6516 Mon Sep 17 00:00:00 2001 From: Ryan Ofsky Date: Mon, 30 Jun 2025 10:47:34 -0400 Subject: [PATCH 2/4] debug: Add TypeName() function and log statements for Proxy objects being created and destroyed --- include/mp/proxy-io.h | 7 +++++++ include/mp/proxy-types.h | 6 +++--- include/mp/util.h | 30 +++++++++++++++++++++++++++++- src/mp/gen.cpp | 3 +++ 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/include/mp/proxy-io.h b/include/mp/proxy-io.h index 0abef89..f1d783c 100644 --- a/include/mp/proxy-io.h +++ b/include/mp/proxy-io.h @@ -454,6 +454,8 @@ ProxyClientBase::ProxyClientBase(typename Interface::Client cli : m_client(std::move(client)), m_context(connection) { + MP_LOG(*m_context.loop, Log::Info) << "Creating " << CxxTypeName(*this) << " " << this; + // Handler for the connection getting destroyed before this client object. auto disconnect_cb = m_context.connection->addSyncCleanup([this]() { // Release client capability by move-assigning to temporary. @@ -510,13 +512,16 @@ ProxyClientBase::ProxyClientBase(typename Interface::Client cli template ProxyClientBase::~ProxyClientBase() noexcept { + MP_LOG(*m_context.loop, Log::Info) << "Cleaning up " << CxxTypeName(*this) << " " << this; CleanupRun(m_context.cleanup_fns); + MP_LOG(*m_context.loop, Log::Info) << "Destroying " << CxxTypeName(*this) << " " << this; } template ProxyServerBase::ProxyServerBase(std::shared_ptr impl, Connection& connection) : m_impl(std::move(impl)), m_context(&connection) { + MP_LOG(*m_context.loop, Log::Info) << "Creating " << CxxTypeName(*this) << " " << this; assert(m_impl); } @@ -535,6 +540,7 @@ ProxyServerBase::ProxyServerBase(std::shared_ptr impl, Co template ProxyServerBase::~ProxyServerBase() { + MP_LOG(*m_context.loop, Log::Info) << "Cleaning up " << CxxTypeName(*this) << " " << this; if (m_impl) { // If impl is non-null at this point, it means no client is waiting for // the m_impl server object to be destroyed synchronously. This can @@ -561,6 +567,7 @@ ProxyServerBase::~ProxyServerBase() }); } assert(m_context.cleanup_fns.empty()); + MP_LOG(*m_context.loop, Log::Info) << "Destroying " << CxxTypeName(*this) << " " << this; } //! If the capnp interface defined a special "destroy" method, as described the diff --git a/include/mp/proxy-types.h b/include/mp/proxy-types.h index 0ea0741..7b1cb92 100644 --- a/include/mp/proxy-types.h +++ b/include/mp/proxy-types.h @@ -568,16 +568,16 @@ template void clientDestroy(Client& client) { if (client.m_context.connection) { - MP_LOG(*client.m_context.loop, Log::Info) << "IPC client destroy " << typeid(client).name(); + MP_LOG(*client.m_context.loop, Log::Info) << "IPC client destroy " << CxxTypeName(client); } else { - KJ_LOG(INFO, "IPC interrupted client destroy", typeid(client).name()); + KJ_LOG(INFO, "IPC interrupted client destroy", CxxTypeName(client)); } } template void serverDestroy(Server& server) { - MP_LOG(*server.m_context.loop, Log::Info) << "IPC server destroy " << typeid(server).name(); + MP_LOG(*server.m_context.loop, Log::Info) << "IPC server destroy " << CxxTypeName(server); } //! Entry point called by generated client code that looks like: diff --git a/include/mp/util.h b/include/mp/util.h index e5b4dd1..43f17eb 100644 --- a/include/mp/util.h +++ b/include/mp/util.h @@ -7,18 +7,24 @@ #include #include -#include +#include #include #include #include #include #include #include +#include #include #include #include #include +#if __has_include() +#include +#include +#endif + namespace mp { //! Generic utility functions used by capnp code. @@ -238,6 +244,28 @@ inline char* CharCast(unsigned char* c) { return (char*)c; } inline const char* CharCast(const char* c) { return c; } inline const char* CharCast(const unsigned char* c) { return (const char*)c; } +#if __has_include() // GCC & Clang ─ use to demangle +inline std::string _demangle(const char* m) +{ + int status = 0; + std::unique_ptr p{ + abi::__cxa_demangle(m, nullptr, nullptr, &status), std::free}; + return (status == 0 && p) ? p.get() : m; // fall back on mangled if needed +} +#else // MSVC or other ─ no demangling available +inline std::string _demangle(const char* m) { return m; } +#endif + +template +std::string CxxTypeName(const T& /*unused*/) +{ +#ifdef __cpp_rtti + return _demangle(typeid(std::decay_t).name()); +#else + return ""; +#endif +} + } // namespace mp #endif // MP_UTIL_H diff --git a/src/mp/gen.cpp b/src/mp/gen.cpp index 1840071..26862f3 100644 --- a/src/mp/gen.cpp +++ b/src/mp/gen.cpp @@ -238,8 +238,11 @@ static void Generate(kj::StringPtr src_prefix, cpp_types << "// Generated by " PROXY_BIN " from " << src_file << "\n\n"; cpp_types << "// IWYU pragma: no_include \"mp/proxy.h\"\n"; cpp_types << "// IWYU pragma: no_include \"mp/proxy-io.h\"\n"; + cpp_types << "#include <" << include_path << ".h> // IWYU pragma: keep\n"; cpp_types << "#include <" << include_path << ".proxy.h>\n"; cpp_types << "#include <" << include_path << ".proxy-types.h> // IWYU pragma: keep\n"; + cpp_types << "#include \n"; + cpp_types << "#include \n"; cpp_types << "#include <" << PROXY_TYPES << ">\n\n"; cpp_types << "namespace mp {\n"; From 0fc31ec036091fcc5e62372fade78faa80e83d0f Mon Sep 17 00:00:00 2001 From: Ryan Ofsky Date: Thu, 2 Oct 2025 14:38:30 -0400 Subject: [PATCH 3/4] (workaround for ci, to be dropped after #214 is merged) Cherry-picking 73d22ba2e9301fd77727a2193fc289e5299563bb from https://github.com/bitcoin-core/libmultiprocess/pull/222 --- test/mp/test/test.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/mp/test/test.cpp b/test/mp/test/test.cpp index 79b440f..b2cd589 100644 --- a/test/mp/test/test.cpp +++ b/test/mp/test/test.cpp @@ -321,12 +321,12 @@ KJ_TEST("Make simultaneous IPC callbacks with same request_thread and callback_t setup.server->m_impl->m_fn = [&] {}; foo->callFnAsync(); ThreadContext& tc{g_thread_context}; - std::optional callback_thread, request_thread; - { + Thread::Client *callback_thread, *request_thread; + foo->m_context.loop->sync([&] { Lock lock(tc.waiter->m_mutex); - callback_thread = tc.callback_threads.at(foo->m_context.connection)->m_client; - request_thread = tc.request_threads.at(foo->m_context.connection)->m_client; - } + callback_thread = &tc.callback_threads.at(foo->m_context.connection)->m_client; + request_thread = &tc.request_threads.at(foo->m_context.connection)->m_client; + }); setup.server->m_impl->m_fn = [&] { try From 437f042541a688da1d603d02748ba7ff48926c80 Mon Sep 17 00:00:00 2001 From: Ryan Ofsky Date: Fri, 3 Oct 2025 08:25:19 -0400 Subject: [PATCH 4/4] ci: try to get stack trace from mptest --- ci/configs/gnu32.bash | 2 +- ci/scripts/ci.sh | 1 + ci/scripts/run.sh | 115 +++++++++++++++++++++++++++++++++++++++++- shell.nix | 1 + test/mp/test/test.cpp | 7 +++ 5 files changed, 124 insertions(+), 2 deletions(-) diff --git a/ci/configs/gnu32.bash b/ci/configs/gnu32.bash index 961821c..c84b6c2 100644 --- a/ci/configs/gnu32.bash +++ b/ci/configs/gnu32.bash @@ -4,6 +4,6 @@ NIX_ARGS=( --arg minimal true --arg crossPkgs 'import { crossSystem = { config = "i686-unknown-linux-gnu"; }; }' ) -export CXXFLAGS="-Werror -Wall -Wextra -Wpedantic -Wno-unused-parameter" +export CXXFLAGS="-Werror -Wall -Wextra -Wpedantic -Wno-unused-parameter -ggdb" CMAKE_ARGS=(-G Ninja) BUILD_ARGS=(-k 0) diff --git a/ci/scripts/ci.sh b/ci/scripts/ci.sh index d989e9f..78a11a9 100755 --- a/ci/scripts/ci.sh +++ b/ci/scripts/ci.sh @@ -34,4 +34,5 @@ else cmake --build . --target "$t" -- "${BUILD_ARGS[@]+"${BUILD_ARGS[@]}"}" done fi +ulimit -c unlimited || true ctest --output-on-failure diff --git a/ci/scripts/run.sh b/ci/scripts/run.sh index 11b9184..ab3dbd4 100755 --- a/ci/scripts/run.sh +++ b/ci/scripts/run.sh @@ -8,6 +8,119 @@ export LC_ALL=C.UTF-8 set -o errexit -o nounset -o pipefail -o xtrace +echo "::group::Enable core dumps" + +# Try to ensure cores are written as files (not to systemd-coredump/apport). +# This typically requires root; it's okay if it fails — we'll fall back to coredumpctl. +want_pattern='core.%e.%p.%t' +have_sudo=0 +if command -v sudo >/dev/null 2>&1; then have_sudo=1; fi +if [[ -r /proc/sys/kernel/core_pattern ]]; then + current="$(cat /proc/sys/kernel/core_pattern || true)" + if [[ "$current" == '|'* ]]; then + # Piped to a handler; try to switch to file-based cores + if (( have_sudo )); then sudo sysctl -w "kernel.core_pattern=$want_pattern" || true; fi + else + # Already file-based; try to make the name more informative + if (( have_sudo )); then sudo sysctl -w "kernel.core_pattern=$want_pattern" || true; fi + fi +fi + +echo "core_pattern now: $(cat /proc/sys/kernel/core_pattern 2>/dev/null || echo '?')" +echo "::endgroup::" + [ "${CI_CONFIG+x}" ] && source "$CI_CONFIG" -nix-shell --pure --keep CI_CONFIG --keep CI_CLEAN "${NIX_ARGS[@]+"${NIX_ARGS[@]}"}" --run ci/scripts/ci.sh shell.nix +status=0 +if ! nix-shell --pure --keep CI_CONFIG --keep CI_CLEAN "${NIX_ARGS[@]+"${NIX_ARGS[@]}"}" --run ci/scripts/ci.sh shell.nix; then + status=$? + echo "ci.sh failed with status $status" +fi + +# --- Collect core files (file-based) ----------------------------------------- +echo "::group::Search for core files" +mapfile -t cores < <(find "${GITHUB_WORKSPACE:-$PWD}" -xdev \ + \( -type f -name 'core' -o -name 'core.*' \) 2>/dev/null || true) +echo "Found ${#cores[@]} possible core files." +echo "::endgroup::" + +# --- If no file-based cores, try systemd-coredump (Ubuntu) ------------------- +use_coredumpctl=0 +if [[ "${#cores[@]}" -eq 0 ]] && command -v coredumpctl >/dev/null 2>&1; then + # List recent dumps for this workspace's binaries (best-effort heuristic). + # If you know the test binary name(s), set EXE_HINT to narrow it. + EXE_HINT="mptest" + echo "::group::Check systemd-coredump" + if [[ -n "$EXE_HINT" ]]; then + coredumpctl --no-pager list | grep -E "$EXE_HINT" || true + else + coredumpctl --no-pager list | head -n 50 || true + fi + echo "::endgroup::" + use_coredumpctl=1 +fi + +# --- Print backtraces -------------------------------------------------------- +echo "::group::Backtraces" + +backtrace_with_gdb () { + local exe="$1" core="$2" + if [[ -n "$exe" && -x "$exe" ]]; then + echo "Core: $core" + echo "Exe: $exe" + gdb -batch "$exe" "$core" \ + -ex 'set pagination off' \ + -ex 'set print pretty on' \ + -ex 'info threads' \ + -ex 'thread apply all bt full' \ + -ex 'quit' || true + else + echo "Core: $core (exe unknown or not executable; trying core-only mode)" + gdb -batch -c "$core" \ + -ex 'set pagination off' \ + -ex 'set print pretty on' \ + -ex 'info threads' \ + -ex 'info files' \ + -ex 'thread apply all bt full' \ + -ex 'quit' || true + fi +} +if [[ "${#cores[@]}" -gt 0 ]]; then + for core in "${cores[@]}"; do + # Try to extract the executable path that gdb discovers from the core + exe="$(gdb -batch -c "$core" -ex 'info files' 2>/dev/null | sed -n "s/^Core was generated by \`\(.*\)'.*$/\1/p" | head -n1 || true)" + backtrace_with_gdb "$exe" "$core" + done +elif (( use_coredumpctl )); then + # If your tests produce a known binary name, set EXE_HINT to make this precise. + # Otherwise we grab the newest C/C++ dump and use that. + line="$(coredumpctl --no-pager --no-legend list | tail -n 1)" + if [[ -n "$EXE_HINT" ]]; then + line="$(coredumpctl --no-pager --no-legend list | grep -E "$EXE_HINT" | tail -n 1 || true)" + fi + if [[ -n "$line" ]]; then + pid="$(awk '{print $5}' <<<"$line" || true)" + exe_path="$(coredumpctl --no-pager info "$pid" \ + | sed -n 's/^ *Executable: //p' \ + | head -n1)" + # Dump that core to a file and analyze + out="core.from-coredumpctl.${pid:-unknown}" + if coredumpctl dump "${pid:-}" -o "$out" >/dev/null 2>&1; then + backtrace_with_gdb "$exe_path" "$out" + else + echo "Could not dump core via coredumpctl (pid=$pid)." + # As a last resort, coredumpctl can drive gdb directly, but it's interactive. + # We avoid that in CI. + fi + rm -v "$out" + else + echo "No matching coredumps in systemd-coredump." + fi +else + echo "No core files found." +fi + +echo "::endgroup::" + +# Preserve the original test result code +exit "$status" diff --git a/shell.nix b/shell.nix index 24d3820..077259e 100644 --- a/shell.nix +++ b/shell.nix @@ -55,6 +55,7 @@ in crossPkgs.mkShell { ]; nativeBuildInputs = with pkgs; [ cmakeBuild + gdb include-what-you-use ninja ] ++ lib.optionals (!minimal) [ diff --git a/test/mp/test/test.cpp b/test/mp/test/test.cpp index b2cd589..7519f21 100644 --- a/test/mp/test/test.cpp +++ b/test/mp/test/test.cpp @@ -29,6 +29,7 @@ #include "mp/util.h" #include #include +#include #include #include #include @@ -67,6 +68,12 @@ class TestSetup TestSetup(bool client_owns_connection = true) : thread{[&] { + // Restore default sigsegv handler to enable core dumps + struct sigaction dfl{}; + dfl.sa_handler = SIG_DFL; + sigemptyset(&dfl.sa_mask); + sigaction(SIGSEGV, &dfl, nullptr); + EventLoop loop("mptest", [](mp::LogMessage log_data) { std::cout << "LOG" << (int)log_data.level << ": " << log_data.message << "\n"; if (log_data.level == mp::Log::Raise) throw std::runtime_error(log_data.message);