diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index fd534bc77f0009..7102d326a198eb 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -75,6 +75,7 @@ #include "service/backend_options.h" #include "service/backend_service.h" #include "service/brpc_service.h" +#include "service/http/action/print_stack_globals.h" #include "service/http_service.h" #include "storage/options.h" #include "storage/storage_engine.h" @@ -127,6 +128,7 @@ void init_signals() { if (ret < 0) { exit(-1); } + doris::print_stack::print_stack_init(); } static void thrift_output(const char* x) { diff --git a/be/src/service/http/action/print_stack.cpp b/be/src/service/http/action/print_stack.cpp new file mode 100644 index 00000000000000..bf85116d377104 --- /dev/null +++ b/be/src/service/http/action/print_stack.cpp @@ -0,0 +1,296 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "service/http/action/print_stack.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/symbol_index.h" +#include "service/http/action/print_stack_globals.h" +#include "util/defer_op.h" + +namespace doris { +namespace { + +// Reason: only one dump can run at a time. Two would fight over the +// signal channel and the capture slot. +// Spec: docs/architecture.md "Layer 3a invariants". +std::mutex s_dump_mutex; + +// Reason: enumerate /proc/self/task. With a selector, return the +// selector only if /proc/self/task lists it; otherwise an empty +// vector. Without a selector, return every tid. +// +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:656-680 +// (`getFilteredThreadIds`). CK enumerates /proc/self/task into a UInt64 +// column and then runs the SQL WHERE predicate over it via +// `VirtualColumnUtils::filterBlockWithPredicate`. Our public API only +// accepts an optional `thread_id`, so we specialize CK's generic +// predicate filter to a single-tid equality check. +std::vector list_target_thread_ids(const PrintStackOptions& options) { + DIR* dir = ::opendir("/proc/self/task"); + if (dir == nullptr) { + return {}; + } + + std::vector all_tids; + while (auto* ent = ::readdir(dir)) { + if (ent->d_name[0] == '.') { + continue; + } + char* end = nullptr; + long parsed = std::strtol(ent->d_name, &end, 10); + if (end == ent->d_name || *end != '\0' || parsed <= 0) { + continue; + } + all_tids.push_back(static_cast(parsed)); + } + ::closedir(dir); + + if (!options.target_thread_id.has_value()) { + return all_tids; + } + + int64_t want = *options.target_thread_id; + for (int64_t tid : all_tids) { + if (tid == want) { + return {tid}; + } + } + return {}; +} + +// Reason: thread names come from /proc//comm. Best-effort. +// +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:234-284 +// (`getFilteredThreadNames`, Linux branch). CK reads the same +// `/proc/self/task//comm` file per tid, then runs the SQL WHERE +// predicate over the collected names. We only need the per-tid name for +// the public response, so we skip the name-side predicate filter. +std::unordered_map read_thread_names(const std::vector& tids) { + std::unordered_map names; + names.reserve(tids.size()); + for (int64_t tid : tids) { + char path[64]; + std::snprintf(path, sizeof(path), "/proc/self/task/%lld/comm", + static_cast(tid)); + std::ifstream in(path); + std::string name; + if (in.is_open()) { + std::getline(in, name); + } + names.emplace(tid, std::move(name)); + } + return names; +} + +// Reason: skip threads that block the service signal. Reads SigBlk +// from /proc//status and checks bit (signal_number - 1). +// +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:294-327 +// (`isSignalBlocked`). Same /proc parse. Note that CK's check is +// `sig_blk & signal` (line 317), which treats the raw signal number as +// a mask — that miscounts for any signal whose number is not already a +// power of two. Linux's SigBlk is a bitmask indexed by `signal - 1`, so +// we shift explicitly here. +bool is_signal_blocked(int64_t tid, int signal_number) { + char path[64]; + std::snprintf(path, sizeof(path), "/proc/self/task/%lld/status", + static_cast(tid)); + std::ifstream in(path); + if (!in.is_open()) { + return false; + } + std::string line; + while (std::getline(in, line)) { + if (line.compare(0, 7, "SigBlk:") != 0) { + continue; + } + // Format: "SigBlk:\t". + std::string hex = line.substr(7); + // Trim leading whitespace. + size_t start = hex.find_first_not_of(" \t"); + if (start == std::string::npos) { + return false; + } + hex = hex.substr(start); + unsigned long long mask = 0; + try { + mask = std::stoull(hex, nullptr, 16); + } catch (...) { + return false; + } + return (mask & (1ULL << (signal_number - 1))) != 0; + } + return false; +} + +int remaining_ms_until(std::chrono::steady_clock::time_point deadline) { + auto now = std::chrono::steady_clock::now(); + if (now >= deadline) { + return 0; + } + auto diff = std::chrono::duration_cast(deadline - now).count(); + if (diff <= 0) { + return 0; + } + return static_cast(diff); +} + +// Reason: thin wrapper over the rt_tgsigqueueinfo syscall. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:114-118. +int rt_tgsigqueueinfo(pid_t tgid, pid_t tid, int sig, siginfo_t* info) { + return static_cast(::syscall(__NR_rt_tgsigqueueinfo, tgid, tid, sig, info)); +} + +// Reason: map one captured PC to a (dso, dso_offset) pair. +// SymbolIndex is built lazily by MultiVersion::instance() +// on first call; its construction reads dl_iterate_phdr and is not +// signal-safe, so this resolution runs in the coordinator. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:560-563. +StackFrame pc_to_frame(uintptr_t pc) { + const auto* obj = + doris::SymbolIndex::instance()->findObject(reinterpret_cast(pc)); + if (obj == nullptr) { + return {}; + } + return {obj->name, pc - reinterpret_cast(obj->address_begin)}; +} + +// Reason: bounded read of the notification pipe. Drains notifications +// whose sequence is not the current one. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:186-228. +bool wait_on_pipe(int timeout_ms) { + while (true) { + pollfd pfd {print_stack::g_notification_pipe_rw[0], POLLIN, 0}; + int rc = ::poll(&pfd, 1, timeout_ms); + + if (rc < 0 && errno == EINTR) { + if (--timeout_ms <= 0) { + return false; + } + continue; + } + if (rc < 0) { + return false; + } + if (rc == 0) { + return false; + } + + int seq = 0; + ssize_t n = ::read(print_stack::g_notification_pipe_rw[0], &seq, sizeof(seq)); + if (n < 0 && errno == EINTR) { + continue; + } + if (n != static_cast(sizeof(seq))) { + return false; + } + if (seq == print_stack::g_sequence_num.load(std::memory_order_relaxed)) { + return true; + } + } +} + +// Reason: send one signal, wait bounded, copy out on match. Sequence +// advances on every exit so a late handler cannot publish into the +// next target's slot. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:484-518. +void capture_one(int64_t tid, std::chrono::steady_clock::time_point deadline, + ThreadStackTrace* out) { + if (is_signal_blocked(tid, print_stack::kServiceSignal)) { + out->status = ThreadStackStatus::SignalBlocked; + return; + } + + auto bump_seq = [] { ++print_stack::g_sequence_num; }; + Defer on_exit(std::move(bump_seq)); + + siginfo_t si {}; + si.si_code = SI_QUEUE; + si.si_pid = print_stack::g_server_pid.load(std::memory_order_relaxed); + si.si_value.sival_int = print_stack::g_sequence_num.load(std::memory_order_acquire); + + if (rt_tgsigqueueinfo(si.si_pid, static_cast(tid), print_stack::kServiceSignal, &si) != + 0) { + if (errno == ESRCH) { + out->status = ThreadStackStatus::ThreadExited; + return; + } + out->status = ThreadStackStatus::CaptureFailed; + return; + } + + if (!wait_on_pipe(remaining_ms_until(deadline)) || + si.si_value.sival_int != print_stack::g_data_ready_num.load(std::memory_order_acquire)) { + out->status = ThreadStackStatus::Timeout; + return; + } + + out->status = print_stack::g_slot.status; + out->frames.reserve(print_stack::g_slot.frame_count); + for (size_t i = 0; i < print_stack::g_slot.frame_count; ++i) { + out->frames.push_back(pc_to_frame(print_stack::g_slot.pcs[i])); + } +} + +} // namespace + +// Reason: orchestration shared by every variant. The per-variant step +// is the capture hook the handler invokes. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:424-599. +PrintStackResult collect_print_stack(const PrintStackOptions& options) { + std::scoped_lock dump_lock(s_dump_mutex); + + std::vector tids = list_target_thread_ids(options); + auto names = read_thread_names(tids); + + PrintStackResult result; + result.threads.reserve(tids.size()); + for (int64_t tid : tids) { + ThreadStackTrace row; + row.thread_id = tid; + row.thread_name = names[tid]; + + auto deadline = std::chrono::steady_clock::now() + + std::chrono::milliseconds(print_stack::kPipeReadTimeoutMs); + capture_one(tid, deadline, &row); + + result.threads.push_back(std::move(row)); + } + + return result; +} + +} // namespace doris diff --git a/be/src/service/http/action/print_stack.h b/be/src/service/http/action/print_stack.h new file mode 100644 index 00000000000000..e24547b19172ac --- /dev/null +++ b/be/src/service/http/action/print_stack.h @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +namespace doris { + +// Reason: status carries why a row has no frames. Stays in the typed +// result; the public JSON contract drops it. +// Spec: docs/architecture.md "Layer 5". +enum class ThreadStackStatus { + // Reason: capture succeeded; frames hold the resolved trace. + OK, + // Reason: handler did not publish before the bounded wait expired. + Timeout, + // Reason: target thread had the service signal in its blocked + // mask. Coordinator did not send the signal. + SignalBlocked, + // Reason: target thread no longer exists. Send returned ESRCH. + ThreadExited, + // Reason: send failed for a reason other than ESRCH, or the + // handler stored a failure status in the slot. + CaptureFailed, +}; + +// Reason: one resolved frame. (dso, dso_offset) is the canonical +// input to addr2line and llvm-symbolizer. +struct StackFrame { + std::string dso; + uint64_t dso_offset = 0; +}; + +struct ThreadStackTrace { + int64_t thread_id = 0; + std::string thread_name; + ThreadStackStatus status = ThreadStackStatus::OK; + std::vector frames; +}; + +struct PrintStackOptions { + std::optional target_thread_id; +}; + +struct PrintStackResult { + std::vector threads; +}; + +PrintStackResult collect_print_stack(const PrintStackOptions& options); + +} // namespace doris diff --git a/be/src/service/http/action/print_stack_action.cpp b/be/src/service/http/action/print_stack_action.cpp new file mode 100644 index 00000000000000..8df968bc8a02f5 --- /dev/null +++ b/be/src/service/http/action/print_stack_action.cpp @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "service/http/action/print_stack_action.h" + +#include + +#include +#include +#include + +#include "common/status.h" +#include "service/http/action/print_stack.h" +#include "service/http/http_channel.h" +#include "service/http/http_headers.h" +#include "service/http/http_request.h" +#include "service/http/http_status.h" +#include "util/easy_json.h" + +namespace doris { +namespace { + +// Reason: parse only the public selector. `thread_id` is optional and +// must be a positive integer. +Status parse_print_stack_options(HttpRequest* req, PrintStackOptions* out) { + const auto& params = req->query_params(); + auto it = params.find("thread_id"); + if (it == params.end()) { + return Status::OK(); + } + + int64_t parsed = 0; + try { + parsed = std::stoll(it->second); + } catch (const std::exception&) { + return Status::InvalidArgument("thread_id must be an integer"); + } + if (parsed <= 0) { + return Status::InvalidArgument("thread_id must be positive"); + } + out->target_thread_id = parsed; + return Status::OK(); +} + +// Reason: drop status and any internal field; emit the public JSON +// contract: { "threads": [ { "thread_id", "thread_name", "trace": +// [ { "dso", "dso_offset" } ] } ] }. `dso_offset` is a hex string so +// it round-trips losslessly through clients that lack uint64. +std::string serialize_print_stack_result(const PrintStackResult& result) { + EasyJson root; + EasyJson threads = root.Set("threads", EasyJson::kArray); + for (const auto& trace : result.threads) { + EasyJson row = threads.PushBack(EasyJson::kObject); + row.Set("thread_id", trace.thread_id); + row.Set("thread_name", trace.thread_name); + EasyJson frames = row.Set("trace", EasyJson::kArray); + for (const auto& f : trace.frames) { + if (f.dso.empty()) { + continue; + } + EasyJson frame = frames.PushBack(EasyJson::kObject); + frame.Set("dso", f.dso); + frame.Set("dso_offset", fmt::format("0x{:x}", f.dso_offset)); + } + } + return root.ToString(); +} + +} // namespace + +PrintStackAction::PrintStackAction(ExecEnv* /*exec_env*/) {} + +void PrintStackAction::handle(HttpRequest* req) { + PrintStackOptions options; + if (auto s = parse_print_stack_options(req, &options); !s.ok()) { + EasyJson err; + err.Set("status", "bad_request"); + err.Set("message", std::string(s.to_string())); + req->add_output_header(HttpHeaders::CONTENT_TYPE, "application/json"); + HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, err.ToString()); + return; + } + + PrintStackResult result = collect_print_stack(options); + std::string body = serialize_print_stack_result(result); + + req->add_output_header(HttpHeaders::CONTENT_TYPE, "application/json"); + HttpChannel::send_reply(req, HttpStatus::OK, body); +} + +} // namespace doris diff --git a/be/src/service/http/action/print_stack_action.h b/be/src/service/http/action/print_stack_action.h new file mode 100644 index 00000000000000..9374dbecac9930 --- /dev/null +++ b/be/src/service/http/action/print_stack_action.h @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "service/http/http_handler.h" + +namespace doris { + +class ExecEnv; +class HttpRequest; + +class PrintStackAction final : public HttpHandler { +public: + explicit PrintStackAction(ExecEnv* exec_env); + ~PrintStackAction() override = default; + + void handle(HttpRequest* req) override; +}; + +} // namespace doris diff --git a/be/src/service/http/action/print_stack_capture.h b/be/src/service/http/action/print_stack_capture.h new file mode 100644 index 00000000000000..ebe57b7e7d6cca --- /dev/null +++ b/be/src/service/http/action/print_stack_capture.h @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "service/http/action/print_stack_globals.h" + +namespace doris::print_stack { + +// Reason: every variant supplies this function. The body runs inside +// the signal handler on the interrupted thread, between latch acquire +// and data_ready publish. The function MUST: +// - not allocate, log, take locks, read /proc, or touch Doris TLS; +// - write at most kMaxSignalFrames PCs into out->pcs; +// - set out->frame_count to the number of valid entries; +// - set out->status to OK on success or CaptureFailed on failure. +// Spec: docs/architecture.md "Layer 3d". +void capture_into_slot(const ucontext_t& uc, StackCaptureSlot* out); + +} // namespace doris::print_stack diff --git a/be/src/service/http/action/print_stack_fp_walk.cpp b/be/src/service/http/action/print_stack_fp_walk.cpp new file mode 100644 index 00000000000000..cba622321b9701 --- /dev/null +++ b/be/src/service/http/action/print_stack_fp_walk.cpp @@ -0,0 +1,140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include +#include + +#include "service/http/action/print_stack_capture.h" +#include "service/http/action/print_stack_globals.h" + +namespace doris::print_stack { +namespace { + +// Reason: the BE binary is built with `-fno-omit-frame-pointer`. RBP +// chains a stack of saved-frame-pointers: at each `[rbp]` is the +// caller's RBP; at `[rbp + 8]` is the return address. +// Reference: System V AMD64 ABI, "Stack Frame Pointer". +constexpr ptrdiff_t kReturnAddressOffset = sizeof(uintptr_t); + +// Reason: mincore takes a page-aligned address. Round down to the +// host page boundary. +uintptr_t page_floor(uintptr_t addr, long page_size) { + return addr & ~static_cast(page_size - 1); +} + +// Reason: the handler must not segfault on a stack walk. mincore +// reports whether the page is resident (and therefore at least +// mapped). A non-resident page would crash us on dereference. +// Signal-safety: mincore is AS-safe per POSIX. Reference: +// /base/base/StackTrace.cpp uses the same idea via a sigsetjmp +// guard; we use the cheaper mincore probe. +bool page_is_mapped(uintptr_t addr, long page_size) { + unsigned char vec = 0; + if (::mincore(reinterpret_cast(page_floor(addr, page_size)), + static_cast(page_size), &vec) != 0) { + return false; + } + return true; +} + +// Reason: an RBP that is not 8-aligned or that points outside the +// stack region is meaningless. We cap the range with a generous 16 MiB +// span — large enough for any sane thread, small enough that a stray +// RBP cannot point a long way into the heap. +constexpr uintptr_t kMaxStackSpan = static_cast(16) << 20; // 16 MiB + +bool rbp_can_read(uintptr_t rbp, uintptr_t first_rbp, long page_size) { + if (rbp == 0) { + return false; + } + if ((rbp & (sizeof(uintptr_t) - 1)) != 0) { + return false; + } + if (rbp < first_rbp) { + // Frame pointers walk toward higher addresses; equality is allowed + // on the first iteration where rbp == first_rbp. + return false; + } + if (rbp - first_rbp > kMaxStackSpan) { + return false; + } + if (!page_is_mapped(rbp, page_size)) { + return false; + } + if (!page_is_mapped(rbp + kReturnAddressOffset, page_size)) { + return false; + } + return true; +} + +uintptr_t read_word(uintptr_t addr) { + return *reinterpret_cast(addr); +} + +} // namespace + +void capture_into_slot(const ucontext_t& uc, StackCaptureSlot* out) { + // 1. Default to failure; success path overwrites both fields. + out->status = ThreadStackStatus::CaptureFailed; + out->frame_count = 0; + + // 2. Extract RIP and RBP from the interrupted machine context. + const auto& mc = uc.uc_mcontext; + const uintptr_t rip = static_cast(mc.gregs[REG_RIP]); + uintptr_t rbp = static_cast(mc.gregs[REG_RBP]); + + long page_size = ::sysconf(_SC_PAGESIZE); + if (page_size <= 0) { + return; + } + + size_t n = 0; + if (rip != 0) { + out->pcs[n++] = rip; + } + + const uintptr_t first_rbp = rbp; + + // 3. Walk the saved-frame-pointer chain. + for (; n < kMaxSignalFrames; ++n) { + if (!rbp_can_read(rbp, first_rbp, page_size)) { + break; + } + const uintptr_t return_addr = read_word(rbp + kReturnAddressOffset); + const uintptr_t next_rbp = read_word(rbp); + if (return_addr == 0) { + break; + } + out->pcs[n] = return_addr; + // The next RBP must be strictly greater than the current one, + // else the chain is broken or loops. Stop before dereferencing. + if (next_rbp <= rbp) { + ++n; + break; + } + rbp = next_rbp; + } + + out->frame_count = n; + out->status = ThreadStackStatus::OK; +} + +} // namespace doris::print_stack diff --git a/be/src/service/http/action/print_stack_globals.h b/be/src/service/http/action/print_stack_globals.h new file mode 100644 index 00000000000000..e19660436ed1cc --- /dev/null +++ b/be/src/service/http/action/print_stack_globals.h @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include +#include +#include +#include +#include + +#include "service/http/action/print_stack.h" + +namespace doris::print_stack { + +// Reason: an RT signal is queued, not coalesced, so the coordinator +// drives one target after another without losing the signal in +// flight. The BE has no other SIGRT* user, and glibc reserves +// SIGRTMIN..SIGRTMIN+2. Any slot >= SIGRTMIN+3 would work; SIGRTMIN+6 +// is the chosen one. `inline const` (not `constexpr`) because glibc +// expands SIGRTMIN to a function call. +// Spec: docs/architecture.md "Layer 1". +inline const int kServiceSignal = SIGRTMIN + 6; + +// Reason: a runaway walk would block the handler. This cap sizes the +// slot's frame array. +// Spec: docs/architecture.md "Layer 1". +constexpr size_t kMaxSignalFrames = 1024; + +// Reason: bounded wait the coordinator gives the handler to publish +// on the notification pipe. Short enough that one stuck thread does +// not dominate wall clock; long enough that a non-stuck thread +// always publishes. Per-thread, applied sequentially. +// Spec: docs/architecture.md "Layer 1". +constexpr int kPipeReadTimeoutMs = 100; + +// Reason: handler writes frames here; coordinator reads them after +// the wait succeeds. One slot suffices because capture is sequential. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:103. +struct StackCaptureSlot { + // Reason: outcome the handler stored. Coordinator may overwrite + // on Timeout, SignalBlocked, or ThreadExited. + ThreadStackStatus status {ThreadStackStatus::CaptureFailed}; + + // Reason: number of valid entries in `pcs`. Variant writes this. + size_t frame_count {0}; + + // Reason: runtime PCs the variant captured. Resolved in the + // coordinator. + std::array pcs {}; +}; + +// Reason: identifies signals sent by this process. A manual signal +// from another process cannot publish into the slot. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:75. +extern std::atomic g_server_pid; + +// Reason: every capture attempt gets a fresh sequence. Signal +// payload and pipe both carry this value. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:83. +extern std::atomic g_sequence_num; + +// Reason: handler stores the sequence after frames are visible. The +// coordinator checks it before reading the slot. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:84. +extern std::atomic g_data_ready_num; + +// Reason: single-writer gate for the slot. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:85. +extern std::atomic g_signal_latch; + +extern StackCaptureSlot g_slot; + +// Reason: pipe carries only the sequence number. Frames travel +// through g_slot. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:111. +extern int g_notification_pipe_rw[2]; + +void print_stack_signal_handler(int sig, siginfo_t* info, void* context); + +void print_stack_init(); + +} // namespace doris::print_stack diff --git a/be/src/service/http/action/print_stack_init.cpp b/be/src/service/http/action/print_stack_init.cpp new file mode 100644 index 00000000000000..cc4369092c4350 --- /dev/null +++ b/be/src/service/http/action/print_stack_init.cpp @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "service/http/action/print_stack_globals.h" + +#include +#include + +#include +#include +#include + +#include "common/logging.h" + +namespace doris::print_stack { + +std::atomic g_server_pid {0}; +std::atomic g_sequence_num {0}; +std::atomic g_data_ready_num {0}; +std::atomic g_signal_latch {false}; +StackCaptureSlot g_slot; +int g_notification_pipe_rw[2] = {-1, -1}; + +namespace { + +// Reason: open the notification pipe with close-on-exec. The pipe is +// the only wakeup channel for the coordinator. +// Reference: /src/Common/PipeFDs.cpp:30-38. +void open_notification_pipe_once() { + if (pipe2(g_notification_pipe_rw, O_CLOEXEC) != 0) { + LOG(FATAL) << "print_stack: pipe2 failed: " << std::strerror(errno); + } +} + +// Reason: install the service signal handler with SA_SIGINFO so the +// payload carries the sequence. SA_RESTART avoids EINTR on unrelated +// syscalls in target threads. +// Spec: docs/architecture.md "Layer 1". +void install_signal_handler_once() { + struct sigaction sa {}; + sa.sa_flags = SA_SIGINFO | SA_RESTART; + sa.sa_sigaction = print_stack_signal_handler; + sigemptyset(&sa.sa_mask); + + if (sigaction(kServiceSignal, &sa, nullptr) != 0) { + LOG(FATAL) << "print_stack: sigaction failed: " << std::strerror(errno); + } +} + +} // namespace + +void print_stack_init() { + // 1. Record this process's pid so the handler can reject foreign + // signals. + g_server_pid.store(getpid()); + + // 2. Open the notification pipe before installing the handler so + // the handler always has a valid write fd. + open_notification_pipe_once(); + + // 3. Install the signal handler last. + install_signal_handler_once(); +} + +} // namespace doris::print_stack diff --git a/be/src/service/http/action/print_stack_signal_handler.cpp b/be/src/service/http/action/print_stack_signal_handler.cpp new file mode 100644 index 00000000000000..bfbd6df749c49c --- /dev/null +++ b/be/src/service/http/action/print_stack_signal_handler.cpp @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include + +#include "service/http/action/print_stack_capture.h" +#include "service/http/action/print_stack_globals.h" + +namespace doris::print_stack { + +// Reason: publish one interrupted thread's frames, then notify the +// coordinator. Body runs on the interrupted worker thread and must +// stay signal-safe. +// Reference: /src/Storages/System/StorageSystemStackTrace.cpp:120-183. +void print_stack_signal_handler(int /*sig*/, siginfo_t* info, void* context) { + // 1. The handler body must stay allocation-free. CK enforces + // this with `DENY_ALLOCATIONS_IN_SCOPE`, backed by their + // allocator's overload hooks. Doris jemalloc has no such + // hook, so the marker is kept commented out as documentation; + // review enforces the rule. + // Reference: /src/Storages/System/StorageSystemStackTrace.cpp:121. + // DENY_ALLOCATIONS_IN_SCOPE; + + // 2. Save errno so the interrupted thread does not observe a + // change. + auto saved_errno = errno; + + // 3. Reject signals not sent by this process. + if (info->si_pid != g_server_pid.load(std::memory_order_relaxed)) { + errno = saved_errno; + return; + } + + // 4. Read the sequence from the payload. + int seq = info->si_value.sival_int; + + // 5. Discard late deliveries. + if (seq != g_sequence_num.load(std::memory_order_acquire)) { + errno = saved_errno; + return; + } + + // 6. Single writer enters the slot. + bool expected = false; + if (!g_signal_latch.compare_exchange_strong(expected, true, std::memory_order_acquire)) { + errno = saved_errno; + return; + } + + // 7. Variant fills the slot from the interrupted ucontext. + capture_into_slot(*reinterpret_cast(context), &g_slot); + + // 8. Publish the sequence after frames are visible. + g_data_ready_num.store(seq, std::memory_order_release); + + // 9. Wake the coordinator. Pipe carries only the sequence. + ssize_t res = ::write(g_notification_pipe_rw[1], &seq, sizeof(seq)); + (void)res; + + // 10. Restore errno and release the latch. + errno = saved_errno; + g_signal_latch.store(false, std::memory_order_release); +} + +} // namespace doris::print_stack diff --git a/be/src/service/http_service.cpp b/be/src/service/http_service.cpp index 93a50a81cc10ff..2cacc312e8385d 100644 --- a/be/src/service/http_service.cpp +++ b/be/src/service/http_service.cpp @@ -61,6 +61,7 @@ #include "service/http/action/pad_rowset_action.h" #include "service/http/action/pipeline_task_action.h" #include "service/http/action/pprof_actions.h" +#include "service/http/action/print_stack_action.h" #include "service/http/action/reload_tablet_action.h" #include "service/http/action/report_action.h" #include "service/http/action/reset_rpc_channel_action.h" @@ -164,6 +165,10 @@ Status HttpService::start() { HealthAction* health_action = _pool.add(new HealthAction(_env)); _ev_http_server->register_handler(HttpMethod::GET, "/api/health", health_action); + // Register print_stack debug action + PrintStackAction* print_stack_action = _pool.add(new PrintStackAction(_env)); + _ev_http_server->register_handler(HttpMethod::GET, "/api/print_stack", print_stack_action); + // Clear cache action ClearCacheAction* clear_cache_action = _pool.add(new ClearCacheAction(_env)); _ev_http_server->register_handler(HttpMethod::GET, "/api/clear_cache/{type}", diff --git a/be/src/storage/segment/segment_writer.cpp b/be/src/storage/segment/segment_writer.cpp index 917777f24ebfbd..b5f3e2d5b4d3c9 100644 --- a/be/src/storage/segment/segment_writer.cpp +++ b/be/src/storage/segment/segment_writer.cpp @@ -1252,11 +1252,11 @@ Status SegmentWriter::_generate_short_key_index(std::vectorkeys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write; } -inline bool SegmentWriter::_is_mow_with_cluster_key() { +bool SegmentWriter::_is_mow_with_cluster_key() { return _is_mow() && !_tablet_schema->cluster_key_uids().empty(); } diff --git a/be/test/service/http/print_stack_action_test.cpp b/be/test/service/http/print_stack_action_test.cpp new file mode 100644 index 00000000000000..e4c901dbbeb574 --- /dev/null +++ b/be/test/service/http/print_stack_action_test.cpp @@ -0,0 +1,256 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "service/http/action/print_stack_action.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "service/http/action/print_stack_globals.h" +#include "service/http/ev_http_server.h" +#include "service/http/http_client.h" +#include "service/http/http_method.h" + +namespace doris { +namespace { + +constexpr std::string_view kRoute = "/api/print_stack"; + +// Reason: a cheap thread fixture for tests that need a stable target tid +// to dump. The worker records its tid via SYS_gettid, signals ready, and +// parks on a flag until stop(). +// Local: test-only fixture. +class ParkedMarkerThread { +public: + ParkedMarkerThread() = default; + + void start() { + _ready = false; + _stop.store(false); + _thread = std::thread([this] { this->run(); }); + std::unique_lock lock(_mu); + _cv.wait(lock, [this] { return _ready; }); + } + + void stop() { + _stop.store(true); + if (_thread.joinable()) { + _thread.join(); + } + } + + pid_t tid() const { return _tid.load(); } + +private: + void run() { + _tid.store(static_cast(::syscall(SYS_gettid))); + { + std::lock_guard lock(_mu); + _ready = true; + } + _cv.notify_all(); + while (!_stop.load(std::memory_order_relaxed)) { + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + } + } + + std::thread _thread; + std::atomic _tid {0}; + std::atomic _stop {false}; + std::mutex _mu; + std::condition_variable _cv; + bool _ready = false; +}; + +// Reason: integration fixture that exposes PrintStackAction on a real +// EvHttpServer. Test cases drive it with HttpClient and read the JSON +// response — the same path a production caller takes. +// Spec: docs/phase2-test-plan.md "Test surface". +class PrintStackActionTest : public testing::Test { +public: + static void SetUpTestCase() { + s_action = new PrintStackAction(nullptr); + s_server = new EvHttpServer(0); + s_server->register_handler(HttpMethod::GET, std::string(kRoute), s_action); + static_cast(s_server->start()); + s_port = s_server->get_real_port(); + ASSERT_NE(0, s_port); + s_hostname = "http://127.0.0.1:" + std::to_string(s_port); + + print_stack::print_stack_init(); + + s_marker.start(); + } + + static void TearDownTestCase() { + s_marker.stop(); + delete s_server; + s_server = nullptr; + delete s_action; + s_action = nullptr; + } + + static std::string get_url(const std::string& url) { + HttpClient client; + Status st = client.init(url); + EXPECT_TRUE(st.ok()) << st; + client.set_method(HttpMethod::GET); + std::string body; + st = client.execute(&body); + EXPECT_TRUE(st.ok()) << st; + return body; + } + + static EvHttpServer* s_server; + static PrintStackAction* s_action; + static int s_port; + static std::string s_hostname; + static ParkedMarkerThread s_marker; +}; + +EvHttpServer* PrintStackActionTest::s_server = nullptr; +PrintStackAction* PrintStackActionTest::s_action = nullptr; +int PrintStackActionTest::s_port = 0; +std::string PrintStackActionTest::s_hostname; +ParkedMarkerThread PrintStackActionTest::s_marker; + +// Reason: [case 1] catches drift in the JSON contract. The required root +// (threads), per-thread (thread_id, thread_name, trace), and per-frame +// (dso, dso_offset) keys must be present with their declared types. +TEST_F(PrintStackActionTest, ContractJsonShape) { + // 1. GET /api/print_stack and parse the JSON body. + std::string body = get_url(s_hostname + std::string(kRoute)); + rapidjson::Document doc; + doc.Parse(body.c_str()); + ASSERT_FALSE(doc.HasParseError()) << "body: " << body; + + // 2. Root threads is a non-empty array. + ASSERT_TRUE(doc.IsObject()); + ASSERT_TRUE(doc.HasMember("threads")); + ASSERT_TRUE(doc["threads"].IsArray()); + EXPECT_FALSE(doc["threads"].Empty()) << "expected at least one captured thread"; + + // 3. Each thread row carries thread_id/thread_name/trace; each frame + // carries dso/dso_offset; all well-typed. + for (const auto& t : doc["threads"].GetArray()) { + ASSERT_TRUE(t.IsObject()); + ASSERT_TRUE(t.HasMember("thread_id")); + ASSERT_TRUE(t["thread_id"].IsInt64()); + ASSERT_TRUE(t.HasMember("thread_name")); + ASSERT_TRUE(t["thread_name"].IsString()); + ASSERT_TRUE(t.HasMember("trace")); + ASSERT_TRUE(t["trace"].IsArray()); + + for (const auto& f : t["trace"].GetArray()) { + ASSERT_TRUE(f.IsObject()); + ASSERT_TRUE(f.HasMember("dso")); + ASSERT_TRUE(f["dso"].IsString()); + ASSERT_TRUE(f.HasMember("dso_offset")); + ASSERT_TRUE(f["dso_offset"].IsString()); + } + } +} + +// Reason: [case 2] the thread_id query param targets one tid. A live tid +// returns a single matching row; an absent tid returns an empty array. +TEST_F(PrintStackActionTest, ThreadIdSelector) { + // 1. Self-tid: exactly one row whose thread_id matches. + { + pid_t self_tid = static_cast(::syscall(SYS_gettid)); + std::string body = + get_url(s_hostname + std::string(kRoute) + + "?thread_id=" + std::to_string(static_cast(self_tid))); + rapidjson::Document doc; + doc.Parse(body.c_str()); + ASSERT_FALSE(doc.HasParseError()) << "body: " << body; + ASSERT_TRUE(doc.HasMember("threads")); + ASSERT_TRUE(doc["threads"].IsArray()); + ASSERT_EQ(1u, doc["threads"].Size()) << "body: " << body; + EXPECT_EQ(static_cast(self_tid), doc["threads"][0]["thread_id"].GetInt64()); + } + + // 2. Absent tid (999999999): threads is empty. + { + std::string body = get_url(s_hostname + std::string(kRoute) + "?thread_id=999999999"); + rapidjson::Document doc; + doc.Parse(body.c_str()); + ASSERT_FALSE(doc.HasParseError()) << "body: " << body; + ASSERT_TRUE(doc.HasMember("threads")); + ASSERT_TRUE(doc["threads"].IsArray()); + EXPECT_TRUE(doc["threads"].Empty()) << "body: " << body; + } +} + +// Reason: [case 3] the walker observes at least one frame that maps back +// to the test binary. Capture is best-effort under TSan, so the loop +// retries up to 100 times before failing — mirroring CK's +// 03565_system_stack_trace_works.sh. +TEST_F(PrintStackActionTest, BestEffortFrameObserved) { + // 1. The marker thread is live so its tid is non-zero. + ASSERT_NE(0, s_marker.tid()); + + // 2. Up to 100 attempts: dump the marker tid and scan its trace for a + // frame whose dso names the test binary (`doris_be_test`) with a + // non-zero offset. + bool observed = false; + for (int attempt = 0; attempt < 100 && !observed; ++attempt) { + std::string body = + get_url(s_hostname + std::string(kRoute) + + "?thread_id=" + std::to_string(static_cast(s_marker.tid()))); + rapidjson::Document doc; + doc.Parse(body.c_str()); + if (doc.HasParseError() || !doc.HasMember("threads") || !doc["threads"].IsArray() || + doc["threads"].Empty()) { + continue; + } + const auto& trace = doc["threads"][0]["trace"]; + if (!trace.IsArray()) { + continue; + } + for (const auto& frame : trace.GetArray()) { + if (!frame.IsObject() || !frame.HasMember("dso") || !frame.HasMember("dso_offset")) { + continue; + } + std::string_view dso(frame["dso"].GetString(), frame["dso"].GetStringLength()); + std::string_view dso_offset(frame["dso_offset"].GetString(), + frame["dso_offset"].GetStringLength()); + const bool dso_matches = dso.find("doris_be_test") != std::string_view::npos; + const bool offset_nonzero = dso_offset != "0x0"; + if (dso_matches && offset_nonzero) { + observed = true; + break; + } + } + } + + // 3. At least one attempt observed the test-binary frame. + EXPECT_TRUE(observed) << "no frame mapped to the test binary across 100 attempts"; +} + +} // namespace +} // namespace doris