Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions be/src/service/doris_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
#include "service/backend_options.h"
#include "service/backend_service.h"
#include "service/brpc_service.h"
#include "service/http/action/print_stack_globals.h"
#include "service/http_service.h"
#include "storage/options.h"
#include "storage/storage_engine.h"
Expand Down Expand Up @@ -127,6 +128,7 @@ void init_signals() {
if (ret < 0) {
exit(-1);
}
doris::print_stack::print_stack_init();
}

static void thrift_output(const char* x) {
Expand Down
296 changes: 296 additions & 0 deletions be/src/service/http/action/print_stack.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "service/http/action/print_stack.h"

#include <dirent.h>
#include <poll.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <atomic>
#include <cerrno>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <mutex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "common/symbol_index.h"
#include "service/http/action/print_stack_globals.h"
#include "util/defer_op.h"

namespace doris {
namespace {

// Reason: only one dump can run at a time. Two would fight over the
// signal channel and the capture slot.
// Spec: docs/architecture.md "Layer 3a invariants".
std::mutex s_dump_mutex;

// Reason: enumerate /proc/self/task. With a selector, return the
// selector only if /proc/self/task lists it; otherwise an empty
// vector. Without a selector, return every tid.
//
// Reference: <ck>/src/Storages/System/StorageSystemStackTrace.cpp:656-680
// (`getFilteredThreadIds`). CK enumerates /proc/self/task into a UInt64
// column and then runs the SQL WHERE predicate over it via
// `VirtualColumnUtils::filterBlockWithPredicate`. Our public API only
// accepts an optional `thread_id`, so we specialize CK's generic
// predicate filter to a single-tid equality check.
std::vector<int64_t> list_target_thread_ids(const PrintStackOptions& options) {
DIR* dir = ::opendir("/proc/self/task");
if (dir == nullptr) {
return {};
}

std::vector<int64_t> all_tids;
while (auto* ent = ::readdir(dir)) {
if (ent->d_name[0] == '.') {
continue;
}
char* end = nullptr;
long parsed = std::strtol(ent->d_name, &end, 10);
if (end == ent->d_name || *end != '\0' || parsed <= 0) {
continue;
}
all_tids.push_back(static_cast<int64_t>(parsed));
}
::closedir(dir);

if (!options.target_thread_id.has_value()) {
return all_tids;
}

int64_t want = *options.target_thread_id;
for (int64_t tid : all_tids) {
if (tid == want) {
return {tid};
}
}
return {};
}

// Reason: thread names come from /proc/<tid>/comm. Best-effort.
//
// Reference: <ck>/src/Storages/System/StorageSystemStackTrace.cpp:234-284
// (`getFilteredThreadNames`, Linux branch). CK reads the same
// `/proc/self/task/<tid>/comm` file per tid, then runs the SQL WHERE
// predicate over the collected names. We only need the per-tid name for
// the public response, so we skip the name-side predicate filter.
std::unordered_map<int64_t, std::string> read_thread_names(const std::vector<int64_t>& tids) {
std::unordered_map<int64_t, std::string> names;
names.reserve(tids.size());
for (int64_t tid : tids) {
char path[64];
std::snprintf(path, sizeof(path), "/proc/self/task/%lld/comm",
static_cast<long long>(tid));
std::ifstream in(path);
std::string name;
if (in.is_open()) {
std::getline(in, name);
}
names.emplace(tid, std::move(name));
}
return names;
}

// Reason: skip threads that block the service signal. Reads SigBlk
// from /proc/<tid>/status and checks bit (signal_number - 1).
//
// Reference: <ck>/src/Storages/System/StorageSystemStackTrace.cpp:294-327
// (`isSignalBlocked`). Same /proc parse. Note that CK's check is
// `sig_blk & signal` (line 317), which treats the raw signal number as
// a mask — that miscounts for any signal whose number is not already a
// power of two. Linux's SigBlk is a bitmask indexed by `signal - 1`, so
// we shift explicitly here.
bool is_signal_blocked(int64_t tid, int signal_number) {
char path[64];
std::snprintf(path, sizeof(path), "/proc/self/task/%lld/status",
static_cast<long long>(tid));
std::ifstream in(path);
if (!in.is_open()) {
return false;
}
std::string line;
while (std::getline(in, line)) {
if (line.compare(0, 7, "SigBlk:") != 0) {
continue;
}
// Format: "SigBlk:\t<hex64>".
std::string hex = line.substr(7);
// Trim leading whitespace.
size_t start = hex.find_first_not_of(" \t");
if (start == std::string::npos) {
return false;
}
hex = hex.substr(start);
unsigned long long mask = 0;
try {
mask = std::stoull(hex, nullptr, 16);
} catch (...) {
return false;
}
return (mask & (1ULL << (signal_number - 1))) != 0;
}
return false;
}

int remaining_ms_until(std::chrono::steady_clock::time_point deadline) {
auto now = std::chrono::steady_clock::now();
if (now >= deadline) {
return 0;
}
auto diff = std::chrono::duration_cast<std::chrono::milliseconds>(deadline - now).count();
if (diff <= 0) {
return 0;
}
return static_cast<int>(diff);
}

// Reason: thin wrapper over the rt_tgsigqueueinfo syscall.
// Reference: <ck>/src/Storages/System/StorageSystemStackTrace.cpp:114-118.
int rt_tgsigqueueinfo(pid_t tgid, pid_t tid, int sig, siginfo_t* info) {
return static_cast<int>(::syscall(__NR_rt_tgsigqueueinfo, tgid, tid, sig, info));
}

// Reason: map one captured PC to a (dso, dso_offset) pair.
// SymbolIndex is built lazily by MultiVersion<SymbolIndex>::instance()
// on first call; its construction reads dl_iterate_phdr and is not
// signal-safe, so this resolution runs in the coordinator.
// Reference: <ck>/src/Storages/System/StorageSystemStackTrace.cpp:560-563.
StackFrame pc_to_frame(uintptr_t pc) {
const auto* obj =
doris::SymbolIndex::instance()->findObject(reinterpret_cast<const void*>(pc));
if (obj == nullptr) {
return {};
}
return {obj->name, pc - reinterpret_cast<uintptr_t>(obj->address_begin)};
}

// Reason: bounded read of the notification pipe. Drains notifications
// whose sequence is not the current one.
// Reference: <ck>/src/Storages/System/StorageSystemStackTrace.cpp:186-228.
bool wait_on_pipe(int timeout_ms) {
while (true) {
pollfd pfd {print_stack::g_notification_pipe_rw[0], POLLIN, 0};
int rc = ::poll(&pfd, 1, timeout_ms);

if (rc < 0 && errno == EINTR) {
if (--timeout_ms <= 0) {
return false;
}
continue;
}
if (rc < 0) {
return false;
}
if (rc == 0) {
return false;
}

int seq = 0;
ssize_t n = ::read(print_stack::g_notification_pipe_rw[0], &seq, sizeof(seq));
if (n < 0 && errno == EINTR) {
continue;
}
if (n != static_cast<ssize_t>(sizeof(seq))) {
return false;
}
if (seq == print_stack::g_sequence_num.load(std::memory_order_relaxed)) {
return true;
}
}
}

// Reason: send one signal, wait bounded, copy out on match. Sequence
// advances on every exit so a late handler cannot publish into the
// next target's slot.
// Reference: <ck>/src/Storages/System/StorageSystemStackTrace.cpp:484-518.
void capture_one(int64_t tid, std::chrono::steady_clock::time_point deadline,
ThreadStackTrace* out) {
if (is_signal_blocked(tid, print_stack::kServiceSignal)) {
out->status = ThreadStackStatus::SignalBlocked;
return;
}

auto bump_seq = [] { ++print_stack::g_sequence_num; };
Defer<decltype(bump_seq)> on_exit(std::move(bump_seq));

siginfo_t si {};
si.si_code = SI_QUEUE;
si.si_pid = print_stack::g_server_pid.load(std::memory_order_relaxed);
si.si_value.sival_int = print_stack::g_sequence_num.load(std::memory_order_acquire);

if (rt_tgsigqueueinfo(si.si_pid, static_cast<pid_t>(tid), print_stack::kServiceSignal, &si) !=
0) {
if (errno == ESRCH) {
out->status = ThreadStackStatus::ThreadExited;
return;
}
out->status = ThreadStackStatus::CaptureFailed;
return;
}

if (!wait_on_pipe(remaining_ms_until(deadline)) ||
si.si_value.sival_int != print_stack::g_data_ready_num.load(std::memory_order_acquire)) {
out->status = ThreadStackStatus::Timeout;
return;
}

out->status = print_stack::g_slot.status;
out->frames.reserve(print_stack::g_slot.frame_count);
for (size_t i = 0; i < print_stack::g_slot.frame_count; ++i) {
out->frames.push_back(pc_to_frame(print_stack::g_slot.pcs[i]));
}
}

} // namespace

// Reason: orchestration shared by every variant. The per-variant step
// is the capture hook the handler invokes.
// Reference: <ck>/src/Storages/System/StorageSystemStackTrace.cpp:424-599.
PrintStackResult collect_print_stack(const PrintStackOptions& options) {
std::scoped_lock dump_lock(s_dump_mutex);

std::vector<int64_t> tids = list_target_thread_ids(options);
auto names = read_thread_names(tids);

PrintStackResult result;
result.threads.reserve(tids.size());
for (int64_t tid : tids) {
ThreadStackTrace row;
row.thread_id = tid;
row.thread_name = names[tid];

auto deadline = std::chrono::steady_clock::now() +
std::chrono::milliseconds(print_stack::kPipeReadTimeoutMs);
capture_one(tid, deadline, &row);

result.threads.push_back(std::move(row));
}

return result;
}

} // namespace doris
69 changes: 69 additions & 0 deletions be/src/service/http/action/print_stack.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>
#include <optional>
#include <string>
#include <vector>

namespace doris {

// Reason: status carries why a row has no frames. Stays in the typed
// result; the public JSON contract drops it.
// Spec: docs/architecture.md "Layer 5".
enum class ThreadStackStatus {
// Reason: capture succeeded; frames hold the resolved trace.
OK,
// Reason: handler did not publish before the bounded wait expired.
Timeout,
// Reason: target thread had the service signal in its blocked
// mask. Coordinator did not send the signal.
SignalBlocked,
// Reason: target thread no longer exists. Send returned ESRCH.
ThreadExited,
// Reason: send failed for a reason other than ESRCH, or the
// handler stored a failure status in the slot.
CaptureFailed,
};

// Reason: one resolved frame. (dso, dso_offset) is the canonical
// input to addr2line and llvm-symbolizer.
struct StackFrame {
std::string dso;
uint64_t dso_offset = 0;
};

struct ThreadStackTrace {
int64_t thread_id = 0;
std::string thread_name;
ThreadStackStatus status = ThreadStackStatus::OK;
std::vector<StackFrame> frames;
};

struct PrintStackOptions {
std::optional<int64_t> target_thread_id;
};

struct PrintStackResult {
std::vector<ThreadStackTrace> threads;
};

PrintStackResult collect_print_stack(const PrintStackOptions& options);

} // namespace doris
Loading
Loading