Skip to content

Commit

Permalink
libroach,cli,server: add thread stack dump facility
Browse files Browse the repository at this point in the history
Add a facility for dumping the stack traces for all threads in the
process on Linux. The technique used was adapted from
github.com/thoughtspot/threadstacks. The list of threads is retrieved by
scanning `/proc/self/task`. A realtime signal is sent to each thread
using `rt_tgsigqueueinfo`. A custom signal handler for that signal uses
the glibc `backtrace` facility to retrieve the thread's
stack. Communication between the coordinating thread and the signalled
thread is performed using a pipe (most other synchronization primitives
are not safe to use from a signal handler).

Hook up `/debug/threads` endpoint and add a link on the debug page of
the admin UI. Extend `/_status/stacks` to allow the optional retrieval
of thread stacks (vs the default of goroutine stacks). Enhance `debug
zip` to retrieve the thread stacks for each node.

Release note (admin ui change): Improve debuggability of C++-level
issues by providing access to thread stack traces via a new
`/debug/threads` endpoint which is exposed on the Admin UI advanced
debug page. Include thread stack traces in the info collected by `debug
zip`. Thread stack traces are currently only available on Linux.
  • Loading branch information
petermattis committed Mar 9, 2020
1 parent c9b189b commit 957b4bd
Show file tree
Hide file tree
Showing 14 changed files with 767 additions and 284 deletions.
5 changes: 5 additions & 0 deletions c-deps/libroach/CMakeLists.txt
Expand Up @@ -21,6 +21,10 @@ cmake_minimum_required(VERSION 3.3 FATAL_ERROR)

project(roachlib)

# Define OS_LINUX when building for Linux. stack_trace.cc checks this macro
# (together with __GLIBC__) to decide whether the thread stack dump
# implementation is compiled in.
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
add_definitions(-DOS_LINUX)
endif()

add_library(roach
batch.cc
cache.cc
Expand All @@ -42,6 +46,7 @@ add_library(roach
options.cc
snapshot.cc
sst_dump.cc
stack_trace.cc
table_props.cc
utils.cc
protos/kv/kvserver/concurrency/lock/locking.pb.cc
Expand Down
9 changes: 8 additions & 1 deletion c-deps/libroach/db.cc
Expand Up @@ -35,6 +35,7 @@
#include "protos/roachpb/errors.pb.h"
#include "row_counter.h"
#include "snapshot.h"
#include "stack_trace.h"
#include "status.h"
#include "table_props.h"
#include "timestamp.h"
Expand Down Expand Up @@ -807,7 +808,9 @@ DBStatus DBPartialMergeOne(DBSlice existing, DBSlice update, DBString* new_value

// DBGetStats queries the given DBEngine for various operational stats and
// writes them to the provided DBStatsResult instance. The returned DBStatus
// is whatever the engine's GetStats call reports.
DBStatus DBGetStats(DBEngine* db, DBStatsResult* stats) { return db->GetStats(stats); }
DBStatus DBGetStats(DBEngine* db, DBStatsResult* stats) {
return db->GetStats(stats);
}

// `DBGetTickersAndHistograms` retrieves maps of all RocksDB tickers and histograms.
// It differs from `DBGetStats` by getting _every_ ticker and histogram, and by not
Expand Down Expand Up @@ -1232,3 +1235,7 @@ DBListDirResults DBEnvListDir(DBEngine* db, DBSlice name) {
}
return result;
}

// DBDumpThreadStacks exposes DumpThreadStacks() through the C API: it runs
// the dump and converts the resulting text to a DBString via ToDBString.
DBString DBDumpThreadStacks() {
return ToDBString(DumpThreadStacks());
}
8 changes: 7 additions & 1 deletion c-deps/libroach/include/libroach.h
Expand Up @@ -611,7 +611,13 @@ typedef struct {

// DBEnvListDir lists the contents of the directory with name.
DBListDirResults DBEnvListDir(DBEngine* db, DBSlice name);



// DBDumpThreadStacks returns a textual dump of the stacks for all
// threads. Frames may be reported as raw addresses without symbol
// names; use addr2line (or atos on Darwin) to symbolize them. On
// builds without Linux/glibc support the returned text is a message
// stating that thread stacks are unavailable.
DBString DBDumpThreadStacks();

#ifdef __cplusplus
} // extern "C"
#endif
333 changes: 333 additions & 0 deletions c-deps/libroach/stack_trace.cc
@@ -0,0 +1,333 @@
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

#include "stack_trace.h"

#if defined(OS_LINUX) && defined(__GLIBC__)

#include <cxxabi.h>
#include <dirent.h>
#include <errno.h>
#include <execinfo.h>
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <atomic>
#include <memory>
#include <mutex>
#include <string>
#include <vector>

namespace {

// Signal sent to each thread to ask it to record its own stack trace. A
// handler for it is installed only for the duration of a dump.
const int kStackTraceSignal = SIGRTMIN;

// Maximum depth allowed for a stack trace. This is also the capacity of
// the per-thread frame buffer, so deeper stacks are cut off at this many
// frames.
const int kMaxDepth = 100;

// Stack trace of a thread. The coordinating thread allocates one instance
// per signalled thread and passes its address as the signal payload; the
// signal handler running on the target thread fills it in and then calls
// Ack().
struct ThreadStack {
  ThreadStack(pid_t id, int fd) : tid(id), ack_fd(fd) {}

  // Marks the stack as populated and notifies the coordinator by writing a
  // single byte to ack_fd, retrying if the write is interrupted. Returns
  // true iff the byte was written.
  bool Ack() {
    done = true;
    const char ack_ch = 'y';  // the specific value doesn't matter
    ssize_t num_written;      // write() returns ssize_t, not int
    do {
      num_written = write(ack_fd, &ack_ch, sizeof(ack_ch));
    } while (num_written < 0 && errno == EINTR);
    return num_written == static_cast<ssize_t>(sizeof(ack_ch));
  }

  // ID of the thread to retrieve stack trace from.
  const pid_t tid;
  // File descriptor where the ack should be written.
  const int ack_fd;
  // The stack trace.
  void* addr[kMaxDepth];
  // The depth of the stack trace.
  int depth = 0;
  // Has the stack been populated. Must be explicitly initialized: a
  // default-constructed std::atomic<bool> holds an indeterminate value
  // before C++20, and this flag is read for threads that never respond.
  std::atomic<bool> done{false};
};

// Returns the IDs of all threads in the current process by listing
// /proc/self/task. On failure (the directory cannot be opened, or it yields
// no thread IDs) the returned vector is empty and *error describes the
// problem.
std::vector<pid_t> ListThreads(std::string *error) {
  std::vector<pid_t> pids;
  DIR* dir;
  do {
    dir = opendir("/proc/self/task");
  } while (dir == nullptr && errno == EINTR);
  if (dir == nullptr) {
    *error = "unable to open /proc/self/task";
    return pids;
  }

  for (;;) {
    // readdir() returns nullptr both at end-of-directory (errno untouched)
    // and on error (errno set). Clear errno first so a stale EINTR left by
    // an earlier call cannot turn end-of-directory into an endless retry.
    //
    // NB: readdir_r is deprecated and readdir is actually thread-safe
    // on modern versions of glibc.
    errno = 0;
    struct dirent* entry = readdir(dir);
    if (entry == nullptr) {
      if (errno == EINTR) {
        continue;
      }
      break;
    }
    // Accept only entries that are entirely a positive decimal number. This
    // skips "." and ".." as well as anything else that is not a thread ID.
    const char* name = entry->d_name;
    char* end = nullptr;
    const long long tid = strtoll(name, &end, 10);
    if (end == name || *end != '\0' || tid <= 0) {
      continue;
    }
    pids.push_back(static_cast<pid_t>(tid));
  }

  // Call closedir() exactly once. After a failed closedir() the stream must
  // not be used again, so retrying on EINTR would touch a dead DIR*.
  closedir(dir);

  if (pids.empty()) {
    *error = "no threads found in /proc/self/task";
  }
  return pids;
}

// Returns the blocked-signal mask of thread `tid`, parsed from the
// hexadecimal "SigBlk:" field of /proc/<tid>/status. On failure returns 0
// and stores a description of the problem in *error.
uint64_t BlockedSignals(pid_t tid, std::string *error) {
  const std::string path = "/proc/" + std::to_string(tid) + "/status";
  int fd;
  do {
    fd = open(path.c_str(), O_RDONLY);
  } while (fd < 0 && errno == EINTR);
  // open() reports failure with -1, so test for any negative value.
  if (fd < 0) {
    *error = path + ": unable to open";
    return 0;
  }

  // Slurp the whole file; it is small.
  std::string data;
  bool read_failed = false;
  for (;;) {
    char buf[1024];
    ssize_t n;
    do {
      n = read(fd, buf, sizeof(buf));
    } while (n < 0 && errno == EINTR);
    if (n < 0) {
      read_failed = true;
      break;
    }
    if (n == 0) {
      break;
    }
    data.append(buf, static_cast<size_t>(n));
  }

  // Close exactly once: on Linux the descriptor is released even when
  // close() fails with EINTR, so a retry could close an unrelated
  // descriptor opened concurrently by another thread.
  close(fd);

  if (read_failed) {
    *error = path + ": read failed";
    return 0;
  }

  const std::string needle("SigBlk:");
  const size_t pos = data.find(needle);
  if (pos == std::string::npos) {
    *error = path + ": unable to find SigBlk";
    return 0;
  }
  // strtoull skips the whitespace separating the label from the hex value.
  return strtoull(data.c_str() + pos + needle.size(), nullptr, 16);
}

// Signal handler for the stack trace signal. The signal payload carries a
// pointer to the ThreadStack to fill in; the handler records the current
// thread's backtrace into it and acknowledges completion.
void InternalHandler(int signum, siginfo_t* siginfo, void* ucontext) {
  (void)signum;
  (void)ucontext;
  // The calls below may modify errno. Preserve it so that the code which
  // was interrupted by this signal does not observe a changed value.
  const int saved_errno = errno;

  // Ignore signals that were sent by an external process. The
  // stacktrace signal handler is intended only for signals we send to
  // ourselves.
  if (siginfo->si_pid == getpid()) {
    auto stack = reinterpret_cast<ThreadStack*>(siginfo->si_value.sival_ptr);
    if (stack != nullptr) {
      stack->depth = backtrace(stack->addr, kMaxDepth);
      stack->Ack();
    }
  }

  errno = saved_errno;
}

// Queues signal `signum`, carrying `payload`, to thread `tid` of process
// `pid`. This plays the role of pthread_sigqueue() but addresses the target
// by tid, because no pthread_t is available for it. Returns the result of
// the rt_tgsigqueueinfo system call.
int SignalThread(pid_t pid, pid_t tid, uid_t uid, int signum, sigval payload) {
  siginfo_t queued;
  memset(&queued, 0, sizeof(queued));
  queued.si_value = payload;
  queued.si_uid = uid;
  queued.si_pid = pid;
  queued.si_code = SI_QUEUE;
  queued.si_signo = signum;

  const long rc = syscall(SYS_rt_tgsigqueueinfo, pid, tid, signum, &queued);
  return static_cast<int>(rc);
}

// Returns the current wall-clock time in milliseconds since the Unix epoch,
// as reported by gettimeofday().
int64_t NowMillis() {
  timeval tv{};
  gettimeofday(&tv, nullptr);
  // Widen the seconds to 64 bits before scaling so the multiplication cannot
  // overflow on platforms where time_t is 32 bits wide.
  return static_cast<int64_t>(tv.tv_sec) * 1000 +
         static_cast<int64_t>(tv.tv_usec) / 1000;
}

std::string DumpThreadStacksHelper() {
std::string error;
auto tids = ListThreads(&error);
if (tids.empty()) {
return error;
}

// Create a pipe on which threads can send acks after they finish
// writing their stacktrace. Since Linux 2.6.11, the default pipe
// capacity has been 65536. Each thread will be writing a single
// byte to the pipe, so they should never block.
int pipe_fd[2];
if (pipe(pipe_fd) == -1) {
return "unable to create pipe";
}

// Signal all threads to write their stack trace in a pre-allocated
// area. Note that some threads might have died by now, so
// signalling them will fail.
std::vector<std::unique_ptr<ThreadStack>> stacks;
const auto pid = getpid();
const auto uid = getuid();
std::string result;
char buf[128];
for (auto tid : tids) {
std::string error;
const uint64_t blocked = BlockedSignals(tid, &error);
if ((blocked & (1ULL << kStackTraceSignal)) != 0) {
// The thread is blocking receipt of our signal, so don't bother
// sending it.
continue;
}
if (!error.empty()) {
snprintf(buf, sizeof(buf), "thread %d\n%s\n\n", tid, error.c_str());
result.append(buf);
}

std::unique_ptr<ThreadStack> stack(new ThreadStack(tid, pipe_fd[1]));
union sigval payload;
payload.sival_ptr = stack.get();
if (SignalThread(pid, tid, uid, kStackTraceSignal, payload) == 0) {
stacks.push_back(std::move(stack));
} else {
snprintf(buf, sizeof(buf), "thread %d\n(no response)\n\n", tid);
result.append(buf);
}
}

// Set operations on pipe_fd[0] to be non-blocking. This is
// important if the poll() on this fd returns, but the subsequent
// read block.
int flags = fcntl(pipe_fd[0], F_GETFL, 0);
fcntl(pipe_fd[0], F_SETFL, flags | O_NONBLOCK);

// Wait for all the acks, timing out after 5 seconds.
auto end = NowMillis() + 5000;
for (int acks = 0; acks < stacks.size(); ) {
pollfd pollfds[1];
pollfds[0].fd = pipe_fd[0];
pollfds[0].events = POLLIN;
pollfds[0].revents = 0;
auto timeout = end - NowMillis();
if (timeout <= 0) {
break;
}
auto ret = poll(pollfds, 1, int(timeout));
if (ret == -1) {
continue;
}
if (ret == 0) {
// We timed out before reading all of the stacks.
break;
}
if (pollfds[0].revents & POLLIN) {
char buf[128];
auto num_read = read(pipe_fd[0], buf, sizeof(buf));
if (num_read >= 0) {
acks += num_read;
}
}
}

close(pipe_fd[0]);
close(pipe_fd[1]);

for (auto& stack : stacks) {
if (!stack->done) {
// We were unable to populate the stack. This could occur if the
// signal to the thread was blocked or delayed. In the case of a
// delayed signal, it could be delivered later, so we need keep
// the stack around to be populated at that point.
snprintf(buf, sizeof(buf), "thread %d\n(no response)\n\n", stack->tid);
result.append(buf);
stack.release();
continue;
}

snprintf(buf, sizeof(buf), "thread %d\n", stack->tid);
result.append(buf);

auto syms = backtrace_symbols(stack->addr, stack->depth);
for (int i = 2; i < stack->depth; ++i) {
if (syms != nullptr) {
// Note that backtrace_symbols includes the address in the
// output it returns.
snprintf(buf, sizeof(buf), "#%-2d %s\n", i-2, syms[i]);
} else {
snprintf(buf, sizeof(buf), "#%-2d 0x%08lx\n", i-2, (uintptr_t)(stack->addr[i]));
}
result.append(buf);
}
result.append("\n");

if (syms != nullptr) {
free(syms);
}
}

return result;
}

} // namespace

// Returns a textual dump of the stack traces of all threads in the process.
//
// Temporarily installs a handler for kStackTraceSignal, collects the stacks
// and then restores the previous handler. If the handler cannot be
// installed, an error message is returned instead.
std::string DumpThreadStacks() {
  // The signal disposition is process-wide state. Serialize callers so that
  // one call cannot restore the previous handler while another call still
  // has signals in flight.
  static std::mutex mu;
  std::lock_guard<std::mutex> lock(mu);

  // Call backtrace() once outside of signal context. NOTE(review): glibc is
  // understood to load its unwinder lazily on the first call, which is not
  // something we want to happen inside the signal handler - confirm.
  void* warmup[1];
  (void)backtrace(warmup, 1);

  struct sigaction action;
  struct sigaction oldaction;
  memset(&action, 0, sizeof(action));
  action.sa_sigaction = InternalHandler;
  // Set SA_RESTART so that supported syscalls are automatically restarted if
  // interrupted by the stacktrace collection signal.
  action.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
  if (sigaction(kStackTraceSignal, &action, &oldaction) != 0) {
    return "unable to initialize signal handler";
  }

  auto result = DumpThreadStacksHelper();

  // Restore the old signal handler. We ignore error here as there
  // isn't anything to do if we encounter an error.
  sigaction(kStackTraceSignal, &oldaction, nullptr);

  return result;
}

#else // !defined(OS_LINUX) || !defined(__GLIBC__)

// Fallback used when the build is not Linux with glibc: no stacks are
// collected and a fixed explanatory message is returned instead.
std::string DumpThreadStacks() {
  static const char kUnsupported[] = "thread stacks only available on Linux/Glibc";
  return std::string(kUnsupported);
}

#endif // !defined(OS_LINUX) || !defined(__GLIBC__)

0 comments on commit 957b4bd

Please sign in to comment.