Skip to content

Commit

Permalink
Fix malloc dead lock cause by contention profiler
Browse files Browse the repository at this point in the history
  • Loading branch information
chenBright committed Jul 7, 2024
1 parent 2e18318 commit daf3e98
Show file tree
Hide file tree
Showing 18 changed files with 230 additions and 132 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ jobs:
- name: compile tests
run: |
cd test
make -j ${{env.proc_num}}
make -j ${{env.proc_num}} test_butil
- name: run tests
run: |
cd test
Expand Down
4 changes: 2 additions & 2 deletions src/brpc/builtin/hotspots_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
#include "brpc/details/tcmalloc_extension.h"

extern "C" {
int __attribute__((weak)) ProfilerStart(const char* fname);
void __attribute__((weak)) ProfilerStop();
int BAIDU_WEAK ProfilerStart(const char* fname);
void BAIDU_WEAK ProfilerStop();
}

namespace bthread {
Expand Down
4 changes: 2 additions & 2 deletions src/brpc/builtin/pprof_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ extern "C" {
#if defined(OS_LINUX)
extern char *program_invocation_name;
#endif
int __attribute__((weak)) ProfilerStart(const char* fname);
void __attribute__((weak)) ProfilerStop();
int BAIDU_WEAK ProfilerStart(const char* fname);
void BAIDU_WEAK ProfilerStop();
}

namespace bthread {
Expand Down
2 changes: 1 addition & 1 deletion src/brpc/policy/rtmp_protocol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
// we mark the symbol as weak. If the runtime does not have the function,
// handshaking will fallback to the simple one.
extern "C" {
const EVP_MD* __attribute__((weak)) EVP_sha256(void);
const EVP_MD* BAIDU_WEAK EVP_sha256(void);
}


Expand Down
4 changes: 2 additions & 2 deletions src/brpc/socket.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@
#endif

namespace bthread {
size_t __attribute__((weak))
get_sizes(const bthread_id_list_t* list, size_t* cnt, size_t n);
size_t BAIDU_WEAK get_sizes(const bthread_id_list_t* list,
size_t* cnt, size_t n);
}


Expand Down
46 changes: 35 additions & 11 deletions src/bthread/mutex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
// Date: Sun Aug 3 12:46:15 CST 2014

#include <pthread.h>
#include <execinfo.h>
#include <dlfcn.h> // dlsym
#include <fcntl.h> // O_RDONLY
#include "butil/atomicops.h"
Expand All @@ -34,26 +33,25 @@
#include "butil/files/file_path.h"
#include "butil/file_util.h"
#include "butil/unique_ptr.h"
#include "butil/memory/scope_guard.h"
#include "butil/third_party/murmurhash3/murmurhash3.h"
#include "butil/third_party/symbolize/symbolize.h"
#include "butil/logging.h"
#include "butil/object_pool.h"
#include "butil/debug/stack_trace.h"
#include "bthread/butex.h" // butex_*
#include "bthread/processor.h" // cpu_relax, barrier
#include "bthread/mutex.h" // bthread_mutex_t
#include "bthread/sys_futex.h"
#include "bthread/log.h"

extern "C" {
extern void* __attribute__((weak)) _dl_sym(void* handle, const char* symbol, void* caller);
extern void* BAIDU_WEAK _dl_sym(void* handle, const char* symbol, void* caller);
}
extern int __attribute__((weak)) GetStackTrace(void** result, int max_depth, int skip_count);

namespace bthread {
// Warm up backtrace before main().
void* dummy_buf[4];
const int ALLOW_UNUSED dummy_bt = GetStackTrace
? GetStackTrace(dummy_buf, arraysize(dummy_buf), 0)
: backtrace(dummy_buf, arraysize(dummy_buf));
const butil::debug::StackTrace ALLOW_UNUSED dummy_bt;

// For controlling contentions collected per second.
static bvar::CollectorSpeedLimit g_cp_sl = BVAR_COLLECTOR_SPEED_LIMIT_INITIALIZER;
Expand Down Expand Up @@ -457,6 +455,10 @@ inline uint64_t hash_mutex_ptr(const Mutex* m) {
// code are never sampled, otherwise deadlock may occur.
static __thread bool tls_inside_lock = false;

// Warn up some singleton objects used in contention profiler
// to avoid deadlock in malloc call stack.
static __thread bool tls_warn_up = false;

// Speed up with TLS:
// Most pthread_mutex are locked and unlocked in the same thread. Putting
// contention information in TLS avoids collisions that may occur in
Expand Down Expand Up @@ -527,18 +529,40 @@ inline bool remove_pthread_contention_site(const Mutex* mutex,
// Submit the contention along with the callsite('s stacktrace)
void submit_contention(const bthread_contention_site_t& csite, int64_t now_ns) {
tls_inside_lock = true;
BRPC_SCOPE_EXIT {
tls_inside_lock = false;
};

butil::debug::StackTrace stack; // May lock.
size_t nframes = 0;
const void* const* addrs = stack.Addresses(&nframes);
if (0 == nframes) {
return;
}
// There are two situations where we need to check whether in the
// malloc call stack:
// 1. Warn up some singleton objects used in `submit_contention'
// to avoid deadlock in malloc call stack.
// 2. LocalPool is empty, GlobalPool may allocate memory by malloc.
if (!tls_warn_up || butil::local_pool_free_empty<SampledContention>()) {
// In malloc call stack, can not submit contention.
if (stack.FindSymbol((void*)malloc)) {
return;
}
}

auto sc = butil::get_object<SampledContention>();
// Normalize duration_us and count so that they're addable in later
// processings. Notice that sampling_range is adjusted periodically by
// collecting thread.
sc->duration_ns = csite.duration_ns * bvar::COLLECTOR_SAMPLING_BASE
/ csite.sampling_range;
sc->count = bvar::COLLECTOR_SAMPLING_BASE / (double)csite.sampling_range;
sc->nframes = GetStackTrace
? GetStackTrace(sc->stack, arraysize(sc->stack), 0)
: backtrace(sc->stack, arraysize(sc->stack)); // may lock
memcpy(sc->stack, addrs, sizeof(void*) * nframes);
sc->nframes = nframes;
sc->submit(now_ns / 1000); // may lock
tls_inside_lock = false;
// Once submit a contention, complete warn up.
tls_warn_up = true;
}

namespace internal {
Expand Down
3 changes: 0 additions & 3 deletions src/butil/debug/stack_trace.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ StackTrace::StackTrace(const void* const* trace, size_t count) {
count_ = count;
}

StackTrace::~StackTrace() {
}

const void *const *StackTrace::Addresses(size_t* count) const {
*count = count_;
if (count_)
Expand Down
5 changes: 3 additions & 2 deletions src/butil/debug/stack_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,13 @@ class BUTIL_EXPORT StackTrace {

// Copying and assignment are allowed with the default functions.

~StackTrace();

// Gets an array of instruction pointer values. |*count| will be set to the
// number of elements in the returned array.
const void* const* Addresses(size_t* count) const;

// Whether if the given symbol is found in the stack trace.
bool FindSymbol(void* symbol) const;

// Prints the stack trace to stderr.
void Print() const;

Expand Down
21 changes: 20 additions & 1 deletion src/butil/debug/stack_trace_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
#include "butil/third_party/symbolize/symbolize.h"
#endif

extern int __attribute__((weak)) GetStackTrace(void** result, int max_depth, int skip_count);
extern int BAIDU_WEAK GetStackTrace(void** result, int max_depth, int skip_count);

namespace butil {
namespace debug {
Expand Down Expand Up @@ -763,6 +763,25 @@ StackTrace::StackTrace() {
}
}

bool StackTrace::FindSymbol(void* symbol) const {
#if !defined(__UCLIBC__)
for (size_t i = 0; i < count_; ++i) {
void* saddr;
// Subtract by one as return address of function may be in the next
// function when a function is annotated as noreturn.
void* address = static_cast<char*>(trace_[i]) - 1;
if (!google::SymbolizeAndAddress(address, &saddr)) {
continue;
}
LOG(INFO) << "saddr: " << saddr << " symbol: " << symbol;
if (saddr == symbol) {
return true;
}
}
#endif
return false;
}

void StackTrace::Print() const {
// NOTE: This code MUST be async-signal safe (it's used by in-process
// stack dumping signal handler). NO malloc or stdio is allowed here.
Expand Down
6 changes: 2 additions & 4 deletions src/butil/iobuf_profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include "butil/hash.h"
#include <execinfo.h>

extern int __attribute__((weak)) GetStackTrace(void** result, int max_depth, int skip_count);
extern int BAIDU_WEAK GetStackTrace(void** result, int max_depth, int skip_count);

namespace butil {

Expand Down Expand Up @@ -296,9 +296,7 @@ void SubmitIOBufSample(IOBuf::Block* block, int64_t ref) {
auto sample = IOBufSample::New();
sample->block = block;
sample->count = ref;
sample->nframes = GetStackTrace(sample->stack,
arraysize(sample->stack),
0); // may lock
sample->nframes = GetStackTrace(sample->stack, arraysize(sample->stack), 0);
IOBufProfiler::GetInstance()->Submit(sample);
}

Expand Down
5 changes: 5 additions & 0 deletions src/butil/object_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ template <typename T> struct ObjectPoolValidator {

namespace butil {

// Whether if FreeChunk of LocalPool is empty.
template <typename T> inline bool local_pool_free_empty() {
return ObjectPool<T>::singleton()->local_free_empty();
}

// Get an object typed |T|. The object should be cleared before usage.
// NOTE: T must be default-constructible.
template <typename T> inline T* get_object() {
Expand Down
12 changes: 12 additions & 0 deletions src/butil/object_pool_inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,13 +216,25 @@ class BAIDU_CACHELINE_ALIGNMENT ObjectPool {
return -1;
}

inline bool free_empty() const {
return 0 == _cur_free.nfree;
}

private:
ObjectPool* _pool;
Block* _cur_block;
size_t _cur_block_index;
FreeChunk _cur_free;
};

inline bool local_free_empty() {
LocalPool* lp = get_or_new_local_pool();
if (BAIDU_LIKELY(lp != NULL)) {
return lp->free_empty();
}
return true;
}

inline T* get_object() {
LocalPool* lp = get_or_new_local_pool();
if (BAIDU_LIKELY(lp != NULL)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ static int GetRunningOnValgrind(void) {
}

/* See the comments in dynamic_annotations.h */
int __attribute__((weak)) RunningOnValgrind(void) {
int DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK RunningOnValgrind(void) {
static volatile int running_on_valgrind = -1;
/* C doesn't have thread-safe initialization of statics, and we
don't want to depend on pthread_once here, so hack it. */
Expand Down
10 changes: 10 additions & 0 deletions src/butil/third_party/symbolize/symbolize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
#include "symbolize.h"
#include "demangle.h"
#include "butil/compiler_specific.h"
#include "butil/logging.h"

_START_GOOGLE_NAMESPACE_

Expand Down Expand Up @@ -830,6 +831,15 @@ bool BAIDU_WEAK Symbolize(void *pc, char *out, int out_size) {
return SymbolizeAndDemangle(pc, out, out_size);
}

bool BAIDU_WEAK SymbolizeAndAddress(void *pc, void **out) {
Dl_info info{};
if (dladdr(pc, &info)) {
LOG(INFO) << "info.dli_saddr: " << info.dli_saddr << ", info.dli_sname: " << info.dli_sname;
*out = info.dli_saddr;
}
return NULL != info.dli_saddr;
}

_END_GOOGLE_NAMESPACE_

#else /* HAVE_SYMBOLIZE */
Expand Down
2 changes: 2 additions & 0 deletions src/butil/third_party/symbolize/symbolize.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ _START_GOOGLE_NAMESPACE_
// returns false.
bool Symbolize(void *pc, char *out, int out_size);

bool SymbolizeAndAddress(void *pc, void **out);

_END_GOOGLE_NAMESPACE_

#endif // BUTIL_SYMBOLIZE_H_
Loading

0 comments on commit daf3e98

Please sign in to comment.