
Commit

improvements
Signed-off-by: Francisco Javier Honduvilla Coto <javierhonduco@gmail.com>
javierhonduco committed Jan 25, 2023
1 parent 38abbc5 commit dae09ee
Showing 7 changed files with 58,701 additions and 102 deletions.
2 changes: 1 addition & 1 deletion bpf/.clang-format
@@ -2,4 +2,4 @@
BasedOnStyle: LLVM
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
ColumnLimit: 120
ColumnLimit: 160
105 changes: 51 additions & 54 deletions bpf/cpu/cpu.bpf.c
@@ -12,10 +12,6 @@
#include "../common.h"
#include "hash.h"

//#include <uapi/linux/bpf.h>
enum {
BPF_F_NO_PREALLOC = (1U << 0),
};
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
@@ -26,12 +22,10 @@ enum {
// Number of frames to walk per tail call iteration.
#define MAX_STACK_DEPTH_PER_PROGRAM 15
// Number of BPF tail calls that will be attempted.
//
// invariant: `MAX_TAIL_CALLS * MAX_STACK_DEPTH_PER_PROGRAM` >=
// `MAX_STACK_DEPTH`
#define MAX_TAIL_CALLS 10
// Maximum number of frames.
#define MAX_STACK_DEPTH 127
_Static_assert(MAX_TAIL_CALLS * MAX_STACK_DEPTH_PER_PROGRAM >= MAX_STACK_DEPTH, "Not enough iterations to traverse the whole stack");
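// With the values above, 10 tail calls * 15 frames per call = 150 >= 127 frames,
// so this assertion holds.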
// Number of unique stacks.
#define MAX_STACK_TRACES_ENTRIES 1024
// Number of items in the stack counts aggregation map.
@@ -40,12 +34,16 @@ enum {
#define MAX_PROCESSES 1500
// Binary search iterations for dwarf based stack walking.
// 2^19 can bisect ~524_288 entries.
//
// invariant: `2^MAX_BINARY_SEARCH_DEPTH >= MAX_UNWIND_TABLE_SIZE`
#define MAX_BINARY_SEARCH_DEPTH 19
// Size of the unwind table.
// 250k * sizeof(stack_unwind_row_t) = 2MB
#define MAX_UNWIND_TABLE_SIZE 250 * 1000
_Static_assert(1 << MAX_BINARY_SEARCH_DEPTH >= MAX_UNWIND_TABLE_SIZE, "Unwind table too small");
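// Sanity check on the constants above: 2^19 = 524,288 >= 250,000, so the assert
// holds; the 2MB figure implies sizeof(stack_unwind_row_t) == 8 bytes.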


// Useful to isolate stack unwinding issues.
#define DISABLE_BPF_HELPER_FP_UNWINDER 1

// Unwind tables that can't fit in the remaining space of the current shard
// are broken up into chunks of up to `MAX_UNWIND_TABLE_SIZE` entries.
#define MAX_UNWIND_TABLE_CHUNKS 30
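// Taken together, the constants above allow 30 chunks of up to 250,000 rows each,
// i.e. up to 7,500,000 unwind table rows across all chunks.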
@@ -87,30 +85,28 @@ const volatile struct config_t config = {};

/*============================== MACROS =====================================*/

#define BPF_MAP(_name, _type, _key_type, _value_type, _max_entries) \
struct { \
__uint(type, _type); \
__uint(max_entries, _max_entries); \
__type(key, _key_type); \
__type(value, _value_type); \
#define BPF_MAP(_name, _type, _key_type, _value_type, _max_entries) \
struct { \
__uint(type, _type); \
__uint(max_entries, _max_entries); \
__type(key, _key_type); \
__type(value, _value_type); \
} _name SEC(".maps");

// Stack traces are slightly different in that the value is one big array
// holding the stack addresses.
typedef __u64 stack_trace_type[MAX_STACK_DEPTH];
#define BPF_STACK_TRACE(_name, _max_entries) \
BPF_MAP(_name, BPF_MAP_TYPE_STACK_TRACE, u32, stack_trace_type, _max_entries);

#define BPF_HASH(_name, _key_type, _value_type, _max_entries) \
BPF_MAP(_name, BPF_MAP_TYPE_HASH, _key_type, _value_type, _max_entries);

#define DEFINE_COUNTER(__func__name) \
static void BUMP_##__func__name() { \
u32 *c = bpf_map_lookup_elem(&percpu_stats, &__func__name); \
if (c != NULL) { \
*c += 1; \
} \
#define BPF_STACK_TRACE(_name, _max_entries) BPF_MAP(_name, BPF_MAP_TYPE_STACK_TRACE, u32, stack_trace_type, _max_entries);

#define BPF_HASH(_name, _key_type, _value_type, _max_entries) BPF_MAP(_name, BPF_MAP_TYPE_HASH, _key_type, _value_type, _max_entries);

#define DEFINE_COUNTER(__func__name) \
static void BUMP_##__func__name() { \
u32 *c = bpf_map_lookup_elem(&percpu_stats, &__func__name); \
if (c != NULL) { \
*c += 1; \
} \
}
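// Illustration only (hypothetical names, not part of this change): the macros
// above are meant to be used roughly like
//
//   BPF_HASH(my_map, u32, u64, 1024);  // declares a BPF_MAP_TYPE_HASH named my_map
//   DEFINE_COUNTER(MY_EVENT);          // defines BUMP_MY_EVENT(), which bumps the
//                                      // MY_EVENT slot in the percpu_stats map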

/*============================= INTERNAL STRUCTS ============================*/
@@ -294,8 +290,8 @@ static void unwind_print_stats() {
return;
}

u32 *jit_errors = bpf_map_lookup_elem(&percpu_stats, &UNWIND_JIT_ERRORS);
if (jit_errors == NULL) {
u32 *unknown_jit = bpf_map_lookup_elem(&percpu_stats, &UNWIND_JIT_ERRORS);
if (unknown_jit == NULL) {
return;
}

@@ -305,7 +301,7 @@
bpf_printk("truncated=%lu", *truncated_counter);
bpf_printk("catchall=%lu", *catchall_count);
bpf_printk("never=%lu", *never);
bpf_printk("jit_failure=%lu", *jit_errors);
bpf_printk("unknown_jit=%lu", *unknown_jit);

bpf_printk("total_counter=%lu", *total_counter);
bpf_printk("(not_covered=%lu)", *not_covered_count);
@@ -329,10 +325,12 @@ static __always_inline void *bpf_map_lookup_or_try_init(void *map, const void *k
if (val)
return val;

err = bpf_map_update_elem(map, key, init, BPF_NOEXIST);
err = bpf_map_update_elem(map, key, init, BPF_ANY); // ANY?
// 17 == EEXIST
if (err && err != -17)
if (err != 0) {
bpf_printk("[error] bpf_map_lookup_or_try_init with ret: %d", err);
return 0;
}

return bpf_map_lookup_elem(map, key);
}
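// Sketch of the intended call pattern (hypothetical caller, not part of this
// change): initialize the entry on first use, then mutate the returned value
// in place.
//
//   u64 zero = 0;
//   u64 *count = bpf_map_lookup_or_try_init(&some_map, &key, &zero);
//   if (count != NULL) {
//     __sync_fetch_and_add(count, 1);
//   }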
@@ -415,8 +413,7 @@ enum find_unwind_table_return {
// Finds the shard information for a given pid and program counter. Optionally,
// an offset can be passed that will be filled in with the mapping's load
// address.
static __always_inline enum find_unwind_table_return find_unwind_table(shard_info_t **shard_info, pid_t pid, u64 pc,
u64 *offset) {
static __always_inline enum find_unwind_table_return find_unwind_table(shard_info_t **shard_info, pid_t pid, u64 pc, u64 *offset) {
process_info_t *proc_info = bpf_map_lookup_elem(&process_info, &pid);
// Appease the verifier.
if (proc_info == NULL) {
@@ -493,8 +490,7 @@ static __always_inline enum find_unwind_table_return find_unwind_table(shard_inf
}

// Aggregate the given stacktrace.
static __always_inline void add_stack(struct bpf_perf_event_data *ctx, u64 pid_tgid, enum stack_walking_method method,
unwind_state_t *unwind_state) {
static __always_inline void add_stack(struct bpf_perf_event_data *ctx, u64 pid_tgid, enum stack_walking_method method, unwind_state_t *unwind_state) {
u64 zero = 0;
stack_count_key_t stack_key = {0};

@@ -523,8 +519,16 @@ static __always_inline void add_stack(struct bpf_perf_event_data *ctx, u64 pid_t
stack_key.user_stack_id = 0;

// Insert stack.
bpf_map_update_elem(&dwarf_stack_traces, &stack_hash, &unwind_state->stack, BPF_ANY);
int err = bpf_map_update_elem(&dwarf_stack_traces, &stack_hash, &unwind_state->stack, BPF_ANY);
if (err != 0) {
bpf_printk("[error] bpf_map_update_elem with ret: %d", err);
}

} else if (method == STACK_WALKING_METHOD_FP) {
bpf_printk("[info] fp unwinding %d", DISABLE_BPF_HELPER_FP_UNWINDER);
if (DISABLE_BPF_HELPER_FP_UNWINDER) {
return;
}
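// Note: DISABLE_BPF_HELPER_FP_UNWINDER is defined to 1 above, so frame-pointer
// stacks are currently skipped here; set it to 0 to collect them via
// bpf_get_stackid() below.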
int stack_id = bpf_get_stackid(ctx, &stack_traces, BPF_F_USER_STACK);
if (stack_id >= 0) {
stack_key.user_stack_id = stack_id;
@@ -591,8 +595,7 @@ int walk_user_stacktrace_impl(struct bpf_perf_event_data *ctx) {
bpf_printk("========== left %llu right %llu", left, right);
u64 table_idx = find_offset_for_pc(unwind_table, unwind_state->ip - offset, left, right);

if (table_idx == BINARY_SEARCH_NOT_FOUND || table_idx == BINARY_SEARCH_SHOULD_NEVER_HAPPEN ||
table_idx == BINARY_SEARCH_EXHAUSTED_ITERATIONS) {
if (table_idx == BINARY_SEARCH_NOT_FOUND || table_idx == BINARY_SEARCH_SHOULD_NEVER_HAPPEN || table_idx == BINARY_SEARCH_EXHAUSTED_ITERATIONS) {
bpf_printk("[error] binary search failed with %llx", table_idx);
return 1;
}
@@ -676,7 +679,7 @@ int walk_user_stacktrace_impl(struct bpf_perf_event_data *ctx) {
// is *always* 8 bytes below the previous stack pointer (the stack grows towards lower addresses).
u64 previous_rip_addr = previous_rsp - 8; // the saved return address is 8 bytes below the previous stack pointer
u64 previous_rip = 0;
int err = bpf_probe_read_user(&previous_rip, 8, (void *)(previous_rip_addr)); // 8 bytes, a whole word in a 64 bits machine
int err = bpf_probe_read_user(&previous_rip, 8, (void *)(previous_rip_addr));
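// Frame layout assumed here (x86-64 System V, illustration added for clarity):
//
//   higher addresses
//     ...                  <- caller's frame
//     [previous_rsp - 8]   <- saved return address (previous_rip), read above
//     ...                  <- callee's frame
//   lower addresses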

if (previous_rip == 0) {
int user_pid = pid_tgid;
@@ -692,9 +695,7 @@ int walk_user_stacktrace_impl(struct bpf_perf_event_data *ctx) {
return 1;
}

bpf_printk("[error] previous_rip should not be zero. This can mean that "
"the read failed, ret=%d while reading @ %llx.",
err, previous_rip_addr);
bpf_printk("[error] previous_rip should not be zero. This can mean that the read failed, ret=%d while reading @ %llx.", err, previous_rip_addr);
BUMP_UNWIND_CATCHALL_ERROR();
return 1;
}
@@ -706,10 +707,7 @@ int walk_user_stacktrace_impl(struct bpf_perf_event_data *ctx) {
} else {
u64 previous_rbp_addr = previous_rsp + found_rbp_offset;
bpf_printk("\t(bp_offset: %d, bp value stored at %llx)", found_rbp_offset, previous_rbp_addr);
int ret = bpf_probe_read_user(&previous_rbp, 8,
(void *)(previous_rbp_addr)); // 8 bytes, a whole word in a 64 bits
// machine

int ret = bpf_probe_read_user(&previous_rbp, 8, (void *)(previous_rbp_addr));
if (ret != 0) {
bpf_printk("[error] previous_rbp should not be zero. This can mean "
"that the read has failed %d.",
@@ -747,9 +745,7 @@ int walk_user_stacktrace_impl(struct bpf_perf_event_data *ctx) {
bpf_printk("======= reached main! =======");
add_stack(ctx, pid_tgid, STACK_WALKING_METHOD_DWARF, unwind_state);
BUMP_UNWIND_SUCCESS();
bpf_printk("yesssss :)");
} else {

int user_pid = pid_tgid;
process_info_t *proc_info = bpf_map_lookup_elem(&process_info, &user_pid);
if (proc_info == NULL) {
@@ -773,8 +769,7 @@ int walk_user_stacktrace_impl(struct bpf_perf_event_data *ctx) {
bpf_tail_call(ctx, &programs, 0);
}

// We couldn't walk enough frames
bpf_printk("nooooooo :(");
// We couldn't get the whole stacktrace.
BUMP_UNWIND_TRUNCATED();
return 0;
}
@@ -817,14 +812,16 @@ int profile_cpu(struct bpf_perf_event_data *ctx) {
int user_pid = pid_tgid;
int user_tgid = pid_tgid >> 32;

if (user_pid == 0)
if (user_pid == 0) {
return 0;
}

if (config.debug) {
// very noisy
// This can be very noisy
// bpf_printk("debug mode enabled, make sure you specified process name");
if (!is_debug_enabled_for_pid(user_tgid))
if (!is_debug_enabled_for_pid(user_tgid)) {
return 0;
}
}

bool has_unwind_info = has_unwind_information(user_pid);
1 change: 1 addition & 0 deletions dnf
