diff --git a/agent/src/ebpf/kernel/include/perf_profiler.h b/agent/src/ebpf/kernel/include/perf_profiler.h index ea2c74dc49b..a33cc7ec074 100644 --- a/agent/src/ebpf/kernel/include/perf_profiler.h +++ b/agent/src/ebpf/kernel/include/perf_profiler.h @@ -44,12 +44,39 @@ typedef enum { struct stack_trace_key_t { __u32 pid; // processID or threadID __u32 tgid; // processID + __u64 itid; // interpreter thread id __u32 cpu; char comm[TASK_COMM_LEN]; int kernstack; int userstack; + __u64 intpstack; __u64 timestamp; __u64 duration_ns; }; +typedef struct { + char class_name[32]; + char method_name[64]; + char path[128]; +} symbol_t; + +#define PYTHON_STACK_FRAMES_PER_RUN 16 +#define PYTHON_STACK_PROG_MAX_RUN 5 +#define MAX_STACK_DEPTH (PYTHON_STACK_PROG_MAX_RUN * PYTHON_STACK_FRAMES_PER_RUN) + +typedef struct { + __u64 len; + __u64 addresses[MAX_STACK_DEPTH]; +} stack_trace_t; + +typedef struct { + struct stack_trace_key_t key; + stack_trace_t stack; + + void *thread_state; + void *frame_ptr; + + __u32 runs; +} unwind_state_t; + #endif /* DF_BPF_PERF_PROFILER_H */ diff --git a/agent/src/ebpf/kernel/perf_profiler.bpf.c b/agent/src/ebpf/kernel/perf_profiler.bpf.c index 0114e600e7a..6d138a34654 100644 --- a/agent/src/ebpf/kernel/perf_profiler.bpf.c +++ b/agent/src/ebpf/kernel/perf_profiler.bpf.c @@ -63,13 +63,195 @@ MAP_PERF_EVENT(profiler_output_b, int, __u32, MAX_CPU) MAP_STACK_TRACE(stack_map_a, STACK_MAP_ENTRIES) MAP_STACK_TRACE(stack_map_b, STACK_MAP_ENTRIES) +MAP_PROG_ARRAY(progs_jmp_perf_map, __u32, __u32, PROG_PERF_NUM) +struct bpf_map_def SEC("maps") __python_symbols = { + .type = BPF_MAP_TYPE_LRU_HASH, + __BPF_MAP_DEF(symbol_t, __u32, 512), +}; +MAP_PERARRAY(heap, __u32, unwind_state_t, 1) +MAP_PERARRAY(python_symbol_index, __u32, __u32, 1) +MAP_HASH(python_stack, __u64, stack_trace_t, STACK_MAP_ENTRIES) + +static inline __attribute__((always_inline)) bool comm_eq(char *a, char *b) { +#pragma unroll + for (int i = 0; i < TASK_COMM_LEN; i++) { + if (a[i] == '\0' || b[i] == '\0') { + return a[i] == b[i]; + } + if (a[i] != b[i]) { + return false; + } + } + return true; +} + +struct { + struct { + __s64 current_frame; + } py_cframe; + struct { + __s64 co_filename; + __s64 co_name; + __s64 co_varnames; + __s64 co_firstlineno; + } py_code_object; + struct { + __s64 f_back; + __s64 f_code; + __s64 f_lineno; + __s64 f_localsplus; + } py_frame_object; + struct { + __s64 ob_type; + } py_object; + struct { + __s64 data; + __s64 size; + } py_string; + struct { + __s64 next; + __s64 interp; + __s64 frame; + __s64 thread_id; + __s64 native_thread_id; + __s64 cframe; + } py_thread_state; + struct { + __s64 ob_item; + } py_tuple_object; + struct { + __s64 tp_name; + } py_type_object; + struct { + __s64 owner; + } py_interpreter_frame; +} py_offsets = { + .py_cframe = { + .current_frame = 0, + }, + .py_code_object = { + .co_filename = 104, + .co_name = 112, + .co_varnames = 72, + .co_firstlineno = 40, + }, + .py_frame_object = { + .f_back = 24, + .f_code = 32, + .f_lineno = 108, + .f_localsplus = 360, + }, + .py_object = { + .ob_type = 8, + }, + .py_string = { + .data = 48, + .size = 16, + }, + .py_thread_state = { + .next = 8, + .interp = 16, + .frame = 24, + .thread_id = 176, + .native_thread_id = -1, + .cframe = -1, + }, + .py_tuple_object = { + .ob_item = 24, + }, + .py_type_object = { + .tp_name = 24, + }, + .py_interpreter_frame = { + .owner = -1, + }, +}; + +static inline __attribute__((always_inline)) __u32 read_symbol(void *frame_ptr, void *code_ptr, symbol_t *symbol) { + void *ptr; + 
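+	/*
+	 * Rebuild a symbol_t from a Python frame: peek at the first entry of
+	 * co_varnames to detect a "self"/"cls" receiver and, if found, recover
+	 * the class name via the first local's ob_type->tp_name; then fill
+	 * path from co_filename and method_name from co_name, and return
+	 * co_firstlineno.
+	 */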
bpf_probe_read_user(&ptr, sizeof(ptr), code_ptr + py_offsets.py_code_object.co_varnames); + bpf_probe_read_user(&ptr, sizeof(ptr), ptr + py_offsets.py_tuple_object.ob_item); + bpf_probe_read_user_str(&symbol->method_name, sizeof(symbol->method_name), ptr + py_offsets.py_string.data); + + char self_str[4] = "self"; + char cls_str[4] = "cls"; + bool first_self = *(__s32 *)symbol->method_name == *(__s32 *)self_str; + bool first_cls = *(__s32 *)symbol->method_name == *(__s32 *)cls_str; + + if (first_self || first_cls) { + bpf_probe_read_user(&ptr, sizeof(ptr), frame_ptr + py_offsets.py_frame_object.f_localsplus); + if (first_self) { + bpf_probe_read_user(&ptr, sizeof(ptr), ptr + py_offsets.py_object.ob_type); + } + bpf_probe_read_user(&ptr, sizeof(ptr), ptr + py_offsets.py_type_object.tp_name); + bpf_probe_read_user_str(&symbol->class_name, sizeof(symbol->class_name), ptr); + } + + bpf_probe_read_user(&ptr, sizeof(ptr), code_ptr + py_offsets.py_code_object.co_filename); + bpf_probe_read_user_str(&symbol->path, sizeof(symbol->path), ptr + py_offsets.py_string.data); + + bpf_probe_read_user(&ptr, sizeof(ptr), code_ptr + py_offsets.py_code_object.co_name); + bpf_probe_read_user_str(&symbol->method_name, sizeof(symbol->method_name), ptr + py_offsets.py_string.data); + + __u32 lineno; + bpf_probe_read_user(&lineno, sizeof(lineno), code_ptr + py_offsets.py_code_object.co_firstlineno); + + return lineno; +} + +static inline __attribute__((always_inline)) __u32 get_symbol_id(symbol_t *symbol) { + __u32 *found_id = bpf_map_lookup_elem(&__python_symbols, symbol); + if (found_id) { + return *found_id; + } + + __u32 zero = 0; + __u32 *sym_idx = bpf_map_lookup_elem(&__python_symbol_index, &zero); + if (sym_idx == NULL) { + return 0; + } + + __u32 id = *sym_idx * 32 + bpf_get_smp_processor_id(); + *sym_idx += 1; + + int err = bpf_map_update_elem(&__python_symbols, symbol, &id, BPF_ANY); + if (err) { + return 0; + } + return id; +} + +static inline __attribute__((always_inline)) __u64 hash_stack(stack_trace_t *stack) { + const __u64 m = 0xc6a4a7935bd1e995LLU; + const int r = 47; + __u64 hash = stack->len * m; + +#pragma unroll + for (int i = 0; i < MAX_STACK_DEPTH; i++) { + if (i >= stack->len) { + break; + } + + __u64 k = stack->addresses[i]; + + k *= m; + k ^= k >> r; + k *= m; + + hash ^= k; + hash *= m; + } + + return hash; +} + /* * Used for communication between user space and BPF to control the * switching between buffer a and buffer b. 
*/ MAP_ARRAY(profiler_state_map, __u32, __u64, PROFILER_CNT) - SEC("perf_event") -int bpf_perf_event(struct bpf_perf_event_data *ctx) + +static inline __attribute__((always_inline)) int get_stack_and_output_perf(struct bpf_perf_event_data *ctx, unwind_state_t *state) { __u32 count_idx; @@ -91,39 +273,20 @@ int bpf_perf_event(struct bpf_perf_event_data *ctx) count_idx = OUTPUT_CNT_IDX; __u64 *output_count_ptr = profiler_state_map__lookup(&count_idx); - count_idx = ENABLE_IDX; - __u64 *enable_ptr = profiler_state_map__lookup(&count_idx); - count_idx = ERROR_IDX; __u64 *error_count_ptr = profiler_state_map__lookup(&count_idx); if (transfer_count_ptr == NULL || sample_count_a_ptr == NULL || sample_count_b_ptr == NULL || drop_count_ptr == NULL || iter_count_ptr == NULL || error_count_ptr == NULL || - output_count_ptr == NULL || enable_ptr == NULL) { + output_count_ptr == NULL) { count_idx = ERROR_IDX; __u64 err_val = 1; profiler_state_map__update(&count_idx, &err_val); return 0; } - if (unlikely(*enable_ptr == 0)) - return 0; - - __u64 id = bpf_get_current_pid_tgid(); - struct stack_trace_key_t key = { 0 }; - key.tgid = id >> 32; - key.pid = (__u32) id; - - /* - * CPU idle stacks will not be collected. - */ - if (key.tgid == key.pid && key.pid == 0) - return 0; - - key.cpu = bpf_get_smp_processor_id(); - bpf_get_current_comm(&key.comm, sizeof(key.comm)); - key.timestamp = bpf_ktime_get_ns(); + struct stack_trace_key_t *key = &state->key; /* * Note: @@ -145,51 +308,61 @@ int bpf_perf_event(struct bpf_perf_event_data *ctx) __u64 sample_count = 0; if (!((*transfer_count_ptr) & 0x1ULL)) { - key.kernstack = bpf_get_stackid(ctx, &NAME(stack_map_a), + key->kernstack = bpf_get_stackid(ctx, &NAME(stack_map_a), KERN_STACKID_FLAGS); - key.userstack = bpf_get_stackid(ctx, &NAME(stack_map_a), + key->userstack = bpf_get_stackid(ctx, &NAME(stack_map_a), USER_STACKID_FLAGS); - if (-EEXIST == key.kernstack) + if (-EEXIST == key->kernstack) __sync_fetch_and_add(drop_count_ptr, 1); - if (-EEXIST == key.userstack) + if (-EEXIST == key->userstack) __sync_fetch_and_add(drop_count_ptr, 1); - if (key.userstack < 0 && key.kernstack < 0) + if (key->userstack < 0 && key->kernstack < 0) return 0; + if (state->stack.len > 0) { + key->intpstack = hash_stack(&state->stack); + python_stack__update(&key->intpstack, &state->stack); + } + sample_count = *sample_count_a_ptr; __sync_fetch_and_add(sample_count_a_ptr, 1); if (bpf_perf_event_output(ctx, &NAME(profiler_output_a), - BPF_F_CURRENT_CPU, &key, sizeof(key))) + BPF_F_CURRENT_CPU, key, sizeof(struct stack_trace_key_t))) __sync_fetch_and_add(error_count_ptr, 1); else __sync_fetch_and_add(output_count_ptr, 1); } else { - key.kernstack = bpf_get_stackid(ctx, &NAME(stack_map_b), + key->kernstack = bpf_get_stackid(ctx, &NAME(stack_map_b), KERN_STACKID_FLAGS); - key.userstack = bpf_get_stackid(ctx, &NAME(stack_map_b), + key->userstack = bpf_get_stackid(ctx, &NAME(stack_map_b), USER_STACKID_FLAGS); - if (-EEXIST == key.kernstack) + if (-EEXIST == key->kernstack) __sync_fetch_and_add(drop_count_ptr, 1); - if (-EEXIST == key.userstack) + if (-EEXIST == key->userstack) __sync_fetch_and_add(drop_count_ptr, 1); - if (key.userstack < 0 && key.kernstack < 0) + if (key->userstack < 0 && key->kernstack < 0) return 0; + if (state->stack.len > 0) { + key->intpstack = hash_stack(&state->stack); + python_stack__update(&key->intpstack, &state->stack); + } + sample_count = *sample_count_b_ptr; __sync_fetch_and_add(sample_count_b_ptr, 1); if (bpf_perf_event_output(ctx, &NAME(profiler_output_b), - 
BPF_F_CURRENT_CPU, &key, sizeof(key))) + BPF_F_CURRENT_CPU, key, sizeof(struct stack_trace_key_t))) __sync_fetch_and_add(error_count_ptr, 1); else __sync_fetch_and_add(output_count_ptr, 1); @@ -213,3 +386,139 @@ int bpf_perf_event(struct bpf_perf_event_data *ctx) return 0; } + +SEC("perf_event") +int bpf_perf_event(struct bpf_perf_event_data *ctx) +{ + __u32 count_idx = ENABLE_IDX; + __u64 *enable_ptr = profiler_state_map__lookup(&count_idx); + + if (enable_ptr == NULL) { + count_idx = ERROR_IDX; + __u64 err_val = 1; + profiler_state_map__update(&count_idx, &err_val); + return 0; + } + + if (unlikely(*enable_ptr == 0)) + return 0; + + __u64 id = bpf_get_current_pid_tgid(); + + __u32 zero = 0; + unwind_state_t *state = heap__lookup(&zero); + if (state == NULL) { + return 0; + } + __builtin_memset(state, 0, sizeof(unwind_state_t)); + + struct stack_trace_key_t *key = &state->key; + key->tgid = id >> 32; + key->pid = (__u32) id; + + /* + * CPU idle stacks will not be collected. + */ + if (key->tgid == key->pid && key->pid == 0) + return 0; + + key->cpu = bpf_get_smp_processor_id(); + bpf_get_current_comm(&key->comm, sizeof(key->comm)); + key->timestamp = bpf_ktime_get_ns(); + + if (comm_eq(key->comm, "python3")) { + bpf_tail_call(ctx, &NAME(progs_jmp_perf_map), PROG_PYTHON_FRAME_PTR_IDX); + } + + return get_stack_and_output_perf(ctx, state); +} + +PROGPE(python_frame_ptr)(struct bpf_perf_event_data *ctx) { + __u32 zero = 0; + unwind_state_t *state = heap__lookup(&zero); + if (state == NULL) { + return 0; + } + + __u64 thread_state_addr = 140737353904984; + if (bpf_probe_read_user(&state->thread_state, sizeof(void *), (void *)thread_state_addr) != 0) { + goto finish; + } + + if (bpf_probe_read_user(&state->key.itid, sizeof(__u32), state->thread_state + py_offsets.py_thread_state.thread_id) != 0) { + goto finish; + } + + if (bpf_probe_read_user(&state->frame_ptr, sizeof(void *), state->thread_state + py_offsets.py_thread_state.frame) != 0) { + goto finish; + } + + bpf_tail_call(ctx, &NAME(progs_jmp_perf_map), PROG_PYTHON_WALK_STACK_IDX); + +finish: + return 0; +} + +PROGPE(python_walk_stack)(struct bpf_perf_event_data *ctx) { + __u32 zero = 0; + unwind_state_t *state = heap__lookup(&zero); + if (state == NULL) { + return 0; + } + + if (state->frame_ptr == NULL) { + if (state->stack.len == 0) { + return 0; + } + goto output; + } + + symbol_t symbol; + +#pragma unroll + for (int i = 0; i < PYTHON_STACK_FRAMES_PER_RUN; i++) { + void *code_ptr = 0; + if (bpf_probe_read_user(&code_ptr, sizeof(code_ptr), state->frame_ptr + py_offsets.py_frame_object.f_code) != 0) { + goto output; + } + if (code_ptr == NULL) { + goto output; + } + + __builtin_memset(&symbol, 0, sizeof(symbol)); + __u64 lineno = read_symbol(state->frame_ptr, code_ptr, &symbol); + if (lineno == 0) { + goto output; + } + __u64 symbol_id = get_symbol_id(&symbol); + __u64 cur_len = state->stack.len; + if (cur_len >= 0 && cur_len < MAX_STACK_DEPTH) { + state->stack.addresses[state->stack.len++] = (lineno << 32) | symbol_id; + } + + if (bpf_probe_read_user(&state->frame_ptr, sizeof(void *), state->frame_ptr + py_offsets.py_frame_object.f_back) != 0) { + goto output; + } + if (!state->frame_ptr) { + goto output; + } + } + + if (state->runs++ < PYTHON_STACK_PROG_MAX_RUN) { + bpf_tail_call(ctx, &NAME(progs_jmp_perf_map), PROG_PYTHON_WALK_STACK_IDX); + } + +output: + + bpf_tail_call(ctx, &NAME(progs_jmp_perf_map), PROG_PYTHON_PERF_OUTPUT_IDX); + return 0; +} + +PROGPE(python_perf_output)(struct bpf_perf_event_data *ctx) { + __u32 zero = 0; + 
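+	/*
+	 * The per-CPU "heap" entry carries the unwind_state_t built up by the
+	 * previously tail-called programs (frame pointer lookup and stack
+	 * walk); fetch it here and hand it to the common output path.
+	 */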
unwind_state_t *state = heap__lookup(&zero); + if (state == NULL) { + return 0; + } + return get_stack_and_output_perf(ctx, state); +} diff --git a/agent/src/ebpf/samples/rust/profiler/Cargo.lock b/agent/src/ebpf/samples/rust/profiler/Cargo.lock index bbe94648ad1..e0282c22da4 100644 --- a/agent/src/ebpf/samples/rust/profiler/Cargo.lock +++ b/agent/src/ebpf/samples/rust/profiler/Cargo.lock @@ -28,6 +28,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "chrono" version = "0.4.19" @@ -131,6 +137,7 @@ dependencies = [ name = "profiler" version = "0.1.0" dependencies = [ + "cfg-if", "chrono", "dunce", "env_logger", diff --git a/agent/src/ebpf/samples/rust/profiler/Cargo.toml b/agent/src/ebpf/samples/rust/profiler/Cargo.toml index 81b0975dbb4..ab7ce6233b1 100644 --- a/agent/src/ebpf/samples/rust/profiler/Cargo.toml +++ b/agent/src/ebpf/samples/rust/profiler/Cargo.toml @@ -10,6 +10,7 @@ dunce = "0.1.1" [dependencies] libc = "0.2" +cfg-if = "1.0" chrono = "0.4" serde = { version = "1.0", features = ["derive"] } lazy_static = "1.4.0" diff --git a/agent/src/ebpf/samples/rust/socket-tracer/Cargo.toml b/agent/src/ebpf/samples/rust/socket-tracer/Cargo.toml index 7b05ebc7da0..22f19e4aa3d 100644 --- a/agent/src/ebpf/samples/rust/socket-tracer/Cargo.toml +++ b/agent/src/ebpf/samples/rust/socket-tracer/Cargo.toml @@ -10,6 +10,7 @@ dunce = "0.1.1" [dependencies] libc = "0.2" +cfg-if = "1.0" chrono = "0.4" serde = { version = "1.0", features = ["derive"] } lazy_static = "1.4.0" diff --git a/agent/src/ebpf/user/config.h b/agent/src/ebpf/user/config.h index 30853862ac5..27667c08fd7 100644 --- a/agent/src/ebpf/user/config.h +++ b/agent/src/ebpf/user/config.h @@ -36,6 +36,7 @@ //Program jmp tables #define MAP_PROGS_JMP_KP_NAME "__progs_jmp_kp_map" #define MAP_PROGS_JMP_TP_NAME "__progs_jmp_tp_map" +#define MAP_PROGS_JMP_PERF_NAME "__progs_jmp_perf_map" #define PROG_DATA_SUBMIT_NAME_FOR_KP "bpf_prog_kp__data_submit" #define PROG_DATA_SUBMIT_NAME_FOR_TP "bpf_prog_tp__data_submit" @@ -65,6 +66,14 @@ enum { PROG_KP_NUM }; +enum { + PROG_BPF_PERF_EVENT_IDX, + PROG_PYTHON_FRAME_PTR_IDX, + PROG_PYTHON_WALK_STACK_IDX, + PROG_PYTHON_PERF_OUTPUT_IDX, + PROG_PERF_NUM +}; + //thread index for bihash enum { THREAD_PROFILER_READER_IDX = 0, diff --git a/agent/src/ebpf/user/profile/perf_profiler.c b/agent/src/ebpf/user/profile/perf_profiler.c index dac0132edf4..92d05522d58 100644 --- a/agent/src/ebpf/user/profile/perf_profiler.c +++ b/agent/src/ebpf/user/profile/perf_profiler.c @@ -299,7 +299,6 @@ static int create_profiler(struct bpf_tracer *tracer) ret = create_work_thread("java_update", &java_syms_update_thread, (void *)java_syms_update_work, (void *)tracer); - if (ret) { goto error; } @@ -352,6 +351,7 @@ static int create_profiler(struct bpf_tracer *tracer) if (ret) { goto error; } + } else { tracer->enable_sample = false; ebpf_info(LOG_CP_TAG "=== oncpu profiler disabled ===\n"); @@ -521,6 +521,46 @@ static struct tracer_sockopts cpdbg_sockopts = { .get = cpdbg_sockopt_get, }; +static void __insert_output_prog_to_map(struct bpf_tracer *tracer, + const char *map_name, + const char *prog_name, int key) +{ + struct ebpf_prog *prog; + prog = ebpf_obj__get_prog_by_name(tracer->obj, prog_name); + if 
(prog == NULL) { + ebpf_error("bpf_obj__get_prog_by_name() not find \"%s\"\n", + prog_name); + } + + if (!bpf_table_set_value(tracer, map_name, key, &prog->prog_fd)) { + ebpf_error("bpf_table_set_value() failed, prog fd:%d\n", + prog->prog_fd); + } + + ebpf_info("Insert into map('%s'), key %d, program name %s\n", + map_name, key, prog_name); +} + +static void insert_perf_event_programs(struct bpf_tracer *tracer) +{ + __insert_output_prog_to_map(tracer, + MAP_PROGS_JMP_PERF_NAME, + "bpf_perf_event", + PROG_BPF_PERF_EVENT_IDX); + __insert_output_prog_to_map(tracer, + MAP_PROGS_JMP_PERF_NAME, + "bpf_prog_pe__python_frame_ptr", + PROG_PYTHON_FRAME_PTR_IDX); + __insert_output_prog_to_map(tracer, + MAP_PROGS_JMP_PERF_NAME, + "bpf_prog_pe__python_walk_stack", + PROG_PYTHON_WALK_STACK_IDX); + __insert_output_prog_to_map(tracer, + MAP_PROGS_JMP_PERF_NAME, + "bpf_prog_pe__python_perf_output", + PROG_PYTHON_PERF_OUTPUT_IDX); +} + /* * start continuous profiler * @freq sample frequency, Hertz. (e.g. 99 profile stack traces at 99 Hertz) @@ -606,6 +646,8 @@ int start_continuous_profiler(int freq, int java_syms_space_limit, if (tracer == NULL) return (-1); + insert_perf_event_programs(tracer); + if (sockopt_register(&cpdbg_sockopts) != ETR_OK) return (-1); diff --git a/agent/src/ebpf/user/profile/profile_common.c b/agent/src/ebpf/user/profile/profile_common.c index 65180879ba0..23afce86e05 100644 --- a/agent/src/ebpf/user/profile/profile_common.c +++ b/agent/src/ebpf/user/profile/profile_common.c @@ -706,6 +706,7 @@ static inline void update_matched_process_in_total(struct profiler_context *ctx, static void aggregate_stack_traces(struct profiler_context *ctx, struct bpf_tracer *t, const char *stack_map_name, + const char *intp_stack_map_name, stack_str_hash_t * stack_str_hash, stack_trace_msg_hash_t * msg_hash, u32 * count, bool use_a_map) @@ -886,7 +887,7 @@ static void aggregate_stack_traces(struct profiler_context *ctx, */ char *trace_str = - resolve_and_gen_stack_trace_str(t, v, stack_map_name, + resolve_and_gen_stack_trace_str(t, v, stack_map_name, intp_stack_map_name, stack_str_hash, matched, process_name, info_p); @@ -1042,7 +1043,7 @@ void process_bpf_stacktraces(struct profiler_context *ctx, struct bpf_tracer *t) * After the reader completes data reading, the work of * data aggregation will be blocked if there is no data. 
*/ - aggregate_stack_traces(ctx, t, stack_map_name, + aggregate_stack_traces(ctx, t, stack_map_name, "__python_stack", &ctx->stack_str_hash, &ctx->msg_hash, &count, using_map_set_a); diff --git a/agent/src/ebpf/user/profile/stringifier.c b/agent/src/ebpf/user/profile/stringifier.c index 8136fac234d..0c4657cd732 100644 --- a/agent/src/ebpf/user/profile/stringifier.c +++ b/agent/src/ebpf/user/profile/stringifier.c @@ -62,6 +62,7 @@ static const char *k_err_tag = "[kernel stack trace error]"; static const char *u_err_tag = "[user stack trace error]"; +static const char *i_err_tag = "[interpreter stack trace error]"; static const char *lost_tag = "[stack trace lost]"; static const char *k_sym_prefix = "[k] "; static const char *lib_sym_prefix = "[l] "; @@ -400,12 +401,90 @@ static char *build_stack_trace_string(struct bpf_tracer *t, return NULL; } +static char *build_interpreter_stack_trace_string(struct bpf_tracer *t, + const char *intp_stack_map_name, + pid_t pid, + u64 stack_id) +{ + ASSERT(pid >= 0 && stack_id != 0); + + stack_trace_t stack = { 0 }; + if (!bpf_table_get_value(t, intp_stack_map_name, stack_id, (void *)&stack)) { + return NULL; + } + + if (stack.len == 0) { + return NULL; + } + + symbol_t symbols[512]; + __u32 symbol_ids[512]; + + struct ebpf_map *map = ebpf_obj__get_map_by_name(t->obj, "__python_symbols"); + int fd = map->fd; + + symbol_t key = {}; + int n = 0; + while (bpf_get_next_key(fd, &key, &symbols[n]) == 0) { + int ret = bpf_lookup_elem(fd, &symbols[n], &symbol_ids[n]); + key = symbols[n]; + if (ret == 0) { + n++; + } + } + + int folded_size = 0; + for (int i = stack.len - 1; i >= 0; i--) { + folded_size += 1; // ; + __u64 addr = stack.addresses[i]; + if (addr == 0) { + folded_size += strlen("-;"); + continue; + } + __u32 symbol_id = addr & 0xFFFF; + for (int j = 0; j < n; j++) { + if (symbol_ids[j] == symbol_id) { + symbol_t *s = &symbols[j]; + folded_size += strlen(s->class_name) + 2 + strlen(s->method_name); + } + } + } + + char *fold_stack_trace_str = + clib_mem_alloc_aligned("folded_str", folded_size, 0, NULL); + + int offset = 0; + for (int i = stack.len - 1; i >= 0; i--) { + if (offset != 0) { + offset += snprintf(fold_stack_trace_str + offset, folded_size - offset, ";"); + } + __u64 addr = stack.addresses[i]; + if (addr == 0) { + offset += snprintf(fold_stack_trace_str + offset, folded_size - offset, "-"); + continue; + } + __u32 symbol_id = addr & 0xFFFF; + for (int j = 0; j < n; j++) { + if (symbol_ids[j] == symbol_id) { + symbol_t *s = &symbols[j]; + if (strlen(s->class_name) > 0) { + offset += snprintf(fold_stack_trace_str + offset, folded_size - offset, "%s::", s->class_name); + } + offset += snprintf(fold_stack_trace_str + offset, folded_size - offset, "%s", s->method_name); + } + } + } + + return fold_stack_trace_str; +} + static char *folded_stack_trace_string(struct bpf_tracer *t, int stack_id, pid_t pid, const char *stack_map_name, + const char *intp_stack_map_name, stack_str_hash_t * h, - bool new_cache, void *info_p, u64 ts) + bool new_cache, void *info_p, u64 ts, u64 intp_stack_id) { ASSERT(pid >= 0 && stack_id >= 0); @@ -414,6 +493,7 @@ static char *folded_stack_trace_string(struct bpf_tracer *t, * stack trace string has already been stored. 
*/ stack_str_hash_kv kv; + // FIXME: hack here: stack id from kernel/user and interpreter may collide, which shouldn't be a problem in demo kv.key = (u64) stack_id; kv.value = 0; if (stack_str_hash_search(h, &kv, &kv) == 0) { @@ -423,8 +503,13 @@ static char *folded_stack_trace_string(struct bpf_tracer *t, char *str = NULL; int ret_val = 0; - str = build_stack_trace_string(t, stack_map_name, pid, stack_id, - h, new_cache, &ret_val, info_p, ts); + if (intp_stack_id == 0) { + str = build_stack_trace_string(t, stack_map_name, + pid, stack_id, + h, new_cache, &ret_val, info_p, ts); + } else { + str = build_interpreter_stack_trace_string(t, intp_stack_map_name, pid, intp_stack_id); + } if (ret_val == ETR_NOTEXIST) return NULL; @@ -471,6 +556,7 @@ static inline char *alloc_stack_trace_str(int len) char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, struct stack_trace_key_t *v, const char *stack_map_name, + const char *intp_stack_map_name, stack_str_hash_t * h, bool new_cache, char *process_name, void *info_p) @@ -491,9 +577,9 @@ char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, */ /* add separator and '\0' */ - int len = 2; - char *k_trace_str, *u_trace_str, *trace_str; - k_trace_str = u_trace_str = trace_str = NULL; + int len = 3; + char *k_trace_str, *u_trace_str, *i_trace_str, *trace_str; + k_trace_str = u_trace_str = i_trace_str = trace_str = NULL; /* For processes without configuration, the stack string is in the format 'process name;thread name'. */ @@ -527,9 +613,9 @@ char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, if (v->kernstack >= 0) { k_trace_str = folded_stack_trace_string(t, v->kernstack, - 0, stack_map_name, + 0, stack_map_name, intp_stack_map_name, h, new_cache, info_p, - v->timestamp); + v->timestamp, 0); if (k_trace_str == NULL) return NULL; } @@ -537,67 +623,24 @@ char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, if (v->userstack >= 0) { u_trace_str = folded_stack_trace_string(t, v->userstack, v->tgid, - stack_map_name, + stack_map_name, intp_stack_map_name, h, new_cache, info_p, - v->timestamp); + v->timestamp, 0); if (u_trace_str == NULL) return NULL; } - /* trace_str = u_stack_str_fn() + ";" + k_stack_str_fn(); */ - if (v->kernstack >= 0 && v->userstack >= 0) { - if (k_trace_str) { - len += strlen(k_trace_str); - } else { - len += strlen(k_err_tag); - } - - if (u_trace_str) { - len += strlen(u_trace_str); - } else { - len += strlen(u_err_tag); - } - - trace_str = alloc_stack_trace_str(len); - if (trace_str == NULL) { - ebpf_warning("No available memory space.\n"); - return NULL; - } - snprintf(trace_str, len, "%s;%s", - u_trace_str ? u_trace_str : u_err_tag, - k_trace_str ? k_trace_str : k_err_tag); - - } else if (v->kernstack >= 0) { - if (k_trace_str) { - len += strlen(k_trace_str); - } else { - len += strlen(k_err_tag); - } - - trace_str = alloc_stack_trace_str(len); - if (trace_str == NULL) { - ebpf_warning("No available memory space.\n"); - return NULL; - } - - snprintf(trace_str, len, "%s", - k_trace_str ? 
k_trace_str : k_err_tag); - } else if (v->userstack >= 0) { - if (u_trace_str) { - len += strlen(u_trace_str); - } else { - len += strlen(u_err_tag); - } - - trace_str = alloc_stack_trace_str(len); - if (trace_str == NULL) { - ebpf_warning("No available memory space.\n"); + if (v->intpstack != 0) { + i_trace_str = folded_stack_trace_string(t, 0, + v->tgid, + stack_map_name, intp_stack_map_name, + h, new_cache, info_p, + v->timestamp, v->intpstack); + if (i_trace_str == NULL) return NULL; - } + } - snprintf(trace_str, len, "%s", - u_trace_str ? u_trace_str : u_err_tag); - } else { + if (v->kernstack < 0 && v->userstack < 0 && v->intpstack == 0) { /* * The kernel can indicate the invalidity of a stack ID in two * different ways: @@ -621,6 +664,52 @@ char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, } snprintf(trace_str, len, "%s", lost_tag); + return trace_str; + } + + if (v->kernstack >= 0) { + if (k_trace_str) { + len += strlen(k_trace_str); + } else { + len += strlen(k_err_tag); + } + } + + if (v->userstack >= 0) { + if (u_trace_str) { + len += strlen(u_trace_str); + } else { + len += strlen(u_err_tag); + } + } + + if (v->intpstack != 0) { + if (i_trace_str) { + len += strlen(i_trace_str); + } else { + len += strlen(i_err_tag); + } + } + + trace_str = alloc_stack_trace_str(len); + if (trace_str == NULL) { + ebpf_warning("No available memory space.\n"); + return NULL; + } + + int offset = 0; + bool last_stack = false; + + if (v->userstack >= 0) { + offset += snprintf(trace_str + offset, len - offset, "%s%s", last_stack ? ";" : "", u_trace_str ? u_trace_str : u_err_tag); + last_stack = true; + } + if (v->intpstack != 0) { + offset += snprintf(trace_str + offset, len - offset, "%s%s", last_stack ? ";" : "", i_trace_str ? i_trace_str : i_err_tag); + last_stack = true; + } + if (v->kernstack >= 0) { + offset += snprintf(trace_str + offset, len - offset, "%s%s", last_stack ? ";" : "", k_trace_str ? k_trace_str : k_err_tag); } return trace_str; diff --git a/agent/src/ebpf/user/profile/stringifier.h b/agent/src/ebpf/user/profile/stringifier.h index 3de3cb5d9d2..2798c0d9847 100644 --- a/agent/src/ebpf/user/profile/stringifier.h +++ b/agent/src/ebpf/user/profile/stringifier.h @@ -43,6 +43,7 @@ void release_stack_str_hash(stack_str_hash_t *h); char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, struct stack_trace_key_t *v, const char *stack_map_name, + const char *intp_stack_map_name, stack_str_hash_t *h, bool new_cache, char *process_name, void *info_p);
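Note on the frame encoding used above: python_walk_stack() stores each interpreter frame as a single __u64, with co_firstlineno in the upper 32 bits and the id returned by get_symbol_id() in the lower bits; build_interpreter_stack_trace_string() later masks the low 16 bits to look the symbol back up. Below is a minimal stand-alone sketch of that packing, not part of the patch: the helper names are illustrative, and it assumes symbol ids fit in 16 bits, which holds for the demo's per-CPU "index * 32 + cpu" allocation.

/* Stand-alone sketch of the frame-word encoding (hypothetical helpers). */
#include <stdint.h>
#include <stdio.h>

static uint64_t pack_frame(uint32_t lineno, uint32_t symbol_id)
{
	/* Mirrors python_walk_stack(): line number in the high half,
	 * symbol id in the low half of one stack_trace_t slot. */
	return ((uint64_t)lineno << 32) | symbol_id;
}

static void unpack_frame(uint64_t addr, uint32_t *lineno, uint32_t *symbol_id)
{
	*lineno = (uint32_t)(addr >> 32);
	/* The stringifier masks 16 bits; enough for the demo, where ids
	 * are allocated as index * 32 + cpu and stay small. */
	*symbol_id = (uint32_t)(addr & 0xFFFF);
}

int main(void)
{
	uint32_t lineno, id;
	unpack_frame(pack_frame(42, 7), &lineno, &id);
	printf("lineno=%u symbol_id=%u\n", lineno, id);	/* lineno=42 symbol_id=7 */
	return 0;
}

Packing the line number and symbol id into one word lets the BPF side reuse the __u64 address slots of stack_trace_t and the existing hash/dedup path, avoiding a second map lookup per frame.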