Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[eBPF] Reducing CPU consumption for process events exec/exit #6201

Draft
wants to merge 1 commit into
base: v6.4
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion agent/src/ebpf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ graph LR
- 8 rust extra events callback
- We provide a function that lets the user register a callback for a specific event, e.g. to process these events with a Rust function.
- 9.1 add_event_to_proc_header
- Add `struct process_event` to list-head(proc_events_head), need to set a expire time in `struct process_event`, see the description of [TLS/SSL Tracing](https://github.com/deepflowio/deepflow/tree/main/agent/src/ebpf#tlsssl-tracing) for the reason.
- Add `struct probe_process_event` to list-head(proc_events_head), need to set a expire time in `struct probe_process_event`, see the description of [TLS/SSL Tracing](https://github.com/deepflowio/deepflow/tree/main/agent/src/ebpf#tlsssl-tracing) for the reason.
- 14.2.1 clear_probes_by_pid
- Clear all probes whose process ID equals the pid of the fetched event.
- 14.2.2 proc_parse_and_register
Expand Down
47 changes: 24 additions & 23 deletions agent/src/ebpf/kernel/uprobe_base.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -641,44 +641,45 @@ int bpf_func_sched_process_exit(struct sched_comm_exit_ctx *ctx)
return 0;
}

// /sys/kernel/debug/tracing/events/sched/sched_process_fork/format
SEC("tracepoint/sched/sched_process_fork")
int bpf_func_sched_process_fork(struct sched_comm_fork_ctx *ctx)
static inline int kernel_clone_exit(struct syscall_comm_exit_ctx *ctx)
{
/*
* When you find that the golang process starts, sometimes you
* don't get the process start information, all you get is
* threads. Take the following example:
*
* # pstree -p 4157
* deepflow-server(4157)─┬─{deepflow-server}(4214)
* ├─{deepflow-server}(4216)
* ├─{deepflow-server}(4217)
* ├─{deepflow-server}(4218)
* ├─{deepflow-server}(4219)
* ├─{deepflow-server}(4229)
*
* fetch data:
* .... 296916.616252: 0: parent_pid 4216 child_pid 4218
* .... 296916.616366: 0: parent_pid 4218 child_pid 4219
*
* To get process startup information we add probe 'sched_process_exec'.
*/
__u64 id = bpf_get_current_pid_tgid();
long ret = ctx->ret;

// error or parent process
if (ret != 0)
return 0;

int pid = (int)id;
int tgid = (int)(id >> 32);
// filter threads
if (pid != tgid)
return 0;

struct member_fields_offset *offset = retrieve_ready_kern_offset();
if (offset == NULL)
return 0;

struct process_event_t data;
data.meta.event_type = EVENT_TYPE_PROC_EXEC;
data.pid = ctx->child_pid;
data.pid = pid;
bpf_get_current_comm(data.name, sizeof(data.name));
bpf_perf_event_output(ctx, &NAME(socket_data),
BPF_F_CURRENT_CPU, &data, sizeof(data));

return 0;
}

// /sys/kernel/debug/tracing/events/syscalls/sys_exit_fork/format
// Program for the syscalls/sys_exit_fork tracepoint: passes the tracepoint
// context straight to kernel_clone_exit() and returns its result.
// NOTE(review): TPPROG presumably supplies the section name and function
// name for this tracepoint -- confirm against the macro definition.
TPPROG(sys_exit_fork) (struct syscall_comm_exit_ctx * ctx) {
return kernel_clone_exit(ctx);
}

// /sys/kernel/debug/tracing/events/syscalls/sys_exit_clone/format
// Program for the syscalls/sys_exit_clone tracepoint: passes the tracepoint
// context straight to kernel_clone_exit() and returns its result.
// NOTE(review): TPPROG presumably supplies the section name and function
// name for this tracepoint -- confirm against the macro definition.
TPPROG(sys_exit_clone) (struct syscall_comm_exit_ctx * ctx) {
return kernel_clone_exit(ctx);
}

// /sys/kernel/debug/tracing/events/sched/sched_process_exec/format
SEC("tracepoint/sched/sched_process_exec")
int bpf_func_sched_process_exec(struct sched_comm_exec_ctx *ctx)
Expand Down
4 changes: 4 additions & 0 deletions agent/src/ebpf/user/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,11 @@ enum {

// Thread index for bihash.
// Values are consecutive starting at 0, so the order of the enumerators
// below is significant. A thread stores its value in the thread-local
// `thread_index`.
enum {
// cp-reader-0 (presumably the name of the profiler reader thread -- confirm)
THREAD_PROFILER_READER_IDX = 0,
// proc-events (the thread that handles process exec/exit events)
THREAD_PROC_EVENTS_IDX,
// sk-reader-0 ... (base index; presumably one index per sk-reader thread
// counting up from here -- confirm)
THREAD_PROC_ACT_IDX_BASE
};

Expand Down
89 changes: 35 additions & 54 deletions agent/src/ebpf/user/go_tracer.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,23 +50,11 @@
#include "socket.h"
#include "elf.h"

/* *INDENT-OFF* */
// For process execute/exit events.
// One queued event; entries are linked together on proc_events_head.
struct process_event {
struct list_head list; // list add to proc_events_head
struct bpf_tracer *tracer; // link to struct bpf_tracer
uint8_t type; // EVENT_TYPE_PROC_EXEC or EVENT_TYPE_PROC_EXIT
char *path; // Full path "/proc/<pid>/root/..."; freed together with the event
int pid; // Process ID
uint32_t expire_time; // Expiry time in seconds since the system started (uptime when queued + PROC_EVENT_DELAY_HANDLE_DEF).
};
/* *INDENT-ON* */

extern uint32_t k_version;
extern struct proc_events_record proc_ev_record;

static char build_info_magic[] = "\xff Go buildinf:";
static struct list_head proc_info_head; // For pid-offsets correspondence lists.
static struct list_head proc_events_head; // For process execute/exit events list.
static pthread_mutex_t mutex_proc_events_lock;

/* *INDENT-OFF* */
/* ------------- offsets info -------------- */
Expand Down Expand Up @@ -595,10 +583,8 @@ static int resolve_bin_file(const char *path, int pid,
for (int k = 0; k < NELEMS(offsets); k++) {
off = &offsets[k];
int offset = struct_member_offset_analyze(binary_path,
off->
structure,
off->
field_name);
off->structure,
off->field_name);
if (offset == ETR_INVAL)
offset = off->default_offset;

Expand Down Expand Up @@ -744,9 +730,7 @@ int collect_go_uprobe_syms_from_procfs(struct tracer_probes_conf *conf)
struct dirent *entry = NULL;
DIR *fddir = NULL;

init_list_head(&proc_events_head);
init_list_head(&proc_info_head);
pthread_mutex_init(&mutex_proc_events_lock, NULL);

if (!is_feature_enabled(FEATURE_UPROBE_GOLANG))
return ETR_OK;
Expand Down Expand Up @@ -966,26 +950,12 @@ static void process_execute_handle(int pid, struct bpf_tracer *tracer)
pthread_mutex_unlock(&tracer->mutex_probes_lock);
}

// Remove every queued event for `pid` from proc_events_head, freeing each
// matching entry together with its path string.
// The caller needs 'mutex_proc_events_lock' for protection
static inline void find_and_clear_event_from_list(int pid)
{
struct process_event *pe;
struct list_head *p, *n;
// The _safe iterator is used because matching nodes are unlinked and
// freed inside the loop body.
list_for_each_safe(p, n, &proc_events_head) {
pe = container_of(p, struct process_event, list);
if (pe->pid == pid) {
list_head_del(&pe->list);
free(pe->path);
free(pe);
}
}
}

static void process_exit_handle(int pid, struct bpf_tracer *tracer)
{
pthread_mutex_lock(&mutex_proc_events_lock);
find_and_clear_event_from_list(pid);
pthread_mutex_unlock(&mutex_proc_events_lock);
struct proc_events_record *r = &proc_ev_record;
proc_events_lock(r->golang_list_lock);
find_and_clear_event_from_list(pid, &r->golang_events_head);
proc_events_unlock(r->golang_list_lock);

// Protect the probes operation in multiple threads, similar to process_execute_handle()
pthread_mutex_lock(&tracer->mutex_probes_lock);
Expand All @@ -1011,7 +981,7 @@ static void add_event_to_proc_header(struct bpf_tracer *tracer, int pid,
return;
}

struct process_event *pe = calloc(1, sizeof(struct process_event));
struct probe_process_event *pe = calloc(1, sizeof(struct probe_process_event));
if (pe == NULL) {
free(path);
ebpf_warning("Without memory.\n");
Expand All @@ -1024,10 +994,10 @@ static void add_event_to_proc_header(struct bpf_tracer *tracer, int pid,
pe->type = type;
pe->expire_time = get_sys_uptime() + PROC_EVENT_DELAY_HANDLE_DEF;

pthread_mutex_lock(&mutex_proc_events_lock);
find_and_clear_event_from_list(pid);
list_add_tail(&pe->list, &proc_events_head);
pthread_mutex_unlock(&mutex_proc_events_lock);
struct proc_events_record *r = &proc_ev_record;
proc_events_lock(r->golang_list_lock);
list_add_tail(&pe->list, &r->golang_events_head);
proc_events_unlock(r->golang_list_lock);
}

/**
Expand Down Expand Up @@ -1077,19 +1047,20 @@ void go_process_exit(int pid)
*/
void go_process_events_handle(void)
{
struct process_event *pe;
struct proc_events_record *r = &proc_ev_record;
struct probe_process_event *pe;
do {
// Multithreaded safe fetch 'struct process_event'
pthread_mutex_lock(&mutex_proc_events_lock);
if (!list_empty(&proc_events_head)) {
pe = list_first_entry(&proc_events_head,
struct process_event, list);
// Multithreaded safe fetch 'struct probe_process_event'
proc_events_lock(r->golang_list_lock);
if (!list_empty(&r->golang_events_head)) {
pe = list_first_entry(&r->golang_events_head,
struct probe_process_event, list);
} else {
pe = NULL;
}

if (pe == NULL) {
pthread_mutex_unlock(&mutex_proc_events_lock);
proc_events_unlock(r->golang_list_lock);
break;
}

Expand All @@ -1101,16 +1072,26 @@ void go_process_events_handle(void)
list_head_del(&pe->list);
free(pe->path);
free(pe);
pthread_mutex_unlock(&mutex_proc_events_lock);
proc_events_unlock(r->golang_list_lock);
if (path == NULL)
break;

if (type == EVENT_TYPE_PROC_EXEC) {
if (access(path, F_OK) == 0) {
process_execute_handle(pid, tracer);
struct version_info go_version;
memset(&go_version, 0,
sizeof(go_version));
if (fetch_go_elf_version
(path, &go_version)) {
process_execute_handle(pid,
tracer);
}
}
}
free(path);

free(path);
} else {
pthread_mutex_unlock(&mutex_proc_events_lock);
proc_events_unlock(r->golang_list_lock);
break;
}
} while (true);
Expand Down
6 changes: 4 additions & 2 deletions agent/src/ebpf/user/socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@

static struct list_head events_list; // Use for extra register events
static pthread_t proc_events_pthread; // Process exec/exit thread

extern __thread uword thread_index; // for bihash
/*
* tracer_hooks_detach() and tracer_hooks_attach() will become terrible
* when the number of probes is very large. Because we have to spend a
Expand Down Expand Up @@ -174,7 +174,8 @@ static void socket_tracer_set_probes(struct tracer_probes_conf *tps)
tps_set_symbol(tps, "tracepoint/syscalls/sys_exit_accept");
tps_set_symbol(tps, "tracepoint/syscalls/sys_exit_accept4");
// process execute
tps_set_symbol(tps, "tracepoint/sched/sched_process_fork");
tps_set_symbol(tps, "tracepoint/syscalls/sys_exit_fork");
tps_set_symbol(tps, "tracepoint/syscalls/sys_exit_clone");
tps_set_symbol(tps, "tracepoint/sched/sched_process_exec");

// Periodically triggered to run the timeout check on cached data
Expand Down Expand Up @@ -1183,6 +1184,7 @@ static void check_datadump_timeout(void)
static void process_events_handle_main(__unused void *arg)
{
prctl(PR_SET_NAME, "proc-events");
thread_index = THREAD_PROC_EVENTS_IDX;
struct bpf_tracer *t = arg;
for (;;) {
/*
Expand Down