Introduce {attach|detach}_kfunc API
The kernel added a new probe type, the trampoline, which allows
probing almost any kernel function when BTF info is available on
the system.

Add an interface to define a trampoline function for a given
kernel function via the BPF_PROG macro, like:

  KFUNC_PROBE(do_sys_open, int dfd, const char *filename, int flags, int mode)
  {
    ...
  }

which defines a trampoline function named 'kfunc__do_sys_open'
that instruments the do_sys_open kernel function before the
function is executed.

or:

  KRETFUNC_PROBE(do_sys_open, int dfd, const char *filename, int flags, int mode, int ret)
  {
    ...
  }

which defines a trampoline function named 'kretfunc__do_sys_open'
that instruments the do_sys_open kernel function after the
function is executed.

The main benefit is the much lower overhead of trampolines (please
see the following commit for klockstat.py with a perf comparison).

Another benefit is that a kretfunc probe can access the function
arguments, so some tools might need only one program instead of
separate entry/exit ones (please see the following commit for the
opensnoop.py changes).

Currently the interface does not allow defining a function with a
name other than:
  kfunc__<function_name> or kretfunc__<function_name>

which is sufficient for now and can easily be changed in the
future if needed.
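
As a sketch of how a tool can use this (the probe body below is only
illustrative), the program is embedded in a bcc script and handled by
the new Python helpers:

  from bcc import BPF

  b = BPF(text=r"""
  KFUNC_PROBE(do_sys_open, int dfd, const char *filename, int flags, int mode)
  {
      bpf_trace_printk("do_sys_open: flags %d\n", flags);
  }
  """)

  # kfunc__*/kretfunc__* programs are attached automatically on load;
  # attach_kfunc()/detach_kfunc() also accept the plain kernel function
  # name and add the prefix themselves.
  b.trace_print()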

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
olsajiri authored and yonghong-song committed Feb 27, 2020
1 parent 550706a commit 572478b
Showing 5 changed files with 194 additions and 1 deletion.
46 changes: 46 additions & 0 deletions docs/reference_guide.md
@@ -16,6 +16,8 @@ This guide is incomplete. If something feels missing, check the bcc and kernel s
- [6. USDT probes](#6-usdt-probes)
- [7. Raw Tracepoints](#7-raw-tracepoints)
- [8. system call tracepoints](#8-system-call-tracepoints)
- [9. kfuncs](#9-kfuncs)
- [10. kretfuncs](#10-kretfuncs)
- [Data](#data)
- [1. bpf_probe_read()](#1-bpf_probe_read)
- [2. bpf_probe_read_str()](#2-bpf_probe_read_str)
@@ -317,6 +319,50 @@ b.attach_kprobe(event=execve_fnname, fn_name="syscall__execve")
Examples in situ:
[code](https://github.com/iovisor/bcc/blob/552658edda09298afdccc8a4b5e17311a2d8a771/tools/execsnoop.py#L101) ([output](https://github.com/iovisor/bcc/blob/552658edda09298afdccc8a4b5e17311a2d8a771/tools/execsnoop_example.txt#L8))

### 9. kfuncs

Syntax: KFUNC_PROBE(*function*, typeof(arg1) arg1, typeof(arg2) arg2 ...)

This is a macro that instruments the kernel function via a trampoline
*before* the function is executed. It is specified by the *function* name and
the function arguments, defined as *argX*.

For example:
```C
KFUNC_PROBE(do_sys_open, int dfd, const char *filename, int flags, int mode)
{
...
```
This instruments the do_sys_open kernel function and makes its arguments
accessible as standard argument values.

Examples in situ:
[search /tools](https://github.com/iovisor/bcc/search?q=KFUNC_PROBE+path%3Atools&type=Code)
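
A minimal Python sketch embedding such a probe (the per-pid counting is
just an example; the program is attached automatically because of its
`kfunc__` prefix):

```Python
from bcc import BPF

b = BPF(text=r"""
BPF_HASH(counts, u32, u64);

KFUNC_PROBE(do_sys_open, int dfd, const char *filename, int flags, int mode)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    counts.increment(pid);
}
""")

# kfunc__do_sys_open is loaded as a TRACING program and attached on load;
# the per-pid open counts can then be read from b["counts"].
```
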
### 10. kretfuncs

Syntax: KRETFUNC_PROBE(*function*, typeof(arg1) arg1, typeof(arg2) arg2 ..., int ret)

This is a macro that instruments the kernel function via a trampoline
*after* the function is executed. It is specified by the *function* name and
the function arguments, defined as *argX*.

The last argument of the probe is the return value of the instrumented function.

For example:
```C
KRETFUNC_PROBE(do_sys_open, int dfd, const char *filename, int flags, int mode, int ret)
{
...
```

This instruments the do_sys_open kernel function and makes its arguments
accessible as standard argument values, together with its return value.

Examples in situ:
[search /tools](https://github.com/iovisor/bcc/search?q=KRETFUNC_PROBE+path%3Atools&type=Code)
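
Because the return probe also sees the function arguments, a single
KRETFUNC_PROBE can often replace a kprobe/kretprobe pair. A minimal
Python sketch (the error filtering is just an example):

```Python
from bcc import BPF

b = BPF(text=r"""
KRETFUNC_PROBE(do_sys_open, int dfd, const char *filename, int flags, int mode, int ret)
{
    /* arguments and return value are available in the same program */
    if (ret < 0)
        bpf_trace_printk("do_sys_open failed: flags %d ret %d\n", flags, ret);
}
""")

b.trace_print()
```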


## Data

45 changes: 45 additions & 0 deletions src/cc/export/helpers.h
@@ -945,6 +945,51 @@ int tracepoint__##category##__##event(struct tracepoint__##category##__##event *
#define RAW_TRACEPOINT_PROBE(event) \
int raw_tracepoint__##event(struct bpf_raw_tracepoint_args *ctx)

/* The BPF_PROG macro allows defining a trampoline function;
 * borrowed from the kernel bpf selftest code.
 */
#define ___bpf_concat(a, b) a ## b
#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
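/* ___bpf_narg() counts its arguments (0 to 12): ##__VA_ARGS__ shifts the
 * descending 12..0 list below so that ___bpf_nth picks out the count.
 */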
#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
#define ___bpf_narg(...) \
___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)

#define ___bpf_ctx_cast0() ctx
#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
#define ___bpf_ctx_cast(args...) \
___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
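/* e.g. ___bpf_ctx_cast(int a, int b) expands to:
 *   ctx, (void *)ctx[0], (void *)ctx[1]
 */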

#define BPF_PROG(name, args...) \
int name(unsigned long long *ctx); \
__attribute__((always_inline)) \
static void ____##name(unsigned long long *ctx, ##args); \
int name(unsigned long long *ctx) \
{ \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
____##name(___bpf_ctx_cast(args)); \
_Pragma("GCC diagnostic pop") \
return 0; \
} \
static void ____##name(unsigned long long *ctx, ##args)
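
/* KFUNC_PROBE/KRETFUNC_PROBE define trampoline programs; their kfunc__/
 * kretfunc__ name prefixes are what the bcc loader keys on to select the
 * fentry/fexit attach type.
 */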

#define KFUNC_PROBE(event, args...) \
BPF_PROG(kfunc__ ## event, args)

#define KRETFUNC_PROBE(event, args...) \
BPF_PROG(kretfunc__ ## event, args)

#define TP_DATA_LOC_READ_CONST(dst, field, length) \
do { \
unsigned short __offset = args->data_loc_##field & 0xFFFF; \
33 changes: 32 additions & 1 deletion src/cc/libbpf.c
@@ -510,7 +510,7 @@ int bcc_prog_load_xattr(struct bpf_load_program_attr *attr, int prog_len,
unsigned name_len = attr->name ? strlen(attr->name) : 0;
char *tmp_log_buf = NULL, *attr_log_buf = NULL;
unsigned tmp_log_buf_size = 0, attr_log_buf_size = 0;
int ret = 0, name_offset = 0;
int ret = 0, name_offset = 0, expected_attach_type = 0;
char prog_name[BPF_OBJ_NAME_LEN] = {};

unsigned insns_cnt = prog_len / sizeof(struct bpf_insn);
@@ -547,6 +547,20 @@ int bcc_prog_load_xattr(struct bpf_load_program_attr *attr, int prog_len,
name_offset = 12;
else if (strncmp(attr->name, "raw_tracepoint__", 16) == 0)
name_offset = 16;
else if (strncmp(attr->name, "kfunc__", 7) == 0) {
name_offset = 7;
expected_attach_type = BPF_TRACE_FENTRY;
} else if (strncmp(attr->name, "kretfunc__", 10) == 0) {
name_offset = 10;
expected_attach_type = BPF_TRACE_FEXIT;
}
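
/* For tracing (fentry/fexit) programs, look up the target function (the
 * name after the kfunc__/kretfunc__ prefix) in vmlinux BTF and record the
 * expected attach type, so the kernel knows which function the trampoline
 * attaches to.
 */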

if (attr->prog_type == BPF_PROG_TYPE_TRACING) {
attr->attach_btf_id = libbpf_find_vmlinux_btf_id(attr->name + name_offset,
expected_attach_type);
attr->expected_attach_type = expected_attach_type;
}

memcpy(prog_name, attr->name + name_offset,
min(name_len - name_offset, BPF_OBJ_NAME_LEN - 1));
attr->name = prog_name;
@@ -1146,6 +1160,23 @@ int bpf_attach_raw_tracepoint(int progfd, const char *tp_name)
return ret;
}

int bpf_detach_kfunc(int prog_fd, char *func)
{
UNUSED(prog_fd);
UNUSED(func);
return 0;
}

int bpf_attach_kfunc(int prog_fd)
{
int ret;
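
/* fentry/fexit programs are attached through the raw tracepoint open
 * syscall with a NULL name; the kernel finds the target function via the
 * attach_btf_id recorded at program load time.
 */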

ret = bpf_raw_tracepoint_open(NULL, prog_fd);
if (ret < 0)
fprintf(stderr, "bpf_attach_raw_tracepoint (kfunc): %s\n", strerror(errno));
return ret;
}

void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb,
perf_reader_lost_cb lost_cb, void *cb_cookie,
int pid, int cpu, int page_cnt) {
4 changes: 4 additions & 0 deletions src/cc/libbpf.h
@@ -94,6 +94,10 @@ int bpf_detach_tracepoint(const char *tp_category, const char *tp_name);

int bpf_attach_raw_tracepoint(int progfd, const char *tp_name);

int bpf_detach_kfunc(int prog_fd, char *func);

int bpf_attach_kfunc(int prog_fd);

void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb,
perf_reader_lost_cb lost_cb, void *cb_cookie,
int pid, int cpu, int page_cnt);
67 changes: 67 additions & 0 deletions src/python/bcc/__init__.py
@@ -156,6 +156,7 @@ class BPF(object):
SK_MSG = 16
RAW_TRACEPOINT = 17
CGROUP_SOCK_ADDR = 18
TRACING = 26

# from xdp_action uapi/linux/bpf.h
XDP_ABORTED = 0
@@ -164,6 +165,10 @@ class BPF(object):
XDP_TX = 3
XDP_REDIRECT = 4

# from bpf_attach_type uapi/linux/bpf.h
TRACE_FENTRY = 24
TRACE_FEXIT = 25

_probe_repl = re.compile(b"[^a-zA-Z0-9_]")
_sym_caches = {}
_bsymcache = lib.bcc_buildsymcache_new()
@@ -303,6 +308,8 @@ def __init__(self, src_file=b"", hdr_file=b"", text=None, debug=0,
self.uprobe_fds = {}
self.tracepoint_fds = {}
self.raw_tracepoint_fds = {}
self.kfunc_entry_fds = {}
self.kfunc_exit_fds = {}
self.perf_buffers = {}
self.open_perf_events = {}
self.tracefile = None
@@ -869,6 +876,58 @@ def detach_raw_tracepoint(self, tp=b""):
os.close(self.raw_tracepoint_fds[tp])
del self.raw_tracepoint_fds[tp]

@staticmethod
def add_prefix(prefix, name):
if not name.startswith(prefix):
name = prefix + name
return name

def detach_kfunc(self, fn_name=b""):
fn_name = _assert_is_bytes(fn_name)
fn_name = BPF.add_prefix(b"kfunc__", fn_name)

if fn_name not in self.kfunc_entry_fds:
raise Exception("Kernel entry func %s is not attached" % fn_name)
os.close(self.kfunc_entry_fds[fn_name])
del self.kfunc_entry_fds[fn_name]

def detach_kretfunc(self, fn_name=b""):
fn_name = _assert_is_bytes(fn_name)
fn_name = BPF.add_prefix(b"kretfunc__", fn_name)

if fn_name not in self.kfunc_exit_fds:
raise Exception("Kernel exit func %s is not attached" % fn_name)
os.close(self.kfunc_exit_fds[fn_name])
del self.kfunc_exit_fds[fn_name]

def attach_kfunc(self, fn_name=b""):
fn_name = _assert_is_bytes(fn_name)
fn_name = BPF.add_prefix(b"kfunc__", fn_name)

if fn_name in self.kfunc_entry_fds:
raise Exception("Kernel entry func %s has been attached" % fn_name)

fn = self.load_func(fn_name, BPF.TRACING)
fd = lib.bpf_attach_kfunc(fn.fd)
if fd < 0:
raise Exception("Failed to attach BPF to entry kernel func")
self.kfunc_entry_fds[fn_name] = fd
return self

def attach_kretfunc(self, fn_name=b""):
fn_name = _assert_is_bytes(fn_name)
fn_name = BPF.add_prefix(b"kretfunc__", fn_name)

if fn_name in self.kfunc_exit_fds:
raise Exception("Kernel exit func %s has been attached" % fn_name)

fn = self.load_func(fn_name, BPF.TRACING)
fd = lib.bpf_attach_kfunc(fn.fd)
if fd < 0:
raise Exception("Failed to attach BPF to exit kernel func")
self.kfunc_exit_fds[fn_name] = fd
return self

@staticmethod
def support_raw_tracepoint():
# kernel symbol "bpf_find_raw_tracepoint" indicates raw_tracepoint support
@@ -1124,6 +1183,10 @@ def _trace_autoload(self):
fn = self.load_func(func_name, BPF.RAW_TRACEPOINT)
tp = fn.name[len(b"raw_tracepoint__"):]
self.attach_raw_tracepoint(tp=tp, fn_name=fn.name)
elif func_name.startswith(b"kfunc__"):
self.attach_kfunc(fn_name=func_name)
elif func_name.startswith(b"kretfunc__"):
self.attach_kretfunc(fn_name=func_name)

def trace_open(self, nonblocking=False):
"""trace_open(nonblocking=False)
@@ -1353,6 +1416,10 @@ def cleanup(self):
self.detach_tracepoint(k)
for k, v in list(self.raw_tracepoint_fds.items()):
self.detach_raw_tracepoint(k)
for k, v in list(self.kfunc_entry_fds.items()):
self.detach_kfunc(k)
for k, v in list(self.kfunc_exit_fds.items()):
self.detach_kretfunc(k)

# Clean up opened perf ring buffer and perf events
table_keys = list(self.tables.keys())
