Skip to content

Commit 708ac5b

Browse files
anakryikoAlexei Starovoitov
authored andcommitted
libbpf: add ksyscall/kretsyscall sections support for syscall kprobes
Add SEC("ksyscall")/SEC("ksyscall/<syscall_name>") and corresponding kretsyscall variants (for return kprobes) to allow users to kprobe syscall functions in kernel. These special sections allow to ignore complexities and differences between kernel versions and host architectures when it comes to syscall wrapper and corresponding __<arch>_sys_<syscall> vs __se_sys_<syscall> differences, depending on whether host kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER (though libbpf itself doesn't rely on /proc/config.gz for detecting this, see BPF_KSYSCALL patch for how it's done internally). Combined with the use of BPF_KSYSCALL() macro, this allows to just specify intended syscall name and expected input arguments and leave dealing with all the variations to libbpf. In addition to SEC("ksyscall+") and SEC("kretsyscall+") add bpf_program__attach_ksyscall() API which allows to specify syscall name at runtime and provide associated BPF cookie value. At the moment SEC("ksyscall") and bpf_program__attach_ksyscall() do not handle all the calling convention quirks for mmap(), clone() and compat syscalls. It also only attaches to "native" syscall interfaces. If host system supports compat syscalls or defines 32-bit syscalls in 64-bit kernel, such syscall interfaces won't be attached to by libbpf. These limitations may or may not change in the future. Therefore it is recommended to use SEC("kprobe") for these syscalls or if working with compat and 32-bit interfaces is required. Tested-by: Alan Maguire <alan.maguire@oracle.com> Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/r/20220714070755.3235561-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov <ast@kernel.org>
1 parent 6f5d467 commit 708ac5b

File tree

4 files changed

+157
-9
lines changed

4 files changed

+157
-9
lines changed

tools/lib/bpf/libbpf.c

Lines changed: 108 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4670,6 +4670,8 @@ static int probe_kern_btf_enum64(void)
46704670
strs, sizeof(strs)));
46714671
}
46724672

4673+
static int probe_kern_syscall_wrapper(void);
4674+
46734675
enum kern_feature_result {
46744676
FEAT_UNKNOWN = 0,
46754677
FEAT_SUPPORTED = 1,
@@ -4738,6 +4740,9 @@ static struct kern_feature_desc {
47384740
[FEAT_BTF_ENUM64] = {
47394741
"BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
47404742
},
4743+
[FEAT_SYSCALL_WRAPPER] = {
4744+
"Kernel using syscall wrapper", probe_kern_syscall_wrapper,
4745+
},
47414746
};
47424747

47434748
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -8421,6 +8426,7 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log
84218426

84228427
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
84238428
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8429+
static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
84248430
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
84258431
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
84268432
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
@@ -8441,6 +8447,8 @@ static const struct bpf_sec_def section_defs[] = {
84418447
SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
84428448
SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
84438449
SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
8450+
SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
8451+
SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
84448452
SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt),
84458453
SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE),
84468454
SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE),
@@ -9797,7 +9805,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
97979805
{
97989806
struct perf_event_attr attr = {};
97999807
char errmsg[STRERR_BUFSIZE];
9800-
int type, pfd, err;
9808+
int type, pfd;
98019809

98029810
if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
98039811
return -EINVAL;
@@ -9833,14 +9841,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
98339841
pid < 0 ? -1 : pid /* pid */,
98349842
pid == -1 ? 0 : -1 /* cpu */,
98359843
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9836-
if (pfd < 0) {
9837-
err = -errno;
9838-
pr_warn("%s perf_event_open() failed: %s\n",
9839-
uprobe ? "uprobe" : "kprobe",
9840-
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9841-
return err;
9842-
}
9843-
return pfd;
9844+
return pfd >= 0 ? pfd : -errno;
98449845
}
98459846

98469847
static int append_to_file(const char *file, const char *fmt, ...)
@@ -9945,6 +9946,60 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
99459946
return err;
99469947
}
99479948

9949+
/*
 * Pick, at compile time, the architecture tag that is spliced into syscall
 * wrapper symbol names of the form "__<tag>_sys_<syscall>".
 *
 * Returns a string literal for the architectures listed below, or NULL when
 * the target architecture matches none of them.
 */
static const char *arch_specific_syscall_pfx(void)
{
	const char *arch_tag;

#if defined(__x86_64__)
	arch_tag = "x64";
#elif defined(__i386__)
	arch_tag = "ia32";
#elif defined(__s390x__)
	arch_tag = "s390x";
#elif defined(__s390__)
	arch_tag = "s390";
#elif defined(__arm__)
	arch_tag = "arm";
#elif defined(__aarch64__)
	arch_tag = "arm64";
#elif defined(__mips__)
	arch_tag = "mips";
#elif defined(__riscv)
	arch_tag = "riscv";
#else
	/* unknown architecture: callers must treat this as "no wrapper tag" */
	arch_tag = NULL;
#endif

	return arch_tag;
}
9971+
9972+
/*
 * Feature probe: report whether a kprobe can be placed on the
 * arch-specific wrapper symbol of the bpf() syscall, "__<arch>_sys_bpf".
 *
 * Returns 1 when the probe could be created (it is torn down again before
 * returning), 0 when it could not or when the architecture has no known
 * prefix.
 */
static int probe_kern_syscall_wrapper(void)
{
	const char *arch_pfx = arch_specific_syscall_pfx();
	char wrapper_sym[64];
	char legacy_event[128];
	int fd;

	if (arch_pfx == NULL)
		return 0;

	snprintf(wrapper_sym, sizeof(wrapper_sym), "__%s_sys_bpf", arch_pfx);

	if (determine_kprobe_perf_type() < 0) {
		/* legacy mode: go through the add/remove kprobe event helpers */
		gen_kprobe_legacy_event_name(legacy_event, sizeof(legacy_event), wrapper_sym, 0);
		if (add_kprobe_event_legacy(legacy_event, false, wrapper_sym, 0) < 0)
			return 0;

		/* the event only existed to test the symbol; removal result is ignored */
		(void)remove_kprobe_event_legacy(legacy_event, false);
		return 1;
	}

	/* kprobe perf type is available: try to open a probe for our own pid */
	fd = perf_event_open_probe(false, false, wrapper_sym, 0, getpid(), 0);
	if (fd < 0)
		return 0;

	close(fd);
	return 1;
}
10002+
994810003
struct bpf_link *
994910004
bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
995010005
const char *func_name,
@@ -10030,6 +10085,29 @@ struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
1003010085
return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
1003110086
}
1003210087

10088+
struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
10089+
const char *syscall_name,
10090+
const struct bpf_ksyscall_opts *opts)
10091+
{
10092+
LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
10093+
char func_name[128];
10094+
10095+
if (!OPTS_VALID(opts, bpf_ksyscall_opts))
10096+
return libbpf_err_ptr(-EINVAL);
10097+
10098+
if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
10099+
snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
10100+
arch_specific_syscall_pfx(), syscall_name);
10101+
} else {
10102+
snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
10103+
}
10104+
10105+
kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
10106+
kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10107+
10108+
return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
10109+
}
10110+
1003310111
/* Adapted from perf/util/string.c */
1003410112
static bool glob_match(const char *str, const char *pat)
1003510113
{
@@ -10200,6 +10278,27 @@ static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf
1020010278
return libbpf_get_error(*link);
1020110279
}
1020210280

10281+
static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10282+
{
10283+
LIBBPF_OPTS(bpf_ksyscall_opts, opts);
10284+
const char *syscall_name;
10285+
10286+
*link = NULL;
10287+
10288+
/* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
10289+
if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
10290+
return 0;
10291+
10292+
opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
10293+
if (opts.retprobe)
10294+
syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
10295+
else
10296+
syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
10297+
10298+
*link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
10299+
return *link ? 0 : -errno;
10300+
}
10301+
1020310302
static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
1020410303
{
1020510304
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);

tools/lib/bpf/libbpf.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,52 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
457457
const char *pattern,
458458
const struct bpf_kprobe_multi_opts *opts);
459459

460+
/* Options accepted by bpf_program__attach_ksyscall(). */
struct bpf_ksyscall_opts {
461+
/* size of this struct, for forward/backward compatibility */
462+
size_t sz;
463+
/* custom user-provided value fetchable through bpf_get_attach_cookie() */
464+
__u64 bpf_cookie;
465+
/* attach as return probe? */
466+
bool retprobe;
467+
/* zero-width bit-field; NOTE(review): presumably controls trailing padding - confirm */
size_t :0;
468+
};
469+
/* names the last field of the struct; NOTE(review): presumably consumed by OPTS_VALID() - confirm */
#define bpf_ksyscall_opts__last_field retprobe
470+
471+
/**
472+
* @brief **bpf_program__attach_ksyscall()** attaches a BPF program
473+
* to kernel syscall handler of a specified syscall. Optionally it's possible
474+
* to request to install retprobe that will be triggered at syscall exit. It's
475+
* also possible to associate BPF cookie (through options).
476+
*
477+
* Libbpf automatically will determine correct full kernel function name,
478+
* which depending on system architecture and kernel version/configuration
479+
* could be of the form __<arch>_sys_<syscall> or __se_sys_<syscall>, and will
480+
* attach specified program using kprobe/kretprobe mechanism.
481+
*
482+
* **bpf_program__attach_ksyscall()** is an API counterpart of declarative
483+
* **SEC("ksyscall/<syscall>")** annotation of BPF programs.
484+
*
485+
* At the moment **SEC("ksyscall")** and **bpf_program__attach_ksyscall()** do
486+
* not handle all the calling convention quirks for mmap(), clone() and compat
487+
* syscalls. It also only attaches to "native" syscall interfaces. If host
488+
* system supports compat syscalls or defines 32-bit syscalls in 64-bit
489+
* kernel, such syscall interfaces won't be attached to by libbpf.
490+
*
491+
* These limitations may or may not change in the future. Therefore it is
492+
* recommended to use SEC("kprobe") for these syscalls or if working with
493+
* compat and 32-bit interfaces is required.
494+
*
495+
* @param prog BPF program to attach
496+
* @param syscall_name Symbolic name of the syscall (e.g., "bpf")
497+
* @param opts Additional options (see **struct bpf_ksyscall_opts**)
498+
* @return Reference to the newly created BPF link; or NULL is returned on
499+
* error, error code is stored in errno
500+
*/
501+
LIBBPF_API struct bpf_link *
502+
bpf_program__attach_ksyscall(const struct bpf_program *prog,
503+
const char *syscall_name,
504+
const struct bpf_ksyscall_opts *opts);
505+
460506
struct bpf_uprobe_opts {
461507
/* size of this struct, for forward/backward compatibility */
462508
size_t sz;

tools/lib/bpf/libbpf.map

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ LIBBPF_0.8.0 {
356356
LIBBPF_1.0.0 {
357357
global:
358358
bpf_prog_query_opts;
359+
bpf_program__attach_ksyscall;
359360
btf__add_enum64;
360361
btf__add_enum64_value;
361362
libbpf_bpf_attach_type_str;

tools/lib/bpf/libbpf_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,8 @@ enum kern_feature_id {
352352
FEAT_BPF_COOKIE,
353353
/* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */
354354
FEAT_BTF_ENUM64,
355+
/* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */
356+
FEAT_SYSCALL_WRAPPER,
355357
__FEAT_CNT,
356358
};
357359

0 commit comments

Comments
 (0)