Skip to content

Commit ab850ab

Browse files
author
Alexei Starovoitov
committed
Merge branch 'Add SEC("ksyscall") support'
Andrii Nakryiko says: ==================== Add SEC("ksyscall")/SEC("kretsyscall") sections and corresponding bpf_program__attach_ksyscall() API that simplifies tracing kernel syscalls through kprobe mechanism. Kprobing syscalls isn't trivial due to varying syscall handler names in the kernel and various ways syscall arguments are passed, depending on kernel architecture and configuration. SEC("ksyscall") allows user to not care about such details and just get access to syscall input arguments, while libbpf takes care of necessary feature detection logic. There are still more quirks that are not straightforward to hide completely (see comments about mmap(), clone() and compat syscalls), so in such more advanced scenarios user might need to fall back to plain SEC("kprobe") approach, but for absolute majority of users SEC("ksyscall") is a big improvement. As part of this patch set libbpf adds two more virtual __kconfig externs, in addition to existing LINUX_KERNEL_VERSION: LINUX_HAS_BPF_COOKIE and LINUX_HAS_SYSCALL_WRAPPER, which let libbpf-provided BPF-side code minimize external dependencies and assumptions and let the user-space part of libbpf perform all the feature detection logic. This benefits USDT support code, which now doesn't depend on BPF CO-RE for its functionality. v1->v2: - normalize extern variable-related warn and debug message formats (Alan); rfc->v1: - drop dependency on kallsyms and speed up SYSCALL_WRAPPER detection (Alexei); - drop dependency on /proc/config.gz in bpf_tracing.h (Yaniv); - add doc comment and emphasize mmap(), clone() and compat quirks that are not supported (Ilya); - use mechanism similar to LINUX_KERNEL_VERSION to also improve USDT code. ==================== Reviewed-by: Stanislav Fomichev <sdf@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2 parents 9ff5efd + d814ed6 commit ab850ab

File tree

11 files changed

+289
-109
lines changed

11 files changed

+289
-109
lines changed

tools/lib/bpf/bpf_tracing.h

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#ifndef __BPF_TRACING_H__
33
#define __BPF_TRACING_H__
44

5+
#include <bpf/bpf_helpers.h>
6+
57
/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
68
#if defined(__TARGET_ARCH_x86)
79
#define bpf_target_x86
@@ -140,7 +142,7 @@ struct pt_regs___s390 {
140142
#define __PT_RC_REG gprs[2]
141143
#define __PT_SP_REG gprs[15]
142144
#define __PT_IP_REG psw.addr
143-
#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
145+
#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
144146
#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2)
145147

146148
#elif defined(bpf_target_arm)
@@ -174,7 +176,7 @@ struct pt_regs___arm64 {
174176
#define __PT_RC_REG regs[0]
175177
#define __PT_SP_REG sp
176178
#define __PT_IP_REG pc
177-
#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
179+
#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
178180
#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0)
179181

180182
#elif defined(bpf_target_mips)
@@ -493,39 +495,62 @@ typeof(name(0)) name(struct pt_regs *ctx) \
493495
} \
494496
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
495497

498+
/* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */
496499
#define ___bpf_syscall_args0() ctx
497-
#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
498-
#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
499-
#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
500-
#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
501-
#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
500+
#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_SYSCALL(regs)
501+
#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_SYSCALL(regs)
502+
#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs)
503+
#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs)
504+
#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs)
502505
#define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
503506

507+
/* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */
508+
#define ___bpf_syswrap_args0() ctx
509+
#define ___bpf_syswrap_args1(x) ___bpf_syswrap_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
510+
#define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
511+
#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
512+
#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
513+
#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
514+
#define ___bpf_syswrap_args(args...) ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args)
515+
504516
/*
505-
* BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
517+
* BPF_KSYSCALL is a variant of BPF_KPROBE, which is intended for
506518
* tracing syscall functions, like __x64_sys_close. It hides the underlying
507519
* platform-specific low-level way of getting syscall input arguments from
508520
* struct pt_regs, and provides a familiar typed and named function arguments
509521
* syntax and semantics of accessing syscall input parameters.
510522
*
511-
* Original struct pt_regs* context is preserved as 'ctx' argument. This might
523+
* Original struct pt_regs * context is preserved as 'ctx' argument. This might
512524
* be necessary when using BPF helpers like bpf_perf_event_output().
513525
*
514-
* This macro relies on BPF CO-RE support.
526+
* At the moment BPF_KSYSCALL does not handle all the calling convention
527+
* quirks for mmap(), clone() and compat syscalls transparently. This may or
528+
* may not change in the future. User needs to take extra measures to handle
529+
* such quirks explicitly, if necessary.
530+
*
531+
* This macro relies on BPF CO-RE support and virtual __kconfig externs.
515532
*/
516-
#define BPF_KPROBE_SYSCALL(name, args...) \
533+
#define BPF_KSYSCALL(name, args...) \
517534
name(struct pt_regs *ctx); \
535+
extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \
518536
static __attribute__((always_inline)) typeof(name(0)) \
519537
____##name(struct pt_regs *ctx, ##args); \
520538
typeof(name(0)) name(struct pt_regs *ctx) \
521539
{ \
522-
struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx); \
540+
struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER \
541+
? (struct pt_regs *)PT_REGS_PARM1(ctx) \
542+
: ctx; \
523543
_Pragma("GCC diagnostic push") \
524544
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
525-
return ____##name(___bpf_syscall_args(args)); \
545+
if (LINUX_HAS_SYSCALL_WRAPPER) \
546+
return ____##name(___bpf_syswrap_args(args)); \
547+
else \
548+
return ____##name(___bpf_syscall_args(args)); \
526549
_Pragma("GCC diagnostic pop") \
527550
} \
528551
static __attribute__((always_inline)) typeof(name(0)) \
529552
____##name(struct pt_regs *ctx, ##args)
530553

554+
#define BPF_KPROBE_SYSCALL BPF_KSYSCALL
555+
531556
#endif

0 commit comments

Comments
 (0)