UPSTREAM: bpf: permit multiple bpf attachments for a single perf event
This patch enables multiple bpf attachments for a single
kprobe/uprobe/tracepoint trace event. Each trace_event keeps
a list of attached perf events. When the event fires, all
attached bpf programs are executed in the order of attachment.
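
[Editor's note, not part of the commit message: multiple attachments are made
by opening several perf events on the same tracepoint and issuing
PERF_EVENT_IOC_SET_BPF on each; every perf event still carries exactly one
program. A minimal userspace sketch, assuming two already-loaded
BPF_PROG_TYPE_TRACEPOINT program fds and a tracepoint id read from tracefs,
both hypothetical here:

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Sketch only; error handling elided. */
static int open_tp_event(unsigned long long tp_id)
{
        struct perf_event_attr attr = {
                .type = PERF_TYPE_TRACEPOINT,
                .size = sizeof(attr),
                .config = tp_id,        /* id from tracefs events/.../id */
                .sample_period = 1,
        };
        /* all processes, cpu 0, no group leader */
        return syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
}

void attach_two(unsigned long long tp_id, int prog_fd1, int prog_fd2)
{
        int ev1 = open_tp_event(tp_id);
        int ev2 = open_tp_event(tp_id);

        ioctl(ev1, PERF_EVENT_IOC_SET_BPF, prog_fd1);
        ioctl(ev2, PERF_EVENT_IOC_SET_BPF, prog_fd2); /* pre-patch: -EEXIST */
        ioctl(ev1, PERF_EVENT_IOC_ENABLE, 0);
        ioctl(ev2, PERF_EVENT_IOC_ENABLE, 0);
}

End of editor's note.]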

A global bpf_event_mutex lock is introduced to protect
prog_array attaching and detaching. The alternative would be
a mutex lock in every trace_event_call structure, but that
costs a lot of extra memory, so the single global lock is a
good compromise.
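
[Editor's note: the kernel/trace/bpf_trace.c side of this change is not
rendered below (only 5 of the 9 changed files appear in this excerpt). As a
sketch of how the lock is used, modeled on the upstream commit this is
picked from, the attach path serializes on bpf_event_mutex and publishes a
freshly copied array via RCU; event->prog is a per-event field added
elsewhere in this patch:

static DEFINE_MUTEX(bpf_event_mutex);

int perf_event_attach_bpf_prog(struct perf_event *event,
                               struct bpf_prog *prog)
{
        struct bpf_prog_array __rcu *old_array;
        struct bpf_prog_array *new_array;
        int ret = -EEXIST;

        mutex_lock(&bpf_event_mutex);

        if (event->prog)        /* this perf event already has a program */
                goto unlock;

        old_array = rcu_dereference_protected(event->tp_event->prog_array,
                                              lockdep_is_held(&bpf_event_mutex));
        ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
        if (ret < 0)
                goto unlock;

        /* publish the new array, then free the old one after an
         * RCU grace period */
        event->prog = prog;
        rcu_assign_pointer(event->tp_event->prog_array, new_array);
        bpf_prog_array_free(old_array);

unlock:
        mutex_unlock(&bpf_event_mutex);
        return ret;
}

End of editor's note.]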

Detaching a bpf prog involves a memory allocation. If that
allocation fails, a dummy do-nothing program is written over
the to-be-detached program in place.
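
[Editor's note: correspondingly, a sketch of the detach path (same caveats
as above), showing the dummy-program fallback when the copy cannot be
allocated:

void perf_event_detach_bpf_prog(struct perf_event *event)
{
        struct bpf_prog_array __rcu *old_array;
        struct bpf_prog_array *new_array;
        int ret;

        mutex_lock(&bpf_event_mutex);

        if (!event->prog)
                goto unlock;

        old_array = rcu_dereference_protected(event->tp_event->prog_array,
                                              lockdep_is_held(&bpf_event_mutex));
        ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
        if (ret < 0) {
                /* -ENOMEM: overwrite the entry in place with the
                 * do-nothing dummy program instead of shrinking */
                bpf_prog_array_delete_safe(old_array, event->prog);
        } else {
                rcu_assign_pointer(event->tp_event->prog_array, new_array);
                bpf_prog_array_free(old_array);
        }

        bpf_prog_put(event->prog);
        event->prog = NULL;

unlock:
        mutex_unlock(&bpf_event_mutex);
}

End of editor's note.]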

Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit e87c6bc)
Signed-off-by: Connor O'Brien <connoro@google.com>
Bug: 121213201
Bug: 138317270
Test: build & boot cuttlefish; attach 2 progs to 1 tracepoint
Change-Id: I25ce1ed6c9512d0a6f2db7547e109958fe1619b6
yonghong-song authored and cobrien7 committed Dec 12, 2019
1 parent 1040872 commit 5179a6a
Showing 9 changed files with 255 additions and 56 deletions.
include/linux/bpf.h (25 additions, 5 deletions)
@@ -275,18 +275,38 @@ struct bpf_prog_array {
 struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
 void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
 
-#define BPF_PROG_RUN_ARRAY(array, ctx, func)            \
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+                                struct bpf_prog *old_prog);
+int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+                        struct bpf_prog *exclude_prog,
+                        struct bpf_prog *include_prog,
+                        struct bpf_prog_array **new_array);
+
+#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null)  \
         ({                                              \
-                struct bpf_prog **_prog;                \
+                struct bpf_prog **_prog, *__prog;       \
+                struct bpf_prog_array *_array;          \
                 u32 _ret = 1;                           \
                 rcu_read_lock();                        \
-                _prog = rcu_dereference(array)->progs;  \
-                for (; *_prog; _prog++)                 \
-                        _ret &= func(*_prog, ctx);      \
+                _array = rcu_dereference(array);        \
+                if (unlikely(check_non_null && !_array))\
+                        goto _out;                      \
+                _prog = _array->progs;                  \
+                while ((__prog = READ_ONCE(*_prog))) {  \
+                        _ret &= func(__prog, ctx);      \
+                        _prog++;                        \
+                }                                       \
+_out:                                                   \
                 rcu_read_unlock();                      \
                 _ret;                                   \
         })
+
+#define BPF_PROG_RUN_ARRAY(array, ctx, func)            \
+        __BPF_PROG_RUN_ARRAY(array, ctx, func, false)
+
+#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func)      \
+        __BPF_PROG_RUN_ARRAY(array, ctx, func, true)
 
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
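
[Editor's note: the _CHECK variant exists because a tracepoint's
call->prog_array may be NULL, or become NULL concurrently, once the last
program is detached. Its main consumer is trace_call_bpf() in
kernel/trace/bpf_trace.c, not rendered in this excerpt; roughly, per the
upstream commit:

unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
        unsigned int ret;

        if (in_nmi())   /* not supported yet */
                return 1;

        preempt_disable();

        if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
                /* recursion (e.g. a kprobe inside a bpf helper): skip */
                ret = 0;
                goto out;
        }

        /* 1 only if every attached program returned 1;
         * an empty or NULL array also yields 1 */
        ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);

out:
        __this_cpu_dec(bpf_prog_active);
        preempt_enable();

        return ret;
}

End of editor's note.]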
include/linux/trace_events.h (39 additions, 4 deletions)
@@ -272,14 +272,37 @@ struct trace_event_call {
 #ifdef CONFIG_PERF_EVENTS
         int                             perf_refcount;
         struct hlist_head __percpu      *perf_events;
-        struct bpf_prog                 *prog;
-        struct perf_event               *bpf_prog_owner;
+        struct bpf_prog_array __rcu     *prog_array;
 
         int     (*perf_perm)(struct trace_event_call *,
                              struct perf_event *);
 #endif
 };
 
+#ifdef CONFIG_PERF_EVENTS
+static inline bool bpf_prog_array_valid(struct trace_event_call *call)
+{
+        /*
+         * This inline function checks whether call->prog_array
+         * is valid or not. The function is called in various places,
+         * outside rcu_read_lock/unlock, as a heuristic to speed up execution.
+         *
+         * If this function returns true, and later call->prog_array
+         * becomes false inside rcu_read_lock/unlock region,
+         * we bail out then. If this function return false,
+         * there is a risk that we might miss a few events if the checking
+         * were delayed until inside rcu_read_lock/unlock region and
+         * call->prog_array happened to become non-NULL then.
+         *
+         * Here, READ_ONCE() is used instead of rcu_access_pointer().
+         * rcu_access_pointer() requires the actual definition of
+         * "struct bpf_prog_array" while READ_ONCE() only needs
+         * a declaration of the same type.
+         */
+        return !!READ_ONCE(call->prog_array);
+}
+#endif
+
 static inline const char *
 trace_event_name(struct trace_event_call *call)
 {
@@ -436,12 +459,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
 }
 
 #ifdef CONFIG_BPF_EVENTS
-unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
+unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
+int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
+void perf_event_detach_bpf_prog(struct perf_event *event);
 #else
-static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 {
         return 1;
 }
+
+static inline int
+perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
+{
+        return -EOPNOTSUPP;
+}
+
+static inline void perf_event_detach_bpf_prog(struct perf_event *event) { }
+
 #endif
 
 enum {
@@ -512,6 +546,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
 {
         perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event);
 }
+
 #endif
 
 #endif /* _LINUX_TRACE_EVENT_H */
include/trace/perf.h (3 additions, 3 deletions)
@@ -35,7 +35,6 @@ perf_trace_##call(void *__data, proto) \
         struct trace_event_call *event_call = __data;                  \
         struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
         struct trace_event_raw_##call *entry;                          \
-        struct bpf_prog *prog = event_call->prog;                      \
         struct pt_regs *__regs;                                        \
         u64 __count = 1;                                               \
         struct task_struct *__task = NULL;                             \
@@ -47,8 +46,9 @@ perf_trace_##call(void *__data, proto) \
         __data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
                                                                        \
         head = this_cpu_ptr(event_call->perf_events);                  \
-        if (!prog && __builtin_constant_p(!__task) && !__task &&       \
-            hlist_empty(head))                                         \
+        if (!bpf_prog_array_valid(event_call) &&                       \
+            __builtin_constant_p(!__task) && !__task &&                \
+            hlist_empty(head))                                         \
                 return;                                                \
                                                                        \
         __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
kernel/bpf/core.c (81 additions, 0 deletions)
@@ -1481,6 +1481,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
+static unsigned int __bpf_prog_ret1(const void *ctx,
+                                    const struct bpf_insn *insn)
+{
+        return 1;
+}
+
+static struct bpf_prog_dummy {
+        struct bpf_prog prog;
+} dummy_bpf_prog = {
+        .prog = {
+                .bpf_func = __bpf_prog_ret1,
+        },
+};
+
 /* to avoid allocating empty bpf_prog_array for cgroups that
  * don't have bpf program attached use one global 'empty_prog_array'
  * It will not be modified the caller of bpf_prog_array_alloc()
@@ -1512,6 +1526,73 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
         kfree_rcu(progs, rcu);
 }
 
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+                                struct bpf_prog *old_prog)
+{
+        struct bpf_prog **prog = progs->progs;
+
+        for (; *prog; prog++)
+                if (*prog == old_prog) {
+                        WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
+                        break;
+                }
+}
+
+int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+                        struct bpf_prog *exclude_prog,
+                        struct bpf_prog *include_prog,
+                        struct bpf_prog_array **new_array)
+{
+        int new_prog_cnt, carry_prog_cnt = 0;
+        struct bpf_prog **existing_prog;
+        struct bpf_prog_array *array;
+        int new_prog_idx = 0;
+
+        /* Figure out how many existing progs we need to carry over to
+         * the new array.
+         */
+        if (old_array) {
+                existing_prog = old_array->progs;
+                for (; *existing_prog; existing_prog++) {
+                        if (*existing_prog != exclude_prog &&
+                            *existing_prog != &dummy_bpf_prog.prog)
+                                carry_prog_cnt++;
+                        if (*existing_prog == include_prog)
+                                return -EEXIST;
+                }
+        }
+
+        /* How many progs (not NULL) will be in the new array? */
+        new_prog_cnt = carry_prog_cnt;
+        if (include_prog)
+                new_prog_cnt += 1;
+
+        /* Do we have any prog (not NULL) in the new array? */
+        if (!new_prog_cnt) {
+                *new_array = NULL;
+                return 0;
+        }
+
+        /* +1 as the end of prog_array is marked with NULL */
+        array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
+        if (!array)
+                return -ENOMEM;
+
+        /* Fill in the new prog array */
+        if (carry_prog_cnt) {
+                existing_prog = old_array->progs;
+                for (; *existing_prog; existing_prog++)
+                        if (*existing_prog != exclude_prog &&
+                            *existing_prog != &dummy_bpf_prog.prog)
+                                array->progs[new_prog_idx++] = *existing_prog;
+        }
+        if (include_prog)
+                array->progs[new_prog_idx++] = include_prog;
+        array->progs[new_prog_idx] = NULL;
+        *new_array = array;
+        return 0;
+}
+
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
         struct bpf_prog_aux *aux;
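
[Editor's note: taken together, bpf_prog_array_copy() covers attach
(exclude_prog == NULL), detach (include_prog == NULL), and replace (both
set); it returns -EEXIST if include_prog is already present and sets
*new_array to NULL when nothing remains. A purely illustrative caller;
prog_array_replace is hypothetical, not part of the commit, and the caller
is assumed to hold the mutex protecting the array:

static int prog_array_replace(struct bpf_prog_array __rcu **array,
                              struct bpf_prog *old_prog,
                              struct bpf_prog *new_prog)
{
        struct bpf_prog_array __rcu *old = *array;
        struct bpf_prog_array *copy;
        int err;

        err = bpf_prog_array_copy(old, old_prog, new_prog, &copy);
        if (err)        /* -EEXIST or -ENOMEM */
                return err;

        rcu_assign_pointer(*array, copy);  /* copy may be NULL if empty */
        bpf_prog_array_free(old);          /* frees after a grace period */
        return 0;
}

End of editor's note.]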
kernel/events/core.c (8 additions, 18 deletions)
@@ -8037,11 +8037,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
                                struct pt_regs *regs, struct hlist_head *head,
                                struct task_struct *task)
 {
-        struct bpf_prog *prog = call->prog;
-
-        if (prog) {
+        if (bpf_prog_array_valid(call)) {
                 *(struct pt_regs **)raw_data = regs;
-                if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) {
+                if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
                         perf_swevent_put_recursion_context(rctx);
                         return;
                 }
@@ -8231,13 +8229,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 {
         bool is_kprobe, is_tracepoint, is_syscall_tp;
         struct bpf_prog *prog;
+        int ret;
 
         if (event->attr.type != PERF_TYPE_TRACEPOINT)
                 return perf_event_set_bpf_handler(event, prog_fd);
 
-        if (event->tp_event->prog)
-                return -EEXIST;
-
         is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
         is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
         is_syscall_tp = is_syscall_trace_event(event->tp_event);
@@ -8265,26 +8261,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
                         return -EACCES;
                 }
         }
-        event->tp_event->prog = prog;
-        event->tp_event->bpf_prog_owner = event;
 
-        return 0;
+        ret = perf_event_attach_bpf_prog(event, prog);
+        if (ret)
+                bpf_prog_put(prog);
+        return ret;
 }
 
 static void perf_event_free_bpf_prog(struct perf_event *event)
 {
-        struct bpf_prog *prog;
-
         if (event->attr.type != PERF_TYPE_TRACEPOINT) {
                 perf_event_free_bpf_handler(event);
                 return;
         }
 
-        prog = event->tp_event->prog;
-        if (prog && event->tp_event->bpf_prog_owner == event) {
-                event->tp_event->prog = NULL;
-                bpf_prog_put(prog);
-        }
+        perf_event_detach_bpf_prog(event);
 }
 
 #else
