Skip to content

Commit 07dc3a6

Browse files
Tengda Wu and namhyung
authored and committed
perf stat: Support inherit events during fork() for bperf
bperf has a nice ability to share PMUs, but it still does not support inheriting events during fork(), resulting in some deviations in its stat results compared with perf.

perf stat result:

  $ ./perf stat -e cycles,instructions -- ./perf test -w sqrtloop

   Performance counter stats for './perf test -w sqrtloop':

       2,316,038,116      cycles
       2,859,350,725      instructions

         1.009603637 seconds time elapsed

         1.004196000 seconds user
         0.003950000 seconds sys

bperf stat result:

  $ ./perf stat --bpf-counters -e cycles,instructions -- \
        ./perf test -w sqrtloop

   Performance counter stats for './perf test -w sqrtloop':

          18,762,093      cycles
          23,487,766      instructions

         1.008913769 seconds time elapsed

         1.003248000 seconds user
         0.004069000 seconds sys

In order to support event inheritance, two new bpf programs are added to monitor the fork and exit of tasks respectively. When a task is created, add it to the filter map to enable counting, and reuse the `accum_key` of its parent task to count together with the parent task. When a task exits, remove it from the filter map to disable counting.

After support:

  $ ./perf stat --bpf-counters -e cycles,instructions -- \
        ./perf test -w sqrtloop

   Performance counter stats for './perf test -w sqrtloop':

       2,316,252,189      cycles
       2,859,946,547      instructions

         1.009422314 seconds time elapsed

         1.003597000 seconds user
         0.004270000 seconds sys

Signed-off-by: Tengda Wu <wutengda@huaweicloud.com>
Cc: song@kernel.org
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20241021110201.325617-2-wutengda@huaweicloud.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
1 parent ba993e5 commit 07dc3a6

File tree

5 files changed

+126
-14
lines changed

5 files changed

+126
-14
lines changed

tools/perf/builtin-stat.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2641,6 +2641,7 @@ int cmd_stat(int argc, const char **argv)
26412641
} else if (big_num_opt == 0) /* User passed --no-big-num */
26422642
stat_config.big_num = false;
26432643

2644+
target.inherit = !stat_config.no_inherit;
26442645
err = target__validate(&target);
26452646
if (err) {
26462647
target__strerror(&target, err, errbuf, BUFSIZ);

tools/perf/util/bpf_counter.c

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ static int bperf_check_target(struct evsel *evsel,
394394
}
395395

396396
static struct perf_cpu_map *all_cpu_map;
397+
static __u32 filter_entry_cnt;
397398

398399
static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
399400
struct perf_event_attr_map_entry *entry)
@@ -444,12 +445,32 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
444445
return err;
445446
}
446447

448+
static int bperf_attach_follower_program(struct bperf_follower_bpf *skel,
449+
enum bperf_filter_type filter_type,
450+
bool inherit)
451+
{
452+
struct bpf_link *link;
453+
int err = 0;
454+
455+
if ((filter_type == BPERF_FILTER_PID ||
456+
filter_type == BPERF_FILTER_TGID) && inherit)
457+
/* attach all follower bpf progs to enable event inheritance */
458+
err = bperf_follower_bpf__attach(skel);
459+
else {
460+
link = bpf_program__attach(skel->progs.fexit_XXX);
461+
if (IS_ERR(link))
462+
err = PTR_ERR(link);
463+
}
464+
465+
return err;
466+
}
467+
447468
static int bperf__load(struct evsel *evsel, struct target *target)
448469
{
449470
struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
450471
int attr_map_fd, diff_map_fd = -1, err;
451472
enum bperf_filter_type filter_type;
452-
__u32 filter_entry_cnt, i;
473+
__u32 i;
453474

454475
if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
455476
return -1;
@@ -529,9 +550,6 @@ static int bperf__load(struct evsel *evsel, struct target *target)
529550
/* set up reading map */
530551
bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings,
531552
filter_entry_cnt);
532-
/* set up follower filter based on target */
533-
bpf_map__set_max_entries(evsel->follower_skel->maps.filter,
534-
filter_entry_cnt);
535553
err = bperf_follower_bpf__load(evsel->follower_skel);
536554
if (err) {
537555
pr_err("Failed to load follower skeleton\n");
@@ -543,6 +561,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
543561
for (i = 0; i < filter_entry_cnt; i++) {
544562
int filter_map_fd;
545563
__u32 key;
564+
struct bperf_filter_value fval = { i, 0 };
546565

547566
if (filter_type == BPERF_FILTER_PID ||
548567
filter_type == BPERF_FILTER_TGID)
@@ -553,12 +572,14 @@ static int bperf__load(struct evsel *evsel, struct target *target)
553572
break;
554573

555574
filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter);
556-
bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY);
575+
bpf_map_update_elem(filter_map_fd, &key, &fval, BPF_ANY);
557576
}
558577

559578
evsel->follower_skel->bss->type = filter_type;
579+
evsel->follower_skel->bss->inherit = target->inherit;
560580

561-
err = bperf_follower_bpf__attach(evsel->follower_skel);
581+
err = bperf_attach_follower_program(evsel->follower_skel, filter_type,
582+
target->inherit);
562583

563584
out:
564585
if (err && evsel->bperf_leader_link_fd >= 0)
@@ -623,7 +644,7 @@ static int bperf__read(struct evsel *evsel)
623644
bperf_sync_counters(evsel);
624645
reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
625646

626-
for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
647+
for (i = 0; i < filter_entry_cnt; i++) {
627648
struct perf_cpu entry;
628649
__u32 cpu;
629650

tools/perf/util/bpf_skel/bperf_follower.bpf.c

Lines changed: 91 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
#include <bpf/bpf_tracing.h>
66
#include "bperf_u.h"
77

8+
#define MAX_ENTRIES 102400
9+
810
struct {
911
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
1012
__uint(key_size, sizeof(__u32));
@@ -22,25 +24,29 @@ struct {
2224
struct {
2325
__uint(type, BPF_MAP_TYPE_HASH);
2426
__uint(key_size, sizeof(__u32));
25-
__uint(value_size, sizeof(__u32));
27+
__uint(value_size, sizeof(struct bperf_filter_value));
28+
__uint(max_entries, MAX_ENTRIES);
29+
__uint(map_flags, BPF_F_NO_PREALLOC);
2630
} filter SEC(".maps");
2731

2832
enum bperf_filter_type type = 0;
2933
int enabled = 0;
34+
int inherit;
3035

3136
SEC("fexit/XXX")
3237
int BPF_PROG(fexit_XXX)
3338
{
3439
struct bpf_perf_event_value *diff_val, *accum_val;
3540
__u32 filter_key, zero = 0;
36-
__u32 *accum_key;
41+
__u32 accum_key;
42+
struct bperf_filter_value *fval;
3743

3844
if (!enabled)
3945
return 0;
4046

4147
switch (type) {
4248
case BPERF_FILTER_GLOBAL:
43-
accum_key = &zero;
49+
accum_key = zero;
4450
goto do_add;
4551
case BPERF_FILTER_CPU:
4652
filter_key = bpf_get_smp_processor_id();
@@ -49,22 +55,34 @@ int BPF_PROG(fexit_XXX)
4955
filter_key = bpf_get_current_pid_tgid() & 0xffffffff;
5056
break;
5157
case BPERF_FILTER_TGID:
52-
filter_key = bpf_get_current_pid_tgid() >> 32;
58+
/* Use pid as the filter_key to exclude new task counts
59+
* when inherit is disabled. Don't worry about the existing
60+
* children in TGID losing their counts, bpf_counter has
61+
* already added them to the filter map via perf_thread_map
62+
* before this bpf prog runs.
63+
*/
64+
filter_key = inherit ?
65+
bpf_get_current_pid_tgid() >> 32 :
66+
bpf_get_current_pid_tgid() & 0xffffffff;
5367
break;
5468
default:
5569
return 0;
5670
}
5771

58-
accum_key = bpf_map_lookup_elem(&filter, &filter_key);
59-
if (!accum_key)
72+
fval = bpf_map_lookup_elem(&filter, &filter_key);
73+
if (!fval)
6074
return 0;
6175

76+
accum_key = fval->accum_key;
77+
if (fval->exited)
78+
bpf_map_delete_elem(&filter, &filter_key);
79+
6280
do_add:
6381
diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
6482
if (!diff_val)
6583
return 0;
6684

67-
accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
85+
accum_val = bpf_map_lookup_elem(&accum_readings, &accum_key);
6886
if (!accum_val)
6987
return 0;
7088

@@ -75,4 +93,70 @@ int BPF_PROG(fexit_XXX)
7593
return 0;
7694
}
7795

96+
/* The program is only used for PID or TGID filter types. */
97+
SEC("tp_btf/task_newtask")
98+
int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags)
99+
{
100+
__u32 parent_key, child_key;
101+
struct bperf_filter_value *parent_fval;
102+
struct bperf_filter_value child_fval = { 0 };
103+
104+
if (!enabled)
105+
return 0;
106+
107+
switch (type) {
108+
case BPERF_FILTER_PID:
109+
parent_key = bpf_get_current_pid_tgid() & 0xffffffff;
110+
child_key = task->pid;
111+
break;
112+
case BPERF_FILTER_TGID:
113+
parent_key = bpf_get_current_pid_tgid() >> 32;
114+
child_key = task->tgid;
115+
if (child_key == parent_key)
116+
return 0;
117+
break;
118+
default:
119+
return 0;
120+
}
121+
122+
/* Check if the current task is one of the target tasks to be counted */
123+
parent_fval = bpf_map_lookup_elem(&filter, &parent_key);
124+
if (!parent_fval)
125+
return 0;
126+
127+
/* Start counting for the new task by adding it into filter map,
128+
* inherit the accum key of its parent task so that they can be
129+
* counted together.
130+
*/
131+
child_fval.accum_key = parent_fval->accum_key;
132+
child_fval.exited = 0;
133+
bpf_map_update_elem(&filter, &child_key, &child_fval, BPF_NOEXIST);
134+
135+
return 0;
136+
}
137+
138+
/* The program is only used for PID or TGID filter types. */
139+
SEC("tp_btf/sched_process_exit")
140+
int BPF_PROG(on_exittask, struct task_struct *task)
141+
{
142+
__u32 pid;
143+
struct bperf_filter_value *fval;
144+
145+
if (!enabled)
146+
return 0;
147+
148+
/* Stop counting for this task by removing it from filter map.
149+
* For TGID type, if the pid can be found in the map, it means that
150+
* this pid belongs to the leader task. After the task exits, the
151+
* tgid of its child tasks (if any) will be 1, so the pid can be
152+
* safely removed.
153+
*/
154+
pid = task->pid;
155+
fval = bpf_map_lookup_elem(&filter, &pid);
156+
if (fval)
157+
fval->exited = 1;
158+
159+
return 0;
160+
}
161+
78162
char LICENSE[] SEC("license") = "Dual BSD/GPL";

tools/perf/util/bpf_skel/bperf_u.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,9 @@ enum bperf_filter_type {
1111
BPERF_FILTER_TGID,
1212
};
1313

14+
/*
 * Value type of the follower "filter" map.
 * accum_key: key used to look up the task's slot in accum_readings.
 * exited:    set to 1 by on_exittask; fexit_XXX deletes the filter entry
 *            when it finds this flag set.
 */
struct bperf_filter_value {
15+
__u32 accum_key;
16+
__u8 exited;
17+
};
18+
1419
#endif /* __BPERF_STAT_U_H */

tools/perf/util/target.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ struct target {
1717
bool default_per_cpu;
1818
bool per_thread;
1919
bool use_bpf;
20+
bool inherit;
2021
int initial_delay;
2122
const char *attr_map;
2223
};

0 commit comments

Comments
 (0)