Skip to content

Commit 9d6a1df

Browse files
captain5050 authored and acmel committed
perf pmus: Allow just core PMU scanning
Scanning all PMUs is expensive as all PMUs sysfs entries are loaded, benchmarking shows more than 4x the cost:

```
$ perf bench internals pmu-scan -i 1000
Computing performance of sysfs PMU event scan for 1000 times
Average core PMU scanning took: 989.231 usec (+- 1.535 usec)
Average PMU scanning took: 4309.425 usec (+- 74.322 usec)
```

Add new perf_pmus__scan_core routine that scans just core PMUs. Replace perf_pmus__scan calls with perf_pmus__scan_core when non-core PMUs are being ignored.

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ali Saidi <alisaidi@amazon.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Dmitrii Dolgov <9erthalion6@gmail.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jing Zhang <renyu.zj@linux.alibaba.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kang Minchul <tegongkang@gmail.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Ming Wang <wangming01@loongson.cn>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20230527072210.2900565-30-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent 15c57a8 commit 9d6a1df

File tree

14 files changed

+75
-96
lines changed

14 files changed

+75
-96
lines changed

tools/perf/arch/arm64/util/pmu.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,7 @@ static struct perf_pmu *pmu__find_core_pmu(void)
1111
{
1212
struct perf_pmu *pmu = NULL;
1313

14-
while ((pmu = perf_pmus__scan(pmu))) {
15-
if (!is_pmu_core(pmu->name))
16-
continue;
17-
14+
while ((pmu = perf_pmus__scan_core(pmu))) {
1815
/*
1916
* The cpumap should cover all CPUs. Otherwise, some CPUs may
2017
* not support some events or have different event IDs.

tools/perf/arch/x86/util/evlist.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,10 @@ static int ___evlist__add_default_attrs(struct evlist *evlist,
3333
continue;
3434
}
3535

36-
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
36+
while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
3737
struct perf_cpu_map *cpus;
3838
struct evsel *evsel;
3939

40-
if (!pmu->is_core)
41-
continue;
42-
4340
evsel = evsel__new(attrs + i);
4441
if (evsel == NULL)
4542
goto out_delete_partial_list;

tools/perf/arch/x86/util/perf_regs.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -300,11 +300,9 @@ uint64_t arch__intr_reg_mask(void)
300300
* The same register set is supported among different hybrid PMUs.
301301
* Only check the first available one.
302302
*/
303-
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
304-
if (pmu->is_core) {
305-
type = pmu->type;
306-
break;
307-
}
303+
while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
304+
type = pmu->type;
305+
break;
308306
}
309307
attr.config |= type << PERF_PMU_TYPE_SHIFT;
310308
}

tools/perf/bench/pmu-scan.c

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ struct pmu_scan_result {
2222
int nr_aliases;
2323
int nr_formats;
2424
int nr_caps;
25+
bool is_core;
2526
};
2627

2728
static const struct option options[] = {
@@ -53,6 +54,7 @@ static int save_result(void)
5354
r = results + nr_pmus;
5455

5556
r->name = strdup(pmu->name);
57+
r->is_core = pmu->is_core;
5658
r->nr_caps = pmu->nr_caps;
5759

5860
r->nr_aliases = 0;
@@ -72,7 +74,7 @@ static int save_result(void)
7274
return 0;
7375
}
7476

75-
static int check_result(void)
77+
static int check_result(bool core_only)
7678
{
7779
struct pmu_scan_result *r;
7880
struct perf_pmu *pmu;
@@ -81,6 +83,9 @@ static int check_result(void)
8183

8284
for (int i = 0; i < nr_pmus; i++) {
8385
r = &results[i];
86+
if (core_only && !r->is_core)
87+
continue;
88+
8489
pmu = perf_pmus__find(r->name);
8590
if (pmu == NULL) {
8691
pr_err("Cannot find PMU %s\n", r->name);
@@ -130,7 +135,6 @@ static int run_pmu_scan(void)
130135
struct timeval start, end, diff;
131136
double time_average, time_stddev;
132137
u64 runtime_us;
133-
unsigned int i;
134138
int ret;
135139

136140
init_stats(&stats);
@@ -142,26 +146,30 @@ static int run_pmu_scan(void)
142146
return -1;
143147
}
144148

145-
for (i = 0; i < iterations; i++) {
146-
gettimeofday(&start, NULL);
147-
perf_pmus__scan(NULL);
148-
gettimeofday(&end, NULL);
149-
150-
timersub(&end, &start, &diff);
151-
runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
152-
update_stats(&stats, runtime_us);
153-
154-
ret = check_result();
155-
perf_pmus__destroy();
156-
if (ret < 0)
157-
break;
149+
for (int j = 0; j < 2; j++) {
150+
bool core_only = (j == 0);
151+
152+
for (unsigned int i = 0; i < iterations; i++) {
153+
gettimeofday(&start, NULL);
154+
if (core_only)
155+
perf_pmus__scan_core(NULL);
156+
else
157+
perf_pmus__scan(NULL);
158+
gettimeofday(&end, NULL);
159+
timersub(&end, &start, &diff);
160+
runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
161+
update_stats(&stats, runtime_us);
162+
163+
ret = check_result(core_only);
164+
perf_pmus__destroy();
165+
if (ret < 0)
166+
break;
167+
}
168+
time_average = avg_stats(&stats);
169+
time_stddev = stddev_stats(&stats);
170+
pr_info(" Average%s PMU scanning took: %.3f usec (+- %.3f usec)\n",
171+
core_only ? " core" : "", time_average, time_stddev);
158172
}
159-
160-
time_average = avg_stats(&stats);
161-
time_stddev = stddev_stats(&stats);
162-
pr_info(" Average PMU scanning took: %.3f usec (+- %.3f usec)\n",
163-
time_average, time_stddev);
164-
165173
delete_result();
166174
return 0;
167175
}

tools/perf/tests/pmu-events.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -709,12 +709,9 @@ static int test__aliases(struct test_suite *test __maybe_unused,
709709
struct perf_pmu *pmu = NULL;
710710
unsigned long i;
711711

712-
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
712+
while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
713713
int count = 0;
714714

715-
if (!is_pmu_core(pmu->name))
716-
continue;
717-
718715
if (list_empty(&pmu->format)) {
719716
pr_debug2("skipping testing core PMU %s\n", pmu->name);
720717
continue;

tools/perf/util/cputopo.c

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -477,10 +477,9 @@ struct hybrid_topology *hybrid_topology__new(void)
477477
if (!perf_pmus__has_hybrid())
478478
return NULL;
479479

480-
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
481-
if (pmu->is_core)
482-
nr++;
483-
}
480+
while ((pmu = perf_pmus__scan_core(pmu)) != NULL)
481+
nr++;
482+
484483
if (nr == 0)
485484
return NULL;
486485

@@ -489,10 +488,7 @@ struct hybrid_topology *hybrid_topology__new(void)
489488
return NULL;
490489

491490
tp->nr = nr;
492-
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
493-
if (!pmu->is_core)
494-
continue;
495-
491+
while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
496492
if (load_hybrid_node(&tp->nodes[i], pmu)) {
497493
hybrid_topology__delete(tp);
498494
return NULL;

tools/perf/util/header.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1607,10 +1607,7 @@ static int write_pmu_caps(struct feat_fd *ff,
16071607
*/
16081608
if (perf_pmus__has_hybrid()) {
16091609
pmu = NULL;
1610-
while ((pmu = perf_pmus__scan(pmu))) {
1611-
if (!pmu->is_core)
1612-
continue;
1613-
1610+
while ((pmu = perf_pmus__scan_core(pmu))) {
16141611
ret = __write_pmu_caps(ff, pmu, true);
16151612
if (ret < 0)
16161613
return ret;

tools/perf/util/mem-events.c

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,7 @@ int perf_mem_events__init(void)
136136
} else {
137137
struct perf_pmu *pmu = NULL;
138138

139-
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
140-
if (!pmu->is_core)
141-
continue;
142-
139+
while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
143140
scnprintf(sysfs_name, sizeof(sysfs_name),
144141
e->sysfs_name, pmu->name);
145142
e->supported |= perf_mem_event__supported(mnt, sysfs_name);
@@ -176,10 +173,7 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
176173
char sysfs_name[100];
177174
struct perf_pmu *pmu = NULL;
178175

179-
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
180-
if (!pmu->is_core)
181-
continue;
182-
176+
while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
183177
scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
184178
pmu->name);
185179
if (!perf_mem_event__supported(mnt, sysfs_name)) {
@@ -217,9 +211,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
217211
return -1;
218212
}
219213

220-
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
221-
if (!pmu->is_core)
222-
continue;
214+
while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
223215
rec_argv[i++] = "-e";
224216
s = perf_mem_events__name(j, pmu->name);
225217
if (s) {

tools/perf/util/parse-events.c

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -453,15 +453,12 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
453453
const char *config_name = get_config_name(head_config);
454454
const char *metric_id = get_config_metric_id(head_config);
455455

456-
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
456+
/* Legacy cache events are only supported by core PMUs. */
457+
while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
457458
LIST_HEAD(config_terms);
458459
struct perf_event_attr attr;
459460
int ret;
460461

461-
/* Skip unsupported PMUs. */
462-
if (!perf_pmu__supports_legacy_cache(pmu))
463-
continue;
464-
465462
if (parse_events__filter_pmu(parse_state, pmu))
466463
continue;
467464

@@ -1481,12 +1478,10 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
14811478
return __parse_events_add_numeric(parse_state, list, /*pmu=*/NULL,
14821479
type, config, head_config);
14831480

1484-
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
1481+
/* Wildcards on numeric values are only supported by core PMUs. */
1482+
while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
14851483
int ret;
14861484

1487-
if (!perf_pmu__supports_wildcard_numeric(pmu))
1488-
continue;
1489-
14901485
if (parse_events__filter_pmu(parse_state, pmu))
14911486
continue;
14921487

tools/perf/util/pmu.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,21 +1427,11 @@ bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu)
14271427
return pmu->is_core;
14281428
}
14291429

1430-
bool perf_pmu__supports_wildcard_numeric(const struct perf_pmu *pmu)
1431-
{
1432-
return pmu->is_core;
1433-
}
1434-
14351430
bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu)
14361431
{
14371432
return !is_pmu_hybrid(pmu->name);
14381433
}
14391434

1440-
bool perf_pmu__is_mem_pmu(const struct perf_pmu *pmu)
1441-
{
1442-
return pmu->is_core;
1443-
}
1444-
14451435
bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name)
14461436
{
14471437
struct perf_pmu_alias *alias;

0 commit comments

Comments
 (0)