
Commit 7c9903c

Peter Zijlstra authored and Ingo Molnar committed
x86/perf, static_call: Optimize x86_pmu methods
Replace many of the indirect calls with static_call().

The average PMI time, as measured by perf_sample_event_took()*:

  PRE:  3283.03 [ns]
  POST: 3145.12 [ns]

Which is a ~138 [ns] win per PMI, or a ~4.2% decrease.

[*] on an IVB-EP, using: 'perf record -a -e cycles -- make O=defconfig-build/ -j80'

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20200818135805.338001015@infradead.org
1 parent: d25e37d
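For context, the pattern applied throughout the diff below is the generic static_call API from <linux/static_call.h>: a static call is defined from the type of a function-pointer member, patched once at init time with static_call_update(), and then invoked through static_call() for mandatory methods or static_call_cond() for optional (possibly NULL, void-returning) ones. The following is a minimal, hypothetical sketch of that pattern; "widget_ops" and its methods are stand-ins for illustration only, not part of this patch.

#include <linux/static_call.h>

/* Hypothetical ops structure; a stand-in for struct x86_pmu. */
struct widget_ops {
	void (*enable_all)(int added);	/* mandatory once initialized */
	void (*add)(int idx);		/* optional; may stay NULL */
	void (*read)(int idx);		/* optional; given a default at init */
};

static struct widget_ops widget_ops;

/*
 * DEFINE_STATIC_CALL_NULL() only needs a *type*; dereferencing the
 * function-pointer member supplies it without naming a real function.
 * The call target starts out NULL.
 */
DEFINE_STATIC_CALL_NULL(widget_enable_all, *widget_ops.enable_all);
DEFINE_STATIC_CALL_NULL(widget_add, *widget_ops.add);
DEFINE_STATIC_CALL_NULL(widget_read, *widget_ops.read);

/* Patch the call sites once the backend has filled in widget_ops. */
static void widget_static_call_update(void)
{
	static_call_update(widget_enable_all, widget_ops.enable_all);
	static_call_update(widget_add, widget_ops.add);
	static_call_update(widget_read, widget_ops.read);
}

static void widget_enable(int added)
{
	/* Direct, patched call; the target must be non-NULL by now. */
	static_call(widget_enable_all)(added);
}

static void widget_event_added(int idx)
{
	/*
	 * static_call_cond() replaces "if (ops->add) ops->add(idx)": a NULL
	 * target makes the call site a NOP (void-returning calls only).
	 */
	static_call_cond(widget_add)(idx);
}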


arch/x86/events/core.c

Lines changed: 94 additions & 40 deletions
@@ -28,6 +28,7 @@
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/nospec.h>
+#include <linux/static_call.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -52,6 +53,34 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
 DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
 
+/*
+ * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined
+ * from just a typename, as opposed to an actual function.
+ */
+DEFINE_STATIC_CALL_NULL(x86_pmu_handle_irq, *x86_pmu.handle_irq);
+DEFINE_STATIC_CALL_NULL(x86_pmu_disable_all, *x86_pmu.disable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
+DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
+DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events);
+DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints);
+DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_start_scheduling, *x86_pmu.start_scheduling);
+DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling);
+DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task);
+DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
+
 u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -660,7 +689,7 @@ static void x86_pmu_disable(struct pmu *pmu)
 	cpuc->enabled = 0;
 	barrier();
 
-	x86_pmu.disable_all();
+	static_call(x86_pmu_disable_all)();
 }
 
 void x86_pmu_enable_all(int added)
@@ -907,8 +936,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		n0 -= cpuc->n_txn;
 
-	if (x86_pmu.start_scheduling)
-		x86_pmu.start_scheduling(cpuc);
+	static_call_cond(x86_pmu_start_scheduling)(cpuc);
 
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 		c = cpuc->event_constraint[i];
@@ -925,7 +953,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		 * change due to external factors (sibling state, allow_tfa).
 		 */
 		if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) {
-			c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
+			c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]);
 			cpuc->event_constraint[i] = c;
 		}
 
@@ -1008,8 +1036,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	if (!unsched && assign) {
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
-			if (x86_pmu.commit_scheduling)
-				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
+			static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
 		}
 	} else {
 		for (i = n0; i < n; i++) {
@@ -1018,15 +1045,13 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			/*
 			 * release events that failed scheduling
 			 */
-			if (x86_pmu.put_event_constraints)
-				x86_pmu.put_event_constraints(cpuc, e);
+			static_call_cond(x86_pmu_put_event_constraints)(cpuc, e);
 
 			cpuc->event_constraint[i] = NULL;
 		}
 	}
 
-	if (x86_pmu.stop_scheduling)
-		x86_pmu.stop_scheduling(cpuc);
+	static_call_cond(x86_pmu_stop_scheduling)(cpuc);
 
 	return unsched ? -EINVAL : 0;
 }
@@ -1226,7 +1251,7 @@ static void x86_pmu_enable(struct pmu *pmu)
 	cpuc->enabled = 1;
 	barrier();
 
-	x86_pmu.enable_all(added);
+	static_call(x86_pmu_enable_all)(added);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1347,7 +1372,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		goto done_collect;
 
-	ret = x86_pmu.schedule_events(cpuc, n, assign);
+	ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
 	if (ret)
 		goto out;
 	/*
@@ -1365,13 +1390,11 @@ static int x86_pmu_add(struct perf_event *event, int flags)
 	cpuc->n_added += n - n0;
 	cpuc->n_txn += n - n0;
 
-	if (x86_pmu.add) {
-		/*
-		 * This is before x86_pmu_enable() will call x86_pmu_start(),
-		 * so we enable LBRs before an event needs them etc..
-		 */
-		x86_pmu.add(event);
-	}
+	/*
+	 * This is before x86_pmu_enable() will call x86_pmu_start(),
+	 * so we enable LBRs before an event needs them etc..
+	 */
+	static_call_cond(x86_pmu_add)(event);
 
 	ret = 0;
 out:
@@ -1399,7 +1422,7 @@ static void x86_pmu_start(struct perf_event *event, int flags)
 	cpuc->events[idx] = event;
 	__set_bit(idx, cpuc->active_mask);
 	__set_bit(idx, cpuc->running);
-	x86_pmu.enable(event);
+	static_call(x86_pmu_enable)(event);
 	perf_event_update_userpage(event);
 }
 
@@ -1469,7 +1492,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 
 	if (test_bit(hwc->idx, cpuc->active_mask)) {
-		x86_pmu.disable(event);
+		static_call(x86_pmu_disable)(event);
 		__clear_bit(hwc->idx, cpuc->active_mask);
 		cpuc->events[hwc->idx] = NULL;
 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
@@ -1519,8 +1542,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	if (i >= cpuc->n_events - cpuc->n_added)
 		--cpuc->n_added;
 
-	if (x86_pmu.put_event_constraints)
-		x86_pmu.put_event_constraints(cpuc, event);
+	static_call_cond(x86_pmu_put_event_constraints)(cpuc, event);
 
 	/* Delete the array entry. */
 	while (++i < cpuc->n_events) {
@@ -1533,13 +1555,12 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	perf_event_update_userpage(event);
 
 do_del:
-	if (x86_pmu.del) {
-		/*
-		 * This is after x86_pmu_stop(); so we disable LBRs after any
-		 * event can need them etc..
-		 */
-		x86_pmu.del(event);
-	}
+
+	/*
+	 * This is after x86_pmu_stop(); so we disable LBRs after any
+	 * event can need them etc..
+	 */
+	static_call_cond(x86_pmu_del)(event);
 }
 
 int x86_pmu_handle_irq(struct pt_regs *regs)
@@ -1617,7 +1638,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 		return NMI_DONE;
 
 	start_clock = sched_clock();
-	ret = x86_pmu.handle_irq(regs);
+	ret = static_call(x86_pmu_handle_irq)(regs);
 	finish_clock = sched_clock();
 
 	perf_sample_event_took(finish_clock - start_clock);
@@ -1830,6 +1851,38 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
 static struct attribute_group x86_pmu_attr_group;
 static struct attribute_group x86_pmu_caps_group;
 
+static void x86_pmu_static_call_update(void)
+{
+	static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq);
+	static_call_update(x86_pmu_disable_all, x86_pmu.disable_all);
+	static_call_update(x86_pmu_enable_all, x86_pmu.enable_all);
+	static_call_update(x86_pmu_enable, x86_pmu.enable);
+	static_call_update(x86_pmu_disable, x86_pmu.disable);
+
+	static_call_update(x86_pmu_add, x86_pmu.add);
+	static_call_update(x86_pmu_del, x86_pmu.del);
+	static_call_update(x86_pmu_read, x86_pmu.read);
+
+	static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
+	static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
+	static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
+
+	static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling);
+	static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling);
+	static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling);
+
+	static_call_update(x86_pmu_sched_task, x86_pmu.sched_task);
+	static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx);
+
+	static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
+	static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
+}
+
+static void _x86_pmu_read(struct perf_event *event)
+{
+	x86_perf_event_update(event);
+}
+
 static int __init init_hw_perf_events(void)
 {
 	struct x86_pmu_quirk *quirk;
@@ -1898,6 +1951,11 @@ static int __init init_hw_perf_events(void)
 	pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
 	pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
 
+	if (!x86_pmu.read)
+		x86_pmu.read = _x86_pmu_read;
+
+	x86_pmu_static_call_update();
+
 	/*
 	 * Install callbacks. Core will call them for each online
 	 * cpu.
@@ -1934,11 +1992,9 @@ static int __init init_hw_perf_events(void)
 }
 early_initcall(init_hw_perf_events);
 
-static inline void x86_pmu_read(struct perf_event *event)
+static void x86_pmu_read(struct perf_event *event)
 {
-	if (x86_pmu.read)
-		return x86_pmu.read(event);
-	x86_perf_event_update(event);
+	static_call(x86_pmu_read)(event);
 }
 
 /*
@@ -2015,7 +2071,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
 	if (!x86_pmu_initialized())
 		return -EAGAIN;
 
-	ret = x86_pmu.schedule_events(cpuc, n, assign);
+	ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
 	if (ret)
 		return ret;
 
@@ -2308,15 +2364,13 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
 
 static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	if (x86_pmu.sched_task)
-		x86_pmu.sched_task(ctx, sched_in);
+	static_call_cond(x86_pmu_sched_task)(ctx, sched_in);
 }
 
 static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
 				  struct perf_event_context *next)
 {
-	if (x86_pmu.swap_task_ctx)
-		x86_pmu.swap_task_ctx(prev, next);
+	static_call_cond(x86_pmu_swap_task_ctx)(prev, next);
 }
 
 void perf_check_microcode(void)
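One detail worth noting from the init_hw_perf_events() hunk above: x86_pmu.read may legitimately be NULL, but it is invoked via static_call() (not static_call_cond()) on a hot path, so the patch installs _x86_pmu_read() as a default before patching the static call. A small sketch of that idiom, continuing the hypothetical widget_ops example near the top of this page (all names there are stand-ins, not part of the patch):

/* Generic fallback used when the backend leaves the method NULL. */
static void widget_default_read(int idx)
{
	/* x86_perf_event_update() plays this role in the patch */
}

static void widget_init(void)
{
	if (!widget_ops.read)
		widget_ops.read = widget_default_read;

	/* Now every static call has a real target before first use. */
	widget_static_call_update();
}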
