Skip to content

Commit 267fb27

Browse files
author
Peter Zijlstra
committed
perf: Reduce stack usage of perf_output_begin()
__perf_output_begin() has an on-stack struct perf_sample_data in the unlikely case it needs to generate a LOST record. However, every caller of perf_output_begin() must already have a perf_sample_data on-stack, so pass that one down instead of allocating a second one.
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20201030151954.985416146@infradead.org
1 parent 7bdb157 commit 267fb27

File tree

6 files changed

+37
-30
lines changed

6 files changed

+37
-30
lines changed

arch/powerpc/perf/imc-pmu.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1336,7 +1336,7 @@ static void dump_trace_imc_data(struct perf_event *event)
13361336
/* If this is a valid record, create the sample */
13371337
struct perf_output_handle handle;
13381338

1339-
if (perf_output_begin(&handle, event, header.size))
1339+
if (perf_output_begin(&handle, &data, event, header.size))
13401340
return;
13411341

13421342
perf_output_sample(&handle, &header, &data, event);

arch/s390/kernel/perf_cpum_sf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ static void cpumsf_output_event_pid(struct perf_event *event,
672672
rcu_read_lock();
673673

674674
perf_prepare_sample(&header, data, event, regs);
675-
if (perf_output_begin(&handle, event, header.size))
675+
if (perf_output_begin(&handle, data, event, header.size))
676676
goto out;
677677

678678
/* Update the process ID (see also kernel/events/core.c) */

arch/x86/events/intel/ds.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -642,8 +642,8 @@ int intel_pmu_drain_bts_buffer(void)
642642
rcu_read_lock();
643643
perf_prepare_sample(&header, &data, event, &regs);
644644

645-
if (perf_output_begin(&handle, event, header.size *
646-
(top - base - skip)))
645+
if (perf_output_begin(&handle, &data, event,
646+
header.size * (top - base - skip)))
647647
goto unlock;
648648

649649
for (at = base; at < top; at++) {

include/linux/perf_event.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,11 +1400,14 @@ perf_event_addr_filters(struct perf_event *event)
14001400
extern void perf_event_addr_filters_sync(struct perf_event *event);
14011401

14021402
extern int perf_output_begin(struct perf_output_handle *handle,
1403+
struct perf_sample_data *data,
14031404
struct perf_event *event, unsigned int size);
14041405
extern int perf_output_begin_forward(struct perf_output_handle *handle,
1405-
struct perf_event *event,
1406-
unsigned int size);
1406+
struct perf_sample_data *data,
1407+
struct perf_event *event,
1408+
unsigned int size);
14071409
extern int perf_output_begin_backward(struct perf_output_handle *handle,
1410+
struct perf_sample_data *data,
14081411
struct perf_event *event,
14091412
unsigned int size);
14101413

kernel/events/core.c

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7186,6 +7186,7 @@ __perf_event_output(struct perf_event *event,
71867186
struct perf_sample_data *data,
71877187
struct pt_regs *regs,
71887188
int (*output_begin)(struct perf_output_handle *,
7189+
struct perf_sample_data *,
71897190
struct perf_event *,
71907191
unsigned int))
71917192
{
@@ -7198,7 +7199,7 @@ __perf_event_output(struct perf_event *event,
71987199

71997200
perf_prepare_sample(&header, data, event, regs);
72007201

7201-
err = output_begin(&handle, event, header.size);
7202+
err = output_begin(&handle, data, event, header.size);
72027203
if (err)
72037204
goto exit;
72047205

@@ -7264,7 +7265,7 @@ perf_event_read_event(struct perf_event *event,
72647265
int ret;
72657266

72667267
perf_event_header__init_id(&read_event.header, &sample, event);
7267-
ret = perf_output_begin(&handle, event, read_event.header.size);
7268+
ret = perf_output_begin(&handle, &sample, event, read_event.header.size);
72687269
if (ret)
72697270
return;
72707271

@@ -7533,7 +7534,7 @@ static void perf_event_task_output(struct perf_event *event,
75337534

75347535
perf_event_header__init_id(&task_event->event_id.header, &sample, event);
75357536

7536-
ret = perf_output_begin(&handle, event,
7537+
ret = perf_output_begin(&handle, &sample, event,
75377538
task_event->event_id.header.size);
75387539
if (ret)
75397540
goto out;
@@ -7636,7 +7637,7 @@ static void perf_event_comm_output(struct perf_event *event,
76367637
return;
76377638

76387639
perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
7639-
ret = perf_output_begin(&handle, event,
7640+
ret = perf_output_begin(&handle, &sample, event,
76407641
comm_event->event_id.header.size);
76417642

76427643
if (ret)
@@ -7736,7 +7737,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
77367737

77377738
perf_event_header__init_id(&namespaces_event->event_id.header,
77387739
&sample, event);
7739-
ret = perf_output_begin(&handle, event,
7740+
ret = perf_output_begin(&handle, &sample, event,
77407741
namespaces_event->event_id.header.size);
77417742
if (ret)
77427743
goto out;
@@ -7863,7 +7864,7 @@ static void perf_event_cgroup_output(struct perf_event *event, void *data)
78637864

78647865
perf_event_header__init_id(&cgroup_event->event_id.header,
78657866
&sample, event);
7866-
ret = perf_output_begin(&handle, event,
7867+
ret = perf_output_begin(&handle, &sample, event,
78677868
cgroup_event->event_id.header.size);
78687869
if (ret)
78697870
goto out;
@@ -7989,7 +7990,7 @@ static void perf_event_mmap_output(struct perf_event *event,
79897990
}
79907991

79917992
perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
7992-
ret = perf_output_begin(&handle, event,
7993+
ret = perf_output_begin(&handle, &sample, event,
79937994
mmap_event->event_id.header.size);
79947995
if (ret)
79957996
goto out;
@@ -8299,7 +8300,7 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
82998300
int ret;
83008301

83018302
perf_event_header__init_id(&rec.header, &sample, event);
8302-
ret = perf_output_begin(&handle, event, rec.header.size);
8303+
ret = perf_output_begin(&handle, &sample, event, rec.header.size);
83038304

83048305
if (ret)
83058306
return;
@@ -8333,7 +8334,7 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost)
83338334

83348335
perf_event_header__init_id(&lost_samples_event.header, &sample, event);
83358336

8336-
ret = perf_output_begin(&handle, event,
8337+
ret = perf_output_begin(&handle, &sample, event,
83378338
lost_samples_event.header.size);
83388339
if (ret)
83398340
return;
@@ -8388,7 +8389,7 @@ static void perf_event_switch_output(struct perf_event *event, void *data)
83888389

83898390
perf_event_header__init_id(&se->event_id.header, &sample, event);
83908391

8391-
ret = perf_output_begin(&handle, event, se->event_id.header.size);
8392+
ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size);
83928393
if (ret)
83938394
return;
83948395

@@ -8463,7 +8464,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
84638464

84648465
perf_event_header__init_id(&throttle_event.header, &sample, event);
84658466

8466-
ret = perf_output_begin(&handle, event,
8467+
ret = perf_output_begin(&handle, &sample, event,
84678468
throttle_event.header.size);
84688469
if (ret)
84698470
return;
@@ -8506,7 +8507,7 @@ static void perf_event_ksymbol_output(struct perf_event *event, void *data)
85068507

85078508
perf_event_header__init_id(&ksymbol_event->event_id.header,
85088509
&sample, event);
8509-
ret = perf_output_begin(&handle, event,
8510+
ret = perf_output_begin(&handle, &sample, event,
85108511
ksymbol_event->event_id.header.size);
85118512
if (ret)
85128513
return;
@@ -8596,7 +8597,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data)
85968597

85978598
perf_event_header__init_id(&bpf_event->event_id.header,
85988599
&sample, event);
8599-
ret = perf_output_begin(&handle, event,
8600+
ret = perf_output_begin(&handle, data, event,
86008601
bpf_event->event_id.header.size);
86018602
if (ret)
86028603
return;
@@ -8705,7 +8706,8 @@ static void perf_event_text_poke_output(struct perf_event *event, void *data)
87058706

87068707
perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event);
87078708

8708-
ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size);
8709+
ret = perf_output_begin(&handle, &sample, event,
8710+
text_poke_event->event_id.header.size);
87098711
if (ret)
87108712
return;
87118713

@@ -8786,7 +8788,7 @@ static void perf_log_itrace_start(struct perf_event *event)
87868788
rec.tid = perf_event_tid(event, current);
87878789

87888790
perf_event_header__init_id(&rec.header, &sample, event);
8789-
ret = perf_output_begin(&handle, event, rec.header.size);
8791+
ret = perf_output_begin(&handle, &sample, event, rec.header.size);
87908792

87918793
if (ret)
87928794
return;

kernel/events/ring_buffer.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ ring_buffer_has_space(unsigned long head, unsigned long tail,
147147

148148
static __always_inline int
149149
__perf_output_begin(struct perf_output_handle *handle,
150+
struct perf_sample_data *data,
150151
struct perf_event *event, unsigned int size,
151152
bool backward)
152153
{
@@ -237,18 +238,16 @@ __perf_output_begin(struct perf_output_handle *handle,
237238
handle->size = (1UL << page_shift) - offset;
238239

239240
if (unlikely(have_lost)) {
240-
struct perf_sample_data sample_data;
241-
242241
lost_event.header.size = sizeof(lost_event);
243242
lost_event.header.type = PERF_RECORD_LOST;
244243
lost_event.header.misc = 0;
245244
lost_event.id = event->id;
246245
lost_event.lost = local_xchg(&rb->lost, 0);
247246

248-
perf_event_header__init_id(&lost_event.header,
249-
&sample_data, event);
247+
/* XXX mostly redundant; @data is already fully initialized */
248+
perf_event_header__init_id(&lost_event.header, data, event);
250249
perf_output_put(handle, lost_event);
251-
perf_event__output_id_sample(event, handle, &sample_data);
250+
perf_event__output_id_sample(event, handle, data);
252251
}
253252

254253
return 0;
@@ -263,22 +262,25 @@ __perf_output_begin(struct perf_output_handle *handle,
263262
}
264263

265264
int perf_output_begin_forward(struct perf_output_handle *handle,
266-
struct perf_event *event, unsigned int size)
265+
struct perf_sample_data *data,
266+
struct perf_event *event, unsigned int size)
267267
{
268-
return __perf_output_begin(handle, event, size, false);
268+
return __perf_output_begin(handle, data, event, size, false);
269269
}
270270

271271
int perf_output_begin_backward(struct perf_output_handle *handle,
272+
struct perf_sample_data *data,
272273
struct perf_event *event, unsigned int size)
273274
{
274-
return __perf_output_begin(handle, event, size, true);
275+
return __perf_output_begin(handle, data, event, size, true);
275276
}
276277

277278
int perf_output_begin(struct perf_output_handle *handle,
279+
struct perf_sample_data *data,
278280
struct perf_event *event, unsigned int size)
279281
{
280282

281-
return __perf_output_begin(handle, event, size,
283+
return __perf_output_begin(handle, data, event, size,
282284
unlikely(is_write_backward(event)));
283285
}
284286

0 commit comments

Comments
 (0)