Skip to content

Commit ddb321a

Browse files
committed
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "Mostly tooling fixes, but also some kernel side fixes: uncore PMU
  driver fix, user regs sampling fix and an instruction decoder fix that
  unbreaks PEBS precise sampling"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  - perf/x86/uncore/hsw-ep: Handle systems with only two SBOXes
  - perf/x86_64: Improve user regs sampling
  - perf: Move task_pt_regs sampling into arch code
  - x86: Fix off-by-one in instruction decoder
  - perf hists browser: Fix segfault when showing callchain
  - perf callchain: Free callchains when hist entries are deleted
  - perf hists: Fix children sort key behavior
  - perf diff: Fix to sort by baseline field by default
  - perf list: Fix --raw-dump option
  - perf probe: Fix crash in dwarf_getcfi_elf
  - perf probe: Fix to fall back to find probe point in symbols
  - perf callchain: Append callchains only when requested
  - perf ui/tui: Print backtrace symbols when segfault occurs
  - perf report: Show progress bar for output resorting
2 parents 1e6c3e8 + 5306c31 commit ddb321a

26 files changed

+371
-77
lines changed

arch/arm/kernel/perf_regs.c

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,3 +28,11 @@ u64 perf_reg_abi(struct task_struct *task)
2828
{
2929
return PERF_SAMPLE_REGS_ABI_32;
3030
}
31+
32+
void perf_get_regs_user(struct perf_regs *regs_user,
33+
struct pt_regs *regs,
34+
struct pt_regs *regs_user_copy)
35+
{
36+
regs_user->regs = task_pt_regs(current);
37+
regs_user->abi = perf_reg_abi(current);
38+
}

arch/arm64/kernel/perf_regs.c

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,3 +50,11 @@ u64 perf_reg_abi(struct task_struct *task)
5050
else
5151
return PERF_SAMPLE_REGS_ABI_64;
5252
}
53+
54+
void perf_get_regs_user(struct perf_regs *regs_user,
55+
struct pt_regs *regs,
56+
struct pt_regs *regs_user_copy)
57+
{
58+
regs_user->regs = task_pt_regs(current);
59+
regs_user->abi = perf_reg_abi(current);
60+
}

arch/x86/kernel/cpu/perf_event_intel_uncore.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
1818
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
1919
#define UNCORE_EXTRA_PCI_DEV 0xff
20-
#define UNCORE_EXTRA_PCI_DEV_MAX 2
20+
#define UNCORE_EXTRA_PCI_DEV_MAX 3
2121

2222
/* support up to 8 sockets */
2323
#define UNCORE_SOCKET_MAX 8

arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -891,6 +891,7 @@ void snbep_uncore_cpu_init(void)
891891
enum {
892892
SNBEP_PCI_QPI_PORT0_FILTER,
893893
SNBEP_PCI_QPI_PORT1_FILTER,
894+
HSWEP_PCI_PCU_3,
894895
};
895896

896897
static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
@@ -2026,6 +2027,17 @@ void hswep_uncore_cpu_init(void)
20262027
{
20272028
if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
20282029
hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
2030+
2031+
/* Detect 6-8 core systems with only two SBOXes */
2032+
if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
2033+
u32 capid4;
2034+
2035+
pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
2036+
0x94, &capid4);
2037+
if (((capid4 >> 6) & 0x3) == 0)
2038+
hswep_uncore_sbox.num_boxes = 2;
2039+
}
2040+
20292041
uncore_msr_uncores = hswep_msr_uncores;
20302042
}
20312043

@@ -2287,6 +2299,11 @@ static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = {
22872299
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
22882300
SNBEP_PCI_QPI_PORT1_FILTER),
22892301
},
2302+
{ /* PCU.3 (for Capability registers) */
2303+
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
2304+
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
2305+
HSWEP_PCI_PCU_3),
2306+
},
22902307
{ /* end: all zeroes */ }
22912308
};
22922309

arch/x86/kernel/perf_regs.c

Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,14 @@ u64 perf_reg_abi(struct task_struct *task)
7878
{
7979
return PERF_SAMPLE_REGS_ABI_32;
8080
}
81+
82+
void perf_get_regs_user(struct perf_regs *regs_user,
83+
struct pt_regs *regs,
84+
struct pt_regs *regs_user_copy)
85+
{
86+
regs_user->regs = task_pt_regs(current);
87+
regs_user->abi = perf_reg_abi(current);
88+
}
8189
#else /* CONFIG_X86_64 */
8290
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
8391
(1ULL << PERF_REG_X86_ES) | \
@@ -102,4 +110,86 @@ u64 perf_reg_abi(struct task_struct *task)
102110
else
103111
return PERF_SAMPLE_REGS_ABI_64;
104112
}
113+
114+
void perf_get_regs_user(struct perf_regs *regs_user,
115+
struct pt_regs *regs,
116+
struct pt_regs *regs_user_copy)
117+
{
118+
struct pt_regs *user_regs = task_pt_regs(current);
119+
120+
/*
121+
* If we're in an NMI that interrupted task_pt_regs setup, then
122+
* we can't sample user regs at all. This check isn't really
123+
* sufficient, though, as we could be in an NMI inside an interrupt
124+
* that happened during task_pt_regs setup.
125+
*/
126+
if (regs->sp > (unsigned long)&user_regs->r11 &&
127+
regs->sp <= (unsigned long)(user_regs + 1)) {
128+
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
129+
regs_user->regs = NULL;
130+
return;
131+
}
132+
133+
/*
134+
* RIP, flags, and the argument registers are usually saved.
135+
* orig_ax is probably okay, too.
136+
*/
137+
regs_user_copy->ip = user_regs->ip;
138+
regs_user_copy->cx = user_regs->cx;
139+
regs_user_copy->dx = user_regs->dx;
140+
regs_user_copy->si = user_regs->si;
141+
regs_user_copy->di = user_regs->di;
142+
regs_user_copy->r8 = user_regs->r8;
143+
regs_user_copy->r9 = user_regs->r9;
144+
regs_user_copy->r10 = user_regs->r10;
145+
regs_user_copy->r11 = user_regs->r11;
146+
regs_user_copy->orig_ax = user_regs->orig_ax;
147+
regs_user_copy->flags = user_regs->flags;
148+
149+
/*
150+
* Don't even try to report the "rest" regs.
151+
*/
152+
regs_user_copy->bx = -1;
153+
regs_user_copy->bp = -1;
154+
regs_user_copy->r12 = -1;
155+
regs_user_copy->r13 = -1;
156+
regs_user_copy->r14 = -1;
157+
regs_user_copy->r15 = -1;
158+
159+
/*
160+
* For this to be at all useful, we need a reasonable guess for
161+
* sp and the ABI. Be careful: we're in NMI context, and we're
162+
* considering current to be the current task, so we should
163+
* be careful not to look at any other percpu variables that might
164+
* change during context switches.
165+
*/
166+
if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
167+
task_thread_info(current)->status & TS_COMPAT) {
168+
/* Easy case: we're in a compat syscall. */
169+
regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
170+
regs_user_copy->sp = user_regs->sp;
171+
regs_user_copy->cs = user_regs->cs;
172+
regs_user_copy->ss = user_regs->ss;
173+
} else if (user_regs->orig_ax != -1) {
174+
/*
175+
* We're probably in a 64-bit syscall.
176+
* Warning: this code is severely racy. At least it's better
177+
* than just blindly copying user_regs.
178+
*/
179+
regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
180+
regs_user_copy->sp = this_cpu_read(old_rsp);
181+
regs_user_copy->cs = __USER_CS;
182+
regs_user_copy->ss = __USER_DS;
183+
regs_user_copy->cx = -1; /* usually contains garbage */
184+
} else {
185+
/* We're probably in an interrupt or exception. */
186+
regs_user->abi = user_64bit_mode(user_regs) ?
187+
PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
188+
regs_user_copy->sp = user_regs->sp;
189+
regs_user_copy->cs = user_regs->cs;
190+
regs_user_copy->ss = user_regs->ss;
191+
}
192+
193+
regs_user->regs = regs_user_copy;
194+
}
105195
#endif /* CONFIG_X86_32 */

arch/x86/lib/insn.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828

2929
/* Verify next sizeof(t) bytes can be on the same instruction */
3030
#define validate_next(t, insn, n) \
31-
((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr)
31+
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
3232

3333
#define __get_next(t, insn) \
3434
({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })

include/linux/perf_event.h

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -79,11 +79,6 @@ struct perf_branch_stack {
7979
struct perf_branch_entry entries[0];
8080
};
8181

82-
struct perf_regs {
83-
__u64 abi;
84-
struct pt_regs *regs;
85-
};
86-
8782
struct task_struct;
8883

8984
/*
@@ -610,7 +605,14 @@ struct perf_sample_data {
610605
u32 reserved;
611606
} cpu_entry;
612607
struct perf_callchain_entry *callchain;
608+
609+
/*
610+
* regs_user may point to task_pt_regs or to regs_user_copy, depending
611+
* on arch details.
612+
*/
613613
struct perf_regs regs_user;
614+
struct pt_regs regs_user_copy;
615+
614616
struct perf_regs regs_intr;
615617
u64 stack_user_size;
616618
} ____cacheline_aligned;

include/linux/perf_regs.h

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,19 @@
11
#ifndef _LINUX_PERF_REGS_H
22
#define _LINUX_PERF_REGS_H
33

4+
struct perf_regs {
5+
__u64 abi;
6+
struct pt_regs *regs;
7+
};
8+
49
#ifdef CONFIG_HAVE_PERF_REGS
510
#include <asm/perf_regs.h>
611
u64 perf_reg_value(struct pt_regs *regs, int idx);
712
int perf_reg_validate(u64 mask);
813
u64 perf_reg_abi(struct task_struct *task);
14+
void perf_get_regs_user(struct perf_regs *regs_user,
15+
struct pt_regs *regs,
16+
struct pt_regs *regs_user_copy);
917
#else
1018
static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
1119
{
@@ -21,5 +29,13 @@ static inline u64 perf_reg_abi(struct task_struct *task)
2129
{
2230
return PERF_SAMPLE_REGS_ABI_NONE;
2331
}
32+
33+
static inline void perf_get_regs_user(struct perf_regs *regs_user,
34+
struct pt_regs *regs,
35+
struct pt_regs *regs_user_copy)
36+
{
37+
regs_user->regs = task_pt_regs(current);
38+
regs_user->abi = perf_reg_abi(current);
39+
}
2440
#endif /* CONFIG_HAVE_PERF_REGS */
2541
#endif /* _LINUX_PERF_REGS_H */

kernel/events/core.c

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4461,18 +4461,14 @@ perf_output_sample_regs(struct perf_output_handle *handle,
44614461
}
44624462

44634463
static void perf_sample_regs_user(struct perf_regs *regs_user,
4464-
struct pt_regs *regs)
4464+
struct pt_regs *regs,
4465+
struct pt_regs *regs_user_copy)
44654466
{
4466-
if (!user_mode(regs)) {
4467-
if (current->mm)
4468-
regs = task_pt_regs(current);
4469-
else
4470-
regs = NULL;
4471-
}
4472-
4473-
if (regs) {
4474-
regs_user->abi = perf_reg_abi(current);
4467+
if (user_mode(regs)) {
4468+
regs_user->abi = perf_reg_abi(current);
44754469
regs_user->regs = regs;
4470+
} else if (current->mm) {
4471+
perf_get_regs_user(regs_user, regs, regs_user_copy);
44764472
} else {
44774473
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
44784474
regs_user->regs = NULL;
@@ -4951,7 +4947,8 @@ void perf_prepare_sample(struct perf_event_header *header,
49514947
}
49524948

49534949
if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
4954-
perf_sample_regs_user(&data->regs_user, regs);
4950+
perf_sample_regs_user(&data->regs_user, regs,
4951+
&data->regs_user_copy);
49554952

49564953
if (sample_type & PERF_SAMPLE_REGS_USER) {
49574954
/* regs dump ABI info */

tools/perf/builtin-annotate.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
232232
if (nr_samples > 0) {
233233
total_nr_samples += nr_samples;
234234
hists__collapse_resort(hists, NULL);
235-
hists__output_resort(hists);
235+
hists__output_resort(hists, NULL);
236236

237237
if (symbol_conf.event_group &&
238238
!perf_evsel__is_group_leader(pos))

0 commit comments

Comments
 (0)