From 8e15958e1b7183bbfbdf004f0ad8f2b62f023f9f Mon Sep 17 00:00:00 2001
From: Dave Anderson
Date: Wed, 30 Apr 2014 14:48:22 -0400
Subject: [PATCH] Fix for the X86_64 "bt" command on Linux 3.3 and later
 kernels to properly display exception frame register contents on NMI stacks.

Kernel commit 3f3c8b8c4b2a34776c3470142a7c8baafcda6eb0 added 12 more
values to the NMI exception stack to handle nested NMIs caused by
page faults or breakpoints that could occur while handling an NMI
exception. The fix has two parts:
 1. Determine if this kernel has the nested NMI layout and set a
    machine-specific flag (NESTED_NMI) if it does.
 2. When backtracing an NMI stack, use the saved values instead of
    those found at the top of stack.
Kernel commit 28696f434fef0efa97534b59986ad33b9c4df7f8 changed
the stack layout again, swapping the location of the "saved" and
"copied" registers. This can be detected automatically, because the
"copied" registers contain either a copy of the "saved" registers,
or point to "repeat_nmi". So, if "repeat_nmi" is found as the return
address, assume that this is the old layout, and adjust the stack
pointer again. Without the patch, incorrect register values are
displayed in the exception frame dump in the NMI stack backtrace.
(ptesarik@suse.cz) --- defs.h | 1 + x86_64.c | 75 +++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/defs.h b/defs.h index 711b1546..4054de4e 100644 --- a/defs.h +++ b/defs.h @@ -5123,6 +5123,7 @@ struct machine_specific { #define VM_XEN_RHEL4 (0x100) #define FRAMEPOINTER (0x200) #define GART_REGION (0x400) +#define NESTED_NMI (0x800) #define VM_FLAGS (VM_ORIG|VM_2_6_11|VM_XEN|VM_XEN_RHEL4) diff --git a/x86_64.c b/x86_64.c index 5364c303..f4a3e8b5 100644 --- a/x86_64.c +++ b/x86_64.c @@ -468,6 +468,8 @@ x86_64_init(int when) else x86_64_per_cpu_init(); x86_64_ist_init(); + if (symbol_exists("repeat_nmi")) + machdep->flags |= NESTED_NMI; machdep->in_alternate_stack = x86_64_in_alternate_stack; if ((machdep->machspec->irqstack = (char *) malloc(machdep->machspec->stkinfo.isize)) == NULL) @@ -609,6 +611,8 @@ x86_64_dump_machdep_table(ulong arg) fprintf(fp, "%sFRAMEPOINTER", others++ ? "|" : ""); if (machdep->flags & GART_REGION) fprintf(fp, "%sGART_REGION", others++ ? "|" : ""); + if (machdep->flags & NESTED_NMI) + fprintf(fp, "%sNESTED_NMI", others++ ? 
"|" : ""); fprintf(fp, ")\n"); fprintf(fp, " kvbase: %lx\n", machdep->kvbase); @@ -3009,6 +3013,8 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in) } stacktop = bt->stacktop - SIZE(pt_regs); + if ((machdep->flags & NESTED_NMI) && estack_index == NMI_STACK) + stacktop -= 12*sizeof(ulong); bt->flags &= ~BT_FRAMESIZE_DISABLE; @@ -3046,21 +3052,37 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in) } cs = x86_64_exception_frame(EFRAME_PRINT|EFRAME_CS, 0, - bt->stackbuf + (bt->stacktop - bt->stackbase) - - SIZE(pt_regs), bt, ofp); + bt->stackbuf + (stacktop - bt->stackbase), + bt, ofp); if (!BT_REFERENCE_CHECK(bt)) fprintf(fp, "--- <%s exception stack> ---\n", x86_64_exception_stacks[estack_index]); - /* - * stack = (unsigned long *) estack_end[-2]; + /* + * Find the CPU-saved, or handler-saved registers */ up = (ulong *)(&bt->stackbuf[bt->stacktop - bt->stackbase]); - up -= 2; - rsp = bt->stkptr = *up; - up -= 3; - bt->instptr = *up; + up -= 5; + if ((machdep->flags & NESTED_NMI) && + estack_index == NMI_STACK && + bt->stkptr <= bt->stacktop - 17*sizeof(ulong)) { + up -= 12; + /* Copied and saved regs are swapped in pre-3.8 kernels */ + if (*up == symbol_value("repeat_nmi")) + up += 5; + } + + /* Registers (as saved by CPU): + * + * up[4] SS + * up[3] RSP + * up[2] RFLAGS + * up[1] CS + * up[0] RIP + */ + rsp = bt->stkptr = up[3]; + bt->instptr = up[0]; if (cs & 3) done = TRUE; /* user-mode exception */ else @@ -3513,27 +3535,46 @@ x86_64_dwarf_back_trace_cmd(struct bt_info *bt_in) } stacktop = bt->stacktop - SIZE(pt_regs); - + if ((machdep->flags & NESTED_NMI) && + estack_index == NMI_STACK) + stacktop -= 12*sizeof(ulong); + if (!done) { level = dwarf_backtrace(bt, level, stacktop); done = TRUE; } cs = x86_64_exception_frame(EFRAME_PRINT|EFRAME_CS, 0, - bt->stackbuf + (bt->stacktop - bt->stackbase) - - SIZE(pt_regs), bt, ofp); + bt->stackbuf + (stacktop - bt->stackbase), + bt, ofp); if (!BT_REFERENCE_CHECK(bt)) fprintf(fp, "--- ---\n"); - /* - * stack 
= (unsigned long *) estack_end[-2]; + /* + * Find the CPU-saved, or handler-saved registers */ up = (ulong *)(&bt->stackbuf[bt->stacktop - bt->stackbase]); - up -= 2; - rsp = bt->stkptr = *up; - up -= 3; - bt->instptr = *up; + up -= 5; + if ((machdep->flags & NESTED_NMI) && + estack_index == NMI_STACK && + bt->stkptr <= bt->stacktop - 17*sizeof(ulong)) { + up -= 12; + /* Copied and saved regs are swapped in pre-3.8 kernels */ + if (*up == symbol_value("repeat_nmi")) + up += 5; + } + + /* Registers (as saved by CPU): + * + * up[4] SS + * up[3] RSP + * up[2] RFLAGS + * up[1] CS + * up[0] RIP + */ + rsp = bt->stkptr = up[3]; + bt->instptr = up[0]; if (cs & 3) done = TRUE; /* user-mode exception */ else