From 45b74b89530d611b3fa95a1041e158fbb865fa84 Mon Sep 17 00:00:00 2001
From: Dave Anderson
Date: Mon, 23 Oct 2017 11:15:39 -0400
Subject: [PATCH] Fix for support of KASLR enabled kernels captured by the
 SADUMP dumpfile facility.

SADUMP dumpfile headers do not contain phys_base or VMCOREINFO notes, so
without this patch, the crash session fails during initialization with the
message "crash: seek error: kernel virtual address:
type: "page_offset_base". This patch calculates the phys_base value and the KASLR offset using the IDTR and CR3 registers from the dumpfile header. (indou.takao@jp.fujitsu.com) --- defs.h | 4 + sadump.c | 465 +++++++++++++++++++++++++++++++++++++++++++++++++++++- sadump.h | 1 + symbols.c | 44 ++++++ x86_64.c | 21 +++ 5 files changed, 534 insertions(+), 1 deletion(-) diff --git a/defs.h b/defs.h index a694a66a..76e5512b 100644 --- a/defs.h +++ b/defs.h @@ -2591,6 +2591,9 @@ struct symbol_table_data { ulong last_section_end; ulong _stext_vmlinux; struct downsized downsized; + ulong divide_error_vmlinux; + ulong idt_table_vmlinux; + ulong saved_command_line_vmlinux; }; /* flags for st */ @@ -6312,6 +6315,7 @@ void sadump_set_zero_excluded(void); void sadump_unset_zero_excluded(void); struct sadump_data; struct sadump_data *get_sadump_data(void); +int sadump_calc_kaslr_offset(ulong *); /* * qemu.c diff --git a/sadump.c b/sadump.c index a96ba9ce..2ccfa82f 100644 --- a/sadump.c +++ b/sadump.c @@ -1558,12 +1558,17 @@ sadump_display_regs(int cpu, FILE *ofp) */ int sadump_phys_base(ulong *phys_base) { - if (SADUMP_VALID()) { + if (SADUMP_VALID() && !sd->phys_base) { if (CRASHDEBUG(1)) error(NOTE, "sadump: does not save phys_base.\n"); return FALSE; } + if (sd->phys_base) { + *phys_base = sd->phys_base; + return TRUE; + } + return FALSE; } @@ -1649,3 +1654,461 @@ get_sadump_data(void) { return sd; } + +#ifdef X86_64 +static int +get_sadump_smram_cpu_state_any(struct sadump_smram_cpu_state *smram) +{ + ulong offset; + struct sadump_header *sh = sd->dump_header; + int apicid; + struct sadump_smram_cpu_state scs, zero; + + offset = sd->sub_hdr_offset + sizeof(uint32_t) + + sd->dump_header->nr_cpus * sizeof(struct sadump_apic_state); + + memset(&zero, 0, sizeof(zero)); + + for (apicid = 0; apicid < sh->nr_cpus; ++apicid) { + if (!read_device(&scs, sizeof(scs), &offset)) { + error(INFO, "sadump: cannot read sub header " + "cpu_state\n"); + return FALSE; + } + if (memcmp(&scs, &zero, sizeof(scs)) != 0) { + *smram = scs; + return TRUE; + } + } + + return FALSE; +} + +/* + * Get address of vector0 interrupt handler (Devide Error) from Interrupt + * Descriptor Table. + */ +static ulong +get_vec0_addr(ulong idtr) +{ + struct gate_struct64 { + uint16_t offset_low; + uint16_t segment; + uint32_t ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; + uint16_t offset_middle; + uint32_t offset_high; + uint32_t zero1; + } __attribute__((packed)) gate; + + readmem(idtr, PHYSADDR, &gate, sizeof(gate), "idt_table", FAULT_ON_ERROR); + + return ((ulong)gate.offset_high << 32) + + ((ulong)gate.offset_middle << 16) + + gate.offset_low; +} + +/* + * Parse a string of [size[KMG] ]offset[KMG] + * Import from Linux kernel(lib/cmdline.c) + */ +static ulong memparse(char *ptr, char **retptr) +{ + char *endptr; + + unsigned long long ret = strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + if (retptr) + *retptr = endptr; + + return ret; +} + +/* + * Find "elfcorehdr=" in the boot parameter of kernel and return the address + * of elfcorehdr. 
+ */ +static ulong +get_elfcorehdr(ulong cr3, ulong kaslr_offset) +{ + char cmdline[BUFSIZE], *ptr; + ulong cmdline_vaddr; + ulong cmdline_paddr; + ulong buf_vaddr, buf_paddr; + char *end; + ulong elfcorehdr_addr = 0, elfcorehdr_size = 0; + int verbose = CRASHDEBUG(1)? 1: 0; + + cmdline_vaddr = st->saved_command_line_vmlinux + kaslr_offset; + if (!kvtop(NULL, cmdline_vaddr, &cmdline_paddr, verbose)) + return 0; + + if (CRASHDEBUG(1)) { + fprintf(fp, "cmdline vaddr=%lx\n", cmdline_vaddr); + fprintf(fp, "cmdline paddr=%lx\n", cmdline_paddr); + } + + if (!readmem(cmdline_paddr, PHYSADDR, &buf_vaddr, sizeof(ulong), + "saved_command_line", RETURN_ON_ERROR)) + return 0; + + if (!kvtop(NULL, buf_vaddr, &buf_paddr, verbose)) + return 0; + + if (CRASHDEBUG(1)) { + fprintf(fp, "cmdline buffer vaddr=%lx\n", buf_vaddr); + fprintf(fp, "cmdline buffer paddr=%lx\n", buf_paddr); + } + + memset(cmdline, 0, BUFSIZE); + if (!readmem(buf_paddr, PHYSADDR, cmdline, BUFSIZE, + "saved_command_line", RETURN_ON_ERROR)) + return 0; + + ptr = strstr(cmdline, "elfcorehdr="); + if (!ptr) + return 0; + + if (CRASHDEBUG(1)) + fprintf(fp, "2nd kernel detected\n"); + + ptr += strlen("elfcorehdr="); + elfcorehdr_addr = memparse(ptr, &end); + if (*end == '@') { + elfcorehdr_size = elfcorehdr_addr; + elfcorehdr_addr = memparse(end + 1, &end); + } + + if (CRASHDEBUG(1)) { + fprintf(fp, "elfcorehdr_addr=%lx\n", elfcorehdr_addr); + fprintf(fp, "elfcorehdr_size=%lx\n", elfcorehdr_size); + } + + return elfcorehdr_addr; +} + + /* + * Get vmcoreinfo from elfcorehdr. + * Some codes are imported from Linux kernel(fs/proc/vmcore.c) + */ +static int +get_vmcoreinfo(ulong elfcorehdr, ulong *addr, int *len) +{ + unsigned char e_ident[EI_NIDENT]; + Elf64_Ehdr ehdr; + Elf64_Phdr phdr; + Elf64_Nhdr nhdr; + ulong ptr; + ulong nhdr_offset = 0; + int i; + + if (!readmem(elfcorehdr, PHYSADDR, e_ident, EI_NIDENT, + "EI_NIDENT", RETURN_ON_ERROR)) + return FALSE; + + if (e_ident[EI_CLASS] != ELFCLASS64) { + error(INFO, "Only ELFCLASS64 is supportd\n"); + return FALSE; + } + + if (!readmem(elfcorehdr, PHYSADDR, &ehdr, sizeof(ehdr), + "Elf64_Ehdr", RETURN_ON_ERROR)) + return FALSE; + + /* Sanity Check */ + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || + (ehdr.e_type != ET_CORE) || + ehdr.e_ident[EI_CLASS] != ELFCLASS64 || + ehdr.e_ident[EI_VERSION] != EV_CURRENT || + ehdr.e_version != EV_CURRENT || + ehdr.e_ehsize != sizeof(Elf64_Ehdr) || + ehdr.e_phentsize != sizeof(Elf64_Phdr) || + ehdr.e_phnum == 0) { + error(INFO, "Invalid elf header\n"); + return FALSE; + } + + ptr = elfcorehdr + ehdr.e_phoff; + for (i = 0; i < ehdr.e_phnum; i++) { + ulong offset; + char name[16]; + + if (!readmem(ptr, PHYSADDR, &phdr, sizeof(phdr), + "Elf64_Phdr", RETURN_ON_ERROR)) + return FALSE; + + ptr += sizeof(phdr); + if (phdr.p_type != PT_NOTE) + continue; + + offset = phdr.p_offset; + if (!readmem(offset, PHYSADDR, &nhdr, sizeof(nhdr), + "Elf64_Nhdr", RETURN_ON_ERROR)) + return FALSE; + + offset += DIV_ROUND_UP(sizeof(Elf64_Nhdr), sizeof(Elf64_Word))* + sizeof(Elf64_Word); + memset(name, 0, sizeof(name)); + if (!readmem(offset, PHYSADDR, name, sizeof(name), + "Elf64_Nhdr name", RETURN_ON_ERROR)) + return FALSE; + + if(!strcmp(name, "VMCOREINFO")) { + nhdr_offset = offset; + break; + } + } + + if (!nhdr_offset) + return FALSE; + + *addr = nhdr_offset + + DIV_ROUND_UP(nhdr.n_namesz, sizeof(Elf64_Word))* + sizeof(Elf64_Word); + *len = nhdr.n_descsz; + + if (CRASHDEBUG(1)) { + fprintf(fp, "vmcoreinfo addr=%lx\n", *addr); + fprintf(fp, "vmcoreinfo len=%d\n", *len); + } + 
+	return TRUE;
+}
+
+/*
+ * Check whether the current kaslr_offset/phys_base is for the 1st or 2nd kernel.
+ * If we are in the 2nd kernel, get kaslr_offset/phys_base from vmcoreinfo.
+ *
+ * 1. Get the command line and try to retrieve the "elfcorehdr=" boot parameter.
+ * 2. If "elfcorehdr=" is not found in the command line, we are in the 1st
+ *    kernel and there is nothing to do.
+ * 3. If "elfcorehdr=" is found, we are in the 2nd kernel. Find vmcoreinfo
+ *    via "elfcorehdr=" and retrieve kaslr_offset/phys_base from vmcoreinfo.
+ */
+static int
+get_kaslr_offset_from_vmcoreinfo(ulong cr3, ulong orig_kaslr_offset,
+				 ulong *kaslr_offset, ulong *phys_base)
+{
+	ulong elfcorehdr_addr = 0;
+	ulong vmcoreinfo_addr;
+	int vmcoreinfo_len;
+	char *buf, *pos;
+	int ret = FALSE;
+
+	/* Find "elfcorehdr=" in the kernel boot parameters */
+	elfcorehdr_addr = get_elfcorehdr(cr3, orig_kaslr_offset);
+	if (!elfcorehdr_addr)
+		return FALSE;
+
+	/* Get vmcoreinfo from the address of "elfcorehdr=" */
+	if (!get_vmcoreinfo(elfcorehdr_addr, &vmcoreinfo_addr, &vmcoreinfo_len))
+		return FALSE;
+
+	if (!vmcoreinfo_len)
+		return FALSE;
+
+	if (CRASHDEBUG(1))
+		fprintf(fp, "Find vmcoreinfo in kdump memory\n");
+
+	buf = GETBUF(vmcoreinfo_len);
+	if (!readmem(vmcoreinfo_addr, PHYSADDR, buf, vmcoreinfo_len,
+			"vmcoreinfo", RETURN_ON_ERROR))
+		goto quit;
+
+	/* Get phys_base from vmcoreinfo */
+	pos = strstr(buf, "NUMBER(phys_base)=");
+	if (!pos)
+		goto quit;
+	*phys_base = strtoull(pos + strlen("NUMBER(phys_base)="), NULL, 0);
+
+	/* Get kaslr_offset from vmcoreinfo */
+	pos = strstr(buf, "KERNELOFFSET=");
+	if (!pos)
+		goto quit;
+	*kaslr_offset = strtoull(pos + strlen("KERNELOFFSET="), NULL, 16);
+
+	ret = TRUE;
+
+quit:
+	FREEBUF(buf);
+	return ret;
+}
+
+/*
+ * Calculate kaslr_offset and phys_base
+ *
+ * kaslr_offset:
+ *   The difference between the original address in System.map or vmlinux
+ *   and the actual address chosen randomly by the KASLR feature, i.e.
+ *   kaslr_offset = actual address - original address
+ *
+ * phys_base:
+ *   The physical address where the kernel is placed, in other words, the
+ *   physical address of __START_KERNEL_map. This is also chosen randomly
+ *   by KASLR.
+ *
+ * kaslr_offset and phys_base are calculated as follows:
+ *
+ * kaslr_offset:
+ * 1) Get the IDTR and CR3 values from the dump header.
+ * 2) Get the virtual address of the IDT from the IDTR value
+ *    --- (A)
+ * 3) Translate (A) to a physical address using CR3, which points to the
+ *    top of the page table.
+ *    --- (B)
+ * 4) Get the address of the vector 0 (Divide Error) interrupt handler
+ *    from the IDT, which is pointed to by (B).
+ *    --- (C)
+ * 5) Get the address of the symbol "divide_error" from vmlinux
+ *    --- (D)
+ *
+ * Now we have two addresses:
+ * (C)-> Actual address of "divide_error"
+ * (D)-> Original address of "divide_error" in the vmlinux
+ *
+ * kaslr_offset can then be calculated as the difference between these
+ * two values.
+ *
+ * phys_base:
+ * 1) Get the IDT virtual address from vmlinux
+ *    --- (E)
+ *
+ * phys_base can then be calculated using the direct-mapping
+ * relationship:
+ *
+ * phys_base =
+ *   Physical address(B) -
+ *   (Virtual address(E) + kaslr_offset - __START_KERNEL_map)
+ *
+ * Note that address (A) cannot be used instead of (E) because (A) is
+ * not a direct-map address, it's a fixmap address.
+ *
+ * This solution works in almost every case, but does not work in the
+ * following cases.
+ *
+ * 1) If the dump is captured at an early stage of kernel boot, IDTR
+ *    points to the early IDT table (early_idts), not the normal IDT (idt_table).
+ * 2) If the dump is captured while kdump is working, IDTR points to the
+ *    IDT table of the 2nd kernel, not the 1st kernel.
+ *
+ * The current implementation does not support case 1); it needs
+ * enhancement in the future. For case 2), get kaslr_offset and
+ * phys_base as follows:
+ *
+ * 1) Get kaslr_offset and phys_base using the above solution.
+ * 2) Get the kernel boot parameters from "saved_command_line".
+ * 3) If "elfcorehdr=" is not included in the boot parameters, we are in
+ *    the 1st kernel and there is nothing more to do.
+ * 4) If "elfcorehdr=" is included in the boot parameters, we are in the
+ *    2nd kernel. Retrieve vmcoreinfo from the address given by
+ *    "elfcorehdr=" and get kaslr_offset and phys_base from vmcoreinfo.
+ */
+int
+sadump_calc_kaslr_offset(ulong *kaslr_offset)
+{
+	ulong phys_base = 0;
+	struct sadump_smram_cpu_state scs;
+	uint64_t idtr = 0, cr3 = 0, idtr_paddr;
+	ulong divide_error_vmcore;
+	ulong kaslr_offset_kdump, phys_base_kdump;
+	int ret = FALSE;
+	int verbose = CRASHDEBUG(1)? 1: 0;
+
+	if (!machine_type("X86_64"))
+		return FALSE;
+
+	memset(&scs, 0, sizeof(scs));
+	get_sadump_smram_cpu_state_any(&scs);
+	cr3 = scs.Cr3;
+	idtr = ((uint64_t)scs.IdtUpper)<<32 | (uint64_t)scs.IdtLower;
+
+	/*
+	 * Set up for kvtop.
+	 *
+	 * calc_kaslr_offset() is called before machdep_init(PRE_GDB), so some
+	 * variables are not initialized yet. Set them up here to call kvtop().
+	 *
+	 * TODO: XEN and 5-level paging are not supported
+	 */
+	vt->kernel_pgd[0] = cr3;
+	machdep->machspec->last_pml4_read = vt->kernel_pgd[0];
+	machdep->machspec->physical_mask_shift = __PHYSICAL_MASK_SHIFT_2_6;
+	machdep->machspec->pgdir_shift = PGDIR_SHIFT;
+	if (!readmem(cr3, PHYSADDR, machdep->machspec->pml4, PAGESIZE(),
+			"cr3", RETURN_ON_ERROR))
+		goto quit;
+
+	/* Convert the virtual address of the IDT table to a physical address */
+	if (!kvtop(NULL, idtr, &idtr_paddr, verbose))
+		goto quit;
+
+	/* Now we can calculate kaslr_offset and phys_base */
+	divide_error_vmcore = get_vec0_addr(idtr_paddr);
+	*kaslr_offset = divide_error_vmcore - st->divide_error_vmlinux;
+	phys_base = idtr_paddr -
+		(st->idt_table_vmlinux + *kaslr_offset - __START_KERNEL_map);
+
+	if (CRASHDEBUG(1)) {
+		fprintf(fp, "calc_kaslr_offset: idtr=%lx\n", idtr);
+		fprintf(fp, "calc_kaslr_offset: cr3=%lx\n", cr3);
+		fprintf(fp, "calc_kaslr_offset: idtr(phys)=%lx\n", idtr_paddr);
+		fprintf(fp, "calc_kaslr_offset: divide_error(vmlinux): %lx\n",
+			st->divide_error_vmlinux);
+		fprintf(fp, "calc_kaslr_offset: divide_error(vmcore): %lx\n",
+			divide_error_vmcore);
+	}
+
+	/*
+	 * Check whether the current kaslr_offset/phys_base is for the 1st or 2nd
+	 * kernel. 
If we are in 2nd kernel, get kaslr_offset/phys_base + * from vmcoreinfo + */ + if (get_kaslr_offset_from_vmcoreinfo( + cr3, *kaslr_offset, &kaslr_offset_kdump, &phys_base_kdump)) { + *kaslr_offset = kaslr_offset_kdump; + phys_base = phys_base_kdump; + } + + if (CRASHDEBUG(1)) { + fprintf(fp, "calc_kaslr_offset: kaslr_offset=%lx\n", + *kaslr_offset); + fprintf(fp, "calc_kaslr_offset: phys_base=%lx\n", phys_base); + } + + sd->phys_base = phys_base; + ret = TRUE; +quit: + vt->kernel_pgd[0] = 0; + machdep->machspec->last_pml4_read = 0; + return ret; +} +#else +int +sadump_calc_kaslr_offset(ulong *kaslr_offset) +{ + return FALSE; +} +#endif /* X86_64 */ diff --git a/sadump.h b/sadump.h index 7f8e3845..681f5e44 100644 --- a/sadump.h +++ b/sadump.h @@ -219,6 +219,7 @@ struct sadump_data { ulonglong backup_offset; uint64_t max_mapnr; + ulong phys_base; }; struct sadump_data *sadump_get_sadump_data(void); diff --git a/symbols.c b/symbols.c index 02cb34e7..b2f27961 100644 --- a/symbols.c +++ b/symbols.c @@ -624,6 +624,9 @@ kaslr_init(void) st->_stext_vmlinux = UNINITIALIZED; } } + + if (SADUMP_DUMPFILE()) + kt->flags2 |= KASLR_CHECK; } /* @@ -637,6 +640,19 @@ derive_kaslr_offset(bfd *abfd, int dynamic, bfd_byte *start, bfd_byte *end, unsigned long relocate; ulong _stext_relocated; + if (SADUMP_DUMPFILE()) { + ulong kaslr_offset = 0; + + sadump_calc_kaslr_offset(&kaslr_offset); + + if (kaslr_offset) { + kt->relocate = kaslr_offset * -1; + kt->flags |= RELOC_SET; + } + + return; + } + if (ACTIVE()) { _stext_relocated = symbol_value_from_proc_kallsyms("_stext"); if (_stext_relocated == BADVAL) @@ -3052,6 +3068,16 @@ dump_symbol_table(void) else fprintf(fp, "\n"); + if (SADUMP_DUMPFILE()) { + fprintf(fp, "divide_error_vmlinux: %lx\n", st->divide_error_vmlinux); + fprintf(fp, " idt_table_vmlinux: %lx\n", st->idt_table_vmlinux); + fprintf(fp, "saved_command_line_vmlinux: %lx\n", st->saved_command_line_vmlinux); + } else { + fprintf(fp, "divide_error_vmlinux: (unused)\n"); + fprintf(fp, " idt_table_vmlinux: (unused)\n"); + fprintf(fp, "saved_command_line_vmlinux: (unused)\n"); + } + fprintf(fp, " symval_hash[%d]: %lx\n", SYMVAL_HASH, (ulong)&st->symval_hash[0]); @@ -12246,6 +12272,24 @@ numeric_forward(const void *P_x, const void *P_y) } } + if (SADUMP_DUMPFILE()) { + /* Need for kaslr_offset and phys_base */ + if (STREQ(x->name, "divide_error")) + st->divide_error_vmlinux = valueof(x); + else if (STREQ(y->name, "divide_error")) + st->divide_error_vmlinux = valueof(y); + + if (STREQ(x->name, "idt_table")) + st->idt_table_vmlinux = valueof(x); + else if (STREQ(y->name, "idt_table")) + st->idt_table_vmlinux = valueof(y); + + if (STREQ(x->name, "saved_command_line")) + st->saved_command_line_vmlinux = valueof(x); + else if (STREQ(y->name, "saved_command_line")) + st->saved_command_line_vmlinux = valueof(y); + } + xs = bfd_get_section(x); ys = bfd_get_section(y); diff --git a/x86_64.c b/x86_64.c index 6e60ddaf..2f9e6dbc 100644 --- a/x86_64.c +++ b/x86_64.c @@ -194,6 +194,9 @@ x86_64_init(int when) machdep->machspec->kernel_image_size = dtol(string, QUIET, NULL); free(string); } + if (SADUMP_DUMPFILE()) + /* Need for calculation of kaslr_offset and phys_base */ + machdep->kvtop = x86_64_kvtop; break; case PRE_GDB: @@ -2019,6 +2022,22 @@ x86_64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbo ulong pte; physaddr_t physpage; + if (SADUMP_DUMPFILE() && !(machdep->flags & KSYMS_START)) { + /* + * In the case of sadump, to calculate kaslr_offset and + * phys_base, kvtop is called during 
symtab_init(). At this
+		 * stage phys_base is not yet initialized and x86_64_VTOP()
+		 * does not work, so jump to the page table translation code.
+		 */
+		FILL_PML4();
+		pml4 = ((ulong *)machdep->machspec->pml4) + pml4_index(kvaddr);
+		if (verbose) {
+			fprintf(fp, "PML4 DIRECTORY: %lx\n", vt->kernel_pgd[0]);
+			fprintf(fp, "PAGE DIRECTORY: %lx\n", *pml4);
+		}
+		goto start_vtop_with_pagetable;
+	}
+
 	if (!IS_KVADDR(kvaddr))
 		return FALSE;
 
@@ -2065,6 +2084,8 @@ x86_64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbo
 			fprintf(fp, "PAGE DIRECTORY: %lx\n", *pml4);
 		}
 	}
+
+start_vtop_with_pagetable:
 	if (!(*pml4) & _PAGE_PRESENT)
 		goto no_kpage;
 	pgd_paddr = (*pml4) & PHYSICAL_PAGE_MASK;
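
Editor's note (not part of the patch): a minimal standalone sketch of the address
arithmetic described in the sadump.c comment and implemented by
sadump_calc_kaslr_offset(). All addresses below are made-up example values; the
0xffffffff80000000 constant is the usual x86_64 __START_KERNEL_map.

/*
 * Illustrative sketch only.  kaslr_offset is the difference between the
 * handler address read from the dump's IDT and the vmlinux symbol value;
 * phys_base then follows from the direct-mapping relationship.
 */
#include <stdio.h>

#define START_KERNEL_MAP  0xffffffff80000000UL  /* x86_64 __START_KERNEL_map */

int main(void)
{
	/* Hypothetical inputs, for illustration only */
	unsigned long divide_error_vmlinux = 0xffffffff81a00b70UL; /* symbol value in vmlinux */
	unsigned long divide_error_vmcore  = 0xffffffff8ba00b70UL; /* vector 0 handler read from the IDT */
	unsigned long idt_table_vmlinux    = 0xffffffff82000000UL; /* symbol value in vmlinux */
	unsigned long idtr_paddr           = 0x6b000000UL;         /* IDTR translated through CR3 */

	/* kaslr_offset = actual address - original address */
	unsigned long kaslr_offset = divide_error_vmcore - divide_error_vmlinux;

	/* phys_base = physical IDT address - direct-map offset of idt_table */
	unsigned long phys_base = idtr_paddr -
	    (idt_table_vmlinux + kaslr_offset - START_KERNEL_MAP);

	printf("kaslr_offset = %lx\n", kaslr_offset);  /* prints a000000 */
	printf("phys_base    = %lx\n", phys_base);     /* prints 5f000000 */
	return 0;
}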