diff --git a/Makefile b/Makefile index 9e07f51..490c4dc 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,8 @@ ENV_FILE=build_env -include $(ENV_FILE) +SHELL := /bin/bash + OBJ = entry.o main.o console.o lib/string.o fdt.o fdt_strerror.o fdt_ro.o exc-vecs.o guest.o \ exc.o time.o mmu.o mem.o opal.o cache.o rom.o lib/ctype.o lib/vsprintf.o log.o lib/malloc.o \ fat/fat_cache.o fat/fat_format.o fat/fat_string.o fat/fat_write.o \ @@ -8,13 +10,11 @@ OBJ = entry.o main.o console.o lib/string.o fdt.o fdt_strerror.o fdt_ro.o exc-ve NAME = prephv -ifeq ($(CONFIG_MAMBO), 1) -BUILD_FLAGS = -DCONFIG_MAMBO -OBJ += mambo.o -endif - CROSS ?= ppc64le-linux CC = $(CROSS)-gcc +PPC64QEMU ?= qemu-system-ppc64 +# FAT_IMAGE is resolved from inside skiboot/, where the test target starts QEMU. +FAT_IMAGE ?= ../image.fat ARCH_FLAGS = -msoft-float -mpowerpc64 -mcpu=power8 -mtune=power8 -mabi=elfv2 \ -mlittle-endian -mno-strict-align -mno-multiple \ @@ -42,6 +42,9 @@ clean_env: log_clean_env clean @echo OLD_BUILD_ENV=\"$(BUILD_ENV)\" > $(ENV_FILE) +# Boot prephv under QEMU from skiboot/ with FAT_IMAGE as the initrd; QEMU's exit status is ignored so the terminal is always reset. test: $(NAME) skiboot/skiboot.lid + cd skiboot && $(PPC64QEMU) -m 4G -M powernv -nographic -kernel ../prephv -initrd $(FAT_IMAGE) || true + reset skiboot/skiboot.lid: git submodule update --init skiboot diff --git a/README.md b/README.md new file mode 100644 index 0000000..444342a --- /dev/null +++ b/README.md @@ -0,0 +1,92 @@ +prephv - because I wasted my 90's playing with LEGOs. +===================================================== + +This is mostly a huge ugly hack, derived from my +ppc64le_hello code. The running philosophy here is +to throw things together late at night with my family +asleep and see how far I get without a real design +or without a real desire to implement boring things +like IDE (*sigh*) emulation. PowerPC things though +are considered fun and crucial...anyway... 
+ +It's a 64-bit LE ``hypervisor'', running a single 32-bit +LE VM, with the intention of modelling some as of yet +unknown PReP machine...eventually hopefully enough to BSOD +inside the Windows NT 4.0 PowerPC kernel. + +PReP is a long dead specification for consumer-ish PowerPC +servers and workstations, that basically amounted to a +pre-ACPI PC with a PowerPC CPU. Some PReP machines ran +Windows NT 4.0. Most ran AIX, an obscure Solaris port, or +gave Linux hackers many fun restless nights. + +Relive '96... I spent that July basking on the beaches of +Cagliari. Somewhere in a dusty office in Seattle people +were dealing with stuff like this: + + Open Firmware ARC Interface Version 3.0 (Jul 12 1996 - 18:46:44) + Couldn't claim SYSTEM PARAMETER BLOCK + Program complete - please reboot. + +Status +------ + +Today I fake out enough of the Open Firmware client interface to +successfully run the VENEER.EXE ARC shim and hand off to +SETUPLDR. Not much else. Specifically, the CPU VM state is +not modelled quite well (or at all...?). + +![ARC veneer image](/docs/veneer.png?raw=true "In ARC menu") +![setupldr image](/docs/setupldr.png?raw=true "In SETUPLDR") + +The "disk" is passed as initrd. This is nowhere near +being able to run any portion of the NT kernel. + +Building +-------- + +You will need a LE 64-bit toolchain to build (i.e. ppc64le-linux). +A good source of toolchains is kernel.org. For building skiboot +(for testing) you will also need the BE 64-bit toolchain +(i.e. powerpc64-linux). + +Skiboot is pulled-in as a submodule. Prephv is a skiboot payload. +So hypothetically it could run on a real Power8 box. + +You will need Benjamin Herrenschmidt's PowerNV QEMU tree. 
+ +Good directions for QEMU/PowerNV: +https://www.flamingspork.com/blog/2015/08/28/running-opal-in-qemu-the-powernv-platform/ + + $ make + +Running +------- + +You now need an image.fat that contains at least the following +files from the NT 4.0 CD, which are obviously not distributable: +- \veneer.exe from /PPC/VENEER.EXE +- \osloader.exe from /PPC/SETUPLDR + +I've used FAT16. Other FAT types are untested. Good luck. + + $ PPC64QEMU=/path/to/BenH/powernv/qemu make test + +TODO +---- + +- Everything +- Threads (+implement an interactive monitor/debugger) + +Resources +--------- + +Useful stuff: +- PowerISA_V2.07_PUBLIC.pdf - Book 3S +- ABI53BitOpenPOWER_21July2014_pub.pdf - ELFv2 ABI used for PPC64LE +- P8_um_external_v1.1_2015JAN29_pub.pdf - POWER8 User Manual + +Contact Info +------------ + +Andrei Warkentin (andrey.warkentin@gmail.com). diff --git a/docs/setupldr.png b/docs/setupldr.png new file mode 100644 index 0000000..0961cdf Binary files /dev/null and b/docs/setupldr.png differ diff --git a/docs/veneer.png b/docs/veneer.png new file mode 100644 index 0000000..9409575 Binary files /dev/null and b/docs/veneer.png differ diff --git a/entry.S b/entry.S index 81b8239..0348da9 100644 --- a/entry.S +++ b/entry.S @@ -112,9 +112,4 @@ _GLOBAL(_start) * like, so we keep it. */ ori r0, r0, 0 -#ifdef CONFIG_MAMBO - /* - * Stop sim. - */ - attn -#endif /* CONFIG_MAMBO */ + never: b . diff --git a/exc.c b/exc.c index 43cec08..eaec51e 100644 --- a/exc.c +++ b/exc.c @@ -62,8 +62,7 @@ exc_handler(eframe_t *frame) /* * Handle instruction storage faults within the HV address - * region (which is direct-mapped to 0). When the MMU is off, - * the translation happens automatically. + * region (which is direct-mapped to 0). */ if (frame->vec == EXC_ISI) { if (frame->hsrr0 >= HV_ASPACE && @@ -78,8 +77,7 @@ exc_handler(eframe_t *frame) /* * Handle data storage faults within the HV address - * region (which is direct-mapped to 0). 
When the MMU is off, - * the translation happens automatically. + * region (which is direct-mapped to 0). */ if (frame->vec == EXC_DSI) { if (get_DAR() >= HV_ASPACE && @@ -97,13 +95,6 @@ exc_handler(eframe_t *frame) exc_rfi(frame); } - if (frame->vec == EXC_HDEC) { - set_HDEC(DEC_DISABLE); - LOG("hypervisor decrementer, exception handler with MMU %s!", - (frame->hsrr1 & (MSR_IR | MSR_DR)) ? "on" : "off"); - exc_rfi(frame); - } - if (frame->vec == EXC_SC) { mtmsrd(MSR_RI, 1); if (rom_call(frame) == ERR_NONE) { @@ -174,20 +165,6 @@ exc_enable_ee(void) } -void -exc_enable_hdec(void) -{ - set_LPCR(get_LPCR() | LPCR_HDICE); -} - - -void -exc_disable_hdec(void) -{ - set_LPCR(get_LPCR() & ~LPCR_HDICE); -} - - void exc_init(void) { diff --git a/include/mambo.h b/include/mambo.h deleted file mode 100644 index 4767247..0000000 --- a/include/mambo.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Various callthru stuff. - * - * Copyright (C) 2015 Andrei Warkentin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef MAMBO_H -#define MAMBO_H - -void mambo_write(char *buf, length_t len); - -#endif /* MAMBO_H */ diff --git a/main.c b/main.c index 057475f..3ba0d3a 100644 --- a/main.c +++ b/main.c @@ -1,7 +1,5 @@ /* - * Simple PPC64LE freestanding "Hello, World" ``kernel'', meant - * to be booted via skiboot or another OPAL firmware on the - * "mambo" open-power sim. + * PReP HV, a huge hack. * * Copyright (C) 2015 Andrei Warkentin * @@ -23,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -38,76 +35,9 @@ #include #include -#define HELLO_MAMBO "Hello Mambo!\n" -#define HELLO_OPAL "Hello OPAL!\n" - kpcr_t kpcr; static time_req_t opal_timer; -static void -dump_props(void *fdt, int node, int depth) -{ - const char *n; - uint32_t tag; - int nextoffset; - const struct fdt_property *prop; - int offset = _fdt_check_node_offset(fdt, node);; - - if (offset < 0) { - return; - } - - do { - tag = fdt_next_tag(fdt, offset, &nextoffset); - if (tag == FDT_END) { - return; - } - - if (tag == FDT_PROP) { - prop = _fdt_offset_ptr(fdt, offset); - n = fdt_string(fdt, fdt32_to_cpu(prop->nameoff)); - - if (fdt32_to_cpu(prop->len) == 0) { - LOG("%.*c%s: true", depth, ' ', n); - } else if (!strcmp(n, "compatible") || - !strcmp(n, "bootargs") || - !strcmp(n, "linux,stdout-path") || - !strcmp(n, "epapr-version") || - !strcmp(n, "model") || - !strcmp(n, "device_type") - ) { - LOG("%.*c%s: %s", depth, ' ', n, prop->data); - } else if (!strcmp(n, "#address-cells") || - !strcmp(n, "#size-cells") || - !strcmp(n, "#bytes") || - !strcmp(n, "l2-cache-size") || - !strcmp(n, "slb-size") || - !strcmp(n, "timebase-frequency") || - !strcmp(n, "i-cache-size") || - !strcmp(n, "i-cache-sets") || - !strcmp(n, "i-cache-line-size") || - !strcmp(n, "i-cache-block-size") || - !strcmp(n, "d-cache-size") || - !strcmp(n, "d-cache-sets") || - !strcmp(n, "d-cache-line-size") || - !strcmp(n, "d-cache-block-size") || - !strcmp(n, "linux,phandle") || - !strcmp(n, "linux,initrd-start") || - 
!strcmp(n, "linux,initrd-end") || - !strcmp(n, "phandle")) { - LOG("%.*c%s: 0x%x", depth, ' ', n, - be32_to_cpu(*(uint32_t *) prop->data)); - } else { - LOG("%.*c%s: 0x%x@0x%x", depth, ' ', n, - fdt32_to_cpu(prop->len), - prop->data); - } - } - - offset = nextoffset; - } while ((tag != FDT_BEGIN_NODE) && (tag != FDT_END_NODE)); -} - static bool_t opal_timer_cb(time_req_t *t) { @@ -187,406 +117,18 @@ cpu_init(void *fdt) } - void -dump_nodes(void *fdt) -{ - int i; - int numrsv; - int offset; - int depth; - const char *n; - - if (fdt_check_header(fdt) != 0) { - LOG("Bad FDT"); - return; - } - - numrsv = fdt_num_mem_rsv(fdt); - LOG("FDT version: %d", fdt_version(fdt)); - - for (i = 0; i < numrsv; i++) { - ra_t addr; - length_t size; - if (fdt_get_mem_rsv(fdt, i, &addr, &size) != 0) { - break; - } - - LOG("/memreserve/ 0x%x 0x%x;", - addr, size); - } - - offset = 0; - depth = 0; - do { - n = fdt_get_name(fdt, offset, NULL); - if (n != NULL) { - LOG("%.*c<%s>", depth * 4, ' ', n); - } - - dump_props(fdt, offset, depth * 4 + 2); - - offset = fdt_next_node(fdt, offset, &depth); - if (offset < 0) { - break; - } - } while(1); -} - - -static uint64_t -test_syscall(uint64_t param1, uint64_t param2) -{ - register uint64_t r3 __asm__ ("r3") = param1; - register uint64_t r4 __asm__ ("r4") = param2; - asm volatile("sc 1" : "=r" (r3) : "r" (r3), "r" (r4)); - return r3; -} - - -static void -test_hv(void) -{ - static void *upage = NULL; - eframe_t uframe; - - if (upage == NULL) { - upage = mem_memalign(PAGE_SIZE, PAGE_SIZE); - } - - /* - * Grab a page and map it as "virtual real mode" page 0 in our VM. - */ - mmu_map_vrma(ptr_2_ra(upage), ptr_2_ra(upage) + PAGE_SIZE); - memcpy(upage, (void *) test_syscall, (uint64_t) &test_hv - - (uint64_t) &test_syscall); - lwsync(); - flush_cache((vaddr_t) upage, PAGE_SIZE); - - /* - * VM supervisor is at EA 0 and does test_syscall(0x1337, 0), - * which simply returns to HV supervisor state. 
- */ - uframe.r1 = 0 + PAGE_SIZE - STACKFRAMESIZE; - uframe.r3 = 0x1337; - uframe.r4 = 0; - uframe.hsrr0 = 0; - /* No interrupts, no MMU. */ - uframe.hsrr1 = (mfmsr() & ~(MSR_HV | MSR_IR | MSR_DR | MSR_EE)); - - /* - * Force switch into our "VM". The exception handler stashes - * the kernel state behind in a global (sigh), which is magically - * restored on a test_syscall(0x1337, 0). The things we do - * to avoid writing an actual scheduler. - */ - test_syscall(0x7e57, (uint64_t) &uframe); - - /* - * We return here. Unmap VM "real mode" memory. - */ - mmu_unmap_vrma(ptr_2_ra(upage), ptr_2_ra(upage) + PAGE_SIZE); -} - - -static void -test_u(void) -{ - static void *upage = NULL; - int en = mmu_enabled(); - eframe_t uframe; - - /* - * 1TB - 4K. - */ - ea_t ea = TB(1) - PAGE_SIZE; - - if (upage == NULL) { - upage = mem_memalign(PAGE_SIZE, PAGE_SIZE); - } - - if (!en) { - mmu_enable(); - } - - /* - * Grab a page. Needs to be mapped with access from - * unpriviledged mode. We will use both to contain - * code to run and the stack. - */ - mmu_map(ea, ptr_2_ra(upage), PP_RWRW, PAGE_4K); - memcpy((void *) ea, (void *) test_syscall, (uint64_t) &test_hv - - (uint64_t) &test_syscall); - lwsync(); - flush_cache(ea, PAGE_SIZE); - - /* - * User code does test_syscall(0x1337, 0), which simply returns - * to kernel state. - */ - uframe.r1 = ea + PAGE_SIZE - STACKFRAMESIZE; - uframe.r3 = 0x1337; - uframe.r4 = 0; - uframe.hsrr0 = ea; - uframe.hsrr1 = mfmsr() | MSR_PR; - - /* - * Force switch into user code. The exception handler stashes - * the kernel state behind in a global (sigh), which is magically - * restored on a test_syscall(0x1337, 0). The things we do - * to avoid writing an actual scheduler. - */ - test_syscall(0x7e57, (uint64_t) &uframe); - - /* - * We return here. - */ - mmu_unmap(ea, PAGE_4K); - - if (!en) { - mmu_disable(); - } -} - - -static void -test_mmu_16mb(void) -{ - int i; - bool_t good; - - /* - * 1TB - 16MB. 
- */ - uint64_t *ea = (uint64_t *) (TB(1) - MB(16)); - uint64_t *ea2 = (uint64_t *) (TB(1) - MB(32)); - int en = mmu_enabled(); - static uint64_t *p1 = NULL; - static uint64_t *p2 = NULL; - - if (p1 == NULL) { - p1 = mem_memalign(MB(16), MB(16)); - } - - if (p2 == NULL) { - p2 = mem_memalign(MB(16), MB(16)); - } - - if (!en) { - mmu_enable(); - } - - /* - * Because we create the mapping in the identity segment, which has - * base page size = 4K, 16M pages are achieved using MPSS. This is - * better than not using MPSS and manually populating 4K entries, - * becase at the TLB and ERAT level there should be only one entry. - * - * On the sim, of course, MPSS is still worse than using a segment - * where the base page size = 16M, because we need to do more - * oh so glacially slow PTE updates. - */ - LOG("mapping two EAs to same RA - on a sim this will take a while..."); - mmu_map((ea_t) ea, ptr_2_ra(p1), PP_RWXX, PAGE_16M); - LOG("mapped 0x%x to 0x%x as 16M", ea, p1); - mmu_map((ea_t) ea2, ptr_2_ra(p1), PP_RWXX, PAGE_16M); - LOG("mapped 0x%x to 0x%x as 16M", ea2, p1); - for (i = 0; i < (PAGE_SIZE * 2 / sizeof(uint64_t)); i++) { - ea[i] = (uint64_t) &p1[i]; - } - LOG("prep completed"); - good = true; - for (i = 0; i < (PAGE_SIZE * 2 / sizeof(uint64_t)); i++) { - if (ea2[i] != (uint64_t) &p1[i]) { - good = false; - LOG("error at 0x%x: expected 0x%x got 0x%x", - &ea2[i], (uint64_t) &p1[i], ea2[i]); - break; - } - } - LOG("16M MPSS mappings %swork", !good ? "don't " : ""); - if (!good) { - goto out; - } - - LOG("mapping same EAs to different RAs"); - mmu_unmap((ea_t) ea2, PAGE_16M); - mmu_map((ea_t) ea2, ptr_2_ra(p2), PP_RWXX, PAGE_16M); - LOG("mapped %p to %p as 16M", ea2, p2); - good = memcmp((void *) ea, (void *) ea2, PAGE_SIZE * 2) != 0; - LOG("mapped %p to %p %scorrectly", ea2, - p2, !good ? 
"in" : ""); - -out: - LOG("finishing up"); - mmu_unmap((ea_t) ea2, PAGE_16M); - mmu_unmap((ea_t) ea, PAGE_16M); - - if (!en) { - mmu_disable(); - } -} - - -static void -test_mmu(void) -{ - /* - * 1TB - 4K. - */ - int res; - void *source; - ea_t ea = TB(1) - PAGE_SIZE; - int en = mmu_enabled(); - - if (!en) { - mmu_enable(); - } - - source = (void *) &_start; - mmu_map(ea, ptr_2_ra(source), PP_RWXX, PAGE_4K); - res = memcmp((void *) ea, source, PAGE_SIZE); - LOG("mapped 0x%x to 0x%x %scorrectly", ea, - ptr_2_ra(source), res ? "in" : ""); - mmu_unmap(ea, PAGE_4K); - - source = (void *) (((ea_t) &_start) + PAGE_SIZE); - mmu_map(ea, ptr_2_ra(source), PP_RWXX, PAGE_4K); - res = memcmp((void *) ea, source , PAGE_SIZE); - LOG("mapped 0x%x to 0x%x %scorrectly", ea, - ptr_2_ra(source), res ? "in" : ""); - mmu_unmap(ea, PAGE_4K); - - if (!en) { - mmu_disable(); - } -} - - -static bool_t -timer_cb(time_req_t *t) -{ - LOG("Timer '%s' fired!", t->name); - - /* This is a recurring timer. */ - t->when = secs_to_tb((uint64_t) t->ctx) + mftb(); - return true; -} - - -static void -toggle_timer(void) -{ - static time_req_t t; - static bool_t on = false; - - if (!on) { - time_prep_s(5, timer_cb, "5s", (void *) 5, &t); - LOG("Enabling timer callback"); - time_enqueue(&t); - } else { - LOG("Disabling timer callback"); - time_dequeue(&t); - } - - on ^= true; -} - - -void -menu(void *fdt) -{ - int c = 0; - - /* Clear any chars. */ - while(con_getchar() != NO_CHAR); - - LOG("\nPick your poison:"); - do { - if (c != NO_CHAR) { - LOG("Choices: (MMU = %s):\n" - " (d) 5s delay\n" - " (D) toggle 5s timer\n" - " (e) test exception\n" - " (n) test nested exception\n" - " (f) dump FDT\n" - " (M) enable MMU\n" - " (m) disable MMU\n" - " (t) test MMU\n" - " (T) test MMU 16mb pages\n" - " (u) test non-priviledged code\n" - " (U) test VM real-mode code\n" - " (H) enable HV dec\n" - " (h) disable HV dec\n" - " (I) run initrd", - mmu_enabled() ? 
"enabled" : "disabled"); - } - - c = con_getchar(); - switch (c) { - case 'M': - mmu_enable(); - break; - case 'm': - mmu_disable(); - break; - case 't': - test_mmu(); - break; - case 'T': - test_mmu_16mb(); - break; - case 'u': - test_u(); - break; - case 'U': - test_hv(); - break; - case 'f': - dump_nodes(fdt); - break; - case 'e': - LOG("Testing exception handling..."); - LOG("sc(feed) => 0x%x", test_syscall(0xfeed, - 0xface)); - break; - case 'n': - LOG("Testing nested exception handling..."); - LOG("sc(dead) => 0x%x", test_syscall(0xdead, 0)); - break; - case 'd': - time_delay(secs_to_tb(5)); - break; - case 'D': - toggle_timer(); - break; - case 'H': - set_HDEC(DEC_DISABLE); - exc_enable_hdec(); - break; - case 'h': - exc_disable_hdec(); - break; - } - } while(1); -} void c_main(ra_t fdt_ra) { +#define HELLO_PREPHV "Hello, PReP HV!\n" void *fdt; - uint64_t len = cpu_to_be64(sizeof(HELLO_OPAL)); - -#ifdef CONFIG_MAMBO - /* - * Write using sim interface (simpler). - */ - mambo_write(HELLO_MAMBO, sizeof(HELLO_MAMBO)); -#endif /* CONFIG_MAMBO */ + uint64_t len = cpu_to_be64(sizeof(HELLO_PREPHV)); /* * Write using firmware interface. */ - opal_write(OPAL_TERMINAL_0, ptr_2_ra(&len), ptr_2_ra(HELLO_OPAL)); + opal_write(OPAL_TERMINAL_0, ptr_2_ra(&len), ptr_2_ra(HELLO_PREPHV)); /* * Some info. @@ -622,15 +164,10 @@ c_main(ra_t fdt_ra) uframe.hsrr0 = LAYOUT_VM_START + 0x00050000; uframe.hsrr1 = (mfmsr() ^ MSR_SF) | MSR_PR; - /* - * Force switch into user code. The exception handler stashes - * the kernel state behind in a global (sigh), which is magically - * restored on a test_syscall(0x1337, 0). The things we do - * to avoid writing an actual scheduler. - */ kpcr_get()->kern_sp = (uint64_t) &uframe; exc_disable_ee(); exc_rfi(&uframe); } + while(1); } diff --git a/mambo.S b/mambo.S deleted file mode 100644 index 2d50049..0000000 --- a/mambo.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Callthru (i.e. semihosting) facilities. 
- * - * Copyright (C) 2015 Andrei Warkentin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include - -/* - * Use simulator callthru (i.e. semihosting) facilities - * to print a string. - * - * r3 - buffer - * r4 - len - */ -_GLOBAL(mambo_write) -#define SIM_WRITE_CONSOLE_CODE 0 - li r6, 0 - mr r5, r4 - mr r4, r3 - li r3, SIM_WRITE_CONSOLE_CODE - .long 0x000eaeb0 - blr