From 76c3a5d45f6ade291cb98ef1b0900ee49a02d981 Mon Sep 17 00:00:00 2001 From: Wenchao Wang Date: Wed, 2 Jun 2021 17:59:30 +0800 Subject: [PATCH] Remove legacy VTLB engine As the new memory virtualization engine EPT2 (Extended Page Table) has become stable, the VTLB code is now deprecated. Change summary: * Removes the VTLB implementation and header files, and updates the project files for 4 platforms; * Removes the related variables in other modules and the redundant memory allocation for the VTLB structure; * Adds the MMIO module to manage MMIO-related functions. Signed-off-by: Wenchao Wang --- core/cpu.c | 8 +- core/include/intr.h | 1 + core/include/mmio.h | 95 ++ core/include/page_walker.h | 11 + core/include/vcpu.h | 19 +- core/include/vtlb.h | 133 --- core/mmio.c | 325 +++++ core/page_walker.c | 1 - core/vcpu.c | 191 +-- core/vtlb.c | 1044 ----------------- .../intelhaxm.xcodeproj/project.pbxproj | 20 +- platforms/linux/Kbuild | 2 +- platforms/linux/haxm-install.sh | 0 platforms/linux/haxm-uninstall.sh | 0 platforms/netbsd/Makefile | 2 +- platforms/windows/haxm-core.vcxproj | 4 +- 16 files changed, 497 insertions(+), 1359 deletions(-) create mode 100644 core/include/mmio.h delete mode 100644 core/include/vtlb.h create mode 100644 core/mmio.c delete mode 100644 core/vtlb.c mode change 100755 => 100644 platforms/linux/haxm-install.sh mode change 100755 => 100644 platforms/linux/haxm-uninstall.sh diff --git a/core/cpu.c b/core/cpu.c index 39a19094..c84cb0f1 100644 --- a/core/cpu.c +++ b/core/cpu.c @@ -36,7 +36,6 @@ #include "include/debug.h" #include "include/dump.h" #include "include/name.h" -#include "include/vtlb.h" #include "include/intr.h" #include "include/ept.h" @@ -327,12 +326,7 @@ void vcpu_handle_vmcs_pending(struct vcpu_t *vcpu) vcpu->vmcs_pending_entry_intr_info = 0; } - if (vcpu->vmcs_pending_guest_cr3) { - vmwrite(vcpu, GUEST_CR3, vtlb_get_cr3(vcpu)); - vcpu->vmcs_pending_guest_cr3 = 0; - } vcpu->vmcs_pending = 0; - return; } /* Return the value same as ioctl value */ @@ -633,7 +627,7 @@ void load_vmcs_common(struct vcpu_t *vcpu) vmwrite(vcpu, VMX_TSC_OFFSET, vcpu->tsc_offset); vmwrite(vcpu, GUEST_ACTIVITY_STATE, vcpu->state->_activity_state); - vcpu_vmwrite_all(vcpu, 0); + vcpu_vmwrite_all(vcpu); } diff --git a/core/include/intr.h b/core/include/intr.h index 93922eee..520c954c 100644 --- a/core/include/intr.h +++ b/core/include/intr.h @@ -47,6 +47,7 @@ uint hax_intr_is_blocked(struct vcpu_t *vcpu); void hax_handle_idt_vectoring(struct vcpu_t *vcpu); void vcpu_inject_intr(struct vcpu_t *vcpu, struct hax_tunnel *htun); void hax_inject_exception(struct vcpu_t *vcpu, uint8_t vector, uint32_t error_code); +void hax_inject_page_fault(struct vcpu_t *vcpu, mword error_code); /* * Get highest pending interrupt vector * Return HAX_INVALID_INTR_VECTOR when no pending diff --git a/core/include/mmio.h b/core/include/mmio.h new file mode 100644 index 00000000..a1a9daa5 --- /dev/null +++ b/core/include/mmio.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2009 Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HAX_CORE_MMIO_H_ +#define HAX_CORE_MMIO_H_ + +#include "vcpu.h" + +// Reads the given number of bytes from guest RAM (using a GVA) into the given +// buffer. This function is supposed to be called by the MMIO handler to obtain +// the instruction being executed by the given vCPU, which has generated an EPT +// violation. Its implementation should make use of the per-vCPU MMIO fetch +// cache. +// |vcpu| The vCPU executing the MMIO instruction. +// |gva| The GVA pointing to the start of the MMIO instruction in guest RAM. +// |buf| The buffer to copy the bytes to. +// |len| The number of bytes to copy. Must not exceed the maximum length of +// any valid IA instruction. +// Returns 0 on success, or one of the following error codes: +// -ENOMEM: Memory allocation/mapping error. + +int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64_t gva, uint8_t *buf, + int len); + +// Translates guest virtual address to guest physical address. +// |vcpu| Pointer to the vCPU +// |va| Guest virtual address +// |access| Access descriptor (read/write, user/supervisor) +// |pa| Guest physical address +// |len| Number of bytes for which translation is valid +// |update| Update access and dirty bits of guest structures +// Returns 0 if translation is successful, 0x80000000 OR'ed with the exception +// number otherwise. + +uint vcpu_translate(struct vcpu_t *vcpu, hax_vaddr_t va, uint access, + hax_paddr_t *pa, uint64_t *len, bool update); + +// Reads guest-linear memory. +// If flag is 0, this read is on behalf of the guest. This function updates the +// access/dirty bits in the guest page tables and injects a page fault if there +// is an error. In this case, the return value is true for success, false if a +// page fault was injected. +// If flag is 1, this function updates the access/dirty bits in the guest page +// tables but does not inject a page fault if there is an error. Instead, it +// returns the number of bytes read. +// If flag is 2, the memory read is for internal use. It does not update the +// guest page tables. It returns the number of bytes read. + +uint32_t vcpu_read_guest_virtual(struct vcpu_t *vcpu, hax_vaddr_t addr, + void *dst, uint32_t dst_buflen, uint32_t size, + uint flag); + +// Writes guest-linear memory. +// If flag is 0, this memory write is on behalf of the guest. This function +// updates the access/dirty bits in the guest page tables and injects a page +// fault if there is an error. In this case, the return value is true for +// success, false if a page fault was injected. 
+// If flag is 1, it updates the access/dirty bits in the guest page tables but +// does not inject a page fault if there is an error. Instead, it returns the +// number of bytes written. +// A flag value of 2 is implemented, but not used. It does not update the guest +// page tables. It returns the number of bytes written. + +uint32_t vcpu_write_guest_virtual(struct vcpu_t *vcpu, hax_vaddr_t addr, + uint32_t dst_buflen, const void *src, + uint32_t size, uint flag); + +#endif // HAX_CORE_MMIO_H_ diff --git a/core/include/page_walker.h b/core/include/page_walker.h index b0abb44a..8a50fe16 100644 --- a/core/include/page_walker.h +++ b/core/include/page_walker.h @@ -47,6 +47,17 @@ typedef uint64_t ADDRESS; #define PW_INVALID_GPA (~((uint64_t)0)) #define PW_NUM_OF_PDPT_ENTRIES_IN_32_BIT_MODE 4 +enum { + TF_OK = 0, + TF_FAILED = 0x80000000, // Translation failed + TF_GP2HP = 0x40000000, // GP->HP translation failed + TF_PROTECT = 0x00000001, // Fault due to protection + TF_WRITE = 0x00000002, // Fault due to write + TF_USER = 0x00000004, // Fault due to user mode + TF_RSVD = 0x00000008, // Fault due to reserved bit violation + TF_EXEC = 0x00000010 // Fault due to exec protection +}; + /* * Function: pw_perform_page_walk * Description: The function performs page walk over guest page tables for diff --git a/core/include/vcpu.h b/core/include/vcpu.h index 56aa9fe6..0a71f361 100644 --- a/core/include/vcpu.h +++ b/core/include/vcpu.h @@ -65,17 +65,6 @@ struct gstate { uint64_t apic_base; }; -struct cvtlb { - hax_vaddr_t va; - hax_paddr_t ha; - uint64_t flags; - uint guest_order; - uint order; - uint access; - uint flag; -}; - -struct hax_mmu; struct per_cpu_data; struct vcpu_vmx_data { @@ -179,7 +168,6 @@ struct vcpu_t { hax_mutex tmutex; struct vm_t *vm; - struct hax_mmu *mmu; struct vcpu_state_t *state; struct hax_tunnel *tunnel; uint8_t *io_buf; @@ -199,7 +187,6 @@ struct vcpu_t { uint64_t vmcs_pending_entry_error_code : 1; uint64_t vmcs_pending_entry_instr_length : 1; uint64_t vmcs_pending_entry_intr_info : 1; - uint64_t vmcs_pending_guest_cr3 : 1; uint64_t debug_control_dirty : 1; uint64_t dr_dirty : 1; uint64_t rflags_dirty : 1; @@ -208,7 +195,7 @@ struct vcpu_t { uint64_t interruptibility_dirty : 1; uint64_t pcpu_ctls_dirty : 1; uint64_t pae_pdpt_dirty : 1; - uint64_t padding : 45; + uint64_t padding : 46; }; /* For TSC offseting feature*/ @@ -237,7 +224,6 @@ struct vcpu_t { struct gstate gstate; struct hax_vcpu_mem *tunnel_vcpumem; struct hax_vcpu_mem *iobuf_vcpumem; - struct cvtlb prefetch[16]; struct em_context_t emulate_ctxt; struct vcpu_post_mmio post_mmio; @@ -261,11 +247,10 @@ void vcpu_save_guest_state(struct vcpu_t *vcpu); void vcpu_load_host_state(struct vcpu_t *vcpu); void vcpu_save_host_state(struct vcpu_t *vcpu); -int vtlb_active(struct vcpu_t *vcpu); int vcpu_vmexit_handler(struct vcpu_t *vcpu, exit_reason_t exit_reason, struct hax_tunnel *htun); void vcpu_vmread_all(struct vcpu_t *vcpu); -void vcpu_vmwrite_all(struct vcpu_t *vcpu, int force_vtlb_flush); +void vcpu_vmwrite_all(struct vcpu_t *vcpu); int vcpu_teardown(struct vcpu_t *vcpu); diff --git a/core/include/vtlb.h b/core/include/vtlb.h deleted file mode 100644 index 199ba813..00000000 --- a/core/include/vtlb.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2009 Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef HAX_CORE_VTLB_H_ -#define HAX_CORE_VTLB_H_ - -#include "vcpu.h" - -struct vcpu_t; - -enum { - TF_OK = 0, - TF_FAILED = 0x80000000, // Translation failed - TF_GP2HP = 0x40000000, // GP->HP translation failed - TF_PROTECT = 0x00000001, // Fault due to protection - TF_WRITE = 0x00000002, // Fault due to write - TF_USER = 0x00000004, // Fault due to user mode - TF_RSVD = 0x00000008, // Fault due to reserved bit violation - TF_EXEC = 0x00000010 // Fault due to exec protection -}; - -#define EXECUTION_DISABLE_MASK 0x8000000000000000ULL - -#define PTE32_W_BIT_MASK (1 << 1) -#define PTE32_USER_BIT_MASK (1 << 2) -#define PTE32_PWT_BIT_MASK (1 << 3) -#define PTE32_PCD_BIT_MASK (1 << 4) -#define PTE32_D_BIT_MASK (1 << 6) -#define PTE32_PAT_BIT_MASK (1 << 7) -#define PTE32_G_BIT_MASK (1 << 8) - -typedef enum mmu_mode { - MMU_MODE_INVALID = 0, - MMU_MODE_VTLB = 1, - MMU_MODE_EPT = 2 -} mmu_mode_t; - -typedef uint32_t pagemode_t; - -typedef struct vtlb { - hax_vaddr_t va; - hax_paddr_t ha; - uint64_t flags; - uint guest_order; - uint order; - uint access; -} vtlb_t; - -#define KERNEL_ADDR_OFFSET 0xc0000000 - -#define igo_addr(addr) (addr >= KERNEL_ADDR_OFFSET) - -typedef struct hax_mmu { - mmu_mode_t mmu_mode; - pagemode_t guest_mode; - pagemode_t host_mode; - struct hax_page *hpd_page; - struct hax_page *pde_page; - struct hax_page *pde_shadow_page; - hax_paddr_t pdir; - struct hax_link_list free_page_list; - struct hax_link_list used_page_list; - struct hax_link_list igo_page_list; - bool clean; - bool igo; /* Is global optimized */ -} hax_mmu_t; - -uint64_t vtlb_get_cr3(struct vcpu_t *vcpu); - -void vcpu_invalidate_tlb(struct vcpu_t *vcpu, bool global); -void vcpu_invalidate_tlb_addr(struct vcpu_t *vcpu, hax_vaddr_t va); - -uint vcpu_vtlb_alloc(struct vcpu_t *vcpu); -void vcpu_vtlb_free(struct vcpu_t *vcpu); - -bool handle_vtlb(struct vcpu_t *vcpu); - -uint vcpu_translate(struct vcpu_t *vcpu, hax_vaddr_t va, uint access, hax_paddr_t *pa, - uint64_t *len, bool update); - -uint32_t vcpu_read_guest_virtual(struct vcpu_t *vcpu, hax_vaddr_t addr, void *dst, - uint32_t dst_buflen, uint32_t size, uint flag); -uint32_t vcpu_write_guest_virtual(struct vcpu_t 
*vcpu, hax_vaddr_t addr, - uint32_t dst_buflen, const void *src, uint32_t size, - uint flag); - -/* - * Reads the given number of bytes from guest RAM (using a GVA) into the given - * buffer. This function is supposed to be called by the MMIO handler to obtain - * the instruction being executed by the given vCPU, which has generated an EPT - * violation. Its implementation should make use of the per-vCPU MMIO fetch - * cache. - * |vcpu|: The vCPU executing the MMIO instruction. - * |gva|: The GVA pointing to the start of the MMIO instruction in guest RAM. - * |buf|: The buffer to copy the bytes to. - * |len|: The number of bytes to copy. Must not exceed the maximum length of any - * valid IA instruction. - * Returns 0 on success, or one of the following error codes: - * -ENOMEM: Memory allocation/mapping error. - */ -int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64_t gva, uint8_t *buf, - int len); - -void hax_inject_page_fault(struct vcpu_t *vcpu, mword error_code); - -#endif // HAX_CORE_VTLB_H_ diff --git a/core/mmio.c b/core/mmio.c new file mode 100644 index 00000000..12df8e72 --- /dev/null +++ b/core/mmio.c @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2009 Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "include/ia32_defs.h" +#include "include/intr.h" +#include "include/mmio.h" +#include "include/page_walker.h" +#include "include/paging.h" + +typedef uint32_t pagemode_t; + +static void * mmio_map_guest_virtual_page_fast(struct vcpu_t *vcpu, + uint64_t gva, int len); +static void * mmio_map_guest_virtual_page_slow(struct vcpu_t *vcpu, + uint64_t gva, + hax_kmap_user *kmap); +static pagemode_t vcpu_get_pagemode(struct vcpu_t *vcpu); + +int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64_t gva, uint8_t *buf, + int len) +{ + uint64_t end_gva; + uint8_t *src_buf; + uint offset; + + hax_assert(vcpu != NULL); + hax_assert(buf != NULL); + // A valid IA instruction is never longer than 15 bytes + hax_assert(len > 0 && len <= 15); + end_gva = gva + (uint)len - 1; + + if ((gva >> PG_ORDER_4K) != (end_gva >> PG_ORDER_4K)) { + uint32_t ret; + + hax_log(HAX_LOGI, "%s: GVA range spans two pages: gva=0x%llx, len=%d\n", + __func__, gva, len); + + ret = vcpu_read_guest_virtual(vcpu, gva, buf, (uint)len, (uint)len, 0); + if (!ret) { + hax_log(HAX_LOGE, "%s: vcpu_read_guest_virtual() failed: " + "vcpu_id=%u, gva=0x%llx, len=%d\n", __func__, vcpu->vcpu_id, + gva, len); + return -ENOMEM; + } + + return 0; + } + + src_buf = mmio_map_guest_virtual_page_fast(vcpu, gva, len); + if (!src_buf) { + src_buf = mmio_map_guest_virtual_page_slow(vcpu, gva, + &vcpu->mmio_fetch.kmap); + if (!src_buf) + return -ENOMEM; + + vcpu->mmio_fetch.last_gva = gva; + vcpu->mmio_fetch.last_guest_cr3 = vcpu->state->_cr3; + vcpu->mmio_fetch.hit_count = 0; + vcpu->mmio_fetch.kva = src_buf; + } + offset = (uint)(gva & pgoffs(PG_ORDER_4K)); + memcpy_s(buf, len, src_buf + offset, len); + + return 0; +} + +uint vcpu_translate(struct vcpu_t *vcpu, hax_vaddr_t va, uint access, + hax_paddr_t *pa, uint64_t *len, bool update) +{ + pagemode_t mode = vcpu_get_pagemode(vcpu); + uint order = 0; + uint r = -1; + + hax_log(HAX_LOGD, "%s: vcpu_translate: %llx (%s,%s) mode %u\n", __func__, + va, access & TF_WRITE ? "W" : "R", access & TF_USER ? "U" : "S", + mode); + + switch (mode) { + case PM_FLAT: { + // Non-paging mode, no further actions. + *pa = va; + r = 0; + break; + } + case PM_2LVL: + case PM_PAE: + case PM_PML4: { + r = pw_perform_page_walk(vcpu, va, access, pa, &order, update, + false); + break; + } + default: { + // Should never happen + break; + } + } + + if (r == 0) { + // The translation is only guaranteed to be valid until the end of the + // 4096-byte page (the minimum page size), due to possible EPT + // remapping of larger translation units. + uint64_t size = (uint64_t)1 << PG_ORDER_4K; + uint64_t extend = size - (va & (size - 1)); + + // Adjust validity of translation if necessary. + if (len != NULL && (*len == 0 || *len > extend)) { + *len = extend; + } + } + + return r; +} + +uint32_t vcpu_read_guest_virtual(struct vcpu_t *vcpu, hax_vaddr_t addr, + void *dst, uint32_t dst_buflen, uint32_t size, + uint flag) +{ + // TBD: use guest CPL for access checks + char *dstp = dst; + uint32_t offset = 0; + int len2; + + // Flag == 1 is not currently used, but it could be enabled if useful.
+ hax_assert(flag == 0 || flag == 2); + + while (offset < size) { + hax_paddr_t gpa; + uint64_t len = size - offset; + + uint r = vcpu_translate(vcpu, addr + offset, 0, &gpa, &len, flag != 2); + if (r != 0) { + if (flag != 0) + return offset; // Number of bytes successfully read + + if (r & TF_GP2HP) { + hax_log(HAX_LOGE, "%s: read_guest_virtual(%llx, %x) failed\n", + __func__, addr, size); + } + hax_log(HAX_LOGD, "%s: read_guest_virtual(%llx, %x) injecting #PF" + "\n", __func__, addr, size); + vcpu->state->_cr2 = addr + offset; + hax_inject_page_fault(vcpu, r & 0x1f); + + return false; + } +// if (addr + offset != gpa) { +// hax_log(HAX_LOGI, "%s: gva=0x%llx, gpa=0x%llx, len=0x%llx\n", +// __func__, addr + offset, gpa, len); +// } + + len2 = gpa_space_read_data(&vcpu->vm->gpa_space, gpa, (int)len, + (uint8_t *)(dstp + offset)); + if (len2 <= 0) { + vcpu_set_panic(vcpu); + hax_log(HAX_LOGPANIC, "%s: read guest virtual error, gpa:0x%llx, " + "len:0x%llx\n", __func__, gpa, len); + return false; + } + + len = (uint64_t)len2; + offset += len; + } + + return flag != 0 ? size : true; +} + +uint32_t vcpu_write_guest_virtual(struct vcpu_t *vcpu, hax_vaddr_t addr, + uint32_t dst_buflen, const void *src, + uint32_t size, uint flag) +{ + // TODO: use guest CPL for access checks + const char *srcp = src; + uint32_t offset = 0; + int len2; + + hax_assert(flag == 0 || flag == 1); + hax_assert(dst_buflen >= size); + + while (offset < size) { + hax_paddr_t gpa; + uint64_t len = size - offset; + uint r = vcpu_translate(vcpu, addr + offset, TF_WRITE, &gpa, &len, + flag != 2); + if (r != 0) { + if (flag != 0) + return offset; // Number of bytes successfully written + + if (r & TF_GP2HP) { + vcpu_set_panic(vcpu); + hax_log(HAX_LOGPANIC, "%s: write_guest_virtual(%llx, %x) failed" + "\n", __func__, addr, size); + } + hax_log(HAX_LOGD, "%s: write_guest_virtual(%llx, %x) injecting #PF" + "\n", __func__, addr, size); + vcpu->state->_cr2 = addr + offset; + hax_inject_page_fault(vcpu, r & 0x1f); + + return false; + } + + len2 = (uint64_t)gpa_space_write_data(&vcpu->vm->gpa_space, gpa, len, + (uint8_t *)(srcp + offset)); + if (len2 <= 0) { + vcpu_set_panic(vcpu); + hax_log(HAX_LOGPANIC, "%s: write guest virtual error, gpa:0x%llx, " + "len:0x%llx\n", __func__, gpa, len); + return false; + } + + len = len2; + offset += len; + } + + return flag != 0 ? size : true; +} + +static inline void * mmio_map_guest_virtual_page_fast(struct vcpu_t *vcpu, + uint64_t gva, int len) +{ + if (!vcpu->mmio_fetch.kva) + return NULL; + + if ((gva >> PG_ORDER_4K) != (vcpu->mmio_fetch.last_gva >> PG_ORDER_4K) || + vcpu->state->_cr3 != vcpu->mmio_fetch.last_guest_cr3) { + // Invalidate the cache + vcpu->mmio_fetch.kva = NULL; + gpa_space_unmap_page(&vcpu->vm->gpa_space, &vcpu->mmio_fetch.kmap); + if (vcpu->mmio_fetch.hit_count < 2) { + hax_log(HAX_LOGD, "%s: Cache miss: cached_gva=0x%llx, " + "cached_cr3=0x%llx, gva=0x%llx, cr3=0x%llx, hits=0x%d, " + "vcpu_id=0x%u\n", __func__, vcpu->mmio_fetch.last_gva, + vcpu->mmio_fetch.last_guest_cr3, gva, vcpu->state->_cr3, + vcpu->mmio_fetch.hit_count, vcpu->vcpu_id); + } + + return NULL; + } + + // Here we assume the GVA of the MMIO instruction maps to the same guest + // page frame that contains the previous MMIO instruction, as long as guest + // CR3 has not changed. + // TODO: Is it possible for a guest to modify its page tables without + // replacing the root table (CR3) between two consecutive MMIO accesses? 
+ vcpu->mmio_fetch.hit_count++; + // Skip GVA=>GPA=>KVA conversion, and just use the cached KVA. + // TODO: We do not walk the guest page tables in this case, which saves + // time, but also means the accessed/dirty bits of the relevant guest page + // table entries are not updated. This should be okay, since the same MMIO + // instruction was just fetched by hardware (before this EPT violation), + // which presumably has taken care of this matter. + return vcpu->mmio_fetch.kva; +} + +static void * mmio_map_guest_virtual_page_slow(struct vcpu_t *vcpu, + uint64_t gva, + hax_kmap_user *kmap) +{ + uint64_t gva_aligned = gva & pgmask(PG_ORDER_4K); + uint64_t gpa; + uint ret; + void *kva; + + ret = vcpu_translate(vcpu, gva_aligned, 0, &gpa, NULL, true); + if (ret) { + hax_log(HAX_LOGE, "%s: vcpu_translate() returned 0x%x: vcpu_id=%u, " + "gva=0x%llx\n", __func__, ret, vcpu->vcpu_id, gva); + // TODO: Inject a guest page fault? + return NULL; + } + hax_log(HAX_LOGD, "%s: gva=0x%llx => gpa=0x%llx, vcpu_id=0x%u\n", __func__, + gva_aligned, gpa, vcpu->vcpu_id); + + kva = gpa_space_map_page(&vcpu->vm->gpa_space, gpa >> PG_ORDER_4K, kmap, + NULL); + if (!kva) { + hax_log(HAX_LOGE, "%s: gpa_space_map_page() failed: vcpu_id=%u, " + "gva=0x%llx, gpa=0x%llx\n", __func__, vcpu->vcpu_id, gva, gpa); + return NULL; + } + + return kva; +} + +static pagemode_t vcpu_get_pagemode(struct vcpu_t *vcpu) +{ + if (!(vcpu->state->_cr0 & CR0_PG)) + return PM_FLAT; + + if (!(vcpu->state->_cr4 & CR4_PAE)) + return PM_2LVL; + + // Only support pure 32-bit paging. May support PAE paging in future. + // hax_assert(0); + if (!(vcpu->state->_efer & IA32_EFER_LMA)) + return PM_PAE; + + return PM_PML4; +} diff --git a/core/page_walker.c b/core/page_walker.c index 5c1c366e..5d83e26a 100644 --- a/core/page_walker.c +++ b/core/page_walker.c @@ -32,7 +32,6 @@ #include "include/ia32_defs.h" #include "include/paging.h" #include "include/vcpu.h" -#include "include/vtlb.h" #include "include/ept.h" #include "include/intr.h" #include "include/page_walker.h" diff --git a/core/vcpu.c b/core/vcpu.c index b9ce5410..f0e8528c 100644 --- a/core/vcpu.c +++ b/core/vcpu.c @@ -31,6 +31,7 @@ #include "../include/hax.h" #include "include/compiler.h" #include "include/ia32_defs.h" +#include "include/mmio.h" #include "include/vcpu.h" #include "include/mtrr.h" #include "include/vmx.h" @@ -41,7 +42,6 @@ #include "include/dump.h" #include "include/intr.h" -#include "include/vtlb.h" #include "include/ept.h" #include "include/paging.h" #include "include/hax_core_interface.h" @@ -108,7 +108,6 @@ static void advance_rip(struct vcpu_t *vcpu); static void handle_machine_check(struct vcpu_t *vcpu); static void handle_mem_fault(struct vcpu_t *vcpu, struct hax_tunnel *htun); -static void check_flush(struct vcpu_t *vcpu, uint32_t bits); static void vmwrite_efer(struct vcpu_t *vcpu); static int handle_msr_read(struct vcpu_t *vcpu, uint32_t msr, uint64_t *val); @@ -456,14 +455,11 @@ struct vcpu_t *vcpu_create(struct vm_t *vm, void *vm_host, int vcpu_id) if (!vcpu->tmutex) goto fail_5; - if (!vcpu_vtlb_alloc(vcpu)) - goto fail_6; - if (!vcpu_alloc_cpuid(vcpu)) - goto fail_7; + goto fail_6; if (hax_vcpu_create_host(vcpu, vm_host, vm->vm_id, vcpu_id)) - goto fail_8; + goto fail_7; vcpu->prev_cpu_id = (uint32_t)(~0ULL); vcpu->cpu_id = hax_cpu_id(); @@ -493,10 +489,8 @@ struct vcpu_t *vcpu_create(struct vm_t *vm, void *vm_host, int vcpu_id) hax_log(HAX_LOGD, "vcpu %d is created.\n", vcpu->vcpu_id); return vcpu; -fail_8: - vcpu_free_cpuid(vcpu); fail_7: - 
vcpu_vtlb_free(vcpu); + vcpu_free_cpuid(vcpu); fail_6: hax_mutex_free(vcpu->tmutex); fail_5: @@ -547,7 +541,6 @@ static int _vcpu_teardown(struct vcpu_t *vcpu) } hax_free_pages(vcpu->vmcs_page); hax_vfree(vcpu->state, sizeof(struct vcpu_state_t)); - vcpu_vtlb_free(vcpu); hax_mutex_free(vcpu->tmutex); vcpu_free_cpuid(vcpu); hax_vfree(vcpu, sizeof(struct vcpu_t)); @@ -1823,27 +1816,6 @@ int vcpu_vmexit_handler(struct vcpu_t *vcpu, exit_reason_t exit_reason, return ret; } -int vtlb_active(struct vcpu_t *vcpu) -{ - struct vcpu_state_t *state = vcpu->state; - struct per_cpu_data *cpu_data = current_cpu_data(); - - if (hax->ug_enable_flag) - return 0; - - hax_log(HAX_LOGD, "vtlb active: cr0, %llx\n", state->_cr0); - if ((state->_cr0 & CR0_PG) == 0) - return 1; - - if (config.disable_ept) - return 1; - - if (!cpu_data->vmx_info._ept_cap) - return 1; - - return 0; -} - static void advance_rip(struct vcpu_t *vcpu) { struct vcpu_state_t *state = vcpu->state; @@ -1912,7 +1884,7 @@ void vcpu_vmread_all(struct vcpu_t *vcpu) } } -void vcpu_vmwrite_all(struct vcpu_t *vcpu, int force_tlb_flush) +void vcpu_vmwrite_all(struct vcpu_t *vcpu) { struct vcpu_state_t *state = vcpu->state; @@ -1938,10 +1910,6 @@ void vcpu_vmwrite_all(struct vcpu_t *vcpu, int force_tlb_flush) vmx(vcpu, interruptibility_state).raw); vmwrite_cr(vcpu); - - if (force_tlb_flush) { - vcpu_invalidate_tlb(vcpu, 1); - } } // Prepares the values (4 GPAs) to be loaded into VMCS fields PDPTE{0..3}. @@ -2026,52 +1994,36 @@ static void vmwrite_cr(struct vcpu_t *vcpu) ~(cr0_fixed_0 ^ cr0_fixed_1); } - if (vtlb_active(vcpu)) { - hax_log(HAX_LOGD, "vTLB mode, cr0 %llx\n", vcpu->state->_cr0); - vcpu->mmu->mmu_mode = MMU_MODE_VTLB; - exc_bitmap |= 1u << VECTOR_PF; - cr0 |= CR0_WP; - cr0_mask |= CR0_WP; - cr4 |= CR4_PGE | CR4_PAE; - cr4_mask |= CR4_PGE | CR4_PAE | CR4_PSE; - pcpu_ctls |= CR3_LOAD_EXITING | CR3_STORE_EXITING | INVLPG_EXITING; - scpu_ctls &= ~ENABLE_EPT; - - vmwrite(vcpu, GUEST_CR3, vtlb_get_cr3(vcpu)); - state->_efer = 0; - } else { // EPTE - vcpu->mmu->mmu_mode = MMU_MODE_EPT; - // In EPT mode, we need to monitor guest writes to CR.PAE, so that we - // know when it wants to enter PAE paging mode (see IASDM Vol. 3A 4.1.2, - // Figure 4-1, as well as vcpu_prepare_pae_pdpt() and its caller). - // TODO: Monitor guest writes to CR4.{PGE, PSE, SMEP} as well (see IASDM - // Vol. 3A 4.4.1) - cr4_mask |= CR4_PAE; - eptp = vm_get_eptp(vcpu->vm); - hax_assert(eptp != INVALID_EPTP); - // hax_log(HAX_LOGD, "Guest eip:%llx, EPT mode, eptp:%llx\n", - // vcpu->state->_rip, eptp); - vmwrite(vcpu, GUEST_CR3, state->_cr3); - scpu_ctls |= ENABLE_EPT; - if (vcpu->pae_pdpt_dirty) { - // vcpu_prepare_pae_pdpt() has updated vcpu->pae_pdptes - // Note that because we do not monitor guest writes to CR3, the only - // case where vcpu->pae_pdptes is newer than VMCS GUEST_PDPTE{0..3} - // is following a guest write to CR0 or CR4 that requires PDPTEs to - // be reloaded, i.e. the pae_pdpt_dirty case. When the guest is in - // PAE paging mode but !pae_pdpt_dirty, VMCS GUEST_PDPTE{0..3} are - // already up-to-date following each VM exit (see Intel SDM Vol. 3C - // 27.3.4), and we must not overwrite them with our cached values - // (vcpu->pae_pdptes), which may be outdated. 
- vmwrite(vcpu, GUEST_PDPTE0, vcpu->pae_pdptes[0]); - vmwrite(vcpu, GUEST_PDPTE1, vcpu->pae_pdptes[1]); - vmwrite(vcpu, GUEST_PDPTE2, vcpu->pae_pdptes[2]); - vmwrite(vcpu, GUEST_PDPTE3, vcpu->pae_pdptes[3]); - vcpu->pae_pdpt_dirty = 0; - } - vmwrite(vcpu, VMX_EPTP, eptp); - // pcpu_ctls |= RDTSC_EXITING; - } + // In EPT mode, we need to monitor guest writes to CR.PAE, so that we know + // when it wants to enter PAE paging mode (see IASDM Vol. 3A 4.1.2, + // Figure 4-1, as well as vcpu_prepare_pae_pdpt() and its caller). + // TODO: Monitor guest writes to CR4.{PGE, PSE, SMEP} as well (see IASDM + // Vol. 3A 4.4.1) + cr4_mask |= CR4_PAE; + eptp = vm_get_eptp(vcpu->vm); + hax_assert(eptp != INVALID_EPTP); + // hax_log(HAX_LOGD, "Guest eip:%llx, EPT mode, eptp:%llx\n", + // vcpu->state->_rip, eptp); + vmwrite(vcpu, GUEST_CR3, state->_cr3); + scpu_ctls |= ENABLE_EPT; + if (vcpu->pae_pdpt_dirty) { + // vcpu_prepare_pae_pdpt() has updated vcpu->pae_pdptes + // Note that because we do not monitor guest writes to CR3, the only + // case where vcpu->pae_pdptes is newer than VMCS GUEST_PDPTE{0..3} is + // following a guest write to CR0 or CR4 that requires PDPTEs to be + // reloaded, i.e., the pae_pdpt_dirty case. When the guest is in PAE + // paging mode but !pae_pdpt_dirty, VMCS GUEST_PDPTE{0..3} are already + // up-to-date following each VM exit (see Intel SDM Vol. 3C 27.3.4), + // and we must not overwrite them with our cached values + // (vcpu->pae_pdptes), which may be outdated. + vmwrite(vcpu, GUEST_PDPTE0, vcpu->pae_pdptes[0]); + vmwrite(vcpu, GUEST_PDPTE1, vcpu->pae_pdptes[1]); + vmwrite(vcpu, GUEST_PDPTE2, vcpu->pae_pdptes[2]); + vmwrite(vcpu, GUEST_PDPTE3, vcpu->pae_pdptes[3]); + vcpu->pae_pdpt_dirty = 0; + } + vmwrite(vcpu, VMX_EPTP, eptp); + // pcpu_ctls |= RDTSC_EXITING; vmwrite(vcpu, GUEST_CR0, cr0); vmwrite(vcpu, VMX_CR0_MASK, cr0_mask); @@ -2168,16 +2120,11 @@ static bool qemu_support_fastmmio_extra(struct vcpu_t *vcpu) static bool is_mmio_address(struct vcpu_t *vcpu, hax_paddr_t gpa) { - hax_paddr_t hpa; - if (vtlb_active(vcpu)) { - hpa = hax_gpfn_to_hpa(vcpu->vm, gpa >> HAX_PAGE_SHIFT); - // hax_gpfn_to_hpa() assumes hpa == 0 is invalid - return !hpa; - } + hax_memslot *slot; - hax_memslot *slot = memslot_find(&vcpu->vm->gpa_space, gpa >> PG_ORDER_4K); + slot = memslot_find(&vcpu->vm->gpa_space, gpa >> PG_ORDER_4K); - return !slot; + return (slot == NULL); } static int vcpu_emulate_insn(struct vcpu_t *vcpu) @@ -2417,17 +2364,10 @@ static int exit_exc_nmi(struct vcpu_t *vcpu, struct hax_tunnel *htun) return HAX_RESUME; } case VECTOR_PF: { - if (vtlb_active(vcpu)) { - if (handle_vtlb(vcpu)) - return HAX_RESUME; - - return vcpu_emulate_insn(vcpu); - } else { - vcpu_set_panic(vcpu); - hax_log(HAX_LOGPANIC, "Page fault shouldn't happen when EPT is" - " enabled.\n"); - dump_vmcs(vcpu); - } + vcpu_set_panic(vcpu); + hax_log(HAX_LOGPANIC, "Page fault shouldn't happen when EPT is " + "enabled.\n"); + dump_vmcs(vcpu); break; } case VECTOR_MC: { @@ -2585,8 +2525,8 @@ static int exit_hlt(struct vcpu_t *vcpu, struct hax_tunnel *htun) static int exit_invlpg(struct vcpu_t *vcpu, struct hax_tunnel *htun) { advance_rip(vcpu); - vcpu_invalidate_tlb_addr(vcpu, vmx(vcpu, exit_qualification).address); htun->_exit_reason = vmx(vcpu, exit_reason).basic_reason; + return HAX_RESUME; } @@ -2596,34 +2536,6 @@ static int exit_rdtsc(struct vcpu_t *vcpu, struct hax_tunnel *htun) return HAX_RESUME; } -static void check_flush(struct vcpu_t *vcpu, uint32_t bits) -{ - switch (vmx(vcpu, exit_qualification).cr.creg) 
{ - case 0: { - if (bits & (CR0_PE | CR0_PG)) { - vcpu_invalidate_tlb(vcpu, 1); - } - break; - } - case 2: { - break; - } - case 3: { - vcpu_invalidate_tlb(vcpu, 0); - break; - } - case 4: { - if (bits & (CR4_PSE | CR4_PAE | CR4_PGE)) { - vcpu_invalidate_tlb(vcpu, 1); - } - break; - } - case 8: { - break; - } - } -} - static int exit_cr_access(struct vcpu_t *vcpu, struct hax_tunnel *htun) { int cr; @@ -2675,7 +2587,7 @@ static int exit_cr_access(struct vcpu_t *vcpu, struct hax_tunnel *htun) // See IASDM Vol. 3A 4.4.1 cr0_pae_triggers = CR0_CD | CR0_NW | CR0_PG; if ((val & CR0_PG) && (state->_cr4 & CR4_PAE) && - !(state->_efer & IA32_EFER_LME) && !vtlb_active(vcpu) && + !(state->_efer & IA32_EFER_LME) && ((val ^ old_val) & cr0_pae_triggers)) { hax_log(HAX_LOGI, "%s: vCPU #%u triggers PDPT (re)load for" " EPT+PAE mode (CR0 path)\n", __func__, @@ -2697,7 +2609,7 @@ static int exit_cr_access(struct vcpu_t *vcpu, struct hax_tunnel *htun) // TODO: CR4_SMEP is not yet defined cr4_pae_triggers = CR4_PAE | CR4_PGE | CR4_PSE; if ((val & CR4_PAE) && (state->_cr0 & CR0_PG) && - !(state->_efer & IA32_EFER_LME) && !vtlb_active(vcpu) && + !(state->_efer & IA32_EFER_LME) && ((val ^ old_val) & cr4_pae_triggers)) { hax_log(HAX_LOGI, "%s: vCPU #%u triggers PDPT (re)load for " "EPT+PAE mode (CR4 path)\n", __func__, @@ -2709,7 +2621,6 @@ static int exit_cr_access(struct vcpu_t *vcpu, struct hax_tunnel *htun) cr, val); break; } - check_flush(vcpu, old_val ^ val); vcpu_write_cr(state, cr, val); if (is_ept_pae) { @@ -3313,13 +3224,7 @@ static void vmwrite_efer(struct vcpu_t *vcpu) } if (vmx(vcpu, entry_ctls) & ENTRY_CONTROL_LOAD_EFER) { - uint32_t guest_efer = state->_efer; - - if (vtlb_active(vcpu)) { - guest_efer |= IA32_EFER_XD; - } - - vmwrite(vcpu, GUEST_EFER, guest_efer); + vmwrite(vcpu, GUEST_EFER, state->_efer); } if (entry_ctls != vmx(vcpu, entry_ctls)) { @@ -3450,10 +3355,6 @@ static int handle_msr_write(struct vcpu_t *vcpu, uint32_t msr, uint64_t val, vcpu_set_panic(vcpu); hax_log(HAX_LOGPANIC, "64-bit guest is not allowed on 32-bit host.\n"); - } else if ((state->_efer & IA32_EFER_LME) && vtlb_active(vcpu)) { - vcpu_set_panic(vcpu); - hax_log(HAX_LOGPANIC, "64-bit guest is not allowed on core 2 " - "machine.\n"); } else { vmwrite_efer(vcpu); } diff --git a/core/vtlb.c b/core/vtlb.c deleted file mode 100644 index a87bb1ec..00000000 --- a/core/vtlb.c +++ /dev/null @@ -1,1044 +0,0 @@ -/* - * Copyright (c) 2009 Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "../include/hax.h" -#include "include/ia32_defs.h" -#include "include/paging.h" -#include "include/vcpu.h" -#include "include/vtlb.h" -#include "include/ept.h" -#include "include/intr.h" -#include "include/page_walker.h" - -/* - * Design rule: Only support pure 32-bit guest with 2-level page table. - * Host uses 3-level PAE paging for the virtual TLB. - * - * Design idea: Host always keeps topmost two levels' mapping in memory, and - * this mapping services for all translations in the guest. Only change the PTE - * level to emulate guest's page tables. - * - * Key APIs: - * 1. handle_vtlb: Used to handle guest's page faults and refill proper mappings - * in vTLB to meet guest's translation requirement. - * 2. vcpu_invalidate_tlb: Invalidate all the vTLB entries, maybe called in - * mov CR3 or page mode switch case. - * 3. vcpu_invalidate_tlb_addr: Invalidate the special virtual address for the - * current page table. Maybe called as emulating invlpg instruction. - * 4. vcpu_translate: Translate a virtual address to guest a physical address. - * 5. vcpu_vtlb_alloc: In vcpu initialization stage, allocate a vTLB for each - * vCPU. - * 6. vcpu_vtlb_free: Reverse operation of vcpu_vtlb_alloc at vcpu destroy - * stage. - * 7. vcpu_read_guest_virtual: - * 8. 
vcpu_write_guest_virtual: - */ - -#define NR_PDE_PAGES 4 -#define NR_PDE_PAGE_ORDER 2 // 2 ^ NR_PDE_PAGE_ORDER = NR_PDE_PAGES - -#define NR_MMU_PAGES 256 - -static struct hax_page * mmu_zalloc_one_page(hax_mmu_t *mmu, bool igo); -static void mmu_recycle_vtlb_pages(hax_mmu_t *mmu); -static void vtlb_free_all_entries(hax_mmu_t *mmu); -static pagemode_t vcpu_get_pagemode(struct vcpu_t *vcpu); -static pte64_t * vtlb_get_pde(hax_mmu_t *mmu, hax_vaddr_t va, bool is_shadow); -static uint32_t vcpu_mmu_walk(struct vcpu_t *vcpu, hax_vaddr_t va, uint32_t access, - hax_paddr_t *pa, uint *order, uint64_t *flags, - bool update, bool prefetch); - -static void vtlb_update_pde(pte64_t *pde, pte64_t *shadow_pde, - struct hax_page *page) -{ - pte64_set_entry(pde, 1, hax_page_pa(page), true, true, true); - shadow_pde->raw = (mword)hax_page_va(page); -} - -// Insert a vTLB entry to the system for the guest -static uint32_t vtlb_insert_entry(struct vcpu_t *vcpu, hax_mmu_t *mmu, - vtlb_t *tlb) -{ - pte64_t *pte_base, *pde, *shadow_pde, *pte; - uint idx, is_user, is_write, is_exec, is_global, is_pwt, is_pcd, is_pat; - uint base_idx, i; - struct hax_page *page; - uint64_t flags = 0; - - is_global = !!(tlb->flags & PTE32_G_BIT_MASK); - hax_assert(mmu->host_mode == PM_PAE && tlb->order == 12); -retry: - pde = vtlb_get_pde(mmu, tlb->va, 0); - shadow_pde = vtlb_get_pde(mmu, tlb->va, 1); - if (!pte64_is_present(pde)) { - page = mmu_zalloc_one_page(mmu, is_global && igo_addr(tlb->va)); - if (!page) { - mmu_recycle_vtlb_pages(mmu); - goto retry; - } - vtlb_update_pde(pde, shadow_pde, page); - pte64_set_accessed(pde, 1); - } - - // Grab the PTE entry - pte_base = (void *)(mword)shadow_pde->raw; - idx = pte64_get_idx(0, tlb->va); - pte = &pte_base[idx]; - - is_user = !!(tlb->flags & PTE32_USER_BIT_MASK); - is_write = !!((tlb->access & TF_WRITE) || - ((tlb->flags & PTE32_D_BIT_MASK) && - (tlb->flags & PTE32_W_BIT_MASK))); - is_exec = !!(tlb->flags & ((uint64_t)1 << 63)); - is_pwt = !!(tlb->flags & PTE32_PWT_BIT_MASK); - is_pcd = !!(tlb->flags & PTE32_PCD_BIT_MASK); - is_pat = !!(tlb->flags & PTE32_PAT_BIT_MASK); - - // Set the pte entry accordingly. - pte64_set_entry(pte, 0, tlb->ha, is_user, is_write, is_exec); - pte64_set_ad(pte, 0, 1); - pte64_set_caching(pte, is_pat, is_pcd, is_pwt); - pte64_set_global(pte, 0, is_global); - - pte->x2 = 0x0; - base_idx = idx - idx % 16; - - for (i = 0; i < 16; i++) { - if (!vcpu->prefetch[i].flag) - continue; - pte = &pte_base[base_idx + i]; - if (pte64_is_present(pte) && (pte->x2 == 0x0)) - continue; - pte->raw = 0; - - flags = vcpu->prefetch[i].flags; - is_user = !!(flags & PTE32_USER_BIT_MASK); - is_write = !!((flags & PTE32_D_BIT_MASK) && (flags & PTE32_W_BIT_MASK)); - is_exec = !!(flags & ((uint64_t)1 << 63)); - is_pwt = !!(flags & PTE32_PWT_BIT_MASK); - is_pcd = !!(flags & PTE32_PCD_BIT_MASK); - is_pat = !!(flags & PTE32_PAT_BIT_MASK); - is_global = !!(flags & PTE32_G_BIT_MASK); - - // Set the pte entry accordingly. 
- pte64_set_entry(pte, 0, vcpu->prefetch[i].ha, is_user, is_write, - is_exec); - pte64_set_ad(pte, 0, 1); - pte64_set_caching(pte, is_pat, is_pcd, is_pwt); - pte64_set_global(pte, 0, is_global); - pte->x2 = 0x1; - } - - if(!is_global && igo_addr(tlb->va)) { - mmu->igo = false; - } - - return 0; -} - -static uint mmu_alloc_vtlb_pages(hax_mmu_t *mmu) -{ - int i; - struct hax_page *page, *n; - - for (i = 0; i < NR_MMU_PAGES; i++) { - page = hax_alloc_page(0, 1); - if (!page) - goto alloc_fail; - hax_list_add(&page->list, &mmu->free_page_list); - } - return 1; - -alloc_fail: - hax_list_entry_for_each_safe(page, n, &mmu->free_page_list, struct hax_page, - list) { - hax_list_del(&page->list); - hax_free_page(page); - } - return 0; -} - -static void mmu_free_vtlb_pages(hax_mmu_t *mmu) -{ - struct hax_page *page, *n; - - hax_list_entry_for_each_safe(page, n, &mmu->free_page_list, struct hax_page, - list) { - hax_list_del(&page->list); - hax_free_page(page); - } - hax_list_entry_for_each_safe(page, n, &mmu->used_page_list, struct hax_page, - list) { - hax_list_del(&page->list); - hax_free_page(page); - } - hax_list_entry_for_each_safe(page, n, &mmu->igo_page_list, struct hax_page, - list) { - hax_list_del(&page->list); - hax_free_page(page); - } -} - -static struct hax_page * mmu_zalloc_one_page(hax_mmu_t *mmu, bool igo) -{ - struct hax_page *page; - void *page_va; - - if (!hax_list_empty(&mmu->free_page_list)) { - page = hax_list_entry(list, struct hax_page, mmu->free_page_list.next); - hax_list_del(&page->list); - if (igo) { - hax_list_add(&page->list, &mmu->igo_page_list); - } else { - hax_list_add(&page->list, &mmu->used_page_list); - } - page_va = hax_page_va(page); - hax_assert(page_va); - memset(page_va, 0, PAGE_SIZE_4K); - return page; - } - return NULL; -} - -// Recycle all vTLB pages from used_list to free_list. 
-static void mmu_recycle_vtlb_pages(hax_mmu_t *mmu) -{ - vtlb_free_all_entries(mmu); - hax_list_join(&mmu->used_page_list, &mmu->free_page_list); - hax_init_list_head(&mmu->used_page_list); - if (!mmu->igo) { - hax_list_join(&mmu->igo_page_list, &mmu->free_page_list); - hax_init_list_head(&mmu->igo_page_list); - } - mmu->igo = true; - mmu->clean = true; -} - -uint vcpu_vtlb_alloc(struct vcpu_t *vcpu) -{ - struct hax_page *page; - uint i; - pte64_t *pdpte; - unsigned char *pde_va, *addr; - hax_mmu_t *mmu; - - hax_assert(!vcpu->mmu); - - mmu = hax_vmalloc(sizeof(hax_mmu_t), 0); - - if (!mmu) { - hax_log(HAX_LOGE, "No memory to create mmu for vcpu:%d\n", - vcpu->vcpu_id); - return 0; - } - memset(mmu, 0, sizeof(hax_mmu_t)); - vcpu->mmu = mmu; - mmu->mmu_mode = MMU_MODE_INVALID; - - // Must ensure the first page should be lower than 4G - page = hax_alloc_page(HAX_MEM_LOW_4G, 1); - if (!page) { - hax_log(HAX_LOGD, "No enough memory for creating vTLB root page!\n"); - goto alloc_fail0; - } - mmu->hpd_page = page; - - // Only support 32-bit guests - mmu->pde_page = hax_alloc_pages(NR_PDE_PAGE_ORDER, 0, 1); - if (!mmu->pde_page) - goto alloc_fail1; - - mmu->pde_shadow_page = hax_alloc_pages(NR_PDE_PAGE_ORDER, 0, 1); - if (!mmu->pde_shadow_page) - goto alloc_fail2; - - pde_va = hax_page_va(mmu->pde_page); - memset(pde_va, 0, NR_PDE_PAGES * PAGE_SIZE_4K); - - addr = hax_page_va(page); - memset(addr, 0, PAGE_SIZE_4K); - // Get the first PDPTE entry - pdpte = (pte64_t *)addr; - - for (i = 0; i < 4; i++) { - pte64_set_entry(pdpte + i, 2, hax_pa(pde_va + i * PAGE_SIZE_4K), 0, 0, - 0); - } - - hax_init_list_head(&mmu->free_page_list); - hax_init_list_head(&mmu->used_page_list); - hax_init_list_head(&mmu->igo_page_list); - if (!mmu_alloc_vtlb_pages(mmu)) - goto alloc_fail3; - - mmu->host_mode = PM_INVALID; - mmu->clean = true; - mmu->igo = true; - return 1; - -alloc_fail3: - hax_free_pages(mmu->pde_shadow_page); - mmu->pde_shadow_page = 0; -alloc_fail2: - hax_free_pages(mmu->pde_page); - mmu->pde_page = 0; -alloc_fail1: - hax_free_pages(mmu->hpd_page); - mmu->hpd_page = 0; -alloc_fail0: - hax_vfree(vcpu->mmu, sizeof(hax_mmu_t)); - vcpu->mmu = 0; - return 0; -} - -void vcpu_vtlb_free(struct vcpu_t *vcpu) -{ - hax_mmu_t *mmu = vcpu->mmu; - mmu_free_vtlb_pages(mmu); - if (mmu->pde_page) { - hax_free_page(mmu->pde_page); - mmu->pde_page = 0; - } - if (mmu->pde_shadow_page) { - hax_free_page(mmu->pde_shadow_page); - mmu->pde_shadow_page = 0; - } - if (mmu->hpd_page) { - hax_free_page(mmu->hpd_page); - mmu->hpd_page = 0; - } - hax_vfree(mmu, sizeof(hax_mmu_t)); - vcpu->mmu = 0; -} - -/* - * If is_shadow = 1, must ensure the non-shadow pde is present before calling - * here. - */ -static pte64_t * vtlb_get_pde(hax_mmu_t *mmu, hax_vaddr_t va, bool is_shadow) -{ - pte64_t *pde; - void *pde_va; - uint idx = (va >> 21) & 0x1ff; - uint32_t which_g = va >> 30; - struct hax_page *pde_page = is_shadow ? 
mmu->pde_shadow_page - : mmu->pde_page; - - pde_va = (unsigned char *)hax_page_va(pde_page) + which_g * PAGE_SIZE_4K; - - hax_assert(mmu->guest_mode < PM_PAE); - pde = (pte64_t *)pde_va + idx; - return pde; -} - -static void vtlb_invalidate_pte(pte64_t *shadow_pde, hax_vaddr_t va) -{ - pte64_t *pte; - void *pte_base; - uint idx; - - pte_base = (void *)(mword)shadow_pde->raw; - if (!pte_base) - return; - idx = pte64_get_idx(0, va); - pte = (pte64_t *)pte_base + idx; - pte64_clear_entry(pte); -} - -void vtlb_invalidate_addr(hax_mmu_t *mmu, hax_vaddr_t va) -{ - pte64_t *pde; - - if (mmu->clean && !igo_addr(va)) - return; - - hax_assert(mmu->host_mode == PM_PAE); - - hax_log(HAX_LOGD, "Flush address 0x%llx\n", va); - - pde = vtlb_get_pde(mmu, va, 0); - - if (!pte64_is_present(pde)) - return; - pde = vtlb_get_pde(mmu, va, 1); - vtlb_invalidate_pte(pde, va); -} - -/* - * Doesn't need to free shadow pde here, because its entry's validity depends on - * corresponding pde entry is present. - */ -static void vtlb_free_all_entries(hax_mmu_t *mmu) -{ - int nr_page = mmu->igo ? NR_PDE_PAGES - 1 : NR_PDE_PAGES; - void *pde_va = hax_page_va(mmu->pde_page); - memset(pde_va, 0, nr_page * PAGE_SIZE_4K); -} - -void vtlb_invalidate(hax_mmu_t *mmu) -{ - if (mmu->clean) - return; - - hax_assert(mmu->host_mode == PM_PAE); - hax_log(HAX_LOGD, "Flush whole vTLB\n"); - mmu_recycle_vtlb_pages(mmu); - - mmu->clean = 1; -} - -static uint vtlb_handle_page_fault(struct vcpu_t *vcpu, pagemode_t guest_mode, - hax_paddr_t pdir, hax_vaddr_t va, uint32_t access) -{ - uint r; - hax_paddr_t gpa; - vtlb_t tlb; - uint need_invalidation = 0; - hax_mmu_t *mmu = vcpu->mmu; - - hax_log(HAX_LOGD, "vTLB::handle_pagefault %08llx, %08llx %x [Mode %u]\n", - pdir, va, access, guest_mode); - - hax_assert(guest_mode != PM_INVALID); - if (guest_mode != mmu->guest_mode) { - pagemode_t new_host_mode = PM_INVALID; - switch (guest_mode) { - case PM_FLAT: - case PM_2LVL: { - new_host_mode = PM_PAE; - break; - } - case PM_PAE: - case PM_PML4: - default: { - vcpu_set_panic(vcpu); - hax_log(HAX_LOGPANIC, "Invalid guest page table mode %d\n", - mmu->guest_mode); - } - } - - if (new_host_mode != mmu->host_mode) { - vtlb_invalidate(mmu); - } else { - need_invalidation = 1; - } - - mmu->guest_mode = guest_mode; - mmu->host_mode = new_host_mode; - mmu->pdir = pdir; - hax_log(HAX_LOGD, "New vTLB mode %u, pdir %08llx\n", guest_mode, pdir); - } - - if (need_invalidation || - (pdir != mmu->pdir && mmu->guest_mode != PM_FLAT)) { - if (!mmu->clean) { - vtlb_invalidate(mmu); - } - mmu->pdir = pdir; - } - - // Check for a mapping in the guest page tables. - // If there isn't one, return the error code. 
- switch (mmu->guest_mode) { - case PM_FLAT: { - r = 0; - gpa = va; - tlb.guest_order = PG_ORDER_4K; - tlb.flags = (0ULL ^ EXECUTION_DISABLE_MASK) | PTE32_G_BIT_MASK | - PTE32_D_BIT_MASK | PTE32_USER_BIT_MASK | PTE32_W_BIT_MASK; - break; - } - case PM_2LVL: { - r = vcpu_mmu_walk(vcpu, va, access, &gpa, &tlb.guest_order, - &tlb.flags, true, /*true*/false); - break; - } - default: { - hax_log(HAX_LOGE, "Invalid guest's paging mode %d\n", - mmu->guest_mode); - return TF_FAILED; - } - } - - if (r != TF_OK) { - if (!(r & TF_GP2HP)) { - vtlb_invalidate_addr(mmu, va); - } - return r; - } - - tlb.order = tlb.guest_order = PG_ORDER_4K; - hax_assert(tlb.order == PG_ORDER_4K); - - tlb.ha = hax_gpfn_to_hpa(vcpu->vm, gpa >> 12); - if (!tlb.ha) - return TF_FAILED | TF_GP2HP; - - tlb.va = va; - tlb.access = access; - - /* - * Only PAE paging is used to emulate pure 32-bit 2-level paging. - * Now insert the entry in the vtlb for the translation. - */ - hax_assert(mmu->host_mode == PM_PAE); - vtlb_insert_entry(vcpu, mmu, &tlb); - mmu->clean = 0; - - return r; -} - -uint64_t vtlb_get_cr3(struct vcpu_t *vcpu) -{ - uint64_t cr3; - - hax_mmu_t *mmu = vcpu->mmu; - - cr3 = hax_page_pfn(mmu->hpd_page) << 12; - - hax_log(HAX_LOGD, "vTLB: guest mode %u, host mode %d, GUEST_CR3: %08llx\n", - mmu->guest_mode, mmu->host_mode, cr3); - - return cr3; -} - -/* - * Page table walker. - * @param vcpu Current vcpu point - * @param va Guest virtual address - * @param access Access descriptor (read/write, user/supervisor) - * @param pa Guest physical address - * @param size Size of physical page - * @param update Update access and dirty bits of guest structures - * @returns 0 if translation is successful, otherwise 0x80000000 OR'ed with - * the page fault error code. - */ -static uint32_t vcpu_mmu_walk(struct vcpu_t *vcpu, hax_vaddr_t va, uint32_t access, - hax_paddr_t *pa, uint *order, uint64_t *flags, - bool update, bool prefetch) -{ - uint lvl, idx; - void *pte_va; - hax_kmap_user pte_kmap; - bool writable; - pte32_t *pte, old_pte; - hax_paddr_t gpt_base; - bool pat; - uint64_t rights, requested_rights; - - access = access & (TF_WRITE | TF_USER | TF_EXEC); - requested_rights = (access & (TF_WRITE | TF_USER)) | - (access & TF_EXEC ? EXECUTION_DISABLE_MASK : 0); - // Seems the following one is wrong? - // hax_assert((mmu->guest_mode) == PM_2LVL); - -retry: - rights = TF_WRITE | TF_USER; - gpt_base = vcpu->state->_cr3 & pte32_get_cr3_mask(); - - // Page table walker. - for (lvl = PM_2LVL; lvl--; ) { - // Fetch the page table entry. - idx = pte32_get_idx(lvl, va); - pte_va = gpa_space_map_page(&vcpu->vm->gpa_space, - gpt_base >> PG_ORDER_4K, &pte_kmap, - &writable); - - if (!pte_va) - return TF_FAILED; - - hax_assert(!(update && !writable)); - - pte = (pte32_t *)pte_va + idx; - old_pte = *pte; - - // Check access - if (!pte32_is_present(&old_pte)) { - gpa_space_unmap_page(&vcpu->vm->gpa_space, &pte_kmap); - - return TF_FAILED | access; - } - - if (pte32_check_rsvd(&old_pte, lvl)) { - gpa_space_unmap_page(&vcpu->vm->gpa_space, &pte_kmap); - - return TF_FAILED | TF_PROTECT | TF_RSVD | access; - } - - // Always allow execution for pure 32-bit guest! - rights &= old_pte.raw; - rights ^= EXECUTION_DISABLE_MASK; - - if (!pte32_is_leaf(&old_pte, lvl)) { - // Not leaf; update accessed bit and go to the next level. - // Note: Accessed bit is set even though the access may not - // complete. This matches Atom behavior. 
- if (update && !pte32_is_accessed(&old_pte)) { - if (!pte32_atomic_set_accessed(pte, &old_pte)) { - hax_log(HAX_LOGD, - "translate walk: atomic PTE update failed\n"); - gpa_space_unmap_page(&vcpu->vm->gpa_space, &pte_kmap); - - goto retry; - } - } - gpt_base = pte32_get_address(&old_pte, lvl, 0); - gpa_space_unmap_page(&vcpu->vm->gpa_space, &pte_kmap); - } else { - // Permission violations must be checked only after present bit is - // checked at every level. - // Allow supervisor mode writes to read-only pages unless WP=1. - if (!(access & TF_USER) && !(vcpu->state->_cr0 & CR0_WP)) { - rights &= ~(uint64_t)TF_USER; - rights |= TF_WRITE; - } - - if ((rights & requested_rights) != requested_rights) { - gpa_space_unmap_page(&vcpu->vm->gpa_space, &pte_kmap); - - return TF_FAILED | TF_PROTECT | access; - } - - // Update accessed/dirty bits. - if (update && (!pte32_is_accessed(&old_pte) || - ((access & TF_WRITE) && !pte32_is_dirty(&old_pte)))) { - if (!pte32_atomic_set_ad(pte, lvl, access & TF_WRITE, - &old_pte)) { - hax_log(HAX_LOGD, - "translate walk: atomic PTE update failed\n"); - gpa_space_unmap_page(&vcpu->vm->gpa_space, &pte_kmap); - goto retry; - } - } - - *pa = pte32_get_address(&old_pte, lvl, va); - - if (pte32_is_4M_page(&old_pte, lvl)) { - *order = PG_ORDER_4M; - } else { - *order = PG_ORDER_4K; - } - pat = pte32_get_pat(&old_pte); - // G, D, PCD, PWT - *flags = rights | pat << 7 | (pte32_get_val(&old_pte) & 0x158); - if (prefetch && hax_gpfn_to_hpa(vcpu->vm, *pa >> 12)) { - uint base_idx = 0; - pte32_t pre_pte; - uint i; - //hax_log(HAX_LOGE, "guest: va %lx\n", va); - - base_idx = idx - idx % 16; - for (i = 0; i < 16; i++) { - vcpu->prefetch[i].flag = 0; - if (idx == base_idx + i) - continue; - - pte = (pte32_t *)pte_va + (base_idx + i); - pre_pte = *pte; - if (!pte32_is_present(&pre_pte)) - continue; - - if (pte32_check_rsvd(&pre_pte, lvl)) - continue; - - if (!pte32_is_accessed(&pre_pte) || - !pte32_is_dirty(&pre_pte)) - continue; - - vcpu->prefetch[i].ha = hax_gpfn_to_hpa(vcpu->vm, - pre_pte.raw >> 12); - if (!vcpu->prefetch[i].ha) - continue; - - rights = 0; - vcpu->prefetch[i].order = PG_ORDER_4K; - pat = pte32_get_pat(&pre_pte); - vcpu->prefetch[i].flags = rights | pat << 7 | - (pte32_get_val(&pre_pte) & 0xf7f); - - vcpu->prefetch[i].flag = 1; - } - } - - gpa_space_unmap_page(&vcpu->vm->gpa_space, &pte_kmap); - - return TF_OK; - } - } - return TF_OK; -} - -bool handle_vtlb(struct vcpu_t *vcpu) -{ - uint32_t access = vmx(vcpu, exit_exception_error_code); - pagemode_t mode = vcpu_get_pagemode(vcpu); - hax_paddr_t pdir = vcpu->state->_cr3 & (mode == PM_PAE ? ~0x1fULL : ~0xfffULL); - hax_vaddr_t cr2 = vmx(vcpu, exit_qualification).address; - - uint32_t ret = vtlb_handle_page_fault(vcpu, mode, pdir, cr2, access); - - hax_log(HAX_LOGD, "handle vtlb fault @%llx\n", cr2); - if (ret == 0) { - vcpu->vmcs_pending_guest_cr3 = 1; - return 1; - } - - if (ret & TF_GP2HP) { - hax_log(HAX_LOGD, "G2H translation failed (%08llx, %x)\n", cr2, access); - return 0; - } - - // Otherwise, inject PF into guest - access = ret & (vcpu->state->_efer & IA32_EFER_XD ? 0x1f : 0x0f); - vcpu->state->_cr2 = cr2; - hax_inject_page_fault(vcpu, access); - hax_log(HAX_LOGD, "Page fault (%08llx, %x)\n", cr2, access); - - return 1; -} - -// TODO: Move these functions to another source file (e.g. 
mmio.c), since they -// are not specific to vTLB mode -static inline void * mmio_map_guest_virtual_page_fast(struct vcpu_t *vcpu, - uint64_t gva, int len) -{ - if (!vcpu->mmio_fetch.kva) { - return NULL; - } - if ((gva >> PG_ORDER_4K) != (vcpu->mmio_fetch.last_gva >> PG_ORDER_4K) || - vcpu->state->_cr3 != vcpu->mmio_fetch.last_guest_cr3) { - // Invalidate the cache - vcpu->mmio_fetch.kva = NULL; - gpa_space_unmap_page(&vcpu->vm->gpa_space, &vcpu->mmio_fetch.kmap); - if (vcpu->mmio_fetch.hit_count < 2) { - hax_log(HAX_LOGD, "%s: Cache miss: cached_gva=0x%llx, " - "cached_cr3=0x%llx, gva=0x%llx, cr3=0x%llx, hits=0x%d, " - "vcpu_id=0x%u\n", __func__, vcpu->mmio_fetch.last_gva, - vcpu->mmio_fetch.last_guest_cr3, gva, vcpu->state->_cr3, - vcpu->mmio_fetch.hit_count, vcpu->vcpu_id); - } - return NULL; - } - // Here we assume the GVA of the MMIO instruction maps to the same guest - // page frame that contains the previous MMIO instruction, as long as guest - // CR3 has not changed. - // TODO: Is it possible for a guest to modify its page tables without - // replacing the root table (CR3) between two consecutive MMIO accesses? - vcpu->mmio_fetch.hit_count++; - // Skip GVA=>GPA=>KVA conversion, and just use the cached KVA - // TODO: We do not walk the guest page tables in this case, which saves - // time, but also means the accessed/dirty bits of the relevant guest page - // table entries are not updated. This should be okay, since the same MMIO - // instruction was just fetched by hardware (before this EPT violation), - // which presumably has taken care of this matter. - return vcpu->mmio_fetch.kva; -} - -static void * mmio_map_guest_virtual_page_slow(struct vcpu_t *vcpu, uint64_t gva, - hax_kmap_user *kmap) -{ - uint64_t gva_aligned = gva & pgmask(PG_ORDER_4K); - uint64_t gpa; - uint ret; - void *kva; - - ret = vcpu_translate(vcpu, gva_aligned, 0, &gpa, NULL, true); - if (ret) { - hax_log(HAX_LOGE, "%s: vcpu_translate() returned 0x%x: vcpu_id=%u," - " gva=0x%llx\n", __func__, ret, vcpu->vcpu_id, gva); - // TODO: Inject a guest page fault? 
- return NULL; - } - hax_log(HAX_LOGD, "%s: gva=0x%llx => gpa=0x%llx, vcpu_id=0x%u\n", __func__, - gva_aligned, gpa, vcpu->vcpu_id); - - kva = gpa_space_map_page(&vcpu->vm->gpa_space, gpa >> PG_ORDER_4K, kmap, - NULL); - if (!kva) { - hax_log(HAX_LOGE, "%s: gpa_space_map_page() failed: vcpu_id=%u, " - "gva=0x%llx, gpa=0x%llx\n", __func__, vcpu->vcpu_id, gva, gpa); - return NULL; - } - return kva; -} - -int mmio_fetch_instruction(struct vcpu_t *vcpu, uint64_t gva, uint8_t *buf, int len) -{ - uint64_t end_gva; - uint8_t *src_buf; - uint offset; - - hax_assert(vcpu != NULL); - hax_assert(buf != NULL); - // A valid IA instruction is never longer than 15 bytes - hax_assert(len > 0 && len <= 15); - end_gva = gva + (uint)len - 1; - if ((gva >> PG_ORDER_4K) != (end_gva >> PG_ORDER_4K)) { - uint32_t ret; - - hax_log(HAX_LOGI, "%s: GVA range spans two pages: gva=0x%llx, len=%d\n", - __func__, gva, len); - ret = vcpu_read_guest_virtual(vcpu, gva, buf, (uint)len, (uint)len, 0); - if (!ret) { - hax_log(HAX_LOGE, "%s: vcpu_read_guest_virtual() failed: " - "vcpu_id=%u, gva=0x%llx, len=%d\n", __func__, - vcpu->vcpu_id, gva, len); - return -ENOMEM; - } - return 0; - } - - src_buf = mmio_map_guest_virtual_page_fast(vcpu, gva, len); - if (!src_buf) { - src_buf = mmio_map_guest_virtual_page_slow(vcpu, gva, - &vcpu->mmio_fetch.kmap); - if (!src_buf) { - return -ENOMEM; - } - vcpu->mmio_fetch.last_gva = gva; - vcpu->mmio_fetch.last_guest_cr3 = vcpu->state->_cr3; - vcpu->mmio_fetch.hit_count = 0; - vcpu->mmio_fetch.kva = src_buf; - } - offset = (uint)(gva & pgoffs(PG_ORDER_4K)); - memcpy_s(buf, len, src_buf + offset, len); - return 0; -} - -/* - * Read guest-linear memory. - * If flag is 0, this read is on behalf of the guest. This function updates the - * access/dirty bits in the guest page tables and injects a page fault if there - * is an error. In this case, the return value is true for success, false if a - * page fault was injected. - * If flag is 1, this function updates the access/dirty bits in the guest page - * tables but does not inject a page fault if there is an error. Instead, it - * returns the number of bytes read. - * If flag is 2, the memory read is for internal use. It does not update the - * guest page tables. It returns the number of bytes read. - */ -uint32_t vcpu_read_guest_virtual(struct vcpu_t *vcpu, hax_vaddr_t addr, void *dst, - uint32_t dst_buflen, uint32_t size, uint flag) -{ - // TBD: use guest CPL for access checks - char *dstp = dst; - uint32_t offset = 0; - int len2; - - // Flag == 1 is not currently used, but it could be enabled if useful. 
- hax_assert(flag == 0 || flag == 2); - - while (offset < size) { - hax_paddr_t gpa; - uint64_t len = size - offset; - uint r = vcpu_translate(vcpu, addr + offset, 0, &gpa, &len, flag != 2); - if (r != 0) { - if (flag != 0) - return offset; // Number of bytes successfully read - if (r & TF_GP2HP) { - hax_log(HAX_LOGE, "read_guest_virtual(%llx, %x) failed\n", - addr, size); - } - hax_log(HAX_LOGD, "read_guest_virtual(%llx, %x) injecting #PF\n", - addr, size); - vcpu->state->_cr2 = addr + offset; - hax_inject_page_fault(vcpu, r & 0x1f); - return false; - } -// if (addr + offset != gpa) { -// hax_log(HAX_LOGI, "%s: gva=0x%llx, gpa=0x%llx, len=0x%llx\n", -// __func__, addr + offset, gpa, len); -// } - - len2 = gpa_space_read_data(&vcpu->vm->gpa_space, gpa, (int)len, - (uint8_t *)(dstp + offset)); - if (len2 <= 0) { - vcpu_set_panic(vcpu); - hax_log(HAX_LOGPANIC, - "read guest virtual error, gpa:0x%llx, len:0x%llx\n", - gpa, len); - return false; - } else { - len = (uint64_t)len2; - } - - offset += len; - } - - return flag != 0 ? size : true; -} - -/* - * Write guest-linear memory. - * If flag is 0, this memory write is on behalf of the guest. This function - * updates the access/dirty bits in the guest page tables and injects a page - * fault if there is an error. In this case, the return value is true for - * success, false if a page fault was injected. - * If flag is 1, it updates the access/dirty bits in the guest page tables but - * does not inject a page fault if there is an error. Instead, it returns the - * number of bytes written. - * A flag value of 2 is implemented, but not used. It does not update the guest - * page tables. It returns the number of bytes written. - */ -uint32_t vcpu_write_guest_virtual(struct vcpu_t *vcpu, hax_vaddr_t addr, - uint32_t dst_buflen, const void *src, uint32_t size, - uint flag) -{ - // TODO: use guest CPL for access checks - const char *srcp = src; - uint32_t offset = 0; - int len2; - - hax_assert(flag == 0 || flag == 1); - hax_assert(dst_buflen >= size); - - while (offset < size) { - hax_paddr_t gpa; - uint64_t len = size - offset; - uint r = vcpu_translate(vcpu, addr + offset, TF_WRITE, &gpa, &len, - flag != 2); - if (r != 0) { - if (flag != 0) - return offset; // Number of bytes successfully written - if (r & TF_GP2HP) { - vcpu_set_panic(vcpu); - hax_log(HAX_LOGPANIC, "write_guest_virtual(%llx, %x) failed\n", - addr, size); - } - hax_log(HAX_LOGD, "write_guest_virtual(%llx, %x) injecting #PF\n", - addr, size); - vcpu->state->_cr2 = addr + offset; - hax_inject_page_fault(vcpu, r & 0x1f); - return false; - } - - len2 = (uint64_t)gpa_space_write_data(&vcpu->vm->gpa_space, gpa, len, - (uint8_t *)(srcp + offset)); - if (len2 <= 0) { - vcpu_set_panic(vcpu); - hax_log(HAX_LOGPANIC, - "write guest virtual error, gpa:0x%llx, len:0x%llx\n", - gpa, len); - return false; - } else { - len = len2; - } - - offset += len; - } - - return flag != 0 ? size : true; -} - -/* - * Guest virtual to guest physical address translation. - * @param va Guest virtual address - * @param access Access descriptor (read/write, user/supervisor) - * @param pa Guest physical address - * @param len Number of bytes for which translation is valid - * @param update Update access and dirty bits of guest structures - * @returns 0 if translation is successful, 0x80000000 OR'ed with the exception - * number otherwise. 
- */ -uint vcpu_translate(struct vcpu_t *vcpu, hax_vaddr_t va, uint access, hax_paddr_t *pa, - uint64_t *len, bool update) -{ - pagemode_t mode = vcpu_get_pagemode(vcpu); - uint order = 0; - uint r = -1; - - hax_log(HAX_LOGD, "vcpu_translate: %llx (%s,%s) mode %u\n", va, - access & TF_WRITE ? "W" : "R", access & TF_USER ? "U" : "S", mode); - - switch (mode) { - case PM_FLAT: { - // Non-paging mode, no further actions. - *pa = va; - r = 0; - break; - } - case PM_2LVL: - case PM_PAE: - case PM_PML4: { - r = pw_perform_page_walk(vcpu, va, access, pa, &order, update, - false); - break; - } - default: { - // Should never happen - break; - } - } - - if (r == 0) { - /* - * Translation is guaranteed valid until the end of 4096 bytes page - * (the minimum page size) due possible EPT remapping for the bigger - * translation units - */ - uint64_t size = (uint64_t)1 << PG_ORDER_4K; - uint64_t extend = size - (va & (size - 1)); - - // Adjust validity of translation if necessary. - if (len != NULL && (*len == 0 || *len > extend)) { - *len = extend; - } - } - return r; -} - -pagemode_t vcpu_get_pagemode(struct vcpu_t *vcpu) -{ - if (!(vcpu->state->_cr0 & CR0_PG)) - return PM_FLAT; - - if (!(vcpu->state->_cr4 & CR4_PAE)) - return PM_2LVL; - - // Only support pure 32-bit paging. May support PAE paging in future. - // hax_assert(0); - if (!(vcpu->state->_efer & IA32_EFER_LMA)) - return PM_PAE; - - return PM_PML4; -} - -void vcpu_invalidate_tlb(struct vcpu_t *vcpu, bool global) -{ - if (global) { - vcpu->mmu->igo = false; - } - vtlb_invalidate(vcpu->mmu); -} - -void vcpu_invalidate_tlb_addr(struct vcpu_t *vcpu, hax_vaddr_t va) -{ - vtlb_invalidate_addr(vcpu->mmu, va); -} diff --git a/platforms/darwin/intelhaxm.xcodeproj/project.pbxproj b/platforms/darwin/intelhaxm.xcodeproj/project.pbxproj index 9d546f9b..99265d03 100644 --- a/platforms/darwin/intelhaxm.xcodeproj/project.pbxproj +++ b/platforms/darwin/intelhaxm.xcodeproj/project.pbxproj @@ -9,10 +9,8 @@ /* Begin PBXBuildFile section */ 22BFCFCE13A59A4300AD9F0F /* ept.c in Sources */ = {isa = PBXBuildFile; fileRef = 22BFCFCD13A59A4300AD9F0F /* ept.c */; }; 22BFCFD213A59A6500AD9F0F /* intr_exc.c in Sources */ = {isa = PBXBuildFile; fileRef = 22BFCFD113A59A6500AD9F0F /* intr_exc.c */; }; - 22BFCFD613A59A8200AD9F0F /* vtlb.c in Sources */ = {isa = PBXBuildFile; fileRef = 22BFCFD513A59A8200AD9F0F /* vtlb.c */; }; 22BFCFD813A59A9100AD9F0F /* ept.h in Headers */ = {isa = PBXBuildFile; fileRef = 22BFCFD713A59A9100AD9F0F /* ept.h */; }; 22BFCFDC13A59AA000AD9F0F /* intr.h in Headers */ = {isa = PBXBuildFile; fileRef = 22BFCFDB13A59AA000AD9F0F /* intr.h */; }; - 22BFCFDE13A59AB100AD9F0F /* vtlb.h in Headers */ = {isa = PBXBuildFile; fileRef = 22BFCFDD13A59AB100AD9F0F /* vtlb.h */; }; 32A4FEBC0562C75700D090E7 /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = 089C167DFE841241C02AAC07 /* InfoPlist.strings */; }; 32A4FEBE0562C75700D090E7 /* com_intel_hax.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A224C3CFF42312311CA2CB7 /* com_intel_hax.c */; settings = {ATTRIBUTES = (); }; }; 43038ADA145F94190014BEE6 /* memory.c in Sources */ = {isa = PBXBuildFile; fileRef = 43038AD9145F94190014BEE6 /* memory.c */; }; @@ -74,6 +72,9 @@ CF148D601EE6BAEB0097A058 /* memslot.c in Sources */ = {isa = PBXBuildFile; fileRef = CF148D5F1EE6BAEB0097A058 /* memslot.c */; }; CF6A32291EDEB86E00468E62 /* pmu.h in Headers */ = {isa = PBXBuildFile; fileRef = CF6A32281EDEB86E00468E62 /* pmu.h */; }; CFB6FDDB1ED43C540048A750 /* ramblock.c in Sources */ = {isa = PBXBuildFile; 
fileRef = CFB6FDDA1ED43C540048A750 /* ramblock.c */; }; + CFC66285265E54840035D630 /* mmio.c in Sources */ = {isa = PBXBuildFile; fileRef = CFC66284265E54840035D630 /* mmio.c */; }; + CFC66287265E57400035D630 /* mmio.h in Headers */ = {isa = PBXBuildFile; fileRef = CFC66286265E57400035D630 /* mmio.h */; }; + CFC66289265E5D8D0035D630 /* name.h in Headers */ = {isa = PBXBuildFile; fileRef = CFC66288265E5D8D0035D630 /* name.h */; }; CFD697471ED2DC9700F10631 /* gpa_space.c in Sources */ = {isa = PBXBuildFile; fileRef = CFD697461ED2DC9700F10631 /* gpa_space.c */; }; CFD697491ED2DCB700F10631 /* memory.h in Headers */ = {isa = PBXBuildFile; fileRef = CFD697481ED2DCB700F10631 /* memory.h */; }; FA8F651E208BAD9A00C8E91F /* emulate.c in Sources */ = {isa = PBXBuildFile; fileRef = FA8F651D208BAD9A00C8E91F /* emulate.c */; }; @@ -98,10 +99,8 @@ 1A224C3CFF42312311CA2CB7 /* com_intel_hax.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = com_intel_hax.c; sourceTree = ""; }; 22BFCFCD13A59A4300AD9F0F /* ept.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ept.c; path = ../../core/ept.c; sourceTree = SOURCE_ROOT; }; 22BFCFD113A59A6500AD9F0F /* intr_exc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = intr_exc.c; path = ../../core/intr_exc.c; sourceTree = SOURCE_ROOT; }; - 22BFCFD513A59A8200AD9F0F /* vtlb.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = vtlb.c; path = ../../core/vtlb.c; sourceTree = SOURCE_ROOT; }; 22BFCFD713A59A9100AD9F0F /* ept.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ept.h; sourceTree = ""; }; 22BFCFDB13A59AA000AD9F0F /* intr.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = intr.h; sourceTree = ""; }; - 22BFCFDD13A59AB100AD9F0F /* vtlb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vtlb.h; sourceTree = ""; }; 32A4FEC30562C75700D090E7 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 32A4FEC40562C75800D090E7 /* intelhaxm.kext */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = intelhaxm.kext; sourceTree = BUILT_PRODUCTS_DIR; }; 43038AD9145F94190014BEE6 /* memory.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = memory.c; path = ../../core/memory.c; sourceTree = SOURCE_ROOT; }; @@ -163,6 +162,9 @@ CF148D5F1EE6BAEB0097A058 /* memslot.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = memslot.c; path = ../../core/memslot.c; sourceTree = ""; }; CF6A32281EDEB86E00468E62 /* pmu.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pmu.h; sourceTree = ""; }; CFB6FDDA1ED43C540048A750 /* ramblock.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ramblock.c; path = ../../core/ramblock.c; sourceTree = ""; }; + CFC66284265E54840035D630 /* mmio.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = mmio.c; path = ../../core/mmio.c; sourceTree = ""; }; + CFC66286265E57400035D630 /* mmio.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmio.h; sourceTree = ""; }; + CFC66288265E5D8D0035D630 /* name.h */ = {isa = PBXFileReference; 
fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = name.h; sourceTree = ""; }; CFD697461ED2DC9700F10631 /* gpa_space.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = gpa_space.c; path = ../../core/gpa_space.c; sourceTree = ""; }; CFD697481ED2DCB700F10631 /* memory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memory.h; sourceTree = ""; }; D27513B306A6225300ADB3A4 /* Kernel.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Kernel.framework; path = /System/Library/Frameworks/Kernel.framework; sourceTree = ""; }; @@ -213,13 +215,13 @@ 247142CAFF3F8F9811CA285C /* Source */ = { isa = PBXGroup; children = ( + CFC66284265E54840035D630 /* mmio.c */, 64BB0CD120F36C470064593A /* ia32_ops.asm */, 64BB0CD020F36C470064593A /* vmx_ops.asm */, A669096A20F9985300739075 /* ia32.c */, 6496936E20D8AE0000C9BBAF /* cpuid.c */, 6E2DBBCB18EB6125003B66C9 /* page_walker.c */, 43038AD9145F94190014BEE6 /* memory.c */, - 22BFCFD513A59A8200AD9F0F /* vtlb.c */, CFD697461ED2DC9700F10631 /* gpa_space.c */, CF0539AC1EE536CB00FAD569 /* chunk.c */, 22BFCFD113A59A6500AD9F0F /* intr_exc.c */, @@ -273,12 +275,13 @@ B98ECF9F13A059BB00485DDB /* include */ = { isa = PBXGroup; children = ( + CFC66288265E5D8D0035D630 /* name.h */, + CFC66286265E57400035D630 /* mmio.h */, 64CD0F572101B51100099B53 /* ia32.h */, 642FD41C20D9F79100C197FF /* emulate_ops.h */, 642FD41D20D9F79100C197FF /* emulate.h */, CFD697481ED2DCB700F10631 /* memory.h */, 6E2DBBCD18EB6155003B66C9 /* page_walker.h */, - 22BFCFDD13A59AB100AD9F0F /* vtlb.h */, 22BFCFDB13A59AA000AD9F0F /* intr.h */, 22BFCFD713A59A9100AD9F0F /* ept.h */, 6456261E1EEFF705005280EF /* ept2.h */, @@ -316,6 +319,7 @@ 43C9A9E7138DDA93000A1071 /* hax_host.h in Headers */, 4397BF1E138F4530001A6A33 /* hax.h in Headers */, 642FD41F20D9F79100C197FF /* emulate.h in Headers */, + CFC66289265E5D8D0035D630 /* name.h in Headers */, 4397BF20138F4530001A6A33 /* hax_list.h in Headers */, 6E2DBBCE18EB6155003B66C9 /* page_walker.h in Headers */, 4397BF21138F4530001A6A33 /* hax_types.h in Headers */, @@ -337,6 +341,7 @@ B98ECFC513A059BB00485DDB /* types.h in Headers */, B98ECFC613A059BB00485DDB /* vcpu.h in Headers */, B98ECFC813A059BB00485DDB /* vm.h in Headers */, + CFC66287265E57400035D630 /* mmio.h in Headers */, 64CD0F582101B51100099B53 /* ia32.h in Headers */, B98ECFC913A059BB00485DDB /* vmx.h in Headers */, 4324E35613A3781500FA7CFB /* hax_core_interface.h in Headers */, @@ -349,7 +354,6 @@ 4344104313A3C1B1002E1442 /* vcpu_state.h in Headers */, 22BFCFD813A59A9100AD9F0F /* ept.h in Headers */, 22BFCFDC13A59AA000AD9F0F /* intr.h in Headers */, - 22BFCFDE13A59AB100AD9F0F /* vtlb.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -439,6 +443,7 @@ CF0539AD1EE536CB00FAD569 /* chunk.c in Sources */, 64B85BE91EF4D34D00223ABD /* ept2.c in Sources */, B98ECFB713A059BB00485DDB /* dump.c in Sources */, + CFC66285265E54840035D630 /* mmio.c in Sources */, B98ECFB813A059BB00485DDB /* hax.c in Sources */, B98ECFCB13A059BB00485DDB /* vcpu.c in Sources */, B98ECFCC13A059BB00485DDB /* vm.c in Sources */, @@ -454,7 +459,6 @@ CF148D601EE6BAEB0097A058 /* memslot.c in Sources */, 22BFCFCE13A59A4300AD9F0F /* ept.c in Sources */, 22BFCFD213A59A6500AD9F0F /* intr_exc.c in Sources */, - 22BFCFD613A59A8200AD9F0F /* vtlb.c in Sources */, 64B72B851EDFFF7E00A8C202 /* hax_host_mem.cpp in Sources */, A669096B20F9985300739075 /* ia32.c in Sources */, 43038ADA145F94190014BEE6 /* memory.c 
in Sources */, diff --git a/platforms/linux/Kbuild b/platforms/linux/Kbuild index e1d5f1a7..1cb9e816 100644 --- a/platforms/linux/Kbuild +++ b/platforms/linux/Kbuild @@ -18,6 +18,7 @@ haxm-y += ../../core/ia32_ops.o haxm-y += ../../core/intr_exc.o haxm-y += ../../core/memory.o haxm-y += ../../core/memslot.o +haxm-y += ../../core/mmio.o haxm-y += ../../core/name.o haxm-y += ../../core/page_walker.o haxm-y += ../../core/ramblock.o @@ -25,7 +26,6 @@ haxm-y += ../../core/vcpu.o haxm-y += ../../core/vm.o haxm-y += ../../core/vmx.o haxm-y += ../../core/vmx_ops.o -haxm-y += ../../core/vtlb.o # haxm-linux haxm-y += components.o diff --git a/platforms/linux/haxm-install.sh b/platforms/linux/haxm-install.sh old mode 100755 new mode 100644 diff --git a/platforms/linux/haxm-uninstall.sh b/platforms/linux/haxm-uninstall.sh old mode 100755 new mode 100644 diff --git a/platforms/netbsd/Makefile b/platforms/netbsd/Makefile index 09cf9e3f..2045bed9 100644 --- a/platforms/netbsd/Makefile +++ b/platforms/netbsd/Makefile @@ -30,13 +30,13 @@ SRCS+= ia32.c SRCS+= intr_exc.c SRCS+= memory.c SRCS+= memslot.c +SRCS+= mmio.c SRCS+= name.c SRCS+= page_walker.c SRCS+= ramblock.c SRCS+= vcpu.c SRCS+= vm.c SRCS+= vmx.c -SRCS+= vtlb.c .if ${MACHINE} == "amd64" TARGET_ELF= elf64 diff --git a/platforms/windows/haxm-core.vcxproj b/platforms/windows/haxm-core.vcxproj index 7e33e3ac..2cd3113e 100644 --- a/platforms/windows/haxm-core.vcxproj +++ b/platforms/windows/haxm-core.vcxproj @@ -113,7 +113,6 @@ - @@ -128,12 +127,13 @@ + + -
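
For context, a minimal caller sketch (not part of this patch) showing how the relocated entry points fit together as documented above: vcpu_translate() returns 0 on success or 0x80000000 OR'ed with the exception number, and mmio_fetch_instruction() fetches at most 15 bytes through the per-vCPU MMIO fetch cache, returning 0 or -ENOMEM. The helper name example_fetch_mmio_insn, the use of vcpu->state->_rip as a flat guest-linear fetch address, and the -EFAULT fallback are assumptions made purely for illustration; only the two functions above come from the code in this patch.

/* Illustrative sketch only -- not part of the patch. Assumes the HAXM
 * internal headers (vcpu.h, mmio.h) and a flat CS base for brevity. */
static int example_fetch_mmio_insn(struct vcpu_t *vcpu, uint8_t insn[15])
{
    uint64_t gva = vcpu->state->_rip;  /* assumed guest-linear fetch address */
    hax_paddr_t gpa;
    uint64_t len = 0;
    uint r;

    /* Shown only to illustrate the return convention: 0 on success,
     * otherwise 0x80000000 | exception number (guest paths inject #PF). */
    r = vcpu_translate(vcpu, gva, 0, &gpa, &len, false);
    if (r != 0)
        return -EFAULT;                /* assumed error choice for the sketch */

    hax_log(HAX_LOGD, "fetch gva=0x%llx -> gpa=0x%llx, span=0x%llx\n",
            gva, gpa, len);

    /* At most 15 bytes: a valid IA instruction is never longer than that. */
    return mmio_fetch_instruction(vcpu, gva, insn, 15);
}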
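
A second, equally hypothetical sketch of the flag convention retained by vcpu_read_guest_virtual(): flag 2 is an internal read that leaves the guest page tables untouched and returns a byte count, while flag 0 reads on behalf of the guest, may inject a #PF, and returns a boolean. The wrapper name and the 8-byte buffer are placeholders for this example only.

/* Illustrative sketch only -- not part of the patch. */
static bool example_read_guest(struct vcpu_t *vcpu, hax_vaddr_t gva)
{
    uint8_t buf[8];

    /* flag == 2: internal read, no A/D-bit updates; returns bytes read,
     * so anything short of the requested size indicates a failure. */
    uint32_t got = vcpu_read_guest_virtual(vcpu, gva, buf, sizeof(buf),
                                           sizeof(buf), 2);
    if (got != sizeof(buf))
        return false;

    /* flag == 0: read on behalf of the guest; returns true on success,
     * false if a page fault was injected into the guest. */
    return vcpu_read_guest_virtual(vcpu, gva, buf, sizeof(buf),
                                   sizeof(buf), 0) != 0;
}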