From eb2602e8c9615818cb46c39643fcde00fc983e95 Mon Sep 17 00:00:00 2001 From: Benjamin Lamowski Date: Fri, 23 Feb 2024 16:54:57 +0100 Subject: [PATCH] hw: add support for VMX Add support for Intel's Virtual Machine Extensions with nested paging. Fixes #5128 --- .../base-hw/lib/mk/spec/x86_64/core-hw-pc.mk | 1 + .../src/core/spec/x86_64/platform_support.cc | 5 +- .../core/spec/x86_64/virtualization/board.h | 22 +- .../spec/x86_64/virtualization/kernel/vm.cc | 63 +- .../spec/x86_64/virtualization/kernel/vmx.cc | 901 ++++++++++++++++++ .../x86_64/virtualization/virt_interface.h | 2 +- .../src/core/spec/x86_64/virtualization/vmx.h | 661 +++++++++++++ .../base-hw/src/include/hw/spec/x86_64/cpu.h | 205 +++- .../src/include/hw/spec/x86_64/x86_64.h | 22 +- .../base/include/spec/x86_64/cpu/cpu_state.h | 4 +- 10 files changed, 1851 insertions(+), 35 deletions(-) create mode 100644 repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vmx.cc create mode 100644 repos/base-hw/src/core/spec/x86_64/virtualization/vmx.h diff --git a/repos/base-hw/lib/mk/spec/x86_64/core-hw-pc.mk b/repos/base-hw/lib/mk/spec/x86_64/core-hw-pc.mk index 0856786fbbf..b7133939d56 100644 --- a/repos/base-hw/lib/mk/spec/x86_64/core-hw-pc.mk +++ b/repos/base-hw/lib/mk/spec/x86_64/core-hw-pc.mk @@ -21,6 +21,7 @@ SRC_CC += kernel/cpu_mp.cc SRC_CC += kernel/vm_thread_on.cc SRC_CC += spec/x86_64/virtualization/kernel/vm.cc SRC_CC += spec/x86_64/virtualization/kernel/svm.cc +SRC_CC += spec/x86_64/virtualization/kernel/vmx.cc SRC_CC += spec/x86_64/virtualization/vm_session_component.cc SRC_CC += vm_session_common.cc SRC_CC += vm_session_component.cc diff --git a/repos/base-hw/src/core/spec/x86_64/platform_support.cc b/repos/base-hw/src/core/spec/x86_64/platform_support.cc index 68dd4d38203..ef147837841 100644 --- a/repos/base-hw/src/core/spec/x86_64/platform_support.cc +++ b/repos/base-hw/src/core/spec/x86_64/platform_support.cc @@ -1,11 +1,12 @@ /* * \brief Platform implementations specific for x86_64 * 
\author Reto Buerki + * \author Benjamin Lamowski * \date 2015-05-04 */ /* - * Copyright (C) 2015-2017 Genode Labs GmbH + * Copyright (C) 2015-2024 Genode Labs GmbH * * This file is part of the Genode OS framework, which is distributed * under the terms of the GNU Affero General Public License version 3. @@ -55,7 +56,7 @@ void Platform::_init_additional_platform_info(Xml_generator &xml) xml.node("hardware", [&]() { xml.node("features", [&] () { xml.attribute("svm", Hw::Virtualization_support::has_svm()); - xml.attribute("vmx", false); + xml.attribute("vmx", Hw::Virtualization_support::has_vmx()); }); xml.node("tsc", [&]() { xml.attribute("invariant", Hw::Lapic::invariant_tsc()); diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/board.h b/repos/base-hw/src/core/spec/x86_64/virtualization/board.h index 26eaabc59e6..18154c140f8 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/board.h +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/board.h @@ -18,10 +18,12 @@ #include #include +#include #include #include +#include #include -#include +#include using Genode::addr_t;; @@ -52,6 +54,7 @@ namespace Board { enum Custom_trapnos { TRAP_VMEXIT = 256, TRAP_VMSKIP = 257, + TRAP_VMERROR = 258, }; }; @@ -68,6 +71,7 @@ struct Board::Vcpu_context void initialize(Kernel::Cpu &cpu, addr_t table_phys_addr); void read_vcpu_state(Vcpu_state &state); void write_vcpu_state(Vcpu_state &state); + bool handle_vm_exit(unsigned &irq); Genode::Align_at regs; @@ -80,9 +84,19 @@ struct Board::Vcpu_context static Virt_interface &detect_virtualization(Vcpu_data &vcpu_data, unsigned id) { - return *Genode::construct_at( - vcpu_data.virt_interface_ptr(), - vcpu_data, id); + if (Hw::Virtualization_support::has_svm()) + return *Genode::construct_at( + vcpu_data.virt_interface_ptr(), + vcpu_data, + id); + else if (Hw::Virtualization_support::has_vmx()) { + return *Genode::construct_at( + vcpu_data.virt_interface_ptr(), + vcpu_data); + } else { + Genode::error( "No 
virtualization support detected."); + throw Core::Service_denied(); + } } }; diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc index 0c438543f6a..2101133cede 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc @@ -26,9 +26,10 @@ #include #include +#include #include #include -#include +#include using namespace Genode; @@ -86,15 +87,26 @@ void Vm::exception(Cpu & cpu) { using namespace Board; + bool pause = false; + addr_t table_phys_addr { 0 }; + unsigned irq_id { TRAP_VMSKIP }; + switch (_vcpu_context.regs->trapno) { case Cpu_state::INTERRUPTS_START ... Cpu_state::INTERRUPTS_END: _interrupt(_user_irq_pool, cpu.id()); - break; - case TRAP_VMEXIT: - /* exception method was entered because of a VMEXIT */ - break; + return; case TRAP_VMSKIP: - /* exception method was entered without exception */ + /* vCPU is running for the first time */ + table_phys_addr = + reinterpret_cast(_id.table); + _vcpu_context.initialize(cpu, table_phys_addr); + _vcpu_context.tsc_aux_host = cpu.id(); + _vcpu_context.exitcode = EXIT_STARTUP; + pause = true; + break; + case TRAP_VMEXIT: [[fallthrough]]; + case TRAP_VMERROR: + pause = _vcpu_context.handle_vm_exit(irq_id); break; default: error("VM: triggered unknown exception ", @@ -107,24 +119,20 @@ void Vm::exception(Cpu & cpu) return; }; - if (_vcpu_context.exitcode == EXIT_INIT) { - addr_t table_phys_addr = - reinterpret_cast(_id.table); - _vcpu_context.initialize(cpu, table_phys_addr); - _vcpu_context.tsc_aux_host = cpu.id(); - _vcpu_context.exitcode = EXIT_STARTUP; - _pause_vcpu(); - _context.submit(1); - return; + if ((pause == false) && (irq_id != TRAP_VMSKIP)) { + /* XXX consolidate with Cpu_job::_interrupt */ + if (!_cpu->handle_if_cpu_local_interrupt(irq_id)) { + /* it isn't a CPU-local IRQ, so, it must be a user IRQ */ + User_irq * irq = 
User_irq::object(_user_irq_pool, irq_id); + if (irq) irq->occurred(); + else Genode::raw("Unknown interrupt ", irq_id); + } + _cpu->pic().finish_request(); } - unsigned irq { }; - bool error { false }; - _vcpu_context.exitcode = _vcpu_context.virt.handle_vm_exit(irq, error); - - if (_vcpu_context.exitcode != EXIT_PAUSED) { - _pause_vcpu(); - _context.submit(1); + if (pause) { + _pause_vcpu(); + _context.submit(1); } } @@ -240,3 +248,14 @@ void Board::Vcpu_context::initialize(Kernel::Cpu &cpu, addr_t table_phys_addr) { virt.initialize(cpu, table_phys_addr, *regs); } + + +bool Board::Vcpu_context::handle_vm_exit(unsigned &irq) +{ + exitcode = virt.handle_vm_exit(irq, (regs->trapno == TRAP_VMERROR)); + + if (exitcode == EXIT_PAUSED) + return false; + else + return true; +} diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vmx.cc b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vmx.cc new file mode 100644 index 00000000000..28136e87837 --- /dev/null +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vmx.cc @@ -0,0 +1,901 @@ +/* + * VMX virtualization + * \author Benjamin Lamowski + * \date 2023-10-04 + */ + +/* + * Copyright (C) 2023-2024 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +using Genode::addr_t; +using Kernel::Cpu; +using Kernel::Vm; +using Board::Vmcs; +using Board::Vmcs_buf; + +extern "C" +{ + extern Genode::addr_t _kernel_entry; +} + +Genode::uint32_t Vmcs::system_rev = 0U; +Genode::uint32_t Vmcs::pinbased_allowed_0 = 0U; +Genode::uint32_t Vmcs::pinbased_allowed_1 = 0U; +Genode::uint32_t Vmcs::pri_exit_allowed_0 = 0U; +Genode::uint32_t Vmcs::pri_exit_allowed_1 = 0U; +Genode::uint32_t Vmcs::vm_entry_allowed_0 = 0U; +Genode::uint32_t Vmcs::vm_entry_allowed_1 = 0U; +Genode::uint32_t Vmcs::pri_procbased_allowed_0 = 0U; +Genode::uint32_t Vmcs::pri_procbased_allowed_1 = 0U; +Genode::uint32_t Vmcs::sec_procbased_allowed_0 = 0U; +Genode::uint32_t Vmcs::sec_procbased_allowed_1 = 0U; +Genode::uint64_t Vmcs::cr0_fixed0 = 0U; +Genode::uint64_t Vmcs::cr0_fixed1 = 0U; +Genode::uint64_t Vmcs::cr0_mask = 0U; +Genode::uint64_t Vmcs::cr4_fixed0 = 0U; +Genode::uint64_t Vmcs::cr4_fixed1 = 0U; +Genode::uint64_t Vmcs::cr4_mask = 0U; +extern int __idt; + +Vmcs_buf::Vmcs_buf(Genode::uint32_t system_rev) +{ + Genode::memset((void *) this, 0, sizeof(Vmcs_buf)); + rev = system_rev; +} + +Vmcs::Vmcs(Genode::Vcpu_data &vcpu_data) +: + Board::Virt_interface(vcpu_data) +{ + if (!system_rev) + setup_vmx_info(); + + Genode::construct_at(vcpu_data.guest_virt_area_ptr(), system_rev); +} + + +void Vmcs::construct_host_vmcs(Genode::size_t cpu_id) +{ + static Genode::Constructible host_vmcs_buf[NR_OF_CPUS]; + + if (!host_vmcs_buf[cpu_id].constructed()) { + host_vmcs_buf[cpu_id].construct(system_rev); + + Genode::addr_t host_vmcs_phys = + Core::Platform::core_phys_addr( + (addr_t)& host_vmcs_buf[cpu_id]); + + vmxon(host_vmcs_phys); + } +} + +/* + * Setup static VMX information. This only works well as long as Intel's E and P + * cores report the same feature set. 
+ */ +void Vmcs::setup_vmx_info() +{ + using Cpu = Hw::X86_64_cpu; + + + /* Get revision */ + Cpu::Ia32_vmx_basic::access_t vmx_basic = Cpu::Ia32_vmx_basic::read(); + system_rev = Cpu::Ia32_vmx_basic::Rev::get(vmx_basic); + + + /* Get pin-based controls */ + bool clear_controls = + Cpu::Ia32_vmx_basic::Clear_controls::get(vmx_basic); + + + Genode::uint64_t pinbased_ctls { }; + + if (clear_controls) + pinbased_ctls = Cpu::Ia32_vmx_true_pinbased_ctls::read(); + else + pinbased_ctls = Cpu::Ia32_vmx_pinbased_ctls::read(); + + pinbased_allowed_0 = + Cpu::Ia32_vmx_pinbased_ctls::Allowed_0_settings::get(pinbased_ctls); + + /* + * Vol. 3C of the Intel SDM (September 2023): + * 25.6.1 Pin-Based VM-Execution Controls + * "Logical processors that support the 0-settings of any of these bits + * will support the VMX capability MSR IA32_VMX_TRUE_PIN- BASED_CTLS + * MSR, and software should consult this MSR to discover support for the + * 0-settings of these bits. Software that is not aware of the + * functionality of any one of these bits should set that bit to 1. 
+ */ + Pin_based_execution_controls::Bit_1::set(pinbased_allowed_0); + Pin_based_execution_controls::Bit_2::set(pinbased_allowed_0); + Pin_based_execution_controls::Bit_4::set(pinbased_allowed_0); + pinbased_allowed_1 = + Cpu::Ia32_vmx_pinbased_ctls::Allowed_1_settings::get(pinbased_ctls); + + + /* Get entry controls */ + Genode::uint64_t vm_entry_ctls { }; + + if (clear_controls) + vm_entry_ctls = Cpu::Ia32_vmx_true_entry_ctls::read(); + else + vm_entry_ctls = Cpu::Ia32_vmx_entry_ctls::read(); + + vm_entry_allowed_0 = + Cpu::Ia32_vmx_entry_ctls::Allowed_0_settings::get(vm_entry_ctls); + vm_entry_allowed_1 = + Cpu::Ia32_vmx_entry_ctls::Allowed_1_settings::get(vm_entry_ctls); + + + /* Get primary exit controls */ + Genode::uint64_t pri_exit_ctls { }; + + if (clear_controls) + pri_exit_ctls = Cpu::Ia32_vmx_true_exit_ctls::read(); + else + pri_exit_ctls = Cpu::Ia32_vmx_exit_ctls::read(); + + pri_exit_allowed_0 = + Cpu::Ia32_vmx_exit_ctls::Allowed_0_settings::get(pri_exit_ctls); + pri_exit_allowed_1 = + Cpu::Ia32_vmx_exit_ctls::Allowed_1_settings::get(pri_exit_ctls); + + /* Get primary proc-based exit controls */ + Genode::uint64_t pri_procbased_ctls { }; + + if (clear_controls) + pri_procbased_ctls = Cpu::Ia32_vmx_true_procbased_ctls::read(); + else + pri_procbased_ctls = Cpu::Ia32_vmx_procbased_ctls::read(); + + pri_procbased_allowed_0 = + Cpu::Ia32_vmx_procbased_ctls::Allowed_0_settings::get( + pri_procbased_ctls); + pri_procbased_allowed_1 = + Cpu::Ia32_vmx_procbased_ctls::Allowed_1_settings::get( + pri_procbased_ctls); + + if (!Primary_proc_based_execution_controls:: + Activate_secondary_controls::get(pri_procbased_allowed_1)) { + Genode::error("Processor does not support secondary controls"); + // XXX panic + return; + } + + /* Get secondary proc-based exec controls */ + Cpu::Ia32_vmx_procbased_ctls2::access_t sec_procbased_ctls = + Cpu::Ia32_vmx_procbased_ctls2::read(); + sec_procbased_allowed_0 = + Cpu::Ia32_vmx_procbased_ctls::Allowed_0_settings::get( + 
sec_procbased_ctls); + sec_procbased_allowed_1 = + Cpu::Ia32_vmx_procbased_ctls::Allowed_1_settings::get( + sec_procbased_ctls); + + if (!Secondary_proc_based_execution_controls::Enable_ept::get( + sec_procbased_allowed_1)) { + Genode::error("Processor does not support nested page tables"); + // XXX panic + return; + } + + if (!Secondary_proc_based_execution_controls::Unrestricted_guest::get( + sec_procbased_allowed_1)) { + Genode::error("Processor does not support Unrestricted guest mode"); + // XXX panic + return; + } + + /* CR0 and CR4 fixed values */ + cr0_fixed0 = Cpu::Ia32_vmx_cr0_fixed0::read(); + cr0_fixed1 = Cpu::Ia32_vmx_cr0_fixed1::read(); + + /* + * We demand that unrestriced guest mode is used, hence don't force PE + * and PG For details, see Vol. 3C of the Intel SDM (September 2023): + * 24.8 Restrictions on VMX Operation + * Yes, forced-to-one bits are in fact read from IA32_VMX_CR0_FIXED0. + */ + Cpu::Cr0::Pe::clear(cr0_fixed0); + Cpu::Cr0::Pg::clear(cr0_fixed0); + + cr0_mask = ~cr0_fixed1 | cr0_fixed0; + Cpu::Cr0::Cd::set(cr0_mask); + Cpu::Cr0::Nw::set(cr0_mask); + + cr4_fixed0 = Cpu::Ia32_vmx_cr4_fixed0::read(); + cr4_fixed1 = Cpu::Ia32_vmx_cr4_fixed1::read(); + cr4_mask = ~cr4_fixed1 | cr4_fixed0; +} + +void Vmcs::initialize(Kernel::Cpu &cpu, Genode::addr_t page_table_phys, + Core::Cpu::Context ®s) +{ + using Cpu = Hw::X86_64_cpu; + + /* Enable VMX */ + Cpu::Ia32_feature_control::access_t feature_control = + Cpu::Ia32_feature_control::read(); + if (!Cpu::Ia32_feature_control::Vmx_no_smx::get(feature_control)) { + Genode::log("Enabling VMX."); + if (!Cpu::Ia32_feature_control::Lock::get(feature_control)) { + Cpu::Ia32_feature_control::Vmx_no_smx::set(feature_control, 1); + Cpu::Ia32_feature_control::Lock::set(feature_control, 1); + Cpu::Ia32_feature_control::write(feature_control); + } else { + Genode::error("VMX feature disabled"); + return; + } + } + + Cpu::Cr4::access_t cr4 = Cpu::Cr4::read(); + Cpu::Cr4::Vmxe::set(cr4); + Cpu::Cr4::write(cr4); 
+ + construct_host_vmcs(cpu.id()); + + Genode::construct_at(vcpu_data.extended_data_ptr()); + + + vmclear(vcpu_data.guest_virt_area_phys_addr()); + _load_pointer(); + + prepare_vmcs(); + + /* + * Set the VMCS link pointer to ~0UL according to spec + * For details, see Vol. 3C of the Intel SDM (September 2023): + * 25.4.2 Guest Non-Register State: vmcs link pointer + */ + write(E_VMCS_LINK_POINTER, ~0ULL); + + /* + * For details, see Vol. 3C of the Intel SDM (September 2023): + * 25.6.11 Extended-Page-Table Pointer (EPTP) + */ + /* + * XXX make this more self-descriptive + * We have a 4 level page table in writeback memory + */ + write(E_EPT_POINTER, page_table_phys | (4 - 1) << 3 | 6); + + + write(E_HOST_IA32_EFER, Cpu::Ia32_efer::read()); + + /* + * If this looks the wrong way around then you are in good company. + * For details, and a nonchalant explanation of this cursed interface, + * see Vol. 3D of the Intel SDM (September 2023): + * A.7 VMX-Fixed Bits in CR0 + */ + Genode::uint64_t cr0 = Cpu::Cr0::read(); + + cr0 = (cr0 & cr0_fixed1) | cr0_fixed0; + /* NW and CD shouln'd be set by hw in the first place, but to be sure. 
*/ + Cpu::Cr0::Nw::clear(cr0); + Cpu::Cr0::Cd::clear(cr0); + Cpu::Cr0::write(cr0); + write(E_HOST_CR0, cr0); + write(E_CR0_GUEST_HOST_MASK, cr0_mask); + + write(E_HOST_CR3, Cpu::Cr3::read()); + + /* See above */ + cr4 = (cr4 & cr4_fixed1) | cr4_fixed0; + Cpu::Cr4::write(cr4); + write(E_HOST_CR4, cr4); + write(E_CR4_GUEST_HOST_MASK, cr4_mask); + + + // offsets from GDT in src/core/spec/x86_64/cpu.h + write(E_HOST_CS_SELECTOR, 0x8); + + write(E_HOST_FS_SELECTOR, 0); + write(E_HOST_GS_SELECTOR, 0); + + write(E_HOST_TR_BASE, reinterpret_cast(&(cpu.tss))); + write(E_HOST_TR_SELECTOR, 0x28); /* see Cpu::Tss::init() / the tss_descriptor is in slot 5 of the GDT */ + write(E_HOST_GDTR_BASE, reinterpret_cast(&(cpu.gdt))); + write(E_HOST_IDTR_BASE, reinterpret_cast(&__idt)); + + write(E_HOST_IA32_SYSENTER_ESP, reinterpret_cast(&(cpu.tss.rsp[0]))); + write(E_HOST_IA32_SYSENTER_CS, 0x8); + write(E_HOST_IA32_SYSENTER_EIP, reinterpret_cast(&_kernel_entry)); + + /* + * Set the RSP to trapno, so that _kernel_entry will save the registers + * into the right fields. + */ + write(E_HOST_RSP, reinterpret_cast(&(regs.trapno))); + write(E_HOST_RIP, reinterpret_cast(&_kernel_entry)); +} + + +/* + * Enforce VMX intercepts + */ +void Vmcs::enforce_intercepts(Genode::uint32_t desired_primary, + Genode::uint32_t desired_secondary) +{ + /* + * Processor-Based VM-Execution Controls + * + * For details, see Vol. 
3C of the Intel SDM (September 2023): + * 25.6.2 Processor-Based VM-Execution Controls + */ + + /* Exit on HLT instruction */ + Primary_proc_based_execution_controls::Hlt_exiting::set(desired_primary); + + /* Enforce use of nested paging */ + Primary_proc_based_execution_controls::Invlpg_exiting::clear(desired_primary); + Primary_proc_based_execution_controls::Cr3_load_exiting::clear(desired_primary); + Primary_proc_based_execution_controls::Cr3_store_exiting::clear(desired_primary); + Primary_proc_based_execution_controls::Activate_secondary_controls::set(desired_primary); + Secondary_proc_based_execution_controls::Enable_ept::set(desired_secondary); + Secondary_proc_based_execution_controls::Unrestricted_guest::set(desired_secondary); + + if (Secondary_proc_based_execution_controls::Virtualize_apic_accesses::get(sec_procbased_allowed_1)) + Secondary_proc_based_execution_controls::Virtualize_apic_accesses::set(desired_secondary); + + /* + * Always exit on IO and MSR accesses. + * For details, see Vol. 3C of the Intel SDM (September 2023): + * 26.1.3 Instructions That Cause VM Exits Conditionally + */ + Primary_proc_based_execution_controls::Unconditional_io_exiting::set(desired_primary); + Primary_proc_based_execution_controls::Use_io_bitmaps::clear(desired_primary); + Primary_proc_based_execution_controls::Use_msr_bitmaps::clear(desired_primary); + + Genode::uint32_t pri_procbased_set = + (desired_primary | pri_procbased_allowed_0) + & pri_procbased_allowed_1; + write(E_PRI_PROC_BASED_VM_EXEC_CTRL, pri_procbased_set); + + Genode::uint32_t sec_procbased_set = + (desired_secondary | sec_procbased_allowed_0) + & sec_procbased_allowed_1; + write(E_SEC_PROC_BASED_VM_EXEC_CTRL, sec_procbased_set); +} + +void Vmcs::prepare_vmcs() +{ + /* + * Pin-Based VM-Execution Controls + * + * For details, see Vol. 
3C of the Intel SDM (September 2023): + * 25.6.1 Pin-Based VM-Execution Controls + */ + Genode::uint32_t pinbased_want = 0U; + Pin_based_execution_controls::External_interrupt_exiting::set(pinbased_want); + Pin_based_execution_controls::Nmi_exiting::set(pinbased_want); + Pin_based_execution_controls::Virtual_nmis::set(pinbased_want); + Genode::uint32_t pinbased_set = (pinbased_want | pinbased_allowed_0) + & pinbased_allowed_1; + write(E_PIN_BASED_VM_EXECUTION_CTRL, pinbased_set); + + /* + * Primary VM-Exit Controls + * + * For details, see Vol. 3C of the Intel SDM (September 2023): + * Table 25-13. Definitions of Primary VM-Exit Controls + */ + Genode::uint32_t primary_exit_want = 0U; + Primary_vm_exit_controls::Save_debug_controls::set(primary_exit_want); + Primary_vm_exit_controls::Host_address_space_size::set(primary_exit_want); + Primary_vm_exit_controls::Ack_interrupt_on_exit::set(primary_exit_want); + Primary_vm_exit_controls::Save_ia32_efer::set(primary_exit_want); + Primary_vm_exit_controls::Load_ia32_efer::set(primary_exit_want); + Genode::uint32_t primary_exit_set = + (primary_exit_want | pri_exit_allowed_0) & pri_exit_allowed_1; + write(E_PRIMARY_VM_EXIT_CONTROLS, primary_exit_set); + + /* + * VM-Entry Controls + * + * For details, see Vol. 3C of the Intel SDM (September 2023): + * Table 25-13. 
Definitions of Primary VM-Exit Controls + * 25.8.1 VM-Entry Controls + */ + Genode::uint32_t vm_entry_want = 0U; + Vm_entry_controls::Load_debug_controls::set(vm_entry_want); + Vm_entry_controls::Load_ia32_efer::set(vm_entry_want); + Genode::uint32_t vm_entry_set = + (vm_entry_want | vm_entry_allowed_0) & vm_entry_allowed_1; + write(E_VM_ENTRY_CONTROLS, vm_entry_set); + + + enforce_intercepts(0U, 0U); + + write(E_VM_EXIT_MSR_STORE_ADDRESS, msr_phys_addr(&guest_msr_store_area)); + write(E_VM_EXIT_MSR_STORE_COUNT, Board::Msr_store_area::get_count()); + write(E_VM_ENTRY_MSR_LOAD_ADDRESS, msr_phys_addr(&guest_msr_store_area)); + write(E_VM_ENTRY_MSR_LOAD_COUNT, Board::Msr_store_area::get_count()); + + write(E_VM_EXIT_MSR_LOAD_ADDRESS, msr_phys_addr(&host_msr_store_area)); + write(E_VM_EXIT_MSR_LOAD_COUNT, Board::Msr_store_area::get_count()); + + if (Secondary_proc_based_execution_controls::Virtualize_apic_accesses::get(sec_procbased_allowed_1)) + write(E_VIRTUAL_APIC_ADDRESS, vcpu_data.extended_data_phys_addr()); + + /* + * For details, see Vol. 3C of the Intel SDM (September 2023): + * 26.2 Other Causes Of VM Exits: Exceptions + */ + write(E_EXCEPTION_BITMAP, Genode::Cpu_state::ALIGNMENT_CHECK | + Genode::Cpu_state::DEBUG); + write(E_PAGE_FAULT_ERROR_CODE_MASK, 0U); + write(E_PAGE_FAULT_ERROR_CODE_MATCH, 0U); + + /* + * For now, don't use CR3 targets. + * For details, see Vol. 
3C of the Intel SDM (September 2023): + * 25.6.7 CR3-Target Controls + */ + write(E_CR3_TARGET_COUNT, 0U); +} + +void Vmcs::write_vcpu_state(Genode::Vcpu_state &state) +{ + typedef Genode::Vcpu_state::Range Range; + typedef Genode::Vcpu_state::Segment Segment; + + using Cpu = Hw::X86_64_cpu; + using Genode::uint16_t; + using Genode::uint32_t; + + _load_pointer(); + + state.ip.charge(read(E_GUEST_RIP)); + state.ip_len.charge(read(E_VM_EXIT_INSTRUCTION_LENGTH)); + + state.flags.charge(read(E_GUEST_RFLAGS)); + state.sp.charge(read(E_GUEST_RSP)); + + state.dr7.charge(read(E_GUEST_DR7)); + + state.cr0.charge(read(E_GUEST_CR0)); + state.cr2.charge(Cpu::Cr2::read()); + state.cr3.charge(read(E_GUEST_CR3)); + state.cr4.charge(read(E_GUEST_CR4)); + + Genode::uint32_t ar { }; + + ar = static_cast(read(E_GUEST_CS_ACCESS_RIGHTS)); + state.cs.charge(Segment { + .sel = static_cast(read(E_GUEST_CS_SELECTOR)), + .ar = static_cast((ar >> 4 & 0x1F00) | (ar & 0xFF)), + .limit = static_cast(read(E_GUEST_CS_LIMIT)), + .base = read(E_GUEST_CS_BASE) + }); + + ar = static_cast(read(E_GUEST_SS_ACCESS_RIGHTS)); + state.ss.charge(Segment { + .sel = static_cast(read(E_GUEST_SS_SELECTOR)), + .ar = static_cast((ar >> 4 & 0x1F00) | (ar & 0xFF)), + .limit = static_cast(read(E_GUEST_SS_LIMIT)), + .base = read(E_GUEST_SS_BASE) + }); + + ar = static_cast(read(E_GUEST_ES_ACCESS_RIGHTS)); + state.es.charge(Segment { + .sel = static_cast(read(E_GUEST_ES_SELECTOR)), + .ar = static_cast((ar >> 4 & 0x1F00) | (ar & 0xFF)), + .limit = static_cast(read(E_GUEST_ES_LIMIT)), + .base = read(E_GUEST_ES_BASE) + }); + + ar = static_cast(read(E_GUEST_DS_ACCESS_RIGHTS)); + state.ds.charge(Segment { + .sel = static_cast(read(E_GUEST_DS_SELECTOR)), + .ar = static_cast((ar >> 4 & 0x1F00) | (ar & 0xFF)), + .limit = static_cast(read(E_GUEST_DS_LIMIT)), + .base = read(E_GUEST_DS_BASE) + }); + + ar = static_cast(read(E_GUEST_FS_ACCESS_RIGHTS)); + state.fs.charge(Segment { + .sel = static_cast(read(E_GUEST_FS_SELECTOR)), + 
.ar = static_cast((ar >> 4 & 0x1F00) | (ar & 0xFF)), + .limit = static_cast(read(E_GUEST_FS_LIMIT)), + .base = read(E_GUEST_FS_BASE) + }); + + ar = static_cast(read(E_GUEST_GS_ACCESS_RIGHTS)); + state.gs.charge(Segment { + .sel = static_cast(read(E_GUEST_GS_SELECTOR)), + .ar = static_cast((ar >> 4 & 0x1F00) | (ar & 0xFF)), + .limit = static_cast(read(E_GUEST_GS_LIMIT)), + .base = read(E_GUEST_GS_BASE) + }); + + ar = static_cast(read(E_GUEST_TR_ACCESS_RIGHTS)); + state.tr.charge(Segment { + .sel = static_cast(read(E_GUEST_TR_SELECTOR)), + .ar = static_cast((ar >> 4 & 0x1F00) | (ar & 0xFF)), + .limit = static_cast(read(E_GUEST_TR_LIMIT)), + .base = read(E_GUEST_TR_BASE) + }); + + ar = static_cast(read(E_GUEST_LDTR_ACCESS_RIGHTS)); + state.ldtr.charge(Segment { + .sel = static_cast(read(E_GUEST_LDTR_SELECTOR)), + .ar = static_cast((ar >> 4 & 0x1F00) | (ar & 0xFF)), + .limit = static_cast(read(E_GUEST_LDTR_LIMIT)), + .base = read(E_GUEST_LDTR_BASE) + }); + + state.gdtr.charge(Range { + .limit = static_cast(read(E_GUEST_GDTR_LIMIT)), + .base = read(E_GUEST_GDTR_BASE) + }); + + state.idtr.charge(Range { + .limit = static_cast(read(E_GUEST_IDTR_LIMIT)), + .base = read(E_GUEST_IDTR_BASE) + }); + + + state.sysenter_cs.charge(read(E_IA32_SYSENTER_CS)); + state.sysenter_sp.charge(read(E_GUEST_IA32_SYSENTER_ESP)); + state.sysenter_ip.charge(read(E_GUEST_IA32_SYSENTER_EIP)); + + state.qual_primary.charge(read(E_EXIT_QUALIFICATION)); + state.qual_secondary.charge(read(E_GUEST_PHYSICAL_ADDRESS)); + + /* Charging ctrl_primary and ctrl_secondary breaks Virtualbox 6 */ + + if (state.exit_reason == EXIT_PAUSED || state.exit_reason == VMX_EXIT_INVGUEST) { + state.inj_info.charge(static_cast(read(E_VM_ENTRY_INTERRUPT_INFO_FIELD))); + state.inj_error.charge(static_cast(read(E_VM_ENTRY_EXCEPTION_ERROR_CODE))); + + } else { + state.inj_info.charge(static_cast(read(E_IDT_VECTORING_INFORMATION_FIELD))); + state.inj_error.charge(static_cast(read(E_IDT_VECTORING_ERROR_CODE))); + } + + 
state.intr_state.charge( + static_cast(read(E_GUEST_INTERRUPTIBILITY_STATE))); + state.actv_state.charge( + static_cast(read(E_GUEST_ACTIVITY_STATE))); + + state.tsc.charge(Hw::Lapic::rdtsc()); + state.tsc_offset.charge(read(E_TSC_OFFSET)); + + state.efer.charge(read(E_GUEST_IA32_EFER)); + + state.pdpte_0.charge(read(E_GUEST_PDPTE0)); + state.pdpte_1.charge(read(E_GUEST_PDPTE1)); + state.pdpte_2.charge(read(E_GUEST_PDPTE2)); + state.pdpte_3.charge(read(E_GUEST_PDPTE3)); + + state.star.charge(guest_msr_store_area.star.get()); + state.lstar.charge(guest_msr_store_area.lstar.get()); + state.cstar.charge(guest_msr_store_area.cstar.get()); + state.fmask.charge(guest_msr_store_area.fmask.get()); + state.kernel_gs_base.charge(guest_msr_store_area.fmask.get()); + + Virtual_apic_state *virtual_apic_state = + reinterpret_cast( + vcpu_data.extended_data_ptr()); + state.tpr.charge(virtual_apic_state->get_vtpr()); + state.tpr_threshold.charge( + static_cast(read(E_TPR_THRESHOLD))); +} + + +void Vmcs::read_vcpu_state(Genode::Vcpu_state &state) +{ + _load_pointer(); + + if (state.flags.charged()) { + write(E_GUEST_RFLAGS, state.flags.value()); + } + + if (state.sp.charged()) { + write(E_GUEST_RSP, state.sp.value()); + } + + if (state.ip.charged()) { + write(E_GUEST_RIP, state.ip.value()); + write(E_VM_ENTRY_INSTRUCTION_LENGTH, state.ip_len.value()); + } + + if (state.dr7.charged()) { + write(E_GUEST_DR7, state.dr7.value()); + } + + if (state.cr0.charged() || state.cr2.charged() || + state.cr3.charged() || state.cr4.charged()) { + write(E_GUEST_CR0, (state.cr0.value() & ~cr0_mask & cr0_fixed1) | cr0_fixed0); + write(E_CR0_READ_SHADOW, (state.cr0.value() & cr0_fixed1) | cr0_fixed0); + cr2 = state.cr2.value(); + write(E_GUEST_CR3, state.cr3.value()); + write(E_GUEST_CR4, (state.cr4.value() & cr4_fixed1) | cr4_fixed0); + write(E_GUEST_CR4, (state.cr4.value() & ~cr4_mask & cr4_fixed1) | cr4_fixed0); + write(E_CR4_READ_SHADOW, (state.cr4.value() & cr4_fixed1) | cr4_fixed0); + } + + if 
(state.cs.charged() || state.ss.charged()) { + write(E_GUEST_CS_SELECTOR, state.cs.value().sel); + /* XXX document access right conversion */ + write(E_GUEST_CS_ACCESS_RIGHTS, (state.cs.value().ar << 4 & 0x1F000) | (state.cs.value().ar & 0xFF)); + write(E_GUEST_CS_LIMIT, state.cs.value().limit); + write(E_GUEST_CS_BASE, state.cs.value().base); + + write(E_GUEST_SS_SELECTOR, state.ss.value().sel); + write(E_GUEST_SS_ACCESS_RIGHTS, (state.ss.value().ar << 4 & 0x1F000) | (state.ss.value().ar & 0xFF)); + write(E_GUEST_SS_LIMIT, state.ss.value().limit); + write(E_GUEST_SS_BASE, state.ss.value().base); + } + + if (state.es.charged() || state.ds.charged()) { + write(E_GUEST_ES_SELECTOR, state.es.value().sel); + write(E_GUEST_ES_ACCESS_RIGHTS, (state.es.value().ar << 4 & 0x1F000) | (state.es.value().ar & 0xFF)); + write(E_GUEST_ES_LIMIT, state.es.value().limit); + write(E_GUEST_ES_BASE, state.es.value().base); + + write(E_GUEST_DS_SELECTOR, state.ds.value().sel); + write(E_GUEST_DS_ACCESS_RIGHTS, (state.ds.value().ar << 4 & 0x1F000) | (state.ds.value().ar & 0xFF)); + write(E_GUEST_DS_LIMIT, state.ds.value().limit); + write(E_GUEST_DS_BASE, state.ds.value().base); + } + + if (state.fs.charged() || state.gs.charged()) { + write(E_GUEST_FS_SELECTOR, state.fs.value().sel); + write(E_GUEST_FS_ACCESS_RIGHTS, (state.fs.value().ar << 4 & 0x1F000) | (state.fs.value().ar & 0xFF)); + write(E_GUEST_FS_LIMIT, state.fs.value().limit); + write(E_GUEST_FS_BASE, state.fs.value().base); + + write(E_GUEST_GS_SELECTOR, state.gs.value().sel); + write(E_GUEST_GS_ACCESS_RIGHTS, (state.gs.value().ar << 4 & 0x1F000) | (state.gs.value().ar & 0xFF)); + write(E_GUEST_GS_LIMIT, state.gs.value().limit); + write(E_GUEST_GS_BASE, state.gs.value().base); + } + + if (state.tr.charged()) { + write(E_GUEST_TR_SELECTOR, state.tr.value().sel); + write(E_GUEST_TR_ACCESS_RIGHTS, (state.tr.value().ar << 4 & 0x1F000) | (state.tr.value().ar & 0xFF)); + write(E_GUEST_TR_LIMIT, state.tr.value().limit); + 
write(E_GUEST_TR_BASE, state.tr.value().base); + } + + if (state.ldtr.charged()) { + write(E_GUEST_LDTR_SELECTOR, state.ldtr.value().sel); + write(E_GUEST_LDTR_ACCESS_RIGHTS, (state.ldtr.value().ar << 4 & 0x1F000) | (state.ldtr.value().ar & 0xFF)); + write(E_GUEST_LDTR_LIMIT, state.ldtr.value().limit); + write(E_GUEST_LDTR_BASE, state.ldtr.value().base); + } + + if (state.gdtr.charged()) { + write(E_GUEST_GDTR_LIMIT, state.gdtr.value().limit); + write(E_GUEST_GDTR_BASE, state.gdtr.value().base); + } + + if (state.idtr.charged()) { + write(E_GUEST_IDTR_LIMIT, state.idtr.value().limit); + write(E_GUEST_IDTR_BASE, state.idtr.value().base); + } + + if (state.sysenter_cs.charged() || state.sysenter_sp.charged() || + state.sysenter_ip.charged()) { + write(E_IA32_SYSENTER_CS, state.sysenter_cs.value()); + write(E_GUEST_IA32_SYSENTER_ESP, state.sysenter_sp.value()); + write(E_GUEST_IA32_SYSENTER_EIP, state.sysenter_ip.value()); + } + + if (state.ctrl_primary.charged() || state.ctrl_secondary.charged()) { + enforce_intercepts(state.ctrl_primary.value(), + state.ctrl_secondary.value()); + } + + if (state.inj_info.charged() || state.inj_error.charged()) { + Genode::uint32_t pri_controls = static_cast (read(E_PRI_PROC_BASED_VM_EXEC_CTRL)); + Genode::uint32_t sec_controls = static_cast (read(E_SEC_PROC_BASED_VM_EXEC_CTRL)); + bool set_controls = false; + + if (state.inj_info.value() & 0x1000) { + if (state.inj_info.value() & 0x80000000 && !Primary_proc_based_execution_controls::Interrupt_window_exiting::get(pri_controls)) + if (!Primary_proc_based_execution_controls::Interrupt_window_exiting::get(pri_controls)) { + Primary_proc_based_execution_controls::Interrupt_window_exiting::set(pri_controls); + set_controls = true; + } + } else { + if (Primary_proc_based_execution_controls::Interrupt_window_exiting::get(pri_controls)) { + Primary_proc_based_execution_controls::Interrupt_window_exiting::clear(pri_controls); + set_controls = true; + } + } + + if (state.inj_info.value() & 
0x2000) { + if (!Primary_proc_based_execution_controls::Nmi_window_exiting::get(pri_controls)) { + Primary_proc_based_execution_controls::Nmi_window_exiting::set(pri_controls); + set_controls = true; + } + } else { + if (Primary_proc_based_execution_controls::Nmi_window_exiting::get(pri_controls)) { + Primary_proc_based_execution_controls::Nmi_window_exiting::clear(pri_controls); + set_controls = true; + } + } + + if (set_controls) + enforce_intercepts(pri_controls, sec_controls); + + write(E_VM_ENTRY_INTERRUPT_INFO_FIELD, + /* Filter out special signaling bits */ + (state.inj_info.value() & + (Genode::uint32_t) ~0x3000)); + + write(E_VM_ENTRY_EXCEPTION_ERROR_CODE, state.inj_error.value()); + } + + if (state.intr_state.charged()) { + write(E_GUEST_INTERRUPTIBILITY_STATE, state.intr_state.value()); + } + + if (state.actv_state.charged()) { + write(E_GUEST_ACTIVITY_STATE, state.actv_state.value()); + } + + if (state.tsc_offset.charged()) { + /* state.tsc not used by SVM */ + write(E_TSC_OFFSET, state.tsc_offset.value()); + } + + if (state.efer.charged()) { + auto efer = state.efer.value(); + write(E_GUEST_IA32_EFER, efer); + + Vm_entry_controls::access_t entry_controls = static_cast(read(E_VM_ENTRY_CONTROLS)); + if (Cpu::Ia32_efer::Lma::get(efer)) + Vm_entry_controls::Ia32e_mode_guest::set(entry_controls); + else + Vm_entry_controls::Ia32e_mode_guest::clear(entry_controls); + + write(E_VM_ENTRY_CONTROLS, entry_controls); + } + + if (state.pdpte_0.charged() || state.pdpte_1.charged() || + state.pdpte_1.charged() || state.pdpte_2.charged()) { + write(E_GUEST_PDPTE0, state.pdpte_0.value()); + write(E_GUEST_PDPTE1, state.pdpte_1.value()); + write(E_GUEST_PDPTE2, state.pdpte_2.value()); + write(E_GUEST_PDPTE3, state.pdpte_3.value()); + } + + if (state.star.charged() || state.lstar.charged() || + state.cstar.charged() || state.fmask.charged() || + state.kernel_gs_base.charged()) { + guest_msr_store_area.star.set(state.star.value()); + 
guest_msr_store_area.lstar.set(state.lstar.value()); + guest_msr_store_area.cstar.set(state.cstar.value()); + guest_msr_store_area.fmask.set(state.fmask.value()); + guest_msr_store_area.kernel_gs_base.set(state.kernel_gs_base.value()); + } + + Virtual_apic_state * virtual_apic_state = reinterpret_cast(vcpu_data.extended_data_ptr()); + if (state.tpr.charged()) { + virtual_apic_state->set_vtpr(state.tpr.value()); + write(E_TPR_THRESHOLD, state.tpr_threshold.value()); + } +} + +void Vmcs::switch_world(Core::Cpu::Context ®s) +{ + _load_pointer(); + + save_host_msrs(); + + Cpu::Cr2::write(cr2); + /* XXX save/restore ldt? */ + + regs.trapno = TRAP_VMEXIT; + asm volatile( + "fxrstor (%[fpu_context]);" + "mov %[regs], %%rsp;" + "popq %%r8;" + "popq %%r9;" + "popq %%r10;" + "popq %%r11;" + "popq %%r12;" + "popq %%r13;" + "popq %%r14;" + "popq %%r15;" + "popq %%rax;" + "popq %%rbx;" + "popq %%rcx;" + "popq %%rdx;" + "popq %%rdi;" + "popq %%rsi;" + "popq %%rbp;" + "vmresume;" + "vmlaunch;" + "add $8, %%rsp;" + "push $258;" /* on error, push TRAP_VMERROR */ + "jmp _kernel_entry;" + : + : [regs] "r"(®s.r8), [fpu_context] "r"(regs.fpu_context()) + : "memory"); +} + +/* + * Store MSRs to the Host MSR Store Area so that VMX restores them on VM exit + * + * For details, see Vol. 
3C of the Intel SDM (September 2023): + * 28.6 Loading MSRs + */ +void Vmcs::save_host_msrs() +{ + using Cpu = Hw::X86_64_cpu; + + host_msr_store_area.star.set(Cpu::Ia32_star::read()); + host_msr_store_area.lstar.set(Cpu::Ia32_lstar::read()); + host_msr_store_area.cstar.set(Cpu::Ia32_cstar::read()); + host_msr_store_area.fmask.set(Cpu::Ia32_fmask::read()); + host_msr_store_area.kernel_gs_base.set( + Cpu::Ia32_kernel_gs_base::read()); +} + + +void Vmcs::_load_pointer() +{ + /* + * XXX this should really be checking some kind of active flag so the + * VMCS doesn't get reloaded in the common case + */ + vmptrld(vcpu_data.guest_virt_area_phys_addr()); +} + + +uint64_t Vmcs::handle_vm_exit(unsigned & irq, bool error) +{ + if (error) { + /* For error codes, see + * Intel SDM (September 2023) Vol. 3C + * 31.4 Vm Instruction Error Numbers + */ + Genode::error( + "VM: execution error: ", + Genode::Hex(read(Vmcs::E_VM_INSTRUCTION_ERROR))); + // FIXME panic + return -1ULL; + } + + cr2 = Cpu::Cr2::read(); + uint64_t exitcode = read(E_EXIT_REASON) & 0xFFFF; + + switch (exitcode) { + case VMX_EXIT_NMI: + Genode::error("NMI exit not implemented."); + // FIXME panic + break; + case VMX_EXIT_INTR: + irq = static_cast( + read(E_VM_EXIT_INTERRUPT_INFORMATION) & 0xFF); + exitcode = EXIT_PAUSED; + break; + case VMX_EXIT_INVLPG: + Genode::error("INVLPG exit not implemented."); + // FIXME panic + break; + default: + break; + } + + return exitcode; +} diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/virt_interface.h b/repos/base-hw/src/core/spec/x86_64/virtualization/virt_interface.h index 125063f9338..a75d36a7a0b 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/virt_interface.h +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/virt_interface.h @@ -27,6 +27,7 @@ namespace Board { enum Virt_type { SVM, + VMX }; struct Virt_interface @@ -42,7 +43,6 @@ struct Virt_interface virtual Virt_type virt_type() = 0; virtual Genode::uint64_t handle_vm_exit(unsigned 
&irq, bool error = false) = 0; - Virt_interface(Genode::Vcpu_data &vcpu_data) : vcpu_data(vcpu_data) { } diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/vmx.h b/repos/base-hw/src/core/spec/x86_64/virtualization/vmx.h new file mode 100644 index 00000000000..8b440a3edd1 --- /dev/null +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/vmx.h @@ -0,0 +1,661 @@ +/* + * \brief VMX data structure + * \author Benjamin Lamowski + * \date 2023-09-26 + */ + +/* + * Copyright (C) 2023-2024 Genode Labs GmbH + * + * This file is part of the Genode OS framework, which is distributed + * under the terms of the GNU Affero General Public License version 3. + */ + +#ifndef _INCLUDE__SPEC__PC__VMX_H_ +#define _INCLUDE__SPEC__PC__VMX_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace Kernel { class Cpu; } + +namespace Board +{ + struct Vmcs; + struct Vmcs_buf; + struct Msr_store_area; + struct Virtual_apic_state; +} + +/* + * VMX exitcodes, incomplete list. + * + * For details, see Vol. 3C of the Intel SDM (September 2023): + * Table C-1. Basic Exit Reasons + */ +enum Vmx_exitcodes : Genode::uint32_t { + VMX_EXIT_NMI = 0, + VMX_EXIT_INTR = 1, + VMX_EXIT_INV = 7, + VMX_EXIT_CR = 28, + VMX_EXIT_INVLPG = 14, + VMX_EXIT_INVGUEST = 33, +}; + + +/* + * MSR-store area + * + * For details, see Vol. 
3C of the Intel SDM (September 2023): + * 25.7.2 VM-Exit Controls for MSRs + */ +struct +alignas(16) +Board::Msr_store_area +{ + struct Msr_entry + { + Genode::uint32_t msr_index = 0U; + Genode::uint32_t _reserved = 0U; + Genode::uint64_t msr_data = 0U; + + void set(Genode::uint64_t data) + { + msr_data = data; + } + + Genode::uint64_t get() + { + return msr_data; + } + } __attribute__((packed)); + + Msr_entry star { 0xC0000081 }; + Msr_entry lstar { 0xC0000082 }; + Msr_entry cstar { 0xC0000083 }; + Msr_entry fmask { 0xC0000084 }; + Msr_entry kernel_gs_base { 0xC0000102 }; + + static constexpr Core::size_t get_count() + { + return sizeof(Msr_store_area) / sizeof(Msr_entry); + } +}; + + +/* + * Physical VMCS buffer + */ +struct +alignas(Genode::get_page_size()) +Board::Vmcs_buf +{ + union { + Genode::uint32_t rev; + Genode::uint8_t pad[Genode::get_page_size()]; + }; + + Vmcs_buf(Genode::uint32_t rev); +}; + + +/* + * VMCS + * + * See Intel SDM (September 2023) Vol. 3C, section 24.2. + */ +struct +Board::Vmcs +: + public Board::Virt_interface +{ + static Genode::uint32_t system_rev; + static Genode::uint32_t pinbased_allowed_0; + static Genode::uint32_t pinbased_allowed_1; + static Genode::uint32_t vm_entry_allowed_0; + static Genode::uint32_t vm_entry_allowed_1; + static Genode::uint32_t pri_exit_allowed_0; + static Genode::uint32_t pri_exit_allowed_1; + static Genode::uint32_t pri_procbased_allowed_0; + static Genode::uint32_t pri_procbased_allowed_1; + static Genode::uint32_t sec_procbased_allowed_0; + static Genode::uint32_t sec_procbased_allowed_1; + static Genode::uint64_t cr0_fixed0; + static Genode::uint64_t cr0_fixed1; + static Genode::uint64_t cr0_mask; + static Genode::uint64_t cr4_fixed0; + static Genode::uint64_t cr4_fixed1; + static Genode::uint64_t cr4_mask; + static Genode::uint64_t vpid; + + Msr_store_area guest_msr_store_area { }; + /* XXX only needed per vCPU */ + Msr_store_area host_msr_store_area { }; + Genode::uint64_t cr2 { 0 }; + + 
Genode::addr_t msr_phys_addr(Msr_store_area *msr_ptr) + { + Genode::size_t offset = + (Genode::size_t)msr_ptr - (Genode::size_t)this; + return vcpu_data.virt_interface_phys_addr() + offset; + } + + /* + * VMCS field encodings + * + * See Intel SDM (September 2023) Vol. 3D, appendix B. + */ + enum Field_encoding : Genode::uint64_t { + /* + * B.1 16-Bit Fields + */ + + /* B.1.1 16-Bit Control Fields */ + E_VPID = 0x00000000, + E_POSTED_INT_NOTIFICATION_VEC = 0x00000002, + E_EPTP_INDEX = 0x00000004, + E_HLAT_PREFIX_SIZE = 0x00000006, + + /* B.1.2 16-Bit Guest-State Fields */ + E_GUEST_ES_SELECTOR = 0x00000800, + E_GUEST_CS_SELECTOR = 0x00000802, + E_GUEST_SS_SELECTOR = 0x00000804, + E_GUEST_DS_SELECTOR = 0x00000806, + E_GUEST_FS_SELECTOR = 0x00000808, + E_GUEST_GS_SELECTOR = 0x0000080A, + E_GUEST_LDTR_SELECTOR = 0x0000080C, + E_GUEST_TR_SELECTOR = 0x0000080E, + E_GUEST_INT_STATUS = 0x00000810, + E_PML_INDEX = 0x00000812, + + /* B.1.3 16-Bit Host-State Fields */ + E_HOST_ES_SELECTOR = 0x00000C00, + E_HOST_CS_SELECTOR = 0x00000C02, + E_HOST_SS_SELECTOR = 0x00000C04, + E_HOST_DS_SELECTOR = 0x00000C06, + E_HOST_FS_SELECTOR = 0x00000C08, + E_HOST_GS_SELECTOR = 0x00000C0A, + E_HOST_TR_SELECTOR = 0x00000C0C, + + + /* + * B.2 64-Bit Fields + */ + + /* B.2.1 64-Bit Control Fields */ + E_ADDRESS_OF_IO_BITMAP_A = 0x00002000, + E_ADDRESS_OF_IO_BITMAP_B = 0x00002002, + E_ADDRESS_OF_MSR_BITMAPS = 0x00002004, + E_VM_EXIT_MSR_STORE_ADDRESS = 0x00002006, + E_VM_EXIT_MSR_LOAD_ADDRESS = 0x00002008, + E_VM_ENTRY_MSR_LOAD_ADDRESS = 0x0000200A, + E_EXECUTIVE_VMCS_POINTER = 0x0000200C, + E_PML_ADDRESS = 0x0000200E, + E_TSC_OFFSET = 0x00002010, + E_VIRTUAL_APIC_ADDRESS = 0x00002012, + E_APIC_ACCESS_ADDRESS = 0x00002014, + E_POSTED_INT_DESC_ADDR = 0x00002016, + E_VM_FUNCTION_CONTROLS = 0x00002018, + E_EPT_POINTER = 0x0000201A, + E_EOI_EXIT_BITMAP_0 = 0x0000201C, + E_EOI_EXIT_BITMAP_1 = 0x0000201E, + E_EOI_EXIT_BITMAP_2 = 0x00002020, + E_EOI_EXIT_BITMAP_3 = 0x00002022, + E_EPT_LIST_ADDRESS = 
0x00002024, + E_VMREAD_BITMAP_ADDRESS = 0x00002026, + E_VMWRITE_BITMAP_ADRESS = 0x00002028, + E_VIRT_EXC_INFO_ADDR = 0x0000202A, + E_XSS_EXITING_BITMAP = 0x0000202C, + E_ENCLS_EXITING_BITMAP = 0x0000202E, + E_SUB_PAGE_PERMISSION_TABLE_PTR = 0x00002030, + E_TSC_MTIPLIER = 0x00002032ul, + E_TERTIARY_VM_EXECUTION_CONTROLS = 0x00002034, + E_ENCLV_EXITING_BITMAP = 0x00002036, + E_PCONFIG_EXITING_BITMAP = 0x0000203E, + E_HLATP = 0x00002040, + E_SECONDARY_VM_EXECTION_CONROLS = 0x00002044, + + /* B.2.2 64-Bit Read-Only Data Field */ + E_GUEST_PHYSICAL_ADDRESS = 0x00002400, + + /* B.2.3 64-Bit Guest-State Fields */ + E_VMCS_LINK_POINTER = 0x00002800, + E_GUEST_IA32_DEBUGCTL = 0x00002802, + E_GUEST_IA32_PAT = 0x00002804, + E_GUEST_IA32_EFER = 0x00002806, + E_GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + E_GUEST_PDPTE0 = 0x0000280A, + E_GUEST_PDPTE1 = 0x0000280C, + E_GUEST_PDPTE2 = 0x0000280E, + E_GUEST_PDPTE3 = 0x00002810, + E_GUEST_IA32_BNDCFGS = 0x00002812, + E_GUEST_IA32_RTIT_CTL = 0x00002814, + E_GUEST_IA32_LBR_CTL = 0x00002816, + E_GUEST_IA32_PKRS = 0x00002818, + + /* B.2.4 64-Bit Host-State Fields */ + E_HOST_IA32_PAT = 0x00002C00, + E_HOST_IA32_EFER = 0x00002C02, + E_HOST_IA32_PERF_GLOBAL_CTRL = 0x00002C04, + E_HOST_IA32_PKRS = 0x00002C06, + + + /* + * B.3 32-Bit Fields + */ + + /* B.3.1 32-Bit Control Fields */ + E_PIN_BASED_VM_EXECUTION_CTRL = 0x00004000, + E_PRI_PROC_BASED_VM_EXEC_CTRL = 0x00004002, + E_EXCEPTION_BITMAP = 0x00004004, + E_PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + E_PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + E_CR3_TARGET_COUNT = 0x0000400A, + E_PRIMARY_VM_EXIT_CONTROLS = 0x0000400C, + E_VM_EXIT_MSR_STORE_COUNT = 0x0000400E, + E_VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + E_VM_ENTRY_CONTROLS = 0x00004012, + E_VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + E_VM_ENTRY_INTERRUPT_INFO_FIELD = 0x00004016, + E_VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + E_VM_ENTRY_INSTRUCTION_LENGTH = 0x0000401A, + E_TPR_THRESHOLD = 0x0000401C, + E_SEC_PROC_BASED_VM_EXEC_CTRL = 
0x0000401E, + E_PLE_GAP = 0x00004020, + E_PLE_WINDOW = 0x00004022, + + /* B.3.2 32-Bit Read-Only Data Fields */ + E_VM_INSTRUCTION_ERROR = 0x00004400, + E_EXIT_REASON = 0x00004402, + E_VM_EXIT_INTERRUPT_INFORMATION = 0x00004404, + E_VM_EXIT_INTERRUPT_ERROR_CODE = 0x00004406, + E_IDT_VECTORING_INFORMATION_FIELD = 0x00004408, + E_IDT_VECTORING_ERROR_CODE = 0x0000440A, + E_VM_EXIT_INSTRUCTION_LENGTH = 0x0000440C, + E_VM_EXIT_INSTRUCTION_INFORMATION = 0x0000440E, + + /* B.3.3 32-Bit Guest-State Fields */ + E_GUEST_ES_LIMIT = 0x00004800, + E_GUEST_CS_LIMIT = 0x00004802, + E_GUEST_SS_LIMIT = 0x00004804, + E_GUEST_DS_LIMIT = 0x00004806, + E_GUEST_FS_LIMIT = 0x00004808, + E_GUEST_GS_LIMIT = 0x0000480A, + E_GUEST_LDTR_LIMIT = 0x0000480C, + E_GUEST_TR_LIMIT = 0x0000480E, + E_GUEST_GDTR_LIMIT = 0x00004810, + E_GUEST_IDTR_LIMIT = 0x00004812, + E_GUEST_ES_ACCESS_RIGHTS = 0x00004814, + E_GUEST_CS_ACCESS_RIGHTS = 0x00004816, + E_GUEST_SS_ACCESS_RIGHTS = 0x00004818, + E_GUEST_DS_ACCESS_RIGHTS = 0x0000481A, + E_GUEST_FS_ACCESS_RIGHTS = 0x0000481C, + E_GUEST_GS_ACCESS_RIGHTS = 0x0000481E, + E_GUEST_LDTR_ACCESS_RIGHTS = 0x00004820, + E_GUEST_TR_ACCESS_RIGHTS = 0x00004822, + E_GUEST_INTERRUPTIBILITY_STATE = 0x00004824, + E_GUEST_ACTIVITY_STATE = 0x00004826, + E_GUEST_SMBASE = 0x00004828, + E_IA32_SYSENTER_CS = 0x0000482A, + E_VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + + /* B.3.3 32-Bit Host-State Field */ + E_HOST_IA32_SYSENTER_CS = 0x00004C00, + + + /* + * B.4 Natural-Width Fields + */ + + /* B.4.1 Natural-Width Control Fields */ + E_CR0_GUEST_HOST_MASK = 0x00006000, + E_CR4_GUEST_HOST_MASK = 0x00006002, + E_CR0_READ_SHADOW = 0x00006004, + E_CR4_READ_SHADOW = 0x00006006, + E_CR3_TARGET_VALUE_0 = 0x00006008, + E_CR3_TARGET_VALUE_1 = 0x0000600A, + E_CR3_TARGET_VALUE_2 = 0x0000600C, + E_CR3_TARGET_VALUE_3 = 0x0000600E, + + /* B.4.2 Natural-Width Read-Only Data Fields */ + E_EXIT_QUALIFICATION = 0x00006400, + E_IO_RCX = 0x00006402, + E_IO_RSI = 0x00006404, + E_IO_RDI = 0x00006406, + 
E_IO_RIP = 0x00006408, + E_GUEST_LINEAR_ADDRRESS = 0x0000640A, + + /* B.4.3 Natural-Width Guest-State Fields */ + E_GUEST_CR0 = 0x00006800, + E_GUEST_CR3 = 0x00006802, + E_GUEST_CR4 = 0x00006804, + E_GUEST_ES_BASE = 0x00006806, + E_GUEST_CS_BASE = 0x00006808, + E_GUEST_SS_BASE = 0x0000680A, + E_GUEST_DS_BASE = 0x0000680C, + E_GUEST_FS_BASE = 0x0000680E, + E_GUEST_GS_BASE = 0x00006810, + E_GUEST_LDTR_BASE = 0x00006812, + E_GUEST_TR_BASE = 0x00006814, + E_GUEST_GDTR_BASE = 0x00006816, + E_GUEST_IDTR_BASE = 0x00006818, + E_GUEST_DR7 = 0x0000681A, + E_GUEST_RSP = 0x0000681C, + E_GUEST_RIP = 0x0000681E, + E_GUEST_RFLAGS = 0x00006820, + E_GUEST_PENDING_DEBUG_EXCEPTIONS = 0x00006822, + E_GUEST_IA32_SYSENTER_ESP = 0x00006824, + E_GUEST_IA32_SYSENTER_EIP = 0x00006826, + E_GUEST_IA32_S_CET = 0x00006828, + E_GUEST_SSP = 0x0000682A, + E_GUEST_IA32_INT_SSP_TABLE_ADDR = 0x0000682C, + + /* B.4.4 Natural-Width Host-State Fields */ + E_HOST_CR0 = 0x00006C00, + E_HOST_CR3 = 0x00006C02, + E_HOST_CR4 = 0x00006C04, + E_HOST_FS_BASE = 0x00006C06, + E_HOST_GS_BASE = 0x00006C08, + E_HOST_TR_BASE = 0x00006C0A, + E_HOST_GDTR_BASE = 0x00006C0C, + E_HOST_IDTR_BASE = 0x00006C0E, + E_HOST_IA32_SYSENTER_ESP = 0x00006C10, + E_HOST_IA32_SYSENTER_EIP = 0x00006C12, + E_HOST_RSP = 0x00006C14, + E_HOST_RIP = 0x00006C16, + E_HOST_IA32_S_CET = 0x00006C18, + E_HOST_SSP = 0x00006C1A, + E_HOST_IA32_INT_SSP_TABLE_ADDR = 0x00006C1C, + }; + + static void vmxon(Genode::addr_t phys_addr) + { + bool success = false; + asm volatile( + "vmxon %[vmcs];" + /* the command succeeded if CF = 0 and ZF = 0 */ + : "=@cca"(success) + : [vmcs] "m"(phys_addr) + : "cc"); + assert(success && "vmxon failed"); + } + + static void vmptrld(Genode::addr_t phys_addr) + { + bool success = false; + asm volatile( + "vmptrld %[vmcs];" + /* the command succeeded if CF = 0 and ZF = 0 */ + : "=@cca"(success) + : [vmcs] "m"(phys_addr) + : "cc"); + assert(success && "vmptrld failed"); + } + + static Genode::uint64_t read(Genode::uint32_t 
enc) + { + Genode::uint64_t val; + asm volatile( + "vmread %[enc], %[val];" + : [val] "=rm"(val) + : [enc] "rm"(static_cast(enc)) + : "cc"); + return val; + } + + static void vmclear(Genode::addr_t phys_addr) + { + bool success = false; + asm volatile( + "vmclear %[vmcs];" + /* the command succeeded if CF = 0 and ZF = 0 */ + : "=@cca"(success) + : [vmcs] "m"(phys_addr) + : "cc"); + assert(success && "vmclear failed"); + } + + static void write(Genode::uint32_t enc, Genode::uint64_t val) + { + /* Genode::raw("VMWRITE: ", Genode::Hex(enc), " val: ", Genode::Hex(val)); */ + bool success = false; + asm volatile( + "vmwrite %[val], %[enc];" + /* the command succeeded if CF = 0 and ZF = 0 */ + : "=@cca"(success) + : [enc]"rm"(static_cast(enc)), [val] "r"(val) + : "cc"); + assert(success && "vmwrite failed"); + } + + Vmcs(Genode::Vcpu_data &vcpu_data); + Virt_type virt_type() override + { + return Virt_type::VMX; + } + + + void initialize(Kernel::Cpu &cpu, Genode::addr_t page_table_phys, + Core::Cpu::Context ®s) override; + void write_vcpu_state(Genode::Vcpu_state &state) override; + void read_vcpu_state(Genode::Vcpu_state &state) override; + void switch_world(Core::Cpu::Context ®s) override; + Genode::uint64_t handle_vm_exit(unsigned &irq, bool error = false) override; + void save_host_msrs(); + void prepare_vmcs(); + void setup_vmx_info(); + static void enforce_intercepts(Genode::uint32_t desired_primary, + Genode::uint32_t desired_secondary); + void _load_pointer(); + void construct_host_vmcs(Genode::size_t cpu_id); +}; + + +/* + * Access controls + */ + +/* + * Pin-Based VM-Execution Controls + * + * For details, see Vol. 3C of the Intel SDM (September 2023): + * 25.6.1 Pin-Based VM-Execution Controls + */ + +/* 25-5. 
Definitions of Pin-Based VM-Execution Controls */ +struct Pin_based_execution_controls : Genode::Register<32> +{ + struct External_interrupt_exiting : Bitfield<0,1> { }; + struct Bit_1 : Bitfield<1,1> { }; + struct Bit_2 : Bitfield<2,1> { }; + struct Nmi_exiting : Bitfield<3,1> { }; + struct Bit_4 : Bitfield<4,1> { }; + struct Virtual_nmis : Bitfield<5,1> { }; + struct Activate_vmx_preemtion_timer : Bitfield<6,1> { }; + struct Process_posted_interrupts : Bitfield<7,1> { }; +}; + +/* + * Primary VM-Exit Controls + * + * For details, see Vol. 3C of the Intel SDM (September 2023): + * Table 25-13. Definitions of Primary VM-Exit Controls + */ +struct Primary_vm_exit_controls : Genode::Register<32> +{ + struct Save_debug_controls : Bitfield< 2,1> { }; + struct Host_address_space_size : Bitfield< 9,1> { }; + struct Load_ia32_perf_global_ctrl : Bitfield<12,1> { }; + struct Ack_interrupt_on_exit : Bitfield<15,1> { }; + struct Save_ia32_pat : Bitfield<18,1> { }; + struct Load_ia32_pat : Bitfield<19,1> { }; + struct Save_ia32_efer : Bitfield<20,1> { }; + struct Load_ia32_efer : Bitfield<21,1> { }; + struct Save_vmx_preemt_timer_val : Bitfield<22,1> { }; + struct Clear_ia32_bndcfgs : Bitfield<23,1> { }; + struct Conceal_vmx_from_pt : Bitfield<24,1> { }; + struct Clear_ia32_rtit_ctl : Bitfield<25,1> { }; + struct Clear_ia32_lbr_ctl : Bitfield<26,1> { }; + struct Clear_uuinv : Bitfield<27,1> { }; + struct Load_cet_state : Bitfield<28,1> { }; + struct Load_pkrs : Bitfield<29,1> { }; + struct Save_ia32_perf_global_ctl : Bitfield<30,1> { }; + struct Activate_tertiary_controls : Bitfield<31,1> { }; +}; + + +/* + * Secondary VM-Exit Controls + * + * For details, see Vol. 3C of the Intel SDM (September 2023): + * Table 25-14. Definitions of Secondary VM-Exit Controls + */ +struct Secondary_vm_exit_controls : Genode::Register<32> +{ + struct Prematurely_busy_shadow_stack : Bitfield< 3,1> { }; +}; + + + +/* + * VM-Entry Controls + * + * For details, see Vol. 
3C of the Intel SDM (September 2023): + * Table 25-13. Definitions of Primary VM-Exit Controls + * 25.8.1 VM-Entry Controls + */ +struct Vm_entry_controls : Genode::Register<32> +{ + struct Load_debug_controls : Bitfield< 2,1> { }; + struct Ia32e_mode_guest : Bitfield< 9,1> { }; + struct Entry_to_smm : Bitfield<10,1> { }; + struct Deactivate_dual_monitor_treatment : Bitfield<11,1> { }; + struct Load_ia32_perf_global_ctrl : Bitfield<13,1> { }; + struct Load_ia32_pat : Bitfield<14,1> { }; + struct Load_ia32_efer : Bitfield<15,1> { }; + struct Load_ia32_bndcfgs : Bitfield<16,1> { }; + struct Conceal_vmx_from_pt : Bitfield<17,1> { }; + struct Load_ia32_rtit_ctl : Bitfield<18,1> { }; + struct Load_uinv : Bitfield<19,1> { }; + struct Load_cet_state : Bitfield<20,1> { }; + struct Load_guest_ia32_lbr_ctl : Bitfield<21,1> { }; + struct Load_pkrs : Bitfield<22,1> { }; +}; + + +/* + * Processor-Based VM-Execution Controls + * + * For details, see Vol. 3C of the Intel SDM (September 2023): + * 25.6.2 Processor-Based VM-Execution Controls + */ + +/* Table 25-6. 
Definitions of Primary Processor-Based VM-Execution Controls */ +struct Primary_proc_based_execution_controls : Genode::Register<32> +{ + struct Interrupt_window_exiting : Bitfield< 2,1> { }; + struct Use_tsc_offsetting : Bitfield< 3,1> { }; + struct Hlt_exiting : Bitfield< 7,1> { }; + struct Invlpg_exiting : Bitfield< 9,1> { }; + struct Mwait_exiting : Bitfield<10,1> { }; + struct Rdpmc_exiting : Bitfield<12,1> { }; + struct Cr3_load_exiting : Bitfield<15,1> { }; + struct Cr3_store_exiting : Bitfield<16,1> { }; + struct Activate_tertiary_controls : Bitfield<17,1> { }; + struct Cr8_load_exiting : Bitfield<19,1> { }; + struct Cr8_store_exiting : Bitfield<20,1> { }; + struct Use_tpr_shadow : Bitfield<21,1> { }; + struct Nmi_window_exiting : Bitfield<22,1> { }; + struct Mov_dr_exiting : Bitfield<23,1> { }; + struct Unconditional_io_exiting : Bitfield<24,1> { }; + struct Use_io_bitmaps : Bitfield<25,1> { }; + struct Monitor_trap_flag : Bitfield<27,1> { }; + struct Use_msr_bitmaps : Bitfield<28,1> { }; + struct Monitor_exiting : Bitfield<29,1> { }; + struct Pause_exiting : Bitfield<30,1> { }; + struct Activate_secondary_controls : Bitfield<31,1> { }; +}; + +/* Table 25-7. 
Definitions of Secondary Processor-Based VM-Execution Controls */ +struct Secondary_proc_based_execution_controls : Genode::Register<32> +{ + struct Virtualize_apic_accesses : Bitfield< 0,1> { }; + struct Enable_ept : Bitfield< 1,1> { }; + struct Descriptor_table_exiting : Bitfield< 2,1> { }; + struct Enable_rdtscp : Bitfield< 3,1> { }; + struct Vrtualize_x2apic_mode : Bitfield< 4,1> { }; + struct Enable_vpid : Bitfield< 5,1> { }; + struct Wbinvd_exiting : Bitfield< 6,1> { }; + struct Unrestricted_guest : Bitfield< 7,1> { }; + struct Apic_register_virtualization : Bitfield< 8,1> { }; + struct Virtual_interrupt_delivery : Bitfield< 9,1> { }; + struct Pause_loop_exiting : Bitfield<10,1> { }; + struct Rdrand_exiting : Bitfield<11,1> { }; + struct Enable_invpcid : Bitfield<12,1> { }; + struct Enable_vm_functi : Bitfield<13,1> { }; + struct Vmcs_shadowing : Bitfield<14,1> { }; + struct Enable_encls_exiting : Bitfield<15,1> { }; + struct Rdseed_exiting : Bitfield<16,1> { }; + struct Enable_pml : Bitfield<17,1> { }; + struct Ept_violation_ve : Bitfield<18,1> { }; + struct Conceal_vmx_from_pt : Bitfield<19,1> { }; + struct Enable_xsaves_xrstors : Bitfield<20,1> { }; + struct Mode_based_execut_control_for_ept : Bitfield<22,1> { }; + struct Sub_page_write_permissions_for_ept : Bitfield<23,1> { }; + struct Intel_pt_uses_guest_physical_addresses : Bitfield<24,1> { }; + struct Use_tsc_scaling : Bitfield<25,1> { }; + struct Enable_user_wait_and_pause : Bitfield<26,1> { }; + struct Enable_pconfig : Bitfield<27,1> { }; + struct Enable_enclv_exiting : Bitfield<28,1> { }; + struct Vmm_bus_Lock_detection : Bitfield<30,1> { }; + struct Instruction_timeout : Bitfield<31,1> { }; +}; + +/* 25-8. 
Definitions of Tertiary Processor-Based VM-Execution Controls */ +struct Tertiary_proc_based_execution_controls : Genode::Register<32> +{ + struct Loadiwkey_exiting : Bitfield< 0,1> { }; + struct Enable_hlat : Bitfield< 1,1> { }; + struct Ept_paging_write_control : Bitfield< 2,1> { }; + struct Guest_paging_verification : Bitfield< 3,1> { }; + struct Ipi_virtualization : Bitfield< 4,1> { }; + struct Virtualize_ia32_spec_ctrl : Bitfield< 7,1> { }; +}; + + +/* + * Virtual Apic State + * + * For details, see Vol. 3C of the Intel SDM (September 2023): + * 30.1 Virtual Apic State + */ +struct Board::Virtual_apic_state +{ + enum { + VTPR_OFFSET = 0x80, + }; + + Genode::uint8_t pad[4096]; + + Genode::uint32_t get_vtpr() + { + return static_cast(*(pad + VTPR_OFFSET)); + } + + void set_vtpr(Genode::uint32_t vtpr) + { + Genode::uint32_t *tpr = + reinterpret_cast(pad + VTPR_OFFSET); + *tpr = vtpr; + } +}; + +#endif /* _INCLUDE__SPEC__PC__VMX_H_ */ diff --git a/repos/base-hw/src/include/hw/spec/x86_64/cpu.h b/repos/base-hw/src/include/hw/spec/x86_64/cpu.h index d050821c9e0..47997794c8d 100644 --- a/repos/base-hw/src/include/hw/spec/x86_64/cpu.h +++ b/repos/base-hw/src/include/hw/spec/x86_64/cpu.h @@ -1,11 +1,12 @@ /* * \brief x86_64 CPU definitions * \author Stefan Kalkowski + * \author Benjamin Lamowski * \date 2017-04-07 */ /* - * Copyright (C) 2017 Genode Labs GmbH + * Copyright (C) 2017-2024 Genode Labs GmbH * * This file is part of the Genode OS framework, which is distributed * under the terms of the GNU Affero General Public License version 3. 
@@ -130,16 +131,211 @@ struct Hw::X86_64_cpu ); X86_64_MSR_REGISTER(Ia32_efer, 0xC0000080, - struct Svme : Bitfield< 12, 1> { }; /* Secure Virtual Machine Enable */ + struct Lme : Bitfield< 8, 1> { }; /* Long Mode Enable */ + struct Lma : Bitfield<10, 1> { }; /* Long Mode Active */ + struct Svme : Bitfield<12, 1> { }; /* Secure Virtual Machine Enable */ + ); + + /* Map of BASE Address of FS */ + X86_64_MSR_REGISTER(Ia32_fs_base, 0xC0000100); + + /* Map of BASE Address of GS */ + X86_64_MSR_REGISTER(Ia32_gs_base, 0xC0000101); + + /* System Call Target Address */ + X86_64_MSR_REGISTER(Ia32_star, 0xC0000081); + + /* IA-32e Mode System Call Target Address */ + X86_64_MSR_REGISTER(Ia32_lstar, 0xC0000082); + + /* IA-32e Mode System Call Target Address */ + X86_64_MSR_REGISTER(Ia32_cstar, 0xC0000083); + + /* System Call Flag Mask */ + X86_64_MSR_REGISTER(Ia32_fmask, 0xC0000084); + + /* Swap Target of BASE Address of GS */ + X86_64_MSR_REGISTER(Ia32_kernel_gs_base, 0xC0000102); + + /* See Vol. 4, Table 2-2 of the Intel SDM */ + X86_64_MSR_REGISTER(Ia32_feature_control, 0x3A, + struct Lock : Bitfield< 0, 0> { }; /* VMX Lock */ + struct Vmx_no_smx : Bitfield< 2, 2> { }; /* Enable VMX outside SMX */ ); /* * Auxiliary TSC register - * For details, see Vol. 3B of the Intel SDM: - * 17.17.2 IA32_TSC_AUX Register and RDTSCP Support + * For details, see Vol. 3B of the Intel SDM (September 2023): + * 18.17.2 IA32_TSC_AUX Register and RDTSCP Support */ X86_64_MSR_REGISTER(Ia32_tsc_aux, 0xc0000103); + /* + * Reporting Register of Basic VMX Capabilities + * For details, see Vol. 
3D of the Intel SDM (September 2023): + * A.1 Basic VMX Information + */ + X86_64_MSR_REGISTER(Ia32_vmx_basic, 0x480, + struct Rev : Bitfield< 0,31> { }; /* VMCS revision */ + struct Vmcs_size : Bitfield<32,13> { }; /* VMCS region size*/ + struct Phys_addr_width : Bitfield<48, 1> { }; /* VMCS physical address width */ + struct Dual_monitor : Bitfield<49, 1> { }; /* Dual-monitor support */ + struct Memory_type : Bitfield<50, 4> { }; /* VMCS memory type */ + struct Ins_outs_exit : Bitfield<54, 1> { }; /* VM exit info for INS and OUTS instructions, see 27.2.5 */ + struct Clear_controls : Bitfield<55, 1> { }; /* VMCS controls may be cleared, see A.2 */ + struct Deliver_except : Bitfield<56, 1> { }; /* Deliver hardware exception on entry, see 2 */ + ); + + /* + * Capability Reporting Register of Pin-Based VM-Execution Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.3.1 Pin-Based VM-Execution Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_pinbased_ctls, 0x481, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of Pin-Based VM-Execution Flex Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.3.1 Pin-Based VM-Execution Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_true_pinbased_ctls, 0x48D, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of Primary Processor-Based VM-Execution Controls + * For details, see Vol. 
3D of the Intel SDM (September 2023): + * A.3.2 Primary Processor-Based VM-Execution Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_procbased_ctls, 0x482, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of Primary Processor-Based VM-Execution Flex Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.3.2 Primary Processor-Based VM-Execution Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_true_procbased_ctls, 0x48E, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of Primary VM-Exit Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.4.1 Primary VM-Exit Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_exit_ctls, 0x483, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of VM-Exit Flex Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.4.1 Primary VM-Exit Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_true_exit_ctls, 0x48F, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of VM-Entry Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.5 VM-Entry Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_entry_ctls, 0x484, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of VM-Entry Flex Controls + * For details, see Vol. 
3D of the Intel SDM (September 2023): + * A.5 VM-Entry Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_true_entry_ctls, 0x490, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of Secondary Processor-Based VM-Execution Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.3.3 Secondary Processor-Based VM-Execution Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_procbased_ctls2, 0x48B, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of EPT and VPID + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.10 VPID and EPT Capabilities.” + */ + X86_64_MSR_REGISTER(Ia32_vmx_ept_vpid_cap, 0x48C, + struct Execute_only_translations : Bitfield< 0,1> { }; /* Excecute-only translations support */ + struct Page_walk_length_4 : Bitfield< 6,1> { }; /* Support page-walk length of 4 */ + struct Page_walk_length_5 : Bitfield< 7,1> { }; /* Support page-walk length of 5 */ + struct Uncachable : Bitfield< 8,1> { }; /* Paging memory can be uncachable (UC) */ + struct Writeback : Bitfield<14,1> { }; /* Paging memory can be writeback (WB) */ + struct Map_2mb_page : Bitfield<16,1> { }; /* Support 2MB pages */ + struct Map_1gb_page : Bitfield<17,1> { }; /* Support 1Gb pages */ + struct Invept : Bitfield<20,1> { }; /* INVEPT instruction support */ + struct Accessed_dirty_flags : Bitfield<21,1> { }; /* Support accessed and dirty flags */ + struct Vm_exit_info_ept_violations : Bitfield<22,1> { }; /* Advanced VM-exit information for EPT violations */ + struct Supervisor_shadow_stack_control : Bitfield<23,1> { }; /* Support for supervisor shadow-stack control */ + struct Single_context_invept : Bitfield<25,1> { }; /* Single-context INVEPT support */ + struct 
All_context_invept : Bitfield<26,1> { }; /* All-context INVEPT support */
+		struct Invvpid : Bitfield<32,1> { }; /* INVVPID instruction support */
+		struct Individual_address_invvpid : Bitfield<40,1> { }; /* Individual-address INVVPID support */
+		struct Single_context_invvpid : Bitfield<41,1> { }; /* Single-context INVVPID support */
+		struct All_context_invvpid : Bitfield<42,1> { }; /* All-context INVVPID support */
+		struct Single_context_retaining_gobals : Bitfield<43,1> { }; /* Single-context-retaining-globals INVVPID support */
+		struct Max_hlat_prexix_size : Bitfield<48,6> { }; /* Maximum HLAT prefix size */
+	);
+
+	/*
+	 * Capability Reporting Register of CR0 Bits Fixed to 0
+	 * [sic] in fact, bits reported here need to be 1
+	 * For details, see Vol. 3D of the Intel SDM (September 2023):
+	 * A.7 VMX-Fixed Bits in CR0
+	 */
+	X86_64_MSR_REGISTER(Ia32_vmx_cr0_fixed0, 0x486);
+
+	/*
+	 * Capability Reporting Register of CR0 Bits Fixed to 1
+	 * [sic] in fact, bits *NOT* reported here need to be 0
+	 * For details, see Vol. 3D of the Intel SDM (September 2023):
+	 * A.7 VMX-Fixed Bits in CR0
+	 */
+	X86_64_MSR_REGISTER(Ia32_vmx_cr0_fixed1, 0x487);
+
+	/*
+	 * Capability Reporting Register of CR4 Bits Fixed to 0
+	 * [sic] in fact, bits reported here need to be 1
+	 * For details, see Vol. 3D of the Intel SDM (September 2023):
+	 * A.8 VMX-Fixed Bits in CR4
+	 */
+	X86_64_MSR_REGISTER(Ia32_vmx_cr4_fixed0, 0x488);
+
+	/*
+	 * Capability Reporting Register of CR4 Bits Fixed to 1
+	 * [sic] in fact, bits *NOT* reported here need to be 0
+	 * For details, see Vol.
3D of the Intel SDM (September 2023): + * A.8 VMX-Fixed Bits in CR4 + */ + X86_64_MSR_REGISTER(Ia32_vmx_cr4_fixed1, 0x489); + + X86_64_CPUID_REGISTER(Cpuid_0_eax, 0, eax); X86_64_CPUID_REGISTER(Cpuid_0_ebx, 0, ebx); X86_64_CPUID_REGISTER(Cpuid_0_ecx, 0, ecx); @@ -148,6 +344,7 @@ struct Hw::X86_64_cpu X86_64_CPUID_REGISTER(Cpuid_1_eax, 1, eax); X86_64_CPUID_REGISTER(Cpuid_1_ecx, 1, ecx, + struct Vmx : Bitfield< 5, 1> { }; struct Tsc_deadline : Bitfield<24, 1> { }; ); diff --git a/repos/base-hw/src/include/hw/spec/x86_64/x86_64.h b/repos/base-hw/src/include/hw/spec/x86_64/x86_64.h index b41d14d5903..fa0c786092f 100644 --- a/repos/base-hw/src/include/hw/spec/x86_64/x86_64.h +++ b/repos/base-hw/src/include/hw/spec/x86_64/x86_64.h @@ -1,11 +1,12 @@ /* * \brief Definitions common to all x86_64 CPUs * \author Stefan Kalkowski + * \author Benjamin Lamowski * \date 2017-04-10 */ /* - * Copyright (C) 2017 Genode Labs GmbH + * Copyright (C) 2017-2024 Genode Labs GmbH * * This file is part of the Genode OS framework, which is distributed * under the terms of the GNU Affero General Public License version 3. 
@@ -273,6 +274,25 @@ struct Hw::Virtualization_support return false; } + + static bool has_vmx() + { + if (Hw::Vendor::get_vendor_id() != Hw::Vendor::INTEL) + return false; + + Cpu::Cpuid_1_ecx::access_t ecx = Cpu::Cpuid_1_ecx::read(); + if (!Cpu::Cpuid_1_ecx::Vmx::get(ecx)) + return false; + + /* Check if VMX feature is off and locked */ + Cpu::Ia32_feature_control::access_t feature_control = + Cpu::Ia32_feature_control::read(); + if (!Cpu::Ia32_feature_control::Vmx_no_smx::get(feature_control) && + Cpu::Ia32_feature_control::Lock::get(feature_control)) + return false; + + return true; + } }; #endif /* _SRC__LIB__HW__SPEC__X86_64__X86_64_H_ */ diff --git a/repos/base/include/spec/x86_64/cpu/cpu_state.h b/repos/base/include/spec/x86_64/cpu/cpu_state.h index 5002e93d3e5..99ca5c89015 100644 --- a/repos/base/include/spec/x86_64/cpu/cpu_state.h +++ b/repos/base/include/spec/x86_64/cpu/cpu_state.h @@ -4,13 +4,14 @@ * \author Christian Prochaska * \author Reto Buerki * \author Stefan Kalkowski + * \author Benjamin Lamowski * \date 2011-04-15 * * This file contains the x86_64-specific part of the CPU state. */ /* - * Copyright (C) 2011-2017 Genode Labs GmbH + * Copyright (C) 2011-2024 Genode Labs GmbH * * This file is part of the Genode OS framework, which is distributed * under the terms of the GNU Affero General Public License version 3. @@ -33,6 +34,7 @@ struct Genode::Cpu_state NO_MATH_COPROC = 0x07, GENERAL_PROTECTION = 0x0d, PAGE_FAULT = 0x0e, + ALIGNMENT_CHECK = 0x11, SUPERVISOR_CALL = 0x80, INTERRUPTS_START = 0x20, RESET = 0xfe,