diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 27de9f6afa6..3f6847aa8dc 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -175,10 +175,6 @@ fn create_vmm_and_vcpus( .map_err(VmmError::Vm) .map_err(StartMicrovmError::Internal)?; - let vcpus_exit_evt = EventFd::new(libc::EFD_NONBLOCK) - .map_err(VmmError::EventFd) - .map_err(Internal)?; - let resource_allocator = ResourceAllocator::new()?; // Instantiate the MMIO device manager. @@ -187,13 +183,13 @@ fn create_vmm_and_vcpus( // Instantiate ACPI device manager. let acpi_device_manager = ACPIDeviceManager::new(); - // For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` - // while on aarch64 we need to do it the other way around. - #[cfg(target_arch = "x86_64")] - let (vcpus, pio_device_manager) = { - setup_interrupt_controller(&mut vm)?; - let vcpus = create_vcpus(&kvm, &vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; + let (vcpus, vcpus_exit_evt) = vm + .create_vcpus(vcpu_count) + .map_err(VmmError::Vm) + .map_err(Internal)?; + #[cfg(target_arch = "x86_64")] + let pio_device_manager = { // Make stdout non blocking. set_stdout_nonblocking(); @@ -208,25 +204,10 @@ fn create_vmm_and_vcpus( .map_err(Internal)?; // create pio dev manager with legacy devices - let pio_device_manager = { - // TODO Remove these unwraps. - let mut pio_dev_mgr = PortIODeviceManager::new(serial_device, reset_evt).unwrap(); - pio_dev_mgr.register_devices(vm.fd()).unwrap(); - pio_dev_mgr - }; - - (vcpus, pio_device_manager) - }; - - // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) before setting up the - // IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP - // was already initialized. - // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. - #[cfg(target_arch = "aarch64")] - let vcpus = { - let vcpus = create_vcpus(&kvm, &vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; - setup_interrupt_controller(&mut vm, vcpu_count)?; - vcpus + // TODO Remove these unwraps. + let mut pio_dev_mgr = PortIODeviceManager::new(serial_device, reset_evt).unwrap(); + pio_dev_mgr.register_devices(vm.fd()).unwrap(); + pio_dev_mgr }; let vmm = Vmm { @@ -436,7 +417,7 @@ pub enum BuildMicrovmFromSnapshotError { /// Could not set TSC scaling within the snapshot: {0} SetTsc(#[from] crate::vstate::vcpu::SetTscError), /// Failed to restore microVM state: {0} - RestoreState(#[from] crate::vstate::vm::RestoreStateError), + RestoreState(#[from] crate::vstate::vm::ArchVmError), /// Failed to update microVM configuration: {0} VmUpdateConfig(#[from] MachineConfigError), /// Failed to restore MMIO device: {0} @@ -671,22 +652,6 @@ where }) } -/// Sets up the irqchip for a x86_64 microVM. -#[cfg(target_arch = "x86_64")] -pub fn setup_interrupt_controller(vm: &mut Vm) -> Result<(), StartMicrovmError> { - vm.setup_irqchip() - .map_err(VmmError::Vm) - .map_err(StartMicrovmError::Internal) -} - -/// Sets up the irqchip for a aarch64 microVM. -#[cfg(target_arch = "aarch64")] -pub fn setup_interrupt_controller(vm: &mut Vm, vcpu_count: u8) -> Result<(), StartMicrovmError> { - vm.setup_irqchip(vcpu_count) - .map_err(VmmError::Vm) - .map_err(StartMicrovmError::Internal) -} - /// Sets up the serial device. pub fn setup_serial_device( event_manager: &mut EventManager, @@ -744,21 +709,6 @@ fn attach_legacy_devices_aarch64( .map_err(VmmError::RegisterMMIODevice) } -fn create_vcpus( - kvm: &Kvm, - vm: &Vm, - vcpu_count: u8, - exit_evt: &EventFd, -) -> Result, VmmError> { - let mut vcpus = Vec::with_capacity(vcpu_count as usize); - for cpu_idx in 0..vcpu_count { - let exit_evt = exit_evt.try_clone().map_err(VmmError::EventFd)?; - let vcpu = Vcpu::new(cpu_idx, vm, kvm, exit_evt).map_err(VmmError::VcpuCreate)?; - vcpus.push(vcpu); - } - Ok(vcpus) -} - /// Configures the system for booting Linux. #[cfg_attr(target_arch = "aarch64", allow(unused))] pub fn configure_system_for_boot( @@ -1065,7 +1015,7 @@ pub(crate) mod tests { use crate::devices::virtio::{TYPE_BALLOON, TYPE_BLOCK, TYPE_RNG}; use crate::mmds::data_store::{Mmds, MmdsVersion}; use crate::mmds::ns::MmdsNetworkStack; - use crate::test_utils::{arch_mem, single_region_mem, single_region_mem_at}; + use crate::test_utils::{single_region_mem, single_region_mem_at}; use crate::vmm_config::balloon::{BalloonBuilder, BalloonDeviceConfig, BALLOON_DEV_ID}; use crate::vmm_config::boot_source::DEFAULT_KERNEL_CMDLINE; use crate::vmm_config::drive::{BlockBuilder, BlockDeviceConfig}; @@ -1073,6 +1023,7 @@ pub(crate) mod tests { use crate::vmm_config::net::{NetBuilder, NetworkInterfaceConfig}; use crate::vmm_config::vsock::tests::default_config; use crate::vmm_config::vsock::{VsockBuilder, VsockDeviceConfig}; + use crate::vstate::vm::tests::setup_vm_with_memory; #[derive(Debug)] pub(crate) struct CustomBlockConfig { @@ -1128,16 +1079,8 @@ pub(crate) mod tests { } pub(crate) fn default_vmm() -> Vmm { - let guest_memory = arch_mem(128 << 20); + let (kvm, mut vm, guest_memory) = setup_vm_with_memory(128 << 20); - let vcpus_exit_evt = EventFd::new(libc::EFD_NONBLOCK) - .map_err(VmmError::EventFd) - .map_err(StartMicrovmError::Internal) - .unwrap(); - - let kvm = Kvm::new(vec![]).unwrap(); - let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_memory).unwrap(); let mmio_device_manager = MMIODeviceManager::new(); let acpi_device_manager = ACPIDeviceManager::new(); #[cfg(target_arch = "x86_64")] @@ -1156,15 +1099,7 @@ pub(crate) mod tests { ) .unwrap(); - #[cfg(target_arch = "x86_64")] - setup_interrupt_controller(&mut vm).unwrap(); - - #[cfg(target_arch = "aarch64")] - { - let exit_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - let _vcpu = Vcpu::new(1, &vm, &kvm, exit_evt).unwrap(); - setup_interrupt_controller(&mut vm, 1).unwrap(); - } + let (_, vcpus_exit_evt) = vm.create_vcpus(1).unwrap(); Vmm { events_observer: Some(std::io::stdin()), @@ -1383,24 +1318,6 @@ pub(crate) mod tests { ); } - #[test] - fn test_create_vcpus() { - let vcpu_count = 2; - let guest_memory = arch_mem(128 << 20); - - let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); - #[allow(unused_mut)] - let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_memory).unwrap(); - let evfd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - - #[cfg(target_arch = "x86_64")] - setup_interrupt_controller(&mut vm).unwrap(); - - let vcpu_vec = create_vcpus(&kvm, &vm, vcpu_count, &evfd).unwrap(); - assert_eq!(vcpu_vec.len(), vcpu_count as usize); - } - #[test] fn test_attach_net_devices() { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); diff --git a/src/vmm/src/device_manager/legacy.rs b/src/vmm/src/device_manager/legacy.rs index 8526d3c2901..eee2f86dc83 100644 --- a/src/vmm/src/device_manager/legacy.rs +++ b/src/vmm/src/device_manager/legacy.rs @@ -248,8 +248,8 @@ mod tests { #[test] fn test_register_legacy_devices() { - let (_, mut vm, _) = setup_vm_with_memory(0x1000); - crate::builder::setup_interrupt_controller(&mut vm).unwrap(); + let (_, vm, _) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); let mut ldm = PortIODeviceManager::new( Arc::new(Mutex::new(BusDevice::Serial(SerialDevice { serial: Serial::with_events( diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index d35c8b36650..dc814de9766 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -549,7 +549,7 @@ mod tests { use crate::test_utils::multi_region_mem; use crate::vstate::kvm::Kvm; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; - use crate::{builder, Vm}; + use crate::Vm; const QUEUE_SIZES: &[u16] = &[64]; @@ -660,6 +660,7 @@ mod tests { } #[test] + #[cfg_attr(target_arch = "x86_64", allow(unused_mut))] fn test_register_virtio_device() { let start_addr1 = GuestAddress(0x0); let start_addr2 = GuestAddress(0x1000); @@ -673,9 +674,9 @@ mod tests { let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap(); let dummy = Arc::new(Mutex::new(DummyDevice::new())); #[cfg(target_arch = "x86_64")] - builder::setup_interrupt_controller(&mut vm).unwrap(); + vm.setup_irqchip().unwrap(); #[cfg(target_arch = "aarch64")] - builder::setup_interrupt_controller(&mut vm, 1).unwrap(); + vm.setup_irqchip(1).unwrap(); device_manager .register_virtio_test_device( @@ -690,6 +691,7 @@ mod tests { } #[test] + #[cfg_attr(target_arch = "x86_64", allow(unused_mut))] fn test_register_too_many_devices() { let start_addr1 = GuestAddress(0x0); let start_addr2 = GuestAddress(0x1000); @@ -702,9 +704,9 @@ mod tests { let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap(); #[cfg(target_arch = "x86_64")] - builder::setup_interrupt_controller(&mut vm).unwrap(); + vm.setup_irqchip().unwrap(); #[cfg(target_arch = "aarch64")] - builder::setup_interrupt_controller(&mut vm, 1).unwrap(); + vm.setup_irqchip(1).unwrap(); for _i in crate::arch::IRQ_BASE..=crate::arch::IRQ_MAX { device_manager @@ -745,6 +747,7 @@ mod tests { } #[test] + #[cfg_attr(target_arch = "x86_64", allow(unused_mut))] fn test_device_info() { let start_addr1 = GuestAddress(0x0); let start_addr2 = GuestAddress(0x1000); @@ -756,9 +759,9 @@ mod tests { let mem_clone = guest_mem.clone(); #[cfg(target_arch = "x86_64")] - builder::setup_interrupt_controller(&mut vm).unwrap(); + vm.setup_irqchip().unwrap(); #[cfg(target_arch = "aarch64")] - builder::setup_interrupt_controller(&mut vm, 1).unwrap(); + vm.setup_irqchip(1).unwrap(); let mut device_manager = MMIODeviceManager::new(); let mut resource_allocator = ResourceAllocator::new().unwrap(); diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 3c5f3e7e754..d62ffc84600 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -114,22 +114,14 @@ pub struct GuestRegionUffdMapping { /// Errors related to saving and restoring Microvm state. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum MicrovmStateError { - /// Compatibility checks failed: {0} - IncompatibleState(String), - /// Provided MicroVM state is invalid. - InvalidInput, /// Operation not allowed: {0} NotAllowed(String), /// Cannot restore devices: {0} RestoreDevices(DevicePersistError), - /// Cannot restore Vcpu state: {0} - RestoreVcpuState(vstate::vcpu::VcpuError), - /// Cannot restore Vm state: {0} - RestoreVmState(vstate::vm::VmError), /// Cannot save Vcpu state: {0} SaveVcpuState(vstate::vcpu::VcpuError), /// Cannot save Vm state: {0} - SaveVmState(vstate::vm::VmError), + SaveVmState(vstate::vm::ArchVmError), /// Cannot signal Vcpu: {0} SignalVcpu(VcpuSendEventError), /// Vcpu is in unexpected state. @@ -142,9 +134,6 @@ pub enum MicrovmStateError { pub enum CreateSnapshotError { /// Cannot get dirty bitmap: {0} DirtyBitmap(VmmError), - #[rustfmt::skip] - /// Cannot translate microVM version to snapshot data version - UnsupportedVersion, /// Cannot write memory file: {0} Memory(MemoryError), /// Cannot perform {0} on the memory backing file: {1} @@ -155,8 +144,6 @@ pub enum CreateSnapshotError { SerializeMicrovmState(crate::snapshot::SnapshotError), /// Cannot perform {0} on the snapshot backing file: {1} SnapshotBackingFile(&'static str, io::Error), - /// Size mismatch when writing diff snapshot on top of base layer: base layer size is {0} but diff layer is size {1}. - SnapshotBackingFileLengthMismatch(u64, u64), } /// Snapshot version diff --git a/src/vmm/src/vstate/kvm.rs b/src/vmm/src/vstate/kvm.rs index 59b192dbe09..20585b337fc 100644 --- a/src/vmm/src/vstate/kvm.rs +++ b/src/vmm/src/vstate/kvm.rs @@ -23,9 +23,6 @@ pub enum KvmError { configured on the /dev/kvm file's ACL. */ Kvm(kvm_ioctls::Error), #[cfg(target_arch = "x86_64")] - /// Failed to get MSR index list to save into snapshots: {0} - GetMsrsToSave(crate::arch::x86_64::msr::MsrError), - #[cfg(target_arch = "x86_64")] /// Failed to get supported cpuid: {0} GetSupportedCpuId(kvm_ioctls::Error), /// The number of configured slots is bigger than the maximum reported by KVM @@ -45,9 +42,6 @@ pub struct Kvm { #[cfg(target_arch = "x86_64")] /// Supported CpuIds. pub supported_cpuid: CpuId, - #[cfg(target_arch = "x86_64")] - /// Msrs needed to be saved on snapshot creation. - pub msrs_to_save: MsrList, } impl Kvm { @@ -82,19 +76,22 @@ impl Kvm { let supported_cpuid = kvm_fd .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES) .map_err(KvmError::GetSupportedCpuId)?; - let msrs_to_save = crate::arch::x86_64::msr::get_msrs_to_save(&kvm_fd) - .map_err(KvmError::GetMsrsToSave)?; Ok(Kvm { fd: kvm_fd, max_memslots, kvm_cap_modifiers, supported_cpuid, - msrs_to_save, }) } } + /// Msrs needed to be saved on snapshot creation. + #[cfg(target_arch = "x86_64")] + pub fn msrs_to_save(&self) -> Result { + crate::arch::x86_64::msr::get_msrs_to_save(&self.fd) + } + /// Check guest memory does not have more regions than kvm allows. pub fn check_memory(&self, guest_mem: &GuestMemoryMmap) -> Result<(), KvmError> { if guest_mem.num_regions() > self.max_memslots { diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 23ce07d06b9..228c8b8f062 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -36,10 +36,6 @@ pub type GuestMmapRegion = vm_memory::MmapRegion>; /// Errors associated with dumping guest memory to file. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum MemoryError { - /// Cannot create memory: {0} - CreateMemory(VmMemoryError), - /// Cannot create memory region: {0} - CreateRegion(MmapRegionError), /// Cannot fetch system's page size: {0} PageSize(errno::Error), /// Cannot dump memory: {0} diff --git a/src/vmm/src/vstate/vcpu/aarch64.rs b/src/vmm/src/vstate/vcpu/aarch64.rs index 9db1173eeb8..7bdde9b9d1f 100644 --- a/src/vmm/src/vstate/vcpu/aarch64.rs +++ b/src/vmm/src/vstate/vcpu/aarch64.rs @@ -22,7 +22,7 @@ use crate::cpu_config::aarch64::custom_cpu_template::VcpuFeatures; use crate::cpu_config::templates::CpuConfiguration; use crate::logger::{error, IncMetric, METRICS}; use crate::vcpu::{VcpuConfig, VcpuError}; -use crate::vstate::kvm::{Kvm, OptionalCapabilities}; +use crate::vstate::kvm::OptionalCapabilities; use crate::vstate::memory::{Address, GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuEmulation; use crate::vstate::vm::Vm; @@ -78,7 +78,7 @@ impl KvmVcpu { /// /// * `index` - Represents the 0-based CPU index between [0, max vcpus). /// * `vm` - The vm to which this vcpu will get attached. - pub fn new(index: u8, vm: &Vm, _: &Kvm) -> Result { + pub fn new(index: u8, vm: &Vm) -> Result { let kvm_vcpu = vm .fd() .create_vcpu(index.into()) @@ -315,7 +315,7 @@ mod tests { fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, KvmVcpu, GuestMemoryMmap) { let (kvm, mut vm, vm_mem) = setup_vm_with_memory(mem_size); - let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); vcpu.init(&[]).unwrap(); vm.setup_irqchip(1).unwrap(); @@ -324,11 +324,11 @@ mod tests { #[test] fn test_create_vcpu() { - let (kvm, vm, _) = setup_vm_with_memory(0x1000); + let (_, vm, _) = setup_vm_with_memory(0x1000); unsafe { libc::close(vm.fd().as_raw_fd()) }; - let err = KvmVcpu::new(0, &vm, &kvm); + let err = KvmVcpu::new(0, &vm); assert_eq!( err.err().unwrap().to_string(), "Error creating vcpu: Bad file descriptor (os error 9)".to_string() @@ -378,8 +378,8 @@ mod tests { #[test] fn test_init_vcpu() { - let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); - let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, mut vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); vm.setup_irqchip(1).unwrap(); // KVM_ARM_VCPU_PSCI_0_2 is set by default. @@ -397,8 +397,8 @@ mod tests { #[test] fn test_vcpu_save_restore_state() { - let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); - let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, mut vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); vm.setup_irqchip(1).unwrap(); // Calling KVM_GET_REGLIST before KVM_VCPU_INIT will result in error. @@ -441,8 +441,8 @@ mod tests { // // This should fail with ENOEXEC. // https://elixir.bootlin.com/linux/v5.10.176/source/arch/arm64/kvm/arm.c#L1165 - let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); - let vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, mut vm, _) = setup_vm_with_memory(0x1000); + let vcpu = KvmVcpu::new(0, &vm).unwrap(); vm.setup_irqchip(1).unwrap(); vcpu.dump_cpu_config().unwrap_err(); @@ -451,8 +451,8 @@ mod tests { #[test] fn test_dump_cpu_config_after_init() { // Test `dump_cpu_config()` after `KVM_VCPU_INIT`. - let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); - let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, mut vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); vm.setup_irqchip(1).unwrap(); vcpu.init(&[]).unwrap(); @@ -461,10 +461,10 @@ mod tests { #[test] fn test_setup_non_boot_vcpu() { - let (kvm, vm, _) = setup_vm_with_memory(0x1000); - let mut vcpu1 = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu1 = KvmVcpu::new(0, &vm).unwrap(); vcpu1.init(&[]).unwrap(); - let mut vcpu2 = KvmVcpu::new(1, &vm, &kvm).unwrap(); + let mut vcpu2 = KvmVcpu::new(1, &vm).unwrap(); vcpu2.init(&[]).unwrap(); } diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index d9aa0abd1a8..b120a9f2c07 100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -44,8 +44,6 @@ pub use aarch64::{KvmVcpuError, *}; #[cfg(target_arch = "x86_64")] pub use x86_64::{KvmVcpuError, *}; -use super::kvm::Kvm; - /// Signal number (SIGRTMIN) used to kick Vcpus. pub const VCPU_RTSIG_OFFSET: i32 = 0; @@ -214,10 +212,10 @@ impl Vcpu { /// * `index` - Represents the 0-based CPU index between [0, max vcpus). /// * `vm` - The vm to which this vcpu will get attached. /// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits. - pub fn new(index: u8, vm: &Vm, kvm: &Kvm, exit_evt: EventFd) -> Result { + pub fn new(index: u8, vm: &Vm, exit_evt: EventFd) -> Result { let (event_sender, event_receiver) = channel(); let (response_sender, response_receiver) = channel(); - let kvm_vcpu = KvmVcpu::new(index, vm, kvm).unwrap(); + let kvm_vcpu = KvmVcpu::new(index, vm).unwrap(); Ok(Vcpu { exit_evt, @@ -787,6 +785,7 @@ pub(crate) mod tests { use crate::devices::BusDevice; use crate::seccomp::get_empty_filters; use crate::utils::signal::validate_signal_num; + use crate::vstate::kvm::Kvm; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuError as EmulationError; use crate::vstate::vm::tests::setup_vm_with_memory; @@ -933,20 +932,12 @@ pub(crate) mod tests { pub(crate) fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, Vcpu, GuestMemoryMmap) { let (kvm, mut vm, gm) = setup_vm_with_memory(mem_size); - let exit_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let (mut vcpus, _) = vm.create_vcpus(1).unwrap(); + let mut vcpu = vcpus.remove(0); #[cfg(target_arch = "aarch64")] - let vcpu = { - let mut vcpu = Vcpu::new(1, &vm, &kvm, exit_evt).unwrap(); - vcpu.kvm_vcpu.init(&[]).unwrap(); - vm.setup_irqchip(1).unwrap(); - vcpu - }; - #[cfg(target_arch = "x86_64")] - let vcpu = { - vm.setup_irqchip().unwrap(); - Vcpu::new(1, &vm, &kvm, exit_evt).unwrap() - }; + vcpu.kvm_vcpu.init(&[]).unwrap(); + (kvm, vm, vcpu, gm) } @@ -978,7 +969,7 @@ pub(crate) mod tests { entry_addr.unwrap().kernel_load } - fn vcpu_configured_for_boot() -> (VcpuHandle, vmm_sys_util::eventfd::EventFd) { + fn vcpu_configured_for_boot() -> (VcpuHandle, EventFd, GuestMemoryMmap) { Vcpu::register_kick_signal_handler(); // Need enough mem to boot linux. let mem_size = 64 << 20; @@ -1030,7 +1021,7 @@ pub(crate) mod tests { // Wait for vCPUs to initialize their TLS before moving forward. barrier.wait(); - (vcpu_handle, vcpu_exit_evt) + (vcpu_handle, vcpu_exit_evt, vm_mem) } #[test] @@ -1043,7 +1034,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_tls() { - let (_, _, mut vcpu, _) = setup_vcpu(0x1000); + let (_, _, mut vcpu, _mem) = setup_vcpu(0x1000); // Running on the TLS vcpu should fail before we actually initialize it. unsafe { @@ -1084,7 +1075,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_kick() { Vcpu::register_kick_signal_handler(); - let (_, vm, mut vcpu, _) = setup_vcpu(0x1000); + let (_, vm, mut vcpu, _mem) = setup_vcpu(0x1000); let mut kvm_run = kvm_ioctls::KvmRunWrapper::mmap_from_fd(&vcpu.kvm_vcpu.fd, vm.fd().run_size()) @@ -1139,7 +1130,7 @@ pub(crate) mod tests { #[test] fn test_immediate_exit_shortcircuits_execution() { - let (_, _, mut vcpu, _) = setup_vcpu(0x1000); + let (_, _, mut vcpu, _mem) = setup_vcpu(0x1000); vcpu.kvm_vcpu.fd.set_kvm_immediate_exit(1); // Set a dummy value to be returned by the emulate call @@ -1164,7 +1155,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_pause_resume() { - let (vcpu_handle, vcpu_exit_evt) = vcpu_configured_for_boot(); + let (vcpu_handle, vcpu_exit_evt, _mem) = vcpu_configured_for_boot(); // Queue a Resume event, expect a response. queue_event_expect_response(&vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); @@ -1196,7 +1187,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_save_state_events() { - let (vcpu_handle, _vcpu_exit_evt) = vcpu_configured_for_boot(); + let (vcpu_handle, _vcpu_exit_evt, _mem) = vcpu_configured_for_boot(); // Queue a Resume event, expect a response. queue_event_expect_response(&vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); @@ -1229,7 +1220,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_dump_cpu_config() { - let (vcpu_handle, _) = vcpu_configured_for_boot(); + let (vcpu_handle, _, _mem) = vcpu_configured_for_boot(); // Queue a DumpCpuConfig event, expect a DumpedCpuConfig response. vcpu_handle diff --git a/src/vmm/src/vstate/vcpu/x86_64.rs b/src/vmm/src/vstate/vcpu/x86_64.rs index 39ff0879ee8..85c2db3868f 100644 --- a/src/vmm/src/vstate/vcpu/x86_64.rs +++ b/src/vmm/src/vstate/vcpu/x86_64.rs @@ -23,7 +23,6 @@ use crate::arch::x86_64::msr::{create_boot_msr_entries, MsrError}; use crate::arch::x86_64::regs::{SetupFpuError, SetupRegistersError, SetupSpecialRegistersError}; use crate::cpu_config::x86_64::{cpuid, CpuConfiguration}; use crate::logger::{IncMetric, METRICS}; -use crate::vstate::kvm::Kvm; use crate::vstate::memory::{Address, GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::{VcpuConfig, VcpuEmulation}; use crate::vstate::vm::Vm; @@ -165,7 +164,7 @@ impl KvmVcpu { /// /// * `index` - Represents the 0-based CPU index between [0, max vcpus). /// * `vm` - The vm to which this vcpu will get attached. - pub fn new(index: u8, vm: &Vm, kvm: &Kvm) -> Result { + pub fn new(index: u8, vm: &Vm) -> Result { let kvm_vcpu = vm .fd() .create_vcpu(index.into()) @@ -175,7 +174,7 @@ impl KvmVcpu { index, fd: kvm_vcpu, peripherals: Default::default(), - msrs_to_save: kvm.msrs_to_save.as_slice().to_vec(), + msrs_to_save: vm.msrs_to_save().to_vec(), }) } @@ -726,6 +725,7 @@ mod tests { StaticCpuTemplate, }; use crate::cpu_config::x86_64::cpuid::{Cpuid, CpuidEntry, CpuidKey}; + use crate::vstate::kvm::Kvm; use crate::vstate::vm::tests::{setup_vm, setup_vm_with_memory}; use crate::vstate::vm::Vm; @@ -750,7 +750,7 @@ mod tests { fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, KvmVcpu, GuestMemoryMmap) { let (kvm, vm, vm_mem) = setup_vm_with_memory(mem_size); vm.setup_irqchip().unwrap(); - let vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let vcpu = KvmVcpu::new(0, &vm).unwrap(); (kvm, vm, vcpu, vm_mem) } @@ -1168,11 +1168,11 @@ mod tests { #[test] fn test_get_msr_chunks_preserved_order() { // Regression test for #4666 - let (kvm, vm) = setup_vm(); - let vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, vm) = setup_vm(); + let vcpu = KvmVcpu::new(0, &vm).unwrap(); // The list of supported MSR indices, in the order they were returned by KVM - let msrs_to_save = kvm.msrs_to_save; + let msrs_to_save = vm.msrs_to_save(); // The MSRs after processing. The order should be identical to the one returned by KVM, with // the exception of deferred MSRs, which should be moved to the end (but show up in the same // order as they are listed in [`DEFERRED_MSRS`]. @@ -1185,7 +1185,6 @@ mod tests { .flat_map(|chunk| chunk.as_slice().iter()) .zip( msrs_to_save - .as_slice() .iter() .filter(|&idx| !DEFERRED_MSRS.contains(idx)) .chain(DEFERRED_MSRS.iter()), diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs deleted file mode 100644 index 19ca0b2a76c..00000000000 --- a/src/vmm/src/vstate/vm.rs +++ /dev/null @@ -1,464 +0,0 @@ -// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 -// -// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the THIRD-PARTY file. - -#[cfg(target_arch = "x86_64")] -use std::fmt; - -#[cfg(target_arch = "x86_64")] -use kvm_bindings::{ - kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, KVM_CLOCK_TSC_STABLE, - KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_PIT_SPEAKER_DUMMY, -}; -use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; -// use kvm_ioctls::{Kvm, VmFd}; -use kvm_ioctls::VmFd; -use serde::{Deserialize, Serialize}; - -#[cfg(target_arch = "aarch64")] -use crate::arch::aarch64::gic::GICDevice; -#[cfg(target_arch = "aarch64")] -use crate::arch::aarch64::gic::GicState; -#[cfg(target_arch = "x86_64")] -use crate::utils::u64_to_usize; -use crate::vstate::kvm::Kvm; -use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; - -/// Errors associated with the wrappers over KVM ioctls. -/// Needs `rustfmt::skip` to make multiline comments work -#[rustfmt::skip] -#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] -pub enum VmError { - /// Cannot set the memory regions: {0} - SetUserMemoryRegion(kvm_ioctls::Error), - #[cfg(target_arch = "aarch64")] - /// Error creating the global interrupt controller: {0} - VmCreateGIC(crate::arch::aarch64::gic::GicError), - /// Cannot open the VM file descriptor: {0} - VmFd(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to get KVM vm pit state: {0} - VmGetPit2(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to get KVM vm clock: {0} - VmGetClock(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to get KVM vm irqchip: {0} - VmGetIrqChip(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to set KVM vm pit state: {0} - VmSetPit2(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to set KVM vm clock: {0} - VmSetClock(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to set KVM vm irqchip: {0} - VmSetIrqChip(kvm_ioctls::Error), - /// Cannot configure the microvm: {0} - VmSetup(kvm_ioctls::Error), - #[cfg(target_arch = "aarch64")] - /// Failed to save the VM's GIC state: {0} - SaveGic(crate::arch::aarch64::gic::GicError), - #[cfg(target_arch = "aarch64")] - /// Failed to restore the VM's GIC state: {0} - RestoreGic(crate::arch::aarch64::gic::GicError), -} - -/// Error type for [`Vm::restore_state`] -#[allow(missing_docs)] -#[cfg(target_arch = "x86_64")] -#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] -pub enum RestoreStateError { - /// Set PIT2 error: {0} - SetPit2(kvm_ioctls::Error), - /// Set clock error: {0} - SetClock(kvm_ioctls::Error), - /// Set IrqChipPicMaster error: {0} - SetIrqChipPicMaster(kvm_ioctls::Error), - /// Set IrqChipPicSlave error: {0} - SetIrqChipPicSlave(kvm_ioctls::Error), - /// Set IrqChipIoAPIC error: {0} - SetIrqChipIoAPIC(kvm_ioctls::Error), - /// VM error: {0} - VmError(VmError), -} - -/// Error type for [`Vm::restore_state`] -#[cfg(target_arch = "aarch64")] -#[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum RestoreStateError { - /// {0} - GicError(crate::arch::aarch64::gic::GicError), - /// {0} - VmError(VmError), -} - -/// A wrapper around creating and using a VM. -#[derive(Debug)] -pub struct Vm { - fd: VmFd, - - // Arm specific fields. - // On aarch64 we need to keep around the fd obtained by creating the VGIC device. - #[cfg(target_arch = "aarch64")] - irqchip_handle: Option, -} - -/// Contains Vm functions that are usable across CPU architectures -impl Vm { - /// Create a new `Vm` struct. - pub fn new(kvm: &Kvm) -> Result { - // Create fd for interacting with kvm-vm specific functions. - let vm_fd = kvm.fd.create_vm().map_err(VmError::VmFd)?; - - #[cfg(target_arch = "aarch64")] - { - Ok(Vm { - fd: vm_fd, - irqchip_handle: None, - }) - } - - #[cfg(target_arch = "x86_64")] - { - Ok(Vm { fd: vm_fd }) - } - } - - /// Initializes the guest memory. - pub fn memory_init(&self, guest_mem: &GuestMemoryMmap) -> Result<(), VmError> { - self.set_kvm_memory_regions(guest_mem)?; - #[cfg(target_arch = "x86_64")] - self.fd - .set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS)) - .map_err(VmError::VmSetup)?; - - Ok(()) - } - - pub(crate) fn set_kvm_memory_regions( - &self, - guest_mem: &GuestMemoryMmap, - ) -> Result<(), VmError> { - guest_mem - .iter() - .zip(0u32..) - .try_for_each(|(region, slot)| { - let flags = if region.bitmap().is_some() { - KVM_MEM_LOG_DIRTY_PAGES - } else { - 0 - }; - - let memory_region = kvm_userspace_memory_region { - slot, - guest_phys_addr: region.start_addr().raw_value(), - memory_size: region.len(), - // It's safe to unwrap because the guest address is valid. - userspace_addr: guest_mem.get_host_address(region.start_addr()).unwrap() as u64, - flags, - }; - - // SAFETY: Safe because the fd is a valid KVM file descriptor. - unsafe { self.fd.set_user_memory_region(memory_region) } - }) - .map_err(VmError::SetUserMemoryRegion)?; - Ok(()) - } - - /// Gets a reference to the kvm file descriptor owned by this VM. - pub fn fd(&self) -> &VmFd { - &self.fd - } -} - -#[cfg(target_arch = "aarch64")] -impl Vm { - /// Creates the GIC (Global Interrupt Controller). - pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), VmError> { - self.irqchip_handle = Some( - crate::arch::aarch64::gic::create_gic(&self.fd, vcpu_count.into(), None) - .map_err(VmError::VmCreateGIC)?, - ); - Ok(()) - } - - /// Gets a reference to the irqchip of the VM. - pub fn get_irqchip(&self) -> &GICDevice { - self.irqchip_handle.as_ref().expect("IRQ chip not set") - } - - /// Saves and returns the Kvm Vm state. - pub fn save_state(&self, mpidrs: &[u64]) -> Result { - Ok(VmState { - gic: self - .get_irqchip() - .save_device(mpidrs) - .map_err(VmError::SaveGic)?, - }) - } - - /// Restore the KVM VM state - /// - /// # Errors - /// - /// When [`GICDevice::restore_device`] errors. - pub fn restore_state( - &mut self, - mpidrs: &[u64], - state: &VmState, - ) -> Result<(), RestoreStateError> { - self.get_irqchip() - .restore_device(mpidrs, &state.gic) - .map_err(RestoreStateError::GicError)?; - Ok(()) - } -} - -/// Structure holding an general specific VM state. -#[cfg(target_arch = "aarch64")] -#[derive(Debug, Default, Serialize, Deserialize)] -pub struct VmState { - /// GIC state. - pub gic: GicState, -} - -#[cfg(target_arch = "x86_64")] -impl Vm { - /// Restores the KVM VM state. - /// - /// # Errors - /// - /// When: - /// - [`kvm_ioctls::VmFd::set_pit`] errors. - /// - [`kvm_ioctls::VmFd::set_clock`] errors. - /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. - /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. - /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. - pub fn restore_state(&mut self, state: &VmState) -> Result<(), RestoreStateError> { - self.fd - .set_pit2(&state.pitstate) - .map_err(RestoreStateError::SetPit2)?; - self.fd - .set_clock(&state.clock) - .map_err(RestoreStateError::SetClock)?; - self.fd - .set_irqchip(&state.pic_master) - .map_err(RestoreStateError::SetIrqChipPicMaster)?; - self.fd - .set_irqchip(&state.pic_slave) - .map_err(RestoreStateError::SetIrqChipPicSlave)?; - self.fd - .set_irqchip(&state.ioapic) - .map_err(RestoreStateError::SetIrqChipIoAPIC)?; - Ok(()) - } - - /// Creates the irq chip and an in-kernel device model for the PIT. - pub fn setup_irqchip(&self) -> Result<(), VmError> { - self.fd.create_irq_chip().map_err(VmError::VmSetup)?; - // We need to enable the emulation of a dummy speaker port stub so that writing to port 0x61 - // (i.e. KVM_SPEAKER_BASE_ADDRESS) does not trigger an exit to user space. - let pit_config = kvm_pit_config { - flags: KVM_PIT_SPEAKER_DUMMY, - ..Default::default() - }; - self.fd.create_pit2(pit_config).map_err(VmError::VmSetup) - } - - /// Saves and returns the Kvm Vm state. - pub fn save_state(&self) -> Result { - let pitstate = self.fd.get_pit2().map_err(VmError::VmGetPit2)?; - - let mut clock = self.fd.get_clock().map_err(VmError::VmGetClock)?; - // This bit is not accepted in SET_CLOCK, clear it. - clock.flags &= !KVM_CLOCK_TSC_STABLE; - - let mut pic_master = kvm_irqchip { - chip_id: KVM_IRQCHIP_PIC_MASTER, - ..Default::default() - }; - self.fd - .get_irqchip(&mut pic_master) - .map_err(VmError::VmGetIrqChip)?; - - let mut pic_slave = kvm_irqchip { - chip_id: KVM_IRQCHIP_PIC_SLAVE, - ..Default::default() - }; - self.fd - .get_irqchip(&mut pic_slave) - .map_err(VmError::VmGetIrqChip)?; - - let mut ioapic = kvm_irqchip { - chip_id: KVM_IRQCHIP_IOAPIC, - ..Default::default() - }; - self.fd - .get_irqchip(&mut ioapic) - .map_err(VmError::VmGetIrqChip)?; - - Ok(VmState { - pitstate, - clock, - pic_master, - pic_slave, - ioapic, - }) - } -} - -#[cfg(target_arch = "x86_64")] -#[derive(Default, Deserialize, Serialize)] -/// Structure holding VM kvm state. -pub struct VmState { - pitstate: kvm_pit_state2, - clock: kvm_clock_data, - // TODO: rename this field to adopt inclusive language once Linux updates it, too. - pic_master: kvm_irqchip, - // TODO: rename this field to adopt inclusive language once Linux updates it, too. - pic_slave: kvm_irqchip, - ioapic: kvm_irqchip, -} - -#[cfg(target_arch = "x86_64")] -impl fmt::Debug for VmState { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("VmState") - .field("pitstate", &self.pitstate) - .field("clock", &self.clock) - .field("pic_master", &"?") - .field("pic_slave", &"?") - .field("ioapic", &"?") - .finish() - } -} - -#[cfg(test)] -pub(crate) mod tests { - use super::*; - #[cfg(target_arch = "x86_64")] - use crate::snapshot::Snapshot; - use crate::test_utils::single_region_mem; - use crate::vstate::memory::GuestMemoryMmap; - - // Auxiliary function being used throughout the tests. - pub(crate) fn setup_vm() -> (Kvm, Vm) { - let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); - let vm = Vm::new(&kvm).expect("Cannot create new vm"); - (kvm, vm) - } - - // Auxiliary function being used throughout the tests. - pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm, GuestMemoryMmap) { - let (kvm, vm) = setup_vm(); - let gm = single_region_mem(mem_size); - vm.memory_init(&gm).unwrap(); - (kvm, vm, gm) - } - - #[test] - fn test_new() { - // Testing with a valid /dev/kvm descriptor. - let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); - Vm::new(&kvm).unwrap(); - } - - #[test] - fn test_vm_memory_init() { - let (_, vm) = setup_vm(); - // Create valid memory region and test that the initialization is successful. - let gm = single_region_mem(0x1000); - vm.memory_init(&gm).unwrap(); - } - - #[cfg(target_arch = "x86_64")] - #[test] - fn test_vm_save_restore_state() { - let (_, vm) = setup_vm(); - // Irqchips, clock and pitstate are not configured so trying to save state should fail. - vm.save_state().unwrap_err(); - - let (_, vm, _mem) = setup_vm_with_memory(0x1000); - vm.setup_irqchip().unwrap(); - - let vm_state = vm.save_state().unwrap(); - assert_eq!( - vm_state.pitstate.flags | KVM_PIT_SPEAKER_DUMMY, - KVM_PIT_SPEAKER_DUMMY - ); - assert_eq!(vm_state.clock.flags & KVM_CLOCK_TSC_STABLE, 0); - assert_eq!(vm_state.pic_master.chip_id, KVM_IRQCHIP_PIC_MASTER); - assert_eq!(vm_state.pic_slave.chip_id, KVM_IRQCHIP_PIC_SLAVE); - assert_eq!(vm_state.ioapic.chip_id, KVM_IRQCHIP_IOAPIC); - - let (_, mut vm, _mem) = setup_vm_with_memory(0x1000); - vm.setup_irqchip().unwrap(); - - vm.restore_state(&vm_state).unwrap(); - } - - #[cfg(target_arch = "x86_64")] - #[test] - fn test_vm_save_restore_state_bad_irqchip() { - use kvm_bindings::KVM_NR_IRQCHIPS; - - let (_, vm, _mem) = setup_vm_with_memory(0x1000); - vm.setup_irqchip().unwrap(); - let mut vm_state = vm.save_state().unwrap(); - - let (_, mut vm, _mem) = setup_vm_with_memory(0x1000); - vm.setup_irqchip().unwrap(); - - // Try to restore an invalid PIC Master chip ID - let orig_master_chip_id = vm_state.pic_master.chip_id; - vm_state.pic_master.chip_id = KVM_NR_IRQCHIPS; - vm.restore_state(&vm_state).unwrap_err(); - vm_state.pic_master.chip_id = orig_master_chip_id; - - // Try to restore an invalid PIC Slave chip ID - let orig_slave_chip_id = vm_state.pic_slave.chip_id; - vm_state.pic_slave.chip_id = KVM_NR_IRQCHIPS; - vm.restore_state(&vm_state).unwrap_err(); - vm_state.pic_slave.chip_id = orig_slave_chip_id; - - // Try to restore an invalid IOPIC chip ID - vm_state.ioapic.chip_id = KVM_NR_IRQCHIPS; - vm.restore_state(&vm_state).unwrap_err(); - } - - #[cfg(target_arch = "x86_64")] - #[test] - fn test_vmstate_serde() { - let mut snapshot_data = vec![0u8; 10000]; - - let (_, mut vm, _) = setup_vm_with_memory(0x1000); - vm.setup_irqchip().unwrap(); - let state = vm.save_state().unwrap(); - Snapshot::serialize(&mut snapshot_data.as_mut_slice(), &state).unwrap(); - let restored_state: VmState = Snapshot::deserialize(&mut snapshot_data.as_slice()).unwrap(); - - vm.restore_state(&restored_state).unwrap(); - } - - #[test] - fn test_set_kvm_memory_regions() { - let (_, vm) = setup_vm(); - - let gm = single_region_mem(0x1000); - let res = vm.set_kvm_memory_regions(&gm); - res.unwrap(); - - // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE - // will result in error. - let gm = single_region_mem(0x10); - let res = vm.set_kvm_memory_regions(&gm); - assert_eq!( - res.unwrap_err().to_string(), - "Cannot set the memory regions: Invalid argument (os error 22)" - ); - } -} diff --git a/src/vmm/src/vstate/vm/aarch64.rs b/src/vmm/src/vstate/vm/aarch64.rs new file mode 100644 index 00000000000..a059803ba97 --- /dev/null +++ b/src/vmm/src/vstate/vm/aarch64.rs @@ -0,0 +1,94 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use kvm_ioctls::VmFd; +use serde::{Deserialize, Serialize}; + +use super::VmError; +use crate::arch::aarch64::gic::GicState; +use crate::Kvm; + +/// Structure representing the current architecture's understand of what a "virtual machine" is. +#[derive(Debug)] +pub struct ArchVm { + pub(super) fd: VmFd, + // On aarch64 we need to keep around the fd obtained by creating the VGIC device. + irqchip_handle: Option, +} + +/// Error type for [`Vm::restore_state`] +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum ArchVmError { + /// Error creating the global interrupt controller: {0} + VmCreateGIC(crate::arch::aarch64::gic::GicError), + /// Failed to save the VM's GIC state: {0} + SaveGic(crate::arch::aarch64::gic::GicError), + /// Failed to restore the VM's GIC state: {0} + RestoreGic(crate::arch::aarch64::gic::GicError), +} + +impl ArchVm { + /// Create a new `Vm` struct. + pub fn new(kvm: &Kvm) -> Result { + let fd = kvm.fd.create_vm().map_err(VmError::VmFd)?; + Ok(ArchVm { + fd, + irqchip_handle: None, + }) + } + + pub(super) fn arch_pre_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> { + Ok(()) + } + + pub(super) fn arch_post_create_vcpus(&mut self, nr_vcpus: u8) -> Result<(), ArchVmError> { + // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) before setting up the + // IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP + // was already initialized. + // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. + self.setup_irqchip(nr_vcpus) + } + + /// Creates the GIC (Global Interrupt Controller). + pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), ArchVmError> { + self.irqchip_handle = Some( + crate::arch::aarch64::gic::create_gic(&self.fd, vcpu_count.into(), None) + .map_err(ArchVmError::VmCreateGIC)?, + ); + Ok(()) + } + + /// Gets a reference to the irqchip of the VM. + pub fn get_irqchip(&self) -> &crate::arch::aarch64::gic::GICDevice { + self.irqchip_handle.as_ref().expect("IRQ chip not set") + } + + /// Saves and returns the Kvm Vm state. + pub fn save_state(&self, mpidrs: &[u64]) -> Result { + Ok(VmState { + gic: self + .get_irqchip() + .save_device(mpidrs) + .map_err(ArchVmError::SaveGic)?, + }) + } + + /// Restore the KVM VM state + /// + /// # Errors + /// + /// When [`crate::arch::aarch64::gic::GICDevice::restore_device`] errors. + pub fn restore_state(&mut self, mpidrs: &[u64], state: &VmState) -> Result<(), ArchVmError> { + self.get_irqchip() + .restore_device(mpidrs, &state.gic) + .map_err(ArchVmError::RestoreGic)?; + Ok(()) + } +} + +/// Structure holding an general specific VM state. +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct VmState { + /// GIC state. + pub gic: GicState, +} diff --git a/src/vmm/src/vstate/vm/mod.rs b/src/vmm/src/vstate/vm/mod.rs new file mode 100644 index 00000000000..c9c0e7e7c2f --- /dev/null +++ b/src/vmm/src/vstate/vm/mod.rs @@ -0,0 +1,169 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; +use kvm_ioctls::VmFd; +use vmm_sys_util::eventfd::EventFd; + +use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; + +#[cfg(target_arch = "x86_64")] +#[path = "x86_64.rs"] +mod arch; +#[cfg(target_arch = "aarch64")] +#[path = "aarch64.rs"] +mod arch; + +pub use arch::{ArchVm as Vm, ArchVmError, VmState}; + +use crate::vstate::vcpu::VcpuError; +use crate::Vcpu; + +/// Errors associated with the wrappers over KVM ioctls. +/// Needs `rustfmt::skip` to make multiline comments work +#[rustfmt::skip] +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum VmError { + /// Cannot set the memory regions: {0} + SetUserMemoryRegion(kvm_ioctls::Error), + /// Cannot open the VM file descriptor: {0} + VmFd(kvm_ioctls::Error), + /// {0} + Arch(#[from] ArchVmError), + /// Error during eventfd operations: {0} + EventFd(std::io::Error), + /// Failed to create vcpu: {0} + CreateVcpu(VcpuError), +} + +/// Contains Vm functions that are usable across CPU architectures +impl Vm { + /// Creates the specified number of [`Vcpu`]s. + /// + /// The returned [`EventFd`] is written to whenever any of the vcpus exit. + pub fn create_vcpus(&mut self, vcpu_count: u8) -> Result<(Vec, EventFd), VmError> { + self.arch_pre_create_vcpus(vcpu_count)?; + + let exit_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(VmError::EventFd)?; + + let mut vcpus = Vec::with_capacity(vcpu_count as usize); + for cpu_idx in 0..vcpu_count { + let exit_evt = exit_evt.try_clone().map_err(VmError::EventFd)?; + let vcpu = Vcpu::new(cpu_idx, self, exit_evt).map_err(VmError::CreateVcpu)?; + vcpus.push(vcpu); + } + + self.arch_post_create_vcpus(vcpu_count)?; + + Ok((vcpus, exit_evt)) + } + + /// Initializes the guest memory. + pub fn memory_init(&self, guest_mem: &GuestMemoryMmap) -> Result<(), VmError> { + self.set_kvm_memory_regions(guest_mem) + } + + pub(crate) fn set_kvm_memory_regions( + &self, + guest_mem: &GuestMemoryMmap, + ) -> Result<(), VmError> { + guest_mem + .iter() + .zip(0u32..) + .try_for_each(|(region, slot)| { + let flags = if region.bitmap().is_some() { + KVM_MEM_LOG_DIRTY_PAGES + } else { + 0 + }; + + let memory_region = kvm_userspace_memory_region { + slot, + guest_phys_addr: region.start_addr().raw_value(), + memory_size: region.len(), + userspace_addr: region.as_ptr() as u64, + flags, + }; + + // SAFETY: Safe because the fd is a valid KVM file descriptor. + unsafe { self.fd.set_user_memory_region(memory_region) } + }) + .map_err(VmError::SetUserMemoryRegion)?; + Ok(()) + } + + /// Gets a reference to the kvm file descriptor owned by this VM. + pub fn fd(&self) -> &VmFd { + &self.fd + } +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + use crate::test_utils::single_region_mem; + use crate::vstate::kvm::Kvm; + use crate::vstate::memory::GuestMemoryMmap; + + // Auxiliary function being used throughout the tests. + pub(crate) fn setup_vm() -> (Kvm, Vm) { + let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); + let vm = Vm::new(&kvm).expect("Cannot create new vm"); + (kvm, vm) + } + + // Auxiliary function being used throughout the tests. + pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm, GuestMemoryMmap) { + let (kvm, vm) = setup_vm(); + let gm = single_region_mem(mem_size); + vm.memory_init(&gm).unwrap(); + (kvm, vm, gm) + } + + #[test] + fn test_new() { + // Testing with a valid /dev/kvm descriptor. + let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); + Vm::new(&kvm).unwrap(); + } + + #[test] + fn test_vm_memory_init() { + let (_, vm) = setup_vm(); + // Create valid memory region and test that the initialization is successful. + let gm = single_region_mem(0x1000); + vm.memory_init(&gm).unwrap(); + } + + #[test] + fn test_set_kvm_memory_regions() { + let (_, vm) = setup_vm(); + + let gm = single_region_mem(0x1000); + let res = vm.set_kvm_memory_regions(&gm); + res.unwrap(); + + // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE + // will result in error. + let gm = single_region_mem(0x10); + let res = vm.set_kvm_memory_regions(&gm); + assert_eq!( + res.unwrap_err().to_string(), + "Cannot set the memory regions: Invalid argument (os error 22)" + ); + } + + #[test] + fn test_create_vcpus() { + let vcpu_count = 2; + let (_, mut vm, _) = setup_vm_with_memory(128 << 20); + + let (vcpu_vec, _) = vm.create_vcpus(vcpu_count).unwrap(); + + assert_eq!(vcpu_vec.len(), vcpu_count as usize); + } +} diff --git a/src/vmm/src/vstate/vm/x86_64.rs b/src/vmm/src/vstate/vm/x86_64.rs new file mode 100644 index 00000000000..f44e825b6f9 --- /dev/null +++ b/src/vmm/src/vstate/vm/x86_64.rs @@ -0,0 +1,269 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::fmt; + +use kvm_bindings::{ + kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, MsrList, KVM_CLOCK_TSC_STABLE, + KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_PIT_SPEAKER_DUMMY, +}; +use kvm_ioctls::VmFd; +use serde::{Deserialize, Serialize}; + +use crate::arch::x86_64::msr::MsrError; +use crate::utils::u64_to_usize; +use crate::vstate::vm::VmError; + +/// Error type for [`Vm::restore_state`] +#[allow(missing_docs)] +#[cfg(target_arch = "x86_64")] +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum ArchVmError { + /// Set PIT2 error: {0} + SetPit2(kvm_ioctls::Error), + /// Set clock error: {0} + SetClock(kvm_ioctls::Error), + /// Set IrqChipPicMaster error: {0} + SetIrqChipPicMaster(kvm_ioctls::Error), + /// Set IrqChipPicSlave error: {0} + SetIrqChipPicSlave(kvm_ioctls::Error), + /// Set IrqChipIoAPIC error: {0} + SetIrqChipIoAPIC(kvm_ioctls::Error), + /// Failed to get KVM vm pit state: {0} + VmGetPit2(kvm_ioctls::Error), + /// Failed to get KVM vm clock: {0} + VmGetClock(kvm_ioctls::Error), + /// Failed to get KVM vm irqchip: {0} + VmGetIrqChip(kvm_ioctls::Error), + /// Failed to set KVM vm irqchip: {0} + VmSetIrqChip(kvm_ioctls::Error), + /// Failed to get MSR index list to save into snapshots: {0} + GetMsrsToSave(MsrError), + /// Failed during KVM_SET_TSS_ADDRESS: {0} + SetTssAddress(kvm_ioctls::Error), +} + +/// Structure representing the current architecture's understand of what a "virtual machine" is. +#[derive(Debug)] +pub struct ArchVm { + pub(super) fd: VmFd, + msrs_to_save: MsrList, +} + +impl ArchVm { + /// Create a new `Vm` struct. + pub fn new(kvm: &crate::vstate::kvm::Kvm) -> Result { + let fd = kvm.fd.create_vm().map_err(VmError::VmFd)?; + let msrs_to_save = kvm.msrs_to_save().map_err(ArchVmError::GetMsrsToSave)?; + + fd.set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS)) + .map_err(ArchVmError::SetTssAddress)?; + + Ok(ArchVm { fd, msrs_to_save }) + } + + pub(super) fn arch_pre_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> { + // For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` + self.setup_irqchip() + } + + pub(super) fn arch_post_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> { + Ok(()) + } + + /// Restores the KVM VM state. + /// + /// # Errors + /// + /// When: + /// - [`kvm_ioctls::VmFd::set_pit`] errors. + /// - [`kvm_ioctls::VmFd::set_clock`] errors. + /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. + /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. + /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. + pub fn restore_state(&mut self, state: &VmState) -> Result<(), ArchVmError> { + self.fd + .set_pit2(&state.pitstate) + .map_err(ArchVmError::SetPit2)?; + self.fd + .set_clock(&state.clock) + .map_err(ArchVmError::SetClock)?; + self.fd + .set_irqchip(&state.pic_master) + .map_err(ArchVmError::SetIrqChipPicMaster)?; + self.fd + .set_irqchip(&state.pic_slave) + .map_err(ArchVmError::SetIrqChipPicSlave)?; + self.fd + .set_irqchip(&state.ioapic) + .map_err(ArchVmError::SetIrqChipIoAPIC)?; + Ok(()) + } + + /// Creates the irq chip and an in-kernel device model for the PIT. + pub fn setup_irqchip(&self) -> Result<(), ArchVmError> { + self.fd + .create_irq_chip() + .map_err(ArchVmError::VmSetIrqChip)?; + // We need to enable the emulation of a dummy speaker port stub so that writing to port 0x61 + // (i.e. KVM_SPEAKER_BASE_ADDRESS) does not trigger an exit to user space. + let pit_config = kvm_pit_config { + flags: KVM_PIT_SPEAKER_DUMMY, + ..Default::default() + }; + self.fd + .create_pit2(pit_config) + .map_err(ArchVmError::VmSetIrqChip) + } + + /// Saves and returns the Kvm Vm state. + pub fn save_state(&self) -> Result { + let pitstate = self.fd.get_pit2().map_err(ArchVmError::VmGetPit2)?; + + let mut clock = self.fd.get_clock().map_err(ArchVmError::VmGetClock)?; + // This bit is not accepted in SET_CLOCK, clear it. + clock.flags &= !KVM_CLOCK_TSC_STABLE; + + let mut pic_master = kvm_irqchip { + chip_id: KVM_IRQCHIP_PIC_MASTER, + ..Default::default() + }; + self.fd + .get_irqchip(&mut pic_master) + .map_err(ArchVmError::VmGetIrqChip)?; + + let mut pic_slave = kvm_irqchip { + chip_id: KVM_IRQCHIP_PIC_SLAVE, + ..Default::default() + }; + self.fd + .get_irqchip(&mut pic_slave) + .map_err(ArchVmError::VmGetIrqChip)?; + + let mut ioapic = kvm_irqchip { + chip_id: KVM_IRQCHIP_IOAPIC, + ..Default::default() + }; + self.fd + .get_irqchip(&mut ioapic) + .map_err(ArchVmError::VmGetIrqChip)?; + + Ok(VmState { + pitstate, + clock, + pic_master, + pic_slave, + ioapic, + }) + } + + /// Gets the list of MSRs to save when creating snapshots + pub fn msrs_to_save(&self) -> &[u32] { + self.msrs_to_save.as_slice() + } +} + +#[derive(Default, Deserialize, Serialize)] +/// Structure holding VM kvm state. +pub struct VmState { + pitstate: kvm_pit_state2, + clock: kvm_clock_data, + // TODO: rename this field to adopt inclusive language once Linux updates it, too. + pic_master: kvm_irqchip, + // TODO: rename this field to adopt inclusive language once Linux updates it, too. + pic_slave: kvm_irqchip, + ioapic: kvm_irqchip, +} + +impl fmt::Debug for VmState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("VmState") + .field("pitstate", &self.pitstate) + .field("clock", &self.clock) + .field("pic_master", &"?") + .field("pic_slave", &"?") + .field("ioapic", &"?") + .finish() + } +} + +#[cfg(test)] +mod tests { + use kvm_bindings::{ + KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, + KVM_PIT_SPEAKER_DUMMY, + }; + + use crate::snapshot::Snapshot; + use crate::vstate::vm::tests::{setup_vm, setup_vm_with_memory}; + use crate::vstate::vm::VmState; + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_vm_save_restore_state() { + let (_, vm) = setup_vm(); + // Irqchips, clock and pitstate are not configured so trying to save state should fail. + vm.save_state().unwrap_err(); + + let (_, vm, _mem) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); + + let vm_state = vm.save_state().unwrap(); + assert_eq!( + vm_state.pitstate.flags | KVM_PIT_SPEAKER_DUMMY, + KVM_PIT_SPEAKER_DUMMY + ); + assert_eq!(vm_state.clock.flags & KVM_CLOCK_TSC_STABLE, 0); + assert_eq!(vm_state.pic_master.chip_id, KVM_IRQCHIP_PIC_MASTER); + assert_eq!(vm_state.pic_slave.chip_id, KVM_IRQCHIP_PIC_SLAVE); + assert_eq!(vm_state.ioapic.chip_id, KVM_IRQCHIP_IOAPIC); + + let (_, mut vm, _mem) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); + + vm.restore_state(&vm_state).unwrap(); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_vm_save_restore_state_bad_irqchip() { + use kvm_bindings::KVM_NR_IRQCHIPS; + + let (_, vm, _mem) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); + let mut vm_state = vm.save_state().unwrap(); + + let (_, mut vm, _mem) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); + + // Try to restore an invalid PIC Master chip ID + let orig_master_chip_id = vm_state.pic_master.chip_id; + vm_state.pic_master.chip_id = KVM_NR_IRQCHIPS; + vm.restore_state(&vm_state).unwrap_err(); + vm_state.pic_master.chip_id = orig_master_chip_id; + + // Try to restore an invalid PIC Slave chip ID + let orig_slave_chip_id = vm_state.pic_slave.chip_id; + vm_state.pic_slave.chip_id = KVM_NR_IRQCHIPS; + vm.restore_state(&vm_state).unwrap_err(); + vm_state.pic_slave.chip_id = orig_slave_chip_id; + + // Try to restore an invalid IOPIC chip ID + vm_state.ioapic.chip_id = KVM_NR_IRQCHIPS; + vm.restore_state(&vm_state).unwrap_err(); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_vmstate_serde() { + let mut snapshot_data = vec![0u8; 10000]; + + let (_, mut vm, _) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); + let state = vm.save_state().unwrap(); + Snapshot::serialize(&mut snapshot_data.as_mut_slice(), &state).unwrap(); + let restored_state: VmState = Snapshot::deserialize(&mut snapshot_data.as_slice()).unwrap(); + + vm.restore_state(&restored_state).unwrap(); + } +}