From 619f07fad5d0f289db89e4af6b77291b909b2ea1 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Wed, 29 Jan 2025 12:13:40 +0000 Subject: [PATCH 01/11] refactor: move arch specific code from vm.rs into modules Follow a similar approach we already take for the vcpu module, where all the architecture specific stuff is inside aarch64/x86_64 modules, keeping only the common code in the `vm` module. No functional change intended. Signed-off-by: Patrick Roy --- src/vmm/src/vstate/vm.rs | 464 ------------------------------- src/vmm/src/vstate/vm/aarch64.rs | 66 +++++ src/vmm/src/vstate/vm/mod.rs | 199 +++++++++++++ src/vmm/src/vstate/vm/x86_64.rs | 221 +++++++++++++++ 4 files changed, 486 insertions(+), 464 deletions(-) delete mode 100644 src/vmm/src/vstate/vm.rs create mode 100644 src/vmm/src/vstate/vm/aarch64.rs create mode 100644 src/vmm/src/vstate/vm/mod.rs create mode 100644 src/vmm/src/vstate/vm/x86_64.rs diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs deleted file mode 100644 index 19ca0b2a76c..00000000000 --- a/src/vmm/src/vstate/vm.rs +++ /dev/null @@ -1,464 +0,0 @@ -// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 -// -// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the THIRD-PARTY file. - -#[cfg(target_arch = "x86_64")] -use std::fmt; - -#[cfg(target_arch = "x86_64")] -use kvm_bindings::{ - kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, KVM_CLOCK_TSC_STABLE, - KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_PIT_SPEAKER_DUMMY, -}; -use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; -// use kvm_ioctls::{Kvm, VmFd}; -use kvm_ioctls::VmFd; -use serde::{Deserialize, Serialize}; - -#[cfg(target_arch = "aarch64")] -use crate::arch::aarch64::gic::GICDevice; -#[cfg(target_arch = "aarch64")] -use crate::arch::aarch64::gic::GicState; -#[cfg(target_arch = "x86_64")] -use crate::utils::u64_to_usize; -use crate::vstate::kvm::Kvm; -use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; - -/// Errors associated with the wrappers over KVM ioctls. -/// Needs `rustfmt::skip` to make multiline comments work -#[rustfmt::skip] -#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] -pub enum VmError { - /// Cannot set the memory regions: {0} - SetUserMemoryRegion(kvm_ioctls::Error), - #[cfg(target_arch = "aarch64")] - /// Error creating the global interrupt controller: {0} - VmCreateGIC(crate::arch::aarch64::gic::GicError), - /// Cannot open the VM file descriptor: {0} - VmFd(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to get KVM vm pit state: {0} - VmGetPit2(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to get KVM vm clock: {0} - VmGetClock(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to get KVM vm irqchip: {0} - VmGetIrqChip(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to set KVM vm pit state: {0} - VmSetPit2(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to set KVM vm clock: {0} - VmSetClock(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to set KVM vm irqchip: {0} - VmSetIrqChip(kvm_ioctls::Error), - /// Cannot configure the microvm: {0} - VmSetup(kvm_ioctls::Error), - #[cfg(target_arch = "aarch64")] - /// Failed to save the VM's GIC state: {0} - SaveGic(crate::arch::aarch64::gic::GicError), - #[cfg(target_arch = "aarch64")] - /// Failed to restore the VM's GIC state: {0} - RestoreGic(crate::arch::aarch64::gic::GicError), -} - -/// Error type for [`Vm::restore_state`] -#[allow(missing_docs)] -#[cfg(target_arch = "x86_64")] -#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] -pub enum RestoreStateError { - /// Set PIT2 error: {0} - SetPit2(kvm_ioctls::Error), - /// Set clock error: {0} - SetClock(kvm_ioctls::Error), - /// Set IrqChipPicMaster error: {0} - SetIrqChipPicMaster(kvm_ioctls::Error), - /// Set IrqChipPicSlave error: {0} - SetIrqChipPicSlave(kvm_ioctls::Error), - /// Set IrqChipIoAPIC error: {0} - SetIrqChipIoAPIC(kvm_ioctls::Error), - /// VM error: {0} - VmError(VmError), -} - -/// Error type for [`Vm::restore_state`] -#[cfg(target_arch = "aarch64")] -#[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum RestoreStateError { - /// {0} - GicError(crate::arch::aarch64::gic::GicError), - /// {0} - VmError(VmError), -} - -/// A wrapper around creating and using a VM. -#[derive(Debug)] -pub struct Vm { - fd: VmFd, - - // Arm specific fields. - // On aarch64 we need to keep around the fd obtained by creating the VGIC device. - #[cfg(target_arch = "aarch64")] - irqchip_handle: Option, -} - -/// Contains Vm functions that are usable across CPU architectures -impl Vm { - /// Create a new `Vm` struct. - pub fn new(kvm: &Kvm) -> Result { - // Create fd for interacting with kvm-vm specific functions. - let vm_fd = kvm.fd.create_vm().map_err(VmError::VmFd)?; - - #[cfg(target_arch = "aarch64")] - { - Ok(Vm { - fd: vm_fd, - irqchip_handle: None, - }) - } - - #[cfg(target_arch = "x86_64")] - { - Ok(Vm { fd: vm_fd }) - } - } - - /// Initializes the guest memory. - pub fn memory_init(&self, guest_mem: &GuestMemoryMmap) -> Result<(), VmError> { - self.set_kvm_memory_regions(guest_mem)?; - #[cfg(target_arch = "x86_64")] - self.fd - .set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS)) - .map_err(VmError::VmSetup)?; - - Ok(()) - } - - pub(crate) fn set_kvm_memory_regions( - &self, - guest_mem: &GuestMemoryMmap, - ) -> Result<(), VmError> { - guest_mem - .iter() - .zip(0u32..) - .try_for_each(|(region, slot)| { - let flags = if region.bitmap().is_some() { - KVM_MEM_LOG_DIRTY_PAGES - } else { - 0 - }; - - let memory_region = kvm_userspace_memory_region { - slot, - guest_phys_addr: region.start_addr().raw_value(), - memory_size: region.len(), - // It's safe to unwrap because the guest address is valid. - userspace_addr: guest_mem.get_host_address(region.start_addr()).unwrap() as u64, - flags, - }; - - // SAFETY: Safe because the fd is a valid KVM file descriptor. - unsafe { self.fd.set_user_memory_region(memory_region) } - }) - .map_err(VmError::SetUserMemoryRegion)?; - Ok(()) - } - - /// Gets a reference to the kvm file descriptor owned by this VM. - pub fn fd(&self) -> &VmFd { - &self.fd - } -} - -#[cfg(target_arch = "aarch64")] -impl Vm { - /// Creates the GIC (Global Interrupt Controller). - pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), VmError> { - self.irqchip_handle = Some( - crate::arch::aarch64::gic::create_gic(&self.fd, vcpu_count.into(), None) - .map_err(VmError::VmCreateGIC)?, - ); - Ok(()) - } - - /// Gets a reference to the irqchip of the VM. - pub fn get_irqchip(&self) -> &GICDevice { - self.irqchip_handle.as_ref().expect("IRQ chip not set") - } - - /// Saves and returns the Kvm Vm state. - pub fn save_state(&self, mpidrs: &[u64]) -> Result { - Ok(VmState { - gic: self - .get_irqchip() - .save_device(mpidrs) - .map_err(VmError::SaveGic)?, - }) - } - - /// Restore the KVM VM state - /// - /// # Errors - /// - /// When [`GICDevice::restore_device`] errors. - pub fn restore_state( - &mut self, - mpidrs: &[u64], - state: &VmState, - ) -> Result<(), RestoreStateError> { - self.get_irqchip() - .restore_device(mpidrs, &state.gic) - .map_err(RestoreStateError::GicError)?; - Ok(()) - } -} - -/// Structure holding an general specific VM state. -#[cfg(target_arch = "aarch64")] -#[derive(Debug, Default, Serialize, Deserialize)] -pub struct VmState { - /// GIC state. - pub gic: GicState, -} - -#[cfg(target_arch = "x86_64")] -impl Vm { - /// Restores the KVM VM state. - /// - /// # Errors - /// - /// When: - /// - [`kvm_ioctls::VmFd::set_pit`] errors. - /// - [`kvm_ioctls::VmFd::set_clock`] errors. - /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. - /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. - /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. - pub fn restore_state(&mut self, state: &VmState) -> Result<(), RestoreStateError> { - self.fd - .set_pit2(&state.pitstate) - .map_err(RestoreStateError::SetPit2)?; - self.fd - .set_clock(&state.clock) - .map_err(RestoreStateError::SetClock)?; - self.fd - .set_irqchip(&state.pic_master) - .map_err(RestoreStateError::SetIrqChipPicMaster)?; - self.fd - .set_irqchip(&state.pic_slave) - .map_err(RestoreStateError::SetIrqChipPicSlave)?; - self.fd - .set_irqchip(&state.ioapic) - .map_err(RestoreStateError::SetIrqChipIoAPIC)?; - Ok(()) - } - - /// Creates the irq chip and an in-kernel device model for the PIT. - pub fn setup_irqchip(&self) -> Result<(), VmError> { - self.fd.create_irq_chip().map_err(VmError::VmSetup)?; - // We need to enable the emulation of a dummy speaker port stub so that writing to port 0x61 - // (i.e. KVM_SPEAKER_BASE_ADDRESS) does not trigger an exit to user space. - let pit_config = kvm_pit_config { - flags: KVM_PIT_SPEAKER_DUMMY, - ..Default::default() - }; - self.fd.create_pit2(pit_config).map_err(VmError::VmSetup) - } - - /// Saves and returns the Kvm Vm state. - pub fn save_state(&self) -> Result { - let pitstate = self.fd.get_pit2().map_err(VmError::VmGetPit2)?; - - let mut clock = self.fd.get_clock().map_err(VmError::VmGetClock)?; - // This bit is not accepted in SET_CLOCK, clear it. - clock.flags &= !KVM_CLOCK_TSC_STABLE; - - let mut pic_master = kvm_irqchip { - chip_id: KVM_IRQCHIP_PIC_MASTER, - ..Default::default() - }; - self.fd - .get_irqchip(&mut pic_master) - .map_err(VmError::VmGetIrqChip)?; - - let mut pic_slave = kvm_irqchip { - chip_id: KVM_IRQCHIP_PIC_SLAVE, - ..Default::default() - }; - self.fd - .get_irqchip(&mut pic_slave) - .map_err(VmError::VmGetIrqChip)?; - - let mut ioapic = kvm_irqchip { - chip_id: KVM_IRQCHIP_IOAPIC, - ..Default::default() - }; - self.fd - .get_irqchip(&mut ioapic) - .map_err(VmError::VmGetIrqChip)?; - - Ok(VmState { - pitstate, - clock, - pic_master, - pic_slave, - ioapic, - }) - } -} - -#[cfg(target_arch = "x86_64")] -#[derive(Default, Deserialize, Serialize)] -/// Structure holding VM kvm state. -pub struct VmState { - pitstate: kvm_pit_state2, - clock: kvm_clock_data, - // TODO: rename this field to adopt inclusive language once Linux updates it, too. - pic_master: kvm_irqchip, - // TODO: rename this field to adopt inclusive language once Linux updates it, too. - pic_slave: kvm_irqchip, - ioapic: kvm_irqchip, -} - -#[cfg(target_arch = "x86_64")] -impl fmt::Debug for VmState { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("VmState") - .field("pitstate", &self.pitstate) - .field("clock", &self.clock) - .field("pic_master", &"?") - .field("pic_slave", &"?") - .field("ioapic", &"?") - .finish() - } -} - -#[cfg(test)] -pub(crate) mod tests { - use super::*; - #[cfg(target_arch = "x86_64")] - use crate::snapshot::Snapshot; - use crate::test_utils::single_region_mem; - use crate::vstate::memory::GuestMemoryMmap; - - // Auxiliary function being used throughout the tests. - pub(crate) fn setup_vm() -> (Kvm, Vm) { - let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); - let vm = Vm::new(&kvm).expect("Cannot create new vm"); - (kvm, vm) - } - - // Auxiliary function being used throughout the tests. - pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm, GuestMemoryMmap) { - let (kvm, vm) = setup_vm(); - let gm = single_region_mem(mem_size); - vm.memory_init(&gm).unwrap(); - (kvm, vm, gm) - } - - #[test] - fn test_new() { - // Testing with a valid /dev/kvm descriptor. - let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); - Vm::new(&kvm).unwrap(); - } - - #[test] - fn test_vm_memory_init() { - let (_, vm) = setup_vm(); - // Create valid memory region and test that the initialization is successful. - let gm = single_region_mem(0x1000); - vm.memory_init(&gm).unwrap(); - } - - #[cfg(target_arch = "x86_64")] - #[test] - fn test_vm_save_restore_state() { - let (_, vm) = setup_vm(); - // Irqchips, clock and pitstate are not configured so trying to save state should fail. - vm.save_state().unwrap_err(); - - let (_, vm, _mem) = setup_vm_with_memory(0x1000); - vm.setup_irqchip().unwrap(); - - let vm_state = vm.save_state().unwrap(); - assert_eq!( - vm_state.pitstate.flags | KVM_PIT_SPEAKER_DUMMY, - KVM_PIT_SPEAKER_DUMMY - ); - assert_eq!(vm_state.clock.flags & KVM_CLOCK_TSC_STABLE, 0); - assert_eq!(vm_state.pic_master.chip_id, KVM_IRQCHIP_PIC_MASTER); - assert_eq!(vm_state.pic_slave.chip_id, KVM_IRQCHIP_PIC_SLAVE); - assert_eq!(vm_state.ioapic.chip_id, KVM_IRQCHIP_IOAPIC); - - let (_, mut vm, _mem) = setup_vm_with_memory(0x1000); - vm.setup_irqchip().unwrap(); - - vm.restore_state(&vm_state).unwrap(); - } - - #[cfg(target_arch = "x86_64")] - #[test] - fn test_vm_save_restore_state_bad_irqchip() { - use kvm_bindings::KVM_NR_IRQCHIPS; - - let (_, vm, _mem) = setup_vm_with_memory(0x1000); - vm.setup_irqchip().unwrap(); - let mut vm_state = vm.save_state().unwrap(); - - let (_, mut vm, _mem) = setup_vm_with_memory(0x1000); - vm.setup_irqchip().unwrap(); - - // Try to restore an invalid PIC Master chip ID - let orig_master_chip_id = vm_state.pic_master.chip_id; - vm_state.pic_master.chip_id = KVM_NR_IRQCHIPS; - vm.restore_state(&vm_state).unwrap_err(); - vm_state.pic_master.chip_id = orig_master_chip_id; - - // Try to restore an invalid PIC Slave chip ID - let orig_slave_chip_id = vm_state.pic_slave.chip_id; - vm_state.pic_slave.chip_id = KVM_NR_IRQCHIPS; - vm.restore_state(&vm_state).unwrap_err(); - vm_state.pic_slave.chip_id = orig_slave_chip_id; - - // Try to restore an invalid IOPIC chip ID - vm_state.ioapic.chip_id = KVM_NR_IRQCHIPS; - vm.restore_state(&vm_state).unwrap_err(); - } - - #[cfg(target_arch = "x86_64")] - #[test] - fn test_vmstate_serde() { - let mut snapshot_data = vec![0u8; 10000]; - - let (_, mut vm, _) = setup_vm_with_memory(0x1000); - vm.setup_irqchip().unwrap(); - let state = vm.save_state().unwrap(); - Snapshot::serialize(&mut snapshot_data.as_mut_slice(), &state).unwrap(); - let restored_state: VmState = Snapshot::deserialize(&mut snapshot_data.as_slice()).unwrap(); - - vm.restore_state(&restored_state).unwrap(); - } - - #[test] - fn test_set_kvm_memory_regions() { - let (_, vm) = setup_vm(); - - let gm = single_region_mem(0x1000); - let res = vm.set_kvm_memory_regions(&gm); - res.unwrap(); - - // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE - // will result in error. - let gm = single_region_mem(0x10); - let res = vm.set_kvm_memory_regions(&gm); - assert_eq!( - res.unwrap_err().to_string(), - "Cannot set the memory regions: Invalid argument (os error 22)" - ); - } -} diff --git a/src/vmm/src/vstate/vm/aarch64.rs b/src/vmm/src/vstate/vm/aarch64.rs new file mode 100644 index 00000000000..0925ad4d7e2 --- /dev/null +++ b/src/vmm/src/vstate/vm/aarch64.rs @@ -0,0 +1,66 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; + +use crate::arch::aarch64::gic::GicState; +use crate::vstate::vm::VmError; +use crate::Vm; + +/// Error type for [`Vm::restore_state`] +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum RestoreStateError { + /// {0} + GicError(crate::arch::aarch64::gic::GicError), + /// {0} + VmError(VmError), +} + +impl Vm { + /// Creates the GIC (Global Interrupt Controller). + pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), VmError> { + self.irqchip_handle = Some( + crate::arch::aarch64::gic::create_gic(&self.fd, vcpu_count.into(), None) + .map_err(VmError::VmCreateGIC)?, + ); + Ok(()) + } + + /// Gets a reference to the irqchip of the VM. + pub fn get_irqchip(&self) -> &crate::arch::aarch64::gic::GICDevice { + self.irqchip_handle.as_ref().expect("IRQ chip not set") + } + + /// Saves and returns the Kvm Vm state. + pub fn save_state(&self, mpidrs: &[u64]) -> Result { + Ok(crate::vstate::vm::VmState { + gic: self + .get_irqchip() + .save_device(mpidrs) + .map_err(VmError::SaveGic)?, + }) + } + + /// Restore the KVM VM state + /// + /// # Errors + /// + /// When [`crate::arch::aarch64::gic::GICDevice::restore_device`] errors. + pub fn restore_state( + &mut self, + mpidrs: &[u64], + state: &crate::vstate::vm::VmState, + ) -> Result<(), RestoreStateError> { + self.get_irqchip() + .restore_device(mpidrs, &state.gic) + .map_err(RestoreStateError::GicError)?; + Ok(()) + } +} + +/// Structure holding an general specific VM state. +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct VmState { + /// GIC state. + pub gic: GicState, +} diff --git a/src/vmm/src/vstate/vm/mod.rs b/src/vmm/src/vstate/vm/mod.rs new file mode 100644 index 00000000000..65c3ab43f74 --- /dev/null +++ b/src/vmm/src/vstate/vm/mod.rs @@ -0,0 +1,199 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; +use kvm_ioctls::VmFd; + +#[cfg(target_arch = "aarch64")] +use crate::arch::aarch64::gic::GICDevice; +#[cfg(target_arch = "x86_64")] +use crate::utils::u64_to_usize; +use crate::vstate::kvm::Kvm; +use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; + +#[cfg(target_arch = "x86_64")] +#[path = "x86_64.rs"] +mod arch; +#[cfg(target_arch = "aarch64")] +#[path = "aarch64.rs"] +mod arch; + +pub use arch::{RestoreStateError, VmState}; + +/// Errors associated with the wrappers over KVM ioctls. +/// Needs `rustfmt::skip` to make multiline comments work +#[rustfmt::skip] +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum VmError { + /// Cannot set the memory regions: {0} + SetUserMemoryRegion(kvm_ioctls::Error), + #[cfg(target_arch = "aarch64")] + /// Error creating the global interrupt controller: {0} + VmCreateGIC(crate::arch::aarch64::gic::GicError), + /// Cannot open the VM file descriptor: {0} + VmFd(kvm_ioctls::Error), + #[cfg(target_arch = "x86_64")] + /// Failed to get KVM vm pit state: {0} + VmGetPit2(kvm_ioctls::Error), + #[cfg(target_arch = "x86_64")] + /// Failed to get KVM vm clock: {0} + VmGetClock(kvm_ioctls::Error), + #[cfg(target_arch = "x86_64")] + /// Failed to get KVM vm irqchip: {0} + VmGetIrqChip(kvm_ioctls::Error), + #[cfg(target_arch = "x86_64")] + /// Failed to set KVM vm pit state: {0} + VmSetPit2(kvm_ioctls::Error), + #[cfg(target_arch = "x86_64")] + /// Failed to set KVM vm clock: {0} + VmSetClock(kvm_ioctls::Error), + #[cfg(target_arch = "x86_64")] + /// Failed to set KVM vm irqchip: {0} + VmSetIrqChip(kvm_ioctls::Error), + /// Cannot configure the microvm: {0} + VmSetup(kvm_ioctls::Error), + #[cfg(target_arch = "aarch64")] + /// Failed to save the VM's GIC state: {0} + SaveGic(crate::arch::aarch64::gic::GicError), + #[cfg(target_arch = "aarch64")] + /// Failed to restore the VM's GIC state: {0} + RestoreGic(crate::arch::aarch64::gic::GicError), +} + +/// A wrapper around creating and using a VM. +#[derive(Debug)] +pub struct Vm { + fd: VmFd, + + // Arm specific fields. + // On aarch64 we need to keep around the fd obtained by creating the VGIC device. + #[cfg(target_arch = "aarch64")] + irqchip_handle: Option, +} + +/// Contains Vm functions that are usable across CPU architectures +impl Vm { + /// Create a new `Vm` struct. + pub fn new(kvm: &Kvm) -> Result { + // Create fd for interacting with kvm-vm specific functions. + let vm_fd = kvm.fd.create_vm().map_err(VmError::VmFd)?; + + #[cfg(target_arch = "aarch64")] + { + Ok(Vm { + fd: vm_fd, + irqchip_handle: None, + }) + } + + #[cfg(target_arch = "x86_64")] + { + Ok(Vm { fd: vm_fd }) + } + } + + /// Initializes the guest memory. + pub fn memory_init(&self, guest_mem: &GuestMemoryMmap) -> Result<(), VmError> { + self.set_kvm_memory_regions(guest_mem)?; + #[cfg(target_arch = "x86_64")] + self.fd + .set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS)) + .map_err(VmError::VmSetup)?; + + Ok(()) + } + + pub(crate) fn set_kvm_memory_regions( + &self, + guest_mem: &GuestMemoryMmap, + ) -> Result<(), VmError> { + guest_mem + .iter() + .zip(0u32..) + .try_for_each(|(region, slot)| { + let flags = if region.bitmap().is_some() { + KVM_MEM_LOG_DIRTY_PAGES + } else { + 0 + }; + + let memory_region = kvm_userspace_memory_region { + slot, + guest_phys_addr: region.start_addr().raw_value(), + memory_size: region.len(), + // It's safe to unwrap because the guest address is valid. + userspace_addr: guest_mem.get_host_address(region.start_addr()).unwrap() as u64, + flags, + }; + + // SAFETY: Safe because the fd is a valid KVM file descriptor. + unsafe { self.fd.set_user_memory_region(memory_region) } + }) + .map_err(VmError::SetUserMemoryRegion)?; + Ok(()) + } + + /// Gets a reference to the kvm file descriptor owned by this VM. + pub fn fd(&self) -> &VmFd { + &self.fd + } +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + use crate::test_utils::single_region_mem; + use crate::vstate::memory::GuestMemoryMmap; + + // Auxiliary function being used throughout the tests. + pub(crate) fn setup_vm() -> (Kvm, Vm) { + let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); + let vm = Vm::new(&kvm).expect("Cannot create new vm"); + (kvm, vm) + } + + // Auxiliary function being used throughout the tests. + pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm, GuestMemoryMmap) { + let (kvm, vm) = setup_vm(); + let gm = single_region_mem(mem_size); + vm.memory_init(&gm).unwrap(); + (kvm, vm, gm) + } + + #[test] + fn test_new() { + // Testing with a valid /dev/kvm descriptor. + let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); + Vm::new(&kvm).unwrap(); + } + + #[test] + fn test_vm_memory_init() { + let (_, vm) = setup_vm(); + // Create valid memory region and test that the initialization is successful. + let gm = single_region_mem(0x1000); + vm.memory_init(&gm).unwrap(); + } + + #[test] + fn test_set_kvm_memory_regions() { + let (_, vm) = setup_vm(); + + let gm = single_region_mem(0x1000); + let res = vm.set_kvm_memory_regions(&gm); + res.unwrap(); + + // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE + // will result in error. + let gm = single_region_mem(0x10); + let res = vm.set_kvm_memory_regions(&gm); + assert_eq!( + res.unwrap_err().to_string(), + "Cannot set the memory regions: Invalid argument (os error 22)" + ); + } +} diff --git a/src/vmm/src/vstate/vm/x86_64.rs b/src/vmm/src/vstate/vm/x86_64.rs new file mode 100644 index 00000000000..95ac742ad76 --- /dev/null +++ b/src/vmm/src/vstate/vm/x86_64.rs @@ -0,0 +1,221 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::fmt; + +use kvm_bindings::{ + kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, KVM_CLOCK_TSC_STABLE, + KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_PIT_SPEAKER_DUMMY, +}; +use serde::{Deserialize, Serialize}; + +use crate::vstate::vm::VmError; +use crate::Vm; + +/// Error type for [`Vm::restore_state`] +#[allow(missing_docs)] +#[cfg(target_arch = "x86_64")] +#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] +pub enum RestoreStateError { + /// Set PIT2 error: {0} + SetPit2(kvm_ioctls::Error), + /// Set clock error: {0} + SetClock(kvm_ioctls::Error), + /// Set IrqChipPicMaster error: {0} + SetIrqChipPicMaster(kvm_ioctls::Error), + /// Set IrqChipPicSlave error: {0} + SetIrqChipPicSlave(kvm_ioctls::Error), + /// Set IrqChipIoAPIC error: {0} + SetIrqChipIoAPIC(kvm_ioctls::Error), + /// VM error: {0} + VmError(VmError), +} + +impl Vm { + /// Restores the KVM VM state. + /// + /// # Errors + /// + /// When: + /// - [`kvm_ioctls::VmFd::set_pit`] errors. + /// - [`kvm_ioctls::VmFd::set_clock`] errors. + /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. + /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. + /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. + pub fn restore_state(&mut self, state: &VmState) -> Result<(), RestoreStateError> { + self.fd + .set_pit2(&state.pitstate) + .map_err(RestoreStateError::SetPit2)?; + self.fd + .set_clock(&state.clock) + .map_err(RestoreStateError::SetClock)?; + self.fd + .set_irqchip(&state.pic_master) + .map_err(RestoreStateError::SetIrqChipPicMaster)?; + self.fd + .set_irqchip(&state.pic_slave) + .map_err(RestoreStateError::SetIrqChipPicSlave)?; + self.fd + .set_irqchip(&state.ioapic) + .map_err(RestoreStateError::SetIrqChipIoAPIC)?; + Ok(()) + } + + /// Creates the irq chip and an in-kernel device model for the PIT. + pub fn setup_irqchip(&self) -> Result<(), VmError> { + self.fd.create_irq_chip().map_err(VmError::VmSetup)?; + // We need to enable the emulation of a dummy speaker port stub so that writing to port 0x61 + // (i.e. KVM_SPEAKER_BASE_ADDRESS) does not trigger an exit to user space. + let pit_config = kvm_pit_config { + flags: KVM_PIT_SPEAKER_DUMMY, + ..Default::default() + }; + self.fd.create_pit2(pit_config).map_err(VmError::VmSetup) + } + + /// Saves and returns the Kvm Vm state. + pub fn save_state(&self) -> Result { + let pitstate = self.fd.get_pit2().map_err(VmError::VmGetPit2)?; + + let mut clock = self.fd.get_clock().map_err(VmError::VmGetClock)?; + // This bit is not accepted in SET_CLOCK, clear it. + clock.flags &= !KVM_CLOCK_TSC_STABLE; + + let mut pic_master = kvm_irqchip { + chip_id: KVM_IRQCHIP_PIC_MASTER, + ..Default::default() + }; + self.fd + .get_irqchip(&mut pic_master) + .map_err(VmError::VmGetIrqChip)?; + + let mut pic_slave = kvm_irqchip { + chip_id: KVM_IRQCHIP_PIC_SLAVE, + ..Default::default() + }; + self.fd + .get_irqchip(&mut pic_slave) + .map_err(VmError::VmGetIrqChip)?; + + let mut ioapic = kvm_irqchip { + chip_id: KVM_IRQCHIP_IOAPIC, + ..Default::default() + }; + self.fd + .get_irqchip(&mut ioapic) + .map_err(VmError::VmGetIrqChip)?; + + Ok(VmState { + pitstate, + clock, + pic_master, + pic_slave, + ioapic, + }) + } +} + +#[derive(Default, Deserialize, Serialize)] +/// Structure holding VM kvm state. +pub struct VmState { + pitstate: kvm_pit_state2, + clock: kvm_clock_data, + // TODO: rename this field to adopt inclusive language once Linux updates it, too. + pic_master: kvm_irqchip, + // TODO: rename this field to adopt inclusive language once Linux updates it, too. + pic_slave: kvm_irqchip, + ioapic: kvm_irqchip, +} + +impl fmt::Debug for VmState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("VmState") + .field("pitstate", &self.pitstate) + .field("clock", &self.clock) + .field("pic_master", &"?") + .field("pic_slave", &"?") + .field("ioapic", &"?") + .finish() + } +} + +#[cfg(test)] +mod tests { + use kvm_bindings::{ + KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, + KVM_PIT_SPEAKER_DUMMY, + }; + + use crate::snapshot::Snapshot; + use crate::vstate::vm::tests::{setup_vm, setup_vm_with_memory}; + use crate::vstate::vm::VmState; + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_vm_save_restore_state() { + let (_, vm) = setup_vm(); + // Irqchips, clock and pitstate are not configured so trying to save state should fail. + vm.save_state().unwrap_err(); + + let (_, vm, _mem) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); + + let vm_state = vm.save_state().unwrap(); + assert_eq!( + vm_state.pitstate.flags | KVM_PIT_SPEAKER_DUMMY, + KVM_PIT_SPEAKER_DUMMY + ); + assert_eq!(vm_state.clock.flags & KVM_CLOCK_TSC_STABLE, 0); + assert_eq!(vm_state.pic_master.chip_id, KVM_IRQCHIP_PIC_MASTER); + assert_eq!(vm_state.pic_slave.chip_id, KVM_IRQCHIP_PIC_SLAVE); + assert_eq!(vm_state.ioapic.chip_id, KVM_IRQCHIP_IOAPIC); + + let (_, mut vm, _mem) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); + + vm.restore_state(&vm_state).unwrap(); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_vm_save_restore_state_bad_irqchip() { + use kvm_bindings::KVM_NR_IRQCHIPS; + + let (_, vm, _mem) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); + let mut vm_state = vm.save_state().unwrap(); + + let (_, mut vm, _mem) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); + + // Try to restore an invalid PIC Master chip ID + let orig_master_chip_id = vm_state.pic_master.chip_id; + vm_state.pic_master.chip_id = KVM_NR_IRQCHIPS; + vm.restore_state(&vm_state).unwrap_err(); + vm_state.pic_master.chip_id = orig_master_chip_id; + + // Try to restore an invalid PIC Slave chip ID + let orig_slave_chip_id = vm_state.pic_slave.chip_id; + vm_state.pic_slave.chip_id = KVM_NR_IRQCHIPS; + vm.restore_state(&vm_state).unwrap_err(); + vm_state.pic_slave.chip_id = orig_slave_chip_id; + + // Try to restore an invalid IOPIC chip ID + vm_state.ioapic.chip_id = KVM_NR_IRQCHIPS; + vm.restore_state(&vm_state).unwrap_err(); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_vmstate_serde() { + let mut snapshot_data = vec![0u8; 10000]; + + let (_, mut vm, _) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); + let state = vm.save_state().unwrap(); + Snapshot::serialize(&mut snapshot_data.as_mut_slice(), &state).unwrap(); + let restored_state: VmState = Snapshot::deserialize(&mut snapshot_data.as_slice()).unwrap(); + + vm.restore_state(&restored_state).unwrap(); + } +} From 82664fd704c1adb3cd8dfeb4acb4188f0ccf2305 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Wed, 29 Jan 2025 12:42:19 +0000 Subject: [PATCH 02/11] refactor: Split `VmError` enum into arch specific versions Introduce `enum ArchVmError` which contains architecture specific variants for VM operations (mostly related to snapshot creation/restore). Do this by adapting exiting architecture specific enums that were used for snapshot restoration (x86_64)/GIC creation (aarch64). While we're at it, remove unused/duplicated enum variants, and on x86 use the SetIrqChip enum variant for failures inside `setup_irq_chip` instead of producing some generic `VmError`. Admittedly, there's a lot more cleanup that can be done here, but for now this is enough to attain my goals (separating architecture specific code). No functional change intended (apart from error messages changing). Signed-off-by: Patrick Roy --- src/vmm/src/builder.rs | 6 ++-- src/vmm/src/persist.rs | 2 +- src/vmm/src/vstate/vm/aarch64.rs | 33 +++++++++----------- src/vmm/src/vstate/vm/mod.rs | 31 ++----------------- src/vmm/src/vstate/vm/x86_64.rs | 53 ++++++++++++++++++++------------ 5 files changed, 56 insertions(+), 69 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 27de9f6afa6..0db73698490 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -71,7 +71,7 @@ use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError}; use crate::vstate::kvm::Kvm; use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap}; use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuError}; -use crate::vstate::vm::Vm; +use crate::vstate::vm::{Vm, VmError}; use crate::{device_manager, EventManager, Vmm, VmmError}; /// Errors associated with starting the instance. @@ -436,7 +436,7 @@ pub enum BuildMicrovmFromSnapshotError { /// Could not set TSC scaling within the snapshot: {0} SetTsc(#[from] crate::vstate::vcpu::SetTscError), /// Failed to restore microVM state: {0} - RestoreState(#[from] crate::vstate::vm::RestoreStateError), + RestoreState(#[from] crate::vstate::vm::ArchVmError), /// Failed to update microVM configuration: {0} VmUpdateConfig(#[from] MachineConfigError), /// Failed to restore MMIO device: {0} @@ -675,6 +675,7 @@ where #[cfg(target_arch = "x86_64")] pub fn setup_interrupt_controller(vm: &mut Vm) -> Result<(), StartMicrovmError> { vm.setup_irqchip() + .map_err(VmError::Arch) .map_err(VmmError::Vm) .map_err(StartMicrovmError::Internal) } @@ -683,6 +684,7 @@ pub fn setup_interrupt_controller(vm: &mut Vm) -> Result<(), StartMicrovmError> #[cfg(target_arch = "aarch64")] pub fn setup_interrupt_controller(vm: &mut Vm, vcpu_count: u8) -> Result<(), StartMicrovmError> { vm.setup_irqchip(vcpu_count) + .map_err(VmError::Arch) .map_err(VmmError::Vm) .map_err(StartMicrovmError::Internal) } diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 3c5f3e7e754..05529107b6b 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -129,7 +129,7 @@ pub enum MicrovmStateError { /// Cannot save Vcpu state: {0} SaveVcpuState(vstate::vcpu::VcpuError), /// Cannot save Vm state: {0} - SaveVmState(vstate::vm::VmError), + SaveVmState(vstate::vm::ArchVmError), /// Cannot signal Vcpu: {0} SignalVcpu(VcpuSendEventError), /// Vcpu is in unexpected state. diff --git a/src/vmm/src/vstate/vm/aarch64.rs b/src/vmm/src/vstate/vm/aarch64.rs index 0925ad4d7e2..45d9da0dc10 100644 --- a/src/vmm/src/vstate/vm/aarch64.rs +++ b/src/vmm/src/vstate/vm/aarch64.rs @@ -4,24 +4,25 @@ use serde::{Deserialize, Serialize}; use crate::arch::aarch64::gic::GicState; -use crate::vstate::vm::VmError; use crate::Vm; /// Error type for [`Vm::restore_state`] -#[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum RestoreStateError { - /// {0} - GicError(crate::arch::aarch64::gic::GicError), - /// {0} - VmError(VmError), +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum ArchVmError { + /// Error creating the global interrupt controller: {0} + VmCreateGIC(crate::arch::aarch64::gic::GicError), + /// Failed to save the VM's GIC state: {0} + SaveGic(crate::arch::aarch64::gic::GicError), + /// Failed to restore the VM's GIC state: {0} + RestoreGic(crate::arch::aarch64::gic::GicError), } impl Vm { /// Creates the GIC (Global Interrupt Controller). - pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), VmError> { + pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), ArchVmError> { self.irqchip_handle = Some( crate::arch::aarch64::gic::create_gic(&self.fd, vcpu_count.into(), None) - .map_err(VmError::VmCreateGIC)?, + .map_err(ArchVmError::VmCreateGIC)?, ); Ok(()) } @@ -32,12 +33,12 @@ impl Vm { } /// Saves and returns the Kvm Vm state. - pub fn save_state(&self, mpidrs: &[u64]) -> Result { - Ok(crate::vstate::vm::VmState { + pub fn save_state(&self, mpidrs: &[u64]) -> Result { + Ok(VmState { gic: self .get_irqchip() .save_device(mpidrs) - .map_err(VmError::SaveGic)?, + .map_err(ArchVmError::SaveGic)?, }) } @@ -46,14 +47,10 @@ impl Vm { /// # Errors /// /// When [`crate::arch::aarch64::gic::GICDevice::restore_device`] errors. - pub fn restore_state( - &mut self, - mpidrs: &[u64], - state: &crate::vstate::vm::VmState, - ) -> Result<(), RestoreStateError> { + pub fn restore_state(&mut self, mpidrs: &[u64], state: &VmState) -> Result<(), ArchVmError> { self.get_irqchip() .restore_device(mpidrs, &state.gic) - .map_err(RestoreStateError::GicError)?; + .map_err(ArchVmError::RestoreGic)?; Ok(()) } } diff --git a/src/vmm/src/vstate/vm/mod.rs b/src/vmm/src/vstate/vm/mod.rs index 65c3ab43f74..5c54643d258 100644 --- a/src/vmm/src/vstate/vm/mod.rs +++ b/src/vmm/src/vstate/vm/mod.rs @@ -22,7 +22,7 @@ mod arch; #[path = "aarch64.rs"] mod arch; -pub use arch::{RestoreStateError, VmState}; +pub use arch::{ArchVmError, VmState}; /// Errors associated with the wrappers over KVM ioctls. /// Needs `rustfmt::skip` to make multiline comments work @@ -31,37 +31,12 @@ pub use arch::{RestoreStateError, VmState}; pub enum VmError { /// Cannot set the memory regions: {0} SetUserMemoryRegion(kvm_ioctls::Error), - #[cfg(target_arch = "aarch64")] - /// Error creating the global interrupt controller: {0} - VmCreateGIC(crate::arch::aarch64::gic::GicError), /// Cannot open the VM file descriptor: {0} VmFd(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to get KVM vm pit state: {0} - VmGetPit2(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to get KVM vm clock: {0} - VmGetClock(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to get KVM vm irqchip: {0} - VmGetIrqChip(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to set KVM vm pit state: {0} - VmSetPit2(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to set KVM vm clock: {0} - VmSetClock(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to set KVM vm irqchip: {0} - VmSetIrqChip(kvm_ioctls::Error), /// Cannot configure the microvm: {0} VmSetup(kvm_ioctls::Error), - #[cfg(target_arch = "aarch64")] - /// Failed to save the VM's GIC state: {0} - SaveGic(crate::arch::aarch64::gic::GicError), - #[cfg(target_arch = "aarch64")] - /// Failed to restore the VM's GIC state: {0} - RestoreGic(crate::arch::aarch64::gic::GicError), + /// {0} + Arch(#[from] ArchVmError), } /// A wrapper around creating and using a VM. diff --git a/src/vmm/src/vstate/vm/x86_64.rs b/src/vmm/src/vstate/vm/x86_64.rs index 95ac742ad76..9b38e304e9a 100644 --- a/src/vmm/src/vstate/vm/x86_64.rs +++ b/src/vmm/src/vstate/vm/x86_64.rs @@ -9,14 +9,13 @@ use kvm_bindings::{ }; use serde::{Deserialize, Serialize}; -use crate::vstate::vm::VmError; use crate::Vm; /// Error type for [`Vm::restore_state`] #[allow(missing_docs)] #[cfg(target_arch = "x86_64")] -#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] -pub enum RestoreStateError { +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum ArchVmError { /// Set PIT2 error: {0} SetPit2(kvm_ioctls::Error), /// Set clock error: {0} @@ -27,8 +26,18 @@ pub enum RestoreStateError { SetIrqChipPicSlave(kvm_ioctls::Error), /// Set IrqChipIoAPIC error: {0} SetIrqChipIoAPIC(kvm_ioctls::Error), - /// VM error: {0} - VmError(VmError), + /// Failed to get KVM vm pit state: {0} + VmGetPit2(kvm_ioctls::Error), + /// Failed to get KVM vm clock: {0} + VmGetClock(kvm_ioctls::Error), + /// Failed to get KVM vm irqchip: {0} + VmGetIrqChip(kvm_ioctls::Error), + /// Failed to set KVM vm pit state: {0} + VmSetPit2(kvm_ioctls::Error), + /// Failed to set KVM vm clock: {0} + VmSetClock(kvm_ioctls::Error), + /// Failed to set KVM vm irqchip: {0} + VmSetIrqChip(kvm_ioctls::Error), } impl Vm { @@ -42,42 +51,46 @@ impl Vm { /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. - pub fn restore_state(&mut self, state: &VmState) -> Result<(), RestoreStateError> { + pub fn restore_state(&mut self, state: &VmState) -> Result<(), ArchVmError> { self.fd .set_pit2(&state.pitstate) - .map_err(RestoreStateError::SetPit2)?; + .map_err(ArchVmError::SetPit2)?; self.fd .set_clock(&state.clock) - .map_err(RestoreStateError::SetClock)?; + .map_err(ArchVmError::SetClock)?; self.fd .set_irqchip(&state.pic_master) - .map_err(RestoreStateError::SetIrqChipPicMaster)?; + .map_err(ArchVmError::SetIrqChipPicMaster)?; self.fd .set_irqchip(&state.pic_slave) - .map_err(RestoreStateError::SetIrqChipPicSlave)?; + .map_err(ArchVmError::SetIrqChipPicSlave)?; self.fd .set_irqchip(&state.ioapic) - .map_err(RestoreStateError::SetIrqChipIoAPIC)?; + .map_err(ArchVmError::SetIrqChipIoAPIC)?; Ok(()) } /// Creates the irq chip and an in-kernel device model for the PIT. - pub fn setup_irqchip(&self) -> Result<(), VmError> { - self.fd.create_irq_chip().map_err(VmError::VmSetup)?; + pub fn setup_irqchip(&self) -> Result<(), ArchVmError> { + self.fd + .create_irq_chip() + .map_err(ArchVmError::VmSetIrqChip)?; // We need to enable the emulation of a dummy speaker port stub so that writing to port 0x61 // (i.e. KVM_SPEAKER_BASE_ADDRESS) does not trigger an exit to user space. let pit_config = kvm_pit_config { flags: KVM_PIT_SPEAKER_DUMMY, ..Default::default() }; - self.fd.create_pit2(pit_config).map_err(VmError::VmSetup) + self.fd + .create_pit2(pit_config) + .map_err(ArchVmError::VmSetIrqChip) } /// Saves and returns the Kvm Vm state. - pub fn save_state(&self) -> Result { - let pitstate = self.fd.get_pit2().map_err(VmError::VmGetPit2)?; + pub fn save_state(&self) -> Result { + let pitstate = self.fd.get_pit2().map_err(ArchVmError::VmGetPit2)?; - let mut clock = self.fd.get_clock().map_err(VmError::VmGetClock)?; + let mut clock = self.fd.get_clock().map_err(ArchVmError::VmGetClock)?; // This bit is not accepted in SET_CLOCK, clear it. clock.flags &= !KVM_CLOCK_TSC_STABLE; @@ -87,7 +100,7 @@ impl Vm { }; self.fd .get_irqchip(&mut pic_master) - .map_err(VmError::VmGetIrqChip)?; + .map_err(ArchVmError::VmGetIrqChip)?; let mut pic_slave = kvm_irqchip { chip_id: KVM_IRQCHIP_PIC_SLAVE, @@ -95,7 +108,7 @@ impl Vm { }; self.fd .get_irqchip(&mut pic_slave) - .map_err(VmError::VmGetIrqChip)?; + .map_err(ArchVmError::VmGetIrqChip)?; let mut ioapic = kvm_irqchip { chip_id: KVM_IRQCHIP_IOAPIC, @@ -103,7 +116,7 @@ impl Vm { }; self.fd .get_irqchip(&mut ioapic) - .map_err(VmError::VmGetIrqChip)?; + .map_err(ArchVmError::VmGetIrqChip)?; Ok(VmState { pitstate, From 2b0dece315e04782b654ecde4e2a769f97f06f84 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Wed, 29 Jan 2025 13:03:42 +0000 Subject: [PATCH 03/11] refactor: Split `struct Vm` into arch-specific structs Similarly to what we do in the vcpu module for the Vcpu/KvmVcpu structs. The difference is that we do not implement any function on `ArchVm`, so that common fields like `fd` can live in `Vm`, instead of needing to be duplicated into all the `ArchVm` types. Signed-off-by: Patrick Roy --- src/vmm/src/vstate/vm/aarch64.rs | 23 ++++++++++++++++++-- src/vmm/src/vstate/vm/mod.rs | 36 ++------------------------------ src/vmm/src/vstate/vm/x86_64.rs | 17 +++++++++++++-- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/vmm/src/vstate/vm/aarch64.rs b/src/vmm/src/vstate/vm/aarch64.rs index 45d9da0dc10..9ff48c6c7ee 100644 --- a/src/vmm/src/vstate/vm/aarch64.rs +++ b/src/vmm/src/vstate/vm/aarch64.rs @@ -1,10 +1,20 @@ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 +use kvm_ioctls::VmFd; use serde::{Deserialize, Serialize}; +use super::VmError; use crate::arch::aarch64::gic::GicState; -use crate::Vm; +use crate::vstate::kvm::Kvm; + +/// Structure representing the current architecture's understand of what a "virtual machine" is. +#[derive(Debug)] +pub struct ArchVm { + pub(super) fd: VmFd, + // On aarch64 we need to keep around the fd obtained by creating the VGIC device. + irqchip_handle: Option, +} /// Error type for [`Vm::restore_state`] #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] @@ -17,7 +27,16 @@ pub enum ArchVmError { RestoreGic(crate::arch::aarch64::gic::GicError), } -impl Vm { +impl ArchVm { + /// Create a new `Vm` struct. + pub fn new(kvm: &Kvm) -> Result { + let fd = kvm.fd.create_vm().map_err(VmError::VmFd)?; + Ok(ArchVm { + fd, + irqchip_handle: None, + }) + } + /// Creates the GIC (Global Interrupt Controller). pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), ArchVmError> { self.irqchip_handle = Some( diff --git a/src/vmm/src/vstate/vm/mod.rs b/src/vmm/src/vstate/vm/mod.rs index 5c54643d258..df4e72c2e95 100644 --- a/src/vmm/src/vstate/vm/mod.rs +++ b/src/vmm/src/vstate/vm/mod.rs @@ -8,11 +8,8 @@ use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; use kvm_ioctls::VmFd; -#[cfg(target_arch = "aarch64")] -use crate::arch::aarch64::gic::GICDevice; #[cfg(target_arch = "x86_64")] use crate::utils::u64_to_usize; -use crate::vstate::kvm::Kvm; use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; #[cfg(target_arch = "x86_64")] @@ -22,7 +19,7 @@ mod arch; #[path = "aarch64.rs"] mod arch; -pub use arch::{ArchVmError, VmState}; +pub use arch::{ArchVm as Vm, ArchVmError, VmState}; /// Errors associated with the wrappers over KVM ioctls. /// Needs `rustfmt::skip` to make multiline comments work @@ -39,38 +36,8 @@ pub enum VmError { Arch(#[from] ArchVmError), } -/// A wrapper around creating and using a VM. -#[derive(Debug)] -pub struct Vm { - fd: VmFd, - - // Arm specific fields. - // On aarch64 we need to keep around the fd obtained by creating the VGIC device. - #[cfg(target_arch = "aarch64")] - irqchip_handle: Option, -} - /// Contains Vm functions that are usable across CPU architectures impl Vm { - /// Create a new `Vm` struct. - pub fn new(kvm: &Kvm) -> Result { - // Create fd for interacting with kvm-vm specific functions. - let vm_fd = kvm.fd.create_vm().map_err(VmError::VmFd)?; - - #[cfg(target_arch = "aarch64")] - { - Ok(Vm { - fd: vm_fd, - irqchip_handle: None, - }) - } - - #[cfg(target_arch = "x86_64")] - { - Ok(Vm { fd: vm_fd }) - } - } - /// Initializes the guest memory. pub fn memory_init(&self, guest_mem: &GuestMemoryMmap) -> Result<(), VmError> { self.set_kvm_memory_regions(guest_mem)?; @@ -122,6 +89,7 @@ impl Vm { pub(crate) mod tests { use super::*; use crate::test_utils::single_region_mem; + use crate::vstate::kvm::Kvm; use crate::vstate::memory::GuestMemoryMmap; // Auxiliary function being used throughout the tests. diff --git a/src/vmm/src/vstate/vm/x86_64.rs b/src/vmm/src/vstate/vm/x86_64.rs index 9b38e304e9a..1e4bef11ce5 100644 --- a/src/vmm/src/vstate/vm/x86_64.rs +++ b/src/vmm/src/vstate/vm/x86_64.rs @@ -7,9 +7,10 @@ use kvm_bindings::{ kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_PIT_SPEAKER_DUMMY, }; +use kvm_ioctls::VmFd; use serde::{Deserialize, Serialize}; -use crate::Vm; +use crate::vstate::vm::VmError; /// Error type for [`Vm::restore_state`] #[allow(missing_docs)] @@ -40,7 +41,19 @@ pub enum ArchVmError { VmSetIrqChip(kvm_ioctls::Error), } -impl Vm { +/// Structure representing the current architecture's understand of what a "virtual machine" is. +#[derive(Debug)] +pub struct ArchVm { + pub(super) fd: VmFd, +} + +impl ArchVm { + /// Create a new `Vm` struct. + pub fn new(kvm: &crate::vstate::kvm::Kvm) -> Result { + let fd = kvm.fd.create_vm().map_err(VmError::VmFd)?; + Ok(ArchVm { fd }) + } + /// Restores the KVM VM state. /// /// # Errors From d81bf5948afae59af68c47dea908a31dfae42452 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Wed, 29 Jan 2025 13:19:50 +0000 Subject: [PATCH 04/11] refactor: Store msrs_to_save in `struct Vm` instead of `Kvm` This allows us to not need to feed `struct Kvm` all the way down to Vcpu creation. While we're at it, abstract away the `MsrList` type, and just have a helper that returns a slice of u32 (as every call site converts the MsrList to this anyway). Signed-off-by: Patrick Roy --- src/vmm/src/builder.rs | 17 ++++++---------- src/vmm/src/vstate/kvm.rs | 15 ++++++-------- src/vmm/src/vstate/vcpu/aarch64.rs | 32 +++++++++++++++--------------- src/vmm/src/vstate/vcpu/mod.rs | 11 +++++----- src/vmm/src/vstate/vcpu/x86_64.rs | 15 +++++++------- src/vmm/src/vstate/vm/aarch64.rs | 2 +- src/vmm/src/vstate/vm/x86_64.rs | 14 +++++++++++-- 7 files changed, 53 insertions(+), 53 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 0db73698490..1a15dbce2f7 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -192,7 +192,7 @@ fn create_vmm_and_vcpus( #[cfg(target_arch = "x86_64")] let (vcpus, pio_device_manager) = { setup_interrupt_controller(&mut vm)?; - let vcpus = create_vcpus(&kvm, &vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; + let vcpus = create_vcpus(&vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; // Make stdout non blocking. set_stdout_nonblocking(); @@ -224,7 +224,7 @@ fn create_vmm_and_vcpus( // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. #[cfg(target_arch = "aarch64")] let vcpus = { - let vcpus = create_vcpus(&kvm, &vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; + let vcpus = create_vcpus(&vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; setup_interrupt_controller(&mut vm, vcpu_count)?; vcpus }; @@ -746,16 +746,11 @@ fn attach_legacy_devices_aarch64( .map_err(VmmError::RegisterMMIODevice) } -fn create_vcpus( - kvm: &Kvm, - vm: &Vm, - vcpu_count: u8, - exit_evt: &EventFd, -) -> Result, VmmError> { +fn create_vcpus(vm: &Vm, vcpu_count: u8, exit_evt: &EventFd) -> Result, VmmError> { let mut vcpus = Vec::with_capacity(vcpu_count as usize); for cpu_idx in 0..vcpu_count { let exit_evt = exit_evt.try_clone().map_err(VmmError::EventFd)?; - let vcpu = Vcpu::new(cpu_idx, vm, kvm, exit_evt).map_err(VmmError::VcpuCreate)?; + let vcpu = Vcpu::new(cpu_idx, vm, exit_evt).map_err(VmmError::VcpuCreate)?; vcpus.push(vcpu); } Ok(vcpus) @@ -1164,7 +1159,7 @@ pub(crate) mod tests { #[cfg(target_arch = "aarch64")] { let exit_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - let _vcpu = Vcpu::new(1, &vm, &kvm, exit_evt).unwrap(); + let _vcpu = Vcpu::new(1, &vm, exit_evt).unwrap(); setup_interrupt_controller(&mut vm, 1).unwrap(); } @@ -1399,7 +1394,7 @@ pub(crate) mod tests { #[cfg(target_arch = "x86_64")] setup_interrupt_controller(&mut vm).unwrap(); - let vcpu_vec = create_vcpus(&kvm, &vm, vcpu_count, &evfd).unwrap(); + let vcpu_vec = create_vcpus(&vm, vcpu_count, &evfd).unwrap(); assert_eq!(vcpu_vec.len(), vcpu_count as usize); } diff --git a/src/vmm/src/vstate/kvm.rs b/src/vmm/src/vstate/kvm.rs index 59b192dbe09..20585b337fc 100644 --- a/src/vmm/src/vstate/kvm.rs +++ b/src/vmm/src/vstate/kvm.rs @@ -23,9 +23,6 @@ pub enum KvmError { configured on the /dev/kvm file's ACL. */ Kvm(kvm_ioctls::Error), #[cfg(target_arch = "x86_64")] - /// Failed to get MSR index list to save into snapshots: {0} - GetMsrsToSave(crate::arch::x86_64::msr::MsrError), - #[cfg(target_arch = "x86_64")] /// Failed to get supported cpuid: {0} GetSupportedCpuId(kvm_ioctls::Error), /// The number of configured slots is bigger than the maximum reported by KVM @@ -45,9 +42,6 @@ pub struct Kvm { #[cfg(target_arch = "x86_64")] /// Supported CpuIds. pub supported_cpuid: CpuId, - #[cfg(target_arch = "x86_64")] - /// Msrs needed to be saved on snapshot creation. - pub msrs_to_save: MsrList, } impl Kvm { @@ -82,19 +76,22 @@ impl Kvm { let supported_cpuid = kvm_fd .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES) .map_err(KvmError::GetSupportedCpuId)?; - let msrs_to_save = crate::arch::x86_64::msr::get_msrs_to_save(&kvm_fd) - .map_err(KvmError::GetMsrsToSave)?; Ok(Kvm { fd: kvm_fd, max_memslots, kvm_cap_modifiers, supported_cpuid, - msrs_to_save, }) } } + /// Msrs needed to be saved on snapshot creation. + #[cfg(target_arch = "x86_64")] + pub fn msrs_to_save(&self) -> Result { + crate::arch::x86_64::msr::get_msrs_to_save(&self.fd) + } + /// Check guest memory does not have more regions than kvm allows. pub fn check_memory(&self, guest_mem: &GuestMemoryMmap) -> Result<(), KvmError> { if guest_mem.num_regions() > self.max_memslots { diff --git a/src/vmm/src/vstate/vcpu/aarch64.rs b/src/vmm/src/vstate/vcpu/aarch64.rs index 9db1173eeb8..7bdde9b9d1f 100644 --- a/src/vmm/src/vstate/vcpu/aarch64.rs +++ b/src/vmm/src/vstate/vcpu/aarch64.rs @@ -22,7 +22,7 @@ use crate::cpu_config::aarch64::custom_cpu_template::VcpuFeatures; use crate::cpu_config::templates::CpuConfiguration; use crate::logger::{error, IncMetric, METRICS}; use crate::vcpu::{VcpuConfig, VcpuError}; -use crate::vstate::kvm::{Kvm, OptionalCapabilities}; +use crate::vstate::kvm::OptionalCapabilities; use crate::vstate::memory::{Address, GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuEmulation; use crate::vstate::vm::Vm; @@ -78,7 +78,7 @@ impl KvmVcpu { /// /// * `index` - Represents the 0-based CPU index between [0, max vcpus). /// * `vm` - The vm to which this vcpu will get attached. - pub fn new(index: u8, vm: &Vm, _: &Kvm) -> Result { + pub fn new(index: u8, vm: &Vm) -> Result { let kvm_vcpu = vm .fd() .create_vcpu(index.into()) @@ -315,7 +315,7 @@ mod tests { fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, KvmVcpu, GuestMemoryMmap) { let (kvm, mut vm, vm_mem) = setup_vm_with_memory(mem_size); - let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); vcpu.init(&[]).unwrap(); vm.setup_irqchip(1).unwrap(); @@ -324,11 +324,11 @@ mod tests { #[test] fn test_create_vcpu() { - let (kvm, vm, _) = setup_vm_with_memory(0x1000); + let (_, vm, _) = setup_vm_with_memory(0x1000); unsafe { libc::close(vm.fd().as_raw_fd()) }; - let err = KvmVcpu::new(0, &vm, &kvm); + let err = KvmVcpu::new(0, &vm); assert_eq!( err.err().unwrap().to_string(), "Error creating vcpu: Bad file descriptor (os error 9)".to_string() @@ -378,8 +378,8 @@ mod tests { #[test] fn test_init_vcpu() { - let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); - let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, mut vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); vm.setup_irqchip(1).unwrap(); // KVM_ARM_VCPU_PSCI_0_2 is set by default. @@ -397,8 +397,8 @@ mod tests { #[test] fn test_vcpu_save_restore_state() { - let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); - let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, mut vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); vm.setup_irqchip(1).unwrap(); // Calling KVM_GET_REGLIST before KVM_VCPU_INIT will result in error. @@ -441,8 +441,8 @@ mod tests { // // This should fail with ENOEXEC. // https://elixir.bootlin.com/linux/v5.10.176/source/arch/arm64/kvm/arm.c#L1165 - let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); - let vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, mut vm, _) = setup_vm_with_memory(0x1000); + let vcpu = KvmVcpu::new(0, &vm).unwrap(); vm.setup_irqchip(1).unwrap(); vcpu.dump_cpu_config().unwrap_err(); @@ -451,8 +451,8 @@ mod tests { #[test] fn test_dump_cpu_config_after_init() { // Test `dump_cpu_config()` after `KVM_VCPU_INIT`. - let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); - let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, mut vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); vm.setup_irqchip(1).unwrap(); vcpu.init(&[]).unwrap(); @@ -461,10 +461,10 @@ mod tests { #[test] fn test_setup_non_boot_vcpu() { - let (kvm, vm, _) = setup_vm_with_memory(0x1000); - let mut vcpu1 = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu1 = KvmVcpu::new(0, &vm).unwrap(); vcpu1.init(&[]).unwrap(); - let mut vcpu2 = KvmVcpu::new(1, &vm, &kvm).unwrap(); + let mut vcpu2 = KvmVcpu::new(1, &vm).unwrap(); vcpu2.init(&[]).unwrap(); } diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index d9aa0abd1a8..363964b6edc 100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -44,8 +44,6 @@ pub use aarch64::{KvmVcpuError, *}; #[cfg(target_arch = "x86_64")] pub use x86_64::{KvmVcpuError, *}; -use super::kvm::Kvm; - /// Signal number (SIGRTMIN) used to kick Vcpus. pub const VCPU_RTSIG_OFFSET: i32 = 0; @@ -214,10 +212,10 @@ impl Vcpu { /// * `index` - Represents the 0-based CPU index between [0, max vcpus). /// * `vm` - The vm to which this vcpu will get attached. /// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits. - pub fn new(index: u8, vm: &Vm, kvm: &Kvm, exit_evt: EventFd) -> Result { + pub fn new(index: u8, vm: &Vm, exit_evt: EventFd) -> Result { let (event_sender, event_receiver) = channel(); let (response_sender, response_receiver) = channel(); - let kvm_vcpu = KvmVcpu::new(index, vm, kvm).unwrap(); + let kvm_vcpu = KvmVcpu::new(index, vm).unwrap(); Ok(Vcpu { exit_evt, @@ -787,6 +785,7 @@ pub(crate) mod tests { use crate::devices::BusDevice; use crate::seccomp::get_empty_filters; use crate::utils::signal::validate_signal_num; + use crate::vstate::kvm::Kvm; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuError as EmulationError; use crate::vstate::vm::tests::setup_vm_with_memory; @@ -937,7 +936,7 @@ pub(crate) mod tests { #[cfg(target_arch = "aarch64")] let vcpu = { - let mut vcpu = Vcpu::new(1, &vm, &kvm, exit_evt).unwrap(); + let mut vcpu = Vcpu::new(1, &vm, exit_evt).unwrap(); vcpu.kvm_vcpu.init(&[]).unwrap(); vm.setup_irqchip(1).unwrap(); vcpu @@ -945,7 +944,7 @@ pub(crate) mod tests { #[cfg(target_arch = "x86_64")] let vcpu = { vm.setup_irqchip().unwrap(); - Vcpu::new(1, &vm, &kvm, exit_evt).unwrap() + Vcpu::new(1, &vm, exit_evt).unwrap() }; (kvm, vm, vcpu, gm) } diff --git a/src/vmm/src/vstate/vcpu/x86_64.rs b/src/vmm/src/vstate/vcpu/x86_64.rs index 39ff0879ee8..85c2db3868f 100644 --- a/src/vmm/src/vstate/vcpu/x86_64.rs +++ b/src/vmm/src/vstate/vcpu/x86_64.rs @@ -23,7 +23,6 @@ use crate::arch::x86_64::msr::{create_boot_msr_entries, MsrError}; use crate::arch::x86_64::regs::{SetupFpuError, SetupRegistersError, SetupSpecialRegistersError}; use crate::cpu_config::x86_64::{cpuid, CpuConfiguration}; use crate::logger::{IncMetric, METRICS}; -use crate::vstate::kvm::Kvm; use crate::vstate::memory::{Address, GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::{VcpuConfig, VcpuEmulation}; use crate::vstate::vm::Vm; @@ -165,7 +164,7 @@ impl KvmVcpu { /// /// * `index` - Represents the 0-based CPU index between [0, max vcpus). /// * `vm` - The vm to which this vcpu will get attached. - pub fn new(index: u8, vm: &Vm, kvm: &Kvm) -> Result { + pub fn new(index: u8, vm: &Vm) -> Result { let kvm_vcpu = vm .fd() .create_vcpu(index.into()) @@ -175,7 +174,7 @@ impl KvmVcpu { index, fd: kvm_vcpu, peripherals: Default::default(), - msrs_to_save: kvm.msrs_to_save.as_slice().to_vec(), + msrs_to_save: vm.msrs_to_save().to_vec(), }) } @@ -726,6 +725,7 @@ mod tests { StaticCpuTemplate, }; use crate::cpu_config::x86_64::cpuid::{Cpuid, CpuidEntry, CpuidKey}; + use crate::vstate::kvm::Kvm; use crate::vstate::vm::tests::{setup_vm, setup_vm_with_memory}; use crate::vstate::vm::Vm; @@ -750,7 +750,7 @@ mod tests { fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, KvmVcpu, GuestMemoryMmap) { let (kvm, vm, vm_mem) = setup_vm_with_memory(mem_size); vm.setup_irqchip().unwrap(); - let vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let vcpu = KvmVcpu::new(0, &vm).unwrap(); (kvm, vm, vcpu, vm_mem) } @@ -1168,11 +1168,11 @@ mod tests { #[test] fn test_get_msr_chunks_preserved_order() { // Regression test for #4666 - let (kvm, vm) = setup_vm(); - let vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + let (_, vm) = setup_vm(); + let vcpu = KvmVcpu::new(0, &vm).unwrap(); // The list of supported MSR indices, in the order they were returned by KVM - let msrs_to_save = kvm.msrs_to_save; + let msrs_to_save = vm.msrs_to_save(); // The MSRs after processing. The order should be identical to the one returned by KVM, with // the exception of deferred MSRs, which should be moved to the end (but show up in the same // order as they are listed in [`DEFERRED_MSRS`]. @@ -1185,7 +1185,6 @@ mod tests { .flat_map(|chunk| chunk.as_slice().iter()) .zip( msrs_to_save - .as_slice() .iter() .filter(|&idx| !DEFERRED_MSRS.contains(idx)) .chain(DEFERRED_MSRS.iter()), diff --git a/src/vmm/src/vstate/vm/aarch64.rs b/src/vmm/src/vstate/vm/aarch64.rs index 9ff48c6c7ee..593ae7bc5ea 100644 --- a/src/vmm/src/vstate/vm/aarch64.rs +++ b/src/vmm/src/vstate/vm/aarch64.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use super::VmError; use crate::arch::aarch64::gic::GicState; -use crate::vstate::kvm::Kvm; +use crate::Kvm; /// Structure representing the current architecture's understand of what a "virtual machine" is. #[derive(Debug)] diff --git a/src/vmm/src/vstate/vm/x86_64.rs b/src/vmm/src/vstate/vm/x86_64.rs index 1e4bef11ce5..19af125e5ec 100644 --- a/src/vmm/src/vstate/vm/x86_64.rs +++ b/src/vmm/src/vstate/vm/x86_64.rs @@ -4,12 +4,13 @@ use std::fmt; use kvm_bindings::{ - kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, KVM_CLOCK_TSC_STABLE, + kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, MsrList, KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_PIT_SPEAKER_DUMMY, }; use kvm_ioctls::VmFd; use serde::{Deserialize, Serialize}; +use crate::arch::x86_64::msr::MsrError; use crate::vstate::vm::VmError; /// Error type for [`Vm::restore_state`] @@ -39,19 +40,23 @@ pub enum ArchVmError { VmSetClock(kvm_ioctls::Error), /// Failed to set KVM vm irqchip: {0} VmSetIrqChip(kvm_ioctls::Error), + /// Failed to get MSR index list to save into snapshots: {0} + GetMsrsToSave(MsrError), } /// Structure representing the current architecture's understand of what a "virtual machine" is. #[derive(Debug)] pub struct ArchVm { pub(super) fd: VmFd, + msrs_to_save: MsrList, } impl ArchVm { /// Create a new `Vm` struct. pub fn new(kvm: &crate::vstate::kvm::Kvm) -> Result { let fd = kvm.fd.create_vm().map_err(VmError::VmFd)?; - Ok(ArchVm { fd }) + let msrs_to_save = kvm.msrs_to_save().map_err(ArchVmError::GetMsrsToSave)?; + Ok(ArchVm { fd, msrs_to_save }) } /// Restores the KVM VM state. @@ -139,6 +144,11 @@ impl ArchVm { ioapic, }) } + + /// Gets the list of MSRs to save when creating snapshots + pub fn msrs_to_save(&self) -> &[u32] { + self.msrs_to_save.as_slice() + } } #[derive(Default, Deserialize, Serialize)] From fabc1b60975430d6205126784425e22fb4ca26c6 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Wed, 29 Jan 2025 13:55:46 +0000 Subject: [PATCH 05/11] refactor: move vcpu creation into `struct Vm` This abstracts away the weird irqchip setup dance that builder.rs has been doing (where depending on architecture, irqchip needs to be created before or after vcpu creation. Removes a whole bunch of code that builder.rs that didn't really have too much business being there Signed-off-by: Patrick Roy --- src/vmm/src/builder.rs | 102 ++++----------------------- src/vmm/src/device_manager/legacy.rs | 4 +- src/vmm/src/device_manager/mmio.rs | 14 ++-- src/vmm/src/vstate/vm/aarch64.rs | 12 ++++ src/vmm/src/vstate/vm/mod.rs | 47 +++++++++++- src/vmm/src/vstate/vm/x86_64.rs | 9 +++ 6 files changed, 87 insertions(+), 101 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 1a15dbce2f7..b450ce50b10 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -71,7 +71,7 @@ use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError}; use crate::vstate::kvm::Kvm; use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap}; use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuError}; -use crate::vstate::vm::{Vm, VmError}; +use crate::vstate::vm::Vm; use crate::{device_manager, EventManager, Vmm, VmmError}; /// Errors associated with starting the instance. @@ -175,10 +175,6 @@ fn create_vmm_and_vcpus( .map_err(VmmError::Vm) .map_err(StartMicrovmError::Internal)?; - let vcpus_exit_evt = EventFd::new(libc::EFD_NONBLOCK) - .map_err(VmmError::EventFd) - .map_err(Internal)?; - let resource_allocator = ResourceAllocator::new()?; // Instantiate the MMIO device manager. @@ -187,13 +183,13 @@ fn create_vmm_and_vcpus( // Instantiate ACPI device manager. let acpi_device_manager = ACPIDeviceManager::new(); - // For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` - // while on aarch64 we need to do it the other way around. - #[cfg(target_arch = "x86_64")] - let (vcpus, pio_device_manager) = { - setup_interrupt_controller(&mut vm)?; - let vcpus = create_vcpus(&vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; + let (vcpus, vcpus_exit_evt) = vm + .create_vcpus(vcpu_count) + .map_err(VmmError::Vm) + .map_err(Internal)?; + #[cfg(target_arch = "x86_64")] + let pio_device_manager = { // Make stdout non blocking. set_stdout_nonblocking(); @@ -208,25 +204,10 @@ fn create_vmm_and_vcpus( .map_err(Internal)?; // create pio dev manager with legacy devices - let pio_device_manager = { - // TODO Remove these unwraps. - let mut pio_dev_mgr = PortIODeviceManager::new(serial_device, reset_evt).unwrap(); - pio_dev_mgr.register_devices(vm.fd()).unwrap(); - pio_dev_mgr - }; - - (vcpus, pio_device_manager) - }; - - // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) before setting up the - // IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP - // was already initialized. - // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. - #[cfg(target_arch = "aarch64")] - let vcpus = { - let vcpus = create_vcpus(&vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; - setup_interrupt_controller(&mut vm, vcpu_count)?; - vcpus + // TODO Remove these unwraps. + let mut pio_dev_mgr = PortIODeviceManager::new(serial_device, reset_evt).unwrap(); + pio_dev_mgr.register_devices(vm.fd()).unwrap(); + pio_dev_mgr }; let vmm = Vmm { @@ -671,24 +652,6 @@ where }) } -/// Sets up the irqchip for a x86_64 microVM. -#[cfg(target_arch = "x86_64")] -pub fn setup_interrupt_controller(vm: &mut Vm) -> Result<(), StartMicrovmError> { - vm.setup_irqchip() - .map_err(VmError::Arch) - .map_err(VmmError::Vm) - .map_err(StartMicrovmError::Internal) -} - -/// Sets up the irqchip for a aarch64 microVM. -#[cfg(target_arch = "aarch64")] -pub fn setup_interrupt_controller(vm: &mut Vm, vcpu_count: u8) -> Result<(), StartMicrovmError> { - vm.setup_irqchip(vcpu_count) - .map_err(VmError::Arch) - .map_err(VmmError::Vm) - .map_err(StartMicrovmError::Internal) -} - /// Sets up the serial device. pub fn setup_serial_device( event_manager: &mut EventManager, @@ -746,16 +709,6 @@ fn attach_legacy_devices_aarch64( .map_err(VmmError::RegisterMMIODevice) } -fn create_vcpus(vm: &Vm, vcpu_count: u8, exit_evt: &EventFd) -> Result, VmmError> { - let mut vcpus = Vec::with_capacity(vcpu_count as usize); - for cpu_idx in 0..vcpu_count { - let exit_evt = exit_evt.try_clone().map_err(VmmError::EventFd)?; - let vcpu = Vcpu::new(cpu_idx, vm, exit_evt).map_err(VmmError::VcpuCreate)?; - vcpus.push(vcpu); - } - Ok(vcpus) -} - /// Configures the system for booting Linux. #[cfg_attr(target_arch = "aarch64", allow(unused))] pub fn configure_system_for_boot( @@ -1127,11 +1080,6 @@ pub(crate) mod tests { pub(crate) fn default_vmm() -> Vmm { let guest_memory = arch_mem(128 << 20); - let vcpus_exit_evt = EventFd::new(libc::EFD_NONBLOCK) - .map_err(VmmError::EventFd) - .map_err(StartMicrovmError::Internal) - .unwrap(); - let kvm = Kvm::new(vec![]).unwrap(); let mut vm = Vm::new(&kvm).unwrap(); vm.memory_init(&guest_memory).unwrap(); @@ -1153,15 +1101,7 @@ pub(crate) mod tests { ) .unwrap(); - #[cfg(target_arch = "x86_64")] - setup_interrupt_controller(&mut vm).unwrap(); - - #[cfg(target_arch = "aarch64")] - { - let exit_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - let _vcpu = Vcpu::new(1, &vm, exit_evt).unwrap(); - setup_interrupt_controller(&mut vm, 1).unwrap(); - } + let (_, vcpus_exit_evt) = vm.create_vcpus(1).unwrap(); Vmm { events_observer: Some(std::io::stdin()), @@ -1380,24 +1320,6 @@ pub(crate) mod tests { ); } - #[test] - fn test_create_vcpus() { - let vcpu_count = 2; - let guest_memory = arch_mem(128 << 20); - - let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); - #[allow(unused_mut)] - let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_memory).unwrap(); - let evfd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - - #[cfg(target_arch = "x86_64")] - setup_interrupt_controller(&mut vm).unwrap(); - - let vcpu_vec = create_vcpus(&vm, vcpu_count, &evfd).unwrap(); - assert_eq!(vcpu_vec.len(), vcpu_count as usize); - } - #[test] fn test_attach_net_devices() { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); diff --git a/src/vmm/src/device_manager/legacy.rs b/src/vmm/src/device_manager/legacy.rs index 8526d3c2901..eee2f86dc83 100644 --- a/src/vmm/src/device_manager/legacy.rs +++ b/src/vmm/src/device_manager/legacy.rs @@ -248,8 +248,8 @@ mod tests { #[test] fn test_register_legacy_devices() { - let (_, mut vm, _) = setup_vm_with_memory(0x1000); - crate::builder::setup_interrupt_controller(&mut vm).unwrap(); + let (_, vm, _) = setup_vm_with_memory(0x1000); + vm.setup_irqchip().unwrap(); let mut ldm = PortIODeviceManager::new( Arc::new(Mutex::new(BusDevice::Serial(SerialDevice { serial: Serial::with_events( diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index d35c8b36650..e4e5e68dcb8 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -549,7 +549,7 @@ mod tests { use crate::test_utils::multi_region_mem; use crate::vstate::kvm::Kvm; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; - use crate::{builder, Vm}; + use crate::Vm; const QUEUE_SIZES: &[u16] = &[64]; @@ -673,9 +673,9 @@ mod tests { let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap(); let dummy = Arc::new(Mutex::new(DummyDevice::new())); #[cfg(target_arch = "x86_64")] - builder::setup_interrupt_controller(&mut vm).unwrap(); + vm.setup_irqchip().unwrap(); #[cfg(target_arch = "aarch64")] - builder::setup_interrupt_controller(&mut vm, 1).unwrap(); + vm.setup_irqchip(1).unwrap(); device_manager .register_virtio_test_device( @@ -702,9 +702,9 @@ mod tests { let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap(); #[cfg(target_arch = "x86_64")] - builder::setup_interrupt_controller(&mut vm).unwrap(); + vm.setup_irqchip().unwrap(); #[cfg(target_arch = "aarch64")] - builder::setup_interrupt_controller(&mut vm, 1).unwrap(); + vm.setup_irqchip(1).unwrap(); for _i in crate::arch::IRQ_BASE..=crate::arch::IRQ_MAX { device_manager @@ -756,9 +756,9 @@ mod tests { let mem_clone = guest_mem.clone(); #[cfg(target_arch = "x86_64")] - builder::setup_interrupt_controller(&mut vm).unwrap(); + vm.setup_irqchip().unwrap(); #[cfg(target_arch = "aarch64")] - builder::setup_interrupt_controller(&mut vm, 1).unwrap(); + vm.setup_irqchip(1).unwrap(); let mut device_manager = MMIODeviceManager::new(); let mut resource_allocator = ResourceAllocator::new().unwrap(); diff --git a/src/vmm/src/vstate/vm/aarch64.rs b/src/vmm/src/vstate/vm/aarch64.rs index 593ae7bc5ea..a059803ba97 100644 --- a/src/vmm/src/vstate/vm/aarch64.rs +++ b/src/vmm/src/vstate/vm/aarch64.rs @@ -37,6 +37,18 @@ impl ArchVm { }) } + pub(super) fn arch_pre_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> { + Ok(()) + } + + pub(super) fn arch_post_create_vcpus(&mut self, nr_vcpus: u8) -> Result<(), ArchVmError> { + // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) before setting up the + // IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP + // was already initialized. + // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. + self.setup_irqchip(nr_vcpus) + } + /// Creates the GIC (Global Interrupt Controller). pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), ArchVmError> { self.irqchip_handle = Some( diff --git a/src/vmm/src/vstate/vm/mod.rs b/src/vmm/src/vstate/vm/mod.rs index df4e72c2e95..c34ac3c7d52 100644 --- a/src/vmm/src/vstate/vm/mod.rs +++ b/src/vmm/src/vstate/vm/mod.rs @@ -7,6 +7,7 @@ use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; use kvm_ioctls::VmFd; +use vmm_sys_util::eventfd::EventFd; #[cfg(target_arch = "x86_64")] use crate::utils::u64_to_usize; @@ -21,10 +22,13 @@ mod arch; pub use arch::{ArchVm as Vm, ArchVmError, VmState}; +use crate::vstate::vcpu::VcpuError; +use crate::Vcpu; + /// Errors associated with the wrappers over KVM ioctls. /// Needs `rustfmt::skip` to make multiline comments work #[rustfmt::skip] -#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +#[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum VmError { /// Cannot set the memory regions: {0} SetUserMemoryRegion(kvm_ioctls::Error), @@ -34,10 +38,34 @@ pub enum VmError { VmSetup(kvm_ioctls::Error), /// {0} Arch(#[from] ArchVmError), + /// Error during eventfd operations: {0} + EventFd(std::io::Error), + /// Failed to create vcpu: {0} + CreateVcpu(VcpuError), } /// Contains Vm functions that are usable across CPU architectures impl Vm { + /// Creates the specified number of [`Vcpu`]s. + /// + /// The returned [`EventFd`] is written to whenever any of the vcpus exit. + pub fn create_vcpus(&mut self, vcpu_count: u8) -> Result<(Vec, EventFd), VmError> { + self.arch_pre_create_vcpus(vcpu_count)?; + + let exit_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(VmError::EventFd)?; + + let mut vcpus = Vec::with_capacity(vcpu_count as usize); + for cpu_idx in 0..vcpu_count { + let exit_evt = exit_evt.try_clone().map_err(VmError::EventFd)?; + let vcpu = Vcpu::new(cpu_idx, self, exit_evt).map_err(VmError::CreateVcpu)?; + vcpus.push(vcpu); + } + + self.arch_post_create_vcpus(vcpu_count)?; + + Ok((vcpus, exit_evt)) + } + /// Initializes the guest memory. pub fn memory_init(&self, guest_mem: &GuestMemoryMmap) -> Result<(), VmError> { self.set_kvm_memory_regions(guest_mem)?; @@ -88,7 +116,7 @@ impl Vm { #[cfg(test)] pub(crate) mod tests { use super::*; - use crate::test_utils::single_region_mem; + use crate::test_utils::{arch_mem, single_region_mem}; use crate::vstate::kvm::Kvm; use crate::vstate::memory::GuestMemoryMmap; @@ -139,4 +167,19 @@ pub(crate) mod tests { "Cannot set the memory regions: Invalid argument (os error 22)" ); } + + #[test] + fn test_create_vcpus() { + let vcpu_count = 2; + let guest_memory = arch_mem(128 << 20); + + let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); + #[allow(unused_mut)] + let mut vm = Vm::new(&kvm).unwrap(); + vm.memory_init(&guest_memory).unwrap(); + + let (vcpu_vec, _) = vm.create_vcpus(vcpu_count).unwrap(); + + assert_eq!(vcpu_vec.len(), vcpu_count as usize); + } } diff --git a/src/vmm/src/vstate/vm/x86_64.rs b/src/vmm/src/vstate/vm/x86_64.rs index 19af125e5ec..483339521dc 100644 --- a/src/vmm/src/vstate/vm/x86_64.rs +++ b/src/vmm/src/vstate/vm/x86_64.rs @@ -59,6 +59,15 @@ impl ArchVm { Ok(ArchVm { fd, msrs_to_save }) } + pub(super) fn arch_pre_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> { + // For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` + self.setup_irqchip() + } + + pub(super) fn arch_post_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> { + Ok(()) + } + /// Restores the KVM VM state. /// /// # Errors From 414ea122071e3e6f034df5bf4d9907b0e594f1f2 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 31 Jan 2025 14:05:34 +0000 Subject: [PATCH 06/11] refactor(test): use vm.create_vcpus() instead of open coding it Instead of doing the irqchip dance inside unit testing code, just call vm.create_vcpus, which does it for us. Signed-off-by: Patrick Roy --- src/vmm/src/device_manager/mmio.rs | 3 +++ src/vmm/src/vstate/vcpu/mod.rs | 16 ++++------------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index e4e5e68dcb8..dc814de9766 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -660,6 +660,7 @@ mod tests { } #[test] + #[cfg_attr(target_arch = "x86_64", allow(unused_mut))] fn test_register_virtio_device() { let start_addr1 = GuestAddress(0x0); let start_addr2 = GuestAddress(0x1000); @@ -690,6 +691,7 @@ mod tests { } #[test] + #[cfg_attr(target_arch = "x86_64", allow(unused_mut))] fn test_register_too_many_devices() { let start_addr1 = GuestAddress(0x0); let start_addr2 = GuestAddress(0x1000); @@ -745,6 +747,7 @@ mod tests { } #[test] + #[cfg_attr(target_arch = "x86_64", allow(unused_mut))] fn test_device_info() { let start_addr1 = GuestAddress(0x0); let start_addr2 = GuestAddress(0x1000); diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index 363964b6edc..9ad8f867145 100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -932,20 +932,12 @@ pub(crate) mod tests { pub(crate) fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, Vcpu, GuestMemoryMmap) { let (kvm, mut vm, gm) = setup_vm_with_memory(mem_size); - let exit_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let (mut vcpus, _) = vm.create_vcpus(1).unwrap(); + let mut vcpu = vcpus.remove(0); #[cfg(target_arch = "aarch64")] - let vcpu = { - let mut vcpu = Vcpu::new(1, &vm, exit_evt).unwrap(); - vcpu.kvm_vcpu.init(&[]).unwrap(); - vm.setup_irqchip(1).unwrap(); - vcpu - }; - #[cfg(target_arch = "x86_64")] - let vcpu = { - vm.setup_irqchip().unwrap(); - Vcpu::new(1, &vm, exit_evt).unwrap() - }; + vcpu.kvm_vcpu.init(&[]).unwrap(); + (kvm, vm, vcpu, gm) } From 7975ce32a1c31ac7b299bda843face08ce5d0a0b Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 31 Jan 2025 14:59:10 +0000 Subject: [PATCH 07/11] refactor(test): be more consistent in using vm-creation utilities We have some utility functions for creating dummy VMs/etc. in unittests, so let's try to use them instead of copy-pasting the same few lines around. Signed-off-by: Patrick Roy --- src/vmm/src/builder.rs | 8 +++----- src/vmm/src/vstate/vm/mod.rs | 9 ++------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index b450ce50b10..3f6847aa8dc 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -1015,7 +1015,7 @@ pub(crate) mod tests { use crate::devices::virtio::{TYPE_BALLOON, TYPE_BLOCK, TYPE_RNG}; use crate::mmds::data_store::{Mmds, MmdsVersion}; use crate::mmds::ns::MmdsNetworkStack; - use crate::test_utils::{arch_mem, single_region_mem, single_region_mem_at}; + use crate::test_utils::{single_region_mem, single_region_mem_at}; use crate::vmm_config::balloon::{BalloonBuilder, BalloonDeviceConfig, BALLOON_DEV_ID}; use crate::vmm_config::boot_source::DEFAULT_KERNEL_CMDLINE; use crate::vmm_config::drive::{BlockBuilder, BlockDeviceConfig}; @@ -1023,6 +1023,7 @@ pub(crate) mod tests { use crate::vmm_config::net::{NetBuilder, NetworkInterfaceConfig}; use crate::vmm_config::vsock::tests::default_config; use crate::vmm_config::vsock::{VsockBuilder, VsockDeviceConfig}; + use crate::vstate::vm::tests::setup_vm_with_memory; #[derive(Debug)] pub(crate) struct CustomBlockConfig { @@ -1078,11 +1079,8 @@ pub(crate) mod tests { } pub(crate) fn default_vmm() -> Vmm { - let guest_memory = arch_mem(128 << 20); + let (kvm, mut vm, guest_memory) = setup_vm_with_memory(128 << 20); - let kvm = Kvm::new(vec![]).unwrap(); - let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_memory).unwrap(); let mmio_device_manager = MMIODeviceManager::new(); let acpi_device_manager = ACPIDeviceManager::new(); #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/src/vstate/vm/mod.rs b/src/vmm/src/vstate/vm/mod.rs index c34ac3c7d52..3277564f725 100644 --- a/src/vmm/src/vstate/vm/mod.rs +++ b/src/vmm/src/vstate/vm/mod.rs @@ -116,7 +116,7 @@ impl Vm { #[cfg(test)] pub(crate) mod tests { use super::*; - use crate::test_utils::{arch_mem, single_region_mem}; + use crate::test_utils::single_region_mem; use crate::vstate::kvm::Kvm; use crate::vstate::memory::GuestMemoryMmap; @@ -171,12 +171,7 @@ pub(crate) mod tests { #[test] fn test_create_vcpus() { let vcpu_count = 2; - let guest_memory = arch_mem(128 << 20); - - let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); - #[allow(unused_mut)] - let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_memory).unwrap(); + let (_, mut vm, _) = setup_vm_with_memory(128 << 20); let (vcpu_vec, _) = vm.create_vcpus(vcpu_count).unwrap(); From ece1cda157db3a111abe6cbd011aea15485e5239 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 4 Feb 2025 11:58:33 +0000 Subject: [PATCH 08/11] refactor: avoid an unwrap in set_kvm_memory_regions There's no need to use the GuestMemoryMmap to resolve the pointer to the start of the GuestMemoryRegion by resolving the guest physical address it starts at - because we literally store that pointer inside the GuestMemoryRegion, so can just use that. Signed-off-by: Patrick Roy --- src/vmm/src/vstate/vm/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vmm/src/vstate/vm/mod.rs b/src/vmm/src/vstate/vm/mod.rs index 3277564f725..9c1395e07d7 100644 --- a/src/vmm/src/vstate/vm/mod.rs +++ b/src/vmm/src/vstate/vm/mod.rs @@ -95,8 +95,7 @@ impl Vm { slot, guest_phys_addr: region.start_addr().raw_value(), memory_size: region.len(), - // It's safe to unwrap because the guest address is valid. - userspace_addr: guest_mem.get_host_address(region.start_addr()).unwrap() as u64, + userspace_addr: region.as_ptr() as u64, flags, }; From f462d4fbc1a0133220acae75bf31d3e55c6b283d Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 4 Feb 2025 14:09:57 +0000 Subject: [PATCH 09/11] refactor: move KVM_SET_TSS_ADDR call into Vm::arch_create Since we issue this ioctl with a constant address, it doesn't really matter _when_ we do it. Internally, it causes a hidden memslot of size 3 pages to be created. If we create it _after_ all other memslots, then this ioctl can fail with EEXIST if it would overlap some pre-existing memslot. Now, instead the creation of the overlapping memslot would fail with EEXIST. But this is a moot point, because if Firecracker ever tried to create a memslot that overlaps this one, something is fundamentally broken. Signed-off-by: Patrick Roy --- src/vmm/src/vstate/vm/mod.rs | 10 +--------- src/vmm/src/vstate/vm/x86_64.rs | 7 +++++++ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/vmm/src/vstate/vm/mod.rs b/src/vmm/src/vstate/vm/mod.rs index 9c1395e07d7..7d9d3e04ba9 100644 --- a/src/vmm/src/vstate/vm/mod.rs +++ b/src/vmm/src/vstate/vm/mod.rs @@ -9,8 +9,6 @@ use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; use kvm_ioctls::VmFd; use vmm_sys_util::eventfd::EventFd; -#[cfg(target_arch = "x86_64")] -use crate::utils::u64_to_usize; use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; #[cfg(target_arch = "x86_64")] @@ -68,13 +66,7 @@ impl Vm { /// Initializes the guest memory. pub fn memory_init(&self, guest_mem: &GuestMemoryMmap) -> Result<(), VmError> { - self.set_kvm_memory_regions(guest_mem)?; - #[cfg(target_arch = "x86_64")] - self.fd - .set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS)) - .map_err(VmError::VmSetup)?; - - Ok(()) + self.set_kvm_memory_regions(guest_mem) } pub(crate) fn set_kvm_memory_regions( diff --git a/src/vmm/src/vstate/vm/x86_64.rs b/src/vmm/src/vstate/vm/x86_64.rs index 483339521dc..5dd904a15f5 100644 --- a/src/vmm/src/vstate/vm/x86_64.rs +++ b/src/vmm/src/vstate/vm/x86_64.rs @@ -11,6 +11,7 @@ use kvm_ioctls::VmFd; use serde::{Deserialize, Serialize}; use crate::arch::x86_64::msr::MsrError; +use crate::utils::u64_to_usize; use crate::vstate::vm::VmError; /// Error type for [`Vm::restore_state`] @@ -42,6 +43,8 @@ pub enum ArchVmError { VmSetIrqChip(kvm_ioctls::Error), /// Failed to get MSR index list to save into snapshots: {0} GetMsrsToSave(MsrError), + /// Failed during KVM_SET_TSS_ADDRESS: {0} + SetTssAddress(kvm_ioctls::Error), } /// Structure representing the current architecture's understand of what a "virtual machine" is. @@ -56,6 +59,10 @@ impl ArchVm { pub fn new(kvm: &crate::vstate::kvm::Kvm) -> Result { let fd = kvm.fd.create_vm().map_err(VmError::VmFd)?; let msrs_to_save = kvm.msrs_to_save().map_err(ArchVmError::GetMsrsToSave)?; + + fd.set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS)) + .map_err(ArchVmError::SetTssAddress)?; + Ok(ArchVm { fd, msrs_to_save }) } From 7b3951559cbf25907f405f44947773a25f34b5f5 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 7 Feb 2025 17:17:03 +0000 Subject: [PATCH 10/11] fix: delete never constructed error enum variants All of these were dead code (but the compiler isn't able to tell us because the vmm crate is a library crate where everything is `pub`, and it doesn't know that no downstream users outside of the `firecracker` crate exist, and that everything that's not used in `firecracker` is dead code). Signed-off-by: Patrick Roy --- src/vmm/src/persist.rs | 13 ------------- src/vmm/src/vstate/memory.rs | 4 ---- src/vmm/src/vstate/vm/mod.rs | 2 -- src/vmm/src/vstate/vm/x86_64.rs | 4 ---- 4 files changed, 23 deletions(-) diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 05529107b6b..d62ffc84600 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -114,18 +114,10 @@ pub struct GuestRegionUffdMapping { /// Errors related to saving and restoring Microvm state. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum MicrovmStateError { - /// Compatibility checks failed: {0} - IncompatibleState(String), - /// Provided MicroVM state is invalid. - InvalidInput, /// Operation not allowed: {0} NotAllowed(String), /// Cannot restore devices: {0} RestoreDevices(DevicePersistError), - /// Cannot restore Vcpu state: {0} - RestoreVcpuState(vstate::vcpu::VcpuError), - /// Cannot restore Vm state: {0} - RestoreVmState(vstate::vm::VmError), /// Cannot save Vcpu state: {0} SaveVcpuState(vstate::vcpu::VcpuError), /// Cannot save Vm state: {0} @@ -142,9 +134,6 @@ pub enum MicrovmStateError { pub enum CreateSnapshotError { /// Cannot get dirty bitmap: {0} DirtyBitmap(VmmError), - #[rustfmt::skip] - /// Cannot translate microVM version to snapshot data version - UnsupportedVersion, /// Cannot write memory file: {0} Memory(MemoryError), /// Cannot perform {0} on the memory backing file: {1} @@ -155,8 +144,6 @@ pub enum CreateSnapshotError { SerializeMicrovmState(crate::snapshot::SnapshotError), /// Cannot perform {0} on the snapshot backing file: {1} SnapshotBackingFile(&'static str, io::Error), - /// Size mismatch when writing diff snapshot on top of base layer: base layer size is {0} but diff layer is size {1}. - SnapshotBackingFileLengthMismatch(u64, u64), } /// Snapshot version diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 23ce07d06b9..228c8b8f062 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -36,10 +36,6 @@ pub type GuestMmapRegion = vm_memory::MmapRegion>; /// Errors associated with dumping guest memory to file. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum MemoryError { - /// Cannot create memory: {0} - CreateMemory(VmMemoryError), - /// Cannot create memory region: {0} - CreateRegion(MmapRegionError), /// Cannot fetch system's page size: {0} PageSize(errno::Error), /// Cannot dump memory: {0} diff --git a/src/vmm/src/vstate/vm/mod.rs b/src/vmm/src/vstate/vm/mod.rs index 7d9d3e04ba9..c9c0e7e7c2f 100644 --- a/src/vmm/src/vstate/vm/mod.rs +++ b/src/vmm/src/vstate/vm/mod.rs @@ -32,8 +32,6 @@ pub enum VmError { SetUserMemoryRegion(kvm_ioctls::Error), /// Cannot open the VM file descriptor: {0} VmFd(kvm_ioctls::Error), - /// Cannot configure the microvm: {0} - VmSetup(kvm_ioctls::Error), /// {0} Arch(#[from] ArchVmError), /// Error during eventfd operations: {0} diff --git a/src/vmm/src/vstate/vm/x86_64.rs b/src/vmm/src/vstate/vm/x86_64.rs index 5dd904a15f5..f44e825b6f9 100644 --- a/src/vmm/src/vstate/vm/x86_64.rs +++ b/src/vmm/src/vstate/vm/x86_64.rs @@ -35,10 +35,6 @@ pub enum ArchVmError { VmGetClock(kvm_ioctls::Error), /// Failed to get KVM vm irqchip: {0} VmGetIrqChip(kvm_ioctls::Error), - /// Failed to set KVM vm pit state: {0} - VmSetPit2(kvm_ioctls::Error), - /// Failed to set KVM vm clock: {0} - VmSetClock(kvm_ioctls::Error), /// Failed to set KVM vm irqchip: {0} VmSetIrqChip(kvm_ioctls::Error), /// Failed to get MSR index list to save into snapshots: {0} From 8a5cf783da2b4409b542c01507c3678823a49ba9 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 10 Feb 2025 16:57:42 +0000 Subject: [PATCH 11/11] fix(test): dont drop guest memory before using it Dropping a GuestMemoryMmap causes the memory to get unmapped, so if we try to KVM_RUN after dropping it, we'll just get EFAULT. Admittedly no idea how we didn't run into this issue before. Signed-off-by: Patrick Roy --- src/vmm/src/vstate/vcpu/mod.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index 9ad8f867145..b120a9f2c07 100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -969,7 +969,7 @@ pub(crate) mod tests { entry_addr.unwrap().kernel_load } - fn vcpu_configured_for_boot() -> (VcpuHandle, vmm_sys_util::eventfd::EventFd) { + fn vcpu_configured_for_boot() -> (VcpuHandle, EventFd, GuestMemoryMmap) { Vcpu::register_kick_signal_handler(); // Need enough mem to boot linux. let mem_size = 64 << 20; @@ -1021,7 +1021,7 @@ pub(crate) mod tests { // Wait for vCPUs to initialize their TLS before moving forward. barrier.wait(); - (vcpu_handle, vcpu_exit_evt) + (vcpu_handle, vcpu_exit_evt, vm_mem) } #[test] @@ -1034,7 +1034,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_tls() { - let (_, _, mut vcpu, _) = setup_vcpu(0x1000); + let (_, _, mut vcpu, _mem) = setup_vcpu(0x1000); // Running on the TLS vcpu should fail before we actually initialize it. unsafe { @@ -1075,7 +1075,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_kick() { Vcpu::register_kick_signal_handler(); - let (_, vm, mut vcpu, _) = setup_vcpu(0x1000); + let (_, vm, mut vcpu, _mem) = setup_vcpu(0x1000); let mut kvm_run = kvm_ioctls::KvmRunWrapper::mmap_from_fd(&vcpu.kvm_vcpu.fd, vm.fd().run_size()) @@ -1130,7 +1130,7 @@ pub(crate) mod tests { #[test] fn test_immediate_exit_shortcircuits_execution() { - let (_, _, mut vcpu, _) = setup_vcpu(0x1000); + let (_, _, mut vcpu, _mem) = setup_vcpu(0x1000); vcpu.kvm_vcpu.fd.set_kvm_immediate_exit(1); // Set a dummy value to be returned by the emulate call @@ -1155,7 +1155,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_pause_resume() { - let (vcpu_handle, vcpu_exit_evt) = vcpu_configured_for_boot(); + let (vcpu_handle, vcpu_exit_evt, _mem) = vcpu_configured_for_boot(); // Queue a Resume event, expect a response. queue_event_expect_response(&vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); @@ -1187,7 +1187,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_save_state_events() { - let (vcpu_handle, _vcpu_exit_evt) = vcpu_configured_for_boot(); + let (vcpu_handle, _vcpu_exit_evt, _mem) = vcpu_configured_for_boot(); // Queue a Resume event, expect a response. queue_event_expect_response(&vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); @@ -1220,7 +1220,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_dump_cpu_config() { - let (vcpu_handle, _) = vcpu_configured_for_boot(); + let (vcpu_handle, _, _mem) = vcpu_configured_for_boot(); // Queue a DumpCpuConfig event, expect a DumpedCpuConfig response. vcpu_handle