Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 36 additions & 43 deletions src/vmm/src/devices/virtio/iov_deque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use std::os::fd::AsRawFd;
use libc::{c_int, c_void, iovec, off_t, size_t};
use memfd;

use super::queue::FIRECRACKER_MAX_QUEUE_SIZE;
use crate::arch::PAGE_SIZE;

#[derive(Debug, thiserror::Error, displaydoc::Display)]
Expand Down Expand Up @@ -69,26 +68,42 @@ pub enum IovDequeError {
//
// Like that, the elements stored in the buffer are always laid out in contiguous virtual memory,
// so making a slice out of them does not require any copies.
//
// The `L` const generic determines the maximum number of `iovec` elements the queue should hold.
//
// ```Rust
// pub struct iovec {
// pub iov_base: *mut ::c_void,
// pub iov_len: ::size_t,
// }
// ```
//
// This value must be a multiple of 256 because this is the maximum number of `iovec` can fit into
// 1 memory page: 256 * sizeof(iovec) == 4096 == PAGE_SIZE. IovDeque only operates with `PAGE_SIZE`
// granularity.
#[derive(Debug)]
pub struct IovDeque {
pub struct IovDeque<const L: u16> {
pub iov: *mut libc::iovec,
pub start: u16,
pub len: u16,
}

// SAFETY: This is `Send`. We hold sole ownership of the underlying buffer.
unsafe impl Send for IovDeque {}
unsafe impl<const L: u16> Send for IovDeque<L> {}

impl<const L: u16> IovDeque<L> {
const BYTES: usize = L as usize * std::mem::size_of::<iovec>();
const _ASSERT: () = assert!(Self::BYTES % PAGE_SIZE == 0);

impl IovDeque {
/// Create a [`memfd`] object that represents a single physical page
fn create_memfd() -> Result<memfd::Memfd, IovDequeError> {
// Create a sealable memfd.
let opts = memfd::MemfdOptions::default().allow_sealing(true);
let mfd = opts.create("sized-1K")?;
let mfd = opts.create("iov_deque")?;

// Resize to system page size.
mfd.as_file()
.set_len(PAGE_SIZE.try_into().unwrap())
.set_len(Self::BYTES.try_into().unwrap())
.map_err(IovDequeError::MemfdResize)?;

// Add seals to prevent further resizing.
Expand Down Expand Up @@ -121,35 +136,13 @@ impl IovDeque {

/// Allocate memory for our ring buffer
///
/// This will allocate exactly two pages of virtual memory. In order to implement the
/// optimization that allows us to always have elements in contiguous memory we need
/// allocations at the granularity of `PAGE_SIZE`. Now, our queues are at maximum 256
/// descriptors long and `struct iovec` looks like this:
///
/// ```Rust
/// pub struct iovec {
/// pub iov_base: *mut ::c_void,
/// pub iov_len: ::size_t,
/// }
/// ```
///
/// so, it's 16 bytes long. As a result, we need a single page for holding the actual data of
/// our buffer.
/// This will allocate 2 * `Self::BYTES` bytes of virtual memory.
fn allocate_ring_buffer_memory() -> Result<*mut c_void, IovDequeError> {
// The fact that we allocate two pages is due to the size of `struct iovec` times our queue
// size equals the page size. Add here a debug assertion to reflect that and ensure that we
// will adapt our logic if the assumption changes in the future.
const {
assert!(
std::mem::size_of::<iovec>() * FIRECRACKER_MAX_QUEUE_SIZE as usize == PAGE_SIZE
);
}

// SAFETY: We are calling the system call with valid arguments
unsafe {
Self::mmap(
std::ptr::null_mut(),
PAGE_SIZE * 2,
Self::BYTES * 2,
libc::PROT_NONE,
libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
-1,
Expand All @@ -169,7 +162,7 @@ impl IovDeque {
let _ = unsafe {
Self::mmap(
buffer,
PAGE_SIZE,
Self::BYTES,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_SHARED | libc::MAP_FIXED,
raw_memfd,
Expand All @@ -180,19 +173,17 @@ impl IovDeque {
// Map the second page of virtual memory to the physical page described by the memfd object
//
// SAFETY: This is safe because:
// * Both `buffer` and the result of `buffer.add(PAGE_SIZE)` are within bounds of the
// * Both `buffer` and the result of `buffer.add(Self::BYTES)` are within bounds of the
// allocation we got from `Self::allocate_ring_buffer_memory`.
// * The computed offset is `PAGE_SIZE * size_of::<c_void>() == PAGE_SIZE bytes` which fits
// in `isize`
// * The resulting pointer is the beginning of the second page of our allocation, so it
// doesn't wrap around the address space.
let next_page = unsafe { buffer.add(PAGE_SIZE) };
let next_page = unsafe { buffer.add(Self::BYTES) };

// SAFETY: We are calling the system call with valid arguments
let _ = unsafe {
Self::mmap(
next_page,
PAGE_SIZE,
Self::BYTES,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_SHARED | libc::MAP_FIXED,
raw_memfd,
Expand All @@ -216,7 +207,7 @@ impl IovDeque {
/// Returns `true` if the [`IovDeque`] is full, `false` otherwise
#[inline(always)]
pub fn is_full(&self) -> bool {
self.len() == FIRECRACKER_MAX_QUEUE_SIZE
self.len() == L
}

/// Resets the queue, dropping all its elements.
Expand Down Expand Up @@ -261,8 +252,8 @@ impl IovDeque {

self.start += nr_iovecs;
self.len -= nr_iovecs;
if self.start >= FIRECRACKER_MAX_QUEUE_SIZE {
self.start -= FIRECRACKER_MAX_QUEUE_SIZE;
if self.start >= L {
self.start -= L;
}
}

Expand Down Expand Up @@ -318,19 +309,21 @@ impl IovDeque {
}
}

impl Drop for IovDeque {
impl<const L: u16> Drop for IovDeque<L> {
fn drop(&mut self) {
// SAFETY: We are passing an address that we got from a previous allocation of `2 *
// PAGE_SIZE` bytes by calling mmap
let _ = unsafe { libc::munmap(self.iov.cast(), PAGE_SIZE * 2) };
// Self::BYTES` bytes by calling mmap
let _ = unsafe { libc::munmap(self.iov.cast(), Self::BYTES * 2) };
}
}

#[cfg(test)]
mod tests {
use libc::iovec;

use super::IovDeque;
// Redefine `IovDeque` with specific length. Otherwise
// Rust will not know what to do.
type IovDeque = super::IovDeque<256>;

#[test]
fn test_new() {
Expand Down
43 changes: 28 additions & 15 deletions src/vmm/src/devices/virtio/iovec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use vm_memory::{
};

use super::iov_deque::{IovDeque, IovDequeError};
use super::queue::FIRECRACKER_MAX_QUEUE_SIZE;
use crate::devices::virtio::queue::DescriptorChain;
use crate::vstate::memory::GuestMemoryMmap;

Expand Down Expand Up @@ -223,10 +224,11 @@ pub struct ParsedDescriptorChain {
/// It describes a write-only buffer passed to us by the guest that is scattered across multiple
/// memory regions. Additionally, this wrapper provides methods that allow reading arbitrary ranges
/// of data from that buffer.
/// `L` const generic value must be a multiple of 256 as required by the `IovDeque` requirements.
#[derive(Debug)]
pub struct IoVecBufferMut {
pub struct IoVecBufferMut<const L: u16 = FIRECRACKER_MAX_QUEUE_SIZE> {
// container of the memory regions included in this IO vector
pub vecs: IovDeque,
pub vecs: IovDeque<L>,
// Total length of the IoVecBufferMut
// We use `u32` here because we use this type in devices which
// should not give us huge buffers. In any case this
Expand All @@ -236,9 +238,9 @@ pub struct IoVecBufferMut {

// SAFETY: `IoVecBufferMut` doesn't allow for interior mutability and no shared ownership is
// possible as it doesn't implement clone
unsafe impl Send for IoVecBufferMut {}
unsafe impl<const L: u16> Send for IoVecBufferMut<L> {}

impl IoVecBufferMut {
impl<const L: u16> IoVecBufferMut<L> {
/// Append a `DescriptorChain` in this `IoVecBufferMut`
///
/// # Safety
Expand Down Expand Up @@ -477,7 +479,11 @@ mod tests {
use libc::{c_void, iovec};
use vm_memory::VolatileMemoryError;

use super::{IoVecBuffer, IoVecBufferMut};
use super::IoVecBuffer;
// Redefine `IoVecBufferMut` with specific length. Otherwise
// Rust will not know what to do.
type IoVecBufferMut = super::IoVecBufferMut<256>;

use crate::devices::virtio::iov_deque::IovDeque;
use crate::devices::virtio::queue::{Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE};
use crate::devices::virtio::test_utils::VirtQueue;
Expand Down Expand Up @@ -514,7 +520,7 @@ mod tests {
}
}

impl From<&mut [u8]> for IoVecBufferMut {
impl<const L: u16> From<&mut [u8]> for super::IoVecBufferMut<L> {
fn from(buf: &mut [u8]) -> Self {
let mut vecs = IovDeque::new().unwrap();
vecs.push_back(iovec {
Expand All @@ -529,7 +535,7 @@ mod tests {
}
}

impl From<Vec<&mut [u8]>> for IoVecBufferMut {
impl<const L: u16> From<Vec<&mut [u8]>> for super::IoVecBufferMut<L> {
fn from(buffer: Vec<&mut [u8]>) -> Self {
let mut len = 0;
let mut vecs = IovDeque::new().unwrap();
Expand Down Expand Up @@ -804,9 +810,14 @@ mod verification {
use vm_memory::bitmap::BitmapSlice;
use vm_memory::VolatileSlice;

use super::{IoVecBuffer, IoVecBufferMut};
use crate::arch::PAGE_SIZE;
use super::IoVecBuffer;
use crate::devices::virtio::iov_deque::IovDeque;
// Redefine `IoVecBufferMut` and `IovDeque` with specific length. Otherwise
// Rust will not know what to do.
type IoVecBufferMut256 = super::IoVecBufferMut<256>;
type IovDeque256 = IovDeque<256>;

use crate::arch::PAGE_SIZE;
use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE;

// Maximum memory size to use for our buffers. For the time being 1KB.
Expand Down Expand Up @@ -837,7 +848,7 @@ mod verification {
///
/// This stub helps imitate the effect of mirroring without all the elaborate memory
/// allocation trick.
pub fn push_back(deque: &mut IovDeque, iov: iovec) {
pub fn push_back<const L: u16>(deque: &mut IovDeque<L>, iov: iovec) {
// This should NEVER happen, since our ring buffer is as big as the maximum queue size.
// We also check for the sanity of the VirtIO queues, in queue.rs, which means that if
// we ever try to add something in a full ring buffer, there is an internal
Expand Down Expand Up @@ -893,22 +904,22 @@ mod verification {
}
}

fn create_iov_deque() -> IovDeque {
fn create_iov_deque() -> IovDeque256 {
// SAFETY: safe because the layout has non-zero size
let mem = unsafe {
std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
2 * PAGE_SIZE,
PAGE_SIZE,
))
};
IovDeque {
IovDeque256 {
iov: mem.cast(),
start: kani::any_where(|&start| start < FIRECRACKER_MAX_QUEUE_SIZE),
len: 0,
}
}

fn create_iovecs_mut(mem: *mut u8, size: usize, nr_descs: usize) -> (IovDeque, u32) {
fn create_iovecs_mut(mem: *mut u8, size: usize, nr_descs: usize) -> (IovDeque256, u32) {
let mut vecs = create_iov_deque();
let mut len = 0u32;
for _ in 0..nr_descs {
Expand All @@ -928,7 +939,7 @@ mod verification {
(vecs, len)
}

impl IoVecBufferMut {
impl IoVecBufferMut256 {
fn any_of_length(nr_descs: usize) -> Self {
// We only write into `IoVecBufferMut` objects, so we can simply create a guest memory
// object initialized to zeroes, trying to be nice to Kani.
Expand Down Expand Up @@ -1018,10 +1029,12 @@ mod verification {
#[kani::proof]
#[kani::unwind(5)]
#[kani::solver(cadical)]
// The `IovDeque` is defined as type alias in the kani module. Because of this
// we need to specify original type here for stub to work.
#[kani::stub(IovDeque::push_back, stubs::push_back)]
fn verify_write_to_iovec() {
for nr_descs in 0..MAX_DESC_LENGTH {
let mut iov_mut = IoVecBufferMut::any_of_length(nr_descs);
let mut iov_mut = IoVecBufferMut256::any_of_length(nr_descs);

let mut buf = kani::vec::any_vec::<u8, GUEST_MEMORY_SIZE>();
let offset: u32 = kani::any();
Expand Down
3 changes: 2 additions & 1 deletion src/vmm/src/devices/virtio/net/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use libc::{iovec, EAGAIN};
use log::error;
use vmm_sys_util::eventfd::EventFd;

use super::NET_QUEUE_MAX_SIZE;
use crate::devices::virtio::device::{DeviceState, IrqTrigger, IrqType, VirtioDevice};
use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
use crate::devices::virtio::gen::virtio_net::{
Expand Down Expand Up @@ -104,7 +105,7 @@ pub struct RxBuffers {
pub min_buffer_size: u32,
// An [`IoVecBufferMut`] covering all the memory we have available for receiving network
// frames.
pub iovec: IoVecBufferMut,
pub iovec: IoVecBufferMut<NET_QUEUE_MAX_SIZE>,
// A map of which part of the memory belongs to which `DescriptorChain` object
pub parsed_descriptors: VecDeque<ParsedDescriptorChain>,
// Buffers that we have used and they are ready to be given back to the guest.
Expand Down
6 changes: 3 additions & 3 deletions src/vmm/src/devices/virtio/net/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

use std::io;

use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE;

/// Maximum size of the queue for network device.
pub const NET_QUEUE_MAX_SIZE: u16 = 512;
/// Maximum size of the frame buffers handled by this device.
pub const MAX_BUFFER_SIZE: usize = 65562;
/// The number of queues of the network device.
pub const NET_NUM_QUEUES: usize = 2;
pub const NET_QUEUE_SIZES: [u16; NET_NUM_QUEUES] = [FIRECRACKER_MAX_QUEUE_SIZE; NET_NUM_QUEUES];
pub const NET_QUEUE_SIZES: [u16; NET_NUM_QUEUES] = [NET_QUEUE_MAX_SIZE; NET_NUM_QUEUES];
/// The index of the rx queue from Net device queues/queues_evts vector.
pub const RX_INDEX: usize = 0;
/// The index of the tx queue from Net device queues/queues_evts vector.
Expand Down
5 changes: 2 additions & 3 deletions src/vmm/src/devices/virtio/net/persist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@ use std::sync::{Arc, Mutex};
use serde::{Deserialize, Serialize};

use super::device::{Net, RxBuffers};
use super::{TapError, NET_NUM_QUEUES, RX_INDEX};
use super::{TapError, NET_NUM_QUEUES, NET_QUEUE_MAX_SIZE, RX_INDEX};
use crate::devices::virtio::device::DeviceState;
use crate::devices::virtio::persist::{PersistError as VirtioStateError, VirtioDeviceState};
use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE;
use crate::devices::virtio::TYPE_NET;
use crate::mmds::data_store::Mmds;
use crate::mmds::ns::MmdsNetworkStack;
Expand Down Expand Up @@ -147,7 +146,7 @@ impl Persist<'_> for Net {
&constructor_args.mem,
TYPE_NET,
NET_NUM_QUEUES,
FIRECRACKER_MAX_QUEUE_SIZE,
NET_QUEUE_MAX_SIZE,
)?;
net.irq_trigger.irq_status = Arc::new(AtomicU32::new(state.virtio_state.interrupt_status));
net.avail_features = state.virtio_state.avail_features;
Expand Down
5 changes: 4 additions & 1 deletion src/vmm/src/devices/virtio/net/tap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,10 +219,13 @@ pub mod tests {
use std::os::unix::ffi::OsStrExt;

use super::*;
use crate::devices::virtio::iovec::IoVecBufferMut;
use crate::devices::virtio::net::gen;
use crate::devices::virtio::net::test_utils::{enable, if_index, TapTrafficSimulator};

// Redefine `IoVecBufferMut` with specific length. Otherwise
// Rust will not know what to do.
type IoVecBufferMut = crate::devices::virtio::iovec::IoVecBufferMut<256>;

// The size of the virtio net header
const VNET_HDR_SIZE: usize = 10;

Expand Down
2 changes: 1 addition & 1 deletion src/vmm/src/devices/virtio/rng/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ mod tests {
let desc = entropy_dev.queues_mut()[RNG_QUEUE].pop().unwrap();
assert!(matches!(
// SAFETY: This descriptor chain is only loaded into one buffer
unsafe { IoVecBufferMut::from_descriptor_chain(&mem, desc) },
unsafe { IoVecBufferMut::<256>::from_descriptor_chain(&mem, desc) },
Err(crate::devices::virtio::iovec::IoVecError::ReadOnlyDescriptor)
));

Expand Down
Loading