Skip to content
97 changes: 96 additions & 1 deletion block/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ use std::fmt::{self, Debug};
use std::fs::{File, OpenOptions};
use std::io::{self, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write};
use std::os::linux::fs::MetadataExt;
use std::os::unix::fs::FileTypeExt;
use std::os::unix::io::AsRawFd;
use std::path::Path;
use std::str::FromStr;
Expand All @@ -61,7 +62,7 @@ use vm_memory::{
};
use vm_virtio::{AccessPlatform, Translatable};
use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::{aio, ioctl_io_nr};
use vmm_sys_util::{aio, ioctl_io_nr, ioctl_ior_nr};

use crate::async_io::{AsyncIo, AsyncIoError, AsyncIoResult};
use crate::error::{BlockError, BlockErrorKind, BlockResult, ErrorOp};
Expand Down Expand Up @@ -1193,6 +1194,36 @@ ioctl_io_nr!(BLKSSZGET, 0x12, 104);
ioctl_io_nr!(BLKPBSZGET, 0x12, 123);
ioctl_io_nr!(BLKIOMIN, 0x12, 120);
ioctl_io_nr!(BLKIOOPT, 0x12, 121);
ioctl_ior_nr!(BLKGETSIZE64, 0x12, 114, u64);

/// Returns `(logical_size, physical_size)` in bytes for regular files and block devices.
///
/// For regular files, logical size is `st_size` and physical size is
/// `st_blocks * 512` (actual host allocation). For block devices both
/// values equal the `BLKGETSIZE64` result.
pub fn query_device_size(file: &File) -> io::Result<(u64, u64)> {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if you can do a very simple unit test for that using /tmp/foo and /dev/zero 🤔

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha. I first thought it's actually covered through existing tests, but this topic is far more complicated than it seemed to be. I've added some more sophisticated unit tests just for this new function, including the /dev/zero one. The block device is then covered by the integration tests in any case.

Thanks

let m = file.metadata()?;
if m.is_file() {
// st_blocks is always in 512-byte units on Linux
Ok((m.len(), m.st_blocks() * 512))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

regarding 512. After a first glance, I couldn't finde a constant in the libc crate for that. So we can keep it as is

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, this is st_blocks from stat which is always at 512 byte granularity.

Thanks

} else if m.file_type().is_block_device() {
let mut size: u64 = 0;
// SAFETY: BLKGETSIZE64 reads the device size into a u64 pointer.
let ret = unsafe { libc::ioctl(file.as_raw_fd(), BLKGETSIZE64() as _, &mut size) };
if ret != 0 {
return Err(io::Error::last_os_error());
}
Ok((size, size))
} else {
Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!(
"disk image must be a regular file or block device, is: {:?}",
m.file_type()
),
))
}
}

#[derive(Copy, Clone)]
enum BlockSize {
Expand Down Expand Up @@ -1442,4 +1473,68 @@ mod unit_tests {
// SAFETY: buf was allocated with this layout via alloc_zeroed.
unsafe { dealloc(buf, layout) };
}

#[test]
fn test_query_device_size_regular_file() {
let temp_file = TempFile::new().unwrap();
let mut f = temp_file.into_file();
// 5 sectors + 13 extra bytes - not page aligned, not sectoraligned
f.write_all(&[0xAB; 5 * 512 + 13]).unwrap();
f.sync_all().unwrap();

let (logical, physical) = query_device_size(&f).unwrap();
assert_eq!(logical, 5 * 512 + 13);
assert!(physical > 0);
}

#[test]
fn test_query_device_size_sparse_file_punch_hole() {
let temp_file = TempFile::new().unwrap();
let f = temp_file.as_file();
// Allocate 1 MiB
let size: i64 = 1 << 20;
f.set_len(size as u64).unwrap();
// SAFETY: fd is valid, range is within file size.
let ret = unsafe {
libc::fallocate(
f.as_raw_fd(),
0, // allocate
0,
size,
)
};
assert_eq!(ret, 0, "fallocate failed: {}", io::Error::last_os_error());
f.sync_all().unwrap();

let (log_before, phys_before) = query_device_size(f).unwrap();
assert_eq!(log_before, size as u64);
assert_eq!(phys_before, size as u64);

// Punch a hole in the middle 512 KiB
// SAFETY: fd is valid, range is within file size.
let ret = unsafe {
libc::fallocate(
f.as_raw_fd(),
libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
size / 4,
size / 2,
)
};
assert_eq!(ret, 0, "punch hole failed: {}", io::Error::last_os_error());
f.sync_all().unwrap();

let (logical, physical) = query_device_size(f).unwrap();
assert_eq!(logical, size as u64, "logical size must not change");
assert!(
physical < logical,
"physical ({physical}) should be less than logical ({logical}) after punch hole"
);
}

#[test]
fn test_query_device_size_rejects_char_device() {
let f = std::fs::File::open("/dev/zero").unwrap();
let err = query_device_size(&f).unwrap_err();
assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
}
}
10 changes: 7 additions & 3 deletions block/src/qcow/raw_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use vmm_sys_util::file_traits::FileSync;
use vmm_sys_util::seek_hole::SeekHole;
use vmm_sys_util::write_zeroes::{PunchHole, WriteZeroesAt};

use crate::BlockBackend;
use crate::{BlockBackend, query_device_size};

#[derive(Debug)]
pub struct RawFile {
Expand Down Expand Up @@ -374,11 +374,15 @@ impl SeekHole for RawFile {

impl BlockBackend for RawFile {
fn logical_size(&self) -> std::result::Result<u64, crate::Error> {
Ok(self.metadata().map_err(crate::Error::RawFileError)?.len())
Ok(query_device_size(&self.file)
.map_err(crate::Error::RawFileError)?
.0)
}

fn physical_size(&self) -> std::result::Result<u64, crate::Error> {
Ok(self.metadata().map_err(crate::Error::RawFileError)?.len())
Ok(query_device_size(&self.file)
.map_err(crate::Error::RawFileError)?
.1)
}
}

Expand Down
19 changes: 10 additions & 9 deletions block/src/raw_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

use std::fs::File;
use std::io::{Error, Seek, SeekFrom};
use std::io::Error;
use std::os::unix::io::{AsRawFd, RawFd};

use io_uring::{IoUring, opcode, types};
Expand All @@ -14,7 +14,9 @@ use vmm_sys_util::eventfd::EventFd;
use crate::async_io::{
AsyncIo, AsyncIoError, AsyncIoResult, BorrowedDiskFd, DiskFile, DiskFileError, DiskFileResult,
};
use crate::{BatchRequest, DiskTopology, RequestType, SECTOR_SIZE, probe_sparse_support};
use crate::{
BatchRequest, DiskTopology, RequestType, SECTOR_SIZE, probe_sparse_support, query_device_size,
};

pub struct RawFileDisk {
file: File,
Expand All @@ -28,16 +30,15 @@ impl RawFileDisk {

impl DiskFile for RawFileDisk {
fn logical_size(&mut self) -> DiskFileResult<u64> {
self.file
.seek(SeekFrom::End(0))
.map_err(DiskFileError::Size)
Ok(query_device_size(&self.file)
.map_err(DiskFileError::Size)?
.0)
}

fn physical_size(&mut self) -> DiskFileResult<u64> {
self.file
.metadata()
.map(|m| m.len())
.map_err(DiskFileError::Size)
Ok(query_device_size(&self.file)
.map_err(DiskFileError::Size)?
.1)
}

fn new_async_io(&self, ring_depth: u32) -> DiskFileResult<Box<dyn AsyncIo>> {
Expand Down
16 changes: 7 additions & 9 deletions block/src/raw_async_aio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

use std::collections::VecDeque;
use std::fs::File;
use std::io::{Seek, SeekFrom};
use std::os::unix::io::{AsRawFd, RawFd};

use libc::{FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE, FALLOC_FL_ZERO_RANGE};
Expand All @@ -18,7 +17,7 @@ use vmm_sys_util::eventfd::EventFd;
use crate::async_io::{
AsyncIo, AsyncIoError, AsyncIoResult, BorrowedDiskFd, DiskFile, DiskFileError, DiskFileResult,
};
use crate::{DiskTopology, SECTOR_SIZE, probe_sparse_support};
use crate::{DiskTopology, SECTOR_SIZE, probe_sparse_support, query_device_size};

pub struct RawFileDiskAio {
file: File,
Expand All @@ -32,16 +31,15 @@ impl RawFileDiskAio {

impl DiskFile for RawFileDiskAio {
fn logical_size(&mut self) -> DiskFileResult<u64> {
self.file
.seek(SeekFrom::End(0))
.map_err(DiskFileError::Size)
Ok(query_device_size(&self.file)
.map_err(DiskFileError::Size)?
.0)
}

fn physical_size(&mut self) -> DiskFileResult<u64> {
self.file
.metadata()
.map(|m| m.len())
.map_err(DiskFileError::Size)
Ok(query_device_size(&self.file)
.map_err(DiskFileError::Size)?
.1)
}

fn new_async_io(&self, ring_depth: u32) -> DiskFileResult<Box<dyn AsyncIo>> {
Expand Down
16 changes: 7 additions & 9 deletions block/src/raw_sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

use std::collections::VecDeque;
use std::fs::File;
use std::io::{Seek, SeekFrom};
use std::os::unix::io::{AsRawFd, RawFd};

use libc::{FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE, FALLOC_FL_ZERO_RANGE};
Expand All @@ -14,7 +13,7 @@ use vmm_sys_util::eventfd::EventFd;
use crate::async_io::{
AsyncIo, AsyncIoError, AsyncIoResult, BorrowedDiskFd, DiskFile, DiskFileError, DiskFileResult,
};
use crate::{DiskTopology, SECTOR_SIZE, probe_sparse_support};
use crate::{DiskTopology, SECTOR_SIZE, probe_sparse_support, query_device_size};

pub struct RawFileDiskSync {
file: File,
Expand All @@ -28,16 +27,15 @@ impl RawFileDiskSync {

impl DiskFile for RawFileDiskSync {
fn logical_size(&mut self) -> DiskFileResult<u64> {
self.file
.seek(SeekFrom::End(0))
.map_err(DiskFileError::Size)
Ok(query_device_size(&self.file)
.map_err(DiskFileError::Size)?
.0)
}

fn physical_size(&mut self) -> DiskFileResult<u64> {
self.file
.metadata()
.map(|m| m.len())
.map_err(DiskFileError::Size)
Ok(query_device_size(&self.file)
.map_err(DiskFileError::Size)?
.1)
}

fn new_async_io(&self, _ring_depth: u32) -> DiskFileResult<Box<dyn AsyncIo>> {
Expand Down
20 changes: 13 additions & 7 deletions virtio-devices/src/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

use std::cmp::max;
use std::collections::{BTreeMap, HashMap, VecDeque};
use std::num::Wrapping;
use std::ops::Deref;
Expand Down Expand Up @@ -887,10 +888,16 @@ impl Block {
match self.lock_granularity_choice {
LockGranularityChoice::Full => LockGranularity::WholeFile,
LockGranularityChoice::ByteRange => {
// Byte-range lock covering [0, size)
self.disk_image.physical_size().map_or_else(
// use a safe fallback
|e| {
// Byte range lock covering [0, max(logical, physical))
// logical > physical for sparse files, physical > logical
// for small dense files due to filesystem block rounding.
let logical = self.disk_image.logical_size();
let physical = self.disk_image.physical_size();
match (logical, physical) {
(Ok(l), Ok(p)) => LockGranularity::ByteRange(0, max(l, p)),
(Ok(l), Err(_)) => LockGranularity::ByteRange(0, l),
(Err(_), Ok(p)) => LockGranularity::ByteRange(0, p),
(Err(e), Err(_)) => {
let fallback = LockGranularity::WholeFile;
warn!(
"Can't get disk size for id={},path={}, falling back to {:?}: error: {e}",
Expand All @@ -899,9 +906,8 @@ impl Block {
fallback
);
fallback
},
|size| LockGranularity::ByteRange(0, size),
)
}
}
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion vmm/src/seccomp_filters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,10 @@ mod kvm {
pub const KVM_SET_NESTED_STATE: u64 = 1082175167;
}

// Block device ioctls for sparse support probing (not exported by libc)
// Block device ioctls (not exported by libc)
const BLKDISCARD: u64 = 0x1277; // _IO(0x12, 119)
const BLKZEROOUT: u64 = 0x127f; // _IO(0x12, 127)
const BLKGETSIZE64: u64 = 0x80081272; // _IOR(0x12, 114, size_t)

// MSHV IOCTL code. This is unstable until the kernel code has been declared stable.
#[cfg(feature = "mshv")]
Expand Down Expand Up @@ -265,6 +266,7 @@ fn create_vmm_ioctl_seccomp_rule_common(
and![Cond::new(1, ArgLen::Dword, Eq, BLKPBSZGET as _)?],
and![Cond::new(1, ArgLen::Dword, Eq, BLKIOMIN as _)?],
and![Cond::new(1, ArgLen::Dword, Eq, BLKIOOPT as _)?],
and![Cond::new(1, ArgLen::Dword, Eq, BLKGETSIZE64 as _)?],
and![Cond::new(1, ArgLen::Dword, Eq, BLKDISCARD as _)?],
and![Cond::new(1, ArgLen::Dword, Eq, BLKZEROOUT as _)?],
and![Cond::new(1, ArgLen::Dword, Eq, FIOCLEX as _)?],
Expand Down
Loading