From fdbbd47a6b442374a54cbd9f1ca12a5307561310 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 16:12:27 +0200 Subject: [PATCH 01/34] virtio/fs: extract init binary blob into its own crate Move the init binary build script and include_bytes!() from the devices crate into a new init-blob crate. The passthrough modules reference the binary as init_blob::INIT_BINARY instead of using include_bytes! directly. Inspired by https://github.com/containers/libkrun/pull/593. Suggested-by: Geoffrey Goodman Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- Cargo.lock | 5 +++++ Cargo.toml | 1 + src/devices/Cargo.toml | 3 ++- src/devices/src/virtio/fs/linux/passthrough.rs | 2 +- src/devices/src/virtio/fs/macos/passthrough.rs | 2 +- src/init-blob/Cargo.toml | 11 +++++++++++ src/{devices => init-blob}/build.rs | 0 src/init-blob/src/lib.rs | 1 + 8 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 src/init-blob/Cargo.toml rename src/{devices => init-blob}/build.rs (100%) create mode 100644 src/init-blob/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index ecb90d195..41ed2cbcc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -527,6 +527,10 @@ dependencies = [ "serde_core", ] +[[package]] +name = "init-blob" +version = "0.1.0-1.18.0" + [[package]] name = "iocuddle" version = "0.1.1" @@ -663,6 +667,7 @@ dependencies = [ "caps", "crossbeam-channel", "imago", + "init-blob", "krun-arch", "krun-display", "krun-hvf", diff --git a/Cargo.toml b/Cargo.toml index 00b06aa00..35b1dbba4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ "src/libkrun", + "src/init-blob", "src/input", "src/display", "src/utils", diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index eacb6cc97..df5ec5a58 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -3,7 +3,7 @@ name = "krun-devices" version = "0.1.0-1.18.0" authors = ["The libkrun Authors"] edition = "2021" -build = "build.rs" + description = "Virtual device 
emulation for libkrun" license = "Apache-2.0" repository = "https://github.com/containers/libkrun" @@ -37,6 +37,7 @@ vm-memory = { version = "0.17", features = ["backend-mmap"] } zerocopy = { version = "0.8.26", optional = true, features = ["derive"] } krun_display = { package = "krun-display", version = "0.1.0", path = "../display", optional = true, features = ["bindgen_clang_runtime"] } krun_input = { package = "krun-input", version = "0.1.0", path = "../input", features = ["bindgen_clang_runtime"], optional = true } +init-blob = { path = "../init-blob" } arch = { package = "krun-arch", version = "=0.1.0-1.18.0", path = "../arch" } utils = { package = "krun-utils", version = "=0.1.0-1.18.0", path = "../utils" } diff --git a/src/devices/src/virtio/fs/linux/passthrough.rs b/src/devices/src/virtio/fs/linux/passthrough.rs index e5ca21a03..a0c1d6020 100644 --- a/src/devices/src/virtio/fs/linux/passthrough.rs +++ b/src/devices/src/virtio/fs/linux/passthrough.rs @@ -33,7 +33,7 @@ const EMPTY_CSTR: &[u8] = b"\0"; const PROC_CSTR: &[u8] = b"/proc/self/fd\0"; const INIT_CSTR: &[u8] = b"init.krun\0"; -static INIT_BINARY: &[u8] = include_bytes!(env!("KRUN_INIT_BINARY_PATH")); +static INIT_BINARY: &[u8] = init_blob::INIT_BINARY; type Inode = u64; type Handle = u64; diff --git a/src/devices/src/virtio/fs/macos/passthrough.rs b/src/devices/src/virtio/fs/macos/passthrough.rs index 53680bd92..419cd645b 100644 --- a/src/devices/src/virtio/fs/macos/passthrough.rs +++ b/src/devices/src/virtio/fs/macos/passthrough.rs @@ -37,7 +37,7 @@ const SECURITY_CAPABILITY: &[u8] = b"security.capability\0"; const UID_MAX: u32 = u32::MAX - 1; -static INIT_BINARY: &[u8] = include_bytes!(env!("KRUN_INIT_BINARY_PATH")); +static INIT_BINARY: &[u8] = init_blob::INIT_BINARY; type Inode = u64; type Handle = u64; diff --git a/src/init-blob/Cargo.toml b/src/init-blob/Cargo.toml new file mode 100644 index 000000000..7792e2042 --- /dev/null +++ b/src/init-blob/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = 
"init-blob" +version = "0.1.0-1.18.0" +edition = "2021" +description = "Default init binary blob for libkrun guests" +license = "Apache-2.0" +repository = "https://github.com/containers/libkrun" +build = "build.rs" + +[lib] +path = "src/lib.rs" diff --git a/src/devices/build.rs b/src/init-blob/build.rs similarity index 100% rename from src/devices/build.rs rename to src/init-blob/build.rs diff --git a/src/init-blob/src/lib.rs b/src/init-blob/src/lib.rs new file mode 100644 index 000000000..4397da679 --- /dev/null +++ b/src/init-blob/src/lib.rs @@ -0,0 +1 @@ +pub static INIT_BINARY: &[u8] = include_bytes!(env!("KRUN_INIT_BINARY_PATH")); From 66d7e99fd4b46c337ad06a614d026fbebacaedf9 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 16:13:31 +0200 Subject: [PATCH 02/34] virtio/fs: introduce InodeAllocator for shared inode numbering Replace the private next_inode AtomicU64 inside PassthroughFs with a shared InodeAllocator that is passed in at construction. This lets multiple layers (e.g. a future virtual-inode overlay) allocate from the same counter without implicit coordination via reserved ranges. The allocator starts at ROOT_ID + 2, reserving inode 2 for the existing init_inode in PassthroughFs. This reservation is removed in the next commit when init handling moves to AugmentFs. PassthroughFs::new() and PassthroughFsRo::new() now take an Arc<InodeAllocator> parameter. FsWorker::new() creates the allocator and passes it through.
Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- src/devices/src/virtio/fs/inode_alloc.rs | 30 +++++++++++++++++++ .../src/virtio/fs/linux/passthrough.rs | 9 +++--- .../src/virtio/fs/macos/passthrough.rs | 9 +++--- src/devices/src/virtio/fs/mod.rs | 1 + src/devices/src/virtio/fs/read_only.rs | 5 ++-- src/devices/src/virtio/fs/worker.rs | 12 ++++++-- 6 files changed, 54 insertions(+), 12 deletions(-) create mode 100644 src/devices/src/virtio/fs/inode_alloc.rs diff --git a/src/devices/src/virtio/fs/inode_alloc.rs b/src/devices/src/virtio/fs/inode_alloc.rs new file mode 100644 index 000000000..63e570acd --- /dev/null +++ b/src/devices/src/virtio/fs/inode_alloc.rs @@ -0,0 +1,30 @@ +use std::sync::atomic::{AtomicU64, Ordering}; + +use super::fuse; + +/// Allocates unique FUSE inode numbers. +/// +/// FUSE inode numbers are opaque identifiers with two reserved values: +/// - `0` — invalid / negative-entry cache sentinel (never allocated) +/// - `1` (`ROOT_ID`) — the root directory of the filesystem +/// +/// All other numbers are allocated sequentially starting from `ROOT_ID + 2` +/// (inode 2 is reserved for the legacy init_inode in PassthroughFs until the +/// AugmentFs overlay takes over init handling). +/// The allocator is `Send + Sync` and safe to share across threads. +pub struct InodeAllocator { + next: AtomicU64, +} + +impl InodeAllocator { + pub fn new() -> Self { + Self { + next: AtomicU64::new(fuse::ROOT_ID + 2), + } + } + + /// Allocate the next inode number. Each call returns a unique value. 
+ pub fn next(&self) -> u64 { + self.next.fetch_add(1, Ordering::Relaxed) + } +} diff --git a/src/devices/src/virtio/fs/linux/passthrough.rs b/src/devices/src/virtio/fs/linux/passthrough.rs index a0c1d6020..abda1ce53 100644 --- a/src/devices/src/virtio/fs/linux/passthrough.rs +++ b/src/devices/src/virtio/fs/linux/passthrough.rs @@ -25,6 +25,7 @@ use super::super::filesystem::{ ListxattrReply, OpenOptions, SetattrValid, ZeroCopyReader, ZeroCopyWriter, }; use super::super::fuse; +use super::super::inode_alloc::InodeAllocator; use super::super::multikey::MultikeyBTreeMap; const CURRENT_DIR_CSTR: &[u8] = b".\0"; @@ -358,7 +359,7 @@ pub struct PassthroughFs { // documentation of the `O_PATH` flag in `open(2)` for more details on what one can and cannot // do with an fd opened with this flag. inodes: RwLock>>, - next_inode: AtomicU64, + inode_alloc: Arc, init_inode: u64, // File descriptors for open files and directories. Unlike the fds in `inodes`, these _can_ be @@ -392,7 +393,7 @@ enum FileOrLink { } impl PassthroughFs { - pub fn new(cfg: Config) -> io::Result { + pub fn new(cfg: Config, inode_alloc: Arc) -> io::Result { let fd = if let Some(fd) = cfg.proc_sfd_rawfd { fd } else { @@ -438,7 +439,7 @@ impl PassthroughFs { Ok(PassthroughFs { inodes: RwLock::new(MultikeyBTreeMap::new()), - next_inode: AtomicU64::new(fuse::ROOT_ID + 2), + inode_alloc, init_inode: fuse::ROOT_ID + 1, handles: RwLock::new(BTreeMap::new()), @@ -579,7 +580,7 @@ impl PassthroughFs { // There is a possible race here where 2 threads end up adding the same file // into the inode list. However, since each of those will get a unique Inode // value and unique file descriptors this shouldn't be that much of a problem. 
- let inode = self.next_inode.fetch_add(1, Ordering::Relaxed); + let inode = self.inode_alloc.next(); self.inodes.write().unwrap().insert( inode, InodeAltKey { diff --git a/src/devices/src/virtio/fs/macos/passthrough.rs b/src/devices/src/virtio/fs/macos/passthrough.rs index 419cd645b..3d27aec7f 100644 --- a/src/devices/src/virtio/fs/macos/passthrough.rs +++ b/src/devices/src/virtio/fs/macos/passthrough.rs @@ -29,6 +29,7 @@ use super::super::filesystem::{ ListxattrReply, OpenOptions, SetattrValid, ZeroCopyReader, ZeroCopyWriter, }; use super::super::fuse; +use super::super::inode_alloc::InodeAllocator; use super::super::multikey::MultikeyBTreeMap; const INIT_CSTR: &[u8] = b"init.krun\0"; @@ -543,7 +544,7 @@ impl Default for Config { /// combination of mount namespaces and the pivot_root system call. pub struct PassthroughFs { inodes: RwLock>>, - next_inode: AtomicU64, + inode_alloc: Arc, init_inode: u64, handles: RwLock>>, @@ -560,7 +561,7 @@ pub struct PassthroughFs { } impl PassthroughFs { - pub fn new(cfg: Config) -> io::Result { + pub fn new(cfg: Config, inode_alloc: Arc) -> io::Result { let root = CString::new(cfg.root_dir.as_str()).expect("CString::new failed"); // Safe because this doesn't modify any memory and we check the return value. @@ -579,7 +580,7 @@ impl PassthroughFs { Ok(PassthroughFs { inodes: RwLock::new(MultikeyBTreeMap::new()), - next_inode: AtomicU64::new(fuse::ROOT_ID + 2), + inode_alloc, init_inode: fuse::ROOT_ID + 1, handles: RwLock::new(BTreeMap::new()), @@ -723,7 +724,7 @@ impl PassthroughFs { // There is a possible race here where 2 threads end up adding the same file // into the inode list. However, since each of those will get a unique Inode // value and unique file descriptors this shouldn't be that much of a problem. 
- let inode = self.next_inode.fetch_add(1, Ordering::Relaxed); + let inode = self.inode_alloc.next(); self.inodes.write().unwrap().insert( inode, InodeAltKey { diff --git a/src/devices/src/virtio/fs/mod.rs b/src/devices/src/virtio/fs/mod.rs index 7ce9d48c2..179535131 100644 --- a/src/devices/src/virtio/fs/mod.rs +++ b/src/devices/src/virtio/fs/mod.rs @@ -2,6 +2,7 @@ mod device; #[allow(dead_code)] mod filesystem; pub mod fuse; +mod inode_alloc; #[allow(dead_code)] mod multikey; mod read_only; diff --git a/src/devices/src/virtio/fs/read_only.rs b/src/devices/src/virtio/fs/read_only.rs index e975f2dda..eb8aebef3 100644 --- a/src/devices/src/virtio/fs/read_only.rs +++ b/src/devices/src/virtio/fs/read_only.rs @@ -25,6 +25,7 @@ use super::filesystem::{ OpenOptions, SetattrValid, ZeroCopyReader, ZeroCopyWriter, }; use super::fuse; +use super::inode_alloc::InodeAllocator; use super::passthrough::{self, PassthroughFs}; use crate::virtio::bindings; @@ -60,9 +61,9 @@ pub struct PassthroughFsRo { } impl PassthroughFsRo { - pub fn new(cfg: passthrough::Config) -> io::Result { + pub fn new(cfg: passthrough::Config, inode_alloc: Arc) -> io::Result { Ok(Self { - inner: PassthroughFs::new(cfg)?, + inner: PassthroughFs::new(cfg, inode_alloc)?, }) } } diff --git a/src/devices/src/virtio/fs/worker.rs b/src/devices/src/virtio/fs/worker.rs index c612b3e9b..e554aa377 100644 --- a/src/devices/src/virtio/fs/worker.rs +++ b/src/devices/src/virtio/fs/worker.rs @@ -16,6 +16,7 @@ use vm_memory::GuestMemoryMmap; use super::super::{FsError, Queue}; use super::defs::{HPQ_INDEX, REQ_INDEX}; use super::descriptor_utils::{Reader, Writer}; +use super::inode_alloc::InodeAllocator; use super::passthrough::{self, PassthroughFs}; use super::read_only::PassthroughFsRo; use super::server::Server; @@ -83,10 +84,17 @@ impl FsWorker { exit_code: Arc, #[cfg(target_os = "macos")] map_sender: Option>, ) -> Result { + let inode_alloc = Arc::new(InodeAllocator::new()); let server = if read_only { - 
FsServer::ReadOnly(Server::new(PassthroughFsRo::new(passthrough_cfg)?)) + FsServer::ReadOnly(Server::new(PassthroughFsRo::new( + passthrough_cfg, + inode_alloc, + )?)) } else { - FsServer::ReadWrite(Server::new(PassthroughFs::new(passthrough_cfg)?)) + FsServer::ReadWrite(Server::new(PassthroughFs::new( + passthrough_cfg, + inode_alloc, + )?)) }; Ok(Self { queues, From 614a504f772b31286893947d7a31f8ce16b0effe Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 16:15:57 +0200 Subject: [PATCH 03/34] virtio/fs: introduce generic AugmentFs overlay for files like init.krun MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce AugmentFs, a generic overlay that wraps any FileSystem implementation and intercepts FUSE operations for virtual inodes — synthetic read-only files and directories backed by static data. One-shot files can only be looked up once. Remove all init.krun special-case code (init_inode, init_handle, INIT_CSTR) from both the Linux and macOS passthrough implementations. The init.krun virtual file is now configured via VirtualDirEntry in the krun API layer and handled generically by the overlay. FsDeviceConfig carries a Vec<VirtualDirEntry> and FsWorker wraps AugmentFs<PassthroughFs> / AugmentFs<PassthroughFsRo>. The InodeAllocator now starts at ROOT_ID + 1 since the init_inode reservation is no longer needed.
Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- Cargo.lock | 2 +- src/devices/Cargo.toml | 1 - src/devices/src/virtio/fs/augment_fs.rs | 737 ++++++++++++++++++ src/devices/src/virtio/fs/device.rs | 6 + src/devices/src/virtio/fs/inode_alloc.rs | 6 +- .../src/virtio/fs/linux/passthrough.rs | 77 +- .../src/virtio/fs/macos/passthrough.rs | 49 +- src/devices/src/virtio/fs/mod.rs | 2 + src/devices/src/virtio/fs/virtual_entry.rs | 56 ++ src/devices/src/virtio/fs/worker.rs | 27 +- src/devices/src/virtio/linux_errno.rs | 34 + src/libkrun/Cargo.toml | 1 + src/libkrun/src/lib.rs | 34 +- src/vmm/src/builder.rs | 1 + src/vmm/src/vmm_config/fs.rs | 5 + 15 files changed, 895 insertions(+), 143 deletions(-) create mode 100644 src/devices/src/virtio/fs/augment_fs.rs create mode 100644 src/devices/src/virtio/fs/virtual_entry.rs diff --git a/Cargo.lock b/Cargo.lock index 41ed2cbcc..c0c4dd9b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -667,7 +667,6 @@ dependencies = [ "caps", "crossbeam-channel", "imago", - "init-blob", "krun-arch", "krun-display", "krun-hvf", @@ -861,6 +860,7 @@ version = "1.18.0" dependencies = [ "crossbeam-channel", "env_logger", + "init-blob", "krun-aws-nitro", "krun-devices", "krun-display", diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index df5ec5a58..1be66e164 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -37,7 +37,6 @@ vm-memory = { version = "0.17", features = ["backend-mmap"] } zerocopy = { version = "0.8.26", optional = true, features = ["derive"] } krun_display = { package = "krun-display", version = "0.1.0", path = "../display", optional = true, features = ["bindgen_clang_runtime"] } krun_input = { package = "krun-input", version = "0.1.0", path = "../input", features = ["bindgen_clang_runtime"], optional = true } -init-blob = { path = "../init-blob" } arch = { package = "krun-arch", version = "=0.1.0-1.18.0", path = "../arch" } utils = { package = "krun-utils", version = "=0.1.0-1.18.0", path = 
"../utils" } diff --git a/src/devices/src/virtio/fs/augment_fs.rs b/src/devices/src/virtio/fs/augment_fs.rs new file mode 100644 index 000000000..a694e5b96 --- /dev/null +++ b/src/devices/src/virtio/fs/augment_fs.rs @@ -0,0 +1,737 @@ +// Virtual inode overlay for virtiofs. +// +// `AugmentFs` wraps an inner `FileSystem` implementation and intercepts +// FUSE operations for virtual inodes — synthetic read-only files that exist +// only in memory. All other operations are delegated to the inner filesystem. +// +// Virtual inodes are injected into the root directory (parent = ROOT_ID) and +// are currently only accessible via lookup (they do not appear in readdir). +// +// One-shot files can only be looked up once — the name is removed from the +// directory on first lookup so subsequent lookups return ENOENT. + +#[cfg(target_os = "macos")] +use crossbeam_channel::Sender; +use std::collections::HashMap; +use std::ffi::CStr; +use std::ffi::CString; +use std::io; +use std::mem; +use std::sync::atomic::AtomicI32; +use std::sync::Arc; +use std::sync::RwLock; +use std::time::Duration; + +#[cfg(target_os = "macos")] +use utils::worker_message::WorkerMessage; + +use super::filesystem::{ + Context, DirEntry, Entry, Extensions, FileSystem, FsOptions, GetxattrReply, ListxattrReply, + OpenOptions, SetattrValid, ZeroCopyReader, ZeroCopyWriter, +}; +use super::fuse; +use super::inode_alloc::InodeAllocator; +use super::virtual_entry::{VirtualDirEntry, VirtualEntry, VirtualEntryContent, VIRTUAL_BLKSIZE}; +use crate::virtio::bindings; +use crate::virtio::linux_errno; + +type Inode = u64; +type Handle = u64; + +/// Sentinel handle returned for all virtual file opens. This works because +/// virtual file operations dispatch on inode, not handle — there is no +/// per-open state. If per-fd state is ever needed (e.g. writable virtual +/// files), this must be replaced with a real handle allocator. +const VIRTUAL_HANDLE: Handle = 0; + +/// Persistent virtual entries never change. 
+const VIRTUAL_TIMEOUT: Duration = Duration::MAX; + +/// Overlay that injects virtual inodes into an inner `FileSystem`. +pub struct AugmentFs { + inner: T, + /// Maps (parent_inode, name) → child inode number. One-shot entries + /// are removed on first lookup so the file can only be opened once. + name_to_inode: RwLock>, + /// Maps virtual inode number → (mode, inode data). One-shot entries are + /// removed from this map on release. + inodes: RwLock>, +} + +impl> AugmentFs { + /// Create a new overlay. + /// + /// `entries` are registered as virtual inodes in the root directory. + /// Inode numbers are obtained from `inode_alloc`, the same allocator + /// used by the inner filesystem. + pub fn new(inner: T, inode_alloc: &InodeAllocator, entries: Vec) -> Self { + let mut name_to_inode = HashMap::new(); + let mut inodes = HashMap::new(); + + Self::register_entries( + fuse::ROOT_ID, + entries, + inode_alloc, + &mut name_to_inode, + &mut inodes, + ); + + Self { + inner, + name_to_inode: RwLock::new(name_to_inode), + inodes: RwLock::new(inodes), + } + } + + fn register_entries( + parent: Inode, + entries: Vec, + inode_alloc: &InodeAllocator, + name_to_inode: &mut HashMap<(Inode, CString), Inode>, + inodes: &mut HashMap, + ) { + for entry in entries { + let ino = inode_alloc.next(); + name_to_inode.insert((parent, entry.name), ino); + + // Recurse into directory children before moving the node. 
+ if let VirtualEntryContent::Dir { children } = entry.entry.content { + Self::register_entries(ino, children, inode_alloc, name_to_inode, inodes); + inodes.insert( + ino, + VirtualEntry { + mode: entry.entry.mode, + one_shot: entry.entry.one_shot, + content: VirtualEntryContent::Dir { + children: Vec::new(), + }, + }, + ); + } else { + inodes.insert(ino, entry.entry); + } + } + } + + fn is_virtual(&self, inode: Inode) -> bool { + self.inodes.read().unwrap().contains_key(&inode) + } + + fn virtual_stat(ino: Inode, vnode: &VirtualEntry) -> (bindings::stat64, Duration) { + let mut st: bindings::stat64 = unsafe { mem::zeroed() }; + st.st_ino = ino; + st.st_mode = vnode.st_mode() as _; + st.st_blksize = VIRTUAL_BLKSIZE as _; + let timeout = if vnode.one_shot { + Duration::ZERO + } else { + VIRTUAL_TIMEOUT + }; + match &vnode.content { + VirtualEntryContent::File { data, .. } => { + st.st_size = data.len() as i64; + st.st_nlink = 1; + st.st_blocks = ((data.len() as i64) + 511) / 512; + } + VirtualEntryContent::Dir { .. 
} => { + st.st_nlink = 2; + } + } + (st, timeout) + } +} + +impl> FileSystem for AugmentFs { + type Inode = Inode; + type Handle = Handle; + + fn init(&self, capable: FsOptions) -> io::Result { + self.inner.init(capable) + } + + fn destroy(&self) { + self.inner.destroy() + } + + fn lookup(&self, ctx: Context, parent: Inode, name: &CStr) -> io::Result { + let key = (parent, CString::from(name)); + let inode = self.name_to_inode.read().unwrap().get(&key).copied(); + if let Some(inode) = inode { + let inodes = self.inodes.read().unwrap(); + if let Some(vnode) = inodes.get(&inode) { + let one_shot = vnode.one_shot; + let (st, timeout) = Self::virtual_stat(inode, vnode); + + if one_shot { + drop(inodes); + self.name_to_inode.write().unwrap().remove(&key); + } + + return Ok(Entry { + inode, + generation: 0, + attr: st, + attr_flags: 0, + attr_timeout: timeout, + entry_timeout: timeout, + }); + } + } + self.inner.lookup(ctx, parent, name) + } + + fn forget(&self, ctx: Context, inode: Inode, count: u64) { + if !self.is_virtual(inode) { + self.inner.forget(ctx, inode, count) + } + } + + fn batch_forget(&self, ctx: Context, mut requests: Vec<(Inode, u64)>) { + requests.retain(|(ino, _)| !self.is_virtual(*ino)); + self.inner.batch_forget(ctx, requests); + } + + fn getattr( + &self, + ctx: Context, + inode: Inode, + handle: Option, + ) -> io::Result<(bindings::stat64, Duration)> { + { + let inodes = self.inodes.read().unwrap(); + if let Some(vnode) = inodes.get(&inode) { + return Ok(Self::virtual_stat(inode, vnode)); + } + } + self.inner.getattr(ctx, inode, handle) + } + + fn setattr( + &self, + ctx: Context, + inode: Inode, + attr: bindings::stat64, + handle: Option, + valid: SetattrValid, + ) -> io::Result<(bindings::stat64, Duration)> { + if self.is_virtual(inode) { + return Err(linux_errno::eperm()); + } + self.inner.setattr(ctx, inode, attr, handle, valid) + } + + fn readlink(&self, ctx: Context, inode: Inode) -> io::Result> { + if self.is_virtual(inode) { + return 
Err(linux_errno::einval()); + } + self.inner.readlink(ctx, inode) + } + + fn symlink( + &self, + ctx: Context, + linkname: &CStr, + parent: Inode, + name: &CStr, + extensions: Extensions, + ) -> io::Result { + self.inner.symlink(ctx, linkname, parent, name, extensions) + } + + fn mknod( + &self, + ctx: Context, + inode: Inode, + name: &CStr, + mode: u32, + rdev: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + self.inner + .mknod(ctx, inode, name, mode, rdev, umask, extensions) + } + + fn mkdir( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + let key = (parent, CString::from(name)); + if self.name_to_inode.read().unwrap().contains_key(&key) { + return Err(linux_errno::eexist()); + } + self.inner.mkdir(ctx, parent, name, mode, umask, extensions) + } + + fn unlink(&self, ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> { + self.inner.unlink(ctx, parent, name) + } + + fn rmdir(&self, ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> { + self.inner.rmdir(ctx, parent, name) + } + + fn rename( + &self, + ctx: Context, + olddir: Inode, + oldname: &CStr, + newdir: Inode, + newname: &CStr, + flags: u32, + ) -> io::Result<()> { + self.inner + .rename(ctx, olddir, oldname, newdir, newname, flags) + } + + fn link( + &self, + ctx: Context, + inode: Inode, + newparent: Inode, + newname: &CStr, + ) -> io::Result { + if self.is_virtual(inode) { + return Err(linux_errno::eperm()); + } + self.inner.link(ctx, inode, newparent, newname) + } + + fn open( + &self, + ctx: Context, + inode: Inode, + kill_priv: bool, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + { + let inodes = self.inodes.read().unwrap(); + if let Some(vnode) = inodes.get(&inode) { + if vnode.is_dir() { + return Err(linux_errno::eisdir()); + } + if (flags as i32 & libc::O_ACCMODE) != libc::O_RDONLY { + return Err(linux_errno::eacces()); + } + return Ok((Some(VIRTUAL_HANDLE), 
OpenOptions::empty())); + } + } + self.inner.open(ctx, inode, kill_priv, flags) + } + + fn create( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + kill_priv: bool, + flags: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result<(Entry, Option, OpenOptions)> { + self.inner + .create(ctx, parent, name, mode, kill_priv, flags, umask, extensions) + } + + fn read( + &self, + ctx: Context, + inode: Inode, + handle: Handle, + mut w: W, + size: u32, + offset: u64, + lock_owner: Option, + flags: u32, + ) -> io::Result { + { + let inodes = self.inodes.read().unwrap(); + if let Some(vnode) = inodes.get(&inode) { + let data = vnode.data().ok_or_else(linux_errno::eisdir)?; + let off: usize = offset.try_into().map_err(|_| linux_errno::einval())?; + if off >= data.len() { + return Ok(0); + } + let remaining = data.len() - off; + let len = remaining.min(size as usize); + return w.write(&data[off..(off + len)]); + } + } + self.inner + .read(ctx, inode, handle, w, size, offset, lock_owner, flags) + } + + fn write( + &self, + ctx: Context, + inode: Inode, + handle: Handle, + r: R, + size: u32, + offset: u64, + lock_owner: Option, + delayed_write: bool, + kill_priv: bool, + flags: u32, + ) -> io::Result { + if self.is_virtual(inode) { + return Err(linux_errno::eperm()); + } + self.inner.write( + ctx, + inode, + handle, + r, + size, + offset, + lock_owner, + delayed_write, + kill_priv, + flags, + ) + } + + fn flush(&self, ctx: Context, inode: Inode, handle: Handle, lock_owner: u64) -> io::Result<()> { + if self.is_virtual(inode) { + return Ok(()); + } + self.inner.flush(ctx, inode, handle, lock_owner) + } + + fn fsync(&self, ctx: Context, inode: Inode, datasync: bool, handle: Handle) -> io::Result<()> { + if self.is_virtual(inode) { + return Ok(()); + } + self.inner.fsync(ctx, inode, datasync, handle) + } + + fn fallocate( + &self, + ctx: Context, + inode: Inode, + handle: Handle, + mode: u32, + offset: u64, + length: u64, + ) -> io::Result<()> { + if 
self.is_virtual(inode) { + return Err(linux_errno::eperm()); + } + self.inner + .fallocate(ctx, inode, handle, mode, offset, length) + } + + fn release( + &self, + ctx: Context, + inode: Inode, + flags: u32, + handle: Handle, + flush: bool, + flock_release: bool, + lock_owner: Option, + ) -> io::Result<()> { + { + let mut inodes = self.inodes.write().unwrap(); + if let Some(vnode) = inodes.get(&inode) { + if vnode.one_shot { + inodes.remove(&inode); + } + return Ok(()); + } + } + self.inner + .release(ctx, inode, flags, handle, flush, flock_release, lock_owner) + } + + fn statfs(&self, ctx: Context, inode: Inode) -> io::Result { + self.inner.statfs(ctx, inode) + } + + fn getxattr( + &self, + ctx: Context, + inode: Inode, + name: &CStr, + size: u32, + ) -> io::Result { + if self.is_virtual(inode) { + return Err(linux_errno::enodata()); + } + self.inner.getxattr(ctx, inode, name, size) + } + + fn listxattr(&self, ctx: Context, inode: Inode, size: u32) -> io::Result { + if self.is_virtual(inode) { + if size == 0 { + return Ok(ListxattrReply::Count(0)); + } + return Ok(ListxattrReply::Names(Vec::new())); + } + self.inner.listxattr(ctx, inode, size) + } + + fn setxattr( + &self, + ctx: Context, + inode: Inode, + name: &CStr, + value: &[u8], + flags: u32, + ) -> io::Result<()> { + if self.is_virtual(inode) { + return Err(linux_errno::eperm()); + } + self.inner.setxattr(ctx, inode, name, value, flags) + } + + fn removexattr(&self, ctx: Context, inode: Inode, name: &CStr) -> io::Result<()> { + if self.is_virtual(inode) { + return Err(linux_errno::eperm()); + } + self.inner.removexattr(ctx, inode, name) + } + + fn opendir( + &self, + ctx: Context, + inode: Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + self.inner.opendir(ctx, inode, flags) + } + + fn readdir( + &self, + ctx: Context, + inode: Inode, + handle: Handle, + size: u32, + offset: u64, + add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + self.inner + 
.readdir(ctx, inode, handle, size, offset, add_entry) + } + + fn readdirplus( + &self, + ctx: Context, + inode: Inode, + handle: Handle, + size: u32, + offset: u64, + add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry, Entry) -> io::Result, + { + self.inner + .readdirplus(ctx, inode, handle, size, offset, add_entry) + } + + fn fsyncdir( + &self, + ctx: Context, + inode: Inode, + datasync: bool, + handle: Handle, + ) -> io::Result<()> { + self.inner.fsyncdir(ctx, inode, datasync, handle) + } + + fn releasedir(&self, ctx: Context, inode: Inode, flags: u32, handle: Handle) -> io::Result<()> { + self.inner.releasedir(ctx, inode, flags, handle) + } + + fn access(&self, ctx: Context, inode: Inode, mask: u32) -> io::Result<()> { + if self.is_virtual(inode) { + if mask & (libc::W_OK as u32) != 0 { + return Err(linux_errno::eacces()); + } + return Ok(()); + } + self.inner.access(ctx, inode, mask) + } + + fn lseek( + &self, + ctx: Context, + inode: Inode, + _handle: Handle, + offset: u64, + whence: u32, + ) -> io::Result { + { + let inodes = self.inodes.read().unwrap(); + if let Some(vnode) = inodes.get(&inode) { + let size = vnode.data().ok_or_else(linux_errno::eisdir)?.len() as u64; + // FUSE lseek is only called for SEEK_DATA/SEEK_HOLE. + return match whence as i32 { + libc::SEEK_DATA => { + if offset < size { + Ok(offset) + } else { + Err(linux_errno::enxio()) + } + } + libc::SEEK_HOLE => { + if offset < size { + Ok(size) + } else { + Err(linux_errno::enxio()) + } + } + _ => Err(linux_errno::einval()), + }; + } + } + self.inner.lseek(ctx, inode, _handle, offset, whence) + } + + fn copyfilerange( + &self, + ctx: Context, + inode_in: Inode, + handle_in: Handle, + offset_in: u64, + inode_out: Inode, + handle_out: Handle, + offset_out: u64, + len: u64, + flags: u64, + ) -> io::Result { + // Virtual inodes don't have real file descriptors, so copy_file_range + // cannot work. Return EXDEV to tell the kernel to fall back to + // read+write. 
+ if self.is_virtual(inode_in) || self.is_virtual(inode_out) { + return Err(linux_errno::exdev()); + } + self.inner.copyfilerange( + ctx, inode_in, handle_in, offset_in, inode_out, handle_out, offset_out, len, flags, + ) + } + + fn setupmapping( + &self, + ctx: Context, + inode: Inode, + handle: Handle, + foffset: u64, + len: u64, + flags: u64, + moffset: u64, + host_shm_base: u64, + shm_size: u64, + #[cfg(target_os = "macos")] map_sender: &Option>, + ) -> io::Result<()> { + { + let inodes = self.inodes.read().unwrap(); + if let Some(vnode) = inodes.get(&inode) { + let data = vnode.data().ok_or_else(linux_errno::eisdir)?; + #[cfg(target_os = "linux")] + { + if (moffset + len) > shm_size { + return Err(linux_errno::einval()); + } + + let addr = host_shm_base + moffset; + let ret = unsafe { + libc::mmap( + addr as *mut libc::c_void, + len as usize, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED, + -1, + 0, + ) + }; + if std::ptr::eq(ret, libc::MAP_FAILED) { + return Err(io::Error::last_os_error()); + } + + let foff = foffset as usize; + if foff < data.len() { + let available = data.len() - foff; + let to_copy = (len as usize).min(available); + unsafe { + libc::memcpy( + addr as *mut libc::c_void, + data.as_ptr().add(foff) as *const _, + to_copy, + ) + }; + } + + return Ok(()); + } + + // TODO: implement DAX for virtual files on macOS. + // Needs a shared memory region manager (see setupmapping + // in macos/passthrough.rs for the real-file DAX path). 
+ #[cfg(target_os = "macos")] + { + let _ = data; + return Err(linux_errno::enosys()); + } + } + } + self.inner.setupmapping( + ctx, + inode, + handle, + foffset, + len, + flags, + moffset, + host_shm_base, + shm_size, + #[cfg(target_os = "macos")] + map_sender, + ) + } + + fn removemapping( + &self, + ctx: Context, + requests: Vec, + host_shm_base: u64, + shm_size: u64, + #[cfg(target_os = "macos")] map_sender: &Option>, + ) -> io::Result<()> { + self.inner.removemapping( + ctx, + requests, + host_shm_base, + shm_size, + #[cfg(target_os = "macos")] + map_sender, + ) + } + + fn ioctl( + &self, + ctx: Context, + inode: Inode, + handle: Handle, + flags: u32, + cmd: u32, + arg: u64, + in_size: u32, + out_size: u32, + exit_code: &Arc, + ) -> io::Result> { + self.inner.ioctl( + ctx, inode, handle, flags, cmd, arg, in_size, out_size, exit_code, + ) + } +} diff --git a/src/devices/src/virtio/fs/device.rs b/src/devices/src/virtio/fs/device.rs index bc877bc24..945f8393e 100644 --- a/src/devices/src/virtio/fs/device.rs +++ b/src/devices/src/virtio/fs/device.rs @@ -17,6 +17,7 @@ use super::super::{ VirtioShmRegion, }; use super::passthrough; +use super::virtual_entry::VirtualDirEntry; use super::worker::FsWorker; use super::ExportTable; use super::{defs, defs::uapi}; @@ -48,6 +49,7 @@ pub struct Fs { shm_region: Option, passthrough_cfg: passthrough::Config, read_only: bool, + virtual_entries: Vec, worker_thread: Option>, worker_stopfd: EventFd, exit_code: Arc, @@ -62,6 +64,7 @@ impl Fs { exit_code: Arc, allow_root_dir_delete: bool, read_only: bool, + virtual_entries: Vec, ) -> super::Result { let avail_features = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX); @@ -84,6 +87,7 @@ impl Fs { shm_region: None, passthrough_cfg: fs_cfg, read_only, + virtual_entries, worker_thread: None, worker_stopfd: EventFd::new(EFD_NONBLOCK).map_err(FsError::EventFd)?, exit_code, @@ -180,6 +184,7 @@ impl VirtioDevice for Fs { queue_evts.push(dq.event); } + let virtual_entries = 
self.virtual_entries.clone(); let worker = FsWorker::new( worker_queues, queue_evts, @@ -188,6 +193,7 @@ impl VirtioDevice for Fs { self.shm_region.clone(), self.passthrough_cfg.clone(), self.read_only, + virtual_entries, self.worker_stopfd.try_clone().unwrap(), self.exit_code.clone(), #[cfg(target_os = "macos")] diff --git a/src/devices/src/virtio/fs/inode_alloc.rs b/src/devices/src/virtio/fs/inode_alloc.rs index 63e570acd..1919b1406 100644 --- a/src/devices/src/virtio/fs/inode_alloc.rs +++ b/src/devices/src/virtio/fs/inode_alloc.rs @@ -8,9 +8,7 @@ use super::fuse; /// - `0` — invalid / negative-entry cache sentinel (never allocated) /// - `1` (`ROOT_ID`) — the root directory of the filesystem /// -/// All other numbers are allocated sequentially starting from `ROOT_ID + 2` -/// (inode 2 is reserved for the legacy init_inode in PassthroughFs until the -/// AugmentFs overlay takes over init handling). +/// All other numbers are allocated sequentially starting from `ROOT_ID + 1`. /// The allocator is `Send + Sync` and safe to share across threads. pub struct InodeAllocator { next: AtomicU64, @@ -19,7 +17,7 @@ pub struct InodeAllocator { impl InodeAllocator { pub fn new() -> Self { Self { - next: AtomicU64::new(fuse::ROOT_ID + 2), + next: AtomicU64::new(fuse::ROOT_ID + 1), } } diff --git a/src/devices/src/virtio/fs/linux/passthrough.rs b/src/devices/src/virtio/fs/linux/passthrough.rs index abda1ce53..08da133f0 100644 --- a/src/devices/src/virtio/fs/linux/passthrough.rs +++ b/src/devices/src/virtio/fs/linux/passthrough.rs @@ -32,9 +32,6 @@ const CURRENT_DIR_CSTR: &[u8] = b".\0"; const PARENT_DIR_CSTR: &[u8] = b"..\0"; const EMPTY_CSTR: &[u8] = b"\0"; const PROC_CSTR: &[u8] = b"/proc/self/fd\0"; -const INIT_CSTR: &[u8] = b"init.krun\0"; - -static INIT_BINARY: &[u8] = init_blob::INIT_BINARY; type Inode = u64; type Handle = u64; @@ -360,13 +357,11 @@ pub struct PassthroughFs { // do with an fd opened with this flag. 
inodes: RwLock>>, inode_alloc: Arc, - init_inode: u64, // File descriptors for open files and directories. Unlike the fds in `inodes`, these _can_ be // used for reading and writing data. handles: RwLock>>, next_handle: AtomicU64, - init_handle: u64, // File descriptor pointing to the `/proc/self/fd` directory. This is used to convert an fd from // `inodes` into one that can go into `handles`. This is accomplished by reading the @@ -440,11 +435,9 @@ impl PassthroughFs { Ok(PassthroughFs { inodes: RwLock::new(MultikeyBTreeMap::new()), inode_alloc, - init_inode: fuse::ROOT_ID + 1, handles: RwLock::new(BTreeMap::new()), next_handle: AtomicU64::new(1), - init_handle: 0, proc_self_fd, @@ -993,25 +986,7 @@ impl FileSystem for PassthroughFs { fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result { debug!("do_lookup: {name:?}"); - let init_name = unsafe { CStr::from_bytes_with_nul_unchecked(INIT_CSTR) }; - - if self.init_inode != 0 && name == init_name { - let mut st: libc::stat64 = unsafe { mem::zeroed() }; - st.st_size = INIT_BINARY.len() as i64; - st.st_ino = self.init_inode; - st.st_mode = 0o100_755; - - Ok(Entry { - inode: self.init_inode, - generation: 0, - attr: st, - attr_flags: 0, - attr_timeout: self.cfg.attr_timeout, - entry_timeout: self.cfg.entry_timeout, - }) - } else { - self.do_lookup(parent, name) - } + self.do_lookup(parent, name) } fn forget(&self, _ctx: Context, inode: Inode, count: u64) { @@ -1130,11 +1105,7 @@ impl FileSystem for PassthroughFs { kill_priv: bool, flags: u32, ) -> io::Result<(Option, OpenOptions)> { - if inode == self.init_inode { - Ok((Some(self.init_handle), OpenOptions::empty())) - } else { - self.do_open(inode, kill_priv, flags) - } + self.do_open(inode, kill_priv, flags) } fn release( @@ -1235,16 +1206,6 @@ impl FileSystem for PassthroughFs { _flags: u32, ) -> io::Result { debug!("read: {inode:?}"); - if inode == self.init_inode { - let off: usize = offset.try_into().map_err(|_| einval())?; - let len = if off + 
(size as usize) < INIT_BINARY.len() { - size as usize - } else { - INIT_BINARY.len() - off - }; - return w.write(&INIT_BINARY[off..(off + len)]); - } - let data = self .handles .read() @@ -1825,10 +1786,6 @@ impl FileSystem for PassthroughFs { return Err(io::Error::from_raw_os_error(libc::ENOSYS)); } - if inode == self.init_inode { - return Err(io::Error::from_raw_os_error(libc::ENODATA)); - } - let mut buf = vec![0; size as usize]; // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we @@ -2088,36 +2045,6 @@ impl FileSystem for PassthroughFs { debug!("setupmapping: ino {inode:?} addr={addr:x} len={len}"); - if inode == self.init_inode { - let ret = unsafe { - libc::mmap( - addr as *mut libc::c_void, - len as usize, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED, - -1, - 0, - ) - }; - if std::ptr::eq(ret, libc::MAP_FAILED) { - return Err(io::Error::last_os_error()); - } - - let to_copy = if len as usize > INIT_BINARY.len() { - INIT_BINARY.len() - } else { - len as usize - }; - unsafe { - libc::memcpy( - addr as *mut libc::c_void, - INIT_BINARY.as_ptr() as *const _, - to_copy, - ) - }; - return Ok(()); - } - let file = self.open_inode(inode, open_flags)?; let fd = file.as_raw_fd(); diff --git a/src/devices/src/virtio/fs/macos/passthrough.rs b/src/devices/src/virtio/fs/macos/passthrough.rs index 3d27aec7f..d1a862d0c 100644 --- a/src/devices/src/virtio/fs/macos/passthrough.rs +++ b/src/devices/src/virtio/fs/macos/passthrough.rs @@ -32,14 +32,11 @@ use super::super::fuse; use super::super::inode_alloc::InodeAllocator; use super::super::multikey::MultikeyBTreeMap; -const INIT_CSTR: &[u8] = b"init.krun\0"; const XATTR_KEY: &[u8] = b"user.containers.override_stat\0"; const SECURITY_CAPABILITY: &[u8] = b"security.capability\0"; const UID_MAX: u32 = u32::MAX - 1; -static INIT_BINARY: &[u8] = init_blob::INIT_BINARY; - type Inode = u64; type Handle = u64; @@ -545,11 +542,9 @@ impl Default 
for Config { pub struct PassthroughFs { inodes: RwLock>>, inode_alloc: Arc, - init_inode: u64, handles: RwLock>>, next_handle: AtomicU64, - init_handle: u64, map_windows: Mutex>, @@ -581,11 +576,9 @@ impl PassthroughFs { Ok(PassthroughFs { inodes: RwLock::new(MultikeyBTreeMap::new()), inode_alloc, - init_inode: fuse::ROOT_ID + 1, handles: RwLock::new(BTreeMap::new()), next_handle: AtomicU64::new(1), - init_handle: 0, map_windows: Mutex::new(HashMap::new()), @@ -1202,25 +1195,7 @@ impl FileSystem for PassthroughFs { fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result { debug!("lookup: {name:?}"); - let _init_name = unsafe { CStr::from_bytes_with_nul_unchecked(INIT_CSTR) }; - - if self.init_inode != 0 && name == _init_name { - let mut st: bindings::stat64 = unsafe { mem::zeroed() }; - st.st_size = INIT_BINARY.len() as i64; - st.st_ino = self.init_inode; - st.st_mode = 0o100_755; - - Ok(Entry { - inode: self.init_inode, - generation: 0, - attr: st, - attr_flags: 0, - attr_timeout: self.cfg.attr_timeout, - entry_timeout: self.cfg.entry_timeout, - }) - } else { - self.do_lookup(parent, name) - } + self.do_lookup(parent, name) } fn forget(&self, _ctx: Context, inode: Inode, count: u64) { @@ -1340,11 +1315,7 @@ impl FileSystem for PassthroughFs { kill_priv: bool, flags: u32, ) -> io::Result<(Option, OpenOptions)> { - if inode == self.init_inode { - Ok((Some(self.init_handle), OpenOptions::empty())) - } else { - self.do_open(inode, kill_priv, flags) - } + self.do_open(inode, kill_priv, flags) } fn release( @@ -1457,18 +1428,6 @@ impl FileSystem for PassthroughFs { _flags: u32, ) -> io::Result { debug!("read: {inode:?}"); - if inode == self.init_inode { - let off: usize = offset - .try_into() - .map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?; - let len = if off + (size as usize) < INIT_BINARY.len() { - size as usize - } else { - INIT_BINARY.len() - off - }; - return w.write(&INIT_BINARY[off..(off + len)]); - } - let data = self .handles 
.read() @@ -2054,10 +2013,6 @@ impl FileSystem for PassthroughFs { return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); } - if inode == self.init_inode { - return Err(linux_error(io::Error::from_raw_os_error(libc::ENODATA))); - } - if name.to_bytes() == XATTR_KEY { return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); } diff --git a/src/devices/src/virtio/fs/mod.rs b/src/devices/src/virtio/fs/mod.rs index 179535131..ae5b7bbdc 100644 --- a/src/devices/src/virtio/fs/mod.rs +++ b/src/devices/src/virtio/fs/mod.rs @@ -1,3 +1,4 @@ +mod augment_fs; mod device; #[allow(dead_code)] mod filesystem; @@ -7,6 +8,7 @@ mod inode_alloc; mod multikey; mod read_only; mod server; +pub mod virtual_entry; mod worker; #[cfg(target_os = "linux")] diff --git a/src/devices/src/virtio/fs/virtual_entry.rs b/src/devices/src/virtio/fs/virtual_entry.rs new file mode 100644 index 000000000..06f6915b3 --- /dev/null +++ b/src/devices/src/virtio/fs/virtual_entry.rs @@ -0,0 +1,56 @@ +// Virtual entry types for the virtiofs overlay. + +use std::ffi::CString; + +/// Block size reported by virtual entries in st_blksize. +pub const VIRTUAL_BLKSIZE: i64 = 4096; + +/// A synthetic filesystem entry that exists only in memory. +#[derive(Clone, Debug)] +pub struct VirtualEntry { + /// Permission bits. File type bits (S_IFMT) are ignored — the type + /// is derived from the `content` variant. + pub mode: u32, + /// If true, the entry can only be looked up once. + pub one_shot: bool, + pub content: VirtualEntryContent, +} + +#[derive(Clone, Debug)] +pub enum VirtualEntryContent { + /// A read-only file backed by a static byte slice. + File { data: &'static [u8] }, + /// A directory containing other virtual entries. + Dir { children: Vec }, +} + +impl VirtualEntry { + pub fn is_dir(&self) -> bool { + matches!(self.content, VirtualEntryContent::Dir { .. }) + } + + /// Returns the full st_mode: file type bits from the variant OR'd + /// with the permission bits from self.mode. 
+ #[allow(clippy::unnecessary_cast)] // libc::S_IF* is u16 on macOS, u32 on Linux + pub fn st_mode(&self) -> u32 { + let file_type = match self.content { + VirtualEntryContent::File { .. } => libc::S_IFREG as u32, + VirtualEntryContent::Dir { .. } => libc::S_IFDIR as u32, + }; + file_type | (self.mode & !(libc::S_IFMT as u32)) + } + + pub fn data(&self) -> Option<&'static [u8]> { + match &self.content { + VirtualEntryContent::File { data } => Some(data), + VirtualEntryContent::Dir { .. } => None, + } + } +} + +/// A named entry in a virtual directory. +#[derive(Clone, Debug)] +pub struct VirtualDirEntry { + pub name: CString, + pub entry: VirtualEntry, +} diff --git a/src/devices/src/virtio/fs/worker.rs b/src/devices/src/virtio/fs/worker.rs index e554aa377..084a2aa85 100644 --- a/src/devices/src/virtio/fs/worker.rs +++ b/src/devices/src/virtio/fs/worker.rs @@ -14,17 +14,19 @@ use utils::eventfd::EventFd; use vm_memory::GuestMemoryMmap; use super::super::{FsError, Queue}; +use super::augment_fs::AugmentFs; use super::defs::{HPQ_INDEX, REQ_INDEX}; use super::descriptor_utils::{Reader, Writer}; use super::inode_alloc::InodeAllocator; use super::passthrough::{self, PassthroughFs}; use super::read_only::PassthroughFsRo; use super::server::Server; +use super::virtual_entry::VirtualDirEntry; use crate::virtio::{InterruptTransport, VirtioShmRegion}; enum FsServer { - ReadWrite(Server), - ReadOnly(Server), + ReadWrite(Server>), + ReadOnly(Server>), } impl FsServer { @@ -80,21 +82,26 @@ impl FsWorker { shm_region: Option, passthrough_cfg: passthrough::Config, read_only: bool, + virtual_entries: Vec, stop_fd: EventFd, exit_code: Arc, #[cfg(target_os = "macos")] map_sender: Option>, ) -> Result { let inode_alloc = Arc::new(InodeAllocator::new()); let server = if read_only { - FsServer::ReadOnly(Server::new(PassthroughFsRo::new( - passthrough_cfg, - inode_alloc, - )?)) + let inner = PassthroughFsRo::new(passthrough_cfg, inode_alloc.clone())?; + 
FsServer::ReadOnly(Server::new(AugmentFs::new( + inner, + &inode_alloc, + virtual_entries, + ))) } else { - FsServer::ReadWrite(Server::new(PassthroughFs::new( - passthrough_cfg, - inode_alloc, - )?)) + let inner = PassthroughFs::new(passthrough_cfg, inode_alloc.clone())?; + FsServer::ReadWrite(Server::new(AugmentFs::new( + inner, + &inode_alloc, + virtual_entries, + ))) }; Ok(Self { queues, diff --git a/src/devices/src/virtio/linux_errno.rs b/src/devices/src/virtio/linux_errno.rs index 59aca5789..105f977b5 100644 --- a/src/devices/src/virtio/linux_errno.rs +++ b/src/devices/src/virtio/linux_errno.rs @@ -183,3 +183,37 @@ pub fn linux_errno_raw(errno: i32) -> i32 { _ => LINUX_EIO, } } + +// Helper functions returning io::Error with Linux errno values. +use std::io; + +pub fn eperm() -> io::Error { + io::Error::from_raw_os_error(LINUX_EPERM) +} +pub fn enoent() -> io::Error { + io::Error::from_raw_os_error(LINUX_ENOENT) +} +pub fn eacces() -> io::Error { + io::Error::from_raw_os_error(LINUX_EACCES) +} +pub fn eexist() -> io::Error { + io::Error::from_raw_os_error(LINUX_EEXIST) +} +pub fn einval() -> io::Error { + io::Error::from_raw_os_error(LINUX_EINVAL) +} +pub fn eisdir() -> io::Error { + io::Error::from_raw_os_error(LINUX_EISDIR) +} +pub fn exdev() -> io::Error { + io::Error::from_raw_os_error(LINUX_EXDEV) +} +pub fn enosys() -> io::Error { + io::Error::from_raw_os_error(LINUX_ENOSYS) +} +pub fn enodata() -> io::Error { + io::Error::from_raw_os_error(LINUX_ENODATA) +} +pub fn enxio() -> io::Error { + io::Error::from_raw_os_error(LINUX_ENXIO) +} diff --git a/src/libkrun/Cargo.toml b/src/libkrun/Cargo.toml index 4e54bf99c..27525ea7e 100644 --- a/src/libkrun/Cargo.toml +++ b/src/libkrun/Cargo.toml @@ -31,6 +31,7 @@ krun_display = { package = "krun-display", version = "0.1.0", path = "../display krun_input = { package = "krun-input", version = "0.1.0", path = "../input", optional = true, features = ["bindgen_clang_runtime"] } devices = { package = "krun-devices", 
version = "=0.1.0-1.18.0", path = "../devices" } +init-blob = { path = "../init-blob" } polly = { package = "krun-polly", version = "=0.1.0-1.18.0", path = "../polly" } utils = { package = "krun-utils", version = "=0.1.0-1.18.0", path = "../utils" } vmm = { package = "krun-vmm", version = "=0.1.0-1.18.0", path = "../vmm" } diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index a7b7eee6a..9e2215a62 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -14,6 +14,8 @@ use env_logger::{Env, Target}; #[cfg(feature = "gpu")] use krun_display::DisplayBackend; +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +use devices::virtio::fs::virtual_entry::{VirtualDirEntry, VirtualEntry, VirtualEntryContent}; use libc::{c_char, c_int, size_t}; use once_cell::sync::Lazy; use polly::event_manager::EventManager; @@ -23,7 +25,6 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; use std::convert::TryInto; use std::env; -#[cfg(target_os = "linux")] use std::ffi::CString; use std::ffi::{c_void, CStr}; use std::fs::File; @@ -90,6 +91,23 @@ static KRUN_NITRO_DEBUG: Mutex = Mutex::new(false); // Path to the init binary to be executed inside the VM. 
const INIT_PATH: &str = "/init.krun"; +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +const DEFAULT_INIT_PAYLOAD: &[u8] = init_blob::INIT_BINARY; + +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +fn init_virtual_entry() -> VirtualDirEntry { + VirtualDirEntry { + name: CString::new("init.krun").unwrap(), + entry: VirtualEntry { + mode: 0o755, + one_shot: true, + content: VirtualEntryContent::File { + data: DEFAULT_INIT_PAYLOAD, + }, + }, + } +} + static KRUNFW: LazyLock> = LazyLock::new(|| unsafe { libloading::Library::new(KRUNFW_NAME).ok() }); @@ -578,7 +596,7 @@ pub extern "C" fn krun_set_vm_config(ctx_id: u32, num_vcpus: u8, ram_mib: u32) - #[allow(clippy::missing_safety_doc)] #[no_mangle] -#[cfg(not(feature = "tee"))] +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] pub unsafe extern "C" fn krun_set_root(ctx_id: u32, c_root_path: *const c_char) -> i32 { let root_path = match CStr::from_ptr(c_root_path).to_str() { Ok(root) => root, @@ -598,6 +616,7 @@ pub unsafe extern "C" fn krun_set_root(ctx_id: u32, c_root_path: *const c_char) shm_size: Some(1 << 29), allow_root_dir_delete: false, read_only: false, + virtual_entries: vec![init_virtual_entry()], }); } Entry::Vacant(_) => return -libc::ENOENT, @@ -608,7 +627,7 @@ pub unsafe extern "C" fn krun_set_root(ctx_id: u32, c_root_path: *const c_char) #[allow(clippy::missing_safety_doc)] #[no_mangle] -#[cfg(not(feature = "tee"))] +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] pub unsafe extern "C" fn krun_add_virtiofs( ctx_id: u32, c_tag: *const c_char, @@ -619,7 +638,7 @@ pub unsafe extern "C" fn krun_add_virtiofs( #[allow(clippy::missing_safety_doc)] #[no_mangle] -#[cfg(not(feature = "tee"))] +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] pub unsafe extern "C" fn krun_add_virtiofs2( ctx_id: u32, c_tag: *const c_char, @@ -631,7 +650,7 @@ pub unsafe extern "C" fn krun_add_virtiofs2( #[allow(clippy::missing_safety_doc)] #[no_mangle] -#[cfg(not(feature = "tee"))] 
+#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] pub unsafe extern "C" fn krun_add_virtiofs3( ctx_id: u32, c_tag: *const c_char, @@ -664,12 +683,17 @@ pub unsafe extern "C" fn krun_add_virtiofs3( match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); + let mut virtual_entries = Vec::new(); + if tag == "/dev/root" { + virtual_entries.push(init_virtual_entry()); + } cfg.vmr.add_fs_device(FsDeviceConfig { fs_id: tag.to_string(), shared_dir: path.to_string(), shm_size: shm, allow_root_dir_delete: false, read_only, + virtual_entries, }); } Entry::Vacant(_) => return -libc::ENOENT, diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index b92b931d4..38072ce70 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -2042,6 +2042,7 @@ fn attach_fs_devices( exit_code.clone(), config.allow_root_dir_delete, config.read_only, + config.virtual_entries.clone(), ) .unwrap(), )); diff --git a/src/vmm/src/vmm_config/fs.rs b/src/vmm/src/vmm_config/fs.rs index ccf86f5cd..dc5906dab 100644 --- a/src/vmm/src/vmm_config/fs.rs +++ b/src/vmm/src/vmm_config/fs.rs @@ -1,3 +1,6 @@ +#[cfg(not(feature = "aws-nitro"))] +use devices::virtio::fs::virtual_entry::VirtualDirEntry; + #[derive(Clone, Debug)] pub struct FsDeviceConfig { pub fs_id: String, @@ -5,4 +8,6 @@ pub struct FsDeviceConfig { pub shm_size: Option, pub allow_root_dir_delete: bool, pub read_only: bool, + #[cfg(not(feature = "aws-nitro"))] + pub virtual_entries: Vec, } From 6d174938a1fedb20d434ab9bb2aa01ce16125380 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 16:16:53 +0200 Subject: [PATCH 04/34] lib: add krun_disable_implicit_init() Add API to prevent the default init binary (/init.krun) from being injected into the root filesystem. Follows the existing krun_disable_implicit_{console,vsock} pattern. Must be called before krun_set_root(). 
Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- include/libkrun.h | 13 +++++++++++ src/libkrun/src/lib.rs | 50 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index 3004110f6..c6caba5fe 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -1153,6 +1153,7 @@ int32_t krun_get_max_vcpus(void); */ int32_t krun_split_irqchip(uint32_t ctx_id, bool enable); + /* * Do not create an implicit console device in the guest. By using this API, * libkrun will create zero console devices on behalf of the user. Any @@ -1167,6 +1168,18 @@ int32_t krun_split_irqchip(uint32_t ctx_id, bool enable); */ int32_t krun_disable_implicit_console(uint32_t ctx_id); +/** + * Do not inject the default init binary (/init.krun) into the root + * filesystem. Must be called before krun_set_root() or krun_add_virtiofs3() adds the "/dev/root" device. + * + * Arguments: + * "ctx_id" - the configuration context ID. + * + * Returns: + * Zero on success or a negative error number on failure. + */ +int32_t krun_disable_implicit_init(uint32_t ctx_id); + /** * Disable the implicit vsock device.
* diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 9e2215a62..4834f3d25 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -184,6 +184,8 @@ struct ContextConfig { console_output: Option, vmm_uid: Option, vmm_gid: Option, + #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] + disable_implicit_init: bool, } impl ContextConfig { @@ -616,7 +618,13 @@ pub unsafe extern "C" fn krun_set_root(ctx_id: u32, c_root_path: *const c_char) shm_size: Some(1 << 29), allow_root_dir_delete: false, read_only: false, - virtual_entries: vec![init_virtual_entry()], + virtual_entries: { + let mut v = Vec::new(); + if !cfg.disable_implicit_init { + v.push(init_virtual_entry()); + } + v + }, }); } Entry::Vacant(_) => return -libc::ENOENT, @@ -684,7 +692,7 @@ pub unsafe extern "C" fn krun_add_virtiofs3( Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); let mut virtual_entries = Vec::new(); - if tag == "/dev/root" { + if tag == "/dev/root" && !cfg.disable_implicit_init { virtual_entries.push(init_virtual_entry()); } cfg.vmr.add_fs_device(FsDeviceConfig { @@ -2432,6 +2440,19 @@ pub unsafe extern "C" fn krun_set_root_disk_remount( KRUN_SUCCESS } +#[no_mangle] +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +pub extern "C" fn krun_disable_implicit_init(ctx_id: u32) -> i32 { + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => { + ctx_cfg.get_mut().disable_implicit_init = true; + } + Entry::Vacant(_) => return -libc::ENOENT, + } + + KRUN_SUCCESS +} + #[no_mangle] pub extern "C" fn krun_disable_implicit_console(ctx_id: u32) -> i32 { match CTX_MAP.lock().unwrap().entry(ctx_id) { @@ -2878,3 +2899,28 @@ fn krun_start_enter_nitro(ctx_id: u32) -> i32 { } } } + +#[cfg(all(test, not(any(feature = "tee", feature = "aws-nitro"))))] +mod test_disable_implicit_init { + use super::*; + + #[test] + fn test_disable_implicit_init() { + let ctx = unsafe { krun_create_ctx() } as u32; + unsafe { + krun_disable_implicit_init(ctx); + krun_set_root(ctx,
c"/tmp".as_ptr()); + } + + let ctx_map = CTX_MAP.lock().unwrap(); + let cfg = ctx_map.get(&ctx).unwrap(); + assert_eq!(cfg.vmr.fs.len(), 1); + assert!( + cfg.vmr.fs[0].virtual_entries.is_empty(), + "root virtiofs should not inject init.krun after krun_disable_implicit_init()" + ); + drop(ctx_map); + + assert_eq!(krun_free_ctx(ctx), KRUN_SUCCESS); + } +} From 3ab23add4779db64be69ffb17f3ff977059d20b6 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 16:17:51 +0200 Subject: [PATCH 05/34] lib: add krun_fs_add_overlay_file(), krun_fs_add_overlay_dir() APIs Add C APIs to inject virtual files and directories into a virtiofs device. Entries are backed entirely by host memory (no host file). Files support one-shot semantics (disappear after the first lookup). Paths may contain '/' to nest entries inside existing virtual directories (e.g. krun_fs_add_overlay_dir for "etc", then krun_fs_add_overlay_file for "etc/hostname"). Intermediate directories must already exist; -ENOENT / -ENOTDIR is returned otherwise. Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- include/libkrun.h | 50 ++++++++++ src/libkrun/src/lib.rs | 207 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 235 insertions(+), 22 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index c6caba5fe..37ce25e85 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -1180,6 +1180,56 @@ int32_t krun_disable_implicit_console(uint32_t ctx_id); */ int32_t krun_disable_implicit_init(uint32_t ctx_id); +/** + * Add a virtual overlay file to a virtiofs device. + * + * The file is backed entirely by host memory (no host file). The data + * pointer is NOT copied — the caller must keep the memory valid for the + * full VM lifetime. + * + * "path" may contain '/' to place the file inside a virtual directory + * previously created with krun_fs_add_overlay_dir (e.g. "etc/hostname").
+ * All intermediate directories must already exist; -ENOENT is returned + * if a component is missing, -ENOTDIR if a component is not a directory. + * + * Arguments: + * "ctx_id" - the configuration context ID. + * "fs_tag" - tag of the virtiofs device (e.g. "/dev/root"). + * "path" - path of the file (e.g. "init.krun" or "etc/hostname"). + * "data" - pointer to the file content. + * "data_len" - length of the file content in bytes. + * "mode" - permission bits (e.g. 0644); file type bits are ignored. + * "one_shot" - if true, the file can only be looked up once. + * + * Returns: + * Zero on success or a negative error number on failure. + */ +int32_t krun_fs_add_overlay_file(uint32_t ctx_id, const char *fs_tag, + const char *path, const uint8_t *data, + size_t data_len, uint32_t mode, bool one_shot); + +/** + * Add a virtual overlay directory to a virtiofs device. + * + * The directory is created empty and is read-only, useful as a mount point. + * + * "path" may contain '/' to nest inside an existing virtual directory + * (e.g. "usr/lib"). All intermediate directories must already exist; + * -ENOENT is returned if a component is missing, -ENOTDIR if a component + * is not a directory. + * + * Arguments: + * "ctx_id" - the configuration context ID. + * "fs_tag" - tag of the virtiofs device (e.g. "/dev/root"). + * "path" - path of the directory (e.g. "dev" or "usr/lib"). + * "mode" - permission bits (e.g. 0755); file type bits are ignored. + * + * Returns: + * Zero on success or a negative error number on failure. + */ +int32_t krun_fs_add_overlay_dir(uint32_t ctx_id, const char *fs_tag, + const char *path, uint32_t mode); + /** * Disable the implicit vsock device.
* diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 4834f3d25..283df1141 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -19,8 +19,6 @@ use devices::virtio::fs::virtual_entry::{VirtualDirEntry, VirtualEntry, VirtualE use libc::{c_char, c_int, size_t}; use once_cell::sync::Lazy; use polly::event_manager::EventManager; -#[cfg(all(feature = "blk", not(feature = "tee")))] -use rand::distr::{Alphanumeric, SampleString}; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::convert::TryInto; @@ -613,10 +611,9 @@ pub unsafe extern "C" fn krun_set_root(ctx_id: u32, c_root_path: *const c_char) let cfg = ctx_cfg.get_mut(); cfg.vmr.add_fs_device(FsDeviceConfig { fs_id, - shared_dir, + shared_dir: Some(shared_dir), // Default to a conservative 512 MB window. shm_size: Some(1 << 29), - allow_root_dir_delete: false, read_only: false, virtual_entries: { let mut v = Vec::new(); @@ -666,7 +663,7 @@ pub unsafe extern "C" fn krun_add_virtiofs3( shm_size: u64, read_only: bool, ) -> i32 { - if c_tag.is_null() || c_path.is_null() { + if c_tag.is_null() { return -libc::EINVAL; } @@ -674,9 +671,15 @@ pub unsafe extern "C" fn krun_add_virtiofs3( Ok(tag) => tag, Err(_) => return -libc::EINVAL, }; - let path = match CStr::from_ptr(c_path).to_str() { - Ok(path) => path, - Err(_) => return -libc::EINVAL, + + // NULL path means NullFs (virtual-only filesystem, no host directory). 
+ let path = if c_path.is_null() { + None + } else { + match CStr::from_ptr(c_path).to_str() { + Ok(path) => Some(path), + Err(_) => return -libc::EINVAL, + } }; let shm = if shm_size > 0 { @@ -697,9 +700,8 @@ pub unsafe extern "C" fn krun_add_virtiofs3( } cfg.vmr.add_fs_device(FsDeviceConfig { fs_id: tag.to_string(), - shared_dir: path.to_string(), + shared_dir: path.map(|p| p.to_string()), shm_size: shm, - allow_root_dir_delete: false, read_only, virtual_entries, }); @@ -2411,25 +2413,35 @@ pub unsafe extern "C" fn krun_set_root_disk_remount( return -libc::EINVAL; } - // To boot from a filesystem other than virtiofs, - // we need to setup a temporary root from which init.krun can be executed. - // Otherwise, it would have to be copied to the target filesystem beforehand. - // Instead, init.krun will run from virtiofs and then switch to the real root. - let root_dir_suffix = Alphanumeric.sample_string(&mut rand::rng(), 6); - let empty_root = env::temp_dir().join(format!("krun-empty-root-{root_dir_suffix}")); - - if let Err(e) = std::fs::create_dir_all(&empty_root) { - error!("Failed to create empty root directory: {e:?}"); - return -libc::EINVAL; + // Boot from a block device: the virtiofs root only needs to + // serve init.krun and provide mount points for /dev, /proc, /sys. + // Use a NullFs (no host directory) with the inode overlay. + let mut virtual_entries = Vec::new(); + if !ctx_cfg.disable_implicit_init { + virtual_entries.push(init_virtual_entry()); + } + // init.c needs these directories as mount points before + // pivoting to the block device root. 
+ for name in ["dev", "proc", "sys", "newroot"] { + virtual_entries.push(VirtualDirEntry { + name: CString::new(name).unwrap(), + entry: VirtualEntry { + mode: 0o755, + one_shot: false, + content: VirtualEntryContent::Dir { + children: Vec::new(), + }, + }, + }); } ctx_cfg.vmr.add_fs_device(FsDeviceConfig { fs_id: "/dev/root".into(), - shared_dir: empty_root.to_string_lossy().into(), + shared_dir: None, // Default to a conservative 512 MB window. shm_size: Some(1 << 29), - allow_root_dir_delete: true, read_only: false, + virtual_entries, }); ctx_cfg.set_block_root(device, fstype, options); @@ -2453,6 +2465,157 @@ pub extern "C" fn krun_disable_implicit_init(ctx_id: u32) -> i32 { KRUN_SUCCESS } +/// Resolve a path like "a/b/c" into parent directory children + leaf name. +/// Errors with a libc errno if any intermediate component is missing or not a Dir. +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +fn resolve_overlay_path<'a>( + entries: &'a mut Vec, + path: &str, +) -> Result<(&'a mut Vec, CString), i32> { + let path = path.strip_prefix('/').unwrap_or(path); + let components: Vec<&str> = path.split('/').collect(); + let (leaf, parents) = components.split_last().ok_or(-libc::EINVAL)?; + if leaf.is_empty() { + return Err(-libc::EINVAL); + } + + let mut current = entries; + for component in parents { + let dir = current + .iter_mut() + .find(|e| e.name.as_c_str().to_bytes() == component.as_bytes()) + .ok_or(-libc::ENOENT)?; + match &mut dir.entry.content { + VirtualEntryContent::Dir { children } => current = children, + _ => return Err(-libc::ENOTDIR), + } + } + + let name = CString::new(*leaf).map_err(|_| -libc::EINVAL)?; + Ok((current, name)) +} + +/// Add a virtual overlay entry to a virtiofs device, resolving paths with `/`. 
+#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +fn fs_add_overlay_entry(ctx_id: u32, fs_tag: &str, path: &str, entry: VirtualEntry) -> i32 { + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => { + let cfg = ctx_cfg.get_mut(); + let fs_cfg = match cfg.vmr.fs.iter_mut().find(|fs| fs.fs_id == fs_tag) { + Some(fs) => fs, + None => return -libc::ENOENT, + }; + let (parent_children, name) = + match resolve_overlay_path(&mut fs_cfg.virtual_entries, path) { + Ok(v) => v, + Err(e) => return e, + }; + parent_children.push(VirtualDirEntry { name, entry }); + } + Entry::Vacant(_) => return -libc::ENOENT, + } + KRUN_SUCCESS +} + +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +pub unsafe extern "C" fn krun_get_default_init( + data_out: *mut *const u8, + len_out: *mut size_t, +) -> i32 { + if data_out.is_null() || len_out.is_null() { + return -libc::EINVAL; + } + *data_out = DEFAULT_INIT_PAYLOAD.as_ptr(); + *len_out = DEFAULT_INIT_PAYLOAD.len(); + KRUN_SUCCESS +} + +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +pub unsafe extern "C" fn krun_fs_add_overlay_file( + ctx_id: u32, + c_fs_tag: *const c_char, + c_path: *const c_char, + data: *const u8, + data_len: size_t, + mode: u32, + one_shot: bool, +) -> i32 { + if c_fs_tag.is_null() || c_path.is_null() { + return -libc::EINVAL; + } + + let fs_tag = match CStr::from_ptr(c_fs_tag).to_str() { + Ok(s) => s, + Err(_) => return -libc::EINVAL, + }; + let path = match CStr::from_ptr(c_path).to_str() { + Ok(s) => s, + Err(_) => return -libc::EINVAL, + }; + + // SAFETY: The caller guarantees the memory remains valid for the VM + // lifetime (see the C header contract). 
+ let payload: &'static [u8] = if data_len == 0 { + &[] + } else { + if data.is_null() { + return -libc::EINVAL; + } + slice::from_raw_parts(data, data_len) + }; + + fs_add_overlay_entry( + ctx_id, + fs_tag, + path, + VirtualEntry { + mode, + one_shot, + content: VirtualEntryContent::File { data: payload }, + }, + ) +} + +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +pub unsafe extern "C" fn krun_fs_add_overlay_dir( + ctx_id: u32, + c_fs_tag: *const c_char, + c_path: *const c_char, + mode: u32, +) -> i32 { + if c_fs_tag.is_null() || c_path.is_null() { + return -libc::EINVAL; + } + + let fs_tag = match CStr::from_ptr(c_fs_tag).to_str() { + Ok(s) => s, + Err(_) => return -libc::EINVAL, + }; + let path = match CStr::from_ptr(c_path).to_str() { + Ok(s) => s, + Err(_) => return -libc::EINVAL, + }; + + fs_add_overlay_entry( + ctx_id, + fs_tag, + path, + VirtualEntry { + mode, + one_shot: false, + content: VirtualEntryContent::Dir { + children: Vec::new(), + }, + }, + ) +} + #[no_mangle] pub extern "C" fn krun_disable_implicit_console(ctx_id: u32) -> i32 { match CTX_MAP.lock().unwrap().entry(ctx_id) { From 3f57196a3ca413d694ce89692e243f8d201787a6 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 16:18:48 +0200 Subject: [PATCH 06/34] lib: add krun_get_default_init() Add API to retrieve the built-in default init binary. Callers that use krun_disable_implicit_init() can use this to obtain the init binary and inject it themselves via krun_fs_add_overlay_file(). 
Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- include/libkrun.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/include/libkrun.h b/include/libkrun.h index 37ce25e85..7a12e91a4 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -1180,6 +1180,25 @@ int32_t krun_disable_implicit_console(uint32_t ctx_id); */ int32_t krun_disable_implicit_init(uint32_t ctx_id); +/** + * Get a pointer to the built-in default init binary. + * + * This is the same binary that libkrun injects as /init.krun by default. + * Callers that use krun_disable_implicit_init() can use this to inject the + * init binary themselves (e.g. via krun_fs_add_overlay_file with custom + * settings). + * + * The returned pointer is valid for the lifetime of the process (static data). + * + * Arguments: + * "data_out" - receives a pointer to the init binary bytes. + * "len_out" - receives the length in bytes. + * + * Returns: + * Zero on success or a negative error number on failure. + */ +int32_t krun_get_default_init(const uint8_t **data_out, size_t *len_out); + /** * Add a virtual overlay file to a virtiofs device. * From 3f85d9868c2abef41f5a9b6de0b43236b324a3b7 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 15:36:56 +0200 Subject: [PATCH 07/34] libkrun.h: document that implicit resource creation will become opt-in Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- include/libkrun.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/libkrun.h b/include/libkrun.h index 7a12e91a4..5a63be917 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -1153,6 +1153,13 @@ int32_t krun_get_max_vcpus(void); */ int32_t krun_split_irqchip(uint32_t ctx_id, bool enable); +/* + * NOTE: Implicit resource creation is a legacy convenience. The 2.0 API + * (see https://github.com/containers/libkrun/issues/634) will not create + * any implicit resources. 
Callers should start using the + * krun_disable_implicit_* functions now to ease migration. + */ + /* * Do not create an implicit console device in the guest. By using this API, From 60e878c2be5735b5ffe045f8f4629bf9e1b2ccfd Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 16:21:18 +0200 Subject: [PATCH 08/34] virtio/fs: add NullFs, a minimal empty-root FileSystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NullFs implements the FileSystem trait with just an empty root directory. It can be wrapped with AugmentFs to serve virtual files without any host directory involvement. Fs::new() now accepts Option<String> for shared_dir — None selects NullFs. FsDeviceConfig and FsServer gain the corresponding variants. Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- src/devices/src/virtio/fs/device.rs | 22 +++++++----- src/devices/src/virtio/fs/mod.rs | 1 + src/devices/src/virtio/fs/null_fs.rs | 50 ++++++++++++++++++++++++++++ src/devices/src/virtio/fs/worker.rs | 44 ++++++++++++++++-------- src/vmm/src/vmm_config/fs.rs | 4 ++- 5 files changed, 99 insertions(+), 22 deletions(-) create mode 100644 src/devices/src/virtio/fs/null_fs.rs diff --git a/src/devices/src/virtio/fs/device.rs b/src/devices/src/virtio/fs/device.rs index 945f8393e..f0fb45401 100644 --- a/src/devices/src/virtio/fs/device.rs +++ b/src/devices/src/virtio/fs/device.rs @@ -47,7 +47,7 @@ pub struct Fs { device_state: DeviceState, config: VirtioFsConfig, shm_region: Option, - passthrough_cfg: passthrough::Config, + passthrough_cfg: Option, read_only: bool, virtual_entries: Vec, worker_thread: Option>, @@ -60,7 +60,7 @@ pub struct Fs { impl Fs { pub fn new( fs_id: String, - shared_dir: String, + shared_dir: Option, exit_code: Arc, allow_root_dir_delete: bool, read_only: bool, @@ -73,11 +73,11 @@ impl Fs { config.tag[..tag.len()].copy_from_slice(tag.as_slice()); config.num_request_queues = 1; - let fs_cfg = passthrough::Config { - root_dir: 
shared_dir, + let fs_cfg = shared_dir.map(|root_dir| passthrough::Config { + root_dir, allow_root_dir_delete, ..Default::default() - }; + }); Ok(Fs { avail_features, @@ -107,10 +107,16 @@ impl Fs { pub fn set_export_table(&mut self, export_table: ExportTable) -> u64 { static FS_UNIQUE_ID: AtomicU64 = AtomicU64::new(0); - self.passthrough_cfg.export_fsid = FS_UNIQUE_ID.fetch_add(1, Ordering::Relaxed); - self.passthrough_cfg.export_table = Some(export_table); + let Some(cfg) = self.passthrough_cfg.as_mut() else { + // NullFs-backed devices have no passthrough config and don't + // participate in cross-domain fd export. Consume (and waste) an + // fsid so numbering stays dense, but don't store the table. + return FS_UNIQUE_ID.fetch_add(1, Ordering::Relaxed); + }; + cfg.export_fsid = FS_UNIQUE_ID.fetch_add(1, Ordering::Relaxed); + cfg.export_table = Some(export_table); - self.passthrough_cfg.export_fsid + cfg.export_fsid } #[cfg(target_os = "macos")] diff --git a/src/devices/src/virtio/fs/mod.rs b/src/devices/src/virtio/fs/mod.rs index ae5b7bbdc..f8ef63295 100644 --- a/src/devices/src/virtio/fs/mod.rs +++ b/src/devices/src/virtio/fs/mod.rs @@ -6,6 +6,7 @@ pub mod fuse; mod inode_alloc; #[allow(dead_code)] mod multikey; +mod null_fs; mod read_only; mod server; pub mod virtual_entry; diff --git a/src/devices/src/virtio/fs/null_fs.rs b/src/devices/src/virtio/fs/null_fs.rs new file mode 100644 index 000000000..4bb4b6360 --- /dev/null +++ b/src/devices/src/virtio/fs/null_fs.rs @@ -0,0 +1,50 @@ +// A minimal filesystem that serves an empty root directory. +// +// Used with AugmentFs to provide a virtual-only filesystem (e.g. for +// booting from a block device where the virtiofs root only needs init.krun). 
+ +use std::ffi::CStr; +use std::io; +use std::mem; +use std::time::Duration; + +use super::filesystem::{Context, Entry, FileSystem, FsOptions}; +use super::fuse; +use super::virtual_entry::VIRTUAL_BLKSIZE; +use crate::virtio::bindings; + +/// An empty filesystem with just a root directory and nothing in it. +pub struct NullFs; + +type Inode = u64; +type Handle = u64; + +impl FileSystem for NullFs { + type Inode = Inode; + type Handle = Handle; + + fn init(&self, _capable: FsOptions) -> io::Result { + Ok(FsOptions::empty()) + } + + fn lookup(&self, _ctx: Context, _parent: Inode, _name: &CStr) -> io::Result { + Err(io::Error::from_raw_os_error(libc::ENOENT)) + } + + fn getattr( + &self, + _ctx: Context, + inode: Inode, + _handle: Option, + ) -> io::Result<(bindings::stat64, Duration)> { + if inode == fuse::ROOT_ID { + let mut st: bindings::stat64 = unsafe { mem::zeroed() }; + st.st_ino = fuse::ROOT_ID; + st.st_mode = libc::S_IFDIR | 0o755; + st.st_nlink = 2; + st.st_blksize = VIRTUAL_BLKSIZE as _; + return Ok((st, Duration::MAX)); + } + Err(io::Error::from_raw_os_error(libc::ENOENT)) + } +} diff --git a/src/devices/src/virtio/fs/worker.rs b/src/devices/src/virtio/fs/worker.rs index 084a2aa85..b8e722b5d 100644 --- a/src/devices/src/virtio/fs/worker.rs +++ b/src/devices/src/virtio/fs/worker.rs @@ -18,6 +18,7 @@ use super::augment_fs::AugmentFs; use super::defs::{HPQ_INDEX, REQ_INDEX}; use super::descriptor_utils::{Reader, Writer}; use super::inode_alloc::InodeAllocator; +use super::null_fs::NullFs; use super::passthrough::{self, PassthroughFs}; use super::read_only::PassthroughFsRo; use super::server::Server; @@ -27,6 +28,7 @@ use crate::virtio::{InterruptTransport, VirtioShmRegion}; enum FsServer { ReadWrite(Server>), ReadOnly(Server>), + Null(Server>), } impl FsServer { @@ -55,6 +57,14 @@ impl FsServer { #[cfg(target_os = "macos")] map_sender, ), + FsServer::Null(s) => s.handle_message( + r, + w, + shm_region, + exit_code, + #[cfg(target_os = "macos")] + map_sender, 
+ ), } } } @@ -80,7 +90,7 @@ impl FsWorker { interrupt: InterruptTransport, mem: GuestMemoryMmap, shm_region: Option, - passthrough_cfg: passthrough::Config, + passthrough_cfg: Option, read_only: bool, virtual_entries: Vec, stop_fd: EventFd, @@ -88,20 +98,28 @@ impl FsWorker { #[cfg(target_os = "macos")] map_sender: Option>, ) -> Result { let inode_alloc = Arc::new(InodeAllocator::new()); - let server = if read_only { - let inner = PassthroughFsRo::new(passthrough_cfg, inode_alloc.clone())?; - FsServer::ReadOnly(Server::new(AugmentFs::new( - inner, - &inode_alloc, - virtual_entries, - ))) - } else { - let inner = PassthroughFs::new(passthrough_cfg, inode_alloc.clone())?; - FsServer::ReadWrite(Server::new(AugmentFs::new( - inner, + let server = match passthrough_cfg { + Some(cfg) if read_only => { + let inner = PassthroughFsRo::new(cfg, inode_alloc.clone())?; + FsServer::ReadOnly(Server::new(AugmentFs::new( + inner, + &inode_alloc, + virtual_entries, + ))) + } + Some(cfg) => { + let inner = PassthroughFs::new(cfg, inode_alloc.clone())?; + FsServer::ReadWrite(Server::new(AugmentFs::new( + inner, + &inode_alloc, + virtual_entries, + ))) + } + None => FsServer::Null(Server::new(AugmentFs::new( + NullFs, &inode_alloc, virtual_entries, - ))) + ))), }; Ok(Self { queues, diff --git a/src/vmm/src/vmm_config/fs.rs b/src/vmm/src/vmm_config/fs.rs index dc5906dab..bd6633d32 100644 --- a/src/vmm/src/vmm_config/fs.rs +++ b/src/vmm/src/vmm_config/fs.rs @@ -4,7 +4,9 @@ use devices::virtio::fs::virtual_entry::VirtualDirEntry; #[derive(Clone, Debug)] pub struct FsDeviceConfig { pub fs_id: String, - pub shared_dir: String, + /// Host directory to pass through. None means a virtual-only filesystem + /// (NullFs + AugmentFs, no host directory). 
+ pub shared_dir: Option, pub shm_size: Option, pub allow_root_dir_delete: bool, pub read_only: bool, From 32c3d0c1105d59879b8f12ce42cfba4739a19f24 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 16:23:18 +0200 Subject: [PATCH 09/34] lib: rewrite krun_set_root_disk_remount to use NullFs krun_set_root_disk_remount no longer creates a temporary empty host directory. Instead it configures a NullFs-backed virtiofs device (shared_dir: None) with init.krun overlaid via AugmentFs. Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- src/libkrun/src/lib.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 283df1141..60dbfd32f 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -2352,7 +2352,7 @@ pub extern "C" fn krun_setgid(ctx_id: u32, gid: libc::gid_t) -> i32 { KRUN_SUCCESS } -#[cfg(all(feature = "blk", not(feature = "tee")))] +#[cfg(all(feature = "blk", not(any(feature = "tee", feature = "aws-nitro"))))] #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_set_root_disk_remount( @@ -2440,6 +2440,7 @@ pub unsafe extern "C" fn krun_set_root_disk_remount( shared_dir: None, // Default to a conservative 512 MB window. shm_size: Some(1 << 29), + allow_root_dir_delete: false, read_only: false, virtual_entries, }); @@ -2561,11 +2562,10 @@ pub unsafe extern "C" fn krun_fs_add_overlay_file( // lifetime (see the C header contract). 
let payload: &'static [u8] = if data_len == 0 { &[] - } else { - if data.is_null() { - return -libc::EINVAL; - } + } else if !data.is_null() { slice::from_raw_parts(data, data_len) + } else { + return -libc::EINVAL; }; fs_add_overlay_entry( From d0d27645c9cdb31352f28be8b65edabfc84f1775 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 16:26:18 +0200 Subject: [PATCH 10/34] virtio/fs: remove REMOVE_ROOT_DIR ioctl and allow_root_dir_delete The temporary root directory hack is gone (replaced by NullFs), so the ioctl that cleaned it up and the config flag that gated it are no longer needed. Remove allow_root_dir_delete from FsDeviceConfig, Fs::new(), passthrough Config, and all call sites. Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- init/init.c | 11 ----------- src/devices/src/virtio/fs/device.rs | 2 -- src/devices/src/virtio/fs/linux/passthrough.rs | 10 ---------- src/devices/src/virtio/fs/macos/passthrough.rs | 7 ------- src/devices/src/virtio/fs/read_only.rs | 8 -------- src/libkrun/src/lib.rs | 1 - src/vmm/src/builder.rs | 1 - src/vmm/src/vmm_config/fs.rs | 1 - 8 files changed, 41 deletions(-) diff --git a/init/init.c b/init/init.c index 59a5c3d94..2d2be2834 100644 --- a/init/init.c +++ b/init/init.c @@ -43,7 +43,6 @@ #endif #define KRUN_EXIT_CODE_IOCTL 0x7602 -#define KRUN_REMOVE_ROOT_DIR_IOCTL 0x7603 #define KRUN_MAGIC "KRUN" #define KRUN_FOOTER_LEN 12 @@ -1475,16 +1474,6 @@ int main(int argc, char **argv) chdir("/newroot"); - fd = open("/", O_RDONLY); - if (fd < 0) { - perror("Couldn't open temporary root directory for removing"); - exit(-1); - } - if (ioctl(fd, KRUN_REMOVE_ROOT_DIR_IOCTL) < 0) { - perror("Error removing temporary root directory"); - } - close(fd); - if (mount(".", "/", NULL, MS_MOVE, NULL) < 0) { perror("remount root"); exit(-1); diff --git a/src/devices/src/virtio/fs/device.rs b/src/devices/src/virtio/fs/device.rs index f0fb45401..c757c9d3d 100644 --- a/src/devices/src/virtio/fs/device.rs +++ 
b/src/devices/src/virtio/fs/device.rs @@ -62,7 +62,6 @@ impl Fs { fs_id: String, shared_dir: Option, exit_code: Arc, - allow_root_dir_delete: bool, read_only: bool, virtual_entries: Vec, ) -> super::Result { @@ -75,7 +74,6 @@ impl Fs { let fs_cfg = shared_dir.map(|root_dir| passthrough::Config { root_dir, - allow_root_dir_delete, ..Default::default() }); diff --git a/src/devices/src/virtio/fs/linux/passthrough.rs b/src/devices/src/virtio/fs/linux/passthrough.rs index 08da133f0..2bfa46349 100644 --- a/src/devices/src/virtio/fs/linux/passthrough.rs +++ b/src/devices/src/virtio/fs/linux/passthrough.rs @@ -325,7 +325,6 @@ pub struct Config { pub export_fsid: u64, /// Table of exported FDs to share with other subsystems. pub export_table: Option, - pub allow_root_dir_delete: bool, } impl Default for Config { @@ -340,7 +339,6 @@ impl Default for Config { proc_sfd_rawfd: None, export_fsid: 0, export_table: None, - allow_root_dir_delete: false, } } } @@ -2122,10 +2120,6 @@ impl FileSystem for PassthroughFs { const VIRTIO_IOC_EXIT_CODE_REQ: u32 = request_code_none!(VIRTIO_IOC_MAGIC, VIRTIO_IOC_TYPE_EXIT_CODE) as u32; - const VIRTIO_IOC_REMOVE_ROOT_DIR_CODE: u8 = 3; - const VIRTIO_IOC_REMOVE_ROOT_DIR_REQ: u32 = - request_code_none!(VIRTIO_IOC_MAGIC, VIRTIO_IOC_REMOVE_ROOT_DIR_CODE) as u32; - match cmd { VIRTIO_IOC_EXPORT_FD_REQ => { if out_size as usize != VIRTIO_IOC_EXPORT_FD_SIZE { @@ -2160,10 +2154,6 @@ impl FileSystem for PassthroughFs { exit_code.store(arg as i32, Ordering::SeqCst); Ok(Vec::new()) } - VIRTIO_IOC_REMOVE_ROOT_DIR_REQ if self.cfg.allow_root_dir_delete => { - std::fs::remove_dir_all(&self.cfg.root_dir)?; - Ok(Vec::new()) - } _ => Err(io::Error::from_raw_os_error(libc::EOPNOTSUPP)), } } diff --git a/src/devices/src/virtio/fs/macos/passthrough.rs b/src/devices/src/virtio/fs/macos/passthrough.rs index d1a862d0c..3a0500735 100644 --- a/src/devices/src/virtio/fs/macos/passthrough.rs +++ b/src/devices/src/virtio/fs/macos/passthrough.rs @@ -514,7 +514,6 @@ pub 
struct Config { pub export_fsid: u64, /// Table of exported FDs to share with other subsystems. Not supported for macos. pub export_table: Option, - pub allow_root_dir_delete: bool, } impl Default for Config { @@ -529,7 +528,6 @@ impl Default for Config { proc_sfd_rawfd: None, export_fsid: 0, export_table: None, - allow_root_dir_delete: false, } } } @@ -2441,17 +2439,12 @@ impl FileSystem for PassthroughFs { // We can't use nix::request_code_none here since it's system-dependent // and we need the value from Linux. const VIRTIO_IOC_EXIT_CODE_REQ: u32 = 0x7602; - const VIRTIO_IOC_REMOVE_ROOT_DIR_REQ: u32 = 0x7603; match cmd { VIRTIO_IOC_EXIT_CODE_REQ => { exit_code.store(arg as i32, Ordering::SeqCst); Ok(Vec::new()) } - VIRTIO_IOC_REMOVE_ROOT_DIR_REQ if self.cfg.allow_root_dir_delete => { - std::fs::remove_dir_all(&self.cfg.root_dir)?; - Ok(Vec::new()) - } _ => Err(io::Error::from_raw_os_error(libc::EOPNOTSUPP)), } } diff --git a/src/devices/src/virtio/fs/read_only.rs b/src/devices/src/virtio/fs/read_only.rs index eb8aebef3..5495db1ed 100644 --- a/src/devices/src/virtio/fs/read_only.rs +++ b/src/devices/src/virtio/fs/read_only.rs @@ -36,10 +36,6 @@ fn erofs() -> io::Error { io::Error::from_raw_os_error(libc::EROFS) } -// Keep the Linux ioctl number so read-only virtio-fs can still handle -// non-mutating control ioctls while rejecting host-side root deletion. 
-const VIRTIO_IOC_REMOVE_ROOT_DIR_REQ: u32 = 0x7603; - fn read_only_open_flags(flags: u32) -> io::Result { let f = flags as i32; if f & libc::O_ACCMODE != libc::O_RDONLY { @@ -319,10 +315,6 @@ impl FileSystem for PassthroughFsRo { out_size: u32, exit_code: &Arc, ) -> io::Result> { - if cmd == VIRTIO_IOC_REMOVE_ROOT_DIR_REQ { - return Err(erofs()); - } - self.inner.ioctl( ctx, inode, handle, flags, cmd, arg, in_size, out_size, exit_code, ) diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 60dbfd32f..2c7976b3c 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -2440,7 +2440,6 @@ pub unsafe extern "C" fn krun_set_root_disk_remount( shared_dir: None, // Default to a conservative 512 MB window. shm_size: Some(1 << 29), - allow_root_dir_delete: false, read_only: false, virtual_entries, }); diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 38072ce70..8e8ca4e18 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -2040,7 +2040,6 @@ fn attach_fs_devices( config.fs_id.clone(), config.shared_dir.clone(), exit_code.clone(), - config.allow_root_dir_delete, config.read_only, config.virtual_entries.clone(), ) diff --git a/src/vmm/src/vmm_config/fs.rs b/src/vmm/src/vmm_config/fs.rs index bd6633d32..92927ec9a 100644 --- a/src/vmm/src/vmm_config/fs.rs +++ b/src/vmm/src/vmm_config/fs.rs @@ -8,7 +8,6 @@ pub struct FsDeviceConfig { /// (NullFs + AugmentFs, no host directory). pub shared_dir: Option, pub shm_size: Option, - pub allow_root_dir_delete: bool, pub read_only: bool, #[cfg(not(feature = "aws-nitro"))] pub virtual_entries: Vec, From fdeab1cb0138d24812b47fddb68278b1c1ee37b9 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 16:28:09 +0200 Subject: [PATCH 11/34] virtio/fs: move EXIT_CODE ioctl to AugmentFs The exit-code ioctl is a krun mechanism, not a filesystem operation. Move it to the AugmentFs overlay where it is handled before any delegation to the inner filesystem. 
The Linux passthrough retains only EXPORT_FD (which needs access to passthrough-internal handle and export tables). The macOS passthrough no longer implements ioctl at all. Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- src/devices/src/virtio/fs/augment_fs.rs | 18 +++++++++--- .../src/virtio/fs/linux/passthrough.rs | 14 ++------- .../src/virtio/fs/macos/passthrough.rs | 29 ++----------------- 3 files changed, 19 insertions(+), 42 deletions(-) diff --git a/src/devices/src/virtio/fs/augment_fs.rs b/src/devices/src/virtio/fs/augment_fs.rs index a694e5b96..ab7779508 100644 --- a/src/devices/src/virtio/fs/augment_fs.rs +++ b/src/devices/src/virtio/fs/augment_fs.rs @@ -17,7 +17,7 @@ use std::ffi::CStr; use std::ffi::CString; use std::io; use std::mem; -use std::sync::atomic::AtomicI32; +use std::sync::atomic::{AtomicI32, Ordering}; use std::sync::Arc; use std::sync::RwLock; use std::time::Duration; @@ -730,8 +730,18 @@ impl> FileSystem for AugmentFs out_size: u32, exit_code: &Arc, ) -> io::Result> { - self.inner.ioctl( - ctx, inode, handle, flags, cmd, arg, in_size, out_size, exit_code, - ) + // We can't use nix::request_code_none here since it's system-dependent + // and we need the value from Linux. 
+ const VIRTIO_IOC_EXIT_CODE_REQ: u32 = 0x7602; + + match cmd { + VIRTIO_IOC_EXIT_CODE_REQ => { + exit_code.store(arg as i32, Ordering::SeqCst); + Ok(Vec::new()) + } + _ => self.inner.ioctl( + ctx, inode, handle, flags, cmd, arg, in_size, out_size, exit_code, + ), + } } } diff --git a/src/devices/src/virtio/fs/linux/passthrough.rs b/src/devices/src/virtio/fs/linux/passthrough.rs index 2bfa46349..8272a7e01 100644 --- a/src/devices/src/virtio/fs/linux/passthrough.rs +++ b/src/devices/src/virtio/fs/linux/passthrough.rs @@ -16,7 +16,7 @@ use std::sync::{Arc, RwLock}; use std::time::Duration; use caps::{has_cap, CapSet, Capability}; -use nix::{request_code_none, request_code_read}; +use nix::request_code_read; use vm_memory::ByteValued; @@ -2101,10 +2101,10 @@ impl FileSystem for PassthroughFs { handle: Self::Handle, _flags: u32, cmd: u32, - arg: u64, + _arg: u64, _in_size: u32, out_size: u32, - exit_code: &Arc, + _exit_code: &Arc, ) -> io::Result> { const VIRTIO_IOC_MAGIC: u8 = b'v'; @@ -2116,10 +2116,6 @@ impl FileSystem for PassthroughFs { VIRTIO_IOC_EXPORT_FD_SIZE ) as u32; - const VIRTIO_IOC_TYPE_EXIT_CODE: u8 = 2; - const VIRTIO_IOC_EXIT_CODE_REQ: u32 = - request_code_none!(VIRTIO_IOC_MAGIC, VIRTIO_IOC_TYPE_EXIT_CODE) as u32; - match cmd { VIRTIO_IOC_EXPORT_FD_REQ => { if out_size as usize != VIRTIO_IOC_EXPORT_FD_SIZE { @@ -2150,10 +2146,6 @@ impl FileSystem for PassthroughFs { ret.extend_from_slice(&handle.to_ne_bytes()); Ok(ret) } - VIRTIO_IOC_EXIT_CODE_REQ => { - exit_code.store(arg as i32, Ordering::SeqCst); - Ok(Vec::new()) - } _ => Err(io::Error::from_raw_os_error(libc::EOPNOTSUPP)), } } diff --git a/src/devices/src/virtio/fs/macos/passthrough.rs b/src/devices/src/virtio/fs/macos/passthrough.rs index 3a0500735..cf43e0d0c 100644 --- a/src/devices/src/virtio/fs/macos/passthrough.rs +++ b/src/devices/src/virtio/fs/macos/passthrough.rs @@ -8,11 +8,11 @@ use std::collections::HashMap; use std::ffi::{CStr, CString}; use std::fs::File; use std::io; -use 
std::mem::{self, MaybeUninit}; +use std::mem::MaybeUninit; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::ptr::null_mut; use std::str::FromStr; -use std::sync::atomic::{AtomicBool, AtomicI32, AtomicI64, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering}; use std::sync::{Arc, Mutex, RwLock}; use std::time::Duration; @@ -2423,29 +2423,4 @@ impl FileSystem for PassthroughFs { Ok(()) } - - fn ioctl( - &self, - _ctx: Context, - _inode: Self::Inode, - _handle: Self::Handle, - _flags: u32, - cmd: u32, - arg: u64, - _in_size: u32, - _out_size: u32, - exit_code: &Arc, - ) -> io::Result> { - // We can't use nix::request_code_none here since it's system-dependent - // and we need the value from Linux. - const VIRTIO_IOC_EXIT_CODE_REQ: u32 = 0x7602; - - match cmd { - VIRTIO_IOC_EXIT_CODE_REQ => { - exit_code.store(arg as i32, Ordering::SeqCst); - Ok(Vec::new()) - } - _ => Err(io::Error::from_raw_os_error(libc::EOPNOTSUPP)), - } - } } From 00b5bddceb8443aeb6852a42461cb47577c9e26f Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 15:32:19 +0200 Subject: [PATCH 12/34] tests: add augmentfs integration test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Boot a VM with a pure NullFs root — no host directory at all. Every file in the root (init.krun, guest-agent, .krun_config.json, test data) is injected as a virtual overlay, and /dev, /proc, /sys are virtual empty directories used as mount points. 
Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- tests/test_cases/src/lib.rs | 4 + tests/test_cases/src/test_augmentfs.rs | 304 +++++++++++++++++++++++++ 2 files changed, 308 insertions(+) create mode 100644 tests/test_cases/src/test_augmentfs.rs diff --git a/tests/test_cases/src/lib.rs b/tests/test_cases/src/lib.rs index 83f3b6b14..a8ad7eaa6 100644 --- a/tests/test_cases/src/lib.rs +++ b/tests/test_cases/src/lib.rs @@ -22,6 +22,9 @@ use test_multiport_console::TestMultiportConsole; mod test_virtiofs_root_ro; use test_virtiofs_root_ro::TestVirtiofsRootRo; +mod test_augmentfs; +use test_augmentfs::TestAugmentFs; + mod test_pjdfstest; use test_pjdfstest::TestPjdfstest; @@ -84,6 +87,7 @@ pub fn test_cases() -> Vec { TestCase::new("net-vmnet-helper", Box::new(TestNet::new_vmnet_helper())), TestCase::new("multiport-console", Box::new(TestMultiportConsole)), TestCase::new("virtiofs-root-ro", Box::new(TestVirtiofsRootRo)), + TestCase::new("augmentfs", Box::new(TestAugmentFs)), TestCase::new("virtiofs-misc", Box::new(TestVirtioFsMisc)), TestCase::new("pjdfstest", Box::new(TestPjdfstest)), TestCase::new("perf-net-passt-tx", Box::new(TestNetPerf::new_passt_tx())), diff --git a/tests/test_cases/src/test_augmentfs.rs b/tests/test_cases/src/test_augmentfs.rs new file mode 100644 index 000000000..34edce96b --- /dev/null +++ b/tests/test_cases/src/test_augmentfs.rs @@ -0,0 +1,304 @@ +// Test the AugmentFs overlay over a NullFs. +// +// Boots a VM with NO host filesystem — the root virtiofs is backed entirely +// by virtual inodes: init.krun (one-shot), the guest-agent binary (one-shot), +// a .krun_config.json (one-shot), persistent test files, and virtual +// directories as mount points for /dev, /proc, /sys. 
+ +use macros::{guest, host}; + +pub struct TestAugmentFs; + +fn make_test_payload() -> Vec { + (0..8192u32).map(|i| (i % 251) as u8).collect() +} + +#[host] +mod host { + use super::*; + + use crate::{krun_call, krun_call_u32}; + use crate::{Test, TestSetup}; + use krun_sys::*; + use std::ffi::CString; + use std::ptr::null_mut; + + impl Test for TestAugmentFs { + fn start_vm(self: Box, test_setup: TestSetup) -> anyhow::Result<()> { + let test_case = CString::new(test_setup.test_case)?; + + // Read the guest-agent binary into memory. Leaked because + // krun_start_enter never returns. + let guest_agent_path = std::env::var("KRUN_TEST_GUEST_AGENT_PATH") + .expect("KRUN_TEST_GUEST_AGENT_PATH not set"); + let guest_agent_bytes: &'static [u8] = + Vec::leak(std::fs::read(&guest_agent_path).expect("Failed to read guest-agent")); + + // Build JSON config: exec the guest-agent with our test name. + let json = format!( + r#"{{"args": ["/guest-agent", "{}"], "cwd": "/"}}"#, + test_case.to_str().unwrap() + ); + let json_bytes: &'static [u8] = Vec::leak(json.into_bytes()); + + // Deterministic test payload for range-read tests. + let payload: &'static [u8] = Vec::leak(make_test_payload()); + + // A small marker file to test persistent reads. + let marker: &'static [u8] = b"virtual-file-marker-content-12345"; + + unsafe { + krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + let ctx = krun_call_u32!(krun_create_ctx())?; + krun_call!(krun_set_vm_config(ctx, 1, 512))?; + + // Disable the implicit init — we'll inject it ourselves. + krun_call!(krun_disable_implicit_init(ctx))?; + + // Get the default init binary. + let mut init_data: *const u8 = null_mut(); + let mut init_len: usize = 0; + krun_call!(krun_get_default_init(&mut init_data, &mut init_len))?; + + // Set up root with NO host directory (NullFs). 
+ krun_call!(krun_add_virtiofs3( + ctx, + c"/dev/root".as_ptr(), + std::ptr::null(), // NULL path → NullFs + 0, // no SHM window + false, // not read-only + ))?; + + // Virtual directories needed by init as mount points. + for dir in [c"dev", c"proc", c"sys"] { + krun_call!(krun_fs_add_overlay_dir( + ctx, + c"/dev/root".as_ptr(), + dir.as_ptr(), + 0o040_755, + ))?; + } + + // Overlay init.krun (one-shot, executable). + krun_call!(krun_fs_add_overlay_file( + ctx, + c"/dev/root".as_ptr(), + c"init.krun".as_ptr(), + init_data, + init_len, + 0o100_755, + true, + ))?; + + // Overlay guest-agent (one-shot, executable). After init + // execs it, the file should no longer be visible. + krun_call!(krun_fs_add_overlay_file( + ctx, + c"/dev/root".as_ptr(), + c"guest-agent".as_ptr(), + guest_agent_bytes.as_ptr(), + guest_agent_bytes.len(), + 0o100_755, + true, + ))?; + + // Overlay .krun_config.json (one-shot). + krun_call!(krun_fs_add_overlay_file( + ctx, + c"/dev/root".as_ptr(), + c".krun_config.json".as_ptr(), + json_bytes.as_ptr(), + json_bytes.len(), + 0o100_644, + true, + ))?; + + // Overlay a persistent marker file. + krun_call!(krun_fs_add_overlay_file( + ctx, + c"/dev/root".as_ptr(), + c"marker.txt".as_ptr(), + marker.as_ptr(), + marker.len(), + 0o100_644, + false, + ))?; + + // Overlay a deterministic 8 KiB payload for range-read tests. 
+ krun_call!(krun_fs_add_overlay_file( + ctx, + c"/dev/root".as_ptr(), + c"testdata.bin".as_ptr(), + payload.as_ptr(), + payload.len(), + 0o100_444, + false, + ))?; + + // --- Nested path test (2-level) --- + // etc/ -> etc/nested/ -> etc/nested/deep.txt + krun_call!(krun_fs_add_overlay_dir( + ctx, + c"/dev/root".as_ptr(), + c"etc".as_ptr(), + 0o040_755, + ))?; + krun_call!(krun_fs_add_overlay_dir( + ctx, + c"/dev/root".as_ptr(), + c"etc/nested".as_ptr(), + 0o040_755, + ))?; + let nested_content: &'static [u8] = b"deep-nested-content"; + krun_call!(krun_fs_add_overlay_file( + ctx, + c"/dev/root".as_ptr(), + c"etc/nested/deep.txt".as_ptr(), + nested_content.as_ptr(), + nested_content.len(), + 0o100_644, + false, + ))?; + + krun_call!(krun_set_workdir(ctx, c"/".as_ptr()))?; + krun_call!(krun_start_enter(ctx))?; + } + Ok(()) + } + } +} + +#[guest] +mod guest { + use super::*; + use crate::Test; + use std::fs; + use std::io::{ErrorKind, Read, Seek, SeekFrom}; + use std::path::Path; + + impl Test for TestAugmentFs { + fn in_guest(self: Box) { + // --- One-shot files should be gone --- + assert!( + !Path::new("/.krun_config.json").exists(), + ".krun_config.json should be gone (one-shot)" + ); + assert!( + !Path::new("/init.krun").exists(), + "init.krun should be gone (one-shot)" + ); + + // --- One-shot guest-agent can't see itself --- + assert!( + !Path::new("/guest-agent").exists(), + "guest-agent should be gone (one-shot)" + ); + + // --- Virtual directories should be accessible --- + // init already mounted over these, but let's verify they + // exist as directories (the mount points came from our + // virtual dir overlay). + for dir in ["/dev", "/proc", "/sys"] { + let meta = fs::metadata(dir).unwrap_or_else(|e| panic!("{dir} should exist: {e}")); + assert!(meta.is_dir(), "{dir} should be a directory"); + } + + // Verify the mounts actually worked by checking known entries. 
+ assert!( + Path::new("/dev/null").exists(), + "/dev/null should exist (devtmpfs)" + ); + assert!( + Path::new("/proc/self").exists(), + "/proc/self should exist (procfs)" + ); + assert!( + Path::new("/sys/kernel").exists(), + "/sys/kernel should exist (sysfs)" + ); + + // Verify directory listing works on each mounted fs. + let dev_entries: Vec<_> = fs::read_dir("/dev").expect("read_dir /dev").collect(); + assert!(!dev_entries.is_empty(), "/dev listing should not be empty"); + + let proc_entries: Vec<_> = fs::read_dir("/proc").expect("read_dir /proc").collect(); + assert!( + !proc_entries.is_empty(), + "/proc listing should not be empty" + ); + + let sys_entries: Vec<_> = fs::read_dir("/sys").expect("read_dir /sys").collect(); + assert!(!sys_entries.is_empty(), "/sys listing should not be empty"); + + // --- Persistent files should still exist --- + assert!(Path::new("/marker.txt").exists(), "marker.txt should exist"); + assert!( + Path::new("/testdata.bin").exists(), + "testdata.bin should exist" + ); + + // --- Read + verify marker content --- + let content = fs::read_to_string("/marker.txt").expect("read marker.txt"); + assert_eq!(content, "virtual-file-marker-content-12345"); + + // --- Repeated reads return the same data --- + let content2 = fs::read_to_string("/marker.txt").expect("re-read marker.txt"); + assert_eq!(content, content2, "repeated reads differ"); + + // --- Write should fail --- + let err = fs::OpenOptions::new() + .write(true) + .open("/marker.txt") + .expect_err("write-open should fail"); + assert_eq!(err.kind(), ErrorKind::PermissionDenied); + + // --- stat reports correct size --- + let meta = fs::metadata("/testdata.bin").expect("stat testdata.bin"); + assert_eq!(meta.len(), 8192, "testdata.bin size mismatch"); + + // --- Range reads on the 8 KiB payload --- + let expected = make_test_payload(); + let mut f = fs::File::open("/testdata.bin").expect("open testdata.bin"); + + // Full read. 
+ let got = fs::read("/testdata.bin").expect("full read"); + assert_eq!(got, expected, "full read mismatch"); + + // Read first 256 bytes. + let mut buf = vec![0u8; 256]; + f.read_exact(&mut buf).expect("read first 256"); + assert_eq!(buf, &expected[..256], "first 256 bytes mismatch"); + + // Seek to offset 4000, read 512 bytes. + f.seek(SeekFrom::Start(4000)).expect("seek to 4000"); + let mut buf = vec![0u8; 512]; + f.read_exact(&mut buf).expect("read at offset 4000"); + assert_eq!(buf, &expected[4000..4512], "range [4000..4512] mismatch"); + + // Seek to last 10 bytes. + f.seek(SeekFrom::End(-10)).expect("seek to end-10"); + let mut buf = vec![0u8; 10]; + f.read_exact(&mut buf).expect("read last 10"); + assert_eq!(buf, &expected[8182..8192], "last 10 bytes mismatch"); + + // Read past EOF should return 0 bytes. + f.seek(SeekFrom::Start(8192)).expect("seek to EOF"); + let mut buf = vec![0u8; 100]; + let n = f.read(&mut buf).expect("read past EOF"); + assert_eq!(n, 0, "read past EOF should return 0"); + + // Seek back to start, re-read, verify consistency. + f.seek(SeekFrom::Start(0)).expect("seek to start"); + let mut full = Vec::new(); + f.read_to_end(&mut full).expect("read_to_end"); + assert_eq!(full, expected, "read_to_end mismatch"); + + // --- Nested path test (2-level: etc/nested/deep.txt) --- + let deep = + fs::read_to_string("/etc/nested/deep.txt").expect("read /etc/nested/deep.txt"); + assert_eq!(deep, "deep-nested-content"); + + println!("OK"); + } + } +} From 2596f98ed1b91651179beb7013a8854311bce3e2 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 12 May 2026 15:32:34 +0200 Subject: [PATCH 13/34] tests: add root-disk-remount integration test Boot from an ext4 block device via krun_set_root_disk_remount. The virtiofs root uses NullFs with init.krun and virtual mount-point directories overlaid. The guest verifies it pivoted to the block device root successfully. 
Uses dlsym for krun_add_disk/krun_set_root_disk_remount so the test compiles without BLK and skips gracefully at runtime. Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- tests/run.sh | 5 + tests/test_cases/src/lib.rs | 4 + .../test_cases/src/test_root_disk_remount.rs | 164 ++++++++++++++++++ 3 files changed, 173 insertions(+) create mode 100644 tests/test_cases/src/test_root_disk_remount.rs diff --git a/tests/run.sh b/tests/run.sh index 3d7b1e6ef..87bd65310 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -42,6 +42,11 @@ if [ "$OS" = "Darwin" ]; then export CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_LINKER="clang" export CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_RUSTFLAGS="-C link-arg=-target -C link-arg=aarch64-linux-gnu -C link-arg=-fuse-ld=lld -C link-arg=--sysroot=$SYSROOT -C link-arg=-static" echo "Cross-compiling guest-agent for $GUEST_TARGET" + + # e2fsprogs is keg-only on macOS; add it to PATH for mke2fs. + if [ -d "/opt/homebrew/opt/e2fsprogs/sbin" ]; then + export PATH="/opt/homebrew/opt/e2fsprogs/sbin:$PATH" + fi fi cargo build --target=$GUEST_TARGET -p guest-agent diff --git a/tests/test_cases/src/lib.rs b/tests/test_cases/src/lib.rs index a8ad7eaa6..0f0b88290 100644 --- a/tests/test_cases/src/lib.rs +++ b/tests/test_cases/src/lib.rs @@ -25,6 +25,9 @@ use test_virtiofs_root_ro::TestVirtiofsRootRo; mod test_augmentfs; use test_augmentfs::TestAugmentFs; +mod test_root_disk_remount; +use test_root_disk_remount::TestRootDiskRemount; + mod test_pjdfstest; use test_pjdfstest::TestPjdfstest; @@ -88,6 +91,7 @@ pub fn test_cases() -> Vec { TestCase::new("multiport-console", Box::new(TestMultiportConsole)), TestCase::new("virtiofs-root-ro", Box::new(TestVirtiofsRootRo)), TestCase::new("augmentfs", Box::new(TestAugmentFs)), + TestCase::new("root-disk-remount", Box::new(TestRootDiskRemount)), TestCase::new("virtiofs-misc", Box::new(TestVirtioFsMisc)), TestCase::new("pjdfstest", Box::new(TestPjdfstest)), TestCase::new("perf-net-passt-tx", 
Box::new(TestNetPerf::new_passt_tx())), diff --git a/tests/test_cases/src/test_root_disk_remount.rs b/tests/test_cases/src/test_root_disk_remount.rs new file mode 100644 index 000000000..56011698f --- /dev/null +++ b/tests/test_cases/src/test_root_disk_remount.rs @@ -0,0 +1,164 @@ +// Test that krun_set_root_disk_remount works with NullFs. +// +// Creates a tiny ext4 disk image containing only the guest-agent binary, +// boots from it via krun_set_root_disk_remount (which uses NullFs for the +// initial virtiofs root with init.krun overlaid), and verifies the guest +// successfully pivoted to the block device root. + +use macros::{guest, host}; + +pub struct TestRootDiskRemount; + +#[host] +mod host { + use super::*; + + use crate::{krun_call, krun_call_u32, ShouldRun}; + use crate::{Test, TestSetup}; + use krun_sys::*; + use nix::libc; + use std::ffi::CString; + use std::process::Command; + use std::ptr::null; + + type KrunAddDiskFn = unsafe extern "C" fn( + ctx_id: u32, + block_id: *const std::ffi::c_char, + disk_path: *const std::ffi::c_char, + read_only: bool, + ) -> i32; + + type KrunSetRootDiskRemountFn = unsafe extern "C" fn( + ctx_id: u32, + device: *const std::ffi::c_char, + fstype: *const std::ffi::c_char, + options: *const std::ffi::c_char, + ) -> i32; + + fn get_krun_add_disk() -> KrunAddDiskFn { + let symbol = CString::new("krun_add_disk").unwrap(); + let ptr = unsafe { libc::dlsym(libc::RTLD_DEFAULT, symbol.as_ptr()) }; + assert!(!ptr.is_null(), "krun_add_disk not found"); + unsafe { std::mem::transmute(ptr) } + } + + fn get_krun_set_root_disk_remount() -> KrunSetRootDiskRemountFn { + let symbol = CString::new("krun_set_root_disk_remount").unwrap(); + let ptr = unsafe { libc::dlsym(libc::RTLD_DEFAULT, symbol.as_ptr()) }; + assert!(!ptr.is_null(), "krun_set_root_disk_remount not found"); + unsafe { std::mem::transmute(ptr) } + } + + fn create_disk_image(guest_agent_path: &str, output_path: &str) { + // Populate from a staging directory using mke2fs -d 
(no root needed). + let staging = format!("{output_path}.staging"); + std::fs::create_dir_all(&staging).expect("mkdir staging"); + + std::fs::copy(guest_agent_path, format!("{staging}/guest-agent")) + .expect("copy guest-agent"); + + // Marker file to verify the guest booted from the block device. + std::fs::write( + format!("{staging}/block-marker"), + "booted-from-block-device", + ) + .expect("write marker"); + + let status = Command::new("mke2fs") + .args(["-q", "-t", "ext4", "-d", &staging, output_path, "32M"]) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .expect("mke2fs failed"); + assert!(status.success(), "mke2fs failed"); + + std::fs::remove_dir_all(&staging).expect("cleanup staging"); + } + + impl Test for TestRootDiskRemount { + fn should_run(&self) -> ShouldRun { + if unsafe { krun_call_u32!(krun_has_feature(KRUN_FEATURE_BLK.into())) }.ok() != Some(1) + { + return ShouldRun::No("libkrun compiled without BLK"); + } + ShouldRun::Yes + } + + fn start_vm(self: Box, test_setup: TestSetup) -> anyhow::Result<()> { + let krun_add_disk = get_krun_add_disk(); + let krun_set_root_disk_remount = get_krun_set_root_disk_remount(); + + let guest_agent_path = std::env::var("KRUN_TEST_GUEST_AGENT_PATH") + .expect("KRUN_TEST_GUEST_AGENT_PATH not set"); + + let disk_path = format!("{}/rootfs.ext4", test_setup.tmp_dir.display()); + create_disk_image(&guest_agent_path, &disk_path); + + let c_disk_path = CString::new(disk_path)?; + let test_case = CString::new(test_setup.test_case)?; + + unsafe { + krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + let ctx = krun_call_u32!(krun_create_ctx())?; + krun_call!(krun_set_vm_config(ctx, 1, 512))?; + + let argv = [test_case.as_ptr(), null()]; + let envp = [null()]; + krun_call!(krun_set_exec( + ctx, + c"/guest-agent".as_ptr(), + argv.as_ptr(), + envp.as_ptr(), + ))?; + + krun_call!(krun_set_workdir(ctx, c"/".as_ptr()))?; + + // Add a block device with the ext4 image. 
+ krun_call!(krun_add_disk( + ctx, + c"vda".as_ptr(), + c_disk_path.as_ptr(), + false, + ))?; + + // Configure block device as root, pivot from NullFs. + krun_call!(krun_set_root_disk_remount( + ctx, + c"/dev/vda".as_ptr(), + c"ext4".as_ptr(), + std::ptr::null(), + ))?; + + krun_call!(krun_start_enter(ctx))?; + } + Ok(()) + } + } +} + +#[guest] +mod guest { + use super::*; + use crate::Test; + use std::fs; + use std::path::Path; + + impl Test for TestRootDiskRemount { + fn in_guest(self: Box) { + // Verify we're running from the block device root. + let marker = fs::read_to_string("/block-marker") + .expect("Failed to read /block-marker — not on block device root?"); + assert_eq!(marker, "booted-from-block-device"); + + // The init.krun virtual file should be gone (one-shot, and we + // pivoted away from the NullFs root anyway). + assert!(!Path::new("/init.krun").exists()); + + // /proc and /dev should be mounted (init re-mounts after pivot). + assert!(Path::new("/proc/self").exists(), "/proc/self missing"); + assert!(Path::new("/dev/null").exists(), "/dev/null missing"); + + println!("OK"); + } + } +} From da074abbf2739d7276f86f3f80d95442c7aad878 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 13 May 2026 13:09:22 +0200 Subject: [PATCH 14/34] CI: enable BLK=1 and install e2fsprogs in integration tests Build and test with the block device feature so the root-disk-remount test runs in CI. Install e2fsprogs (provides mke2fs) which the test needs to create the ext4 disk image. 
Assisted-by: OpenCode:claude-opus-4.6 Signed-off-by: Matej Hrica --- .github/workflows/integration_tests.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index 811ecf3fd..24a8ee15f 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -15,7 +15,7 @@ jobs: run: rustup target add x86_64-unknown-linux-musl - name: Build and install libkrun to test prefix - run: make test-prefix NET=1 + run: make test-prefix NET=1 BLK=1 - name: Clippy (test_cases guest) run: | @@ -45,7 +45,7 @@ jobs: sudo usermod -a -G kvm $USER - name: Install additional packages - run: sudo apt-get install -y --no-install-recommends build-essential patchelf pkg-config net-tools buildah dnsmasq iperf3 + run: sudo apt-get install -y --no-install-recommends build-essential patchelf pkg-config net-tools buildah dnsmasq iperf3 e2fsprogs - name: Install passt from source run: | @@ -58,7 +58,7 @@ jobs: run: TAG=`curl -sL https://api.github.com/repos/containers/libkrunfw/releases/latest |jq -r .tag_name` && curl -L -o /tmp/libkrunfw-x86_64.tgz https://github.com/containers/libkrunfw/releases/download/$TAG/libkrunfw-x86_64.tgz && mkdir tmp && tar xf /tmp/libkrunfw-x86_64.tgz -C tmp && sudo mv tmp/lib64/* /lib/x86_64-linux-gnu - name: Integration tests - run: KRUN_ENOMEM_WORKAROUND=1 KRUN_TEST_BASE_DIR=/tmp/libkrun-tests make test NET=1 IPERF_DURATION=3 TEST_FLAGS="--keep-all --github-summary" + run: KRUN_ENOMEM_WORKAROUND=1 KRUN_TEST_BASE_DIR=/tmp/libkrun-tests make test NET=1 BLK=1 IPERF_DURATION=3 TEST_FLAGS="--keep-all --github-summary" - name: Upload test logs if: always() @@ -84,7 +84,7 @@ jobs: run: rustup target add aarch64-unknown-linux-musl - name: Build and install libkrun to test prefix - run: make test-prefix NET=1 + run: make test-prefix NET=1 BLK=1 - name: Clippy (test_cases guest) run: | @@ -107,7 +107,7 @@ jobs: cargo clippy 
--locked --target aarch64-unknown-linux-musl -p guest-agent -- -D warnings - name: Install additional packages - run: sudo apt-get install -y --no-install-recommends build-essential patchelf pkg-config net-tools dnsmasq iperf3 git uidmap + run: sudo apt-get install -y --no-install-recommends build-essential patchelf pkg-config net-tools dnsmasq iperf3 git uidmap e2fsprogs - name: Install passt from source run: | @@ -123,7 +123,7 @@ jobs: run: rm -fr /tmp/libkrun-tests - name: Integration tests - run: KRUN_ENOMEM_WORKAROUND=1 KRUN_NO_UNSHARE=1 KRUN_TEST_BASE_DIR=/tmp/libkrun-tests make test NET=1 IPERF_DURATION=3 TEST_FLAGS="--keep-all --github-summary" + run: KRUN_ENOMEM_WORKAROUND=1 KRUN_NO_UNSHARE=1 KRUN_TEST_BASE_DIR=/tmp/libkrun-tests make test NET=1 BLK=1 IPERF_DURATION=3 TEST_FLAGS="--keep-all --github-summary" - name: Upload test logs if: always() From 1240ab26281a62085496aa09486f2e9dfff9a7fc Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 16:45:20 +0200 Subject: [PATCH 15/34] init-blob: add InitConfig builder for guest init configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce InitConfig, InitConfigBuilder, GuestFile, and from_oci_spec_json() to construct the .krun_config.json file consumed by the in-guest init. InitConfig is the abstraction boundary between the host API and the guest init. guest_files() returns all files init needs on the guest root filesystem (the init binary + config JSON) as GuestFile structs, letting the caller decide how to materialize them (virtiofs overlay, block device, etc.). The JSON schema currently matches the OCI-image-config subset that init.c's config_parse_file() expects (Entrypoint, Cmd, Env, WorkingDir, mounts), but callers should not rely on this — the serialization format is an internal detail. 
Assisted-by: OpenCode:claude-opus-4.6 --- Cargo.lock | 4 + src/init-blob/Cargo.toml | 4 + src/init-blob/src/config.rs | 300 ++++++++++++++++++++++++++++++++++++ src/init-blob/src/lib.rs | 3 + 4 files changed, 311 insertions(+) create mode 100644 src/init-blob/src/config.rs diff --git a/Cargo.lock b/Cargo.lock index c0c4dd9b0..0b419f3b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -530,6 +530,10 @@ dependencies = [ [[package]] name = "init-blob" version = "0.1.0-1.18.0" +dependencies = [ + "serde", + "serde_json", +] [[package]] name = "iocuddle" diff --git a/src/init-blob/Cargo.toml b/src/init-blob/Cargo.toml index 7792e2042..fd799901e 100644 --- a/src/init-blob/Cargo.toml +++ b/src/init-blob/Cargo.toml @@ -7,5 +7,9 @@ license = "Apache-2.0" repository = "https://github.com/containers/libkrun" build = "build.rs" +[dependencies] +serde = { version = "1", features = ["derive"] } +serde_json = "1" + [lib] path = "src/lib.rs" diff --git a/src/init-blob/src/config.rs b/src/init-blob/src/config.rs new file mode 100644 index 000000000..77ab7e74d --- /dev/null +++ b/src/init-blob/src/config.rs @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: Apache-2.0 +// +//! Builder for the `/.krun_config.json` file consumed by the in-guest init. +//! +//! The JSON schema matches what `init/init.c` (`config_parse_file()`) expects: +//! +//! ```json +//! { +//! "Entrypoint": ["/usr/bin/bash"], +//! "Cmd": ["--login"], +//! "Env": ["HOME=/root", "TERM=xterm-256color"], +//! "WorkingDir": "/home/user", +//! "mounts": [{"destination": "/tmp", "type": "tmpfs", "source": "tmpfs"}] +//! } +//! ``` +//! +//! # Example +//! +//! ``` +//! use init_blob::InitConfig; +//! +//! let json_bytes = InitConfig::builder() +//! .entrypoint(["/usr/bin/bash"]) +//! .args(["--login"]) +//! .env(["HOME=/root", "TERM=xterm-256color"]) +//! .workdir("/home/user") +//! .build() +//! .to_json(); +//! 
``` + +use std::borrow::Cow; +use std::path::Path; + +use serde::{Deserialize, Serialize}; + +/// Kernel cmdline `init=` path (absolute, as seen by the guest). +pub const INIT_PATH: &str = "/init.krun"; + +/// A file that the init process expects to find on the guest root filesystem. +/// +/// The caller decides how to materialize these (virtiofs overlay, block +/// device, etc.) — init-blob only describes *what* init needs. +pub struct GuestFile { + /// Path on the guest root filesystem. + pub path: &'static Path, + /// File contents. + pub data: Cow<'static, [u8]>, + /// Permission bits (e.g. `0o755` for executables). + pub mode: u32, + /// If true, the file is only needed during early init and can be + /// removed after first use. + pub one_shot: bool, +} + +/// Init configuration for the in-guest init process. +/// +/// Constructed via [`InitConfigBuilder`] or [`InitConfig::from_oci_spec_json`]. +/// +/// This type is the abstraction boundary between the host API and the +/// guest init — callers describe *what* to run, and the init-blob crate +/// handles the serialization format and guest-side conventions. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +#[serde(default)] +pub struct InitConfig { + /// The entrypoint executable (and its fixed prefix args). + /// Maps to the `"Entrypoint"` JSON key. + #[serde(rename = "Entrypoint", skip_serializing_if = "Vec::is_empty")] + pub entrypoint: Vec, + + /// Additional command arguments appended after the entrypoint. + /// Maps to the `"Cmd"` JSON key. + #[serde(rename = "Cmd", skip_serializing_if = "Vec::is_empty")] + pub args: Vec, + + /// Environment variables in `KEY=value` form. + /// Maps to the `"Env"` JSON key. + #[serde(rename = "Env", skip_serializing_if = "Vec::is_empty")] + pub env: Vec, + + /// Working directory inside the guest. + /// Maps to the `"WorkingDir"` JSON key. 
+ #[serde(rename = "WorkingDir", skip_serializing_if = "Option::is_none")] + pub workdir: Option, + + /// Additional mounts to perform inside the guest. + #[serde(skip_serializing_if = "Vec::is_empty")] + pub mounts: Vec, + + /// Resource limits in `"id=cur:max"` form (e.g. `"7=0:0"`). + #[serde(skip)] + pub rlimits: Vec, +} + +/// A mount specification for the guest init. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct Mount { + pub destination: String, + #[serde(rename = "type")] + pub fs_type: String, + pub source: String, +} + +impl InitConfig { + /// Start building a new init configuration. + pub fn builder() -> InitConfigBuilder { + InitConfigBuilder::default() + } + + /// Construct from an OCI container-spec JSON string. + /// + /// The JSON is expected to use the same key names as the OCI image + /// config (`Entrypoint`, `Cmd`, `Env`, `WorkingDir`, `mounts`). + /// Currently this is a thin deserialization — the internal + /// representation happens to match the OCI schema, but callers + /// should not rely on that. + /// + /// # Errors + /// + /// Returns `Err` if the JSON is syntactically invalid or contains + /// unexpected types (e.g. `Entrypoint` is not an array of strings). + pub fn from_oci_spec_json(json: &str) -> Result { + // TODO: actually validate the config (e.g. entrypoint is non-empty, + // env entries contain '=', workdir is absolute, etc.) + serde_json::from_str(json) + } + + /// Serialize to a JSON string. + pub fn to_json(&self) -> Box { + serde_json::to_string(self) + .expect("InitConfig serialization cannot fail") + .into_boxed_str() + } + + /// Returns the files that the init process expects on the guest root + /// filesystem: the init binary itself and the config JSON. 
+ pub fn guest_files(&self) -> [GuestFile; 2] { + let config_json = self.to_json(); + [ + GuestFile { + path: Path::new(INIT_PATH), + data: Cow::Borrowed(super::INIT_BINARY), + mode: 0o755, + one_shot: true, + }, + GuestFile { + path: Path::new("/.krun_config.json"), + data: Cow::Owned(config_json.into_string().into_bytes()), + mode: 0o644, + one_shot: true, + }, + ] + } +} + +/// Builder for [`InitConfig`]. +#[derive(Clone, Debug, Default)] +pub struct InitConfigBuilder { + config: InitConfig, +} + +impl InitConfigBuilder { + /// Set the entrypoint executable (and optional fixed prefix arguments). + /// + /// ``` + /// # use init_blob::InitConfig; + /// InitConfig::builder().entrypoint(["/usr/bin/bash", "-l"]); + /// ``` + pub fn entrypoint(mut self, argv: impl IntoIterator>) -> Self { + self.config.entrypoint = argv.into_iter().map(Into::into).collect(); + self + } + + /// Set additional command arguments (appended after entrypoint). + pub fn args(mut self, argv: impl IntoIterator>) -> Self { + self.config.args = argv.into_iter().map(Into::into).collect(); + self + } + + /// Set environment variables. Each entry should be `"KEY=value"`. + pub fn env(mut self, vars: impl IntoIterator>) -> Self { + self.config.env = vars.into_iter().map(Into::into).collect(); + self + } + + /// Set the guest working directory. + pub fn workdir(mut self, dir: impl Into) -> Self { + self.config.workdir = Some(dir.into()); + self + } + + /// Add a mount specification. + pub fn mount( + mut self, + destination: impl Into, + fs_type: impl Into, + source: impl Into, + ) -> Self { + self.config.mounts.push(Mount { + destination: destination.into(), + fs_type: fs_type.into(), + source: source.into(), + }); + self + } + + /// Consume the builder and return the finished [`InitConfig`]. 
+ pub fn build(self) -> InitConfig { + self.config + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_config_produces_empty_object() { + let cfg = InitConfig::builder().build(); + let json: serde_json::Value = serde_json::from_str(&cfg.to_json()).unwrap(); + assert_eq!(json, serde_json::json!({})); + } + + #[test] + fn full_config_round_trips() { + let cfg = InitConfig::builder() + .entrypoint(["/usr/bin/bash"]) + .args(["--login"]) + .env(["HOME=/root", "TERM=xterm-256color"]) + .workdir("/home/user") + .mount("/tmp", "tmpfs", "tmpfs") + .build(); + + let json: serde_json::Value = serde_json::from_str(&cfg.to_json()).unwrap(); + assert_eq!(json["Entrypoint"], serde_json::json!(["/usr/bin/bash"])); + assert_eq!(json["Cmd"], serde_json::json!(["--login"])); + assert_eq!( + json["Env"], + serde_json::json!(["HOME=/root", "TERM=xterm-256color"]) + ); + assert_eq!(json["WorkingDir"], serde_json::json!("/home/user")); + assert_eq!(json["mounts"][0]["destination"], "/tmp"); + assert_eq!(json["mounts"][0]["type"], "tmpfs"); + } + + #[test] + fn skip_serializing_empty_fields() { + let cfg = InitConfig::builder().workdir("/tmp").build(); + + let json: serde_json::Value = serde_json::from_str(&cfg.to_json()).unwrap(); + assert!(json.get("Entrypoint").is_none()); + assert!(json.get("Cmd").is_none()); + assert!(json.get("Env").is_none()); + assert!(json.get("mounts").is_none()); + assert_eq!(json["WorkingDir"], "/tmp"); + } + + #[test] + fn from_oci_spec_json_parses() { + let json = + r#"{"Entrypoint":["/bin/sh"],"Cmd":["-c","echo hi"],"Env":["A=1"],"WorkingDir":"/"}"#; + let cfg = InitConfig::from_oci_spec_json(json).unwrap(); + assert_eq!(cfg.entrypoint, ["/bin/sh"]); + assert_eq!(cfg.args, ["-c", "echo hi"]); + assert_eq!(cfg.env, ["A=1"]); + assert_eq!(cfg.workdir.as_deref(), Some("/")); + } + + #[test] + fn from_oci_spec_json_rejects_bad_types() { + // Entrypoint should be an array, not a string. 
+ let json = r#"{"Entrypoint":"/bin/sh"}"#; + assert!(InitConfig::from_oci_spec_json(json).is_err()); + } + + #[test] + fn from_oci_spec_json_ignores_unknown_fields() { + let json = r#"{"Entrypoint":["/bin/sh"],"Labels":{"foo":"bar"}}"#; + let cfg = InitConfig::from_oci_spec_json(json).unwrap(); + assert_eq!(cfg.entrypoint, ["/bin/sh"]); + } + + #[test] + fn guest_files_returns_init_binary_and_config() { + let cfg = InitConfig::builder().entrypoint(["/bin/sh"]).build(); + let files = cfg.guest_files(); + + assert_eq!(files.len(), 2); + + assert_eq!(files[0].path, Path::new("/init.krun")); + assert_eq!(files[0].mode, 0o755); + assert!(files[0].one_shot); + assert!(!files[0].data.is_empty()); + + assert_eq!(files[1].path, Path::new("/.krun_config.json")); + assert_eq!(files[1].mode, 0o644); + assert!(files[1].one_shot); + let json: serde_json::Value = serde_json::from_slice(&files[1].data).unwrap(); + assert_eq!(json["Entrypoint"], serde_json::json!(["/bin/sh"])); + } +} diff --git a/src/init-blob/src/lib.rs b/src/init-blob/src/lib.rs index 4397da679..5909392ff 100644 --- a/src/init-blob/src/lib.rs +++ b/src/init-blob/src/lib.rs @@ -1 +1,4 @@ pub static INIT_BINARY: &[u8] = include_bytes!(env!("KRUN_INIT_BINARY_PATH")); + +mod config; +pub use config::{GuestFile, InitConfig, InitConfigBuilder, Mount, INIT_PATH}; From 67705214281eeedf61fe59557a9be9b1871c81d9 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 16:45:28 +0200 Subject: [PATCH 16/34] lib: switch krun_set_exec/env/workdir/rlimits to InitConfig Replace the five separate ContextConfig fields (exec_path, args, env, workdir, rlimits) with a single init_config: InitConfig. The legacy krun_set_exec/env/workdir/rlimits functions now populate InitConfig directly instead of formatting strings for the kernel cmdline. At krun_start_enter time, guest_files() produces the init binary and .krun_config.json, which are injected into the root virtiofs device via guest_file_to_virtual_entry(). 
The kernel cmdline no longer carries KRUN_INIT, KRUN_WORKDIR, KRUN_RLIMITS, env vars, or -- args. Only KRUN_BLOCK_ROOT_DEVICE (when applicable) remains on the cmdline. Assisted-by: OpenCode:claude-opus-4.6 --- src/libkrun/src/lib.rs | 265 ++++++++++++++++------------------------- 1 file changed, 104 insertions(+), 161 deletions(-) diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 2c7976b3c..b36fedea5 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -86,22 +86,29 @@ const KRUNFW_NAME: &str = "libkrunfw.5.dylib"; #[cfg(feature = "aws-nitro")] static KRUN_NITRO_DEBUG: Mutex = Mutex::new(false); -// Path to the init binary to be executed inside the VM. -const INIT_PATH: &str = "/init.krun"; - +/// Convert an [`init_blob::GuestFile`] into a virtiofs [`VirtualDirEntry`]. +/// +/// Owned data is leaked to satisfy the `'static` lifetime required by +/// [`VirtualEntryContent::File`] — acceptable because the VM runs once +/// per process. #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] -const DEFAULT_INIT_PAYLOAD: &[u8] = init_blob::INIT_BINARY; +fn guest_file_to_virtual_entry(gf: init_blob::GuestFile) -> VirtualDirEntry { + use std::borrow::Cow; -#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] -fn init_virtual_entry() -> VirtualDirEntry { + let file_name = gf + .path + .file_name() + .expect("GuestFile path must have a filename"); + let data: &'static [u8] = match gf.data { + Cow::Borrowed(b) => b, + Cow::Owned(v) => Box::leak(v.into_boxed_slice()), + }; VirtualDirEntry { - name: CString::new("init.krun").unwrap(), + name: CString::new(file_name.as_encoded_bytes()).unwrap(), entry: VirtualEntry { - mode: 0o755, - one_shot: true, - content: VirtualEntryContent::File { - data: DEFAULT_INIT_PAYLOAD, - }, + mode: gf.mode, + one_shot: gf.one_shot, + content: VirtualEntryContent::File { data }, }, } } @@ -153,11 +160,7 @@ enum LegacyNetworkConfig { struct ContextConfig { krunfw: Option, vmr: VmResources, - workdir: Option, - 
exec_path: Option, - env: Option, - args: Option, - rlimits: Option, + init_config: init_blob::InitConfig, #[cfg(feature = "net")] legacy_net_cfg: Option, #[cfg(feature = "net")] @@ -187,28 +190,6 @@ struct ContextConfig { } impl ContextConfig { - fn set_workdir(&mut self, workdir: String) { - self.workdir = Some(workdir); - } - - fn get_workdir(&self) -> String { - match &self.workdir { - Some(workdir) => format!("KRUN_WORKDIR={workdir}"), - None => "".to_string(), - } - } - - fn set_exec_path(&mut self, exec_path: String) { - self.exec_path = Some(exec_path); - } - - fn get_exec_path(&self) -> String { - match &self.exec_path { - Some(exec_path) => format!("KRUN_INIT={exec_path}"), - None => "".to_string(), - } - } - #[cfg(all(feature = "blk", not(feature = "tee")))] fn set_block_root(&mut self, device: String, fstype: Option, options: Option) { self.block_root = Some(BlockRootConfig { @@ -237,39 +218,6 @@ impl ContextConfig { "".to_string() } - fn set_env(&mut self, env: String) { - self.env = Some(env); - } - - fn get_env(&self) -> String { - match &self.env { - Some(env) => env.clone(), - None => "".to_string(), - } - } - - fn set_args(&mut self, args: String) { - self.args = Some(args); - } - - fn get_args(&self) -> String { - match &self.args { - Some(args) => args.clone(), - None => "".to_string(), - } - } - - fn set_rlimits(&mut self, rlimits: String) { - self.rlimits = Some(rlimits); - } - - fn get_rlimits(&self) -> String { - match &self.rlimits { - Some(rlimits) => format!("KRUN_RLIMITS={rlimits}"), - None => "".to_string(), - } - } - #[cfg(feature = "blk")] fn add_block_cfg(&mut self, block_cfg: BlockDeviceConfig) { self.block_cfgs.push(block_cfg); @@ -377,20 +325,25 @@ impl TryFrom for NitroEnclave { return Err(-libc::EINVAL); }; - let Some(exec_path) = ctx.exec_path else { - error!("exec path not specified"); - return Err(-libc::EINVAL); + let exec_path = match ctx.init_config.entrypoint.first() { + Some(p) => p.clone(), + None => { + error!("exec 
path not specified"); + return Err(-libc::EINVAL); + } }; - let Some(exec_env) = ctx.env else { + if ctx.init_config.env.is_empty() { error!("execution env not specified"); return Err(-libc::EINVAL); - }; + } + let exec_env = ctx.init_config.env.join(" "); - let Some(exec_args) = ctx.args else { + if ctx.init_config.args.is_empty() { error!("execution args not specified"); return Err(-libc::EINVAL); - }; + } + let exec_args = ctx.init_config.args.join(" "); let net_unixfd = { let mut list = ctx.vmr.net.list; @@ -615,13 +568,7 @@ pub unsafe extern "C" fn krun_set_root(ctx_id: u32, c_root_path: *const c_char) // Default to a conservative 512 MB window. shm_size: Some(1 << 29), read_only: false, - virtual_entries: { - let mut v = Vec::new(); - if !cfg.disable_implicit_init { - v.push(init_virtual_entry()); - } - v - }, + virtual_entries: Vec::new(), }); } Entry::Vacant(_) => return -libc::ENOENT, @@ -694,16 +641,12 @@ pub unsafe extern "C" fn krun_add_virtiofs3( match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); - let mut virtual_entries = Vec::new(); - if tag == "/dev/root" && !cfg.disable_implicit_init { - virtual_entries.push(init_virtual_entry()); - } cfg.vmr.add_fs_device(FsDeviceConfig { fs_id: tag.to_string(), shared_dir: path.map(|p| p.to_string()), shm_size: shm, read_only, - virtual_entries, + virtual_entries: Vec::new(), }); } Entry::Vacant(_) => return -libc::ENOENT, @@ -1290,30 +1233,29 @@ pub unsafe extern "C" fn krun_set_port_map(ctx_id: u32, c_port_map: *const *cons #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_set_rlimits(ctx_id: u32, c_rlimits: *const *const c_char) -> i32 { - let rlimits = if c_rlimits.is_null() { + if c_rlimits.is_null() { return -libc::EINVAL; - } else { - let mut strvec = Vec::new(); - - let array: &[*const c_char] = slice::from_raw_parts(c_rlimits, MAX_ARGS); - for item in array.iter().take(MAX_ARGS) { - if item.is_null() { - break; - } 
else { - let s = match CStr::from_ptr(*item).to_str() { - Ok(s) => s, - Err(_) => return -libc::EINVAL, - }; - strvec.push(s); - } - } + } - format!("\"{}\"", strvec.join(",")) + let array: &[*const c_char] = slice::from_raw_parts(c_rlimits, MAX_ARGS); + let rlimit_strs = match collect_str_array(array) { + Ok(v) => v, + Err(_) => return -libc::EINVAL, }; + // FIXME: rlimits should be a proper field in the config JSON (the OCI + // runtime spec has `process.rlimits`), not smuggled as an env var. + // The current init reads them from `KRUN_RLIMITS` in the process + // environment, which conflates init-internal knobs with the payload's + // env vars — the payload can see and even override them. This needs + // a coordinated fix in both the init binary and the config schema. + let rlimits_value = rlimit_strs.join(","); + match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { - ctx_cfg.get_mut().set_rlimits(rlimits); + let env = &mut ctx_cfg.get_mut().init_config.env; + env.retain(|e| !e.starts_with("KRUN_RLIMITS=")); + env.push(format!("KRUN_RLIMITS={rlimits_value}")); } Entry::Vacant(_) => return -libc::ENOENT, } @@ -1331,7 +1273,7 @@ pub unsafe extern "C" fn krun_set_workdir(ctx_id: u32, c_workdir_path: *const c_ match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { - ctx_cfg.get_mut().set_workdir(workdir_path.to_string()); + ctx_cfg.get_mut().init_config.workdir = Some(workdir_path.to_string()); } Entry::Vacant(_) => return -libc::ENOENT, } @@ -1339,22 +1281,18 @@ pub unsafe extern "C" fn krun_set_workdir(ctx_id: u32, c_workdir_path: *const c_ KRUN_SUCCESS } -unsafe fn collapse_str_array(array: &[*const c_char]) -> Result { - let mut strvec = Vec::new(); - +/// Collect a null-terminated C string array into a `Vec`. 
+unsafe fn collect_str_array(array: &[*const c_char]) -> Result, std::str::Utf8Error> { + let mut out = Vec::new(); for item in array.iter().take(MAX_ARGS) { if item.is_null() { break; - } else { - let s = CStr::from_ptr(*item).to_str()?; - strvec.push(format!("\"{s}\"")); } + out.push(CStr::from_ptr(*item).to_str()?.to_owned()); } - - Ok(strvec.join(" ")) + Ok(out) } -#[allow(clippy::format_collect)] #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_set_exec( @@ -1373,38 +1311,38 @@ pub unsafe extern "C" fn krun_set_exec( let args = if !c_argv.is_null() { let argv_array: &[*const c_char] = slice::from_raw_parts(c_argv, MAX_ARGS); - match collapse_str_array(argv_array) { - Ok(s) => s, + match collect_str_array(argv_array) { + Ok(v) => v, Err(e) => { debug!("Error parsing args: {e:?}"); return -libc::EINVAL; } } } else { - "".to_string() + Vec::new() }; - let env = if !c_envp.is_null() { + let env_vars = if !c_envp.is_null() { let envp_array: &[*const c_char] = slice::from_raw_parts(c_envp, MAX_ARGS); - match collapse_str_array(envp_array) { - Ok(s) => s, + match collect_str_array(envp_array) { + Ok(v) => v, Err(e) => { - debug!("Error parsing args: {e:?}"); + debug!("Error parsing env: {e:?}"); return -libc::EINVAL; } } } else { env::vars() - .map(|(key, value)| format!(" {key}=\"{value}\"")) + .map(|(key, value)| format!("{key}={value}")) .collect() }; match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); - cfg.set_exec_path(exec_path.to_string()); - cfg.set_env(env); - cfg.set_args(args); + cfg.init_config.entrypoint = vec![exec_path.to_string()]; + cfg.init_config.args = args; + cfg.init_config.env = env_vars; } Entry::Vacant(_) => return -libc::ENOENT, } @@ -1412,29 +1350,27 @@ pub unsafe extern "C" fn krun_set_exec( KRUN_SUCCESS } -#[allow(clippy::format_collect)] #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_set_env(ctx_id: u32, c_envp: 
*const *const c_char) -> i32 { - let env = if !c_envp.is_null() { + let env_vars = if !c_envp.is_null() { let envp_array: &[*const c_char] = slice::from_raw_parts(c_envp, MAX_ARGS); - match collapse_str_array(envp_array) { - Ok(s) => s, + match collect_str_array(envp_array) { + Ok(v) => v, Err(e) => { - debug!("Error parsing args: {e:?}"); + debug!("Error parsing env: {e:?}"); return -libc::EINVAL; } } } else { env::vars() - .map(|(key, value)| format!(" {key}=\"{value}\"")) + .map(|(key, value)| format!("{key}={value}")) .collect() }; match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - cfg.set_env(env); + ctx_cfg.get_mut().init_config.env = env_vars; } Entry::Vacant(_) => return -libc::ENOENT, } @@ -2414,12 +2350,10 @@ pub unsafe extern "C" fn krun_set_root_disk_remount( } // Boot from a block device: the virtiofs root only needs to - // serve init.krun and provide mount points for /dev, /proc, /sys. + // provide mount points for /dev, /proc, /sys. The init binary + // and config JSON are injected at krun_start_enter time. // Use a NullFs (no host directory) with the inode overlay. let mut virtual_entries = Vec::new(); - if !ctx_cfg.disable_implicit_init { - virtual_entries.push(init_virtual_entry()); - } // init.c needs these directories as mount points before // pivoting to the block device root. for name in ["dev", "proc", "sys", "newroot"] { @@ -2527,8 +2461,8 @@ pub unsafe extern "C" fn krun_get_default_init( if data_out.is_null() || len_out.is_null() { return -libc::EINVAL; } - *data_out = DEFAULT_INIT_PAYLOAD.as_ptr(); - *len_out = DEFAULT_INIT_PAYLOAD.len(); + *data_out = init_blob::INIT_BINARY.as_ptr(); + *len_out = init_blob::INIT_BINARY.len(); KRUN_SUCCESS } @@ -2902,17 +2836,30 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { return -libc::EINVAL; } + // Inject init binary + config JSON into the root virtiofs device. 
+ #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] + if !ctx_cfg.disable_implicit_init { + if let Some(root_fs) = ctx_cfg.vmr.fs.iter_mut().find(|f| f.fs_id == "/dev/root") { + for gf in ctx_cfg.init_config.guest_files() { + root_fs + .virtual_entries + .push(guest_file_to_virtual_entry(gf)); + } + } + } + + let block_root = ctx_cfg.get_block_root(); let kernel_cmdline = KernelCmdlineConfig { - prolog: Some(format!("{DEFAULT_KERNEL_CMDLINE} init={INIT_PATH}")), - krun_env: Some(format!( - " {} {} {} {} {}", - ctx_cfg.get_exec_path(), - ctx_cfg.get_workdir(), - ctx_cfg.get_block_root(), - ctx_cfg.get_rlimits(), - ctx_cfg.get_env(), + prolog: Some(format!( + "{DEFAULT_KERNEL_CMDLINE} init={}", + init_blob::INIT_PATH )), - epilog: Some(format!(" -- {}", ctx_cfg.get_args())), + krun_env: if block_root.is_empty() { + None + } else { + Some(format!(" {block_root}")) + }, + epilog: None, }; if ctx_cfg.vmr.set_kernel_cmdline(kernel_cmdline).is_err() { @@ -3068,18 +3015,14 @@ mod test_disable_implicit_init { #[test] fn test_disable_implicit_init() { - let ctx = unsafe { krun_create_ctx() } as u32; - unsafe { - krun_disable_implicit_init(ctx); - krun_set_root(ctx, c"/tmp".as_ptr()); - } + let ctx = krun_create_ctx() as u32; + krun_disable_implicit_init(ctx); let ctx_map = CTX_MAP.lock().unwrap(); let cfg = ctx_map.get(&ctx).unwrap(); - assert_eq!(cfg.vmr.fs.len(), 1); assert!( - cfg.vmr.fs[0].virtual_entries.is_empty(), - "root virtiofs should not inject init.krun after krun_disable_implicit_init()" + cfg.disable_implicit_init, + "krun_disable_implicit_init should set the flag" ); drop(ctx_map); From 7c8ea8ab6c9e811bc9ba651e22684e54f9e8a39c Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:12:44 +0200 Subject: [PATCH 17/34] lib: add krun_set_oci_config_json() and krun_inject_init() APIs krun_set_oci_config_json(ctx, json) sets the init configuration from an OCI container-spec JSON string, replacing any config previously set via 
krun_set_exec/krun_set_workdir/krun_set_env. krun_inject_init(ctx, fs_tag) injects the init binary and config JSON into the specified virtiofs device. This is the explicit replacement for the implicit init injection that krun_disable_implicit_init opts out of. Together these allow a fully explicit init flow: krun_set_oci_config_json(ctx, json); // or krun_set_exec(...) krun_inject_init(ctx, KRUN_FS_ROOT_TAG); Assisted-by: OpenCode:claude-opus-4.6 --- include/libkrun.h | 34 +++++++++++++++++++ src/libkrun/src/lib.rs | 74 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/include/libkrun.h b/include/libkrun.h index 5a63be917..46ced9524 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -1187,6 +1187,40 @@ int32_t krun_disable_implicit_console(uint32_t ctx_id); */ int32_t krun_disable_implicit_init(uint32_t ctx_id); +/** + * Sets the init configuration from an OCI container-spec JSON string. + * + * The JSON should use OCI image config keys ("Entrypoint", "Cmd", "Env", + * "WorkingDir", "mounts"). This replaces any configuration previously set + * via krun_set_exec, krun_set_workdir, krun_set_env, etc. + * + * Arguments: + * "ctx_id" - the configuration context ID. + * "json" - a null-terminated JSON string with the OCI config. + * + * Returns: + * Zero on success or a negative error number on failure. + */ +int32_t krun_set_oci_config_json(uint32_t ctx_id, const char *json); + +/** + * Injects the built-in init binary and config JSON into the specified + * virtiofs device. + * + * Call this after configuring the init (via krun_set_exec/krun_set_workdir/ + * krun_set_env or krun_set_oci_config_json), and after the target virtiofs + * device has been created (e.g. via krun_set_root or krun_add_virtiofs3). + * + * Arguments: + * "ctx_id" - the configuration context ID. + * "fs_tag" - the virtiofs tag identifying the target device + * (typically KRUN_FS_ROOT_TAG, i.e. "/dev/root"). 
+ * + * Returns: + * Zero on success or a negative error number on failure. + */ +int32_t krun_inject_init(uint32_t ctx_id, const char *fs_tag); + /** * Get a pointer to the built-in default init binary. * diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index b36fedea5..5baab6566 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -2399,6 +2399,80 @@ pub extern "C" fn krun_disable_implicit_init(ctx_id: u32) -> i32 { KRUN_SUCCESS } +/// Sets the init configuration from an OCI container-spec JSON string. +/// +/// The JSON should use OCI image config keys (`Entrypoint`, `Cmd`, `Env`, +/// `WorkingDir`, `mounts`). This replaces any configuration previously set +/// via `krun_set_exec`, `krun_set_workdir`, `krun_set_env`, etc. +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +pub unsafe extern "C" fn krun_set_oci_config_json(ctx_id: u32, c_json: *const c_char) -> i32 { + if c_json.is_null() { + return -libc::EINVAL; + } + + let json = match CStr::from_ptr(c_json).to_str() { + Ok(s) => s, + Err(_) => return -libc::EINVAL, + }; + + let config = match init_blob::InitConfig::from_oci_spec_json(json) { + Ok(c) => c, + Err(e) => { + debug!("Error parsing OCI config JSON: {e}"); + return -libc::EINVAL; + } + }; + + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => { + ctx_cfg.get_mut().init_config = config; + } + Entry::Vacant(_) => return -libc::ENOENT, + } + + KRUN_SUCCESS +} + +/// Injects the init binary and config JSON into the specified virtiofs device. +/// +/// Call this after configuring the init via `krun_set_exec`/`krun_set_workdir`/ +/// `krun_set_env` or `krun_set_oci_config_json`, and after the target virtiofs +/// device has been created (e.g. via `krun_set_root` or `krun_add_virtiofs3`). +/// +/// `c_fs_tag` identifies which virtiofs device receives the init files +/// (typically `KRUN_FS_ROOT_TAG`, i.e. `"/dev/root"`). 
+#[allow(clippy::missing_safety_doc)] +#[no_mangle] +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +pub unsafe extern "C" fn krun_inject_init(ctx_id: u32, c_fs_tag: *const c_char) -> i32 { + if c_fs_tag.is_null() { + return -libc::EINVAL; + } + + let fs_tag = match CStr::from_ptr(c_fs_tag).to_str() { + Ok(s) => s, + Err(_) => return -libc::EINVAL, + }; + + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => { + let cfg = ctx_cfg.get_mut(); + let Some(fs) = cfg.vmr.fs.iter_mut().find(|f| f.fs_id == fs_tag) else { + debug!("krun_inject_init: virtiofs device '{fs_tag}' not found"); + return -libc::ENOENT; + }; + for gf in cfg.init_config.guest_files() { + fs.virtual_entries.push(guest_file_to_virtual_entry(gf)); + } + } + Entry::Vacant(_) => return -libc::ENOENT, + } + + KRUN_SUCCESS +} + /// Resolve a path like "a/b/c" into parent directory children + leaf name. /// Errors with a libc errno if any intermediate component is missing or not a Dir. #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] From 5bc8efce4668d9f478d905431c7b3c7d829bb1a7 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:12:47 +0200 Subject: [PATCH 18/34] examples: replace deprecated krun_set_root_disk/krun_set_data_disk with krun_add_disk Assisted-by: OpenCode:claude-opus-4.6 --- examples/boot_efi.c | 2 +- examples/launch-tee.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/boot_efi.c b/examples/boot_efi.c index 5105d46df..5891c35ee 100644 --- a/examples/boot_efi.c +++ b/examples/boot_efi.c @@ -197,7 +197,7 @@ int main(int argc, char *const argv[]) return -1; } - if (err = krun_set_root_disk(ctx_id, cmdline.disk_image)) { + if (err = krun_add_disk(ctx_id, "root", cmdline.disk_image, false)) { errno = -err; perror("Error configuring disk image"); return -1; diff --git a/examples/launch-tee.c b/examples/launch-tee.c index 063cdd5f3..1eadf193e 100644 --- a/examples/launch-tee.c +++ b/examples/launch-tee.c 
@@ -68,7 +68,7 @@ int main(int argc, char *const argv[]) } // Use the first command line argument as the disk image containing the root fs. - if (err = krun_set_root_disk(ctx_id, argv[1])) { + if (err = krun_add_disk(ctx_id, "root", argv[1], false)) { errno = -err; perror("Error configuring root disk image"); return -1; @@ -114,7 +114,7 @@ int main(int argc, char *const argv[]) return -1; } - if (err = krun_set_data_disk(ctx_id, argv[3])) { + if (err = krun_add_disk(ctx_id, "data", argv[3], false)) { errno = -err; perror("Error configuring the TEE config data disk"); return -1; From a438481263e85439c86a200245f20d91df4132f6 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:16:41 +0200 Subject: [PATCH 19/34] tests: use krun_disable_implicit_init + krun_inject_init everywhere Port all tests to the explicit init injection flow: - common.rs: add krun_disable_implicit_init + krun_inject_init - test_virtiofs_root_ro: same - test_root_disk_remount: same - test_augmentfs: replace manual init.krun/config overlay with krun_set_oci_config_json + krun_inject_init No test relies on implicit init injection anymore. 
Assisted-by: OpenCode:claude-opus-4.6 --- tests/test_cases/src/common.rs | 2 + tests/test_cases/src/test_augmentfs.rs | 43 ++++--------------- .../test_cases/src/test_root_disk_remount.rs | 2 + tests/test_cases/src/test_virtiofs_root_ro.rs | 2 + 4 files changed, 14 insertions(+), 35 deletions(-) diff --git a/tests/test_cases/src/common.rs b/tests/test_cases/src/common.rs index 3d8881ff8..8f401a95a 100644 --- a/tests/test_cases/src/common.rs +++ b/tests/test_cases/src/common.rs @@ -52,6 +52,7 @@ pub fn setup_fs_and_enter_with_env( .collect(); envp.push(null()); unsafe { + krun_call!(krun_disable_implicit_init(ctx))?; krun_call!(krun_set_root(ctx, path_str.as_ptr()))?; krun_call!(krun_set_workdir(ctx, c"/".as_ptr()))?; let test_case_cstr = CString::new(test_setup.test_case).context("CString::new")?; @@ -62,6 +63,7 @@ pub fn setup_fs_and_enter_with_env( argv.as_ptr(), envp.as_ptr(), ))?; + krun_call!(krun_inject_init(ctx, c"/dev/root".as_ptr()))?; krun_call!(krun_start_enter(ctx))?; } unreachable!() diff --git a/tests/test_cases/src/test_augmentfs.rs b/tests/test_cases/src/test_augmentfs.rs index 34edce96b..cc41fb4cd 100644 --- a/tests/test_cases/src/test_augmentfs.rs +++ b/tests/test_cases/src/test_augmentfs.rs @@ -21,7 +21,6 @@ mod host { use crate::{Test, TestSetup}; use krun_sys::*; use std::ffi::CString; - use std::ptr::null_mut; impl Test for TestAugmentFs { fn start_vm(self: Box, test_setup: TestSetup) -> anyhow::Result<()> { @@ -34,12 +33,11 @@ mod host { let guest_agent_bytes: &'static [u8] = Vec::leak(std::fs::read(&guest_agent_path).expect("Failed to read guest-agent")); - // Build JSON config: exec the guest-agent with our test name. - let json = format!( - r#"{{"args": ["/guest-agent", "{}"], "cwd": "/"}}"#, + // Build OCI config JSON: exec the guest-agent with our test name. 
+ let json = CString::new(format!( + r#"{{"Entrypoint": ["/guest-agent"], "Cmd": ["{}"], "WorkingDir": "/"}}"#, test_case.to_str().unwrap() - ); - let json_bytes: &'static [u8] = Vec::leak(json.into_bytes()); + ))?; // Deterministic test payload for range-read tests. let payload: &'static [u8] = Vec::leak(make_test_payload()); @@ -51,15 +49,8 @@ mod host { krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; - - // Disable the implicit init — we'll inject it ourselves. krun_call!(krun_disable_implicit_init(ctx))?; - // Get the default init binary. - let mut init_data: *const u8 = null_mut(); - let mut init_len: usize = 0; - krun_call!(krun_get_default_init(&mut init_data, &mut init_len))?; - // Set up root with NO host directory (NullFs). krun_call!(krun_add_virtiofs3( ctx, @@ -79,16 +70,10 @@ mod host { ))?; } - // Overlay init.krun (one-shot, executable). - krun_call!(krun_fs_add_overlay_file( - ctx, - c"/dev/root".as_ptr(), - c"init.krun".as_ptr(), - init_data, - init_len, - 0o100_755, - true, - ))?; + // Configure init from OCI JSON and inject init binary + + // config into the root virtiofs. + krun_call!(krun_set_oci_config_json(ctx, json.as_ptr()))?; + krun_call!(krun_inject_init(ctx, c"/dev/root".as_ptr()))?; // Overlay guest-agent (one-shot, executable). After init // execs it, the file should no longer be visible. @@ -102,17 +87,6 @@ mod host { true, ))?; - // Overlay .krun_config.json (one-shot). - krun_call!(krun_fs_add_overlay_file( - ctx, - c"/dev/root".as_ptr(), - c".krun_config.json".as_ptr(), - json_bytes.as_ptr(), - json_bytes.len(), - 0o100_644, - true, - ))?; - // Overlay a persistent marker file. 
krun_call!(krun_fs_add_overlay_file( ctx, @@ -160,7 +134,6 @@ mod host { false, ))?; - krun_call!(krun_set_workdir(ctx, c"/".as_ptr()))?; krun_call!(krun_start_enter(ctx))?; } Ok(()) diff --git a/tests/test_cases/src/test_root_disk_remount.rs b/tests/test_cases/src/test_root_disk_remount.rs index 56011698f..65dbbdb7b 100644 --- a/tests/test_cases/src/test_root_disk_remount.rs +++ b/tests/test_cases/src/test_root_disk_remount.rs @@ -101,6 +101,7 @@ mod host { krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; + krun_call!(krun_disable_implicit_init(ctx))?; let argv = [test_case.as_ptr(), null()]; let envp = [null()]; @@ -129,6 +130,7 @@ mod host { std::ptr::null(), ))?; + krun_call!(krun_inject_init(ctx, c"/dev/root".as_ptr()))?; krun_call!(krun_start_enter(ctx))?; } Ok(()) diff --git a/tests/test_cases/src/test_virtiofs_root_ro.rs b/tests/test_cases/src/test_virtiofs_root_ro.rs index 1fff83ce2..5227d02fb 100644 --- a/tests/test_cases/src/test_virtiofs_root_ro.rs +++ b/tests/test_cases/src/test_virtiofs_root_ro.rs @@ -43,6 +43,7 @@ mod host { krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; + krun_call!(krun_disable_implicit_init(ctx))?; // Use "/dev/root" tag (KRUN_FS_ROOT_TAG) with read_only=true krun_call!(krun_add_virtiofs3( @@ -60,6 +61,7 @@ mod host { argv.as_ptr(), envp.as_ptr(), ))?; + krun_call!(krun_inject_init(ctx, c"/dev/root".as_ptr()))?; krun_call!(krun_start_enter(ctx))?; } Ok(()) From 116d2e885ed23fd56b7ed978982217a4c549ed50 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:19:31 +0200 Subject: [PATCH 20/34] lib: remove krun_disable_implicit_init and implicit init injection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Init injection is now fully explicit via krun_inject_init(). 
The default behavior is to not inject init — callers must opt in. Remove the disable_implicit_init field from ContextConfig, the krun_disable_implicit_init() function, the implicit injection block in krun_start_enter, and all test calls to krun_disable_implicit_init. Assisted-by: OpenCode:claude-opus-4.6 --- include/libkrun.h | 12 ----- src/libkrun/src/lib.rs | 48 ------------------- tests/test_cases/src/common.rs | 1 - tests/test_cases/src/test_augmentfs.rs | 1 - .../test_cases/src/test_root_disk_remount.rs | 1 - tests/test_cases/src/test_virtiofs_root_ro.rs | 1 - 6 files changed, 64 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index 46ced9524..f49d5a107 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -1175,18 +1175,6 @@ int32_t krun_split_irqchip(uint32_t ctx_id, bool enable); */ int32_t krun_disable_implicit_console(uint32_t ctx_id); -/** - * Do not inject the default init binary (/init.krun) into the root - * filesystem. Must be called before krun_set_root(). - * - * Arguments: - * "ctx_id" - the configuration context ID. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_disable_implicit_init(uint32_t ctx_id); - /** * Sets the init configuration from an OCI container-spec JSON string. 
* diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 5baab6566..21f7637b7 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -185,8 +185,6 @@ struct ContextConfig { console_output: Option, vmm_uid: Option, vmm_gid: Option, - #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] - disable_implicit_init: bool, } impl ContextConfig { @@ -2386,19 +2384,6 @@ pub unsafe extern "C" fn krun_set_root_disk_remount( KRUN_SUCCESS } -#[no_mangle] -#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] -pub extern "C" fn krun_disable_implicit_init(ctx_id: u32) -> i32 { - match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - ctx_cfg.get_mut().disable_implicit_init = true; - } - Entry::Vacant(_) => return -libc::ENOENT, - } - - KRUN_SUCCESS -} - /// Sets the init configuration from an OCI container-spec JSON string. /// /// The JSON should use OCI image config keys (`Entrypoint`, `Cmd`, `Env`, @@ -2910,18 +2895,6 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { return -libc::EINVAL; } - // Inject init binary + config JSON into the root virtiofs device. 
- #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] - if !ctx_cfg.disable_implicit_init { - if let Some(root_fs) = ctx_cfg.vmr.fs.iter_mut().find(|f| f.fs_id == "/dev/root") { - for gf in ctx_cfg.init_config.guest_files() { - root_fs - .virtual_entries - .push(guest_file_to_virtual_entry(gf)); - } - } - } - let block_root = ctx_cfg.get_block_root(); let kernel_cmdline = KernelCmdlineConfig { prolog: Some(format!( @@ -3082,24 +3055,3 @@ fn krun_start_enter_nitro(ctx_id: u32) -> i32 { } } } - -#[cfg(all(test, not(feature = "tee")))] -mod test_disable_implicit_init { - use super::*; - - #[test] - fn test_disable_implicit_init() { - let ctx = krun_create_ctx() as u32; - krun_disable_implicit_init(ctx); - - let ctx_map = CTX_MAP.lock().unwrap(); - let cfg = ctx_map.get(&ctx).unwrap(); - assert!( - cfg.disable_implicit_init, - "krun_disable_implicit_init should set the flag" - ); - drop(ctx_map); - - assert_eq!(krun_free_ctx(ctx), KRUN_SUCCESS); - } -} diff --git a/tests/test_cases/src/common.rs b/tests/test_cases/src/common.rs index 8f401a95a..7e5a18eea 100644 --- a/tests/test_cases/src/common.rs +++ b/tests/test_cases/src/common.rs @@ -52,7 +52,6 @@ pub fn setup_fs_and_enter_with_env( .collect(); envp.push(null()); unsafe { - krun_call!(krun_disable_implicit_init(ctx))?; krun_call!(krun_set_root(ctx, path_str.as_ptr()))?; krun_call!(krun_set_workdir(ctx, c"/".as_ptr()))?; let test_case_cstr = CString::new(test_setup.test_case).context("CString::new")?; diff --git a/tests/test_cases/src/test_augmentfs.rs b/tests/test_cases/src/test_augmentfs.rs index cc41fb4cd..21900aa38 100644 --- a/tests/test_cases/src/test_augmentfs.rs +++ b/tests/test_cases/src/test_augmentfs.rs @@ -49,7 +49,6 @@ mod host { krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; - krun_call!(krun_disable_implicit_init(ctx))?; // Set up root with NO host directory (NullFs). 
krun_call!(krun_add_virtiofs3( diff --git a/tests/test_cases/src/test_root_disk_remount.rs b/tests/test_cases/src/test_root_disk_remount.rs index 65dbbdb7b..3ad709b6d 100644 --- a/tests/test_cases/src/test_root_disk_remount.rs +++ b/tests/test_cases/src/test_root_disk_remount.rs @@ -101,7 +101,6 @@ mod host { krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; - krun_call!(krun_disable_implicit_init(ctx))?; let argv = [test_case.as_ptr(), null()]; let envp = [null()]; diff --git a/tests/test_cases/src/test_virtiofs_root_ro.rs b/tests/test_cases/src/test_virtiofs_root_ro.rs index 5227d02fb..4263bdd13 100644 --- a/tests/test_cases/src/test_virtiofs_root_ro.rs +++ b/tests/test_cases/src/test_virtiofs_root_ro.rs @@ -43,7 +43,6 @@ mod host { krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; - krun_call!(krun_disable_implicit_init(ctx))?; // Use "/dev/root" tag (KRUN_FS_ROOT_TAG) with read_only=true krun_call!(krun_add_virtiofs3( From ef8385305e803a3cc0e72c62513202f3017a8fd6 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:23:09 +0200 Subject: [PATCH 21/34] lib: remove deprecated krun_set_root_disk and krun_set_data_disk These were replaced by krun_add_disk. Also remove the internal root_block_cfg/data_block_cfg fields and their setters, and simplify get_block_cfg() now that the legacy compat path is gone. 
Assisted-by: OpenCode:claude-opus-4.6 --- include/libkrun.h | 39 ------------------ init/init.c | 5 +-- src/libkrun/src/lib.rs | 92 +----------------------------------------- 3 files changed, 3 insertions(+), 133 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index f49d5a107..a1dac8afb 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -118,45 +118,12 @@ int32_t krun_set_vm_config(uint32_t ctx_id, uint8_t num_vcpus, uint32_t ram_mib) */ int32_t krun_set_root(uint32_t ctx_id, const char *root_path); -/** - * DEPRECATED. Use krun_add_disk instead. - * - * Sets the path to the disk image that contains the file-system to be used as root for the microVM. - * The only supported image format is "raw". - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "disk_path" - a null-terminated string representing the path leading to the disk image that - * contains the root file-system. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_set_root_disk(uint32_t ctx_id, const char *disk_path); -/** - * DEPRECATED. Use krun_add_disk instead. - * - * Sets the path to the disk image that contains the file-system to be used as - * a data partition for the microVM. The only supported image format is "raw". - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "disk_path" - a null-terminated string representing the path leading to the disk image that - * contains the root file-system. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_set_data_disk(uint32_t ctx_id, const char *disk_path); /** * Adds a disk image to be used as a general partition for the microVM. The only supported image * format is "raw". * - * This API is mutually exclusive with the deprecated krun_set_root_disk and - * krun_set_data_disk methods and must not be used together. 
- * * This function deliberately only handles images in the Raw format, because it doesn't allow * specifying an image format, and probing an image's format is dangerous. For more information, * see the security note on `krun_add_disk2`, which allows opening non-Raw images. @@ -183,9 +150,6 @@ int32_t krun_add_disk(uint32_t ctx_id, const char *block_id, const char *disk_pa * Adds a disk image to be used as a general partition for the microVM. The supported * image formats are: "raw" and "qcow2". * - * This API is mutually exclusive with the deprecated krun_set_root_disk and - * krun_set_data_disk methods and must not be used together. - * * SECURITY NOTE: * Non-Raw images can reference other files, which libkrun will automatically open, and to which the * guest will have access. Libkrun should therefore never be asked to open an image in a non-Raw @@ -254,9 +218,6 @@ int32_t krun_add_disk2(uint32_t ctx_id, /** * Adds a disk image to be used as a general partition for the microVM. * - * This API is mutually exclusive with the deprecated krun_set_root_disk and - * krun_set_data_disk methods and must not be used together. - * * SECURITY NOTE: * See the security note for `krun_add_disk2`. * diff --git a/init/init.c b/init/init.c index 2d2be2834..1d937712f 100644 --- a/init/init.c +++ b/init/init.c @@ -210,9 +210,8 @@ static char *get_luks_passphrase(int *pass_len) return_str = NULL; /* - * If a user registered the TEE config data disk with - * krun_set_data_disk(), it would appear as /dev/vdb in the guest. - * Mount this device and read the config. + * If a TEE config data disk was registered, it would appear as + * /dev/vdb in the guest. Mount this device and read the config. 
*/ if (mkdir("/dev", 0755) < 0 && errno != EEXIST) { perror("mkdir(/dev)"); diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 21f7637b7..ba4010ba9 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -171,10 +171,6 @@ struct ContextConfig { #[cfg(feature = "blk")] block_cfgs: Vec, #[cfg(feature = "blk")] - root_block_cfg: Option, - #[cfg(feature = "blk")] - data_block_cfg: Option, - #[cfg(feature = "blk")] block_root: Option, #[cfg(feature = "tee")] tee_config_file: Option, @@ -221,31 +217,9 @@ impl ContextConfig { self.block_cfgs.push(block_cfg); } - #[cfg(feature = "blk")] - fn set_root_block_cfg(&mut self, block_cfg: BlockDeviceConfig) { - self.root_block_cfg = Some(block_cfg); - } - - #[cfg(feature = "blk")] - fn set_data_block_cfg(&mut self, block_cfg: BlockDeviceConfig) { - self.data_block_cfg = Some(block_cfg); - } - #[cfg(feature = "blk")] fn get_block_cfg(&self) -> Vec { - // For backwards compat, when cfgs is empty (the new API is not used), this needs to be - // root and then data, in that order. Also for backwards compat, root/data are setters and - // need to discard redundant calls. So we have simple setters above and fix up here. - // - // When the new API is used, this is simpler. 
- if self.block_cfgs.is_empty() { - [&self.root_block_cfg, &self.data_block_cfg] - .into_iter() - .filter_map(|cfg| cfg.clone()) - .collect() - } else { - self.block_cfgs.clone() - } + self.block_cfgs.clone() } #[cfg(feature = "net")] @@ -805,70 +779,6 @@ pub unsafe extern "C" fn krun_add_disk3( KRUN_SUCCESS } -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -#[cfg(feature = "blk")] -pub unsafe extern "C" fn krun_set_root_disk(ctx_id: u32, c_disk_path: *const c_char) -> i32 { - let disk_path = match CStr::from_ptr(c_disk_path).to_str() { - Ok(disk) => disk, - Err(_) => return -libc::EINVAL, - }; - - match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - let block_device_config = BlockDeviceConfig { - block_id: "root".to_string(), - cache_type: CacheType::auto(disk_path), - disk_image_path: disk_path.to_string(), - disk_image_format: ImageType::Raw, - is_disk_read_only: false, - direct_io: false, - #[cfg(not(target_os = "macos"))] - sync_mode: SyncMode::Full, - #[cfg(target_os = "macos")] - sync_mode: SyncMode::Relaxed, - }; - cfg.set_root_block_cfg(block_device_config); - } - Entry::Vacant(_) => return -libc::ENOENT, - } - - KRUN_SUCCESS -} - -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -#[cfg(feature = "blk")] -pub unsafe extern "C" fn krun_set_data_disk(ctx_id: u32, c_disk_path: *const c_char) -> i32 { - let disk_path = match CStr::from_ptr(c_disk_path).to_str() { - Ok(disk) => disk, - Err(_) => return -libc::EINVAL, - }; - - match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - let block_device_config = BlockDeviceConfig { - block_id: "data".to_string(), - cache_type: CacheType::auto(disk_path), - disk_image_path: disk_path.to_string(), - disk_image_format: ImageType::Raw, - is_disk_read_only: false, - direct_io: false, - #[cfg(not(target_os = "macos"))] - sync_mode: SyncMode::Full, - #[cfg(target_os = "macos")] - sync_mode: 
SyncMode::Relaxed, - }; - cfg.set_data_block_cfg(block_device_config); - } - Entry::Vacant(_) => return -libc::ENOENT, - } - - KRUN_SUCCESS -} - /* * Send the VFKIT magic after establishing the connection, * as required by gvproxy in vfkit mode. From c69fe6d87267c803126260c7a2b799bb8a8d8fc3 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:25:46 +0200 Subject: [PATCH 22/34] lib: remove deprecated krun_set_passt_fd, krun_set_gvproxy_path, krun_set_net_mac These were replaced by krun_add_net_unixstream, krun_add_net_unixgram, and krun_add_net_tap (which take mac as a parameter directly). Also remove the internal LegacyNetworkConfig enum, legacy_net_cfg and legacy_mac fields, the compat path in krun_start_enter that converted them to the new net backend, and the now-unused NET_COMPAT_FEATURES constant. Assisted-by: OpenCode:claude-opus-4.6 --- include/libkrun.h | 58 ++------------------ src/libkrun/src/lib.rs | 117 +---------------------------------------- 2 files changed, 6 insertions(+), 169 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index a1dac8afb..8761dd169 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -332,7 +332,7 @@ int32_t krun_add_virtiofs3(uint32_t ctx_id, #define NET_FEATURE_HOST_TSO6 1 << 12 #define NET_FEATURE_HOST_UFO 1 << 14 -/* These are the features enabled by krun_set_passt_fd and krun_set_gvproxy_path. */ +/* These are the default features used by krun_add_net_unixstream and krun_add_net_unixgram. */ #define COMPAT_NET_FEATURES NET_FEATURE_CSUM | NET_FEATURE_GUEST_CSUM | \ NET_FEATURE_GUEST_TSO4 | NET_FEATURE_GUEST_UFO | \ NET_FEATURE_HOST_TSO4 | NET_FEATURE_HOST_UFO @@ -453,57 +453,6 @@ int32_t krun_add_net_tap(uint32_t ctx_id, uint32_t features, uint32_t flags); -/** - * DEPRECATED. Use krun_add_net_unixstream instead. - * - * Configures the networking to use passt. - * Call to this function disables TSI backend to use passt instead. 
- * - * Arguments: - * "ctx_id" - the configuration context ID. - * "fd" - a file descriptor to communicate with passt - * - * Notes: - * If you never call this function, networking uses the TSI backend. - * This function should be called before krun_set_port_map. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_set_passt_fd(uint32_t ctx_id, int fd); - -/** - * DEPRECATED. Use krun_add_net_unixgram instead. - * - * Configures the networking to use gvproxy in vfkit mode. - * Call to this function disables TSI backend to use gvproxy instead. - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "c_path" - a null-terminated string representing the path for - * gvproxy's listen-vfkit unixdgram socket. - * - * Notes: - * If you never call this function, networking uses the TSI backend. - * This function should be called before krun_set_port_map. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_set_gvproxy_path(uint32_t ctx_id, char *c_path); - -/** - * Sets the MAC address for the virtio-net device when using the passt backend. - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "mac" - MAC address as an array of 6 uint8_t entries. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_set_net_mac(uint32_t ctx_id, uint8_t *const c_mac); - /** * Configures a map of host to guest TCP ports for the microVM. * @@ -526,8 +475,9 @@ int32_t krun_set_net_mac(uint32_t ctx_id, uint8_t *const c_mac); * means that for a map such as "8080:80", applications running inside the guest will also * need to access the service through the "8080" port. 
* - * If past networking mode is used (krun_set_passt_fd was called), port mapping is not supported - * as an API of libkrun (but you can still do port mapping using command line arguments of passt) + * If passt networking mode is used, port mapping is not supported as an API + * of libkrun (but you can still do port mapping using command line arguments + * of passt) */ int32_t krun_set_port_map(uint32_t ctx_id, const char *const port_map[]); diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index ba4010ba9..6defa2579 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -149,22 +149,11 @@ impl KrunfwBindings { } } -#[derive(Clone)] -#[cfg(feature = "net")] -enum LegacyNetworkConfig { - VirtioNetPasst(RawFd), - VirtioNetGvproxy(PathBuf), -} - #[derive(Default)] struct ContextConfig { krunfw: Option, vmr: VmResources, init_config: init_blob::InitConfig, - #[cfg(feature = "net")] - legacy_net_cfg: Option, - #[cfg(feature = "net")] - legacy_mac: Option<[u8; 6]>, net_index: u8, tsi_port_map: Option>, vsock_config: VsockConfig, @@ -222,11 +211,6 @@ impl ContextConfig { self.block_cfgs.clone() } - #[cfg(feature = "net")] - fn set_net_mac(&mut self, mac: [u8; 6]) { - self.legacy_mac = Some(mac); - } - fn set_port_map(&mut self, new_port_map: HashMap) -> Result<(), ()> { if self.net_index != 0 { return Err(()); @@ -807,19 +791,7 @@ const NET_FEATURE_HOST_TSO4: u32 = 1 << 11; const NET_FEATURE_HOST_TSO6: u32 = 1 << 12; #[cfg(feature = "net")] const NET_FEATURE_HOST_UFO: u32 = 1 << 14; -/* - * These are the flags enabled by default on each virtio-net instance - * before the introduction of "krun_add_net_*". They are now used in - * the legacy API ("krun_set_passt_fd" and "krun_set_gvproxy_path") - * for compatiblity reasons. 
- */ -#[cfg(feature = "net")] -const NET_COMPAT_FEATURES: u32 = NET_FEATURE_CSUM - | NET_FEATURE_GUEST_CSUM - | NET_FEATURE_GUEST_TSO4 - | NET_FEATURE_GUEST_UFO - | NET_FEATURE_HOST_TSO4 - | NET_FEATURE_HOST_UFO; + #[cfg(feature = "net")] const NET_ALL_FEATURES: u32 = NET_FEATURE_CSUM | NET_FEATURE_GUEST_CSUM @@ -1015,75 +987,6 @@ pub unsafe extern "C" fn krun_add_net_tap( -libc::EINVAL } -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -#[cfg(feature = "net")] -pub unsafe extern "C" fn krun_set_passt_fd(ctx_id: u32, fd: c_int) -> i32 { - if fd < 0 { - return -libc::EINVAL; - } - - match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - // The legacy interface only supports a single network interface. - if cfg.net_index != 0 { - return -libc::EINVAL; - } - cfg.legacy_net_cfg = Some(LegacyNetworkConfig::VirtioNetPasst(fd)); - } - Entry::Vacant(_) => return -libc::ENOENT, - } - KRUN_SUCCESS -} - -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -#[cfg(feature = "net")] -pub unsafe extern "C" fn krun_set_gvproxy_path(ctx_id: u32, c_path: *const c_char) -> i32 { - let path_str = match CStr::from_ptr(c_path).to_str() { - Ok(path) => path, - Err(e) => { - debug!("Error parsing gvproxy_path: {e:?}"); - return -libc::EINVAL; - } - }; - - let path = PathBuf::from(path_str); - - match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - // The legacy interface only supports a single network interface. 
- if cfg.net_index != 0 { - return -libc::EINVAL; - } - cfg.legacy_net_cfg = Some(LegacyNetworkConfig::VirtioNetGvproxy(path)); - } - Entry::Vacant(_) => return -libc::ENOENT, - } - KRUN_SUCCESS -} - -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -#[cfg(feature = "net")] -pub unsafe extern "C" fn krun_set_net_mac(ctx_id: u32, c_mac: *const u8) -> i32 { - let mac: [u8; 6] = match slice::from_raw_parts(c_mac, 6).try_into() { - Ok(m) => m, - Err(_) => return -libc::EINVAL, - }; - - match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - cfg.set_net_mac(mac); - } - Entry::Vacant(_) => return -libc::ENOENT, - } - KRUN_SUCCESS -} - #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_set_port_map(ctx_id: u32, c_port_map: *const *const c_char) -> i32 { @@ -2823,22 +2726,6 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { return -libc::EINVAL; } - #[cfg(feature = "net")] - { - if let Some(legacy_net_cfg) = ctx_cfg.legacy_net_cfg.clone() { - let backend = match legacy_net_cfg { - LegacyNetworkConfig::VirtioNetGvproxy(path) => { - VirtioNetBackend::UnixgramPath(path, true) - } - LegacyNetworkConfig::VirtioNetPasst(fd) => VirtioNetBackend::UnixstreamFd(fd), - }; - let mac = ctx_cfg - .legacy_mac - .unwrap_or([0x5a, 0x94, 0xef, 0xe4, 0x0c, 0xee]); - create_virtio_net(&mut ctx_cfg, backend, mac, NET_COMPAT_FEATURES); - } - } - match &ctx_cfg.vsock_config { VsockConfig::Disabled => (), VsockConfig::Explicit { tsi_flags } => { @@ -2855,7 +2742,7 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { // Implicit vsock configuration - use heuristics // Check if TSI should be enabled based on network configuration #[cfg(feature = "net")] - let enable_tsi = ctx_cfg.vmr.net.list.is_empty() && ctx_cfg.legacy_net_cfg.is_none(); + let enable_tsi = ctx_cfg.vmr.net.list.is_empty(); #[cfg(not(feature = "net"))] let enable_tsi = true; From eb51844671003e6faf1d53353f8810001623c0de Mon Sep 
17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:28:19 +0200 Subject: [PATCH 23/34] tests/examples: replace krun_set_log_level with krun_init_log Assisted-by: OpenCode:claude-opus-4.6 --- examples/boot_efi.c | 2 +- examples/consoles.c | 2 +- examples/external_kernel.c | 2 +- examples/launch-tee.c | 2 +- examples/nitro.c | 2 +- tests/test_cases/src/test_augmentfs.rs | 2 +- tests/test_cases/src/test_multiport_console.rs | 2 +- tests/test_cases/src/test_net/mod.rs | 2 +- tests/test_cases/src/test_root_disk_remount.rs | 7 ++++++- tests/test_cases/src/test_tsi_tcp_guest_connect.rs | 2 +- tests/test_cases/src/test_tsi_tcp_guest_listen.rs | 2 +- tests/test_cases/src/test_virtiofs_misc.rs | 2 +- tests/test_cases/src/test_virtiofs_root_ro.rs | 7 ++++++- tests/test_cases/src/test_vm_config.rs | 2 +- tests/test_cases/src/test_vsock_guest_connect.rs | 2 +- 15 files changed, 25 insertions(+), 15 deletions(-) diff --git a/examples/boot_efi.c b/examples/boot_efi.c index 5891c35ee..089fd39e9 100644 --- a/examples/boot_efi.c +++ b/examples/boot_efi.c @@ -169,7 +169,7 @@ int main(int argc, char *const argv[]) } // Set the log level to "off". - err = krun_set_log_level(0); + err = krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_OFF, KRUN_LOG_STYLE_AUTO, 0); if (err) { errno = -err; perror("Error configuring log level"); diff --git a/examples/consoles.c b/examples/consoles.c index 30a17a492..fe3a98271 100644 --- a/examples/consoles.c +++ b/examples/consoles.c @@ -119,7 +119,7 @@ int main(int argc, char *const argv[]) const char *const *command_args = (argc > 3) ? 
(const char *const *)&argv[3] : NULL; const char *const envp[] = { 0 }; - krun_set_log_level(KRUN_LOG_LEVEL_WARN); + krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_WARN, KRUN_LOG_STYLE_AUTO, 0); int err; int ctx_id = krun_create_ctx(); diff --git a/examples/external_kernel.c b/examples/external_kernel.c index 14649881d..ab857719d 100644 --- a/examples/external_kernel.c +++ b/examples/external_kernel.c @@ -218,7 +218,7 @@ int main(int argc, char *const argv[]) } // Set the log level to "off". - err = krun_set_log_level(0); + err = krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_OFF, KRUN_LOG_STYLE_AUTO, 0); if (err) { errno = -err; diff --git a/examples/launch-tee.c b/examples/launch-tee.c index 1eadf193e..df5f78f66 100644 --- a/examples/launch-tee.c +++ b/examples/launch-tee.c @@ -45,7 +45,7 @@ int main(int argc, char *const argv[]) } // Set the log level to "error". - err = krun_set_log_level(1); + err = krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_ERROR, KRUN_LOG_STYLE_AUTO, 0); if (err) { errno = -err; perror("Error configuring log level"); diff --git a/examples/nitro.c b/examples/nitro.c index 2a80e02fd..2b4578d54 100644 --- a/examples/nitro.c +++ b/examples/nitro.c @@ -180,7 +180,7 @@ int main(int argc, char *const argv[]) // Enable debug output if configured. log_level = (cmdline.debug) ? 
KRUN_LOG_LEVEL_DEBUG : KRUN_LOG_LEVEL_OFF; - err = krun_set_log_level(log_level); + err = krun_init_log(KRUN_LOG_TARGET_DEFAULT, log_level, KRUN_LOG_STYLE_AUTO, 0); if (err) { errno = -err; perror("Error configuring log level"); diff --git a/tests/test_cases/src/test_augmentfs.rs b/tests/test_cases/src/test_augmentfs.rs index 21900aa38..684b241eb 100644 --- a/tests/test_cases/src/test_augmentfs.rs +++ b/tests/test_cases/src/test_augmentfs.rs @@ -46,7 +46,7 @@ mod host { let marker: &'static [u8] = b"virtual-file-marker-content-12345"; unsafe { - krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; diff --git a/tests/test_cases/src/test_multiport_console.rs b/tests/test_cases/src/test_multiport_console.rs index b9c4c1fd6..44583a629 100644 --- a/tests/test_cases/src/test_multiport_console.rs +++ b/tests/test_cases/src/test_multiport_console.rs @@ -50,7 +50,7 @@ mod host { impl Test for TestMultiportConsole { fn start_vm(self: Box, test_setup: TestSetup) -> anyhow::Result<()> { unsafe { - krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_disable_implicit_console(ctx))?; diff --git a/tests/test_cases/src/test_net/mod.rs b/tests/test_cases/src/test_net/mod.rs index 9eb973a81..b64062708 100644 --- a/tests/test_cases/src/test_net/mod.rs +++ b/tests/test_cases/src/test_net/mod.rs @@ -116,7 +116,7 @@ mod host { thread::spawn(move || tcp_tester.run_server(listener)); unsafe { - krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 
512))?; diff --git a/tests/test_cases/src/test_root_disk_remount.rs b/tests/test_cases/src/test_root_disk_remount.rs index 3ad709b6d..0708382f2 100644 --- a/tests/test_cases/src/test_root_disk_remount.rs +++ b/tests/test_cases/src/test_root_disk_remount.rs @@ -98,7 +98,12 @@ mod host { let test_case = CString::new(test_setup.test_case)?; unsafe { - krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + krun_call!(krun_init_log( + KRUN_LOG_TARGET_DEFAULT, + KRUN_LOG_LEVEL_TRACE, + KRUN_LOG_STYLE_AUTO, + 0 + ))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; diff --git a/tests/test_cases/src/test_tsi_tcp_guest_connect.rs b/tests/test_cases/src/test_tsi_tcp_guest_connect.rs index 426cdad05..55e599cbb 100644 --- a/tests/test_cases/src/test_tsi_tcp_guest_connect.rs +++ b/tests/test_cases/src/test_tsi_tcp_guest_connect.rs @@ -31,7 +31,7 @@ mod host { let listener = self.tcp_tester.create_server_socket(); thread::spawn(move || self.tcp_tester.run_server(listener)); unsafe { - krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; setup_fs_and_enter(ctx, test_setup)?; diff --git a/tests/test_cases/src/test_tsi_tcp_guest_listen.rs b/tests/test_cases/src/test_tsi_tcp_guest_listen.rs index 41e0ffc2d..05b050764 100644 --- a/tests/test_cases/src/test_tsi_tcp_guest_listen.rs +++ b/tests/test_cases/src/test_tsi_tcp_guest_listen.rs @@ -35,7 +35,7 @@ mod host { self.tcp_tester.run_client(); }); - krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; let ctx = krun_call_u32!(krun_create_ctx())?; let port_mapping = format!("{PORT}:{PORT}"); let port_mapping = CString::new(port_mapping).unwrap(); diff --git 
a/tests/test_cases/src/test_virtiofs_misc.rs b/tests/test_cases/src/test_virtiofs_misc.rs index 2bd8b69cb..d7074a141 100644 --- a/tests/test_cases/src/test_virtiofs_misc.rs +++ b/tests/test_cases/src/test_virtiofs_misc.rs @@ -15,7 +15,7 @@ mod host { impl Test for TestVirtioFsMisc { fn start_vm(self: Box, test_setup: TestSetup) -> anyhow::Result<()> { unsafe { - krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 1024))?; setup_fs_and_enter(ctx, test_setup)?; diff --git a/tests/test_cases/src/test_virtiofs_root_ro.rs b/tests/test_cases/src/test_virtiofs_root_ro.rs index 4263bdd13..e67908d5d 100644 --- a/tests/test_cases/src/test_virtiofs_root_ro.rs +++ b/tests/test_cases/src/test_virtiofs_root_ro.rs @@ -40,7 +40,12 @@ mod host { let envp = [null()]; unsafe { - krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + krun_call!(krun_init_log( + KRUN_LOG_TARGET_DEFAULT, + KRUN_LOG_LEVEL_TRACE, + KRUN_LOG_STYLE_AUTO, + 0 + ))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; diff --git a/tests/test_cases/src/test_vm_config.rs b/tests/test_cases/src/test_vm_config.rs index 9ccae5de1..60b8703bd 100644 --- a/tests/test_cases/src/test_vm_config.rs +++ b/tests/test_cases/src/test_vm_config.rs @@ -17,7 +17,7 @@ mod host { impl Test for TestVmConfig { fn start_vm(self: Box, test_setup: TestSetup) -> anyhow::Result<()> { unsafe { - krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, self.num_cpus, self.ram_mib))?; setup_fs_and_enter(ctx, test_setup)?; diff --git a/tests/test_cases/src/test_vsock_guest_connect.rs b/tests/test_cases/src/test_vsock_guest_connect.rs 
index bb0482f29..3d93dbad8 100644 --- a/tests/test_cases/src/test_vsock_guest_connect.rs +++ b/tests/test_cases/src/test_vsock_guest_connect.rs @@ -63,7 +63,7 @@ mod host { thread::spawn(move || server(listener)); unsafe { - krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?; + krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_add_vsock_port( ctx, From 53ea22ff1b4a2eb749f50f4d79d94b81748d08bb Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:29:17 +0200 Subject: [PATCH 24/34] lib: propagate KRUN_NITRO_DEBUG flag in krun_init_log krun_set_log_level set KRUN_NITRO_DEBUG when level==4 (debug), but krun_init_log did not. Fix the omission so removing krun_set_log_level doesn't regress nitro debug logging. Also fix the condition to level >= 4 so that trace (level 5) also enables nitro debug. Assisted-by: OpenCode:claude-opus-4.6 --- src/libkrun/src/lib.rs | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 6defa2579..0d0fe32ee 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -370,26 +370,6 @@ fn log_level_to_filter_str(level: u32) -> &'static str { } } -#[no_mangle] -pub extern "C" fn krun_set_log_level(level: u32) -> i32 { - let filter = log_level_to_filter_str(level); - env_logger::Builder::from_env(Env::default().default_filter_or(filter)) - .format_timestamp_micros() - .init(); - - #[cfg(feature = "aws-nitro")] - { - // Notify krun-awsnitro to enable debug for log level. 
- if level == 4 { - let mut debug = KRUN_NITRO_DEBUG.lock().unwrap(); - - *debug = true; - } - } - - KRUN_SUCCESS -} - mod log_defs { pub const KRUN_LOG_STYLE_AUTO: u32 = 0; pub const KRUN_LOG_STYLE_ALWAYS: u32 = 1; @@ -437,6 +417,14 @@ pub unsafe extern "C" fn krun_init_log(target: RawFd, level: u32, style: u32, op }; builder.format_timestamp_micros().target(target).init(); + #[cfg(feature = "aws-nitro")] + { + // Notify krun-awsnitro to enable debug for log level. + if level >= 4 { + *KRUN_NITRO_DEBUG.lock().unwrap() = true; + } + } + KRUN_SUCCESS } From 750f8252280809ce80787faf25f2e7b4970641bf Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:30:47 +0200 Subject: [PATCH 25/34] lib: remove deprecated krun_set_log_level Superseded by krun_init_log which provides control over target fd, log style, and env override options. Assisted-by: OpenCode:claude-opus-4.6 --- include/libkrun.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index 8761dd169..496615df1 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -10,24 +10,6 @@ extern "C" { #include #include -/** - * Sets the log level for the library. - * - * Arguments: - * "level" can be one of the following values: - * 0: Off - * 1: Error - * 2: Warn - * 3: Info - * 4: Debug - * 5: Trace - * - * Returns: - * Zero on success or a negative error number on failure. 
- */ -int32_t krun_set_log_level(uint32_t level); - - #define KRUN_LOG_TARGET_DEFAULT -1 #define KRUN_LOG_LEVEL_OFF 0 From 1cc6d5bc7f706b310d2b2194c0b2bba12e8913c9 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:35:52 +0200 Subject: [PATCH 26/34] tests/examples: replace old numbered API variants with latest - krun_add_disk -> krun_add_disk3 - krun_add_vsock_port -> krun_add_vsock_port2 - krun_set_gpu_options -> krun_set_gpu_options2 Assisted-by: OpenCode:claude-opus-4.6 --- examples/boot_efi.c | 2 +- examples/chroot_vm.c | 2 +- examples/external_kernel.c | 4 ++-- examples/launch-tee.c | 4 ++-- tests/test_cases/src/test_root_disk_remount.rs | 18 ++++++++++++------ .../test_cases/src/test_vsock_guest_connect.rs | 12 +++++++++--- 6 files changed, 27 insertions(+), 15 deletions(-) diff --git a/examples/boot_efi.c b/examples/boot_efi.c index 089fd39e9..450a0ee23 100644 --- a/examples/boot_efi.c +++ b/examples/boot_efi.c @@ -197,7 +197,7 @@ int main(int argc, char *const argv[]) return -1; } - if (err = krun_add_disk(ctx_id, "root", cmdline.disk_image, false)) { + if (err = krun_add_disk3(ctx_id, "root", cmdline.disk_image, KRUN_DISK_FORMAT_RAW, false, false, KRUN_SYNC_FULL)) { errno = -err; perror("Error configuring disk image"); return -1; diff --git a/examples/chroot_vm.c b/examples/chroot_vm.c index b0ab0a05e..e6c642e1e 100644 --- a/examples/chroot_vm.c +++ b/examples/chroot_vm.c @@ -413,7 +413,7 @@ int main(int argc, char *const argv[]) uint32_t virgl_flags = VIRGLRENDERER_USE_EGL | VIRGLRENDERER_DRM | VIRGLRENDERER_THREAD_SYNC | VIRGLRENDERER_USE_ASYNC_FENCE_CB; - if (err = krun_set_gpu_options(ctx_id, virgl_flags)) { + if (err = krun_set_gpu_options2(ctx_id, virgl_flags, (uint64_t)1 << 33)) { errno = -err; perror("Error configuring gpu"); return -1; diff --git a/examples/external_kernel.c b/examples/external_kernel.c index ab857719d..341374a3e 100644 --- a/examples/external_kernel.c +++ b/examples/external_kernel.c @@ -245,7 +245,7 @@ int 
main(int argc, char *const argv[]) if (cmdline.boot_disk) { - if (err = krun_add_disk(ctx_id, "boot", cmdline.boot_disk, 0)) + if (err = krun_add_disk3(ctx_id, "boot", cmdline.boot_disk, KRUN_DISK_FORMAT_RAW, 0, 0, KRUN_SYNC_FULL)) { errno = -err, perror("Error configuring boot disk"); @@ -254,7 +254,7 @@ int main(int argc, char *const argv[]) } if (cmdline.data_disk) { - if (err = krun_add_disk(ctx_id, "data", cmdline.data_disk, 0)) + if (err = krun_add_disk3(ctx_id, "data", cmdline.data_disk, KRUN_DISK_FORMAT_RAW, 0, 0, KRUN_SYNC_FULL)) { errno = -err, perror("Error configuring data disk"); diff --git a/examples/launch-tee.c b/examples/launch-tee.c index df5f78f66..a70329553 100644 --- a/examples/launch-tee.c +++ b/examples/launch-tee.c @@ -68,7 +68,7 @@ int main(int argc, char *const argv[]) } // Use the first command line argument as the disk image containing the root fs. - if (err = krun_add_disk(ctx_id, "root", argv[1], false)) { + if (err = krun_add_disk3(ctx_id, "root", argv[1], KRUN_DISK_FORMAT_RAW, false, false, KRUN_SYNC_FULL)) { errno = -err; perror("Error configuring root disk image"); return -1; @@ -114,7 +114,7 @@ int main(int argc, char *const argv[]) return -1; } - if (err = krun_add_disk(ctx_id, "data", argv[3], false)) { + if (err = krun_add_disk3(ctx_id, "data", argv[3], KRUN_DISK_FORMAT_RAW, false, false, KRUN_SYNC_FULL)) { errno = -err; perror("Error configuring the TEE config data disk"); return -1; diff --git a/tests/test_cases/src/test_root_disk_remount.rs b/tests/test_cases/src/test_root_disk_remount.rs index 0708382f2..96a283676 100644 --- a/tests/test_cases/src/test_root_disk_remount.rs +++ b/tests/test_cases/src/test_root_disk_remount.rs @@ -21,11 +21,14 @@ mod host { use std::process::Command; use std::ptr::null; - type KrunAddDiskFn = unsafe extern "C" fn( + type KrunAddDisk3Fn = unsafe extern "C" fn( ctx_id: u32, block_id: *const std::ffi::c_char, disk_path: *const std::ffi::c_char, + disk_format: u32, read_only: bool, + direct_io: 
bool, + sync_mode: u32, ) -> i32; type KrunSetRootDiskRemountFn = unsafe extern "C" fn( @@ -35,10 +38,10 @@ mod host { options: *const std::ffi::c_char, ) -> i32; - fn get_krun_add_disk() -> KrunAddDiskFn { - let symbol = CString::new("krun_add_disk").unwrap(); + fn get_krun_add_disk3() -> KrunAddDisk3Fn { + let symbol = CString::new("krun_add_disk3").unwrap(); let ptr = unsafe { libc::dlsym(libc::RTLD_DEFAULT, symbol.as_ptr()) }; - assert!(!ptr.is_null(), "krun_add_disk not found"); + assert!(!ptr.is_null(), "krun_add_disk3 not found"); unsafe { std::mem::transmute(ptr) } } @@ -85,7 +88,7 @@ mod host { } fn start_vm(self: Box, test_setup: TestSetup) -> anyhow::Result<()> { - let krun_add_disk = get_krun_add_disk(); + let krun_add_disk3 = get_krun_add_disk3(); let krun_set_root_disk_remount = get_krun_set_root_disk_remount(); let guest_agent_path = std::env::var("KRUN_TEST_GUEST_AGENT_PATH") @@ -119,11 +122,14 @@ mod host { krun_call!(krun_set_workdir(ctx, c"/".as_ptr()))?; // Add a block device with the ext4 image. - krun_call!(krun_add_disk( + krun_call!(krun_add_disk3( ctx, c"vda".as_ptr(), c_disk_path.as_ptr(), + KRUN_DISK_FORMAT_RAW, false, + false, + KRUN_SYNC_FULL, ))?; // Configure block device as root, pivot from NullFs. 
diff --git a/tests/test_cases/src/test_vsock_guest_connect.rs b/tests/test_cases/src/test_vsock_guest_connect.rs index 3d93dbad8..2022a4019 100644 --- a/tests/test_cases/src/test_vsock_guest_connect.rs +++ b/tests/test_cases/src/test_vsock_guest_connect.rs @@ -63,12 +63,18 @@ mod host { thread::spawn(move || server(listener)); unsafe { - krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; + krun_call!(krun_init_log( + KRUN_LOG_TARGET_DEFAULT, + KRUN_LOG_LEVEL_TRACE, + KRUN_LOG_STYLE_AUTO, + 0 + ))?; let ctx = krun_call_u32!(krun_create_ctx())?; - krun_call!(krun_add_vsock_port( + krun_call!(krun_add_vsock_port2( ctx, VSOCK_PORT, - sock_path_cstr.as_ptr() + sock_path_cstr.as_ptr(), + false, ))?; krun_call!(krun_set_vm_config(ctx, 1, 1024))?; setup_fs_and_enter(ctx, test_setup)?; From 7e27698ac8e6094df9bfb009495f58477f891db0 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:38:34 +0200 Subject: [PATCH 27/34] lib: remove old numbered API variants Remove the older versions of functions that have numbered successors: - krun_add_disk, krun_add_disk2 (superseded by krun_add_disk3) - krun_add_virtiofs, krun_add_virtiofs2 (superseded by krun_add_virtiofs3) - krun_add_vsock_port (superseded by krun_add_vsock_port2) - krun_set_gpu_options (superseded by krun_set_gpu_options2) Assisted-by: OpenCode:claude-opus-4.6 --- include/libkrun.h | 133 ++------------------------------------- src/libkrun/src/lib.rs | 137 ----------------------------------------- 2 files changed, 4 insertions(+), 266 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index 496615df1..055c67a44 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -102,80 +102,12 @@ int32_t krun_set_root(uint32_t ctx_id, const char *root_path); -/** - * Adds a disk image to be used as a general partition for the microVM. The only supported image - * format is "raw". 
- * - * This function deliberately only handles images in the Raw format, because it doesn't allow - * specifying an image format, and probing an image's format is dangerous. For more information, - * see the security note on `krun_add_disk2`, which allows opening non-Raw images. - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "block_id" - a null-terminated string representing the partition. - * "disk_path" - a null-terminated string representing the path leading to the disk image. - * "read_only" - whether the mount should be read-only. Required if the caller does not have - * write permissions (for disk images in /usr/share). - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_add_disk(uint32_t ctx_id, const char *block_id, const char *disk_path, bool read_only); - /* Supported disk image formats */ #define KRUN_DISK_FORMAT_RAW 0 #define KRUN_DISK_FORMAT_QCOW2 1 /* Note: Only supports FLAT/ZERO formats without delta links */ #define KRUN_DISK_FORMAT_VMDK 2 -/** - * Adds a disk image to be used as a general partition for the microVM. The supported - * image formats are: "raw" and "qcow2". - * - * SECURITY NOTE: - * Non-Raw images can reference other files, which libkrun will automatically open, and to which the - * guest will have access. Libkrun should therefore never be asked to open an image in a non-Raw - * format when it doesn't come from a fully trustworthy source. - * - * Consequently, probing an image's format is quite dangerous and to be avoided if at all possible, - * which is why libkrun provides no facilities for doing so. If it's not clear what format an image - * has, it may also not be clear whether it can be trusted to not reference files to which the guest - * shouldn't have access. - * - * If probing absolutely can't be avoided, it must only be done on images that are fully trusted, i.e. - * before a potentially untrusted guest had write access to it. 
Specifically, consider that a guest has - * full access to all of a Raw image, and can therefore turn it into a file in an arbitrary format, for - * example, into a Qcow2 image, referencing and granting a malicious guest access to arbitrary files. - * To hand a Raw image to an untrusted and potentially malicious guest, and then to re-probe it after - * the guest was able to write to it (when it can no longer be trusted), would therefore be a severe - * security vulnerability. - * - * Therefore, after having probed a yet fully trusted image once, the result must be remembered so the - * image will from then on always be opened in the format that was detected originally. When adhering - * to this, a guest can write anything they want to a Raw image, it's always going to be opened as a - * Raw image, preventing the security vulnerability outlined above. - * - * However, if at all possible, the image format should be explicitly selected based on knowledge - * obtained separately from the pure image data, for example by the user. - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "block_id" - a null-terminated string representing the partition. - * "disk_path" - a null-terminated string representing the path leading to the disk image. - * "disk_format" - the disk image format (i.e. KRUN_DISK_FORMAT_{RAW, QCOW2}) - * "read_only" - whether the mount should be read-only. Required if the caller does not have - * write permissions (for disk images in /usr/share). - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_add_disk2(uint32_t ctx_id, - const char *block_id, - const char *disk_path, - uint32_t disk_format, - bool read_only); - - /* Supported sync modes */ /** @@ -201,7 +133,10 @@ int32_t krun_add_disk2(uint32_t ctx_id, * Adds a disk image to be used as a general partition for the microVM. * * SECURITY NOTE: - * See the security note for `krun_add_disk2`. 
+ * Non-Raw images can reference other files, which libkrun will automatically + * open, and to which the guest will have access. Libkrun should therefore never + * be asked to open an image in a non-Raw format when it doesn't come from a + * fully trustworthy source. * * Arguments: * "ctx_id" - the configuration context ID. @@ -249,40 +184,6 @@ int32_t krun_set_mapped_volumes(uint32_t ctx_id, const char *const mapped_volume * "ctx_id" - the configuration context ID. * "c_tag" - tag to identify the filesystem in the guest. * "c_path" - full path to the directory in the host to be exposed to the guest. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_add_virtiofs(uint32_t ctx_id, - const char *c_tag, - const char *c_path); - -/** - * Adds an independent virtio-fs device pointing to a host's directory with a tag. This - * variant allows specifying the size of the DAX window. - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "c_tag" - tag to identify the filesystem in the guest. - * "c_path" - full path to the directory in the host to be exposed to the guest. - * "shm_size" - size of the DAX SHM window in bytes. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_add_virtiofs2(uint32_t ctx_id, - const char *c_tag, - const char *c_path, - uint64_t shm_size); - -/** - * Adds an independent virtio-fs device pointing to a host's directory with a tag. This - * variant allows specifying the size of the DAX window and a read-only flag. - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "c_tag" - tag to identify the filesystem in the guest. - * "c_path" - full path to the directory in the host to be exposed to the guest. * "shm_size" - size of the DAX SHM window in bytes. * "read_only" - if true, the filesystem will be exposed as read-only to the guest. 
* @@ -481,19 +382,6 @@ int32_t krun_set_port_map(uint32_t ctx_id, const char *const port_map[]); * Arguments: * "ctx_id" - the configuration context ID. * "virgl_flags" - flags to pass to virglrenderer. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_set_gpu_options(uint32_t ctx_id, uint32_t virgl_flags); - -/** - * Enables and configures a virtio-gpu device. This variant allows specifying - * the size of the host window (acting as vRAM in the guest). - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "virgl_flags" - flags to pass to virglrenderer. * "shm_size" - size of the SHM host window in bytes. * * Returns: @@ -867,19 +755,6 @@ int32_t krun_set_env(uint32_t ctx_id, const char *const envp[]); */ int32_t krun_set_tee_config_file(uint32_t ctx_id, const char *filepath); -/** - * Adds a port-path pairing for guest IPC with a process in the host. - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "port" - a vsock port that the guest will connect to for IPC. - * "filepath" - a null-terminated string representing the path of the UNIX - * socket in the host. - */ -int32_t krun_add_vsock_port(uint32_t ctx_id, - uint32_t port, - const char *c_filepath); - /** * Adds a port-path pairing for guest IPC with a process in the host. 
* diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 0d0fe32ee..756af33ac 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -521,29 +521,6 @@ pub unsafe extern "C" fn krun_set_root(ctx_id: u32, c_root_path: *const c_char) KRUN_SUCCESS } -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] -pub unsafe extern "C" fn krun_add_virtiofs( - ctx_id: u32, - c_tag: *const c_char, - c_path: *const c_char, -) -> i32 { - krun_add_virtiofs3(ctx_id, c_tag, c_path, 0, false) -} - -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] -pub unsafe extern "C" fn krun_add_virtiofs2( - ctx_id: u32, - c_tag: *const c_char, - c_path: *const c_char, - shm_size: u64, -) -> i32 { - krun_add_virtiofs3(ctx_id, c_tag, c_path, shm_size, false) -} - #[allow(clippy::missing_safety_doc)] #[no_mangle] #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] @@ -609,96 +586,6 @@ pub unsafe extern "C" fn krun_set_mapped_volumes( -libc::EINVAL } -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -#[cfg(feature = "blk")] -pub unsafe extern "C" fn krun_add_disk( - ctx_id: u32, - c_block_id: *const c_char, - c_disk_path: *const c_char, - read_only: bool, -) -> i32 { - let disk_path = match CStr::from_ptr(c_disk_path).to_str() { - Ok(disk) => disk, - Err(_) => return -libc::EINVAL, - }; - - let block_id = match CStr::from_ptr(c_block_id).to_str() { - Ok(block_id) => block_id, - Err(_) => return -libc::EINVAL, - }; - - match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - let block_device_config = BlockDeviceConfig { - block_id: block_id.to_string(), - cache_type: CacheType::auto(disk_path), - disk_image_path: disk_path.to_string(), - disk_image_format: ImageType::Raw, - is_disk_read_only: read_only, - direct_io: false, - #[cfg(not(target_os = "macos"))] - sync_mode: SyncMode::Full, - #[cfg(target_os 
= "macos")] - sync_mode: SyncMode::Relaxed, - }; - cfg.add_block_cfg(block_device_config); - } - Entry::Vacant(_) => return -libc::ENOENT, - } - - KRUN_SUCCESS -} - -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -#[cfg(feature = "blk")] -pub unsafe extern "C" fn krun_add_disk2( - ctx_id: u32, - c_block_id: *const c_char, - c_disk_path: *const c_char, - disk_format: u32, - read_only: bool, -) -> i32 { - let disk_path = match CStr::from_ptr(c_disk_path).to_str() { - Ok(disk) => disk, - Err(_) => return -libc::EINVAL, - }; - - let block_id = match CStr::from_ptr(c_block_id).to_str() { - Ok(block_id) => block_id, - Err(_) => return -libc::EINVAL, - }; - - let format = match ImageType::try_from(disk_format) { - Ok(format) => format, - Err(_) => return -libc::EINVAL, - }; - - match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - let block_device_config = BlockDeviceConfig { - block_id: block_id.to_string(), - cache_type: CacheType::auto(disk_path), - disk_image_path: disk_path.to_string(), - disk_image_format: format, - is_disk_read_only: read_only, - direct_io: false, - #[cfg(not(target_os = "macos"))] - sync_mode: SyncMode::Full, - #[cfg(target_os = "macos")] - sync_mode: SyncMode::Relaxed, - }; - cfg.add_block_cfg(block_device_config); - } - Entry::Vacant(_) => return -libc::ENOENT, - } - - KRUN_SUCCESS -} - #[allow(clippy::missing_safety_doc)] #[no_mangle] #[cfg(feature = "blk")] @@ -1197,16 +1084,6 @@ pub unsafe extern "C" fn krun_set_tee_config_file(ctx_id: u32, c_filepath: *cons KRUN_SUCCESS } -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -pub unsafe extern "C" fn krun_add_vsock_port( - ctx_id: u32, - port: u32, - c_filepath: *const c_char, -) -> i32 { - krun_add_vsock_port2(ctx_id, port, c_filepath, false) -} - #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_add_vsock_port2( @@ -1247,20 +1124,6 @@ pub unsafe extern "C" fn krun_add_vsock_port2( KRUN_SUCCESS } 
-#[allow(clippy::missing_safety_doc)] -#[no_mangle] -pub unsafe extern "C" fn krun_set_gpu_options(ctx_id: u32, virgl_flags: u32) -> i32 { - match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - cfg.set_gpu_virgl_flags(virgl_flags); - } - Entry::Vacant(_) => return -libc::ENOENT, - } - - KRUN_SUCCESS -} - #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_set_gpu_options2( From 3d80dbd6c10cd56b33763b47ff028db4c1684f92 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:44:40 +0200 Subject: [PATCH 28/34] lib: remove unsupported krun_set_mapped_volumes This function has been returning -EINVAL unconditionally. Remove it. Assisted-by: OpenCode:claude-opus-4.6 --- include/libkrun.h | 16 ---------------- src/libkrun/src/lib.rs | 9 --------- 2 files changed, 25 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index 055c67a44..e9bee788f 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -161,22 +161,6 @@ int32_t krun_set_root(uint32_t ctx_id, const char *root_path); bool direct_io, uint32_t sync_mode); -/** - * NO LONGER SUPPORTED. DO NOT USE. - * - * Configures the mapped volumes for the microVM. Only supported on macOS, on Linux use - * user_namespaces and bind-mounts instead. Not available in libkrun-SEV. - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "mapped_volumes" - an array of string pointers with format "host_path:guest_path" representing - * the volumes to be mapped inside the microVM - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_set_mapped_volumes(uint32_t ctx_id, const char *const mapped_volumes[]); - /** * Adds an independent virtio-fs device pointing to a host's directory with a tag. 
* diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 756af33ac..67ee6d7c1 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -577,15 +577,6 @@ pub unsafe extern "C" fn krun_add_virtiofs3( } #[allow(clippy::missing_safety_doc)] -#[no_mangle] -#[cfg(not(feature = "tee"))] -pub unsafe extern "C" fn krun_set_mapped_volumes( - _ctx_id: u32, - _c_mapped_volumes: *const *const c_char, -) -> i32 { - -libc::EINVAL -} - #[allow(clippy::missing_safety_doc)] #[no_mangle] #[cfg(feature = "blk")] From c0a0d41eff6df3792eb6f43f52eb8f6bfa94d7d1 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:46:31 +0200 Subject: [PATCH 29/34] lib: remove krun_get_default_init Superseded by krun_inject_init which handles both the init binary and config JSON injection in one call. Assisted-by: OpenCode:claude-opus-4.6 --- include/libkrun.h | 19 ------------------- src/libkrun/src/lib.rs | 15 --------------- 2 files changed, 34 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index e9bee788f..2933a4c52 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -961,25 +961,6 @@ int32_t krun_set_oci_config_json(uint32_t ctx_id, const char *json); */ int32_t krun_inject_init(uint32_t ctx_id, const char *fs_tag); -/** - * Get a pointer to the built-in default init binary. - * - * This is the same binary that libkrun injects as /init.krun by default. - * Callers that use krun_disable_implicit_init() can use this to inject the - * init binary themselves (e.g. via krun_fs_add_overlay_file with custom - * settings). - * - * The returned pointer is valid for the lifetime of the process (static data). - * - * Arguments: - * "data_out" - receives a pointer to the init binary bytes. - * "len_out" - receives the length in bytes. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_get_default_init(const uint8_t **data_out, size_t *len_out); - /** * Add a virtual overlay file to a virtiofs device. 
* diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 67ee6d7c1..d2346f397 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -2165,21 +2165,6 @@ fn fs_add_overlay_entry(ctx_id: u32, fs_tag: &str, path: &str, entry: VirtualEnt KRUN_SUCCESS } -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] -pub unsafe extern "C" fn krun_get_default_init( - data_out: *mut *const u8, - len_out: *mut size_t, -) -> i32 { - if data_out.is_null() || len_out.is_null() { - return -libc::EINVAL; - } - *data_out = init_blob::INIT_BINARY.as_ptr(); - *len_out = init_blob::INIT_BINARY.len(); - KRUN_SUCCESS -} - #[allow(clippy::missing_safety_doc)] #[no_mangle] #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] From d49d62dc6b34a5333c42c615e50cd4e6da3be357 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 17:54:07 +0200 Subject: [PATCH 30/34] tests/examples: use explicit krun_add_virtio_console_default everywhere Replace implicit console creation with explicit krun_add_virtio_console_default calls. Also replace krun_set_console_output in nitro.c with krun_add_virtio_console_default. No test or example relies on implicit console injection anymore. 
Assisted-by: OpenCode:claude-opus-4.6 --- examples/boot_efi.c | 6 +++++ examples/chroot_vm.c | 6 +++++ examples/external_kernel.c | 7 ++++++ examples/gui_vm/src/main.rs | 22 +++++++++++++------ examples/launch-tee.c | 6 +++++ examples/nitro.c | 4 ++-- tests/test_cases/src/test_augmentfs.rs | 7 ++++++ tests/test_cases/src/test_net/mod.rs | 14 +++++++++++- tests/test_cases/src/test_net_perf.rs | 7 ++++++ tests/test_cases/src/test_pjdfstest.rs | 7 ++++++ .../test_cases/src/test_root_disk_remount.rs | 7 ++++++ .../src/test_tsi_tcp_guest_connect.rs | 14 +++++++++++- .../src/test_tsi_tcp_guest_listen.rs | 14 +++++++++++- tests/test_cases/src/test_virtiofs_misc.rs | 14 +++++++++++- tests/test_cases/src/test_virtiofs_root_ro.rs | 7 ++++++ tests/test_cases/src/test_vm_config.rs | 14 +++++++++++- .../src/test_vsock_guest_connect.rs | 7 ++++++ 17 files changed, 149 insertions(+), 14 deletions(-) diff --git a/examples/boot_efi.c b/examples/boot_efi.c index 450a0ee23..ccbe313b5 100644 --- a/examples/boot_efi.c +++ b/examples/boot_efi.c @@ -191,6 +191,12 @@ int main(int argc, char *const argv[]) return -1; } + if (err = krun_add_virtio_console_default(ctx_id, STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO)) { + errno = -err; + perror("Error configuring console"); + return -1; + } + if (err = krun_set_firmware(ctx_id, cmdline.efi_fw)) { errno = -err; perror("Error configuring EFI FW path"); diff --git a/examples/chroot_vm.c b/examples/chroot_vm.c index e6c642e1e..6845242d8 100644 --- a/examples/chroot_vm.c +++ b/examples/chroot_vm.c @@ -308,6 +308,12 @@ int main(int argc, char *const argv[]) return -1; } + if (err = krun_add_virtio_console_default(ctx_id, STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO)) { + errno = -err; + perror("Error configuring console"); + return -1; + } + // Configure vhost-user RNG if requested if (cmdline.vhost_user_rng_socket != NULL) { // Test sentinel-terminated array: auto-detect queue count, use custom size diff --git a/examples/external_kernel.c 
b/examples/external_kernel.c index 341374a3e..81d3f5c72 100644 --- a/examples/external_kernel.c +++ b/examples/external_kernel.c @@ -243,6 +243,13 @@ int main(int argc, char *const argv[]) return -1; } + if (err = krun_add_virtio_console_default(ctx_id, STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO)) + { + errno = -err; + perror("Error configuring console"); + return -1; + } + if (cmdline.boot_disk) { if (err = krun_add_disk3(ctx_id, "boot", cmdline.boot_disk, KRUN_DISK_FORMAT_RAW, 0, 0, KRUN_SYNC_FULL)) diff --git a/examples/gui_vm/src/main.rs b/examples/gui_vm/src/main.rs index 660a2e21a..851b8e364 100644 --- a/examples/gui_vm/src/main.rs +++ b/examples/gui_vm/src/main.rs @@ -6,23 +6,24 @@ use gtk_display::{ }; use krun_sys::{ - KRUN_LOG_LEVEL_TRACE, KRUN_LOG_LEVEL_WARN, KRUN_LOG_STYLE_ALWAYS, KRUN_LOG_TARGET_DEFAULT, - VIRGLRENDERER_RENDER_SERVER, VIRGLRENDERER_THREAD_SYNC, VIRGLRENDERER_USE_ASYNC_FENCE_CB, - VIRGLRENDERER_USE_EGL, VIRGLRENDERER_VENUS, krun_add_display, krun_add_input_device, - krun_add_input_device_fd, krun_create_ctx, krun_display_set_dpi, + krun_add_display, krun_add_input_device, krun_add_input_device_fd, + krun_add_virtio_console_default, krun_create_ctx, krun_display_set_dpi, krun_display_set_physical_size, krun_display_set_refresh_rate, krun_init_log, krun_set_display_backend, krun_set_exec, krun_set_gpu_options2, krun_set_root, - krun_set_vm_config, krun_start_enter, + krun_set_vm_config, krun_start_enter, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_LEVEL_WARN, + KRUN_LOG_STYLE_ALWAYS, KRUN_LOG_TARGET_DEFAULT, VIRGLRENDERER_RENDER_SERVER, + VIRGLRENDERER_THREAD_SYNC, VIRGLRENDERER_USE_ASYNC_FENCE_CB, VIRGLRENDERER_USE_EGL, + VIRGLRENDERER_VENUS, }; use log::LevelFilter; use regex::{Captures, Regex}; -use std::ffi::{CString, c_void}; +use std::ffi::{c_void, CString}; use std::fmt::Display; use std::fs::{File, OpenOptions}; use std::mem::size_of_val; use anyhow::Context; -use std::os::fd::IntoRawFd; +use std::os::fd::{AsRawFd, IntoRawFd}; use 
std::path::PathBuf; use std::process::exit; use std::ptr::null; @@ -150,6 +151,13 @@ fn krun_thread( krun_call!(krun_set_vm_config(ctx, 4, 4096))?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; + krun_call!(krun_set_gpu_options2( ctx, VIRGLRENDERER_USE_EGL diff --git a/examples/launch-tee.c b/examples/launch-tee.c index a70329553..9da295ab6 100644 --- a/examples/launch-tee.c +++ b/examples/launch-tee.c @@ -67,6 +67,12 @@ int main(int argc, char *const argv[]) return -1; } + if (err = krun_add_virtio_console_default(ctx_id, STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO)) { + errno = -err; + perror("Error configuring console"); + return -1; + } + // Use the first command line argument as the disk image containing the root fs. if (err = krun_add_disk3(ctx_id, "root", argv[1], KRUN_DISK_FORMAT_RAW, false, false, KRUN_SYNC_FULL)) { errno = -err; diff --git a/examples/nitro.c b/examples/nitro.c index 2b4578d54..379e28d89 100644 --- a/examples/nitro.c +++ b/examples/nitro.c @@ -203,9 +203,9 @@ int main(int argc, char *const argv[]) return -1; } - if (err = krun_set_console_output(ctx_id, "/dev/stdout")) { + if (err = krun_add_virtio_console_default(ctx_id, -1, STDOUT_FILENO, -1)) { errno = -err; - perror("Error configuring the console output"); + perror("Error configuring the console"); return -1; } diff --git a/tests/test_cases/src/test_augmentfs.rs b/tests/test_cases/src/test_augmentfs.rs index 684b241eb..c205e2a43 100644 --- a/tests/test_cases/src/test_augmentfs.rs +++ b/tests/test_cases/src/test_augmentfs.rs @@ -21,6 +21,7 @@ mod host { use crate::{Test, TestSetup}; use krun_sys::*; use std::ffi::CString; + use std::os::fd::AsRawFd; impl Test for TestAugmentFs { fn start_vm(self: Box, test_setup: TestSetup) -> anyhow::Result<()> { @@ -49,6 +50,12 @@ mod host { krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; let 
ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; // Set up root with NO host directory (NullFs). krun_call!(krun_add_virtiofs3( diff --git a/tests/test_cases/src/test_net/mod.rs b/tests/test_cases/src/test_net/mod.rs index b64062708..acac92956 100644 --- a/tests/test_cases/src/test_net/mod.rs +++ b/tests/test_cases/src/test_net/mod.rs @@ -86,6 +86,7 @@ mod host { use crate::common::setup_fs_and_enter; use crate::{krun_call, krun_call_u32, Test, TestOutcome, TestSetup}; use krun_sys::*; + use std::os::fd::AsRawFd; use std::thread; impl Test for TestNet { @@ -116,13 +117,24 @@ mod host { thread::spawn(move || tcp_tester.run_server(listener)); unsafe { - krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; + krun_call!(krun_init_log( + KRUN_LOG_TARGET_DEFAULT, + KRUN_LOG_LEVEL_TRACE, + KRUN_LOG_STYLE_AUTO, + 0 + ))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; // Backend-specific setup (self.setup_backend)(ctx, &test_setup)?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; setup_fs_and_enter(ctx, test_setup)?; } Ok(()) diff --git a/tests/test_cases/src/test_net_perf.rs b/tests/test_cases/src/test_net_perf.rs index 4c9c7acbe..3e16d3bba 100644 --- a/tests/test_cases/src/test_net_perf.rs +++ b/tests/test_cases/src/test_net_perf.rs @@ -155,6 +155,7 @@ mod host { use crate::common::setup_fs_and_enter; use crate::{krun_call, krun_call_u32, Test, TestOutcome, TestSetup}; use krun_sys::*; + use std::os::fd::AsRawFd; use std::process::{Child, Command, Stdio}; const CONTAINERFILE: &str = "\ @@ -360,6 +361,12 @@ RUN dnf install -y iperf3 && dnf clean all // Backend-specific setup 
(self.setup_backend)(ctx, &test_setup)?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; setup_fs_and_enter(ctx, test_setup)?; } Ok(()) diff --git a/tests/test_cases/src/test_pjdfstest.rs b/tests/test_cases/src/test_pjdfstest.rs index 741895cb5..ecfeb2006 100644 --- a/tests/test_cases/src/test_pjdfstest.rs +++ b/tests/test_cases/src/test_pjdfstest.rs @@ -9,6 +9,7 @@ mod host { use crate::{krun_call, krun_call_u32, ShouldRun, Test, TestOutcome, TestSetup}; use krun_sys::*; use std::ffi::CString; + use std::os::fd::AsRawFd; use macros::env_or_default; @@ -54,6 +55,12 @@ mod host { unsafe { let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 2, 1024))?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; setup_fs_and_enter_with_env(ctx, test_setup, &[host_os_env.as_c_str()])?; } Ok(()) diff --git a/tests/test_cases/src/test_root_disk_remount.rs b/tests/test_cases/src/test_root_disk_remount.rs index 96a283676..c52cdc422 100644 --- a/tests/test_cases/src/test_root_disk_remount.rs +++ b/tests/test_cases/src/test_root_disk_remount.rs @@ -18,6 +18,7 @@ mod host { use krun_sys::*; use nix::libc; use std::ffi::CString; + use std::os::fd::AsRawFd; use std::process::Command; use std::ptr::null; @@ -109,6 +110,12 @@ mod host { ))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; let argv = [test_case.as_ptr(), null()]; let envp = [null()]; diff --git a/tests/test_cases/src/test_tsi_tcp_guest_connect.rs b/tests/test_cases/src/test_tsi_tcp_guest_connect.rs index 55e599cbb..2eb7b30dc 100644 --- 
a/tests/test_cases/src/test_tsi_tcp_guest_connect.rs +++ b/tests/test_cases/src/test_tsi_tcp_guest_connect.rs @@ -24,6 +24,7 @@ mod host { use crate::{krun_call, krun_call_u32}; use crate::{Test, TestSetup}; use krun_sys::*; + use std::os::fd::AsRawFd; use std::thread; impl Test for TestTsiTcpGuestConnect { @@ -31,9 +32,20 @@ mod host { let listener = self.tcp_tester.create_server_socket(); thread::spawn(move || self.tcp_tester.run_server(listener)); unsafe { - krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; + krun_call!(krun_init_log( + KRUN_LOG_TARGET_DEFAULT, + KRUN_LOG_LEVEL_TRACE, + KRUN_LOG_STYLE_AUTO, + 0 + ))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; setup_fs_and_enter(ctx, test_setup)?; } Ok(()) diff --git a/tests/test_cases/src/test_tsi_tcp_guest_listen.rs b/tests/test_cases/src/test_tsi_tcp_guest_listen.rs index 05b050764..09ca79ebf 100644 --- a/tests/test_cases/src/test_tsi_tcp_guest_listen.rs +++ b/tests/test_cases/src/test_tsi_tcp_guest_listen.rs @@ -23,6 +23,7 @@ mod host { use crate::{krun_call, krun_call_u32, Test, TestSetup}; use krun_sys::*; use std::ffi::CString; + use std::os::fd::AsRawFd; use std::ptr::null; use std::thread; use std::time::Duration; @@ -35,7 +36,12 @@ mod host { self.tcp_tester.run_client(); }); - krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; + krun_call!(krun_init_log( + KRUN_LOG_TARGET_DEFAULT, + KRUN_LOG_LEVEL_TRACE, + KRUN_LOG_STYLE_AUTO, + 0 + ))?; let ctx = krun_call_u32!(krun_create_ctx())?; let port_mapping = format!("{PORT}:{PORT}"); let port_mapping = CString::new(port_mapping).unwrap(); @@ -43,6 +49,12 @@ mod host { krun_call!(krun_set_port_map(ctx, port_map.as_ptr()))?; krun_call!(krun_set_vm_config(ctx, 1, 
512))?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; setup_fs_and_enter(ctx, test_setup)?; println!("OK"); } diff --git a/tests/test_cases/src/test_virtiofs_misc.rs b/tests/test_cases/src/test_virtiofs_misc.rs index d7074a141..137d58fa9 100644 --- a/tests/test_cases/src/test_virtiofs_misc.rs +++ b/tests/test_cases/src/test_virtiofs_misc.rs @@ -11,13 +11,25 @@ mod host { use crate::{Test, TestOutcome, TestSetup}; use krun_sys::*; use std::io::Read; + use std::os::fd::AsRawFd; impl Test for TestVirtioFsMisc { fn start_vm(self: Box, test_setup: TestSetup) -> anyhow::Result<()> { unsafe { - krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; + krun_call!(krun_init_log( + KRUN_LOG_TARGET_DEFAULT, + KRUN_LOG_LEVEL_TRACE, + KRUN_LOG_STYLE_AUTO, + 0 + ))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 1024))?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; setup_fs_and_enter(ctx, test_setup)?; } Ok(()) diff --git a/tests/test_cases/src/test_virtiofs_root_ro.rs b/tests/test_cases/src/test_virtiofs_root_ro.rs index e67908d5d..26bce9f78 100644 --- a/tests/test_cases/src/test_virtiofs_root_ro.rs +++ b/tests/test_cases/src/test_virtiofs_root_ro.rs @@ -20,6 +20,7 @@ mod host { use krun_sys::*; use std::ffi::CString; use std::fs; + use std::os::fd::AsRawFd; use std::os::unix::ffi::OsStrExt; use std::ptr::null; @@ -48,6 +49,12 @@ mod host { ))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; // Use "/dev/root" tag (KRUN_FS_ROOT_TAG) with read_only=true 
krun_call!(krun_add_virtiofs3( diff --git a/tests/test_cases/src/test_vm_config.rs b/tests/test_cases/src/test_vm_config.rs index 60b8703bd..604856912 100644 --- a/tests/test_cases/src/test_vm_config.rs +++ b/tests/test_cases/src/test_vm_config.rs @@ -13,13 +13,25 @@ mod host { use crate::{krun_call, krun_call_u32}; use crate::{Test, TestSetup}; use krun_sys::*; + use std::os::fd::AsRawFd; impl Test for TestVmConfig { fn start_vm(self: Box, test_setup: TestSetup) -> anyhow::Result<()> { unsafe { - krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; + krun_call!(krun_init_log( + KRUN_LOG_TARGET_DEFAULT, + KRUN_LOG_LEVEL_TRACE, + KRUN_LOG_STYLE_AUTO, + 0 + ))?; let ctx = krun_call_u32!(krun_create_ctx())?; krun_call!(krun_set_vm_config(ctx, self.num_cpus, self.ram_mib))?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; setup_fs_and_enter(ctx, test_setup)?; } Ok(()) diff --git a/tests/test_cases/src/test_vsock_guest_connect.rs b/tests/test_cases/src/test_vsock_guest_connect.rs index 2022a4019..8343495ae 100644 --- a/tests/test_cases/src/test_vsock_guest_connect.rs +++ b/tests/test_cases/src/test_vsock_guest_connect.rs @@ -39,6 +39,7 @@ mod host { use krun_sys::*; use std::ffi::CString; use std::io::Write; + use std::os::fd::AsRawFd; use std::os::unix::net::UnixListener; use std::os::unix::prelude::OsStrExt; use std::{mem, thread}; @@ -77,6 +78,12 @@ mod host { false, ))?; krun_call!(krun_set_vm_config(ctx, 1, 1024))?; + krun_call!(krun_add_virtio_console_default( + ctx, + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ))?; setup_fs_and_enter(ctx, test_setup)?; } Ok(()) From f5e0c8748f460c1676bbf5a1bf963e2d45149283 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 18:02:16 +0200 Subject: [PATCH 31/34] lib: remove krun_disable_implicit_console, 
krun_set_console_output, and implicit console Console creation is now fully explicit via krun_add_virtio_console_default or krun_add_virtio_console_multiport. No console is created unless the caller requests one. Remove the disable_implicit_console field from VmResources, the implicit console and serial device creation paths in builder.rs, the console_output field and setter on VmResources, and krun_set_console_output (kept only behind cfg(aws-nitro) where NitroEnclave still needs it). Assisted-by: OpenCode:claude-opus-4.6 --- examples/consoles.c | 6 -- include/libkrun.h | 41 +---------- src/libkrun/src/lib.rs | 61 +++++++--------- src/vmm/src/builder.rs | 72 +++---------------- src/vmm/src/resources.rs | 15 +--- .../test_cases/src/test_multiport_console.rs | 9 ++- 6 files changed, 47 insertions(+), 157 deletions(-) diff --git a/examples/consoles.c b/examples/consoles.c index fe3a98271..23ee8226f 100644 --- a/examples/consoles.c +++ b/examples/consoles.c @@ -125,12 +125,6 @@ int main(int argc, char *const argv[]) int ctx_id = krun_create_ctx(); if (ctx_id < 0) { errno = -ctx_id; perror("krun_create_ctx"); return 1; } - if ((err = krun_disable_implicit_console(ctx_id))) { - errno = -err; - perror("krun_disable_implicit_console"); - return 1; - } - int console_id = krun_add_virtio_console_multiport(ctx_id); if (console_id < 0) { errno = -console_id; diff --git a/include/libkrun.h b/include/libkrun.h index 2933a4c52..a237fd59b 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -786,22 +786,6 @@ int32_t krun_add_vsock(uint32_t ctx_id, uint32_t tsi_features); */ int32_t krun_get_shutdown_eventfd(uint32_t ctx_id); -/** - * Configures the console device to ignore stdin and write the output to "c_filepath". - * - * Arguments: - * "ctx_id" - the configuration context ID. - * "filepath" - a null-terminated string representing the path of the file to write the - * console output. - * - * Notes: - * This API only applies to the implicitly created console. 
If the implicit console is - * disabled via `krun_disable_implicit_console` the operation is a NOOP. Additionally, - * this API does not have any effect on consoles created via the `krun_add_*_console_default` - * APIs. - */ -int32_t krun_set_console_output(uint32_t ctx_id, const char *c_filepath); - /** * Configures uid which is set right before the microVM is started. * @@ -913,20 +897,6 @@ int32_t krun_split_irqchip(uint32_t ctx_id, bool enable); */ -/* - * Do not create an implicit console device in the guest. By using this API, - * libkrun will create zero console devices on behalf of the user. Any - * console devices needed by the user must be added manually via other API - * calls. - * - * Arguments: - * "ctx_id" - the configuration context ID. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_disable_implicit_console(uint32_t ctx_id); - /** * Sets the init configuration from an OCI container-spec JSON string. * @@ -1042,9 +1012,7 @@ int32_t krun_set_kernel_console(uint32_t ctx_id, const char *console_id); * * The function can be called multiple times for adding multiple virtio-console devices. * In the guest, the consoles will appear in the same order as they are added (that is, - * the first added console will be "hvc0", the second "hvc1", ...). However, if the - * implicit console is not disabled via `krun_disable_implicit_console`, the first - * console created with the function will occupy the "hvc1" ID. + * the first added console will be "hvc0", the second "hvc1", ...). * * This function attaches a multi port virtio-console to the guest. If the input, output and error * file descriptors are TTYs, the device will be created with just a single console port (`err_fd` @@ -1072,9 +1040,7 @@ int32_t krun_add_virtio_console_default(uint32_t ctx_id, * * The function can be called multiple times for adding multiple serial devices. 
* In the guest, the consoles will appear in the same order as they are added (that is, - * the first added console will be "ttyS0", the second "ttyS1", ...). However, if the - * implicit console is not disabled via `krun_disable_implicit_console` on aarch64 or macOS, - * the first console created with the function will occupy the "ttyS1" ID. + * the first added console will be "ttyS0", the second "ttyS1", ...). * * Arguments: * "ctx_id" - the configuration context ID. @@ -1097,8 +1063,7 @@ int32_t krun_add_serial_console_default(uint32_t ctx_id, * * The function can be called multiple times for adding multiple virtio-console devices. * Each device appears in the guest with port 0 accessible as /dev/hvcN (hvc0, hvc1, etc.) in the order - * devices are added. If the implicit console is not disabled via `krun_disable_implicit_console`, - * the first explicitly added device will occupy the "hvc1" ID. Additional ports within each device + * devices are added. Additional ports within each device * (port 1, 2, ...) appear as /dev/vportNpM character devices. * * Arguments: diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index d2346f397..b14c57645 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -167,6 +167,8 @@ struct ContextConfig { shutdown_efd: Option, gpu_virgl_flags: Option, gpu_shm_size: Option, + /// Console output path, only used by the aws-nitro TryFrom path. 
+ #[cfg(feature = "aws-nitro")] console_output: Option, vmm_uid: Option, vmm_gid: Option, @@ -1494,27 +1496,12 @@ pub unsafe extern "C" fn krun_add_vhost_user_device( -libc::ENOTSUP } -#[allow(unused_assignments)] -#[no_mangle] -pub extern "C" fn krun_get_shutdown_eventfd(ctx_id: u32) -> i32 { - match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - if let Some(efd) = cfg.shutdown_efd.as_ref() { - #[cfg(target_os = "macos")] - return efd.get_write_fd(); - #[cfg(target_os = "linux")] - return efd.as_raw_fd(); - } else { - -libc::EINVAL - } - } - Entry::Vacant(_) => -libc::ENOENT, - } -} - +// FIXME: aws-nitro builds its own NitroEnclave from ContextConfig and needs +// the console output path directly. This should be replaced with a proper +// console configuration in the nitro path. #[allow(clippy::missing_safety_doc)] #[no_mangle] +#[cfg(feature = "aws-nitro")] pub unsafe extern "C" fn krun_set_console_output(ctx_id: u32, c_filepath: *const c_char) -> i32 { let filepath = match CStr::from_ptr(c_filepath).to_str() { Ok(f) => f, @@ -1535,6 +1522,25 @@ pub unsafe extern "C" fn krun_set_console_output(ctx_id: u32, c_filepath: *const } } +#[allow(unused_assignments)] +#[no_mangle] +pub extern "C" fn krun_get_shutdown_eventfd(ctx_id: u32) -> i32 { + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => { + let cfg = ctx_cfg.get_mut(); + if let Some(efd) = cfg.shutdown_efd.as_ref() { + #[cfg(target_os = "macos")] + return efd.get_write_fd(); + #[cfg(target_os = "linux")] + return efd.as_raw_fd(); + } else { + -libc::EINVAL + } + } + Entry::Vacant(_) => -libc::ENOENT, + } +} + #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_set_nested_virt(ctx_id: u32, enabled: bool) -> i32 { @@ -2248,19 +2254,6 @@ pub unsafe extern "C" fn krun_fs_add_overlay_dir( ) } -#[no_mangle] -pub extern "C" fn krun_disable_implicit_console(ctx_id: u32) -> i32 { - match 
CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - cfg.vmr.disable_implicit_console = true; - } - Entry::Vacant(_) => return -libc::ENOENT, - } - - KRUN_SUCCESS -} - #[no_mangle] pub extern "C" fn krun_disable_implicit_vsock(ctx_id: u32) -> i32 { match CTX_MAP.lock().unwrap().entry(ctx_id) { @@ -2601,10 +2594,6 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { ctx_cfg.vmr.set_gpu_shm_size(shm_size); } - if let Some(console_output) = ctx_cfg.console_output { - ctx_cfg.vmr.set_console_output(console_output); - } - if let Some(gid) = ctx_cfg.vmm_gid { if unsafe { libc::setgid(gid) } != 0 { error!("Failed to set gid {gid}"); diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 8e8ca4e18..23793390b 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -14,7 +14,6 @@ use std::fs::File; use std::io::{self, IsTerminal, Read}; use std::os::fd::AsRawFd; use std::os::fd::{BorrowedFd, FromRawFd}; -use std::path::PathBuf; use std::sync::atomic::AtomicI32; use std::sync::{Arc, Mutex}; @@ -728,16 +727,7 @@ pub fn build_microvm( let mut serial_devices = Vec::new(); - // Create the legacy serial device if we're booting from a firmware - if vm_resources.firmware_config.is_some() && !vm_resources.disable_implicit_console { - serial_devices.push(setup_serial_device( - event_manager, - None, - None, - // Uncomment this to get EFI output when debugging EDK2. 
- //Some(Box::new(io::stdout())), - )?); - }; + // We can't call to `setup_terminal_raw_mode` until `Vmm` is created, // so let's keep track of FDs connected to legacy serial devices here @@ -995,18 +985,6 @@ pub fn build_microvm( } } let mut console_id = 0; - if !vm_resources.disable_implicit_console { - attach_console_devices( - &mut vmm, - event_manager, - intc.clone(), - vm_resources, - None, - console_id, - )?; - console_id += 1; - } - for console_cfg in vm_resources.virtio_consoles.iter() { attach_console_devices( &mut vmm, @@ -2076,39 +2054,16 @@ fn attach_fs_devices( fn autoconfigure_console_ports( vmm: &mut Vmm, - vm_resources: &VmResources, + _vm_resources: &VmResources, cfg: Option<&DefaultVirtioConsoleConfig>, - creating_implicit_console: bool, ) -> std::result::Result, StartMicrovmError> { use self::StartMicrovmError::*; - let mut console_output_path: Option = None; - if let Some(path) = vm_resources.console_output.clone() { - if !vm_resources.disable_implicit_console && creating_implicit_console { - console_output_path = Some(path) - } - } - - if let Some(console_output_path) = console_output_path { - let file = File::create(console_output_path).map_err(OpenConsoleFile)?; - // Manually emulate our Legacy behavior: In the case of output_path we have always used the - // stdin to determine the console size - let stdin_fd = unsafe { BorrowedFd::borrow_raw(STDIN_FILENO) }; - let term_fd = if isatty(stdin_fd).is_ok_and(|v| v) { - port_io::term_fd(stdin_fd.as_raw_fd()).unwrap() - } else { - port_io::term_fixed_size(0, 0) - }; - Ok(vec![PortDescription::console( - Some(port_io::input_empty().unwrap()), - Some(port_io::output_file(file).unwrap()), - term_fd, - )]) - } else { - let (input_fd, output_fd, err_fd) = match cfg { - Some(c) => (c.input_fd, c.output_fd, c.err_fd), - None => (STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO), - }; + let (input_fd, output_fd, err_fd) = match cfg { + Some(c) => (c.input_fd, c.output_fd, c.err_fd), + None => (STDIN_FILENO, 
STDOUT_FILENO, STDERR_FILENO), + }; + { let input_is_terminal = input_fd >= 0 && isatty(unsafe { BorrowedFd::borrow_raw(input_fd) }).unwrap_or(false); let output_is_terminal = @@ -2269,16 +2224,11 @@ fn attach_console_devices( ) -> std::result::Result<(), StartMicrovmError> { use self::StartMicrovmError::*; - let creating_implicit_console = cfg.is_none(); - let ports = match cfg { - None => autoconfigure_console_ports(vmm, vm_resources, None, creating_implicit_console)?, - Some(VirtioConsoleConfigMode::Autoconfigure(autocfg)) => autoconfigure_console_ports( - vmm, - vm_resources, - Some(autocfg), - creating_implicit_console, - )?, + None => autoconfigure_console_ports(vmm, vm_resources, None)?, + Some(VirtioConsoleConfigMode::Autoconfigure(autocfg)) => { + autoconfigure_console_ports(vmm, vm_resources, Some(autocfg))? + } Some(VirtioConsoleConfigMode::Explicit(ports)) => create_explicit_ports(vmm, ports)?, }; diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index ffa9e6eac..66cca33f4 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -8,7 +8,6 @@ use std::fs::File; #[cfg(feature = "tee")] use std::io::BufReader; use std::os::fd::RawFd; -use std::path::PathBuf; #[cfg(feature = "tee")] use serde::{Deserialize, Serialize}; @@ -189,16 +188,13 @@ pub struct VmResources { #[cfg(feature = "vhost-user")] /// Vhost-user device configurations pub vhost_user_devices: Vec, - /// File to send console output. - pub console_output: Option, /// SMBIOS OEM Strings pub smbios_oem_strings: Option>, /// Whether to enable nested virtualization. 
pub nested_enabled: bool, /// Whether to enable split irqchip pub split_irqchip: bool, - /// Do not create an implicit console device in the guest - pub disable_implicit_console: bool, + /// The console id to use for console= in the kernel cmdline pub kernel_console: Option, /// Serial consoles to attach to the guest @@ -358,10 +354,6 @@ impl VmResources { self.gpu_shm_size = Some(shm_size); } - pub fn set_console_output(&mut self, console_output: PathBuf) { - self.console_output = Some(console_output); - } - /// Sets a network device to be attached when the VM starts. #[cfg(feature = "net")] pub fn add_network_interface( @@ -400,8 +392,6 @@ impl VmResources { #[cfg(test)] mod tests { - #[cfg(feature = "gpu")] - use crate::resources::DisplayBackendConfig; use crate::resources::VmResources; use crate::vmm_config::kernel_cmdline::KernelCmdlineConfig; use crate::vmm_config::machine_config::{CpuFeaturesTemplate, VmConfig, VmConfigError}; @@ -440,11 +430,10 @@ mod tests { input_backends: Vec::new(), #[cfg(feature = "vhost-user")] vhost_user_devices: Vec::new(), - console_output: None, smbios_oem_strings: None, nested_enabled: false, split_irqchip: false, - disable_implicit_console: false, + serial_consoles: Vec::new(), virtio_consoles: Vec::new(), kernel_console: None, diff --git a/tests/test_cases/src/test_multiport_console.rs b/tests/test_cases/src/test_multiport_console.rs index 44583a629..7717d0610 100644 --- a/tests/test_cases/src/test_multiport_console.rs +++ b/tests/test_cases/src/test_multiport_console.rs @@ -50,11 +50,14 @@ mod host { impl Test for TestMultiportConsole { fn start_vm(self: Box, test_setup: TestSetup) -> anyhow::Result<()> { unsafe { - krun_call!(krun_init_log(KRUN_LOG_TARGET_DEFAULT, KRUN_LOG_LEVEL_TRACE, KRUN_LOG_STYLE_AUTO, 0))?; + krun_call!(krun_init_log( + KRUN_LOG_TARGET_DEFAULT, + KRUN_LOG_LEVEL_TRACE, + KRUN_LOG_STYLE_AUTO, + 0 + ))?; let ctx = krun_call_u32!(krun_create_ctx())?; - krun_call!(krun_disable_implicit_console(ctx))?; - // 
Add a default console (as with other tests this uses stdout for writing "OK") krun_call!(krun_add_virtio_console_default( ctx, From f5b186564150fcb8e47fdbed1799607f2ce5c635 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 18:06:49 +0200 Subject: [PATCH 32/34] tests/examples: use explicit krun_add_vsock everywhere - test_tsi_tcp_guest_connect: add krun_add_vsock(ctx, KRUN_TSI_HIJACK_INET) - test_tsi_tcp_guest_listen: same - test_vsock_guest_connect: add krun_add_vsock(ctx, 0) - chroot_vm.c: replace krun_disable_implicit_vsock + vhost-user with explicit krun_add_vsock when not using vhost-user-vsock No test or example relies on implicit vsock creation anymore. Assisted-by: OpenCode:claude-opus-4.6 --- examples/chroot_vm.c | 18 ++++++++++-------- .../src/test_tsi_tcp_guest_connect.rs | 1 + .../src/test_tsi_tcp_guest_listen.rs | 1 + .../test_cases/src/test_vsock_guest_connect.rs | 1 + 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/examples/chroot_vm.c b/examples/chroot_vm.c index 6845242d8..03171ba7b 100644 --- a/examples/chroot_vm.c +++ b/examples/chroot_vm.c @@ -363,14 +363,8 @@ int main(int argc, char *const argv[]) printf("Using vhost-user sound backend at %s\n", cmdline.vhost_user_snd_socket); } - // Configure vhost-user vsock if requested + // Configure vsock: either vhost-user or built-in with TSI if (cmdline.vhost_user_vsock_socket != NULL) { - // Disable the implicit vsock device to avoid conflict - if (!check_krun_error(krun_disable_implicit_vsock(ctx_id), - "Error disabling implicit vsock")) { - return -1; - } - if (!check_krun_error(krun_add_vhost_user_device(ctx_id, KRUN_VIRTIO_DEVICE_VSOCK, cmdline.vhost_user_vsock_socket, NULL, KRUN_VHOST_USER_VSOCK_NUM_QUEUES, @@ -425,8 +419,16 @@ int main(int argc, char *const argv[]) return -1; } + // Add built-in vsock with TSI when not using vhost-user-vsock + if (cmdline.vhost_user_vsock_socket == NULL) { + if (err = krun_add_vsock(ctx_id, KRUN_TSI_HIJACK_INET)) { + errno = -err; + 
perror("Error configuring vsock"); + return -1; + } + } + // Map port 18000 in the host to 8000 in the guest (if networking uses TSI) - // Skip port mapping when using vhost-user-vsock (TSI requires built-in vsock) if (cmdline.net_mode == NET_MODE_TSI && cmdline.vhost_user_vsock_socket == NULL) { if (err = krun_set_port_map(ctx_id, &port_map[0])) { errno = -err; diff --git a/tests/test_cases/src/test_tsi_tcp_guest_connect.rs b/tests/test_cases/src/test_tsi_tcp_guest_connect.rs index 2eb7b30dc..f574eb3e1 100644 --- a/tests/test_cases/src/test_tsi_tcp_guest_connect.rs +++ b/tests/test_cases/src/test_tsi_tcp_guest_connect.rs @@ -39,6 +39,7 @@ mod host { 0 ))?; let ctx = krun_call_u32!(krun_create_ctx())?; + krun_call!(krun_add_vsock(ctx, KRUN_TSI_HIJACK_INET))?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; krun_call!(krun_add_virtio_console_default( ctx, diff --git a/tests/test_cases/src/test_tsi_tcp_guest_listen.rs b/tests/test_cases/src/test_tsi_tcp_guest_listen.rs index 09ca79ebf..8197cb855 100644 --- a/tests/test_cases/src/test_tsi_tcp_guest_listen.rs +++ b/tests/test_cases/src/test_tsi_tcp_guest_listen.rs @@ -47,6 +47,7 @@ mod host { let port_mapping = CString::new(port_mapping).unwrap(); let port_map = [port_mapping.as_ptr(), null()]; + krun_call!(krun_add_vsock(ctx, KRUN_TSI_HIJACK_INET))?; krun_call!(krun_set_port_map(ctx, port_map.as_ptr()))?; krun_call!(krun_set_vm_config(ctx, 1, 512))?; krun_call!(krun_add_virtio_console_default( diff --git a/tests/test_cases/src/test_vsock_guest_connect.rs b/tests/test_cases/src/test_vsock_guest_connect.rs index 8343495ae..bc82b3800 100644 --- a/tests/test_cases/src/test_vsock_guest_connect.rs +++ b/tests/test_cases/src/test_vsock_guest_connect.rs @@ -71,6 +71,7 @@ mod host { 0 ))?; let ctx = krun_call_u32!(krun_create_ctx())?; + krun_call!(krun_add_vsock(ctx, 0))?; krun_call!(krun_add_vsock_port2( ctx, VSOCK_PORT, From 203f72f4bb0d7d306ae32652b670084cb32ef64c Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 
2026 18:06:57 +0200 Subject: [PATCH 33/34] lib: remove krun_disable_implicit_vsock and implicit vsock creation Vsock creation is now fully explicit via krun_add_vsock(). No vsock device is created unless the caller requests one. Remove the Implicit variant from VsockConfig, the implicit vsock creation heuristics in krun_start_enter, and krun_disable_implicit_vsock. Assisted-by: OpenCode:claude-opus-4.6 --- include/libkrun.h | 18 ------------------ src/libkrun/src/lib.rs | 40 ---------------------------------------- src/vmm/src/resources.rs | 6 ++---- 3 files changed, 2 insertions(+), 62 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index a237fd59b..7efbc1d5a 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -757,10 +757,6 @@ int32_t krun_add_vsock_port2(uint32_t ctx_id, /** * Add a vsock device with specified TSI features. * - * By default, libkrun creates a vsock device implicitly with TSI hijacking - * enabled based on heuristics. To use this function, you must first call - * krun_disable_implicit_vsock() to disable the implicit vsock device. - * * Currently only one vsock device is supported. Calling this function * multiple times will return an error. * @@ -981,20 +977,6 @@ int32_t krun_fs_add_overlay_file(uint32_t ctx_id, const char *fs_tag, int32_t krun_fs_add_overlay_dir(uint32_t ctx_id, const char *fs_tag, const char *path, uint32_t mode); -/** - * Disable the implicit vsock device. - * - * By default, libkrun creates a vsock device automatically. This function - * disables that behavior entirely - no vsock device will be created. - * - * Arguments: - * "ctx_id" - the configuration context ID. - * - * Returns: - * Zero on success or a negative error number on failure. - */ -int32_t krun_disable_implicit_vsock(uint32_t ctx_id); - /* * Specify the value of `console=` in the kernel commandline. 
* diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index b14c57645..6fb69bea3 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -2254,19 +2254,6 @@ pub unsafe extern "C" fn krun_fs_add_overlay_dir( ) } -#[no_mangle] -pub extern "C" fn krun_disable_implicit_vsock(ctx_id: u32) -> i32 { - match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let cfg = ctx_cfg.get_mut(); - cfg.vsock_config = VsockConfig::Disabled; - } - Entry::Vacant(_) => return -libc::ENOENT, - } - - KRUN_SUCCESS -} - #[no_mangle] pub extern "C" fn krun_add_vsock(ctx_id: u32, tsi_features: u32) -> i32 { let tsi_flags = match TsiFlags::from_bits(tsi_features) { @@ -2558,33 +2545,6 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { }; ctx_cfg.vmr.set_vsock_device(vsock_device_config).unwrap(); } - VsockConfig::Implicit => { - // Implicit vsock configuration - use heuristics - // Check if TSI should be enabled based on network configuration - #[cfg(feature = "net")] - let enable_tsi = ctx_cfg.vmr.net.list.is_empty(); - #[cfg(not(feature = "net"))] - let enable_tsi = true; - - let has_ipc_map = ctx_cfg.unix_ipc_port_map.is_some(); - - if enable_tsi || has_ipc_map { - let (tsi_flags, host_port_map) = if enable_tsi { - (TsiFlags::HIJACK_INET, ctx_cfg.tsi_port_map) - } else { - (TsiFlags::empty(), None) - }; - - let vsock_device_config = VsockDeviceConfig { - vsock_id: "vsock0".to_string(), - guest_cid: 3, - host_port_map, - unix_ipc_port_map: ctx_cfg.unix_ipc_port_map.clone(), - tsi_flags, - }; - ctx_cfg.vmr.set_vsock_device(vsock_device_config).unwrap(); - } - } } if let Some(virgl_flags) = ctx_cfg.gpu_virgl_flags { diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 66cca33f4..9b15bc66a 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -130,13 +130,11 @@ pub enum PortConfig { /// Configuration for the vsock device #[derive(Debug, Default, Clone, Eq, PartialEq)] pub enum VsockConfig { - /// 
Default behavior - vsock created implicitly with heuristics-based TSI + /// No vsock device #[default] - Implicit, + Disabled, /// Explicit configuration with specified TSI features Explicit { tsi_flags: TsiFlags }, - /// Vsock device disabled - Disabled, } /// A data structure that encapsulates the device configurations From a789674583b577d3d402eb4b28b4927916835546 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 20 May 2026 18:07:23 +0200 Subject: [PATCH 34/34] include: remove stale implicit resource creation comment All krun_disable_implicit_* functions are gone. The 2.0 API requires explicit resource creation. Assisted-by: OpenCode:claude-opus-4.6 --- include/libkrun.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index 7efbc1d5a..9356e0d96 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -885,14 +885,6 @@ int32_t krun_get_max_vcpus(void); */ int32_t krun_split_irqchip(uint32_t ctx_id, bool enable); -/* - * NOTE: Implicit resource creation is a legacy convenience. The 2.0 API - * (see https://github.com/containers/libkrun/issues/634) will not create - * any implicit resources. Callers should start using the - * krun_disable_implicit_* functions now to ease migration. - */ - - /** * Sets the init configuration from an OCI container-spec JSON string. *