Skip to content

Commit b5ebec1

Browse files
frankdavidIDX GitHub Automation
andauthored
chore: Extract reusable device-related code and add small improvements (#6060)
There was quite a bit of code related to devices in guest_vm_runner that would be useful in other codepaths so this PR extracts the code out to a separate crate. I've also found that when we attach a disk image to a loop device, there is a race condition since finding a free loop device and attaching the image is not atomic. This PR adds a fix. --------- Co-authored-by: IDX GitHub Automation <infra+github-automation@dfinity.org>
1 parent 381c7e9 commit b5ebec1

File tree

13 files changed

+208
-77
lines changed

13 files changed

+208
-77
lines changed

Cargo.lock

Lines changed: 18 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ members = [
144144
"rs/ic_os/config_types",
145145
"rs/ic_os/config_types/compatibility_tests",
146146
"rs/ic_os/deterministic_ips",
147+
"rs/ic_os/device",
147148
"rs/ic_os/dev_test_tools/launch-single-vm",
148149
"rs/ic_os/dev_test_tools/setupos-disable-checks",
149150
"rs/ic_os/dev_test_tools/setupos-image-config",
@@ -551,6 +552,7 @@ curve25519-dalek = { version = "4.1.3", features = [
551552
] }
552553
der = { version = "0.7", default-features = false }
553554
derive-new = "0.7.0"
555+
devicemapper = "0.34"
554556
dfx-core = { version = "0.1.4" }
555557
ed25519-dalek = { version = "2.1.1", features = [
556558
"std",
@@ -636,6 +638,7 @@ libc = "0.2.158"
636638
libflate = "2.1.0"
637639
libnss = "0.5.0"
638640
local-ip-address = "0.5.6"
641+
loopdev-3 = "0.5"
639642
macaddr = "1.0"
640643
memmap2 = "0.9.5"
641644
minicbor = { version = "0.19.1", features = ["alloc", "derive"] }

rs/ic_os/device/BUILD.bazel

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
load("@rules_rust//rust:defs.bzl", "rust_library", "rust_test")
2+
3+
package(default_visibility = ["//rs/ic_os:__subpackages__"])
4+
5+
DEPENDENCIES = [
6+
# Keep sorted.
7+
"//rs/ic_os/build_tools/partition_tools",
8+
"@crate_index//:anyhow",
9+
"@crate_index//:devicemapper",
10+
"@crate_index//:gpt",
11+
"@crate_index//:loopdev-3",
12+
"@crate_index//:nix",
13+
"@crate_index//:sys-mount",
14+
"@crate_index//:tempfile",
15+
"@crate_index//:tokio",
16+
"@crate_index//:uuid",
17+
]
18+
19+
MACRO_DEPENDENCIES = [
20+
# Keep sorted.
21+
"@crate_index//:async-trait",
22+
]
23+
24+
DEV_DEPENDENCIES = [
25+
# Keep sorted.
26+
]
27+
28+
rust_library(
29+
name = "device",
30+
srcs = glob(["src/**/*.rs"]),
31+
crate_name = "ic_device",
32+
proc_macro_deps = MACRO_DEPENDENCIES,
33+
target_compatible_with = [
34+
"@platforms//os:linux",
35+
],
36+
deps = DEPENDENCIES,
37+
)
38+
39+
rust_test(
40+
name = "device_test",
41+
crate = ":device",
42+
target_compatible_with = [
43+
"@platforms//os:linux",
44+
],
45+
deps = DEV_DEPENDENCIES,
46+
)

rs/ic_os/device/Cargo.toml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[package]
2+
name = "ic_device"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
anyhow = { workspace = true }
8+
async-trait = { workspace = true }
9+
devicemapper = { workspace = true }
10+
gpt = { workspace = true }
11+
loopdev-3 = { workspace = true }
12+
nix = { workspace = true }
13+
partition_tools = { path = "../build_tools/partition_tools" }
14+
tempfile = { workspace = true }
15+
tokio = { workspace = true }
16+
uuid = { workspace = true }
17+
18+
[target.'cfg(target_os = "linux")'.dependencies]
19+
sys-mount = { workspace = true }

rs/ic_os/os_tools/guest_vm_runner/src/device_mapping.rs renamed to rs/ic_os/device/src/device_mapping.rs

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::io::retry_if_busy;
12
use anyhow::{ensure, Context, Result};
23
use devicemapper::{
34
devnode_to_devno, Bytes, DevId, Device, DmName, DmOptions, LinearDevTargetParams,
@@ -13,6 +14,7 @@ use std::path::{Path, PathBuf};
1314
use std::sync::Arc;
1415
use tempfile::{NamedTempFile, TempPath};
1516

17+
#[allow(clippy::len_without_is_empty)]
1618
pub trait DeviceTrait: Send + Sync {
1719
fn len(&self) -> Sectors;
1820
fn device(&self) -> Device;
@@ -158,10 +160,8 @@ impl TempDevice {
158160

159161
let temp_path = temp_file.into_temp_path();
160162

161-
let loop_device = LoopDeviceWrapper(loopdev::LoopControl::open()?.next_free()?);
162-
loop_device
163-
.attach_file(&temp_path)
164-
.context("Temp loopback attach_file failed")?;
163+
let loop_device = LoopDeviceWrapper::attach_to_next_free(&temp_path)
164+
.context("Temp loopback creation failed")?;
165165

166166
let minor = loop_device
167167
.minor()
@@ -192,6 +192,22 @@ impl DeviceTrait for TempDevice {
192192
/// Wrapper around a loop device that automatically detaches it when dropped.
193193
pub struct LoopDeviceWrapper(pub LoopDevice);
194194

195+
impl LoopDeviceWrapper {
196+
/// Opens a loop device and attaches it to the specified file.
197+
pub fn attach_to_next_free(path: &Path) -> Result<Self> {
198+
// next_free() will return the same loop device until a file is attached to it, so
199+
// it can happen that a parallel process gets the same loop device and attaches a file
200+
// to it before we do. In this case attach_file will fail with ResourceBusy.
201+
// We solve this by retrying the operation.
202+
retry_if_busy(|| {
203+
let loop_device = Self(loopdev::LoopControl::open()?.next_free()?);
204+
loop_device.attach_file(path)?;
205+
Ok(loop_device)
206+
})
207+
.context("Failed to attach loop device")
208+
}
209+
}
210+
195211
impl Drop for LoopDeviceWrapper {
196212
fn drop(&mut self) {
197213
#[cfg(debug_assertions)]
@@ -230,7 +246,7 @@ impl Deref for LoopDeviceWrapper {
230246
/// dependencies, these are other devices that the [MappedDevice] depends on and should be cleaned
231247
/// up when the [MappedDevice] is dropped.
232248
pub struct MappedDevice {
233-
name: &'static str,
249+
name: String,
234250
path: PathBuf,
235251
len: Sectors,
236252
device_mapper: Arc<DM>,
@@ -286,7 +302,7 @@ impl MappedDevice {
286302
/// `source` must stay valid for the lifetime of the snapshot.
287303
pub fn create_snapshot(
288304
device_mapper: Arc<DM>,
289-
name: &'static str,
305+
name: &str,
290306
source: Box<dyn DeviceTrait>,
291307
copy_on_write: Box<dyn DeviceTrait>,
292308
) -> Result<MappedDevice> {
@@ -307,7 +323,7 @@ impl MappedDevice {
307323

308324
fn create(
309325
dm: Arc<DM>,
310-
name: &'static str,
326+
name: &str,
311327
table: &[(u64, u64, String, String)],
312328
len: Sectors,
313329
dependencies: Vec<Box<dyn DeviceTrait>>,
@@ -319,7 +335,7 @@ impl MappedDevice {
319335
// Wrap the device right away by creating a MappedDevice so it gets detached in the
320336
// MappedDevice Drop impl if there is an error later.
321337
let mapped_device = MappedDevice {
322-
name,
338+
name: name.to_string(),
323339
path: format!("/dev/mapper/{name}").into(),
324340
device: device.device(),
325341
len,
@@ -351,7 +367,7 @@ impl Drop for MappedDevice {
351367
debug_assert_valid(self);
352368

353369
if let Err(err) = self.device_mapper.device_remove(
354-
&DevId::Name(DmName::new(self.name).unwrap()),
370+
&DevId::Name(DmName::new(&self.name).unwrap()),
355371
DmOptions::default(),
356372
) {
357373
debug_panic(&format!("Failed to remove device mapper device: {err}"));

rs/ic_os/device/src/io.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
use std::io;
2+
3+
/// Retries a function, returning its result if it succeeds, or retrying if it fails with
4+
/// ResourceBusy.
5+
pub fn retry_if_busy<T>(mut f: impl FnMut() -> io::Result<T>) -> io::Result<T> {
6+
const MAX_ATTEMPTS: i32 = 10;
7+
let mut attempts = MAX_ATTEMPTS;
8+
loop {
9+
match f() {
10+
Ok(res) => return Ok(res),
11+
Err(e) => {
12+
if e.kind() == io::ErrorKind::ResourceBusy && attempts > 0 {
13+
attempts -= 1;
14+
} else {
15+
return Err(e);
16+
}
17+
}
18+
}
19+
}
20+
}
21+
22+
#[cfg(test)]
23+
mod tests {
24+
use super::*;
25+
26+
#[test]
27+
fn test_retry_if_busy() {
28+
let mut attempts = 0;
29+
let result = retry_if_busy(|| {
30+
attempts += 1;
31+
if attempts < 3 {
32+
Err(io::Error::new(io::ErrorKind::ResourceBusy, "busy"))
33+
} else {
34+
Ok("success")
35+
}
36+
});
37+
38+
assert_eq!(result.unwrap(), "success");
39+
assert_eq!(attempts, 3);
40+
}
41+
42+
#[test]
43+
fn test_retry_if_busy_failure() {
44+
let result: Result<(), _> =
45+
retry_if_busy(|| Err(io::Error::new(io::ErrorKind::Other, "fail")));
46+
assert!(result.is_err());
47+
}
48+
}

rs/ic_os/device/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
pub mod device_mapping;
2+
mod io;
3+
pub mod mount;

rs/ic_os/os_tools/guest_vm_runner/src/mount.rs renamed to rs/ic_os/device/src/mount.rs

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::io::retry_if_busy;
12
use anyhow::{Context, Error, Result};
23
use async_trait::async_trait;
34
use gpt::GptDisk;
@@ -151,6 +152,7 @@ impl PartitionProvider for GptPartitionProvider {
151152
self.mounter
152153
.mount_range(self.device.clone(), offset_bytes, len_bytes, options)
153154
.await
155+
.context("mount_range failed")
154156
}
155157
}
156158

@@ -188,12 +190,15 @@ impl Mounter for LoopDeviceMounter {
188190
let tempdir = TempDir::new()?;
189191
let mount_point = tempdir.path();
190192
Ok::<LoopDeviceMount, Error>(LoopDeviceMount {
191-
mount: Mount::builder()
192-
.fstype(FilesystemType::Manual(options.file_system.as_str()))
193-
.loopback_offset(offset_bytes)
194-
.flags(MountFlags::empty())
195-
.explicit_loopback()
196-
.mount_autodrop(device, mount_point, UnmountFlags::empty())?,
193+
mount: retry_if_busy(|| {
194+
Mount::builder()
195+
.fstype(FilesystemType::Manual(options.file_system.as_str()))
196+
.loopback_offset(offset_bytes)
197+
.flags(MountFlags::empty())
198+
.explicit_loopback()
199+
.mount_autodrop(&device, mount_point, UnmountFlags::empty())
200+
})
201+
.context("Failed to create mount")?,
197202
_tempdir: tempdir,
198203
})
199204
})
@@ -202,8 +207,6 @@ impl Mounter for LoopDeviceMounter {
202207
}
203208
}
204209

205-
#[cfg(test)]
206-
#[allow(dead_code)]
207210
pub mod testing {
208211
use super::*;
209212
use anyhow::Context;
@@ -264,7 +267,7 @@ pub mod testing {
264267
///
265268
/// The mounter "remembers" the extracted partitions, so extracting the same device/offset/len
266269
/// always returns the same directory.
267-
#[derive(Clone)]
270+
#[derive(Clone, Default)]
268271
pub struct ExtractingFilesystemMounter {
269272
#[allow(clippy::disallowed_types)] // Using tokio Mutex in testing is fine.
270273
mounts: Arc<tokio::sync::Mutex<PartitionMap>>,
@@ -301,13 +304,6 @@ pub mod testing {
301304
}
302305

303306
impl ExtractingFilesystemMounter {
304-
#[allow(dead_code)]
305-
pub fn new() -> Self {
306-
Self {
307-
mounts: Default::default(),
308-
}
309-
}
310-
311307
async fn extract_partition_to_tempdir(
312308
&self,
313309
device: PathBuf,

rs/ic_os/os_tools/guest_vm_runner/BUILD.bazel

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ DEPENDENCIES = [
88
":build_script",
99
"//rs/ic_os/config_types",
1010
"//rs/ic_os/deterministic_ips",
11+
"//rs/ic_os/device",
1112
"//rs/ic_os/grub",
1213
"//rs/ic_os/metrics_tool",
1314
"//rs/ic_os/sev",
@@ -16,11 +17,9 @@ DEPENDENCIES = [
1617
"@crate_index//:clap",
1718
"@crate_index//:devicemapper",
1819
"@crate_index//:gpt",
19-
"@crate_index//:loopdev-3",
2020
"@crate_index//:macaddr",
2121
"@crate_index//:nix",
2222
"@crate_index//:regex",
23-
"@crate_index//:sys-mount",
2423
"@crate_index//:systemd",
2524
"@crate_index//:tempfile",
2625
"@crate_index//:thiserror",
@@ -38,9 +37,7 @@ DEV_DEPENDENCIES = [
3837
"@crate_index//:url",
3938
]
4039

41-
MACRO_DEPENDENCIES = [
42-
"@crate_index//:async-trait",
43-
]
40+
MACRO_DEPENDENCIES = []
4441

4542
cargo_build_script(
4643
name = "build_script",

0 commit comments

Comments
 (0)