From 19d90186551bfd9f3c8942f13b10578981f3f674 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Tue, 26 May 2026 12:24:05 -0700
Subject: [PATCH 01/25] feat(hm-util): add platform-native COW clone +
fuse-overlayfs backend
---
Cargo.lock | 5 +-
crates/hm-util/Cargo.toml | 4 +
crates/hm-util/src/cow.rs | 327 ++++++++++++++++++++++++++++++++++++++
crates/hm-util/src/lib.rs | 1 +
4 files changed, 336 insertions(+), 1 deletion(-)
create mode 100644 crates/hm-util/src/cow.rs
diff --git a/Cargo.lock b/Cargo.lock
index a372b05..b7f1902 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1211,7 +1211,7 @@ dependencies = [
[[package]]
name = "hm-plugin-cloud"
-version = "0.1.0"
+version = "0.0.0-dev"
dependencies = [
"anyhow",
"base64",
@@ -1248,9 +1248,12 @@ dependencies = [
name = "hm-util"
version = "0.0.0-dev"
dependencies = [
+ "anyhow",
"dirs",
"tempfile",
"tokio",
+ "tracing",
+ "which 6.0.3",
"windows",
]
diff --git a/crates/hm-util/Cargo.toml b/crates/hm-util/Cargo.toml
index 9e10e8a..4eea739 100644
--- a/crates/hm-util/Cargo.toml
+++ b/crates/hm-util/Cargo.toml
@@ -7,8 +7,12 @@ repository.workspace = true
description = "Shared OS and filesystem utilities for Harmont crates."
[dependencies]
+anyhow = { workspace = true }
dirs = "6"
+tempfile = "3"
tokio = { version = "1", features = ["rt", "rt-multi-thread", "fs", "io-util"] }
+tracing = { workspace = true }
+which = "6"
[target.'cfg(windows)'.dependencies.windows]
version = "0.62"
diff --git a/crates/hm-util/src/cow.rs b/crates/hm-util/src/cow.rs
new file mode 100644
index 0000000..73ba301
--- /dev/null
+++ b/crates/hm-util/src/cow.rs
@@ -0,0 +1,327 @@
+//! Platform-native copy-on-write directory cloning.
+
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+use anyhow::{Context, Result, bail};
+
+// -----------------------------------------------------------------------
+// Strategy detection
+// -----------------------------------------------------------------------
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CowStrategy {
+ ApfsClone,
+ Reflink,
+ FuseOverlay,
+ FullCopy,
+}
+
+/// Detect the best available COW strategy for the current platform.
+#[must_use]
+#[allow(clippy::missing_const_for_fn)] // linux branch calls runtime functions
+pub fn detect_strategy() -> CowStrategy {
+ #[cfg(target_os = "macos")]
+ {
+ return CowStrategy::ApfsClone;
+ }
+
+ #[cfg(target_os = "linux")]
+ {
+ if probe_reflink() {
+ return CowStrategy::Reflink;
+ }
+ if probe_fuse_overlayfs() {
+ return CowStrategy::FuseOverlay;
+ }
+ return CowStrategy::FullCopy;
+ }
+
+ #[allow(unreachable_code)]
+ CowStrategy::FullCopy
+}
+
+#[cfg(target_os = "linux")]
+fn probe_reflink() -> bool {
+ let tmp = match tempfile::tempdir() {
+ Ok(t) => t,
+ Err(_) => return false,
+ };
+ let src = tmp.path().join("src");
+ let dst = tmp.path().join("dst");
+ if std::fs::write(&src, b"x").is_err() {
+ return false;
+ }
+ Command::new("cp")
+ .args(["--reflink=always"])
+ .arg(&src)
+ .arg(&dst)
+ .stderr(std::process::Stdio::null())
+ .status()
+ .is_ok_and(|s| s.success())
+}
+
+#[cfg(target_os = "linux")]
+fn probe_fuse_overlayfs() -> bool {
+ which::which("fuse-overlayfs").is_ok()
+}
+
+// -----------------------------------------------------------------------
+// cow_clone_dir
+// -----------------------------------------------------------------------
+
+/// Clone `src` to `dst` using the best available COW mechanism.
+///
+/// # Errors
+///
+/// Returns an error if `dst` already exists, if parent directories cannot
+/// be created, or if the underlying copy operation fails.
+pub fn cow_clone_dir(src: &Path, dst: &Path) -> Result<()> {
+ if dst.exists() {
+ bail!("destination already exists: {}", dst.display());
+ }
+ if let Some(parent) = dst.parent() {
+ std::fs::create_dir_all(parent)
+ .with_context(|| format!("create parent dirs for {}", dst.display()))?;
+ }
+
+ if try_platform_cow(src, dst)? {
+ return Ok(());
+ }
+
+ copy_dir_recursive(src, dst)
+}
+
+fn try_platform_cow(src: &Path, dst: &Path) -> Result<bool> {
+    #[cfg(target_os = "macos")]
+    {
+        let status = Command::new("cp")
+            .args(["-c", "-R", "-p"])
+            .arg(src)
+            .arg(dst)
+            .stderr(std::process::Stdio::null())
+            .status()
+            .context("spawn cp -c")?;
+        if status.success() {
+            return Ok(true);
+        }
+        let _ = std::fs::remove_dir_all(dst);
+    }
+
+    #[cfg(target_os = "linux")]
+    {
+        let status = Command::new("cp")
+            .args(["--reflink=always", "-a"])
+            .arg(src)
+            .arg(dst)
+            .stderr(std::process::Stdio::null())
+            .status()
+            .context("spawn cp --reflink")?;
+        if status.success() {
+            return Ok(true);
+        }
+        let _ = std::fs::remove_dir_all(dst);
+
+        let status = Command::new("cp")
+            .args(["-a"])
+            .arg(src)
+            .arg(dst)
+            .stderr(std::process::Stdio::null())
+            .status()
+            .context("spawn cp -a")?;
+        if status.success() {
+            return Ok(true);
+        }
+        let _ = std::fs::remove_dir_all(dst);
+    }
+
+    Ok(false)
+}
+
+fn copy_dir_recursive(src: &Path, dst: &Path) -> Result<()> {
+ std::fs::create_dir_all(dst)
+ .with_context(|| format!("create {}", dst.display()))?;
+ for entry in std::fs::read_dir(src)
+ .with_context(|| format!("read dir {}", src.display()))?
+ {
+ let entry = entry?;
+ let ty = entry.file_type()?;
+ let src_path = entry.path();
+ let dst_path = dst.join(entry.file_name());
+ if ty.is_dir() {
+ copy_dir_recursive(&src_path, &dst_path)?;
+ } else if ty.is_symlink() {
+ let target = std::fs::read_link(&src_path)?;
+ #[cfg(unix)]
+ std::os::unix::fs::symlink(&target, &dst_path)?;
+ #[cfg(windows)]
+ std::os::windows::fs::symlink_file(&target, &dst_path)?;
+ } else {
+ std::fs::copy(&src_path, &dst_path)
+ .with_context(|| format!("copy {}", src_path.display()))?;
+ }
+ }
+ Ok(())
+}
+
+// -----------------------------------------------------------------------
+// OverlayMount — fuse-overlayfs lifecycle (strategy 3)
+// -----------------------------------------------------------------------
+
+#[derive(Debug)]
+pub struct OverlayMount {
+ merged: PathBuf,
+ upper: PathBuf,
+}
+
+impl OverlayMount {
+ /// Mount a fuse-overlayfs filesystem merging the given layers.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if directory creation fails or `fuse-overlayfs`
+ /// exits with a non-zero status.
+    pub fn mount(
+        lower_dirs: &[&Path],
+        upper_dir: &Path,
+        work_dir: &Path,
+        merged_path: &Path,
+    ) -> Result<Self> {
+        std::fs::create_dir_all(upper_dir)?;
+        std::fs::create_dir_all(work_dir)?;
+        std::fs::create_dir_all(merged_path)?;
+
+        let lowerdir: String = lower_dirs
+            .iter()
+            .map(|p| p.to_string_lossy().into_owned())
+            .collect::<Vec<_>>()
+            .join(":");
+
+        let opts = format!(
+            "lowerdir={lowerdir},upperdir={},workdir={}",
+            upper_dir.display(),
+            work_dir.display(),
+        );
+
+        let status = Command::new("fuse-overlayfs")
+            .args(["-o", &opts])
+            .arg(merged_path)
+            .stderr(std::process::Stdio::piped())
+            .status()
+            .context("spawn fuse-overlayfs")?;
+
+        if !status.success() {
+            bail!(
+                "fuse-overlayfs mount failed (exit {}): lowerdir={}, upper={}, merged={}",
+                status.code().unwrap_or(-1),
+                lowerdir,
+                upper_dir.display(),
+                merged_path.display(),
+            );
+        }
+
+        Ok(Self {
+            merged: merged_path.to_path_buf(),
+            upper: upper_dir.to_path_buf(),
+        })
+    }
+
+ #[must_use]
+ pub fn merged_path(&self) -> &Path {
+ &self.merged
+ }
+
+ #[must_use]
+ pub fn upper_dir(&self) -> &Path {
+ &self.upper
+ }
+
+ /// Unmount the fuse-overlayfs filesystem.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if `fusermount` cannot be spawned or exits
+ /// with a non-zero status.
+ pub fn unmount(&self) -> Result<()> {
+ let bin = if which::which("fusermount3").is_ok() {
+ "fusermount3"
+ } else {
+ "fusermount"
+ };
+ let status = Command::new(bin)
+ .args(["-u"])
+ .arg(&self.merged)
+ .stderr(std::process::Stdio::null())
+ .status()
+ .with_context(|| format!("spawn {bin} -u"))?;
+ if !status.success() {
+ bail!("{bin} -u {} failed", self.merged.display());
+ }
+ Ok(())
+ }
+}
+
+impl Drop for OverlayMount {
+ fn drop(&mut self) {
+ if let Err(e) = self.unmount() {
+ tracing::warn!(%e, path = %self.merged.display(), "fuse-overlayfs unmount failed");
+ }
+ }
+}
+
+#[cfg(test)]
+#[allow(clippy::unwrap_used)]
+mod tests {
+ use super::*;
+ use std::fs;
+
+ #[test]
+ fn cow_clone_creates_identical_tree() {
+ let tmp = tempfile::tempdir().unwrap();
+ let src = tmp.path().join("src");
+ fs::create_dir_all(src.join("sub")).unwrap();
+ fs::write(src.join("a.txt"), b"hello").unwrap();
+ fs::write(src.join("sub/b.txt"), b"world").unwrap();
+
+ let dst = tmp.path().join("dst");
+ cow_clone_dir(&src, &dst).unwrap();
+
+ assert_eq!(fs::read_to_string(dst.join("a.txt")).unwrap(), "hello");
+ assert_eq!(fs::read_to_string(dst.join("sub/b.txt")).unwrap(), "world");
+ }
+
+ #[test]
+ fn cow_clone_is_isolated() {
+ let tmp = tempfile::tempdir().unwrap();
+ let src = tmp.path().join("src");
+ fs::create_dir(&src).unwrap();
+ fs::write(src.join("f.txt"), b"original").unwrap();
+
+ let dst = tmp.path().join("dst");
+ cow_clone_dir(&src, &dst).unwrap();
+
+ // Mutate dst; src must be unchanged.
+ fs::write(dst.join("f.txt"), b"modified").unwrap();
+ assert_eq!(fs::read_to_string(src.join("f.txt")).unwrap(), "original");
+ assert_eq!(fs::read_to_string(dst.join("f.txt")).unwrap(), "modified");
+ }
+
+ #[test]
+ fn cow_clone_fails_if_dst_exists() {
+ let tmp = tempfile::tempdir().unwrap();
+ let src = tmp.path().join("src");
+ fs::create_dir(&src).unwrap();
+ let dst = tmp.path().join("dst");
+ fs::create_dir(&dst).unwrap();
+
+ assert!(cow_clone_dir(&src, &dst).is_err());
+ }
+
+    #[test]
+    fn detect_strategy_returns_something() {
+        // The concrete strategy is platform-dependent (FuseOverlay is a valid
+        // result on Linux hosts without reflink but with fuse-overlayfs), so
+        // only require that detection completes without panicking.
+        let _strategy: CowStrategy = detect_strategy();
+    }
+}
diff --git a/crates/hm-util/src/lib.rs b/crates/hm-util/src/lib.rs
index c5284c5..e35ba64 100644
--- a/crates/hm-util/src/lib.rs
+++ b/crates/hm-util/src/lib.rs
@@ -1,2 +1,3 @@
+pub mod cow;
pub mod dirs;
pub mod os;
From 8255473e2c63cb01d7f191170d385d3fbbc90aab Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Tue, 26 May 2026 12:28:37 -0700
Subject: [PATCH 02/25] =?UTF-8?q?fix(cow):=20address=20code=20review=20?=
=?UTF-8?q?=E2=80=94=20cache=20strategy,=20fix=20deadlock,=20guard=20doubl?=
=?UTF-8?q?e-unmount?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crates/hm-util/src/cow.rs | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/crates/hm-util/src/cow.rs b/crates/hm-util/src/cow.rs
index 73ba301..ee798ec 100644
--- a/crates/hm-util/src/cow.rs
+++ b/crates/hm-util/src/cow.rs
@@ -2,6 +2,7 @@
use std::path::{Path, PathBuf};
use std::process::Command;
+use std::sync::OnceLock;
use anyhow::{Context, Result, bail};
@@ -18,9 +19,14 @@ pub enum CowStrategy {
}
/// Detect the best available COW strategy for the current platform.
+/// Result is cached after the first call.
#[must_use]
-#[allow(clippy::missing_const_for_fn)] // linux branch calls runtime functions
pub fn detect_strategy() -> CowStrategy {
+    static STRATEGY: OnceLock<CowStrategy> = OnceLock::new();
+    *STRATEGY.get_or_init(detect_strategy_inner)
+}
+
+fn detect_strategy_inner() -> CowStrategy {
#[cfg(target_os = "macos")]
{
return CowStrategy::ApfsClone;
@@ -168,10 +174,10 @@ fn copy_dir_recursive(src: &Path, dst: &Path) -> Result<()> {
// OverlayMount — fuse-overlayfs lifecycle (strategy 3)
// -----------------------------------------------------------------------
-#[derive(Debug)]
pub struct OverlayMount {
merged: PathBuf,
upper: PathBuf,
+ mounted: std::sync::atomic::AtomicBool,
}
impl OverlayMount {
@@ -203,17 +209,17 @@ impl OverlayMount {
work_dir.display(),
);
- let status = Command::new("fuse-overlayfs")
+ let output = Command::new("fuse-overlayfs")
.args(["-o", &opts])
.arg(merged_path)
- .stderr(std::process::Stdio::piped())
- .status()
+ .output()
.context("spawn fuse-overlayfs")?;
- if !status.success() {
+ if !output.status.success() {
+ let stderr = String::from_utf8_lossy(&output.stderr);
bail!(
- "fuse-overlayfs mount failed (exit {}): lowerdir={}, upper={}, merged={}",
- status.code().unwrap_or(-1),
+ "fuse-overlayfs mount failed (exit {}): {stderr}\nlowerdir={}, upper={}, merged={}",
+ output.status.code().unwrap_or(-1),
lowerdir,
upper_dir.display(),
merged_path.display(),
@@ -223,6 +229,7 @@ impl OverlayMount {
Ok(Self {
merged: merged_path.to_path_buf(),
upper: upper_dir.to_path_buf(),
+ mounted: std::sync::atomic::AtomicBool::new(true),
})
}
@@ -236,13 +243,16 @@ impl OverlayMount {
&self.upper
}
- /// Unmount the fuse-overlayfs filesystem.
+ /// Unmount the fuse-overlayfs filesystem. Safe to call multiple times.
///
/// # Errors
///
/// Returns an error if `fusermount` cannot be spawned or exits
/// with a non-zero status.
pub fn unmount(&self) -> Result<()> {
+ if !self.mounted.swap(false, std::sync::atomic::Ordering::AcqRel) {
+ return Ok(());
+ }
let bin = if which::which("fusermount3").is_ok() {
"fusermount3"
} else {
@@ -261,6 +271,15 @@ impl OverlayMount {
}
}
+impl std::fmt::Debug for OverlayMount {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("OverlayMount")
+ .field("merged", &self.merged)
+ .field("upper", &self.upper)
+ .finish()
+ }
+}
+
impl Drop for OverlayMount {
fn drop(&mut self) {
if let Err(e) = self.unmount() {
From 9eb928c2433d3bd55a5c4f4d38816afc3ace5056 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Tue, 26 May 2026 12:32:11 -0700
Subject: [PATCH 03/25] feat(docker): add start_long_lived_with_mounts for bind
mount support
Add build_host_config helper and start_long_lived_with_mounts method to
DockerClient so containers can mount host workspace directories via
HostConfig bind mounts. This enables the COW workspace caching feature
to bind-mount overlay filesystems into build containers.
---
crates/hm/src/orchestrator/docker_client.rs | 84 +++++++++++++++++++++
1 file changed, 84 insertions(+)
diff --git a/crates/hm/src/orchestrator/docker_client.rs b/crates/hm/src/orchestrator/docker_client.rs
index 866ffb8..89210e2 100644
--- a/crates/hm/src/orchestrator/docker_client.rs
+++ b/crates/hm/src/orchestrator/docker_client.rs
@@ -14,6 +14,7 @@ use bollard::container::{
StopContainerOptions,
};
use bollard::exec::{CreateExecOptions, StartExecResults};
+use bollard::models::HostConfig;
use bollard::image::{
CommitContainerOptions, CreateImageOptions, ImportImageOptions, ListImagesOptions,
RemoveImageOptions,
@@ -23,6 +24,25 @@ use tokio::io::AsyncWrite;
use crate::error::HmError;
+/// Build a [`HostConfig`] with optional bind mounts and Linux capabilities.
+///
+/// Empty slices become `None` so Docker applies its defaults.
+fn build_host_config(binds: &[String], cap_add: &[String]) -> HostConfig {
+ HostConfig {
+ binds: if binds.is_empty() {
+ None
+ } else {
+ Some(binds.to_vec())
+ },
+ cap_add: if cap_add.is_empty() {
+ None
+ } else {
+ Some(cap_add.to_vec())
+ },
+ ..Default::default()
+ }
+}
+
#[derive(Debug, Clone)]
pub struct DockerClient {
inner: Arc,
@@ -161,6 +181,50 @@ impl DockerClient {
Ok(create.id)
}
+ /// Like [`Self::start_long_lived`] but with bind mounts via `HostConfig`.
+ ///
+ /// Each entry in `binds` is a Docker bind-mount string of the form
+ /// `"/host/path:/container/path"` (with an optional `:ro` suffix).
+ ///
+ /// # Errors
+ ///
+ /// Returns [`HmError::Docker`] if the container cannot be created
+ /// (image not pulled, name conflict, OCI runtime failure) or if
+ /// `start_container` rejects the create.
+ pub async fn start_long_lived_with_mounts(
+ &self,
+ image: &str,
+ env: &[String],
+ workdir: &str,
+ name: &str,
+ binds: &[String],
+ ) -> Result {
+ let cfg = Config {
+ image: Some(image.to_string()),
+ cmd: Some(vec!["sh".into(), "-c".into(), "sleep infinity".into()]),
+ env: Some(env.to_vec()),
+ working_dir: Some(workdir.to_string()),
+ host_config: Some(build_host_config(binds, &[])),
+ ..Default::default()
+ };
+ let create = self
+ .inner
+ .create_container(
+ Some(CreateContainerOptions {
+ name,
+ ..Default::default()
+ }),
+ cfg,
+ )
+ .await
+ .map_err(|e| HmError::Docker(format!("create_container: {e}")))?;
+ self.inner
+ .start_container(&create.id, None::>)
+ .await
+ .map_err(|e| HmError::Docker(format!("start_container: {e}")))?;
+ Ok(create.id)
+ }
+
/// Exec a command inside a running container and stream stdout+stderr
/// to `out`. Returns the command's exit code.
///
@@ -530,4 +594,24 @@ mod smoke {
.unwrap();
assert!(tags.is_empty());
}
+
+ #[test]
+ fn build_host_config_with_binds_and_no_caps() {
+ let hc = super::build_host_config(
+ &["/host/path:/container/path".to_string()],
+ &[],
+ );
+ assert_eq!(
+ hc.binds.as_ref().unwrap(),
+ &["/host/path:/container/path".to_string()]
+ );
+ assert!(hc.cap_add.is_none());
+ }
+
+ #[test]
+ fn build_host_config_empty_binds_is_none() {
+ let hc = super::build_host_config(&[], &[]);
+ assert!(hc.binds.is_none());
+ assert!(hc.cap_add.is_none());
+ }
}
From c88fd4196cff751c671e706147c369f064d49f00 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Tue, 26 May 2026 12:32:42 -0700
Subject: [PATCH 04/25] feat(orchestrator): add WorkspaceManager with COW
cloning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Introduces WorkspaceManager that manages per-step workspace directories
for a single pipeline run. Auto-selects between clone strategy (macOS
APFS / Linux reflink / fallback cp) and overlay strategy (fuse-overlayfs)
based on platform detection from hm-util::cow. The rest of the system
only sees workspace_path() — strategy is transparent.
---
crates/hm/src/orchestrator/mod.rs | 1 +
crates/hm/src/orchestrator/workspace.rs | 358 ++++++++++++++++++++++++
2 files changed, 359 insertions(+)
create mode 100644 crates/hm/src/orchestrator/workspace.rs
diff --git a/crates/hm/src/orchestrator/mod.rs b/crates/hm/src/orchestrator/mod.rs
index 7c23600..f6fffcd 100644
--- a/crates/hm/src/orchestrator/mod.rs
+++ b/crates/hm/src/orchestrator/mod.rs
@@ -14,5 +14,6 @@ pub mod output_subscriber;
pub mod scheduler;
pub mod signal;
pub mod source;
+pub mod workspace;
pub use scheduler::run;
diff --git a/crates/hm/src/orchestrator/workspace.rs b/crates/hm/src/orchestrator/workspace.rs
new file mode 100644
index 0000000..ada1439
--- /dev/null
+++ b/crates/hm/src/orchestrator/workspace.rs
@@ -0,0 +1,358 @@
+//! Per-run workspace orchestration.
+//!
+//! [`WorkspaceManager`] auto-selects between two strategies:
+//!
+//! - **Clone strategy** (macOS APFS, Linux reflink, fallback `cp`):
+//! each step gets a full directory clone via [`hm_util::cow::cow_clone_dir`].
+//! - **Overlay strategy** (Linux ext4 + `fuse-overlayfs`):
+//! each step gets a `fuse-overlayfs` mount with shared lower layers.
+//!
+//! The rest of the system (scheduler, runner) only sees
+//! [`WorkspaceManager::workspace_path`] — strategy is transparent.
+
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+use hm_util::cow::{CowStrategy, OverlayMount};
+
+/// Manages workspace directories for a single pipeline run.
+///
+/// Each step gets an isolated directory that is either a full COW clone
+/// or a `fuse-overlayfs` mount, depending on the platform.
+pub struct WorkspaceManager {
+    run_dir: PathBuf,
+    base_dir: PathBuf,
+    strategy: CowStrategy,
+    workspaces: HashMap<String, PathBuf>,
+    overlays: HashMap<String, OverlayLayer>,
+}
+
+struct OverlayLayer {
+    upper_dir: PathBuf,
+    merged_dir: PathBuf,
+    ancestor_uppers: Vec<PathBuf>,
+    _mount: Option<OverlayMount>,
+}
+
+impl WorkspaceManager {
+ /// Create a new workspace manager that clones from `base_dir` into
+ /// per-step sub-directories under `run_dir`.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if `run_dir` cannot be created.
+    pub fn from_base(run_dir: PathBuf, base_dir: PathBuf) -> Result<Self> {
+        std::fs::create_dir_all(&run_dir)
+            .with_context(|| format!("create run dir {}", run_dir.display()))?;
+        let strategy = hm_util::cow::detect_strategy();
+        tracing::info!(?strategy, "COW workspace strategy");
+        Ok(Self {
+            run_dir,
+            base_dir,
+            strategy,
+            workspaces: HashMap::new(),
+            overlays: HashMap::new(),
+        })
+    }
+
+ /// Create a new workspace manager that first extracts a tar.gz
+ /// archive into `run_dir/base`, then delegates to [`Self::from_base`].
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the archive cannot be extracted or the run
+ /// directory cannot be created.
+    pub fn from_archive(run_dir: PathBuf, archive_bytes: &[u8]) -> Result<Self> {
+        let base_dir = run_dir.join("base");
+        std::fs::create_dir_all(&base_dir)
+            .with_context(|| format!("create base dir {}", base_dir.display()))?;
+        extract_tar_gz(archive_bytes, &base_dir)?;
+        Self::from_base(run_dir, base_dir)
+    }
+
+ /// Create an isolated workspace directory for `step_key`.
+ ///
+ /// If `parent_key` is `Some`, the workspace inherits the contents of
+ /// the parent workspace (including any modifications made after
+ /// creation). If `None`, the workspace is cloned from the base
+ /// directory.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the parent workspace is not registered, or if
+ /// the clone / overlay operation fails.
+    pub fn create_workspace(
+        &mut self,
+        step_key: &str,
+        parent_key: Option<&str>,
+    ) -> Result<PathBuf> {
+        match self.strategy {
+            CowStrategy::FuseOverlay => self.create_overlay(step_key, parent_key),
+            _ => self.create_clone(step_key, parent_key, None),
+        }
+    }
+
+ /// Create a workspace from a cached directory, bypassing parent
+ /// relationships.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the clone operation fails.
+    pub fn create_workspace_from_cache(
+        &mut self,
+        step_key: &str,
+        cached_workspace: &Path,
+    ) -> Result<PathBuf> {
+        self.create_clone(step_key, None, Some(cached_workspace))
+    }
+
+ /// Look up the filesystem path for a previously created workspace.
+ #[must_use]
+ pub fn workspace_path(&self, step_key: &str) -> Option<&Path> {
+ if let Some(p) = self.workspaces.get(step_key) {
+ return Some(p.as_path());
+ }
+ self.overlays
+ .get(step_key)
+ .map(|l| l.merged_dir.as_path())
+ }
+
+ /// The base directory that root workspaces are cloned from.
+ #[must_use]
+ pub fn base_dir(&self) -> &Path {
+ &self.base_dir
+ }
+
+ /// The COW strategy in use for this run.
+ #[must_use]
+ pub const fn strategy(&self) -> CowStrategy {
+ self.strategy
+ }
+
+ /// Remove the entire run directory, including all workspaces and
+ /// overlay mounts.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the run directory cannot be removed.
+ pub fn cleanup(&mut self) -> Result<()> {
+ // Drop overlay mounts before removing the filesystem tree.
+ self.overlays.clear();
+ if self.run_dir.exists() {
+ std::fs::remove_dir_all(&self.run_dir).with_context(|| {
+ format!("cleanup run dir {}", self.run_dir.display())
+ })?;
+ }
+ Ok(())
+ }
+
+ // ------------------------------------------------------------------
+ // Clone strategy
+ // ------------------------------------------------------------------
+
+    fn create_clone(
+        &mut self,
+        step_key: &str,
+        parent_key: Option<&str>,
+        cached: Option<&Path>,
+    ) -> Result<PathBuf> {
+        let safe = sanitize_key(step_key);
+        let ws_dir = self.run_dir.join("workspaces").join(&safe);
+
+        let source = if let Some(c) = cached {
+            c.to_path_buf()
+        } else if let Some(pk) = parent_key {
+            self.workspaces
+                .get(pk)
+                .cloned()
+                .ok_or_else(|| anyhow::anyhow!("parent workspace '{pk}' not registered"))?
+        } else {
+            self.base_dir.clone()
+        };
+
+        hm_util::cow::cow_clone_dir(&source, &ws_dir).with_context(|| {
+            format!("cow clone {} -> {}", source.display(), ws_dir.display())
+        })?;
+
+        self.workspaces.insert(step_key.to_string(), ws_dir.clone());
+        Ok(ws_dir)
+    }
+
+ // ------------------------------------------------------------------
+ // Overlay strategy
+ // ------------------------------------------------------------------
+
+    fn create_overlay(
+        &mut self,
+        step_key: &str,
+        parent_key: Option<&str>,
+    ) -> Result<PathBuf> {
+        let safe = sanitize_key(step_key);
+        let layer_dir = self.run_dir.join("layers").join(&safe);
+        let upper_dir = layer_dir.join("upper");
+        let work_dir = layer_dir.join("work");
+        let merged_dir = layer_dir.join("merged");
+
+        std::fs::create_dir_all(&upper_dir)?;
+        std::fs::create_dir_all(&work_dir)?;
+        std::fs::create_dir_all(&merged_dir)?;
+
+        let ancestor_uppers = if let Some(pk) = parent_key {
+            let parent = self.overlays.get(pk).ok_or_else(|| {
+                anyhow::anyhow!("parent overlay '{pk}' not registered")
+            })?;
+            let mut ancestors = vec![parent.upper_dir.clone()];
+            ancestors.extend(parent.ancestor_uppers.iter().cloned());
+            ancestors
+        } else {
+            vec![]
+        };
+
+        let mut lower_dirs: Vec<&Path> =
+            ancestor_uppers.iter().map(PathBuf::as_path).collect();
+        lower_dirs.push(&self.base_dir);
+
+        let mount = OverlayMount::mount(
+            &lower_dirs,
+            &upper_dir,
+            &work_dir,
+            &merged_dir,
+        )?;
+
+        let merged_path = merged_dir.clone();
+        self.overlays.insert(
+            step_key.to_string(),
+            OverlayLayer {
+                upper_dir,
+                merged_dir,
+                ancestor_uppers,
+                _mount: Some(mount),
+            },
+        );
+        Ok(merged_path)
+    }
+}
+
+impl std::fmt::Debug for WorkspaceManager {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("WorkspaceManager")
+            .field("run_dir", &self.run_dir)
+            .field("workspaces", &self.workspaces.keys().collect::<Vec<_>>())
+            .finish_non_exhaustive()
+    }
+}
+
+fn sanitize_key(s: &str) -> String {
+ s.chars()
+ .map(|c| {
+ if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
+ c
+ } else {
+ '-'
+ }
+ })
+ .collect()
+}
+
+fn extract_tar_gz(bytes: &[u8], dest: &Path) -> Result<()> {
+ use flate2::read::GzDecoder;
+
+ let decoder = GzDecoder::new(bytes);
+ let mut archive = tar::Archive::new(decoder);
+ archive
+ .unpack(dest)
+ .with_context(|| format!("extract archive to {}", dest.display()))
+}
+
+#[cfg(test)]
+#[allow(clippy::unwrap_used)]
+mod tests {
+ use super::*;
+ use std::fs;
+
+ fn make_base(tmp: &std::path::Path) -> PathBuf {
+ let base = tmp.join("base");
+ fs::create_dir(&base).unwrap();
+ fs::write(base.join("main.rs"), b"fn main() {}").unwrap();
+ base
+ }
+
+ #[test]
+ fn root_step_clones_base() {
+ let tmp = tempfile::tempdir().unwrap();
+ let base = make_base(tmp.path());
+ let mut mgr =
+ WorkspaceManager::from_base(tmp.path().join("run"), base).unwrap();
+
+ let ws = mgr.create_workspace("build", None).unwrap();
+ assert_eq!(
+ fs::read_to_string(ws.join("main.rs")).unwrap(),
+ "fn main() {}"
+ );
+ }
+
+ #[test]
+ fn child_step_inherits_parent_changes() {
+ let tmp = tempfile::tempdir().unwrap();
+ let base = make_base(tmp.path());
+ let mut mgr =
+ WorkspaceManager::from_base(tmp.path().join("run"), base).unwrap();
+
+ let ws_a = mgr.create_workspace("a", None).unwrap();
+ fs::write(ws_a.join("artifact.bin"), b"built").unwrap();
+
+ let ws_b = mgr.create_workspace("b", Some("a")).unwrap();
+ assert_eq!(
+ fs::read_to_string(ws_b.join("main.rs")).unwrap(),
+ "fn main() {}"
+ );
+ assert_eq!(
+ fs::read_to_string(ws_b.join("artifact.bin")).unwrap(),
+ "built"
+ );
+ }
+
+ #[test]
+ fn fork_children_are_isolated() {
+ let tmp = tempfile::tempdir().unwrap();
+ let base = make_base(tmp.path());
+ let mut mgr =
+ WorkspaceManager::from_base(tmp.path().join("run"), base).unwrap();
+
+ let ws_a = mgr.create_workspace("a", None).unwrap();
+ fs::write(ws_a.join("from_a"), b"a").unwrap();
+
+ let ws_b = mgr.create_workspace("b", Some("a")).unwrap();
+ let ws_c = mgr.create_workspace("c", Some("a")).unwrap();
+
+ fs::write(ws_b.join("from_b"), b"b").unwrap();
+ assert!(!ws_c.join("from_b").exists(), "c must not see b's changes");
+ }
+
+ #[test]
+ fn workspace_path_returns_created() {
+ let tmp = tempfile::tempdir().unwrap();
+ let base = make_base(tmp.path());
+ let mut mgr =
+ WorkspaceManager::from_base(tmp.path().join("run"), base).unwrap();
+
+ mgr.create_workspace("s", None).unwrap();
+ assert!(mgr.workspace_path("s").is_some());
+ assert!(mgr.workspace_path("nonexistent").is_none());
+ }
+
+ #[test]
+ fn cleanup_removes_run_dir() {
+ let tmp = tempfile::tempdir().unwrap();
+ let base = make_base(tmp.path());
+ let run_dir = tmp.path().join("run");
+ let mut mgr =
+ WorkspaceManager::from_base(run_dir.clone(), base).unwrap();
+ mgr.create_workspace("s", None).unwrap();
+ assert!(run_dir.exists());
+
+ mgr.cleanup().unwrap();
+ assert!(!run_dir.exists());
+ }
+}
From ec61830afe885ed524ca31b922b8889dbf1133ca Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Tue, 26 May 2026 12:37:49 -0700
Subject: [PATCH 05/25] fix: code review fixes for Tasks 2 and 3
- Add duplicate step_key guard in WorkspaceManager
- Add tests for create_workspace_from_cache and duplicate key
- Deduplicate start_long_lived via delegation to start_long_lived_with_mounts
---
crates/hm/src/orchestrator/docker_client.rs | 23 +--------------
crates/hm/src/orchestrator/workspace.rs | 31 +++++++++++++++++++++
2 files changed, 32 insertions(+), 22 deletions(-)
diff --git a/crates/hm/src/orchestrator/docker_client.rs b/crates/hm/src/orchestrator/docker_client.rs
index 89210e2..6fefc28 100644
--- a/crates/hm/src/orchestrator/docker_client.rs
+++ b/crates/hm/src/orchestrator/docker_client.rs
@@ -156,29 +156,8 @@ impl DockerClient {
workdir: &str,
name: &str,
) -> Result {
- let cfg = Config {
- image: Some(image.to_string()),
- cmd: Some(vec!["sh".into(), "-c".into(), "sleep infinity".into()]),
- env: Some(env.to_vec()),
- working_dir: Some(workdir.to_string()),
- ..Default::default()
- };
- let create = self
- .inner
- .create_container(
- Some(CreateContainerOptions {
- name,
- ..Default::default()
- }),
- cfg,
- )
+ self.start_long_lived_with_mounts(image, env, workdir, name, &[])
.await
- .map_err(|e| HmError::Docker(format!("create_container: {e}")))?;
- self.inner
- .start_container(&create.id, None::>)
- .await
- .map_err(|e| HmError::Docker(format!("start_container: {e}")))?;
- Ok(create.id)
}
/// Like [`Self::start_long_lived`] but with bind mounts via `HostConfig`.
diff --git a/crates/hm/src/orchestrator/workspace.rs b/crates/hm/src/orchestrator/workspace.rs
index ada1439..f49e7d1 100644
--- a/crates/hm/src/orchestrator/workspace.rs
+++ b/crates/hm/src/orchestrator/workspace.rs
@@ -87,6 +87,9 @@ impl WorkspaceManager {
step_key: &str,
parent_key: Option<&str>,
) -> Result<PathBuf> {
+ if self.workspaces.contains_key(step_key) || self.overlays.contains_key(step_key) {
+ anyhow::bail!("workspace for step '{step_key}' already exists");
+ }
match self.strategy {
CowStrategy::FuseOverlay => self.create_overlay(step_key, parent_key),
_ => self.create_clone(step_key, parent_key, None),
@@ -342,6 +345,34 @@ mod tests {
assert!(mgr.workspace_path("nonexistent").is_none());
}
+ #[test]
+ fn create_workspace_from_cache_clones_cached_dir() {
+ let tmp = tempfile::tempdir().unwrap();
+ let base = make_base(tmp.path());
+ let cached = tmp.path().join("cached");
+ fs::create_dir(&cached).unwrap();
+ fs::write(cached.join("cached_file.txt"), b"from_cache").unwrap();
+
+ let mut mgr =
+ WorkspaceManager::from_base(tmp.path().join("run"), base).unwrap();
+ let ws = mgr.create_workspace_from_cache("s", &cached).unwrap();
+ assert_eq!(
+ fs::read_to_string(ws.join("cached_file.txt")).unwrap(),
+ "from_cache"
+ );
+ assert!(!ws.join("main.rs").exists());
+ }
+
+ #[test]
+ fn duplicate_step_key_errors() {
+ let tmp = tempfile::tempdir().unwrap();
+ let base = make_base(tmp.path());
+ let mut mgr =
+ WorkspaceManager::from_base(tmp.path().join("run"), base).unwrap();
+ mgr.create_workspace("dup", None).unwrap();
+ assert!(mgr.create_workspace("dup", None).is_err());
+ }
+
#[test]
fn cleanup_removes_run_dir() {
let tmp = tempfile::tempdir().unwrap();
From 8daf6312f045d2b3bc5c98a447fd5d98e8e2e8c2 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Tue, 26 May 2026 12:40:25 -0700
Subject: [PATCH 06/25] feat(runner): add COW workspace execution path with
bind mounts
When RunContext.workspace is Some, DockerRunner dispatches to a new
run_step_cow path that bind-mounts the host workspace directory into
the container instead of extracting a tar archive and committing a
docker snapshot. This eliminates the tar extraction and docker commit
overhead for the COW execution mode.
---
crates/hm/src/orchestrator/scheduler.rs | 1 +
crates/hm/src/runner/docker.rs | 144 +++++++++++++++++++++++-
crates/hm/src/runner/mod.rs | 6 +-
3 files changed, 149 insertions(+), 2 deletions(-)
diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs
index 45e7209..b2985fa 100644
--- a/crates/hm/src/orchestrator/scheduler.rs
+++ b/crates/hm/src/orchestrator/scheduler.rs
@@ -96,6 +96,7 @@ pub async fn run(
event_bus: bus.clone(),
archives: archives.clone(),
cancel: cancel.clone(),
+ workspace: None,
};
let parallelism = parallelism.max(1);
diff --git a/crates/hm/src/runner/docker.rs b/crates/hm/src/runner/docker.rs
index 0b814a1..fe862e0 100644
--- a/crates/hm/src/runner/docker.rs
+++ b/crates/hm/src/runner/docker.rs
@@ -70,7 +70,13 @@ impl StepRunner for DockerRunner {
input: ExecutorInput,
) -> Pin<Box<dyn Future<Output = Result<StepResult>> + Send + '_>> {
let ctx = ctx.clone();
- Box::pin(async move { run_step(&ctx, input).await })
+ Box::pin(async move {
+ if ctx.workspace.is_some() {
+ run_step_cow(&ctx, input).await
+ } else {
+ run_step(&ctx, input).await
+ }
+ })
}
}
@@ -237,6 +243,128 @@ async fn run_in_container(
})
}
+// ---------------------------------------------------------------------------
+// COW execution path
+// ---------------------------------------------------------------------------
+
+/// Pick the base image for a COW step.
+///
+/// In COW mode the workspace is bind-mounted, so there is no
+/// parent-snapshot chain. We use the step's declared image or fall
+/// back to `alpine:latest`.
+fn resolve_image_cow(step: &CommandStep) -> String {
+ step.image
+ .clone()
+ .unwrap_or_else(|| "alpine:latest".to_string())
+}
+
+async fn run_step_cow(ctx: &RunContext, input: ExecutorInput) -> Result<StepResult> {
+ let plan = decision_plan(&input.cache_lookup);
+
+ if !plan.run_command {
+ return Ok(StepResult {
+ exit_code: 0,
+ committed_snapshot: plan.hit_tag.clone(),
+ artifacts: vec![],
+ });
+ }
+
+ let workspace_mgr = ctx
+ .workspace
+ .as_ref()
+ .ok_or_else(|| anyhow::anyhow!("COW mode requires workspace manager"))?;
+
+ let workspace_path = {
+ let mgr = workspace_mgr.lock().unwrap_or_else(|e| e.into_inner());
+ mgr.workspace_path(&input.step.key)
+ .map(|p| p.to_path_buf())
+ .ok_or_else(|| {
+ anyhow::anyhow!("workspace for step '{}' not created", input.step.key)
+ })?
+ };
+
+ let image = resolve_image_cow(&input.step);
+ let container_name =
+ sanitize_container_name(&input.run_id.to_string(), &input.step.key);
+ let env_vec: Vec<String> = input
+ .env
+ .iter()
+ .map(|(k, v)| format!("{k}={v}"))
+ .collect();
+
+ // Pull image if needed.
+ if !ctx.docker.image_exists(&image).await.unwrap_or(false) {
+ let docker = ctx.docker.clone();
+ let cancel = ctx.cancel.clone();
+ let img = image.clone();
+ let pull_fut = async move { docker.pull_image(&img).await };
+ tokio::select! {
+ result = pull_fut => result.with_context(|| format!("pull '{image}'"))?,
+ () = cancel.cancelled() => anyhow::bail!("cancelled during image pull"),
+ }
+ }
+
+ // Start container with workspace bind mount.
+ let binds = vec![format!("{}:/workspace", workspace_path.display())];
+ let cid = ctx
+ .docker
+ .start_long_lived_with_mounts(
+ &image,
+ &env_vec,
+ &input.workdir,
+ &container_name,
+ &binds,
+ )
+ .await
+ .context("docker start with mounts failed")?;
+
+ let result = run_cow_in_container(ctx, &cid, &input, &env_vec).await;
+ ctx.docker.stop_remove(&cid).await;
+ result
+}
+
+async fn run_cow_in_container(
+ ctx: &RunContext,
+ cid: &str,
+ input: &ExecutorInput,
+ env_vec: &[String],
+) -> Result<StepResult> {
+ let mut writer = StepLogWriter::new(input.step_id, Arc::clone(&ctx.event_bus));
+ let docker = ctx.docker.clone();
+ let cancel = ctx.cancel.clone();
+ let cid_owned = cid.to_owned();
+ let cmd = vec!["sh".into(), "-c".into(), input.step.cmd.clone()];
+ let workdir = input.workdir.clone();
+ let env_owned = env_vec.to_vec();
+ let exec_fut = async move {
+ let rc = docker
+ .exec_streaming(&cid_owned, &cmd, &env_owned, &workdir, &mut writer)
+ .await?;
+ writer.flush_remaining();
+ Ok::<_, anyhow::Error>(rc)
+ };
+
+ let rc = tokio::select! {
+ result = exec_fut => result.context("docker exec failed")?,
+ () = cancel.cancelled() => {
+ return Ok(StepResult {
+ exit_code: 130,
+ committed_snapshot: None,
+ artifacts: vec![],
+ });
+ }
+ };
+
+ #[allow(clippy::cast_possible_truncation)]
+ let exit_code = rc as i32;
+
+ Ok(StepResult {
+ exit_code,
+ committed_snapshot: None,
+ artifacts: vec![],
+ })
+}
+
// ---------------------------------------------------------------------------
// DecisionPlan
// ---------------------------------------------------------------------------
@@ -413,6 +541,20 @@ mod tests {
}
}
+ // -- resolve_image_cow ----------------------------------------------------
+
+ #[test]
+ fn resolve_image_cow_uses_step_image() {
+ let s = step_with_image(Some("rust:1.82"));
+ assert_eq!(resolve_image_cow(&s), "rust:1.82");
+ }
+
+ #[test]
+ fn resolve_image_cow_fallback_alpine() {
+ let s = step_with_image(None);
+ assert_eq!(resolve_image_cow(&s), "alpine:latest");
+ }
+
// -- resolve_image -------------------------------------------------------
#[test]
diff --git a/crates/hm/src/runner/mod.rs b/crates/hm/src/runner/mod.rs
index aefeec8..9e32b89 100644
--- a/crates/hm/src/runner/mod.rs
+++ b/crates/hm/src/runner/mod.rs
@@ -9,7 +9,7 @@ use std::collections::HashMap;
use std::fmt;
use std::future::Future;
use std::pin::Pin;
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
use anyhow::Result;
use hm_plugin_protocol::{BuildEvent, ExecutorInput, StepResult};
@@ -18,6 +18,7 @@ use tokio_util::sync::CancellationToken;
use crate::orchestrator::archive::ArchiveStore;
use crate::orchestrator::docker_client::DockerClient;
use crate::orchestrator::events::EventBus;
+use crate::orchestrator::workspace::WorkspaceManager;
pub mod docker;
@@ -36,6 +37,9 @@ pub struct RunContext {
pub event_bus: Arc<EventBus>,
pub archives: Arc<ArchiveStore>,
pub cancel: CancellationToken,
+ /// When present, steps use COW workspace bind mounts instead of
+ /// tar.gz extraction + docker commit.
+ pub workspace: Option<Arc<Mutex<WorkspaceManager>>>,
}
// ---------------------------------------------------------------------------
From 962db63efb13803db206bc00c7ac0ad39374c8e5 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Tue, 26 May 2026 12:43:24 -0700
Subject: [PATCH 07/25] fix(runner): use workdir for bind mount target, error
on mutex poison
---
crates/hm/src/runner/docker.rs | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/crates/hm/src/runner/docker.rs b/crates/hm/src/runner/docker.rs
index fe862e0..7187def 100644
--- a/crates/hm/src/runner/docker.rs
+++ b/crates/hm/src/runner/docker.rs
@@ -275,7 +275,9 @@ async fn run_step_cow(ctx: &RunContext, input: ExecutorInput) -> Result Result
Date: Tue, 26 May 2026 12:46:12 -0700
Subject: [PATCH 08/25] feat: wire COW workspace mode through scheduler and CLI
---
crates/hm/src/cli/run.rs | 4 +++
crates/hm/src/commands/run/local.rs | 3 ++-
crates/hm/src/orchestrator/scheduler.rs | 36 +++++++++++++++++++++++--
3 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/crates/hm/src/cli/run.rs b/crates/hm/src/cli/run.rs
index 2ac4697..f145c68 100644
--- a/crates/hm/src/cli/run.rs
+++ b/crates/hm/src/cli/run.rs
@@ -42,4 +42,8 @@ pub struct RunArgs {
/// Has no effect with `--format json`.
#[arg(long)]
pub logs: bool,
+
+ /// Use COW workspace bind mounts instead of Docker image commits.
+ #[arg(long)]
+ pub cow: bool,
}
diff --git a/crates/hm/src/commands/run/local.rs b/crates/hm/src/commands/run/local.rs
index 62523e4..93bb46d 100644
--- a/crates/hm/src/commands/run/local.rs
+++ b/crates/hm/src/commands/run/local.rs
@@ -84,6 +84,7 @@ pub async fn handle(args: RunArgs, ctx: RunContext) -> Result {
};
let exit_code =
- crate::orchestrator::run(graph, repo_root, parallelism, runner_registry, renderer).await?;
+ crate::orchestrator::run(graph, repo_root, parallelism, runner_registry, renderer, args.cow)
+ .await?;
Ok(exit_code)
}
diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs
index b2985fa..f77781c 100644
--- a/crates/hm/src/orchestrator/scheduler.rs
+++ b/crates/hm/src/orchestrator/scheduler.rs
@@ -72,6 +72,7 @@ pub async fn run(
parallelism: usize,
runner_registry: Arc,
renderer: Box,
+ cow: bool,
) -> Result {
// Set up per-run state.
let bus = EventBus::new();
@@ -89,6 +90,20 @@ pub async fn run(
// Build the source archive once.
let archive_bytes = build_archive_bytes(&repo_root).context("build source archive")?;
+
+ // When COW mode is enabled, extract the source archive into a
+ // temporary directory and create a workspace manager that will
+ // produce per-step COW clones. This must happen before
+ // `archives.register()` which consumes the bytes.
+ let workspace = if cow {
+ let run_dir = std::env::temp_dir().join(format!("harmont-run-{run_id}"));
+ let mgr = super::workspace::WorkspaceManager::from_archive(run_dir, &archive_bytes)
+ .context("init COW workspace")?;
+ Some(Arc::new(std::sync::Mutex::new(mgr)))
+ } else {
+ None
+ };
+
let archive_id = archives.register(archive_bytes);
let run_ctx = RunContext {
@@ -96,7 +111,7 @@ pub async fn run(
event_bus: bus.clone(),
archives: archives.clone(),
cancel: cancel.clone(),
- workspace: None,
+ workspace: workspace.clone(),
};
let parallelism = parallelism.max(1);
@@ -233,6 +248,14 @@ pub async fn run(
}
}
+ // Clean up the COW workspace tree if one was created.
+ if let Some(ref ws) = workspace {
+ let mut mgr = ws.lock().map_err(|_| anyhow::anyhow!("workspace manager mutex poisoned"))?;
+ if let Err(e) = mgr.cleanup() {
+ tracing::warn!(%e, "failed to clean up COW workspace");
+ }
+ }
+
bus.emit(BuildEvent::BuildEnd {
exit_code: overall,
duration_ms: dur,
@@ -283,7 +306,7 @@ async fn execute_step(
step_id,
key: step_key.clone(),
chain_idx: chain_pos,
- parent_key,
+ parent_key: parent_key.clone(),
display_name: display_name.clone(),
});
@@ -310,6 +333,15 @@ async fn execute_step(
});
}
+ // Create a COW workspace for this step when running in COW mode.
+ if let Some(ref workspace) = run_ctx.workspace {
+ let mut mgr = workspace
+ .lock()
+ .map_err(|_| anyhow::anyhow!("workspace manager mutex poisoned"))?;
+ mgr.create_workspace(&step_key, parent_key.as_deref())
+ .context("create workspace for step")?;
+ }
+
let input = ExecutorInput {
step: step_wire,
workspace_archive_id: archive_id,
From 1791cc51d5b01adcd83ea6099e40b2c9ee9ecfca Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Tue, 26 May 2026 12:50:17 -0700
Subject: [PATCH 09/25] fix(scheduler): create COW workspace before cache-hit
short-circuit
---
crates/hm/src/orchestrator/scheduler.rs | 23 +++++++++++------------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs
index f77781c..6508ec1 100644
--- a/crates/hm/src/orchestrator/scheduler.rs
+++ b/crates/hm/src/orchestrator/scheduler.rs
@@ -314,6 +314,17 @@ async fn execute_step(
let outcome = cache::decide(&run_ctx.docker, &step_wire).await?;
let decision = outcome.decision;
+ // Create a COW workspace for this step when running in COW mode.
+ // This must happen before the cache-hit short-circuit so that
+ // downstream steps can clone from this step's workspace.
+ if let Some(ref workspace) = run_ctx.workspace {
+ let mut mgr = workspace
+ .lock()
+ .map_err(|_| anyhow::anyhow!("workspace manager mutex poisoned"))?;
+ mgr.create_workspace(&step_key, parent_key.as_deref())
+ .context("create workspace for step")?;
+ }
+
if let hm_plugin_protocol::CacheDecision::Hit { tag } = &decision {
bus.emit(BuildEvent::StepCacheHit {
step_id,
@@ -324,24 +335,12 @@ async fn execute_step(
.unwrap_or_default(),
tag: tag.0.clone(),
});
- // Short-circuit: the cached image already exists locally, so
- // there is nothing for the executor to do. Return the
- // snapshot so downstream nodes can use it as their parent.
return Ok(StepOutcome {
exit_code: 0,
snapshot: Some(tag.clone()),
});
}
- // Create a COW workspace for this step when running in COW mode.
- if let Some(ref workspace) = run_ctx.workspace {
- let mut mgr = workspace
- .lock()
- .map_err(|_| anyhow::anyhow!("workspace manager mutex poisoned"))?;
- mgr.create_workspace(&step_key, parent_key.as_deref())
- .context("create workspace for step")?;
- }
-
let input = ExecutorInput {
step: step_wire,
workspace_archive_id: archive_id,
From e9b7ce6cf7eab62d401239cd6d6d6f3390de9952 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Tue, 26 May 2026 12:51:43 -0700
Subject: [PATCH 10/25] fix: skip Docker ephemeral cleanup in COW workspace
mode
---
crates/hm/src/orchestrator/scheduler.rs | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/crates/hm/src/orchestrator/scheduler.rs b/crates/hm/src/orchestrator/scheduler.rs
index 6508ec1..e8449a2 100644
--- a/crates/hm/src/orchestrator/scheduler.rs
+++ b/crates/hm/src/orchestrator/scheduler.rs
@@ -235,16 +235,18 @@ pub async fn run(
let dur = started_total.elapsed().as_millis() as u64;
- // Clean up ephemeral images created during this run.
- let ephemeral_tags: Vec<&str> = outcomes
- .iter()
- .filter_map(|o| o.snapshot.as_ref())
- .filter(|s| s.0.starts_with("harmont-local-ephemeral/"))
- .map(|s| s.0.as_str())
- .collect();
- for tag in ephemeral_tags {
- if let Err(e) = docker.remove_image(tag).await {
- tracing::warn!(image = %tag, %e, "failed to remove ephemeral image");
+ // Clean up ephemeral images (legacy mode only — COW mode has no Docker commits).
+ if !cow {
+ let ephemeral_tags: Vec<&str> = outcomes
+ .iter()
+ .filter_map(|o| o.snapshot.as_ref())
+ .filter(|s| s.0.starts_with("harmont-local-ephemeral/"))
+ .map(|s| s.0.as_str())
+ .collect();
+ for tag in ephemeral_tags {
+ if let Err(e) = docker.remove_image(tag).await {
+ tracing::warn!(image = %tag, %e, "failed to remove ephemeral image");
+ }
}
}
From 08623c88856c3576cf8693eb2a1f220367d37c2a Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Tue, 26 May 2026 12:54:19 -0700
Subject: [PATCH 11/25] feat(cache): add COW workspace cache backend
Persist completed step workspaces to ~/.harmont/cache/workspaces/ and
restore them on cache hits, replacing Docker image-based caching when
running in COW mode. Stale cache directories from previous keys are
evicted after a successful build.
---
crates/hm/src/orchestrator/cache.rs | 176 +++++++++++++++++++++++-
crates/hm/src/orchestrator/scheduler.rs | 69 ++++++++--
2 files changed, 232 insertions(+), 13 deletions(-)
diff --git a/crates/hm/src/orchestrator/cache.rs b/crates/hm/src/orchestrator/cache.rs
index e0bdc5a..78fe3c4 100644
--- a/crates/hm/src/orchestrator/cache.rs
+++ b/crates/hm/src/orchestrator/cache.rs
@@ -8,7 +8,9 @@
//! along the JSON in `cache.key`. We turn them into Docker image tags
//! and consult the local image store.
-use anyhow::Result;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
use hm_plugin_protocol::{CacheDecision, CommandStep, SnapshotRef};
use crate::orchestrator::docker_client::DockerClient;
@@ -98,6 +100,116 @@ pub async fn decide(docker: &DockerClient, step: &CommandStep) -> Result,
+ pub stale_dirs: Vec,
+}
+
+/// Resolve the on-disk cache directory for a step's COW workspace.
+///
+/// Returns `None` when the step has no cache, a `"none"` policy, or no
+/// cache key — matching the same guard logic as [`cache_image_tag`].
+pub fn cow_cache_dir(step: &CommandStep) -> Result
- Website · Docs · Slack
+ Website · Docs · Slack
> [!WARNING]
@@ -174,7 +174,7 @@ Go, Python, Java, C++, React, Next.js, and more.
## Documentation
For the full pipeline reference, rich examples, and more — see the
-[docs](https://harmont.dev/docs).
+[docs](https://docs.harmont.dev).
## License
From 0cb736c2fa101b91113e2ad2650503b958b78876 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Wed, 27 May 2026 00:47:28 -0700
Subject: [PATCH 24/25] deslop
---
crates/hm/src/orchestrator/workspace.rs | 8 --------
1 file changed, 8 deletions(-)
diff --git a/crates/hm/src/orchestrator/workspace.rs b/crates/hm/src/orchestrator/workspace.rs
index a02028c..8a69a52 100644
--- a/crates/hm/src/orchestrator/workspace.rs
+++ b/crates/hm/src/orchestrator/workspace.rs
@@ -160,10 +160,6 @@ impl WorkspaceManager {
Ok(())
}
- // ------------------------------------------------------------------
- // Clone strategy
- // ------------------------------------------------------------------
-
fn create_clone(
&mut self,
step_key: &str,
@@ -191,10 +187,6 @@ impl WorkspaceManager {
Ok(ws_dir)
}
- // ------------------------------------------------------------------
- // Overlay strategy
- // ------------------------------------------------------------------
-
fn create_overlay(&mut self, step_key: &str, parent_key: Option<&str>) -> Result<PathBuf> {
let safe = sanitize_key(step_key);
let layer_dir = self.run_dir.join("layers").join(&safe);
From 989baf793b8deb9349b5c49bf487c732fe760ef4 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Wed, 27 May 2026 00:48:11 -0700
Subject: [PATCH 25/25] deslop
---
crates/hm-util/src/cow.rs | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/crates/hm-util/src/cow.rs b/crates/hm-util/src/cow.rs
index e2bdd7f..fd0ac7b 100644
--- a/crates/hm-util/src/cow.rs
+++ b/crates/hm-util/src/cow.rs
@@ -6,10 +6,6 @@ use std::sync::OnceLock;
use anyhow::{Context, Result, bail};
-// -----------------------------------------------------------------------
-// Strategy detection
-// -----------------------------------------------------------------------
-
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CowStrategy {
ApfsClone,
@@ -167,10 +163,6 @@ fn probe_fuse_overlayfs() -> bool {
ok
}
-// -----------------------------------------------------------------------
-// cow_clone_dir
-// -----------------------------------------------------------------------
-
/// Clone `src` to `dst` using the best available COW mechanism.
///
/// # Errors
@@ -262,10 +254,6 @@ fn copy_dir_recursive(src: &Path, dst: &Path) -> Result<()> {
Ok(())
}
-// -----------------------------------------------------------------------
-// OverlayMount — fuse-overlayfs lifecycle (strategy 3)
-// -----------------------------------------------------------------------
-
pub struct OverlayMount {
merged: PathBuf,
upper: PathBuf,