Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion src/forge_loop/runner/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,21 @@ def reap_orphan_worktrees(repo: Path, events_file: Path) -> int:
"""
reaped = 0
for path in sorted(glob.glob("/tmp/wt-loop-*")):
name = Path(path).name
# Quarantined dirs (wt-loop-<N>.stale-<ts>) from failed cleanups —
# try to rm them at boot. If still un-removable due to uid
# mismatch, leave them; operator sweep can take over.
if ".stale-" in name:
try:
import shutil
shutil.rmtree(path, ignore_errors=True)
except Exception: # noqa: BLE001 — best-effort boot cleanup
pass
if not Path(path).exists():
reaped += 1
continue
try:
issue = int(Path(path).name.removeprefix("wt-loop-").split("-")[0])
issue = int(name.removeprefix("wt-loop-").split("-")[0])
except ValueError:
# Defensive: skip non-numeric suffixes without crashing the
# boot path. A future operator tool might leave a marker dir
Expand Down
37 changes: 35 additions & 2 deletions src/forge_loop/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,26 @@ def _drop_permissive_settings(worktree: Path) -> None:
cdir.chmod(0o555)


def _quarantine_if_blocking(wt: Path) -> Path | None:
"""If `wt` still exists after normal cleanup (e.g. worker-planted files
owned by a different uid that we can't chmod/rm), rename it out of the
way so `git worktree add` can proceed. Returns the quarantined path,
or None if the dir is already gone.

Quarantined dirs use the suffix ``.stale-<unix-ts>`` so the boot
reaper + operator can find + sweep them later without risk of colliding
with the live path.
"""
if not wt.exists():
return None
quarantine = wt.with_name(f"{wt.name}.stale-{int(time.time())}")
try:
wt.rename(quarantine)
except OSError:
return None
return quarantine


def _prep_worktree(
repo: Path,
n: int,
Expand All @@ -295,7 +315,16 @@ def _prep_worktree(
capture_output=True,
)
if wt.exists():
shutil.rmtree(wt)
try:
shutil.rmtree(wt)
except (OSError, PermissionError):
pass
# If the worker planted files owned by a different uid (subprocess
# ran under a different namespace), chmod+rmtree above will silently
# fail and leave the dir behind. Quarantine it so the new worktree
# add doesn't collide. Without this, every retry of this issue hits
# `worktree-create-failed` → infinite loop until operator intervenes.
_quarantine_if_blocking(wt)
# If a previous failed attempt left a local branch lying around, delete
# it so `git worktree add -B` can recreate it cleanly off the freshest
# origin/<base_branch>. `-B` would overwrite anyway, but we use plain `-b`
Expand Down Expand Up @@ -345,7 +374,11 @@ def _prep_repair_worktree(
subprocess.run(["chmod", "-R", "u+w", str(claude_dir)], capture_output=True)
subprocess.run(["git", "worktree", "remove", "--force", str(wt)], cwd=repo, capture_output=True)
if wt.exists():
shutil.rmtree(wt)
try:
shutil.rmtree(wt)
except (OSError, PermissionError):
pass
_quarantine_if_blocking(wt)
remote_ref = f"refs/remotes/origin/{branch}"
subprocess.run(
["git", "fetch", "--prune", "origin", f"+refs/heads/{branch}:{remote_ref}"],
Expand Down
59 changes: 59 additions & 0 deletions tests/test_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,65 @@ def fake_run(cmd: list[str], **kwargs: Any) -> _Completed:
assert ["git", "worktree", "add", str(worktree), "-B", "loop/12-demo", "origin/main"] in calls


def test_prep_worktree_quarantines_undeletable_dir(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""When a prior worker planted files we can't chmod/rm (uid mismatch),
the dir is renamed out of the way so the new `git worktree add`
doesn't collide. Without this, every retry of the same issue hits
`worktree-create-failed` and the loop spins forever.

Repro: simulate `shutil.rmtree` raising PermissionError, then
verify (a) `worktree add` still gets called, and (b) a quarantine
dir matching `wt-loop-<N>.stale-<ts>` exists.
"""
import shutil as _real_shutil

real_rmtree = _real_shutil.rmtree # capture before monkeypatch

blocking = Path("/tmp/wt-loop-9999")
# Pre-clean from any prior failed run before we monkeypatch rmtree.
for q in Path("/tmp").glob("wt-loop-9999*"):
real_rmtree(q, ignore_errors=True)
blocking.mkdir(exist_ok=True)
(blocking / "marker").write_text("planted")

class _Completed:
returncode = 0
stderr = ""

calls: list[list[str]] = []

def fake_run(cmd: list[str], **kwargs: Any) -> _Completed:
calls.append(cmd)
return _Completed()

def boom_rmtree(_path: str | Path) -> None:
raise PermissionError("simulated: planted by another uid")

monkeypatch.setattr("forge_loop.worker.subprocess.run", fake_run)
monkeypatch.setattr("forge_loop.worker.shutil.rmtree", boom_rmtree)
monkeypatch.setattr("forge_loop.worker._drop_permissive_settings", lambda _wt: None)

try:
worktree, err = _prep_worktree(tmp_path, 9999, "loop/9999-demo")
assert err is None
# `git worktree add` MUST still get called — quarantine unblocked it.
assert any(
cmd[:3] == ["git", "worktree", "add"] for cmd in calls
), f"worktree add was not called: {calls!r}"
# The blocking dir got renamed out of the way.
assert not blocking.exists(), "blocking dir should have been quarantined"
quarantined = list(Path("/tmp").glob("wt-loop-9999.stale-*"))
assert quarantined, "quarantine dir was not created"
# Quarantined dir still holds the original marker (rename, not delete).
assert (quarantined[0] / "marker").read_text() == "planted"
finally:
for q in Path("/tmp").glob("wt-loop-9999*"):
real_rmtree(q, ignore_errors=True)


# Gradle/WSL-OOM guard tests removed: forge-loop is stack-agnostic; the
# generic brief now says "avoid full-suite runs" without project-specific
# JVM flag pinning. Operators add their own gates via project tooling.
Expand Down
Loading