From 202685d3b8ea476dce64d1771f1e1a8a3f99d623 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 May 2026 06:13:04 +0000 Subject: [PATCH 1/9] Initial plan From 08e1fc2b8fda51cf6778e96cef12ea0e6638c22c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 May 2026 06:20:31 +0000 Subject: [PATCH 2/9] fix(safeoutputs): prevent symlink exfiltration in create-pr Stage 3 Replace is_file() (follows symlinks) with symlink_metadata() checks in collect_changes_from_worktree and collect_changes_from_diff_tree so that symlinks in the applied patch are silently skipped rather than followed to arbitrary filesystem paths (e.g. /proc/self/environ). Also add symlink mode (120000) detection to validate_patch_paths as a belt-and-suspenders defence: patches that create or convert files to symlinks are rejected before git apply is attempted. Adds two unit tests covering the new rejection paths. Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/99d4d7f5-eaea-4a34-87e3-ac34bab53ddb Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- src/safeoutputs/create_pull_request.rs | 147 ++++++++++++++++++++++--- 1 file changed, 131 insertions(+), 16 deletions(-) diff --git a/src/safeoutputs/create_pull_request.rs b/src/safeoutputs/create_pull_request.rs index 6f244b32..ec868de8 100644 --- a/src/safeoutputs/create_pull_request.rs +++ b/src/safeoutputs/create_pull_request.rs @@ -1656,14 +1656,32 @@ async fn collect_changes_from_worktree( } // New/untracked files "??" | "A " | " A" | "AM" => { - if full_path.is_file() { - changes.push(read_file_change("add", file_path, &full_path).await?); + match tokio::fs::symlink_metadata(&full_path).await { + Ok(meta) if meta.file_type().is_file() => { + changes.push(read_file_change("add", file_path, &full_path).await?); + } + Ok(meta) if meta.file_type().is_symlink() => { + warn!( + "Skipping symlink in worktree: {} (symlink-following attack prevention)", + file_path + ); + } + _ => {} } } // Modified files " M" | "M " | "MM" => { - if full_path.is_file() { - changes.push(read_file_change("edit", file_path, &full_path).await?); + match tokio::fs::symlink_metadata(&full_path).await { + Ok(meta) if meta.file_type().is_file() => { + changes.push(read_file_change("edit", file_path, &full_path).await?); + } + Ok(meta) if meta.file_type().is_symlink() => { + warn!( + "Skipping symlink in worktree: {} (symlink-following attack prevention)", + file_path + ); + } + _ => {} } } // Renamed files - format is "R old_path -> new_path" @@ -1687,16 +1705,34 @@ async fn collect_changes_from_worktree( // If status is "RM" (renamed + modified), also emit content if status_code == "RM" { let new_full_path = worktree_path.join(new_path.trim()); - if new_full_path.is_file() { - changes.push(read_file_change("edit", new_path.trim(), &new_full_path).await?); + match tokio::fs::symlink_metadata(&new_full_path).await { + Ok(meta) if meta.file_type().is_file() => { + changes.push(read_file_change("edit", new_path.trim(), &new_full_path).await?); + } + Ok(meta) if meta.file_type().is_symlink() => { + warn!( + "Skipping symlink in worktree: {} (symlink-following attack prevention)", + new_path.trim() + ); + } + _ => {} } } } } // Other statuses - try to handle as edit if file exists _ => { - if full_path.is_file() { - changes.push(read_file_change("edit", file_path, &full_path).await?); + match tokio::fs::symlink_metadata(&full_path).await { + Ok(meta) if meta.file_type().is_file() => { + changes.push(read_file_change("edit", file_path, &full_path).await?); + } + Ok(meta) if meta.file_type().is_symlink() => { + warn!( + "Skipping symlink in worktree: {} (symlink-following attack prevention)", + file_path + ); + } + _ => {} } } } @@ -1744,8 +1780,17 @@ async fn collect_changes_from_diff_tree( })); } else if status_code == "A" { // Added file - if full_path.is_file() { - changes.push(read_file_change("add", file_path, &full_path).await?); + match tokio::fs::symlink_metadata(&full_path).await { + Ok(meta) if meta.file_type().is_file() => { + changes.push(read_file_change("add", file_path, &full_path).await?); + } + Ok(meta) if meta.file_type().is_symlink() => { + warn!( + "Skipping symlink in worktree: {} (symlink-following attack prevention)", + file_path + ); + } + _ => {} } } else if status_code.starts_with('R') && parts.len() >= 3 { // Renamed file: R100\told_path\tnew_path @@ -1765,8 +1810,19 @@ async fn collect_changes_from_diff_tree( // If the file was also modified (similarity < 100), emit an edit with content let new_full_path = worktree_path.join(new_path); - if status_code != "R100" && new_full_path.is_file() { - changes.push(read_file_change("edit", new_path, &new_full_path).await?); + if status_code != "R100" { + match tokio::fs::symlink_metadata(&new_full_path).await { + Ok(meta) if meta.file_type().is_file() => { + changes.push(read_file_change("edit", new_path, &new_full_path).await?); + } + Ok(meta) if meta.file_type().is_symlink() => { + warn!( + "Skipping symlink in worktree: {} (symlink-following attack prevention)", + new_path + ); + } + _ => {} + } } } else if status_code.starts_with('C') && parts.len() >= 3 { // Copied file: C100\tsrc_path\tdest_path @@ -1774,13 +1830,31 @@ async fn collect_changes_from_diff_tree( validate_single_path(dest_path)?; let dest_full_path = worktree_path.join(dest_path); - if dest_full_path.is_file() { - changes.push(read_file_change("add", dest_path, &dest_full_path).await?); + match tokio::fs::symlink_metadata(&dest_full_path).await { + Ok(meta) if meta.file_type().is_file() => { + changes.push(read_file_change("add", dest_path, &dest_full_path).await?); + } + Ok(meta) if meta.file_type().is_symlink() => { + warn!( + "Skipping symlink in worktree: {} (symlink-following attack prevention)", + dest_path + ); + } + _ => {} } } else { // Modified or other — read current content - if full_path.is_file() { - changes.push(read_file_change("edit", file_path, &full_path).await?); + match tokio::fs::symlink_metadata(&full_path).await { + Ok(meta) if meta.file_type().is_file() => { + changes.push(read_file_change("edit", file_path, &full_path).await?); + } + Ok(meta) if meta.file_type().is_symlink() => { + warn!( + "Skipping symlink in worktree: {} (symlink-following attack prevention)", + file_path + ); + } + _ => {} } } } @@ -1833,6 +1907,7 @@ async fn read_file_change( /// - No .git directory modifications /// - No absolute paths /// - No null bytes +/// - No symlink entries (mode 120000) fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { let mut in_diff = false; for line in patch_content.lines() { @@ -1872,6 +1947,15 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { { let path = line.splitn(3, ' ').nth(2).unwrap_or("").trim_matches('"'); validate_single_path(path)?; + } else if line.starts_with("new file mode 120000") + || line.starts_with("old mode 120000") + || line.starts_with("new mode 120000") + { + // Reject symlink entries (git mode 120000 = symbolic link). + // Symlinks in a patch could be used to make Stage 3 follow them to + // arbitrary filesystem paths (e.g. /proc/self/environ) when collecting + // file changes to upload to ADO. + anyhow::bail!("Patch contains a symlink entry (mode 120000), which is not allowed"); } } Ok(()) @@ -2225,6 +2309,37 @@ new file mode 100755 ); } + #[test] + fn test_validate_patch_paths_symlink_rejected() { + // A patch that creates a new symlink (mode 120000) must be rejected. + // This is the primary attack vector for symlink exfiltration of Stage 3 secrets. + let patch = r#"diff --git a/secrets.txt b/secrets.txt +new file mode 120000 +index 0000000..abcdefg +--- /dev/null ++++ b/secrets.txt +@@ -0,0 +1 @@ ++/proc/self/environ +\ No newline at end of file +"#; + assert!( + validate_patch_paths(patch).is_err(), + "patch with symlink mode 120000 should be rejected" + ); + } + + #[test] + fn test_validate_patch_paths_symlink_mode_change_rejected() { + // A patch that changes a file to a symlink (old mode → new mode 120000) is rejected. + let patch = "diff --git a/file.txt b/file.txt\n\ + old mode 100644\n\ + new mode 120000\n"; + assert!( + validate_patch_paths(patch).is_err(), + "patch that introduces symlink via mode change should be rejected" + ); + } + #[test] fn test_validate_single_path_valid() { assert!(validate_single_path("src/main.rs").is_ok()); From 26c32ecd811052343c637ec9cd6ff9df6f0d38f0 Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 17 May 2026 18:23:53 +0100 Subject: [PATCH 3/9] refactor(safeoutputs): extract push_file_change_skipping_symlinks helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collapse 7 duplicated symlink_metadata match blocks introduced by the symlink-exfiltration fix into a single helper. Same security semantics: regular file → push change; symlink → warn and skip; other → ignore. Centralizes the use of symlink_metadata so the no-follow invariant lives in one place. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pull_request.rs | 151 +++++++++---------------- 1 file changed, 54 insertions(+), 97 deletions(-) diff --git a/src/safeoutputs/create_pull_request.rs b/src/safeoutputs/create_pull_request.rs index ec868de8..5090b11b 100644 --- a/src/safeoutputs/create_pull_request.rs +++ b/src/safeoutputs/create_pull_request.rs @@ -1656,33 +1656,13 @@ async fn collect_changes_from_worktree( } // New/untracked files "??" | "A " | " A" | "AM" => { - match tokio::fs::symlink_metadata(&full_path).await { - Ok(meta) if meta.file_type().is_file() => { - changes.push(read_file_change("add", file_path, &full_path).await?); - } - Ok(meta) if meta.file_type().is_symlink() => { - warn!( - "Skipping symlink in worktree: {} (symlink-following attack prevention)", - file_path - ); - } - _ => {} - } + push_file_change_skipping_symlinks(&mut changes, "add", file_path, &full_path) + .await?; } // Modified files " M" | "M " | "MM" => { - match tokio::fs::symlink_metadata(&full_path).await { - Ok(meta) if meta.file_type().is_file() => { - changes.push(read_file_change("edit", file_path, &full_path).await?); - } - Ok(meta) if meta.file_type().is_symlink() => { - warn!( - "Skipping symlink in worktree: {} (symlink-following attack prevention)", - file_path - ); - } - _ => {} - } + push_file_change_skipping_symlinks(&mut changes, "edit", file_path, &full_path) + .await?; } // Renamed files - format is "R old_path -> new_path" // For "RM" (renamed + modified), we emit both a rename and an edit change. @@ -1704,36 +1684,22 @@ async fn collect_changes_from_worktree( // If status is "RM" (renamed + modified), also emit content if status_code == "RM" { - let new_full_path = worktree_path.join(new_path.trim()); - match tokio::fs::symlink_metadata(&new_full_path).await { - Ok(meta) if meta.file_type().is_file() => { - changes.push(read_file_change("edit", new_path.trim(), &new_full_path).await?); - } - Ok(meta) if meta.file_type().is_symlink() => { - warn!( - "Skipping symlink in worktree: {} (symlink-following attack prevention)", - new_path.trim() - ); - } - _ => {} - } + let new_path_trimmed = new_path.trim(); + let new_full_path = worktree_path.join(new_path_trimmed); + push_file_change_skipping_symlinks( + &mut changes, + "edit", + new_path_trimmed, + &new_full_path, + ) + .await?; } } } // Other statuses - try to handle as edit if file exists _ => { - match tokio::fs::symlink_metadata(&full_path).await { - Ok(meta) if meta.file_type().is_file() => { - changes.push(read_file_change("edit", file_path, &full_path).await?); - } - Ok(meta) if meta.file_type().is_symlink() => { - warn!( - "Skipping symlink in worktree: {} (symlink-following attack prevention)", - file_path - ); - } - _ => {} - } + push_file_change_skipping_symlinks(&mut changes, "edit", file_path, &full_path) + .await?; } } } @@ -1780,18 +1746,7 @@ async fn collect_changes_from_diff_tree( })); } else if status_code == "A" { // Added file - match tokio::fs::symlink_metadata(&full_path).await { - Ok(meta) if meta.file_type().is_file() => { - changes.push(read_file_change("add", file_path, &full_path).await?); - } - Ok(meta) if meta.file_type().is_symlink() => { - warn!( - "Skipping symlink in worktree: {} (symlink-following attack prevention)", - file_path - ); - } - _ => {} - } + push_file_change_skipping_symlinks(&mut changes, "add", file_path, &full_path).await?; } else if status_code.starts_with('R') && parts.len() >= 3 { // Renamed file: R100\told_path\tnew_path let old_path = file_path; @@ -1811,18 +1766,13 @@ async fn collect_changes_from_diff_tree( // If the file was also modified (similarity < 100), emit an edit with content let new_full_path = worktree_path.join(new_path); if status_code != "R100" { - match tokio::fs::symlink_metadata(&new_full_path).await { - Ok(meta) if meta.file_type().is_file() => { - changes.push(read_file_change("edit", new_path, &new_full_path).await?); - } - Ok(meta) if meta.file_type().is_symlink() => { - warn!( - "Skipping symlink in worktree: {} (symlink-following attack prevention)", - new_path - ); - } - _ => {} - } + push_file_change_skipping_symlinks( + &mut changes, + "edit", + new_path, + &new_full_path, + ) + .await?; } } else if status_code.starts_with('C') && parts.len() >= 3 { // Copied file: C100\tsrc_path\tdest_path @@ -1830,38 +1780,45 @@ async fn collect_changes_from_diff_tree( validate_single_path(dest_path)?; let dest_full_path = worktree_path.join(dest_path); - match tokio::fs::symlink_metadata(&dest_full_path).await { - Ok(meta) if meta.file_type().is_file() => { - changes.push(read_file_change("add", dest_path, &dest_full_path).await?); - } - Ok(meta) if meta.file_type().is_symlink() => { - warn!( - "Skipping symlink in worktree: {} (symlink-following attack prevention)", - dest_path - ); - } - _ => {} - } + push_file_change_skipping_symlinks(&mut changes, "add", dest_path, &dest_full_path) + .await?; } else { // Modified or other — read current content - match tokio::fs::symlink_metadata(&full_path).await { - Ok(meta) if meta.file_type().is_file() => { - changes.push(read_file_change("edit", file_path, &full_path).await?); - } - Ok(meta) if meta.file_type().is_symlink() => { - warn!( - "Skipping symlink in worktree: {} (symlink-following attack prevention)", - file_path - ); - } - _ => {} - } + push_file_change_skipping_symlinks(&mut changes, "edit", file_path, &full_path) + .await?; } } Ok(changes) } +/// Push a file change into `changes`, skipping symlinks with a warning. +/// +/// Centralizes the "regular file → read & push; symlink → warn & skip; other → ignore" +/// logic used in multiple places when collecting changes from a worktree or diff tree. +/// Uses `symlink_metadata` so symlinks are detected without being followed — this is +/// the primary defense against symlink-following exfiltration attacks in Stage 3. +async fn push_file_change_skipping_symlinks( + changes: &mut Vec, + change_type: &str, + file_path: &str, + full_path: &std::path::Path, +) -> anyhow::Result<()> { + match tokio::fs::symlink_metadata(full_path).await { + Ok(meta) if meta.file_type().is_file() => { + changes.push(read_file_change(change_type, file_path, full_path).await?); + } + Ok(meta) if meta.file_type().is_symlink() => { + warn!( + "Skipping symlink in worktree: {} (symlink-following attack prevention)", + file_path + ); + } + _ => {} + } + Ok(()) +} + /// Read a file and produce an ADO push change entry. /// Handles both text (rawtext) and binary (base64encoded) content. async fn read_file_change( From 14ab2c12ae700a835607236553b42369c899ab45 Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 17 May 2026 18:33:35 +0100 Subject: [PATCH 4/9] fix(safeoutputs): surface metadata errors and allow symlink-removal patches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two review fixes for the symlink-exfiltration PR: 1. push_file_change_skipping_symlinks: split the catch-all arm so IO errors from symlink_metadata (e.g. permissions denied) are surfaced via warn! rather than silently swallowed. Non-file, non-symlink entries (directories, fifos, sockets) are still silently skipped. 2. validate_patch_paths: only reject patch lines that INTRODUCE a symlink (new file mode 120000, new mode 120000). Patches that convert a symlink into a regular file (old mode 120000 + new mode 100644) or delete an existing symlink (deleted file mode 120000) are now allowed — they are legitimate cleanup operations and produce a symlink-free worktree, so they pose no exfiltration risk. Adds two positive tests covering symlink→file conversion and symlink deletion; existing negative tests still pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pull_request.rs | 65 +++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/src/safeoutputs/create_pull_request.rs b/src/safeoutputs/create_pull_request.rs index 5090b11b..96d794bf 100644 --- a/src/safeoutputs/create_pull_request.rs +++ b/src/safeoutputs/create_pull_request.rs @@ -1814,7 +1814,15 @@ async fn push_file_change_skipping_symlinks( file_path ); } - _ => {} + Ok(_) => { + // Not a regular file (e.g. directory, fifo, socket) — silently skip. + } + Err(e) => { + warn!( + "Failed to read metadata for {}: {} — skipping", + file_path, e + ); + } } Ok(()) } @@ -1905,14 +1913,23 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { let path = line.splitn(3, ' ').nth(2).unwrap_or("").trim_matches('"'); validate_single_path(path)?; } else if line.starts_with("new file mode 120000") - || line.starts_with("old mode 120000") || line.starts_with("new mode 120000") { - // Reject symlink entries (git mode 120000 = symbolic link). - // Symlinks in a patch could be used to make Stage 3 follow them to - // arbitrary filesystem paths (e.g. /proc/self/environ) when collecting - // file changes to upload to ADO. - anyhow::bail!("Patch contains a symlink entry (mode 120000), which is not allowed"); + // Reject patch lines that INTRODUCE a symlink (git mode 120000). + // Either of these lines means the resulting tree contains a symlink: + // - "new file mode 120000" — a freshly added symlink + // - "new mode 120000" — an existing file converted to a symlink + // A symlink in the worktree could make Stage 3 follow it to arbitrary + // filesystem paths (e.g. /proc/self/environ) when collecting file + // changes to upload to ADO. + // + // We deliberately do NOT reject "old mode 120000" on its own: a patch + // with "old mode 120000" + "new mode 100644" converts a symlink into a + // regular file, which is a legitimate cleanup operation and produces a + // safe worktree. + anyhow::bail!( + "Patch introduces a symlink (mode 120000), which is not allowed" + ); } } Ok(()) @@ -2297,6 +2314,40 @@ index 0000000..abcdefg ); } + #[test] + fn test_validate_patch_paths_symlink_to_file_allowed() { + // Converting a symlink into a regular file is a legitimate cleanup + // operation: the resulting worktree contains no symlinks, so there is + // no exfiltration risk. The "old mode 120000" line on its own must not + // cause rejection. + let patch = "diff --git a/file.txt b/file.txt\n\ + old mode 120000\n\ + new mode 100644\n\ + --- a/file.txt\n\ + +++ b/file.txt\n\ + @@ -1 +1 @@\n\ + -/etc/passwd\n\ + +hello world\n"; + assert!( + validate_patch_paths(patch).is_ok(), + "patch converting symlink → regular file should be allowed" + ); + } + + #[test] + fn test_validate_patch_paths_symlink_deletion_allowed() { + // Deleting an existing symlink is also legitimate cleanup — no symlink + // remains in the worktree afterwards. + let patch = "diff --git a/link b/link\n\ + deleted file mode 120000\n\ + --- a/link\n\ + +++ /dev/null\n"; + assert!( + validate_patch_paths(patch).is_ok(), + "patch deleting an existing symlink should be allowed" + ); + } + #[test] fn test_validate_single_path_valid() { assert!(validate_single_path("src/main.rs").is_ok()); From 1b953bf6afbeab6f57958db20d3c643edcdf5aa3 Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 17 May 2026 21:05:30 +0100 Subject: [PATCH 5/9] feat(safeoutputs): surface skipped symlinks in PR description and tighten patch check Addresses PR #549 review feedback ("Address bugs and suggestions"): 1. Skipped symlinks are no longer invisible to the PR author. collect_changes_from_worktree and collect_changes_from_diff_tree now return (Vec, Vec). When non-empty, an explicit `> [!WARNING]` block listing each skipped path is appended to the PR description before posting, so the agent can see that some intended file content was deliberately dropped for safety. The Stage 3 warn! log is also retained for infrastructure observability. 2. Tightened patch-validation against suffix bypass. validate_patch_paths now compares the trimmed line for exact equality with "new file mode 120000" / "new mode 120000", rather than using starts_with. This eliminates any ambiguity from hypothetical future mode strings sharing the 120000 prefix, while still catching the real attack vectors. A new test exercises trailing whitespace and patch-body content containing '120000' to confirm neither bypasses nor false-rejects occur. 3. New tests: - test_validate_patch_paths_symlink_mode_suffix_not_bypass - test_append_skipped_symlink_notice_empty_is_passthrough - test_append_skipped_symlink_notice_lists_paths All 48 create_pull_request tests pass; full bin suite (1510 tests) green; cargo clippy clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pull_request.rs | 175 ++++++++++++++++++++++--- 1 file changed, 155 insertions(+), 20 deletions(-) diff --git a/src/safeoutputs/create_pull_request.rs b/src/safeoutputs/create_pull_request.rs index 96d794bf..ed851f52 100644 --- a/src/safeoutputs/create_pull_request.rs +++ b/src/safeoutputs/create_pull_request.rs @@ -955,12 +955,19 @@ impl Executor for CreatePrResult { }; debug!("Change detection output:\n{}", status_str); - let changes = if use_diff_tree { + let (changes, skipped_symlinks) = if use_diff_tree { collect_changes_from_diff_tree(&worktree_path, &status_str).await? } else { collect_changes_from_worktree(&worktree_path, &status_str).await? }; debug!("Collected {} file changes for push", changes.len()); + if !skipped_symlinks.is_empty() { + warn!( + "Skipped {} symlink(s) when collecting PR file changes: {}", + skipped_symlinks.len(), + skipped_symlinks.join(", ") + ); + } if changes.is_empty() { // Handle no-changes based on config @@ -1189,12 +1196,19 @@ impl Executor for CreatePrResult { // Append agent stats then provenance footer to description. // Footer goes last as the final unambiguous provenance marker. + // If any symlinks were skipped during file collection, surface that in the + // PR description so the agent/PR author can see that some intended file + // content was dropped for safety (otherwise the warning only appears in + // Stage 3 infrastructure logs). let description_with_stats = crate::agent_stats::append_stats_to_body( &self.description, ctx, config.include_stats, ); - let description_final = format!("{}{}", description_with_stats, generate_pr_footer()); + let description_with_symlink_notice = + append_skipped_symlink_notice(&description_with_stats, &skipped_symlinks); + let description_final = + format!("{}{}", description_with_symlink_notice, generate_pr_footer()); // Create the pull request via REST API info!("Creating pull request"); @@ -1623,8 +1637,9 @@ async fn add_reviewers_to_pr( async fn collect_changes_from_worktree( worktree_path: &std::path::Path, status_output: &str, -) -> anyhow::Result> { +) -> anyhow::Result<(Vec, Vec)> { let mut changes = Vec::new(); + let mut skipped_symlinks: Vec = Vec::new(); for line in status_output.lines() { if line.len() < 3 { @@ -1656,13 +1671,25 @@ async fn collect_changes_from_worktree( } // New/untracked files "??" | "A " | " A" | "AM" => { - push_file_change_skipping_symlinks(&mut changes, "add", file_path, &full_path) - .await?; + push_file_change_skipping_symlinks( + &mut changes, + &mut skipped_symlinks, + "add", + file_path, + &full_path, + ) + .await?; } // Modified files " M" | "M " | "MM" => { - push_file_change_skipping_symlinks(&mut changes, "edit", file_path, &full_path) - .await?; + push_file_change_skipping_symlinks( + &mut changes, + &mut skipped_symlinks, + "edit", + file_path, + &full_path, + ) + .await?; } // Renamed files - format is "R old_path -> new_path" // For "RM" (renamed + modified), we emit both a rename and an edit change. @@ -1688,6 +1715,7 @@ async fn collect_changes_from_worktree( let new_full_path = worktree_path.join(new_path_trimmed); push_file_change_skipping_symlinks( &mut changes, + &mut skipped_symlinks, "edit", new_path_trimmed, &new_full_path, @@ -1698,13 +1726,19 @@ async fn collect_changes_from_worktree( } // Other statuses - try to handle as edit if file exists _ => { - push_file_change_skipping_symlinks(&mut changes, "edit", file_path, &full_path) - .await?; + push_file_change_skipping_symlinks( + &mut changes, + &mut skipped_symlinks, + "edit", + file_path, + &full_path, + ) + .await?; } } } - Ok(changes) + Ok((changes, skipped_symlinks)) } /// Collect file changes from a git diff-tree --name-status output. @@ -1714,8 +1748,9 @@ async fn collect_changes_from_worktree( async fn collect_changes_from_diff_tree( worktree_path: &std::path::Path, diff_tree_output: &str, -) -> anyhow::Result> { +) -> anyhow::Result<(Vec, Vec)> { let mut changes = Vec::new(); + let mut skipped_symlinks: Vec = Vec::new(); for line in diff_tree_output.lines() { let line = line.trim(); @@ -1746,7 +1781,14 @@ async fn collect_changes_from_diff_tree( })); } else if status_code == "A" { // Added file - push_file_change_skipping_symlinks(&mut changes, "add", file_path, &full_path).await?; + push_file_change_skipping_symlinks( + &mut changes, + &mut skipped_symlinks, + "add", + file_path, + &full_path, + ) + .await?; } else if status_code.starts_with('R') && parts.len() >= 3 { // Renamed file: R100\told_path\tnew_path let old_path = file_path; @@ -1768,6 +1810,7 @@ async fn collect_changes_from_diff_tree( if status_code != "R100" { push_file_change_skipping_symlinks( &mut changes, + &mut skipped_symlinks, "edit", new_path, &new_full_path, @@ -1780,16 +1823,28 @@ async fn collect_changes_from_diff_tree( validate_single_path(dest_path)?; let dest_full_path = worktree_path.join(dest_path); - push_file_change_skipping_symlinks(&mut changes, "add", dest_path, &dest_full_path) - .await?; + push_file_change_skipping_symlinks( + &mut changes, + &mut skipped_symlinks, + "add", + dest_path, + &dest_full_path, + ) + .await?; } else { // Modified or other — read current content - push_file_change_skipping_symlinks(&mut changes, "edit", file_path, &full_path) - .await?; + push_file_change_skipping_symlinks( + &mut changes, + &mut skipped_symlinks, + "edit", + file_path, + &full_path, + ) + .await?; } } - Ok(changes) + Ok((changes, skipped_symlinks)) } /// Push a file change into `changes`, skipping symlinks with a warning. @@ -1798,8 +1853,13 @@ async fn collect_changes_from_diff_tree( /// logic used in multiple places when collecting changes from a worktree or diff tree. /// Uses `symlink_metadata` so symlinks are detected without being followed — this is /// the primary defense against symlink-following exfiltration attacks in Stage 3. +/// +/// Skipped symlink paths are appended to `skipped_symlinks` so the caller can surface +/// them in the PR description (the agent that produced the PR would otherwise have no +/// way to see that some of its intended file content was dropped). async fn push_file_change_skipping_symlinks( changes: &mut Vec, + skipped_symlinks: &mut Vec, change_type: &str, file_path: &str, full_path: &std::path::Path, @@ -1813,6 +1873,7 @@ async fn push_file_change_skipping_symlinks( "Skipping symlink in worktree: {} (symlink-following attack prevention)", file_path ); + skipped_symlinks.push(file_path.to_string()); } Ok(_) => { // Not a regular file (e.g. directory, fifo, socket) — silently skip. @@ -1827,6 +1888,29 @@ async fn push_file_change_skipping_symlinks( Ok(()) } +/// If any symlinks were skipped during PR file collection, append a clearly +/// marked notice to the PR description so the agent/author can see that some +/// intended content was deliberately dropped. +fn append_skipped_symlink_notice(body: &str, skipped_symlinks: &[String]) -> String { + if skipped_symlinks.is_empty() { + return body.to_string(); + } + let mut notice = String::from( + "\n\n> [!WARNING]\n\ + > **Symbolic links omitted from this pull request**\n\ + >\n\ + > The following symlinked paths were detected in the worktree and skipped\n\ + > when uploading file changes (symlinks are never followed for safety):\n\ + >\n", + ); + for path in skipped_symlinks { + notice.push_str("> - `"); + notice.push_str(path); + notice.push_str("`\n"); + } + format!("{}{}", body, notice) +} + /// Read a file and produce an ADO push change entry. /// Handles both text (rawtext) and binary (base64encoded) content. async fn read_file_change( @@ -1912,9 +1996,10 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { { let path = line.splitn(3, ' ').nth(2).unwrap_or("").trim_matches('"'); validate_single_path(path)?; - } else if line.starts_with("new file mode 120000") - || line.starts_with("new mode 120000") - { + } else if { + let trimmed = line.trim_end(); + trimmed == "new file mode 120000" || trimmed == "new mode 120000" + } { // Reject patch lines that INTRODUCE a symlink (git mode 120000). // Either of these lines means the resulting tree contains a symlink: // - "new file mode 120000" — a freshly added symlink @@ -1927,6 +2012,10 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { // with "old mode 120000" + "new mode 100644" converts a symlink into a // regular file, which is a legitimate cleanup operation and produces a // safe worktree. + // + // The match is on the exact trimmed line (rather than `starts_with`) so + // we don't accidentally reject hypothetical future mode strings that + // happen to share the "120000" prefix. anyhow::bail!( "Patch introduces a symlink (mode 120000), which is not allowed" ); @@ -2348,6 +2437,52 @@ index 0000000..abcdefg ); } + #[test] + fn test_validate_patch_paths_symlink_mode_suffix_not_bypass() { + // The mode check uses exact line equality (after trim_end), so a fake + // mode string with extra digits like "120000 1" or "1200001" must NOT + // be misclassified — but we still want any genuine "new file mode 120000" + // followed by trailing whitespace to be rejected. + let patch_trailing_ws = "diff --git a/x b/x\nnew file mode 120000 \n"; + assert!( + validate_patch_paths(patch_trailing_ws).is_err(), + "trailing whitespace must not let a symlink mode line bypass the check" + ); + + // A line that merely happens to share a "120000" prefix segment must + // not pass the check accidentally either — but it's also not a real + // git mode line. We just want to make sure exact-match doesn't reject + // benign content that contains "120000" in a non-mode context. + let patch_benign = "diff --git a/x b/x\n\ + --- a/x\n\ + +++ b/x\n\ + @@ -1 +1 @@\n\ + -count: 1200000\n\ + +count: 1200001\n"; + assert!( + validate_patch_paths(patch_benign).is_ok(), + "patch body lines containing '120000' as data must not be rejected" + ); + } + + #[test] + fn test_append_skipped_symlink_notice_empty_is_passthrough() { + let body = "Some PR description"; + let out = append_skipped_symlink_notice(body, &[]); + assert_eq!(out, body); + } + + #[test] + fn test_append_skipped_symlink_notice_lists_paths() { + let body = "Some PR description"; + let skipped = vec!["secrets.txt".to_string(), "subdir/leak".to_string()]; + let out = append_skipped_symlink_notice(body, &skipped); + assert!(out.starts_with(body)); + assert!(out.contains("Symbolic links omitted")); + assert!(out.contains("`secrets.txt`")); + assert!(out.contains("`subdir/leak`")); + } + #[test] fn test_validate_single_path_valid() { assert!(validate_single_path("src/main.rs").is_ok()); From 6d55eedf8c474bcd638e2db6ac94f7878db1dcf2 Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 17 May 2026 21:23:36 +0100 Subject: [PATCH 6/9] fix(safeoutputs): sanitize symlink paths embedded in PR description markdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses follow-up review feedback on PR #549. The append_skipped_symlink_notice helper previously embedded raw filenames inside an inline-code span in the PR description. Filenames may legally contain backticks (e.g. `fooar`) or control characters (newlines, tabs), which would terminate the code span and garble or break out of the blockquote. The agent is the adversary in this code path, so the risk is display-only (no secondary exfiltration vector), but the previous output was malformed when adversarial filenames were involved. CommonMark code spans do NOT honour backslash escapes — the backtick-count rule terminates the span instead — so the naive `path.replace('`', `\\`)` suggested in review is not actually an escape. Instead, sanitize_path_for_markdown: - Replaces backticks with apostrophes (visually clear, terminator-safe). - Collapses all ASCII control characters (newline, CR, tab, etc.) to '?'. Display-only sanitisation: the canonical path the agent originally requested is unchanged in the upload pipeline; only the markdown rendering of the skipped-symlinks notice is affected. Adds four targeted tests covering backticks, control characters, pass-through of normal paths, and end-to-end sanitisation through append_skipped_symlink_notice. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pull_request.rs | 70 +++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/src/safeoutputs/create_pull_request.rs b/src/safeoutputs/create_pull_request.rs index ed851f52..65eb62e2 100644 --- a/src/safeoutputs/create_pull_request.rs +++ b/src/safeoutputs/create_pull_request.rs @@ -1905,12 +1905,39 @@ fn append_skipped_symlink_notice(body: &str, skipped_symlinks: &[String]) -> Str ); for path in skipped_symlinks { notice.push_str("> - `"); - notice.push_str(path); + notice.push_str(&sanitize_path_for_markdown(path)); notice.push_str("`\n"); } format!("{}{}", body, notice) } +/// Make an arbitrary filesystem path safe to embed inside an inline-code span +/// in a markdown PR description. +/// +/// The agent that produced the PR is the adversary in this code path: it can +/// plant filenames containing characters that would otherwise break out of the +/// inline-code context and garble (or HTML-inject into) the description. +/// CommonMark code spans do NOT honour backslash escapes (the backtick-count +/// rule terminates the span instead), so naive `path.replace('`', "\\`")` is +/// not actually an escape — it just inserts a stray backslash. Instead we: +/// +/// - Replace backticks with an apostrophe (visually clear, terminator-safe). +/// - Collapse all ASCII control characters — including newlines, carriage +/// returns, and tabs — to a single `?` so a multi-line filename can't break +/// the blockquote. +/// +/// This is display-only sanitisation; the canonical path the agent actually +/// requested is unchanged in the upload pipeline. +fn sanitize_path_for_markdown(path: &str) -> String { + path.chars() + .map(|c| match c { + '`' => '\'', + c if c.is_control() => '?', + c => c, + }) + .collect() +} + /// Read a file and produce an ADO push change entry. /// Handles both text (rawtext) and binary (base64encoded) content. async fn read_file_change( @@ -2483,6 +2510,47 @@ index 0000000..abcdefg assert!(out.contains("`subdir/leak`")); } + #[test] + fn test_sanitize_path_for_markdown_replaces_backticks() { + // Backticks would otherwise terminate the inline-code span and let the + // adversarial filename break out of the PR description's blockquote. + let out = sanitize_path_for_markdown("foo`bar`baz"); + assert!(!out.contains('`'), "all backticks must be removed: {out}"); + assert_eq!(out, "foo'bar'baz"); + } + + #[test] + fn test_sanitize_path_for_markdown_collapses_control_chars() { + // Newlines, carriage returns, tabs, and other ASCII control characters + // are all replaced with '?' so a multi-line filename can't break the + // blockquote / list layout. + let out = sanitize_path_for_markdown("evil\nname\rwith\ttabs\x07bell"); + assert_eq!(out, "evil?name?with?tabs?bell"); + } + + #[test] + fn test_sanitize_path_for_markdown_passthrough_normal() { + let out = sanitize_path_for_markdown("src/safeoutputs/create_pull_request.rs"); + assert_eq!(out, "src/safeoutputs/create_pull_request.rs"); + } + + #[test] + fn test_append_skipped_symlink_notice_sanitizes_paths() { + // End-to-end: backticks and newlines in a filename must not break the + // blockquote. + let body = "Some PR description"; + let skipped = vec!["evil`name\nwith\rnewlines".to_string()]; + let out = append_skipped_symlink_notice(body, &skipped); + // The sanitised path appears in the listing, + assert!(out.contains("`evil'name?with?newlines`")); + // and the raw garbling characters are not anywhere in the notice. + let notice_only = &out[body.len()..]; + assert!( + !notice_only.contains('\n') || notice_only.lines().all(|l| l.is_empty() || l.starts_with('>')), + "every non-empty line of the notice must remain inside the blockquote" + ); + } + #[test] fn test_validate_single_path_valid() { assert!(validate_single_path("src/main.rs").is_ok()); From f9fc78fbc2e91973d26602e6bf9f164a46c600cd Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 17 May 2026 21:30:51 +0100 Subject: [PATCH 7/9] fix(safeoutputs): trim() mode-line check and differentiate NotFound metadata errors Two minor review fixes on PR #549. 1. validate_patch_paths now compares mode lines after a full trim() rather than trim_end(). trim_end() leaves leading whitespace intact, so a line like ' new file mode 120000' would silently bypass the check. Git's own format-patch never produces leading-indented mode lines so this was not a realistic attack path, but trim() costs nothing and closes the gap. The existing test now also covers the leading-whitespace and CRLF cases. 2. push_file_change_skipping_symlinks now distinguishes io::ErrorKind::NotFound from other metadata errors. NotFound is a normal transient condition (worktree mid-rebase, file pruned by git apply, etc.) and is logged at debug level. PermissionDenied and other unusual kinds remain at warn so triage isn't drowned by alert fatigue from benign races. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pull_request.rs | 51 ++++++++++++++++++++------ 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/src/safeoutputs/create_pull_request.rs b/src/safeoutputs/create_pull_request.rs index 65eb62e2..71bc5e21 100644 --- a/src/safeoutputs/create_pull_request.rs +++ b/src/safeoutputs/create_pull_request.rs @@ -1879,10 +1879,19 @@ async fn push_file_change_skipping_symlinks( // Not a regular file (e.g. directory, fifo, socket) — silently skip. } Err(e) => { - warn!( - "Failed to read metadata for {}: {} — skipping", - file_path, e - ); + // NotFound is a normal transient condition (worktree mid-rebase, file + // already pruned by git apply, etc.). Anything else — most notably + // PermissionDenied — is unusual and worth flagging at warn for triage. + if e.kind() == std::io::ErrorKind::NotFound { + debug!("File no longer present for {} — skipping", file_path); + } else { + warn!( + "Failed to read metadata for {}: {} (kind={:?}) — skipping", + file_path, + e, + e.kind() + ); + } } } Ok(()) @@ -2024,7 +2033,7 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { let path = line.splitn(3, ' ').nth(2).unwrap_or("").trim_matches('"'); validate_single_path(path)?; } else if { - let trimmed = line.trim_end(); + let trimmed = line.trim(); trimmed == "new file mode 120000" || trimmed == "new mode 120000" } { // Reject patch lines that INTRODUCE a symlink (git mode 120000). @@ -2040,9 +2049,10 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { // regular file, which is a legitimate cleanup operation and produces a // safe worktree. // - // The match is on the exact trimmed line (rather than `starts_with`) so - // we don't accidentally reject hypothetical future mode strings that - // happen to share the "120000" prefix. + // The match is on the fully trimmed line (rather than `starts_with` or + // `trim_end`) so neither leading nor trailing whitespace can bypass + // the check, and we don't accidentally reject hypothetical future mode + // strings that happen to share the "120000" prefix. anyhow::bail!( "Patch introduces a symlink (mode 120000), which is not allowed" ); @@ -2466,16 +2476,33 @@ index 0000000..abcdefg #[test] fn test_validate_patch_paths_symlink_mode_suffix_not_bypass() { - // The mode check uses exact line equality (after trim_end), so a fake - // mode string with extra digits like "120000 1" or "1200001" must NOT - // be misclassified — but we still want any genuine "new file mode 120000" - // followed by trailing whitespace to be rejected. + // The mode check uses full trim() equality, so neither leading nor + // trailing whitespace can bypass the check, while genuine ASCII text + // before/after the keyword stays distinguishable. let patch_trailing_ws = "diff --git a/x b/x\nnew file mode 120000 \n"; assert!( validate_patch_paths(patch_trailing_ws).is_err(), "trailing whitespace must not let a symlink mode line bypass the check" ); + // Git's own format-patch never produces leading-indented mode lines, but + // a hand-crafted patch could try to smuggle one through any matcher that + // anchored at the start of the line. Using trim() (not trim_end()) on the + // line ensures leading whitespace is no bypass either. + let patch_leading_ws = "diff --git a/x b/x\n new file mode 120000\n"; + assert!( + validate_patch_paths(patch_leading_ws).is_err(), + "leading whitespace must not let a symlink mode line bypass the check" + ); + + // Carriage returns (Windows line endings) are also whitespace and must + // not bypass. + let patch_crlf = "diff --git a/x b/x\nnew file mode 120000\r\n"; + assert!( + validate_patch_paths(patch_crlf).is_err(), + "trailing \\r must not let a symlink mode line bypass the check" + ); + // A line that merely happens to share a "120000" prefix segment must // not pass the check accidentally either — but it's also not a real // git mode line. We just want to make sure exact-match doesn't reject From cc70b25914542c6e19c5e458b579013a25241448 Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 17 May 2026 21:43:09 +0100 Subject: [PATCH 8/9] fix(safeoutputs): surface symlink-skip count and anchor mode check to non-ws header lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-up review fixes on PR #549. 1. All-symlinks early-return now reports the cause. When every file collected for the PR is a symlink, `changes` is empty and the executor previously returned the generic `No changes detected after applying patch` message ÔÇö the agent had no way to see that its files were dropped for safety (the per-symlink `warn!` only reached Stage 3 infrastructure logs). The IfNoChanges Error/Warn/Ignore branches now append ` (N symlink(s) were skipped for safety: path1, path2, ÔǪ)` to both the log and the ExecutionResult message whenever skipped_symlinks is non-empty. 2. validate_patch_paths mode check anchored to no-leading-whitespace lines. Previously `line.trim() == "new file mode 120000"` could be triggered by a diff CONTEXT line ( `" new file mode 120000"`, with a single leading space) ÔÇö after trim() the two are indistinguishable. Real git header lines never start with whitespace, while diff context lines always do, so the check now also requires the first character of the raw line to be non-whitespace. Trailing whitespace and `\r` still bypass-proofed by `trim()` on the right-hand side of the equality. Added a new explicit test that a context line whose body is literally `new file mode 120000` is allowed through. (The test patch is built with `String + &str` concatenation rather than backslash line-continuation, because Rust eats next-line leading whitespace after `\`, which would silently defeat the property under test.) 3. Documented TOCTOU assumption. Added a doc comment to push_file_change_skipping_symlinks acknowledging the theoretical TOCTOU window between symlink_metadata (lstat) and the subsequent tokio::fs::read inside read_file_change. Not exploitable in Stage 3's deployment model (no concurrent writer to the worktree), but the assumption is now explicit, with a pointer to the proper mitigation (O_NOFOLLOW fd reads) should the deployment model change. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pull_request.rs | 118 ++++++++++++++++++------- 1 file changed, 87 insertions(+), 31 deletions(-) diff --git a/src/safeoutputs/create_pull_request.rs b/src/safeoutputs/create_pull_request.rs index 71bc5e21..5fd67b43 100644 --- a/src/safeoutputs/create_pull_request.rs +++ b/src/safeoutputs/create_pull_request.rs @@ -970,25 +970,49 @@ impl Executor for CreatePrResult { } if changes.is_empty() { + // If every collected entry was a symlink, the agent's payload is + // effectively gone — surface that to the agent so it can debug + // (the per-symlink warn! only goes to Stage 3 infrastructure logs). + let symlink_suffix = if skipped_symlinks.is_empty() { + String::new() + } else { + format!( + " ({} symlink(s) were skipped for safety: {})", + skipped_symlinks.len(), + skipped_symlinks.join(", ") + ) + }; // Handle no-changes based on config match config.if_no_changes { IfNoChanges::Error => { - warn!("No changes detected after applying patch (if-no-changes: error)"); - return Ok(ExecutionResult::failure( - "No changes detected after applying patch".to_string(), - )); + warn!( + "No changes detected after applying patch (if-no-changes: error){}", + symlink_suffix + ); + return Ok(ExecutionResult::failure(format!( + "No changes detected after applying patch{}", + symlink_suffix + ))); } IfNoChanges::Ignore => { - info!("No changes detected after applying patch (if-no-changes: ignore)"); - return Ok(ExecutionResult::success( - "No changes detected — nothing to do".to_string(), - )); + info!( + "No changes detected after applying patch (if-no-changes: ignore){}", + symlink_suffix + ); + return Ok(ExecutionResult::success(format!( + "No changes detected — nothing to do{}", + symlink_suffix + ))); } IfNoChanges::Warn => { - warn!("No changes detected after applying patch (if-no-changes: warn)"); - return Ok(ExecutionResult::warning( - "No changes detected after applying patch".to_string(), - )); + warn!( + "No changes detected after applying patch (if-no-changes: warn){}", + symlink_suffix + ); + return Ok(ExecutionResult::warning(format!( + "No changes detected after applying patch{}", + symlink_suffix + ))); } } } @@ -1864,6 +1888,16 @@ async fn push_file_change_skipping_symlinks( file_path: &str, full_path: &std::path::Path, ) -> anyhow::Result<()> { + // Note: there is a theoretical TOCTOU window between the symlink_metadata + // (lstat) call below and the subsequent tokio::fs::read inside + // read_file_change (which follows symlinks). A concurrent rename(2) could + // swap a regular file for a symlink between the two syscalls. This is not + // exploitable in Stage 3's deployment model: the worktree has no concurrent + // writer (the agent's patch has already been applied; only this serial + // collector reads from it). Closing the window fully would require platform- + // specific O_NOFOLLOW open syscalls, which is overkill given the threat + // model. If that ever changes, switch to opening the fd here with + // O_NOFOLLOW and reading from the fd inside read_file_change. match tokio::fs::symlink_metadata(full_path).await { Ok(meta) if meta.file_type().is_file() => { changes.push(read_file_change(change_type, file_path, full_path).await?); @@ -2033,8 +2067,21 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { let path = line.splitn(3, ' ').nth(2).unwrap_or("").trim_matches('"'); validate_single_path(path)?; } else if { + // Only consider this a real header line if it has no leading + // whitespace — diff context lines are space-prefixed, so a line + // body of "new file mode 120000" inside a hunk would look identical + // to a real mode header after trim(). Real git header lines never + // start with whitespace; we require the same here to avoid false + // positives on adversarial-but-benign diff content. Added (+)/ + // removed (-) lines start with a non-whitespace prefix that + // survives trim() and so cannot match the exact mode-line strings. + let starts_with_ws = line + .chars() + .next() + .is_some_and(|c| c.is_whitespace()); let trimmed = line.trim(); - trimmed == "new file mode 120000" || trimmed == "new mode 120000" + !starts_with_ws + && (trimmed == "new file mode 120000" || trimmed == "new mode 120000") } { // Reject patch lines that INTRODUCE a symlink (git mode 120000). // Either of these lines means the resulting tree contains a symlink: @@ -2048,11 +2095,6 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { // with "old mode 120000" + "new mode 100644" converts a symlink into a // regular file, which is a legitimate cleanup operation and produces a // safe worktree. - // - // The match is on the fully trimmed line (rather than `starts_with` or - // `trim_end`) so neither leading nor trailing whitespace can bypass - // the check, and we don't accidentally reject hypothetical future mode - // strings that happen to share the "120000" prefix. anyhow::bail!( "Patch introduces a symlink (mode 120000), which is not allowed" ); @@ -2476,25 +2518,17 @@ index 0000000..abcdefg #[test] fn test_validate_patch_paths_symlink_mode_suffix_not_bypass() { - // The mode check uses full trim() equality, so neither leading nor - // trailing whitespace can bypass the check, while genuine ASCII text - // before/after the keyword stays distinguishable. + // The mode check is anchored to lines that have NO leading whitespace + // (real git header lines never start with whitespace; diff context + // lines always do). Trailing whitespace including \r is still stripped + // before equality, so a genuine header line cannot bypass via tail + // padding. let patch_trailing_ws = "diff --git a/x b/x\nnew file mode 120000 \n"; assert!( validate_patch_paths(patch_trailing_ws).is_err(), "trailing whitespace must not let a symlink mode line bypass the check" ); - // Git's own format-patch never produces leading-indented mode lines, but - // a hand-crafted patch could try to smuggle one through any matcher that - // anchored at the start of the line. Using trim() (not trim_end()) on the - // line ensures leading whitespace is no bypass either. - let patch_leading_ws = "diff --git a/x b/x\n new file mode 120000\n"; - assert!( - validate_patch_paths(patch_leading_ws).is_err(), - "leading whitespace must not let a symlink mode line bypass the check" - ); - // Carriage returns (Windows line endings) are also whitespace and must // not bypass. let patch_crlf = "diff --git a/x b/x\nnew file mode 120000\r\n"; @@ -2503,6 +2537,28 @@ index 0000000..abcdefg "trailing \\r must not let a symlink mode line bypass the check" ); + // A diff CONTEXT line whose body happens to be the literal string + // "new file mode 120000" (i.e. a leading-space-prefixed hunk line) + // must NOT trigger the check — it's data, not a header. Git's own + // format-patch never emits leading-indented mode headers, so anchoring + // the check to no-leading-whitespace is airtight. + // + // Note: we build this patch with explicit "\n" concatenation rather + // than backslash continuation because backslash continuation in Rust + // string literals eats the leading whitespace on the next line, which + // would defeat the very property we're testing. + let patch_context_line = String::new() + + "diff --git a/x b/x\n" + + "--- a/x\n" + + "+++ b/x\n" + + "@@ -1,2 +1,2 @@\n" + + " new file mode 120000\n" + + " keep line\n"; + assert!( + validate_patch_paths(&patch_context_line).is_ok(), + "diff context line containing the mode string as data must not be rejected" + ); + // A line that merely happens to share a "120000" prefix segment must // not pass the check accidentally either — but it's also not a real // git mode line. We just want to make sure exact-match doesn't reject From d658c0ff598f5a7c7ba7b13cc941b009899eb946 Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 17 May 2026 21:51:43 +0100 Subject: [PATCH 9/9] fix(safeoutputs): filter bidi controls in symlink paths and sanitize executor messages Two follow-up review fixes on PR #549. 1. sanitize_path_for_markdown now filters Unicode bidi/zero-width formatting characters in addition to ASCII control chars and backticks. char::is_control() only covers U+0000-U+001F and U+007F-U+009F, so characters like U+202E (RIGHT-TO-LEFT OVERRIDE), U+2066 (LRI), and U+FEFF (BOM) previously passed through unchanged. A filename containing U+202E could visually reverse part of its displayed name inside the GitHub code span and deceive a PR author into misreading the skipped-symlinks warning. No exfiltration vector (the symlink is already blocked) but a real display-spoofing concern; now collapsed to '?' alongside other formatting chars. Explicit ranges filtered: U+200B-U+200F (ZW joiners + LRM/RLM), U+202A-U+202E (LRE/RLE/PDF/LRO/RLO), U+2066-U+2069 (LRI/RLI/FSI/PDI), U+FEFF. Ordinary non-ASCII letters/emoji/CJK pass through unchanged. 2. ExecutionResult symlink_suffix paths now reuse sanitize_path_for_markdown. The early-return failure/success/warning messages previously embedded raw skipped-symlink filenames via skipped_symlinks.join(', '). Filenames containing commas, control chars, or bidi controls could garble or spoof the message. Sanitizing per-path before joining keeps the message readable and consistent with the PR-description block. Two new tests: - test_sanitize_path_for_markdown_filters_bidi_controls (U+202E + sample set) - test_sanitize_path_for_markdown_keeps_normal_unicode (cafe / Japanese / emoji passthrough) Full suite (1516 tests) green; cargo clippy clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pull_request.rs | 69 +++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/src/safeoutputs/create_pull_request.rs b/src/safeoutputs/create_pull_request.rs index 5fd67b43..58546b9e 100644 --- a/src/safeoutputs/create_pull_request.rs +++ b/src/safeoutputs/create_pull_request.rs @@ -973,13 +973,20 @@ impl Executor for CreatePrResult { // If every collected entry was a symlink, the agent's payload is // effectively gone — surface that to the agent so it can debug // (the per-symlink warn! only goes to Stage 3 infrastructure logs). + // Paths are passed through sanitize_path_for_markdown so adversarial + // filenames (bidi overrides, control chars, etc.) cannot garble the + // ExecutionResult message or the corresponding log line. let symlink_suffix = if skipped_symlinks.is_empty() { String::new() } else { + let sanitized: Vec = skipped_symlinks + .iter() + .map(|p| sanitize_path_for_markdown(p)) + .collect(); format!( " ({} symlink(s) were skipped for safety: {})", - skipped_symlinks.len(), - skipped_symlinks.join(", ") + sanitized.len(), + sanitized.join(", ") ) }; // Handle no-changes based on config @@ -1955,19 +1962,29 @@ fn append_skipped_symlink_notice(body: &str, skipped_symlinks: &[String]) -> Str } /// Make an arbitrary filesystem path safe to embed inside an inline-code span -/// in a markdown PR description. +/// in a markdown PR description (and, by extension, any human-readable message +/// such as the `ExecutionResult` failure suffix). /// /// The agent that produced the PR is the adversary in this code path: it can /// plant filenames containing characters that would otherwise break out of the -/// inline-code context and garble (or HTML-inject into) the description. -/// CommonMark code spans do NOT honour backslash escapes (the backtick-count -/// rule terminates the span instead), so naive `path.replace('`', "\\`")` is -/// not actually an escape — it just inserts a stray backslash. Instead we: +/// inline-code context, garble the description, or visually deceive the PR +/// author. CommonMark code spans do NOT honour backslash escapes (the +/// backtick-count rule terminates the span instead), so naive +/// `path.replace('`', "\\`")` is not actually an escape — it just inserts a +/// stray backslash. Instead we: /// /// - Replace backticks with an apostrophe (visually clear, terminator-safe). /// - Collapse all ASCII control characters — including newlines, carriage /// returns, and tabs — to a single `?` so a multi-line filename can't break /// the blockquote. +/// - Collapse Unicode bidirectional-control / zero-width characters to `?`. +/// `char::is_control()` in Rust covers only U+0000–U+001F and U+007F–U+009F, +/// so explicit ranges are required: U+200B–U+200F (ZW joiners + LRM/RLM), +/// U+202A–U+202E (LRE/RLE/PDF/LRO/RLO), U+2066–U+2069 (LRI/RLI/FSI/PDI), +/// U+FEFF (BOM / ZWNBSP). Without this, a filename containing U+202E could +/// visually reverse a section of the displayed name and deceive the PR +/// author into misreading the warning (no exfiltration vector, but a real +/// display-spoofing concern). /// /// This is display-only sanitisation; the canonical path the agent actually /// requested is unchanged in the upload pipeline. @@ -1976,6 +1993,12 @@ fn sanitize_path_for_markdown(path: &str) -> String { .map(|c| match c { '`' => '\'', c if c.is_control() => '?', + // Bidi controls + zero-width formatting characters that + // `is_control()` does not cover. See doc comment above. + '\u{200B}'..='\u{200F}' + | '\u{202A}'..='\u{202E}' + | '\u{2066}'..='\u{2069}' + | '\u{FEFF}' => '?', c => c, }) .collect() @@ -2617,6 +2640,38 @@ index 0000000..abcdefg assert_eq!(out, "src/safeoutputs/create_pull_request.rs"); } + #[test] + fn test_sanitize_path_for_markdown_filters_bidi_controls() { + // U+202E (RIGHT-TO-LEFT OVERRIDE) and friends can visually reverse a + // section of the displayed filename and deceive a PR author into + // misreading the warning. They are NOT covered by char::is_control(), + // so an explicit range filter is required. + let bidi_attack = "evil\u{202E}txt.sr"; // would display as "evilrs.txt" in some renderers + let out = sanitize_path_for_markdown(bidi_attack); + assert!( + !out.contains('\u{202E}'), + "U+202E (RLO) must be filtered: got {:?}", + out + ); + assert_eq!(out, "evil?txt.sr"); + + // Sample of other bidi/zero-width formatting characters that must all + // be collapsed. + let various = "a\u{200B}b\u{200E}c\u{202A}d\u{2066}e\u{FEFF}f"; + let out = sanitize_path_for_markdown(various); + assert_eq!(out, "a?b?c?d?e?f"); + } + + #[test] + fn test_sanitize_path_for_markdown_keeps_normal_unicode() { + // Ordinary non-ASCII characters (letters, accents, CJK, emoji) are + // legal in filenames and must not be replaced — only formatting + // characters that affect rendering are filtered. + let path = "café/日本語/💡.txt"; + let out = sanitize_path_for_markdown(path); + assert_eq!(out, path); + } + #[test] fn test_append_skipped_symlink_notice_sanitizes_paths() { // End-to-end: backticks and newlines in a filename must not break the