From ac4537a5340a16d395b74a120eb9cb42846d65be Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 10 May 2026 07:26:34 +0100 Subject: [PATCH] feat(safeoutputs): add ado-aw-debug.create-issue for dogfood pipelines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a new top-level `ado-aw-debug:` front-matter section that gates two debug-only knobs intended for dogfooding ado-aw pipelines from Azure DevOps back into `githubnext/ado-aw`: * `skip-integrity: bool` — OR-ed with the existing `--skip-integrity` CLI flag. * `create-issue:` — files a GitHub issue against an operator-configured target repository when the agent calls the `create-issue` MCP tool. The `create-issue` tool is **not** a regular safe output. It is default-deny at three independent layers: 1. The SafeOutputs MCP filter strips it from the tool router via a new `DEBUG_ONLY_TOOLS` constant unless explicitly enabled. 2. The compiler only emits `--enabled-tools create-issue` when `ado-aw-debug.create-issue:` is set, and rejects `safe-outputs.create-issue:` outright so the tool can't be smuggled in via the regular safe-outputs surface. 3. Stage 3 maintains an `ExecutionContext.debug_enabled_tools` set populated only from `ado-aw-debug:`. The executor refuses any `create-issue` NDJSON entry whose tool name is absent from the set, closing the gap where a forged entry could otherwise bypass the MCP-layer gate. Stage 3 authenticates against GitHub using a dedicated `ADO_AW_DEBUG_GITHUB_TOKEN` ADO pipeline variable surfaced through a new `github_token` field on `ExecutionContext`. The token is separate from the read-only `GITHUB_TOKEN` the agent sees in Stage 1. Other notable design choices: * `target-repo` is operator-only; the agent has no parameter to redirect issues elsewhere. * `allowed-labels` is **default-deny** — empty/absent rejects every agent-supplied label. Operators must opt in to unrestricted with `["*"]`. 
* The `target-repo` validator follows GitHub's login spec (no underscores or dots in owner segments). * Final issue title length is validated **after** `title-prefix` application. * Stage 3 error messages neutralise `##vso[…]` sequences in agent-supplied labels so a forged NDJSON entry can't echo a live pipeline command into stdout. Adds 30+ targeted tests across `safeoutputs::create_issue`, `mcp::tests`, `compile::common`, `compile::types`, and a fixture-based integration test asserting the YAML wiring. All 1356 dev-mode tests pass; clippy net-new errors: 0. See `docs/ado-aw-debug.md` for the full schema, security framing, and PAT setup instructions. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- AGENTS.md | 3 + docs/ado-aw-debug.md | 195 +++++ docs/cli.md | 2 +- docs/safe-outputs.md | 6 + docs/template-markers.md | 9 +- examples/dogfood-failure-reporter.md | 51 ++ src/compile/common.rs | 403 +++++++++- src/compile/types.rs | 111 +++ src/execute.rs | 27 +- src/main.rs | 22 + src/mcp.rs | 163 +++- src/safeoutputs/create_issue.rs | 830 +++++++++++++++++++++ src/safeoutputs/create_pr.rs | 2 + src/safeoutputs/mod.rs | 14 + src/safeoutputs/result.rs | 13 + src/safeoutputs/upload_build_attachment.rs | 2 + tests/compiler_tests.rs | 59 ++ tests/fixtures/ado-aw-debug-agent.md | 22 + 18 files changed, 1888 insertions(+), 46 deletions(-) create mode 100644 docs/ado-aw-debug.md create mode 100644 examples/dogfood-failure-reporter.md create mode 100644 src/safeoutputs/create_issue.rs create mode 100644 tests/fixtures/ado-aw-debug-agent.md diff --git a/AGENTS.md b/AGENTS.md index 02de6d1f..322b753b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -172,6 +172,9 @@ index to jump to the right page. - [`docs/safe-outputs.md`](docs/safe-outputs.md) — full reference for every safe-output tool agents can use to propose actions (PRs, work items, wiki pages, comments, etc.) plus their per-agent configuration. 
+- [`docs/ado-aw-debug.md`](docs/ado-aw-debug.md) — debug-only `ado-aw-debug:` + front-matter section (`skip-integrity`, `create-issue` for filing GitHub + issues from dogfood pipelines). NOT a regular safe-output. ### Compiler internals & operations diff --git a/docs/ado-aw-debug.md b/docs/ado-aw-debug.md new file mode 100644 index 00000000..bc08e46b --- /dev/null +++ b/docs/ado-aw-debug.md @@ -0,0 +1,195 @@ +# `ado-aw-debug:` — Debug-only front-matter section + +_Part of the [ado-aw documentation](../AGENTS.md)._ + +> ⚠️ **This section is for dogfood pipelines only.** +> Anything declared under `ado-aw-debug:` is **not** part of the regular +> agent surface and is not recommended for general use. Knobs here exist so +> the team can validate `githubnext/ado-aw` changes against real Azure +> DevOps pipelines and file failures back to GitHub for triage. Each knob +> bypasses or weakens a normal safety control. + +The compiler accepts a top-level `ado-aw-debug:` block in agent front +matter. Currently exposed knobs: + +| Knob | Purpose | Default | +| --- | --- | --- | +| `skip-integrity` | Omit the "Verify pipeline integrity" step from the generated YAML. OR-ed with the `--skip-integrity` CLI flag. | `false` | +| `create-issue` | Enable the [debug-only `create-issue`](#create-issue) safe output. | absent (disabled) | + +Unrecognised keys under `ado-aw-debug:` cause a compile-time error +(`#[serde(deny_unknown_fields)]`). + +## `create-issue` + +Files a GitHub issue against an operator-configured target repository. +Used to surface failures from ADO-hosted dogfood pipelines back to +`githubnext/ado-aw` for triage. + +### Why it's gated + +`create-issue` is **default-deny** at three layers: + +1. **MCP layer.** The SafeOutputs MCP server lists `create-issue` in + [`DEBUG_ONLY_TOOLS`][debug-only-tools], so the route is removed from + the tool router unless the compiler explicitly opts in via + `--enabled-tools`. +2. 
 **Compiler layer.** `--enabled-tools create-issue` is only emitted + when `ado-aw-debug.create-issue:` is present in front matter. The + compiler also rejects `safe-outputs.create-issue:` outright, so the + tool can't be smuggled in via the regular safe-outputs surface. +3. **Executor layer.** Stage 3 maintains a separate + `ExecutionContext.debug_enabled_tools` set populated only from + `ado-aw-debug:`. The executor refuses any NDJSON `create-issue` + entry that isn't in that set, so a forged or smuggled NDJSON entry + fails closed before any token is read. + +[debug-only-tools]: ../src/safeoutputs/mod.rs + +### Front-matter schema + +```yaml +ado-aw-debug: + create-issue: + target-repo: githubnext/ado-aw # REQUIRED. Operator-only; agent has no override. + title-prefix: "[pipeline-failure] " # Optional; prepended to every agent title. + labels: # Optional; static labels always applied. + - pipeline-failure + - automated + allowed-labels: # Optional; default-deny — see below. + - "agent-*" + - "pipeline-failure" + assignees: # Optional; static assignees always applied. + - "jamesdevine" + max: 3 # Optional; per-run budget. Default 1. +``` + +* **`target-repo`** is required. Format `owner/repo`. The agent has no + parameter to override it; you cannot redirect issues to a different + repository at runtime. +* **`title-prefix`** is prepended at execution time. The final title length + (prefix + agent title) must be ≤ 256 characters; longer titles fail at + Stage 3. +* **`labels`** are applied unconditionally to every issue, on top of any + agent-supplied labels that pass `allowed-labels`. +* **`allowed-labels`** is **default-deny**: an empty or absent list means + **no agent-supplied labels are accepted**. To accept any agent label, + set `allowed-labels: ["*"]` explicitly. Patterns may include `*` + wildcards (e.g. `"agent-*"`). +* **Allowed-label matching is case-insensitive.** It uses the same + `tag_matches_pattern` helper as ADO tag allow-lists. 
GitHub labels are + case-sensitive, so `allowed-labels: ["safe"]` will also admit + `SAFE` and `Safe` — keep that in mind when modelling policy. +* **`assignees`** are merged with agent-supplied assignees. There is + intentionally no `allowed-assignees` allowlist in v1; if you need + one, configure assignees only via the static `assignees:` list and + skip the agent parameter. +* **`max`** controls per-run budget the same way it does for other + safe-output tools. + +### Agent-supplied parameters + +The agent calls the `create-issue` MCP tool with: + +```jsonc +{ + "title": "Pipeline failure on main", + "body": "", + "labels": ["pipeline-failure"], // optional + "assignees": ["copilot"] // optional +} +``` + +The MCP-side `Validate` impl rejects ADO pipeline-command sequences in +labels and assignees (see [src/safeoutputs/create_issue.rs](../src/safeoutputs/create_issue.rs)). +Stage 3 also neutralises `##vso[…]` in any error messages it produces, so +agent-supplied content cannot escape the executor's stdout. + +### Pipeline variable: `ADO_AW_DEBUG_GITHUB_TOKEN` + +Stage 3 authenticates against GitHub using the +**`ADO_AW_DEBUG_GITHUB_TOKEN`** ADO pipeline variable. The compiler emits + +```yaml +env: + SYSTEM_ACCESSTOKEN: $(SC_WRITE_TOKEN) # if write permissions: are set + ADO_AW_DEBUG_GITHUB_TOKEN: $(ADO_AW_DEBUG_GITHUB_TOKEN) # only when ado-aw-debug.create-issue is set +``` + +into the executor step's `env:` block. The token is **not** exposed to +the agent in Stage 1 — the read-only `GITHUB_TOKEN` the agent sees is a +separate variable wired through `engine.env` and used only for GitHub +MCP read access. + +### Setting up the PAT + +1. **Generate a fine-grained PAT** scoped to **only** `target-repo` (e.g. + `githubnext/ado-aw`). Required permissions: + * Repository access: only the target repo. + * Permissions: **Issues** = Read and write. Nothing else. +2. **Store as a secret pipeline variable** named exactly + `ADO_AW_DEBUG_GITHUB_TOKEN`. Mark it secret. 
Do **not** copy it into + `engine.env` or any non-secret variable. +3. **Confirm the operator-configured target-repo matches the PAT scope.** + The compiler validator only checks shape (`owner/repo`); it cannot + verify the PAT has access. If the PAT lacks Issues:write, the Stage 3 + call fails with the GitHub API error and Stage 3 reports + `succeeded with issues`. +4. `ado-aw configure` does **not** automate this variable today — set it + manually in the ADO pipeline definition. + +### Auto-footer + +Every issue gets an auto-appended traceability footer that looks like: + +```markdown + +--- +Pipeline: `dogfood-failure-reporter` +Run: +Trigger: `Manual` +``` + +The `` marker is stable so that future tooling can locate +the generated content without parsing prose. The footer is built from +`BUILD_BUILDID`, `BUILD_DEFINITIONNAME`, `BUILD_REASON`, +`SYSTEM_TEAMFOUNDATIONCOLLECTIONURI` and `SYSTEM_TEAMPROJECT` — these are +present whenever Stage 3 runs inside an ADO pipeline. + +If your pipeline / org / project names are sensitive, do not enable +`create-issue` against a public repo. + +### Security checklist + +- [ ] Target repo's GitHub PAT is scoped to that repo only and only has + Issues:write. +- [ ] `ADO_AW_DEBUG_GITHUB_TOKEN` is stored as a secret pipeline + variable, never hard-coded or printed. +- [ ] `allowed-labels` is set explicitly. Empty means default-deny; + `["*"]` accepts any agent label — pick deliberately. +- [ ] `target-repo` is private if the agent's prompts or pipeline + metadata are sensitive (the auto-footer publishes ADO run URLs and + pipeline names). +- [ ] `skip-integrity` is **not** enabled in pipelines triggered by + untrusted PRs. + +## `skip-integrity` + +Equivalent to passing `--skip-integrity` on the `ado-aw compile` CLI. +Setting either OR setting both omits the `Verify pipeline integrity` +step from the generated YAML. 
+ +The integrity step downloads the same `ado-aw` binary the pipeline was +compiled with and runs `ado-aw check` against the committed pipeline +file. Without it, a tampered `*.yml` won't be caught at run time. + +Use this only for short-lived dogfood pipelines where you're iterating +on the compiler and re-compiling frequently. + +## See also + +- [`docs/safe-outputs.md`](safe-outputs.md) — regular safe-outputs + surface (`create-issue` is **not** in it). +- [`docs/cli.md`](cli.md) — `--skip-integrity` CLI flag. +- [`docs/template-markers.md`](template-markers.md) — `{{ executor_ado_env }}` + and `{{ integrity_check }}` markers and their conditional behaviour. diff --git a/docs/cli.md b/docs/cli.md index bb272c69..a9840ae6 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -13,7 +13,7 @@ Global flags (apply to all subcommands): `--verbose, -v` (enable info-level logg - The agent auto-downloads the ado-aw compiler and handles the full lifecycle (create → compile → check) - `compile []` - Compile a markdown file to Azure DevOps pipeline YAML. If no path is given, auto-discovers and recompiles all detected agentic pipelines in the current directory. - `--output, -o ` - Optional output path for the generated YAML (only valid when a path is provided). If the path is an existing directory, the compiled YAML is written inside that directory using the default filename derived from the markdown source (e.g. `foo.md` → `/foo.lock.yml`). - - `--skip-integrity` - *(debug builds only)* Omit the "Verify pipeline integrity" step from the generated pipeline. Useful during local development when the compiled output won't match a released compiler version. This flag is not available in release builds. + - `--skip-integrity` - *(debug builds only)* Omit the "Verify pipeline integrity" step from the generated pipeline. Useful during local development when the compiled output won't match a released compiler version. This flag is not available in release builds. 
OR-ed with `ado-aw-debug.skip-integrity:` in front matter — either is sufficient. See [`docs/ado-aw-debug.md`](ado-aw-debug.md). - `--debug-pipeline` - *(debug builds only)* Include MCPG debug diagnostics in the generated pipeline: `DEBUG=*` environment variable for verbose MCPG logging, stderr streaming to log files, and a "Verify MCP backends" step that probes each backend with MCP initialize + tools/list before the agent runs. This flag is not available in release builds. - `check ` - Verify that a compiled pipeline matches its source markdown - `` - Path to the pipeline YAML file to verify diff --git a/docs/safe-outputs.md b/docs/safe-outputs.md index d070fbdf..8e4fabed 100644 --- a/docs/safe-outputs.md +++ b/docs/safe-outputs.md @@ -2,6 +2,12 @@ _Part of the [ado-aw documentation](../AGENTS.md)._ +> ℹ️ The debug-only `create-issue` tool (used by dogfood pipelines to file +> failure reports back to GitHub) is **not** a safe output and is not +> configurable here. It is gated by a separate `ado-aw-debug:` front-matter +> section and stripped from the SafeOutputs MCP server unless explicitly +> enabled. See [`docs/ado-aw-debug.md`](ado-aw-debug.md). + ## Safe Outputs Configuration The front matter supports a `safe-outputs:` field for configuring specific tool behaviors: diff --git a/docs/template-markers.md b/docs/template-markers.md index a5fe6dae..3780f7a3 100644 --- a/docs/template-markers.md +++ b/docs/template-markers.md @@ -265,7 +265,7 @@ Generates the "Verify pipeline integrity" pipeline step that downloads the relea The step sets `workingDirectory: {{ trigger_repo_directory }}` so that the relative `{{ pipeline_path }}` argument resolves correctly when `checkout:` produces a multi-repo `$(Build.SourcesDirectory)` layout, and so `ado-aw check`'s internal recompile can infer the ADO org from the trigger repo's git remote. 
-When the compiler is built with `--skip-integrity` (debug builds only), this placeholder is replaced with an empty string and the integrity step is omitted from the generated pipeline. +When the compiler is built with `--skip-integrity` (debug builds only) **OR** when the agent's front matter sets `ado-aw-debug.skip-integrity: true`, this placeholder is replaced with an empty string and the integrity step is omitted from the generated pipeline. The two flags are OR-ed — either is sufficient. See [`docs/ado-aw-debug.md`](ado-aw-debug.md). ## {{ mcpg_debug_flags }} @@ -444,9 +444,12 @@ If `permissions.write` is not configured, this marker is replaced with an empty ## {{ executor_ado_env }} -Generates the complete `env:` block (including the `env:` key) for the Stage 3 executor step when `permissions.write` is configured. Sets `SYSTEM_ACCESSTOKEN` to the write service connection token (`SC_WRITE_TOKEN`). +Generates the complete `env:` block (including the `env:` key) for the Stage 3 executor step. The block contains zero, one, or two lines depending on which features are configured: -If `permissions.write` is not configured, this marker is replaced with an empty string so that no `env:` block is emitted at all. Note: `System.AccessToken` is never used directly — all ADO tokens come from explicitly configured service connections. +* `SYSTEM_ACCESSTOKEN: $(SC_WRITE_TOKEN)` — emitted when `permissions.write` is configured. Provides the write-capable ADO token to the executor. +* `ADO_AW_DEBUG_GITHUB_TOKEN: $(ADO_AW_DEBUG_GITHUB_TOKEN)` — emitted when `ado-aw-debug.create-issue` is configured. Provides the GitHub PAT used by the debug-only `create-issue` safe output. See [`docs/ado-aw-debug.md`](ado-aw-debug.md). + +If neither feature is configured, this marker is replaced with an empty string so that no `env:` block is emitted at all. 
Note: `System.AccessToken` is never used directly — all ADO tokens come from explicitly configured service connections, and the GitHub PAT is sourced from a dedicated pipeline variable separate from the read-only `GITHUB_TOKEN` the agent sees in Stage 1. ## {{ compiler_version }} diff --git a/examples/dogfood-failure-reporter.md b/examples/dogfood-failure-reporter.md new file mode 100644 index 00000000..f615874c --- /dev/null +++ b/examples/dogfood-failure-reporter.md @@ -0,0 +1,51 @@ +--- +name: "Dogfood Failure Reporter" +description: "Files a GitHub issue on githubnext/ado-aw when a dogfood pipeline run fails" +on: + schedule: daily +permissions: + read: my-read-arm-connection +ado-aw-debug: + create-issue: + target-repo: githubnext/ado-aw + title-prefix: "[pipeline-failure] " + labels: + - pipeline-failure + - automated + allowed-labels: + - "agent-*" + - "pipeline-failure" + assignees: + - jamesdevine + max: 3 +--- + +## Dogfood Failure Reporter + +You are a dogfood failure-reporting agent for `githubnext/ado-aw`. You run +in Azure DevOps inside an AWF-isolated sandbox. + +### Tasks + +1. Read the pipeline run logs available under `$BUILD_SOURCESDIRECTORY` + for any signs of recent failures. +2. For each distinct failure, file **one** GitHub issue using the + `create-issue` MCP tool with: + - A concise `title` describing the failure. + - A markdown `body` with reproduction steps, log excerpts, and links + to relevant ADO build URLs. + - `labels: ["pipeline-failure"]` (must match the `allowed-labels` allowlist + configured by the operator). +3. Limit yourself to **at most 3** issues per run (the `max` budget). +4. If you cannot file an issue (e.g., the failure isn't reproducible), + call `report-incomplete` instead — do **not** invent details. + +### Important + +- Do not attempt to redirect issues to a different repository — the agent + has no `target_repo` parameter and the target is fixed by the operator. 
+- The `ADO_AW_DEBUG_GITHUB_TOKEN` PAT is **not** visible to you; it is + used only by Stage 3 to authenticate against GitHub. +- Issues are reviewed for prompt injection by Stage 2 before they are + filed, so do not include text that looks like ADO pipeline commands + (`##vso[...]`) — they will be flagged and the run rejected. diff --git a/src/compile/common.rs b/src/compile/common.rs index dfee1b57..79c76a87 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -701,6 +701,90 @@ pub fn generate_integrity_check(skip: bool) -> String { .to_string() } +/// Returns `true` when the agent's front matter sets +/// `ado-aw-debug.create-issue:` — the gate that activates the debug-only +/// `create-issue` safe output. +pub(crate) fn debug_create_issue_enabled(front_matter: &FrontMatter) -> bool { + front_matter + .ado_aw_debug + .as_ref() + .and_then(|d| d.create_issue.as_ref()) + .is_some() +} + +/// Validate the `ado-aw-debug:` section. +/// +/// When `create-issue:` is present: +/// * `target-repo` is required and must be `owner/repo`-shaped. +/// * Operator-supplied strings (target-repo, title-prefix, labels, +/// allowed-labels, assignees) must not contain ADO pipeline-injection +/// sequences (per `reject_pipeline_injection`). +/// +/// Independently, `safe-outputs:` must NOT contain any `DEBUG_ONLY_TOOLS` +/// keys. The MCP layer ignores them, but their config would otherwise +/// flow into `ctx.tool_configs` and create a path for forged NDJSON +/// entries to bypass the `ado-aw-debug` gate at Stage 3. +/// +/// Pure config check — no I/O, runs at compile time. +pub fn validate_ado_aw_debug_config(front_matter: &FrontMatter) -> Result<()> { + use crate::safeoutputs::DEBUG_ONLY_TOOLS; + + // Defence-in-depth: reject any debug-only tool name appearing under the + // regular safe-outputs surface. There is no legitimate reason for it to + // be there — it's either a typo or an attempt to smuggle a debug-only + // tool into a non-debug pipeline. 
+ for debug_tool in DEBUG_ONLY_TOOLS { + if front_matter.safe_outputs.contains_key(*debug_tool) { + anyhow::bail!( + "safe-outputs.{0} is a debug-only tool and must be configured \ + under `ado-aw-debug.{0}` instead of `safe-outputs.{0}`. \ + The MCP layer hides debug-only tools by default; the \ + `ado-aw-debug:` section is the only place to enable them.", + debug_tool + ); + } + } + + let Some(debug) = front_matter.ado_aw_debug.as_ref() else { + return Ok(()); + }; + let Some(ci) = debug.create_issue.as_ref() else { + return Ok(()); + }; + + crate::safeoutputs::validate_target_repo(&ci.target_repo)?; + + crate::validate::reject_pipeline_injection( + &ci.target_repo, + "ado-aw-debug.create-issue.target-repo", + )?; + if let Some(prefix) = ci.title_prefix.as_deref() { + crate::validate::reject_pipeline_injection( + prefix, + "ado-aw-debug.create-issue.title-prefix", + )?; + } + for label in &ci.labels { + crate::validate::reject_pipeline_injection( + label, + "ado-aw-debug.create-issue.labels", + )?; + } + for label in &ci.allowed_labels { + crate::validate::reject_pipeline_injection( + label, + "ado-aw-debug.create-issue.allowed-labels", + )?; + } + for assignee in &ci.assignees { + crate::validate::reject_pipeline_injection( + assignee, + "ado-aw-debug.create-issue.assignees", + )?; + } + Ok(()) +} + /// Generate debug pipeline replacement values for template markers. /// /// When `debug` is `true`, returns content for MCPG debug diagnostics: @@ -910,23 +994,54 @@ pub fn generate_acquire_ado_token(service_connection: Option<&str>, variable_nam } /// Generate the env block entries for the executor step (Stage 3 Execution). -/// Uses the write token from the write service connection. -/// When not configured, omits ADO access tokens entirely. 
-pub fn generate_executor_ado_env(write_service_connection: Option<&str>) -> String { - match write_service_connection { - // The two-space indent on the value line is the YAML relative indent for a - // key nested under `env:`. replace_with_indent prepends the base indentation - // from the marker's position in the template to each subsequent line. - Some(_) => "env:\n SYSTEM_ACCESSTOKEN: $(SC_WRITE_TOKEN)".to_string(), - None => String::new(), +/// +/// Composed of two independent lines, each conditional on its caller flag: +/// * `SYSTEM_ACCESSTOKEN: $(SC_WRITE_TOKEN)` when `write_service_connection` +/// is `Some` — write-capable ADO token minted via ARM service connection. +/// * `ADO_AW_DEBUG_GITHUB_TOKEN: $(ADO_AW_DEBUG_GITHUB_TOKEN)` when +/// `debug_create_issue_enabled` is `true` — GitHub PAT used by the +/// `ado-aw-debug.create-issue` safe output. Sourced from a dedicated +/// pipeline variable so it stays separate from the read-only `GITHUB_TOKEN` +/// the agent (Stage 1) sees. +/// +/// Returns an empty string when both flags are off (no `env:` block emitted +/// — keeps the executor step minimal in pipelines that need neither token). +pub fn generate_executor_ado_env( + write_service_connection: Option<&str>, + debug_create_issue_enabled: bool, +) -> String { + let mut lines: Vec<String> = Vec::new(); + if write_service_connection.is_some() { + lines.push("SYSTEM_ACCESSTOKEN: $(SC_WRITE_TOKEN)".to_string()); + } + if debug_create_issue_enabled { + lines.push("ADO_AW_DEBUG_GITHUB_TOKEN: $(ADO_AW_DEBUG_GITHUB_TOKEN)".to_string()); + } + if lines.is_empty() { + return String::new(); + } + // The two-space indent on each value line is the YAML relative indent for + // a key nested under `env:`. replace_with_indent prepends the base + // indentation from the marker's position in the template to each + // subsequent line. 
+ let body = lines + .into_iter() + .map(|l| format!(" {}", l)) + .collect::<Vec<_>>() + .join("\n"); + format!("env:\n{}", body) } /// Generate `--enabled-tools` CLI args for the SafeOutputs MCP server. /// /// Derives the tool list from `safe-outputs:` front matter keys plus always-on -/// diagnostic tools. If `safe-outputs:` is empty, returns an empty string -/// (all tools enabled for backward compatibility). +/// diagnostic tools, plus any debug-only safe outputs activated via the +/// `ado-aw-debug:` section (e.g. `create-issue`). +/// +/// If `safe-outputs:` is empty AND no `ado-aw-debug` debug-only tool is +/// configured, returns an empty string (all non-debug tools enabled for +/// backward compatibility — debug-only tools remain stripped at the MCP +/// layer regardless). /// /// Tool names are validated to contain only ASCII alphanumerics and hyphens /// to prevent shell injection when the args are embedded in bash commands. @@ -935,12 +1050,15 @@ pub fn generate_enabled_tools_args(front_matter: &FrontMatter) -> String { use crate::safeoutputs::{ALL_KNOWN_SAFE_OUTPUTS, ALWAYS_ON_TOOLS, NON_MCP_SAFE_OUTPUT_KEYS}; use std::collections::HashSet; - if front_matter.safe_outputs.is_empty() { + let debug_create_issue = debug_create_issue_enabled(front_matter); + + if front_matter.safe_outputs.is_empty() && !debug_create_issue { return String::new(); } - // `seen` deduplicates across user keys and ALWAYS_ON_TOOLS (e.g. if the user - // configures `noop` explicitly, it shouldn't appear twice in the output). + // `seen` deduplicates across user keys, ALWAYS_ON_TOOLS, and debug-only + // tools (e.g. if the user configures `noop` explicitly, it shouldn't + // appear twice in the output). 
let mut seen = HashSet::new(); let mut tools: Vec = Vec::new(); let mut effective_mcp_tool_count = 0usize; @@ -976,6 +1094,13 @@ pub fn generate_enabled_tools_args(front_matter: &FrontMatter) -> String { } } + // Debug-only tools must be added explicitly — they're stripped from the + // MCP layer by default and only become reachable when listed here. + if debug_create_issue && seen.insert("create-issue".to_string()) { + tools.push("create-issue".to_string()); + effective_mcp_tool_count += 1; + } + if effective_mcp_tool_count == 0 { // Every user-specified key was either invalid or unrecognized. // Return empty to keep all tools available (backward compat). @@ -2191,6 +2316,7 @@ pub async fn compile_shared( .permissions .as_ref() .and_then(|p| p.write.as_deref()), + debug_create_issue_enabled(front_matter), ); // 10. Validations @@ -2200,6 +2326,7 @@ pub async fn compile_shared( validate_submit_pr_review_events(front_matter)?; validate_update_pr_votes(front_matter)?; validate_resolve_pr_thread_statuses(front_matter)?; + validate_ado_aw_debug_config(front_matter)?; // 11. Threat analysis prompt let threat_analysis_prompt = include_str!("../data/threat-analysis.md"); @@ -2227,7 +2354,13 @@ pub async fn compile_shared( // 14. 
Shared replacements let compiler_version = env!("CARGO_PKG_VERSION"); - let integrity_check = generate_integrity_check(config.skip_integrity); + let skip_integrity = config.skip_integrity + || front_matter + .ado_aw_debug + .as_ref() + .map(|d| d.skip_integrity) + .unwrap_or(false); + let integrity_check = generate_integrity_check(skip_integrity); let replacements: Vec<(&str, &str)> = vec![ ("{{ parameters }}", ¶meters_yaml), ("{{ compiler_version }}", compiler_version), @@ -3223,6 +3356,37 @@ mod tests { ); } + #[test] + fn test_debug_create_issue_enabled_helper() { + let yaml_off = "---\nname: test\ndescription: test\n---\n"; + let (fm_off, _) = parse_markdown(yaml_off).unwrap(); + assert!(!debug_create_issue_enabled(&fm_off)); + + let yaml_on = r#"--- +name: test +description: test +ado-aw-debug: + create-issue: + target-repo: githubnext/ado-aw +--- +"#; + let (fm_on, _) = parse_markdown(yaml_on).unwrap(); + assert!(debug_create_issue_enabled(&fm_on)); + + let yaml_section_only = r#"--- +name: test +description: test +ado-aw-debug: + skip-integrity: true +--- +"#; + let (fm_section, _) = parse_markdown(yaml_section_only).unwrap(); + assert!( + !debug_create_issue_enabled(&fm_section), + "ado-aw-debug.skip-integrity alone must NOT enable create-issue" + ); + } + // ─── generate_debug_pipeline_replacements ──────────────────────────────── #[test] @@ -3503,6 +3667,184 @@ mod tests { assert!(args.is_empty(), "empty safe-outputs should produce no args (all tools available)"); } + // ─── ado-aw-debug wiring ──────────────────────────────────────────────── + + #[test] + fn test_generate_enabled_tools_args_debug_create_issue_alone() { + let yaml = r#"--- +name: test +description: test +ado-aw-debug: + create-issue: + target-repo: githubnext/ado-aw +--- +"#; + let (fm, _) = parse_markdown(yaml).unwrap(); + let args = generate_enabled_tools_args(&fm); + assert!( + args.contains("--enabled-tools create-issue"), + "ado-aw-debug.create-issue should add create-issue to 
--enabled-tools, got: {}", + args + ); + // Always-on tools should also be present so the filter activates. + assert!(args.contains("--enabled-tools noop")); + } + + #[test] + fn test_generate_enabled_tools_args_debug_plus_safe_outputs() { + let yaml = r#"--- +name: test +description: test +safe-outputs: + create-pull-request: + target-branch: main +ado-aw-debug: + create-issue: + target-repo: githubnext/ado-aw +--- +"#; + let (fm, _) = parse_markdown(yaml).unwrap(); + let args = generate_enabled_tools_args(&fm); + assert!(args.contains("--enabled-tools create-pull-request")); + assert!(args.contains("--enabled-tools create-issue")); + // No duplicate + assert_eq!(args.matches("--enabled-tools create-issue").count(), 1); + } + + #[test] + fn test_generate_enabled_tools_args_no_debug_does_not_emit_create_issue() { + let yaml = r#"--- +name: test +description: test +safe-outputs: + create-pull-request: + target-branch: main +--- +"#; + let (fm, _) = parse_markdown(yaml).unwrap(); + let args = generate_enabled_tools_args(&fm); + assert!( + !args.contains("create-issue"), + "create-issue must not appear without ado-aw-debug.create-issue" + ); + } + + #[test] + fn test_validate_ado_aw_debug_config_accepts_valid_config() { + let yaml = r#"--- +name: test +description: test +ado-aw-debug: + create-issue: + target-repo: githubnext/ado-aw + title-prefix: "[bug] " + labels: [pipeline-failure] + allowed-labels: ["agent-*"] + assignees: [jamesdevine] +--- +"#; + let (fm, _) = parse_markdown(yaml).unwrap(); + assert!(validate_ado_aw_debug_config(&fm).is_ok()); + } + + #[test] + fn test_validate_ado_aw_debug_config_passes_when_section_absent() { + let (fm, _) = parse_markdown("---\nname: test\ndescription: test\n---\n").unwrap(); + assert!(validate_ado_aw_debug_config(&fm).is_ok()); + } + + #[test] + fn test_validate_ado_aw_debug_config_rejects_missing_target_repo() { + let yaml = r#"--- +name: test +description: test +ado-aw-debug: + create-issue: + target-repo: "" +--- +"#; + 
let (fm, _) = parse_markdown(yaml).unwrap(); + let result = validate_ado_aw_debug_config(&fm); + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!(msg.contains("target-repo"), "msg: {}", msg); + } + + #[test] + fn test_validate_ado_aw_debug_config_rejects_invalid_target_repo() { + let yaml = r#"--- +name: test +description: test +ado-aw-debug: + create-issue: + target-repo: not-a-valid-shape +--- +"#; + let (fm, _) = parse_markdown(yaml).unwrap(); + let result = validate_ado_aw_debug_config(&fm); + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!(msg.contains("owner/repo"), "msg: {}", msg); + } + + #[test] + fn test_validate_ado_aw_debug_config_rejects_pipeline_injection_in_label() { + let yaml = r###"--- +name: test +description: test +ado-aw-debug: + create-issue: + target-repo: githubnext/ado-aw + labels: + - "##vso[task.complete]" +--- +"###; + let (fm, _) = parse_markdown(yaml).unwrap(); + let result = validate_ado_aw_debug_config(&fm); + assert!(result.is_err()); + } + + #[test] + fn test_validate_ado_aw_debug_config_rejects_pipeline_injection_in_title_prefix() { + let yaml = r###"--- +name: test +description: test +ado-aw-debug: + create-issue: + target-repo: githubnext/ado-aw + title-prefix: "##vso[task.complete]" +--- +"###; + let (fm, _) = parse_markdown(yaml).unwrap(); + let result = validate_ado_aw_debug_config(&fm); + assert!(result.is_err()); + } + + #[test] + fn test_validate_rejects_create_issue_under_safe_outputs() { + // Defence-in-depth: `create-issue` MUST NOT appear under + // `safe-outputs:` even when `ado-aw-debug:` isn't set. Allowing it + // there would let a forged config flow into ctx.tool_configs and + // sidestep the executor-side gate. 
+ let yaml = r#"--- +name: test +description: test +safe-outputs: + create-issue: + target-repo: githubnext/ado-aw +--- +"#; + let (fm, _) = parse_markdown(yaml).unwrap(); + let result = validate_ado_aw_debug_config(&fm); + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!( + msg.contains("debug-only") && msg.contains("ado-aw-debug"), + "expected debug-only redirection error, got: {}", + msg + ); + } + // ─── parameter name validation ────────────────────────────────────────── #[test] @@ -3745,7 +4087,7 @@ mod tests { #[test] fn test_generate_executor_ado_env_with_connection() { - let result = generate_executor_ado_env(Some("my-sc")); + let result = generate_executor_ado_env(Some("my-sc"), false); assert!( result.contains("env:"), "Executor env block should include the 'env:' key" @@ -3759,16 +4101,41 @@ mod tests { !result.contains("SC_READ_TOKEN"), "Executor env must not contain SC_READ_TOKEN" ); + assert!( + !result.contains("ADO_AW_DEBUG_GITHUB_TOKEN"), + "Without debug flag, GitHub token must not be exposed to executor" + ); } #[test] fn test_generate_executor_ado_env_none_empty() { assert!( - generate_executor_ado_env(None).is_empty(), - "None service connection should produce empty string (no env block)" + generate_executor_ado_env(None, false).is_empty(), + "Both flags off should produce empty string (no env block)" + ); + } + + #[test] + fn test_generate_executor_ado_env_with_create_issue_only() { + let result = generate_executor_ado_env(None, true); + assert!(result.starts_with("env:\n"), "Should emit env: block"); + assert!( + result.contains("ADO_AW_DEBUG_GITHUB_TOKEN: $(ADO_AW_DEBUG_GITHUB_TOKEN)"), + "Debug flag should expose the GitHub PAT pipeline variable" + ); + assert!( + !result.contains("SYSTEM_ACCESSTOKEN"), + "No write SC means no ADO access token" ); } + #[test] + fn test_generate_executor_ado_env_with_both_tokens() { + let result = generate_executor_ado_env(Some("write-sc"), true); + 
assert!(result.contains("SYSTEM_ACCESSTOKEN: $(SC_WRITE_TOKEN)")); + assert!(result.contains("ADO_AW_DEBUG_GITHUB_TOKEN: $(ADO_AW_DEBUG_GITHUB_TOKEN)")); + } + // ─── Security validation tests ──────────────────────────────────────────── #[test] diff --git a/src/compile/types.rs b/src/compile/types.rs index 46ddb8c0..f53fc3d6 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -614,6 +614,13 @@ pub struct FrontMatter { /// Per-tool configuration for safe outputs #[serde(default, rename = "safe-outputs")] pub safe_outputs: HashMap, + /// Debug-only configuration. Top-level section that gates features only + /// intended for ado-aw dogfood/debug pipelines (e.g., `create-issue` for + /// filing failure reports back to GitHub during local testing). NOT a + /// regular safe-output section — anything declared here is omitted from + /// the regular safe-outputs documentation surface. + #[serde(default, rename = "ado-aw-debug")] + pub ado_aw_debug: Option, /// Unified trigger configuration: schedule, pipeline, PR triggers and filters #[serde(default, rename = "on")] pub on_config: Option, @@ -723,6 +730,9 @@ impl SanitizeConfigTrait for FrontMatter { for p in &mut self.parameters { p.sanitize_config_fields(); } + if let Some(ref mut d) = self.ado_aw_debug { + d.sanitize_config_fields(); + } } } @@ -779,6 +789,42 @@ pub struct PermissionsConfig { pub write: Option, } +/// Debug-only configuration block. +/// +/// Lives under the `ado-aw-debug:` top-level front-matter key. Holds knobs +/// that only make sense for pipelines we're actively dogfooding from +/// `githubnext/ado-aw` and that we explicitly do **not** want to advertise +/// as part of the regular agent surface. +/// +/// Adding a new field: pair the front-matter knob with a corresponding +/// compile-side hook (e.g., a debug-only safe output should also be added +/// to `crate::safeoutputs::DEBUG_ONLY_TOOLS` so the MCP layer enforces a +/// matching default-deny gate). 
+#[derive(Debug, Deserialize, Clone, Default)] +#[serde(deny_unknown_fields)] +pub struct AdoAwDebugConfig { + /// When true, the "Verify pipeline integrity" step is omitted from the + /// generated pipeline. Mirrors and OR-s with the `--skip-integrity` + /// CLI flag. + #[serde(default, rename = "skip-integrity")] + pub skip_integrity: bool, + + /// Configuration for the debug-only `create-issue` safe output. + /// Presence of this field is what enables the tool — when omitted + /// the SafeOutputs MCP layer hides it via `DEBUG_ONLY_TOOLS`. + #[serde(default, rename = "create-issue")] + pub create_issue: Option, +} + +impl SanitizeConfigTrait for AdoAwDebugConfig { + fn sanitize_config_fields(&mut self) { + // skip_integrity: bool — nothing to sanitize + if let Some(ref mut ci) = self.create_issue { + ci.sanitize_config_fields(); + } + } +} + /// Repository resource definition #[derive(Debug, Deserialize, Clone, SanitizeConfig)] pub struct Repository { @@ -1783,6 +1829,71 @@ Body assert!(fm.safe_outputs.contains_key("upload-pipeline-artifact")); } + #[test] + fn test_front_matter_parses_ado_aw_debug() { + let content = r#"--- +name: "Test" +description: "Test" +ado-aw-debug: + skip-integrity: true + create-issue: + target-repo: githubnext/ado-aw + title-prefix: "[bug] " + labels: [pipeline-failure] + allowed-labels: ["agent-*"] + assignees: [jamesdevine] +--- + +Body +"#; + let (fm, _) = super::super::common::parse_markdown(content).unwrap(); + let debug = fm.ado_aw_debug.expect("ado-aw-debug should parse"); + assert!(debug.skip_integrity); + let ci = debug.create_issue.expect("create-issue should parse"); + assert_eq!(ci.target_repo, "githubnext/ado-aw"); + assert_eq!(ci.title_prefix.as_deref(), Some("[bug] ")); + assert_eq!(ci.labels, vec!["pipeline-failure".to_string()]); + assert_eq!(ci.allowed_labels, vec!["agent-*".to_string()]); + assert_eq!(ci.assignees, vec!["jamesdevine".to_string()]); + } + + #[test] + fn test_front_matter_ado_aw_debug_defaults() { + 
let content = r#"--- +name: "Test" +description: "Test" +ado-aw-debug: {} +--- + +Body +"#; + let (fm, _) = super::super::common::parse_markdown(content).unwrap(); + let debug = fm.ado_aw_debug.unwrap(); + assert!(!debug.skip_integrity); + assert!(debug.create_issue.is_none()); + } + + #[test] + fn test_front_matter_ado_aw_debug_rejects_unknown_field() { + let content = r#"--- +name: "Test" +description: "Test" +ado-aw-debug: + bogus-knob: true +--- + +Body +"#; + let result = super::super::common::parse_markdown(content); + assert!(result.is_err(), "unknown ado-aw-debug field should be rejected"); + let err = format!("{:#}", result.unwrap_err()); + assert!( + err.contains("bogus-knob") || err.contains("unknown field"), + "expected error to mention unknown field, got: {}", + err + ); + } + // ─── PrTriggerConfig deserialization ───────────────────────────────────── // NOTE: These tests use `triggers:` as a wrapper key and deserialize // OnConfig directly (not through FrontMatter). They test struct diff --git a/src/execute.rs b/src/execute.rs index 583c3afa..39f27685 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -14,11 +14,12 @@ use crate::ndjson::{self, SAFE_OUTPUT_FILENAME}; use crate::sanitize::neutralize_pipeline_commands; use crate::safeoutputs::{ AddBuildTagResult, AddPrCommentResult, CreateBranchResult, CreateGitTagResult, - CreatePrResult, CreateWikiPageResult, CreateWorkItemResult, CommentOnWorkItemResult, - ExecutionContext, ExecutionResult, Executor, LinkWorkItemsResult, MissingDataResult, - MissingToolResult, NoopResult, QueueBuildResult, ReplyToPrCommentResult, - ReportIncompleteResult, ResolvePrThreadResult, SubmitPrReviewResult, ToolResult, - UpdatePrResult, UpdateWikiPageResult, UpdateWorkItemResult, UploadBuildAttachmentResult, + CreateIssueResult, CreatePrResult, CreateWikiPageResult, CreateWorkItemResult, + CommentOnWorkItemResult, ExecutionContext, ExecutionResult, Executor, + LinkWorkItemsResult, MissingDataResult, MissingToolResult, 
NoopResult, + QueueBuildResult, ReplyToPrCommentResult, ReportIncompleteResult, + ResolvePrThreadResult, SubmitPrReviewResult, ToolResult, UpdatePrResult, + UpdateWikiPageResult, UpdateWorkItemResult, UploadBuildAttachmentResult, UploadPipelineArtifactResult, UploadWorkitemAttachmentResult, }; @@ -99,6 +100,7 @@ pub async fn execute_safe_outputs( SubmitPrReviewResult, ReplyToPrCommentResult, ResolvePrThreadResult, + CreateIssueResult, ); let mut results = Vec::new(); @@ -288,6 +290,9 @@ async fn find_tool_executor( if let Some(r) = dispatch_resource_tools(tool_name, entry, ctx).await? { return Ok(Some(r)); } + if let Some(r) = dispatch_debug_tools(tool_name, entry, ctx).await? { + return Ok(Some(r)); + } Ok(None) } @@ -354,6 +359,18 @@ async fn dispatch_resource_tools( }) } +/// Dispatch debug-only tools (gated by `ado-aw-debug:` front-matter section +/// at compile time and `DEBUG_ONLY_TOOLS` at the MCP layer at runtime). +async fn dispatch_debug_tools( + tool_name: &str, + entry: &Value, + ctx: &ExecutionContext, +) -> Result> { + dispatch_executor_tools!(tool_name, entry, ctx, { + "create-issue" => CreateIssueResult, + }) +} + /// Read the operator's `max` override from the tool's config JSON, falling back to the /// tool's `DEFAULT_MAX` (declared on the `ToolResult` trait) when not configured. fn resolve_max(ctx: &ExecutionContext, tool_name: &str, default_max: u32) -> usize { diff --git a/src/main.rs b/src/main.rs index 1b2985e3..6f376d09 100644 --- a/src/main.rs +++ b/src/main.rs @@ -298,6 +298,28 @@ async fn build_execution_context( } ctx.working_directory = safe_output_dir.clone(); ctx.tool_configs = front_matter.safe_outputs.clone(); + // Merge ado-aw-debug.create-issue config under the same tool_configs map + // so Stage 3's `ctx.get_tool_config::("create-issue")` + // works exactly like every other safe-output. Without this merge the + // executor would only ever see Default::default(). 
+ // + // Crucially, also record `create-issue` in `debug_enabled_tools` so the + // Stage 3 executor can independently enforce the `ado-aw-debug` gate + // — without this, a forged NDJSON entry whose tool name is `create-issue` + // could bypass the MCP-layer default-deny. + if let Some(d) = front_matter.ado_aw_debug.as_ref() + && let Some(ci) = d.create_issue.as_ref() + { + match serde_json::to_value(ci) { + Ok(v) => { + ctx.tool_configs.insert("create-issue".to_string(), v); + } + Err(e) => log::warn!( + "Failed to serialize ado-aw-debug.create-issue config: {e}" + ), + } + ctx.debug_enabled_tools.insert("create-issue".to_string()); + } ctx.allowed_repositories = allowed_repositories; ctx.dry_run = dry_run; diff --git a/src/mcp.rs b/src/mcp.rs index d80dd04e..a0f53fe7 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -16,6 +16,7 @@ use crate::safeoutputs::{ CommentOnWorkItemParams, CommentOnWorkItemResult, CreateBranchParams, CreateBranchResult, CreateGitTagParams, CreateGitTagResult, + CreateIssueParams, CreateIssueResult, CreatePrParams, CreatePrResult, CreateWikiPageParams, CreateWikiPageResult, CreateWorkItemParams, CreateWorkItemResult, LinkWorkItemsParams, LinkWorkItemsResult, @@ -62,7 +63,7 @@ fn generate_short_id() -> String { } // Re-export from tools module -use crate::safeoutputs::ALWAYS_ON_TOOLS; +use crate::safeoutputs::{ALWAYS_ON_TOOLS, DEBUG_ONLY_TOOLS}; // ============================================================================ // SafeOutputs MCP Server @@ -146,26 +147,50 @@ impl SafeOutputs { let mut tool_router = Self::tool_router(); - // Filter tools if an enabled list is provided - if let Some(enabled) = enabled_tools { - let all_tools: Vec = tool_router.list_all().iter().map(|t| t.name.to_string()).collect(); - let total = all_tools.len(); - for tool_name in &all_tools { - let is_always_on = ALWAYS_ON_TOOLS.contains(&tool_name.as_str()); - let is_enabled = enabled.iter().any(|e| e == tool_name); - if !is_always_on && !is_enabled { - 
debug!("Filtering out tool: {}", tool_name); - tool_router.remove_route(tool_name); - } + // Apply tool filtering. Three categories: + // * ALWAYS_ON_TOOLS — diagnostic/transparency tools always served. + // * DEBUG_ONLY_TOOLS — gated tools (e.g. `create-issue`) that are + // stripped even when `enabled_tools` is `None`. They become + // reachable only when explicitly listed in `enabled_tools`. + // * Everything else — permissive default when `enabled_tools` is + // `None`; otherwise filtered against the explicit allowlist. + let all_tools: Vec = tool_router.list_all().iter().map(|t| t.name.to_string()).collect(); + let total = all_tools.len(); + let explicit_filter = enabled_tools.is_some(); + for tool_name in &all_tools { + let is_always_on = ALWAYS_ON_TOOLS.contains(&tool_name.as_str()); + let is_debug_only = DEBUG_ONLY_TOOLS.contains(&tool_name.as_str()); + let explicitly_enabled = enabled_tools + .map(|enabled| enabled.iter().any(|e| e == tool_name)) + .unwrap_or(false); + + let keep = if is_debug_only { + explicitly_enabled + } else if is_always_on { + true + } else if let Some(enabled) = enabled_tools { + enabled.iter().any(|e| e == tool_name) + } else { + true + }; + + if !keep { + debug!("Filtering out tool: {}", tool_name); + tool_router.remove_route(tool_name); } - // Warn about enabled-tools entries that don't match any registered route + } + if let Some(enabled) = enabled_tools { for name in enabled { if !all_tools.iter().any(|t| t == name) { warn!("Enabled-tools entry '{}' has no matching route (ignored)", name); } } - let remaining: Vec = tool_router.list_all().iter().map(|t| t.name.to_string()).collect(); + } + let remaining: Vec = tool_router.list_all().iter().map(|t| t.name.to_string()).collect(); + if explicit_filter { info!("Tool filtering applied: {} of {} tools enabled: {:?}", remaining.len(), total, remaining); + } else { + info!("Default tool exposure: {} of {} tools served (debug-only stripped)", remaining.len(), total); } Ok(Self { @@ 
-593,6 +618,35 @@ impl SafeOutputs { Ok(CallToolResult::success(vec![])) } + /// Debug-only: file a GitHub issue. Default-deny gated via + /// `DEBUG_ONLY_TOOLS` so this route is only reachable when the compiler + /// explicitly lists `create-issue` in `--enabled-tools` (which it does + /// only when the agent's front matter sets `ado-aw-debug.create-issue`). + /// Stage 3 authenticates against GitHub using the + /// `ADO_AW_DEBUG_GITHUB_TOKEN` pipeline variable. + #[tool( + name = "create-issue", + description = "Debug-only: file a GitHub issue against the operator-configured \ +target repository. Provide a concise title and a markdown body. \ +Optional `labels` and `assignees` are subject to operator-controlled allowlists. \ +This tool is gated by the agent's `ado-aw-debug.create-issue` front-matter section \ +and is not available in regular pipelines." + )] + async fn create_issue( + &self, + params: Parameters, + ) -> Result { + info!("Tool called: create-issue - '{}'", params.0.title); + debug!("Body length: {} chars", params.0.body.len()); + let mut sanitized = params.0; + sanitized.title = sanitize_text(&sanitized.title); + sanitized.body = sanitize_text(&sanitized.body); + let result: CreateIssueResult = sanitized.try_into()?; + let _ = self.write_safe_output_file(&result).await; + info!("Issue queued for creation"); + Ok(CallToolResult::success(vec![])) + } + #[tool( name = "comment-on-work-item", description = "Add a comment to an existing Azure DevOps work item. \ @@ -1857,15 +1911,19 @@ mod tests { "Non-enabled tool should be filtered out"); } - /// Asserts that ALL_KNOWN_SAFE_OUTPUTS contains every tool registered in the - /// router. This prevents the list from drifting when new tools are added to - /// the router but not to the constant. + /// Asserts that ALL_KNOWN_SAFE_OUTPUTS contains every NON-DEBUG-ONLY tool + /// registered in the router. Debug-only tools (e.g. 
`create-issue`) are + /// intentionally absent from the list because they're not regular + /// safe-outputs. #[tokio::test] async fn test_all_known_safe_outputs_covers_router() { use crate::safeoutputs::ALL_KNOWN_SAFE_OUTPUTS; let temp_dir = tempfile::tempdir().unwrap(); - let so = SafeOutputs::new(temp_dir.path(), temp_dir.path(), None) + // Pass an enable list that includes every debug-only tool so they + // remain in the router for this introspection check. + let enabled: Vec = DEBUG_ONLY_TOOLS.iter().map(|s| s.to_string()).collect(); + let so = SafeOutputs::new(temp_dir.path(), temp_dir.path(), Some(&enabled)) .await .unwrap(); let router_tools: Vec = so @@ -1876,11 +1934,78 @@ mod tests { .collect(); for tool_name in &router_tools { + if DEBUG_ONLY_TOOLS.contains(&tool_name.as_str()) { + // Debug-only tools intentionally bypass ALL_KNOWN_SAFE_OUTPUTS. + continue; + } assert!( ALL_KNOWN_SAFE_OUTPUTS.contains(&tool_name.as_str()), - "Tool '{}' is registered in the router but missing from ALL_KNOWN_SAFE_OUTPUTS in src/tools/mod.rs", + "Tool '{}' is registered in the router but missing from ALL_KNOWN_SAFE_OUTPUTS in src/safeoutputs/mod.rs", tool_name ); } } + + #[tokio::test] + async fn test_filter_strips_debug_only_when_no_enabled_list() { + let temp_dir = tempfile::tempdir().unwrap(); + let so = SafeOutputs::new(temp_dir.path(), temp_dir.path(), None) + .await + .unwrap(); + let tool_names: Vec = so + .tool_router + .list_all() + .iter() + .map(|t| t.name.to_string()) + .collect(); + for debug_tool in DEBUG_ONLY_TOOLS { + assert!( + !tool_names.contains(&debug_tool.to_string()), + "Debug-only tool '{}' must NOT be exposed when enabled_tools is None", + debug_tool + ); + } + // Spot check a regular tool is present in the permissive default. 
+ assert!(tool_names.contains(&"create-work-item".to_string())); + } + + #[tokio::test] + async fn test_filter_keeps_debug_only_when_explicitly_enabled() { + let temp_dir = tempfile::tempdir().unwrap(); + let enabled = vec!["create-issue".to_string()]; + let so = SafeOutputs::new(temp_dir.path(), temp_dir.path(), Some(&enabled)) + .await + .unwrap(); + let tool_names: Vec = so + .tool_router + .list_all() + .iter() + .map(|t| t.name.to_string()) + .collect(); + assert!( + tool_names.contains(&"create-issue".to_string()), + "Explicitly enabled debug-only tool should be present, got: {:?}", + tool_names + ); + } + + #[tokio::test] + async fn test_filter_strips_debug_only_when_other_tool_enabled() { + let temp_dir = tempfile::tempdir().unwrap(); + let enabled = vec!["create-work-item".to_string()]; + let so = SafeOutputs::new(temp_dir.path(), temp_dir.path(), Some(&enabled)) + .await + .unwrap(); + let tool_names: Vec = so + .tool_router + .list_all() + .iter() + .map(|t| t.name.to_string()) + .collect(); + assert!( + !tool_names.contains(&"create-issue".to_string()), + "Debug-only tool must remain stripped when not in the explicit enable list" + ); + assert!(tool_names.contains(&"create-work-item".to_string())); + } } diff --git a/src/safeoutputs/create_issue.rs b/src/safeoutputs/create_issue.rs new file mode 100644 index 00000000..e168aeba --- /dev/null +++ b/src/safeoutputs/create_issue.rs @@ -0,0 +1,830 @@ +//! Debug-only `create-issue` safe output. +//! +//! Files a GitHub issue against an operator-configured target repository. +//! This is **not** a regular safe output — it is gated entirely by the +//! `ado-aw-debug.create-issue` front-matter section and stripped from the +//! SafeOutputs MCP server unless explicitly enabled (see +//! [`crate::safeoutputs::DEBUG_ONLY_TOOLS`]). +//! +//! Intended use: dogfood pipelines compiled from `githubnext/ado-aw` that need +//! to file failure reports back to GitHub for triage. Stage 3 authenticates +//! 
with a dedicated PAT exposed via the `ADO_AW_DEBUG_GITHUB_TOKEN` pipeline +//! variable and surfaced through [`ExecutionContext::github_token`]. +//! +//! Notable design points: +//! * `target-repo` is operator-only — the agent never supplies it and cannot +//! redirect issues to a different repo. +//! * Labels are merged from a static operator-configured list and an +//! agent-supplied list. Agent labels are validated against `allowed-labels` +//! (wildcard-aware via [`crate::safeoutputs::tag_matches_pattern`]). +//! * Assignees are merged the same way without an allowlist gate (out of +//! scope for v1). + +use anyhow::{Context, ensure}; +use log::{debug, info}; +use percent_encoding::utf8_percent_encode; +use regex_lite::Regex; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::sync::OnceLock; + +use super::PATH_SEGMENT; +use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; +use crate::tool_result; +use crate::validate::reject_pipeline_injection; +use ado_aw_derive::SanitizeConfig; + +/// Parameters the agent supplies when calling the `create-issue` MCP tool. +#[derive(Deserialize, JsonSchema)] +pub struct CreateIssueParams { + /// Concise issue title summarizing the bug, feature, or task. + pub title: String, + + /// Detailed issue description in Markdown. + pub body: String, + + /// Labels to apply to the issue. Subject to the operator-configured + /// `allowed-labels` allowlist. + #[serde(default)] + pub labels: Vec, + + /// GitHub usernames to assign to the issue. 
+ #[serde(default)] + pub assignees: Vec, +} + +impl Validate for CreateIssueParams { + fn validate(&self) -> anyhow::Result<()> { + ensure!(self.title.len() >= 5, "title must be at least 5 characters"); + ensure!(self.body.len() >= 30, "body must be at least 30 characters"); + ensure!(self.title.len() <= 256, "title must be 256 characters or fewer"); + for label in &self.labels { + ensure!(!label.is_empty(), "label must not be empty"); + reject_pipeline_injection(label, "create-issue.label")?; + } + for assignee in &self.assignees { + ensure!(!assignee.is_empty(), "assignee must not be empty"); + reject_pipeline_injection(assignee, "create-issue.assignee")?; + } + Ok(()) + } +} + +tool_result! { + name = "create-issue", + write = true, + params = CreateIssueParams, + /// Result of filing a GitHub issue. + pub struct CreateIssueResult { + title: String, + body: String, + #[serde(default)] + labels: Vec, + #[serde(default)] + assignees: Vec, + } +} + +impl SanitizeContent for CreateIssueResult { + fn sanitize_content_fields(&mut self) { + self.title = sanitize_text(&self.title); + self.body = sanitize_text(&self.body); + for label in &mut self.labels { + *label = label.chars().filter(|c| !c.is_control()).collect(); + } + for assignee in &mut self.assignees { + *assignee = assignee.chars().filter(|c| !c.is_control()).collect(); + } + } +} + +/// Operator-side configuration for `ado-aw-debug.create-issue`. +/// +/// Lives under `ado-aw-debug:` rather than `safe-outputs:` to keep the tool +/// out of the regular safe-output surface. +#[derive(Debug, Clone, Default, SanitizeConfig, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct CreateIssueConfig { + /// Required: target GitHub repository in `owner/repo` form. + #[serde(rename = "target-repo")] + pub target_repo: String, + + /// Optional prefix prepended to every agent-supplied title (e.g. + /// `"[pipeline-failure] "`). 
+ #[serde(default, rename = "title-prefix")] + pub title_prefix: Option, + + /// Static labels always applied to the issue regardless of agent input. + #[serde(default)] + pub labels: Vec, + + /// Allowlist for agent-supplied labels. + /// + /// **Default-deny semantics**: an empty/absent list means **no + /// agent-supplied labels are accepted**. To accept any agent label, + /// set `allowed-labels: ["*"]` explicitly. Patterns may include `*` + /// wildcards (e.g. `"agent-*"`). + #[serde(default, rename = "allowed-labels")] + pub allowed_labels: Vec, + + /// Static assignees always added regardless of agent input. + #[serde(default)] + pub assignees: Vec, + + /// Per-run budget (max number of issues filed). Read by the generic + /// budget machinery in `crate::execute`. Stored here so + /// `deny_unknown_fields` accepts it under `ado-aw-debug.create-issue`. + #[serde(default, skip_serializing_if = "Option::is_none")] + #[sanitize_config(skip)] + pub max: Option, +} + +/// Compiled regex for `target-repo` validation. +/// +/// GitHub repo references take the form `owner/repo`. Owner segments +/// (logins of users or organisations) admit alphanumerics and hyphens +/// and must not start or end with a hyphen. Repository segments admit +/// alphanumerics, hyphens, dots, and underscores and must not be `.` +/// or `..`. We intentionally reject underscores and dots in the owner +/// because GitHub does too. +fn target_repo_regex() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| { + Regex::new(r"^[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?/[A-Za-z0-9._-]+$") + .expect("target_repo regex is well-formed") + }) +} + +/// Validate that `target-repo` is shaped like `owner/repo`. 
+pub(crate) fn validate_target_repo(target_repo: &str) -> anyhow::Result<()> { + ensure!( + !target_repo.is_empty(), + "ado-aw-debug.create-issue.target-repo is required (expected 'owner/repo')" + ); + ensure!( + target_repo_regex().is_match(target_repo), + "ado-aw-debug.create-issue.target-repo '{}' is not in 'owner/repo' format \ + (owner: alphanumerics/hyphens; repo: alphanumerics/dots/hyphens/underscores)", + target_repo + ); + if let Some((_owner, repo)) = target_repo.split_once('/') { + ensure!( + repo != "." && repo != "..", + "ado-aw-debug.create-issue.target-repo repo segment must not be '.' or '..'" + ); + } + Ok(()) +} + +/// Build the auto-appended traceability footer. +/// +/// Embeds a stable `` marker so future tooling can locate +/// generated content without reflowing the body. +fn build_footer(ctx: &ExecutionContext) -> String { + let mut lines: Vec = Vec::new(); + lines.push("".to_string()); + lines.push("---".to_string()); + if let Some(name) = ctx.definition_name.as_ref() { + lines.push(format!("Pipeline: `{name}`")); + } + if let Some(build_id) = ctx.build_id { + if let (Some(org_url), Some(project)) = (ctx.ado_org_url.as_ref(), ctx.ado_project.as_ref()) { + let url = format!( + "{}/{}/_build/results?buildId={}", + org_url.trim_end_matches('/'), + project, + build_id + ); + lines.push(format!("Run: <{url}>")); + } else { + lines.push(format!("Build: {build_id}")); + } + } + if let Some(reason) = ctx.build_reason.as_ref() { + lines.push(format!("Trigger: `{reason}`")); + } + lines.join("\n") +} + +/// Merge static + agent-supplied labels (case-insensitive dedupe). +fn merge_labels(static_labels: &[String], agent_labels: &[String]) -> Vec { + let mut all = static_labels.to_vec(); + for label in agent_labels { + if !all.iter().any(|t| t.eq_ignore_ascii_case(label)) { + all.push(label.clone()); + } + } + all +} + +/// Sentinel pattern in `allowed-labels` that opts out of the default-deny +/// behaviour and admits any agent-supplied label. 
+const ALLOWED_LABELS_ANY: &str = "*"; + +/// Maximum length of the **final** issue title, after `title-prefix` is +/// applied. GitHub itself accepts up to 256 characters; we mirror the +/// agent-side `Validate` limit so a long prefix can't trick us into +/// hitting the API with an over-long string. +const MAX_FINAL_TITLE_LEN: usize = 256; + +#[async_trait::async_trait] +impl Executor for CreateIssueResult { + fn dry_run_summary(&self) -> String { + format!("create GitHub issue: '{}'", self.title) + } + + async fn execute_impl(&self, ctx: &ExecutionContext) -> anyhow::Result { + info!("Filing GitHub issue: '{}' ({} chars body)", self.title, self.body.len()); + + // SECURITY GATE: independently of the SafeOutputs MCP filter, refuse + // to act on a `create-issue` NDJSON entry unless the operator + // authorised it via the `ado-aw-debug.create-issue` front-matter + // section. This closes the gap where a forged NDJSON entry (or a + // mis-placed `safe-outputs.create-issue` config) could otherwise + // bypass the MCP-layer default-deny. + if !ctx + .debug_enabled_tools + .contains(::NAME) + { + return Ok(ExecutionResult::failure( + "create-issue is a debug-only tool and is not enabled for this \ + pipeline. Configure `ado-aw-debug.create-issue` in front matter \ + to authorise it.", + )); + } + + let token = match ctx.github_token.as_ref() { + Some(t) => t, + None => { + return Ok(ExecutionResult::failure( + "ADO_AW_DEBUG_GITHUB_TOKEN not set; required by ado-aw-debug.create-issue", + )); + } + }; + + let config: CreateIssueConfig = ctx.get_tool_config("create-issue"); + debug!("create-issue: target-repo={}", config.target_repo); + + if let Err(e) = validate_target_repo(&config.target_repo) { + return Ok(ExecutionResult::failure(e.to_string())); + } + + // Validate agent-supplied labels against allowed-labels. + // Default-deny semantics: an empty list means NO agent labels are + // accepted. 
Operators must opt in to unrestricted by setting + // `allowed-labels: ["*"]`. Static labels under `labels:` are always + // applied regardless. + if !self.labels.is_empty() { + let allow_any = config + .allowed_labels + .iter() + .any(|p| p == ALLOWED_LABELS_ANY); + if !allow_any { + let disallowed: Vec = self + .labels + .iter() + .filter(|label| { + !config + .allowed_labels + .iter() + .any(|pattern| super::tag_matches_pattern(label, pattern)) + }) + .map(|label| { + // Neutralise pipeline-command sequences before we + // echo agent-supplied content into our own log line + // and the failure message. + crate::sanitize::neutralize_pipeline_commands(label) + }) + .collect(); + if !disallowed.is_empty() { + let msg = if config.allowed_labels.is_empty() { + format!( + "Agent-supplied labels rejected (no `allowed-labels` configured; \ + set `allowed-labels: [\"*\"]` to permit any): {}", + disallowed.join(", ") + ) + } else { + format!( + "Agent-supplied labels not in allowed-labels: {}", + disallowed.join(", ") + ) + }; + return Ok(ExecutionResult::failure(msg)); + } + } + } + + let final_title = match &config.title_prefix { + Some(prefix) => format!("{}{}", prefix, self.title), + None => self.title.clone(), + }; + if final_title.len() > MAX_FINAL_TITLE_LEN { + return Ok(ExecutionResult::failure(format!( + "Final issue title exceeds {MAX_FINAL_TITLE_LEN} characters \ + ({} chars after applying title-prefix). Shorten title-prefix \ + or the agent title.", + final_title.len() + ))); + } + let body_with_footer = format!("{}\n\n{}", self.body, build_footer(ctx)); + let all_labels = merge_labels(&config.labels, &self.labels); + let all_assignees = merge_labels(&config.assignees, &self.assignees); + + // Split target-repo only after validation. 
+ let (owner, repo) = config + .target_repo + .split_once('/') + .context("target-repo must be 'owner/repo'")?; + + let url = format!( + "https://api.github.com/repos/{}/{}/issues", + utf8_percent_encode(owner, PATH_SEGMENT), + utf8_percent_encode(repo, PATH_SEGMENT), + ); + debug!("POSTing to {}", url); + + let payload = serde_json::json!({ + "title": final_title, + "body": body_with_footer, + "labels": all_labels, + "assignees": all_assignees, + }); + + let user_agent = format!("ado-aw/{}", env!("CARGO_PKG_VERSION")); + let client = reqwest::Client::new(); + let response = client + .post(&url) + .header("Accept", "application/vnd.github+json") + .header("X-GitHub-Api-Version", "2022-11-28") + .header("User-Agent", user_agent) + .bearer_auth(token) + .json(&payload) + .send() + .await + .context("Failed to send request to GitHub API")?; + + let status = response.status(); + if status.is_success() { + let body: serde_json::Value = response + .json() + .await + .context("Failed to parse GitHub API response")?; + let number = body.get("number").and_then(|v| v.as_i64()).unwrap_or(0); + let html_url = body + .get("html_url") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + info!( + "Filed GitHub issue {}#{}: {}", + config.target_repo, number, html_url + ); + Ok(ExecutionResult::success_with_data( + format!("Filed issue {}#{}: {}", config.target_repo, number, html_url), + serde_json::json!({ + "number": number, + "url": html_url, + "target_repo": config.target_repo, + }), + )) + } else { + let body_text = response + .text() + .await + .unwrap_or_else(|_| "".to_string()); + Ok(ExecutionResult::failure(format!( + "Failed to file GitHub issue (HTTP {}): {}", + status, body_text + ))) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::safeoutputs::ToolResult; + use std::collections::HashMap; + use std::path::PathBuf; + + fn ctx_with_config(config: serde_json::Value, github_token: Option) -> ExecutionContext { + let mut tool_configs: HashMap = 
HashMap::new(); + tool_configs.insert("create-issue".to_string(), config); + let mut debug_enabled_tools = std::collections::HashSet::new(); + debug_enabled_tools.insert("create-issue".to_string()); + ExecutionContext { + github_token, + tool_configs, + debug_enabled_tools, + working_directory: PathBuf::from("."), + source_directory: PathBuf::from("."), + ..Default::default() + } + } + + /// Build a context that mirrors a forged-NDJSON scenario: tool config is + /// present, but the operator never authorised the debug tool via + /// `ado-aw-debug.create-issue`, so `debug_enabled_tools` is empty. + fn ctx_unauthorized(config: serde_json::Value, github_token: Option) -> ExecutionContext { + let mut tool_configs: HashMap = HashMap::new(); + tool_configs.insert("create-issue".to_string(), config); + ExecutionContext { + github_token, + tool_configs, + debug_enabled_tools: std::collections::HashSet::new(), + working_directory: PathBuf::from("."), + source_directory: PathBuf::from("."), + ..Default::default() + } + } + + fn valid_params() -> CreateIssueParams { + CreateIssueParams { + title: "Pipeline failure on main".to_string(), + body: "The agent step failed during stage 1 with a network timeout.".to_string(), + labels: vec![], + assignees: vec![], + } + } + + #[test] + fn test_result_has_correct_name() { + assert_eq!(CreateIssueResult::NAME, "create-issue"); + } + + #[test] + fn test_result_requires_write() { + const _: () = assert!(CreateIssueResult::REQUIRES_WRITE); + } + + #[test] + fn test_validate_rejects_short_title() { + let params = CreateIssueParams { + title: "Hi".to_string(), + ..valid_params() + }; + assert!(::validate(¶ms).is_err()); + } + + #[test] + fn test_validate_rejects_short_body() { + let params = CreateIssueParams { + body: "too short".to_string(), + ..valid_params() + }; + assert!(::validate(¶ms).is_err()); + } + + #[test] + fn test_validate_rejects_pipeline_injection_in_label() { + let params = CreateIssueParams { + labels: 
vec!["##vso[task.complete]".to_string()], + ..valid_params() + }; + assert!(::validate(¶ms).is_err()); + } + + #[test] + fn test_validate_rejects_pipeline_injection_in_assignee() { + let params = CreateIssueParams { + assignees: vec!["$(SYSTEM_ACCESSTOKEN)".to_string()], + ..valid_params() + }; + assert!(::validate(¶ms).is_err()); + } + + #[test] + fn test_sanitize_strips_control_chars() { + let mut result = CreateIssueResult { + name: "create-issue".to_string(), + title: "ok\u{0007}title".to_string(), + body: "body\u{0008}with\u{0001}ctl chars (more than 30 characters total)".to_string(), + labels: vec!["la\u{0007}bel".to_string()], + assignees: vec!["jo\u{0008}hn".to_string()], + }; + result.sanitize_content_fields(); + assert!(!result.title.contains('\u{0007}')); + assert!(!result.body.contains('\u{0008}')); + assert!(!result.body.contains('\u{0001}')); + assert!(!result.labels[0].contains('\u{0007}')); + assert!(!result.assignees[0].contains('\u{0008}')); + } + + #[test] + fn test_dry_run_summary_format() { + let result = CreateIssueResult { + name: "create-issue".to_string(), + title: "Fix the build".to_string(), + body: "anything".to_string(), + labels: vec![], + assignees: vec![], + }; + assert_eq!(result.dry_run_summary(), "create GitHub issue: 'Fix the build'"); + } + + #[test] + fn test_target_repo_regex_accepts_canonical_forms() { + assert!(validate_target_repo("githubnext/ado-aw").is_ok()); + assert!(validate_target_repo("a/b").is_ok()); + assert!(validate_target_repo("My-Org/some.repo-here").is_ok()); + // Repo segment may include dots/underscores; owner segment may not. 
+ assert!(validate_target_repo("user/repo_with_underscore").is_ok()); + assert!(validate_target_repo("user/.github").is_ok()); + } + + #[test] + fn test_target_repo_regex_rejects_bad_forms() { + assert!(validate_target_repo("").is_err()); + assert!(validate_target_repo("bare-name").is_err()); + assert!(validate_target_repo("a/b/c").is_err()); + assert!(validate_target_repo("/repo").is_err()); + assert!(validate_target_repo("owner/").is_err()); + assert!(validate_target_repo("-leading/repo").is_err()); + assert!(validate_target_repo("trailing-/repo").is_err()); + // GitHub does not admit dots or underscores in owner logins. + assert!(validate_target_repo("Acme.Inc/repo").is_err()); + assert!(validate_target_repo("under_score/repo").is_err()); + // Repo segment alone may not be `.` or `..`. + assert!(validate_target_repo("owner/.").is_err()); + assert!(validate_target_repo("owner/..").is_err()); + } + + #[test] + fn test_merge_labels_dedupes_case_insensitively() { + let merged = merge_labels(&["bug".into(), "Triage".into()], &["BUG".into(), "fresh".into()]); + assert_eq!(merged, vec!["bug".to_string(), "Triage".to_string(), "fresh".to_string()]); + } + + #[tokio::test] + async fn test_execute_fails_when_github_token_missing() { + let params = valid_params(); + let mut result: CreateIssueResult = params.try_into().unwrap(); + let ctx = ctx_with_config( + serde_json::json!({"target-repo": "githubnext/ado-aw"}), + None, + ); + let exec = result.execute_sanitized(&ctx).await.unwrap(); + assert!(!exec.success); + assert!( + exec.message.contains("ADO_AW_DEBUG_GITHUB_TOKEN"), + "expected ADO_AW_DEBUG_GITHUB_TOKEN message, got: {}", + exec.message + ); + } + + #[tokio::test] + async fn test_execute_fails_when_target_repo_invalid() { + let params = valid_params(); + let mut result: CreateIssueResult = params.try_into().unwrap(); + let ctx = ctx_with_config( + serde_json::json!({"target-repo": "not-a-valid-repo"}), + Some("fake-pat".to_string()), + ); + let exec = 
result.execute_sanitized(&ctx).await.unwrap(); + assert!(!exec.success); + assert!( + exec.message.contains("target-repo"), + "expected target-repo error, got: {}", + exec.message + ); + } + + #[tokio::test] + async fn test_execute_rejects_disallowed_label() { + let params = CreateIssueParams { + labels: vec!["manual".to_string()], + ..valid_params() + }; + let mut result: CreateIssueResult = params.try_into().unwrap(); + let ctx = ctx_with_config( + serde_json::json!({ + "target-repo": "githubnext/ado-aw", + "allowed-labels": ["agent-*", "automated"] + }), + Some("fake-pat".to_string()), + ); + let exec = result.execute_sanitized(&ctx).await.unwrap(); + assert!(!exec.success); + assert!( + exec.message.contains("not in allowed-labels"), + "expected allowed-labels error, got: {}", + exec.message + ); + } + + #[tokio::test] + async fn test_execute_accepts_label_matching_wildcard() { + let params = CreateIssueParams { + labels: vec!["agent-created".to_string()], + ..valid_params() + }; + let mut result: CreateIssueResult = params.try_into().unwrap(); + let ctx = ctx_with_config( + serde_json::json!({ + "target-repo": "githubnext/ado-aw", + "allowed-labels": ["agent-*"] + }), + Some("fake-pat".to_string()), + ); + // The HTTP call will fail (no real network in CI), but we assert that + // failure is NOT the policy-rejection message — i.e., wildcard match + // passed. + let exec = result.execute_sanitized(&ctx).await.unwrap(); + if !exec.success { + assert!( + !exec.message.contains("not in allowed-labels"), + "expected wildcard match to pass, got policy rejection: {}", + exec.message + ); + } + } + + #[tokio::test] + async fn test_execute_rejects_when_debug_tool_not_authorized() { + // Forged-NDJSON scenario: payload contains a `create-issue` entry + // and ctx.tool_configs has a config under "create-issue" — but the + // operator never set `ado-aw-debug.create-issue`, so + // `debug_enabled_tools` is empty. The executor MUST refuse before + // touching the token. 
+ let params = valid_params(); + let mut result: CreateIssueResult = params.try_into().unwrap(); + let ctx = ctx_unauthorized( + serde_json::json!({"target-repo": "githubnext/ado-aw"}), + Some("token-that-must-not-be-used".to_string()), + ); + let exec = result.execute_sanitized(&ctx).await.unwrap(); + assert!(!exec.success); + assert!( + exec.message.contains("debug-only tool"), + "expected debug-only refusal, got: {}", + exec.message + ); + // Also confirm the message does NOT mention the token — a token + // mention would imply we made it past the gate. + assert!( + !exec.message.contains("GITHUB_TOKEN"), + "executor must refuse before checking token: {}", + exec.message + ); + } + + #[tokio::test] + async fn test_execute_rejects_agent_label_when_allowed_labels_empty() { + // Default-deny: empty allowed-labels means no agent labels allowed. + let params = CreateIssueParams { + labels: vec!["bug".to_string()], + ..valid_params() + }; + let mut result: CreateIssueResult = params.try_into().unwrap(); + let ctx = ctx_with_config( + serde_json::json!({"target-repo": "githubnext/ado-aw"}), + Some("fake-pat".to_string()), + ); + let exec = result.execute_sanitized(&ctx).await.unwrap(); + assert!(!exec.success); + assert!( + exec.message.contains("no `allowed-labels` configured"), + "expected default-deny message, got: {}", + exec.message + ); + } + + #[tokio::test] + async fn test_execute_accepts_any_agent_label_with_star_allowlist() { + let params = CreateIssueParams { + labels: vec!["arbitrary-label".to_string()], + ..valid_params() + }; + let mut result: CreateIssueResult = params.try_into().unwrap(); + let ctx = ctx_with_config( + serde_json::json!({ + "target-repo": "githubnext/ado-aw", + "allowed-labels": ["*"] + }), + Some("fake-pat".to_string()), + ); + let exec = result.execute_sanitized(&ctx).await.unwrap(); + // Network call will fail; ensure that failure is NOT the policy + // rejection — the `*` allowlist must let arbitrary labels through. 
+ if !exec.success { + assert!( + !exec.message.contains("allowed-labels"), + "expected `*` to bypass the allowlist, got policy rejection: {}", + exec.message + ); + } + } + + #[tokio::test] + async fn test_execute_rejects_overlong_final_title_after_prefix() { + let long_prefix = "X".repeat(250); + let params = CreateIssueParams { + title: "valid title here".to_string(), + ..valid_params() + }; + let mut result: CreateIssueResult = params.try_into().unwrap(); + let ctx = ctx_with_config( + serde_json::json!({ + "target-repo": "githubnext/ado-aw", + "title-prefix": long_prefix, + }), + Some("fake-pat".to_string()), + ); + let exec = result.execute_sanitized(&ctx).await.unwrap(); + assert!(!exec.success); + assert!( + exec.message.contains("Final issue title"), + "expected length error, got: {}", + exec.message + ); + } + + #[tokio::test] + async fn test_execute_neutralizes_pipeline_command_in_label_error() { + // Even though Validate would reject this label up front, Stage 3 + // deserialises directly from NDJSON — so a forged payload could + // contain ##vso[...] in labels. The error message must neutralise + // these sequences so they can't act as live ADO pipeline commands + // when the message is echoed to stdout. + let mut result = CreateIssueResult { + name: "create-issue".to_string(), + title: "Pipeline failure on main".to_string(), + body: "This is a sufficiently long body for the issue parameters.".to_string(), + labels: vec!["##vso[task.complete]".to_string()], + assignees: vec![], + }; + let ctx = ctx_with_config( + serde_json::json!({ + "target-repo": "githubnext/ado-aw", + "allowed-labels": ["agent-*"] + }), + Some("fake-pat".to_string()), + ); + let exec = result.execute_sanitized(&ctx).await.unwrap(); + assert!(!exec.success); + // The neutraliser wraps `##vso[` in backticks so ADO's line-prefix + // parser ignores it. 
A live command would appear at the start of a + // line; after neutralisation, every `##vso[` instance must be + // preceded by a backtick. + for line in exec.message.lines() { + assert!( + !line.starts_with("##vso["), + "live pipeline command at start of line: {}", + line + ); + } + // And every occurrence of `##vso[` should be wrapped in backticks + // (the neutraliser's signature). + if exec.message.contains("##vso[") { + assert!( + exec.message.contains("`##vso[`"), + "expected neutralised `##vso[` form, got: {}", + exec.message + ); + } + } + + #[test] + fn test_config_round_trips_kebab_case() { + let yaml = r#" +target-repo: githubnext/ado-aw +title-prefix: "[bug] " +labels: [a] +allowed-labels: ["agent-*"] +assignees: [u1] +"#; + let cfg: CreateIssueConfig = serde_yaml::from_str(yaml).unwrap(); + assert_eq!(cfg.target_repo, "githubnext/ado-aw"); + assert_eq!(cfg.title_prefix.as_deref(), Some("[bug] ")); + assert_eq!(cfg.labels, vec!["a".to_string()]); + assert_eq!(cfg.allowed_labels, vec!["agent-*".to_string()]); + assert_eq!(cfg.assignees, vec!["u1".to_string()]); + } + + #[test] + fn test_config_rejects_unknown_fields() { + let yaml = r#" +target-repo: githubnext/ado-aw +unexpected: oops +"#; + let result: Result = serde_yaml::from_str(yaml); + assert!(result.is_err(), "deny_unknown_fields should reject unexpected key"); + } + + #[test] + fn test_footer_includes_marker() { + let ctx = ExecutionContext { + ado_org_url: Some("https://dev.azure.com/myorg".to_string()), + ado_project: Some("MyProject".to_string()), + build_id: Some(42), + definition_name: Some("dogfood".to_string()), + build_reason: Some("Manual".to_string()), + ..Default::default() + }; + let footer = build_footer(&ctx); + assert!(footer.contains("")); + assert!(footer.contains("buildId=42")); + assert!(footer.contains("dogfood")); + } +} diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index 1909b4b3..fe09f6e4 100644 --- a/src/safeoutputs/create_pr.rs +++ 
b/src/safeoutputs/create_pr.rs @@ -2446,9 +2446,11 @@ new file mode 100755 ado_project: Some("TestProject".to_string()), ado_project_id: None, access_token: Some("fake-token".to_string()), + github_token: None, source_directory: dir.path().to_path_buf(), working_directory: dir.path().to_path_buf(), tool_configs: std::collections::HashMap::new(), + debug_enabled_tools: std::collections::HashSet::new(), repository_id: Some("repo-id".to_string()), repository_name: Some("test-repo".to_string()), allowed_repositories: std::collections::HashMap::new(), diff --git a/src/safeoutputs/mod.rs b/src/safeoutputs/mod.rs index 33a85048..6443176e 100644 --- a/src/safeoutputs/mod.rs +++ b/src/safeoutputs/mod.rs @@ -55,6 +55,17 @@ pub const WRITE_REQUIRING_SAFE_OUTPUTS: &[&str] = tool_names![ /// filtered out (the router has no route for them). pub const NON_MCP_SAFE_OUTPUT_KEYS: &[&str] = &[]; +/// Tools that are gated behind `ado-aw-debug:` front-matter sections and must +/// NOT be exposed to a regular pipeline. The SafeOutputs MCP filter strips +/// these even when `enabled_tools` is `None`, so they only become reachable +/// when the compiler explicitly lists them in `--enabled-tools`. +/// +/// Adding a new debug-only tool: register its result type with +/// `tool_result! { write = true, ... }`, add it here, and gate the +/// compiler-side `--enabled-tools` injection on its corresponding +/// `ado-aw-debug.` front-matter section. +pub const DEBUG_ONLY_TOOLS: &[&str] = tool_names![CreateIssueResult]; + /// All recognised safe-output keys accepted in front matter `safe-outputs:`. /// This is the union of write-requiring tool types and diagnostic tool types. 
/// @@ -319,6 +330,7 @@ mod add_pr_comment; mod comment_on_work_item; mod create_branch; mod create_git_tag; +mod create_issue; mod create_pr; mod create_wiki_page; mod create_work_item; @@ -344,6 +356,8 @@ pub use add_pr_comment::*; pub use comment_on_work_item::*; pub use create_branch::*; pub use create_git_tag::*; +pub use create_issue::*; +pub(crate) use create_issue::validate_target_repo; pub use create_pr::*; pub use create_wiki_page::*; pub use create_work_item::*; diff --git a/src/safeoutputs/result.rs b/src/safeoutputs/result.rs index 6096d08b..6c513636 100644 --- a/src/safeoutputs/result.rs +++ b/src/safeoutputs/result.rs @@ -47,12 +47,23 @@ pub struct ExecutionContext { pub ado_project_id: Option, /// Personal access token or system access token pub access_token: Option, + /// GitHub PAT used by debug-only safe outputs (e.g. `ado-aw-debug.create-issue`). + /// Sourced from the `ADO_AW_DEBUG_GITHUB_TOKEN` pipeline variable. Intentionally + /// **separate** from `access_token` (ADO) and from the read-only `GITHUB_TOKEN` + /// the agent sees in Stage 1 — only Stage 3 ever sees this token. + pub github_token: Option, /// Working directory for file operations (safe outputs directory) pub working_directory: std::path::PathBuf, /// Source checkout directory (BUILD_SOURCESDIRECTORY) where git repos are checked out pub source_directory: std::path::PathBuf, /// Per-tool configuration, keyed by tool name pub tool_configs: HashMap, + /// Debug-only tools (e.g. `create-issue`) that the operator authorized + /// via the `ado-aw-debug:` front-matter section. Stage 3 executors for + /// `crate::safeoutputs::DEBUG_ONLY_TOOLS` MUST reject NDJSON entries + /// whose tool name is absent from this set — otherwise a forged entry + /// could bypass the MCP-layer default-deny gate. Empty by default. 
+ pub debug_enabled_tools: HashSet, /// Repository ID (from BUILD_REPOSITORY_ID) pub repository_id: Option, /// Repository name (from BUILD_REPOSITORY_NAME) @@ -190,9 +201,11 @@ impl ExecutionContext { ado_project: env("SYSTEM_TEAMPROJECT"), ado_project_id: env("SYSTEM_TEAMPROJECTID"), access_token: env("SYSTEM_ACCESSTOKEN").or_else(|| env("AZURE_DEVOPS_EXT_PAT")), + github_token: env("ADO_AW_DEBUG_GITHUB_TOKEN"), working_directory: std::env::current_dir().unwrap_or_default(), source_directory, tool_configs: HashMap::new(), + debug_enabled_tools: HashSet::new(), repository_id: env("BUILD_REPOSITORY_ID"), repository_name: env("BUILD_REPOSITORY_NAME"), allowed_repositories: HashMap::new(), diff --git a/src/safeoutputs/upload_build_attachment.rs b/src/safeoutputs/upload_build_attachment.rs index 57734403..e348d9e6 100644 --- a/src/safeoutputs/upload_build_attachment.rs +++ b/src/safeoutputs/upload_build_attachment.rs @@ -806,9 +806,11 @@ attachment-type: "agent-artifact" ado_project: None, ado_project_id: None, access_token: None, + github_token: None, source_directory: working_directory.clone(), working_directory, tool_configs: std::collections::HashMap::new(), + debug_enabled_tools: std::collections::HashSet::new(), repository_id: None, repository_name: None, allowed_repositories: std::collections::HashMap::new(), diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 05ea4fad..a7b1ce70 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -3538,3 +3538,62 @@ fn test_executor_step_has_env_block_with_write_permissions() { "Executor step should include SYSTEM_ACCESSTOKEN: {after_execute}" ); } + +// ─── ado-aw-debug fixture ────────────────────────────────────────────────── + +/// Compile the `ado-aw-debug-agent.md` fixture and assert the +/// front-matter section's compile-time effects: +/// 1. The integrity check step is omitted (`skip-integrity: true`). +/// 2. The Stage 3 executor `env:` block exposes +/// `ADO_AW_DEBUG_GITHUB_TOKEN`. 
+/// 3. `--enabled-tools create-issue` is wired into the SafeOutputs MCP +/// invocation. +/// 4. The output is otherwise valid YAML. +#[test] +fn test_compile_ado_aw_debug_fixture() { + let compiled = compile_fixture("ado-aw-debug-agent.md"); + assert_valid_yaml(&compiled, "ado-aw-debug-agent.md"); + + // skip-integrity: integrity check should be absent + assert!( + !compiled.contains("Verify pipeline integrity"), + "ado-aw-debug.skip-integrity: true must omit the integrity check step" + ); + + // Executor env block exposes the GitHub PAT pipeline variable + let execute_block_start = compiled + .find("Execute safe outputs (Stage 3)") + .expect("Should have executor step"); + let after_execute = &compiled[execute_block_start..]; + assert!( + after_execute.contains("ADO_AW_DEBUG_GITHUB_TOKEN: $(ADO_AW_DEBUG_GITHUB_TOKEN)"), + "Executor step must expose ADO_AW_DEBUG_GITHUB_TOKEN when ado-aw-debug.create-issue is set: {after_execute}" + ); + + // --enabled-tools includes create-issue + assert!( + compiled.contains("--enabled-tools create-issue"), + "Compiler must add --enabled-tools create-issue when ado-aw-debug.create-issue is set" + ); +} + +/// The example file in `examples/dogfood-failure-reporter.md` must compile +/// cleanly. Mirror of the structural smoke test for `examples/sample-agent.md`. 
+#[test] +fn test_example_dogfood_failure_reporter_structure() { + let example_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("examples") + .join("dogfood-failure-reporter.md"); + assert!( + example_path.exists(), + "examples/dogfood-failure-reporter.md should exist" + ); + let content = fs::read_to_string(&example_path).expect("Should be able to read example"); + assert!(content.starts_with("---"), "Example should start with front matter"); + assert!(content.contains("ado-aw-debug:"), "Example should declare ado-aw-debug section"); + assert!(content.contains("create-issue:"), "Example should configure create-issue"); + assert!( + content.contains("target-repo: githubnext/ado-aw"), + "Example should target githubnext/ado-aw" + ); +} diff --git a/tests/fixtures/ado-aw-debug-agent.md b/tests/fixtures/ado-aw-debug-agent.md new file mode 100644 index 00000000..331caaa3 --- /dev/null +++ b/tests/fixtures/ado-aw-debug-agent.md @@ -0,0 +1,22 @@ +--- +name: "Ado AW Debug Test Agent" +description: "Fixture exercising the ado-aw-debug.create-issue front matter section" +ado-aw-debug: + skip-integrity: true + create-issue: + target-repo: githubnext/ado-aw + title-prefix: "[pipeline-failure] " + labels: + - pipeline-failure + allowed-labels: + - "agent-*" + - "pipeline-failure" + assignees: + - jamesdevine + max: 3 +--- + +## Test Agent + +Files a GitHub issue when a pipeline run fails. Used by the +`test_compile_ado_aw_debug_fixture` integration test.