From 4b5a7979c4e7320dfa68acfdf214bca1d2e87356 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 16 May 2026 14:59:57 +0000 Subject: [PATCH 1/3] Initial plan From e05f9daac3dfbcf15dc9c82d9652f64940cad161 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 16 May 2026 15:05:05 +0000 Subject: [PATCH 2/3] Plan spec-enforcer sub-agent optimization Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/workflows/pr-sous-chef.lock.yml | 29 +++++++++++++------------ 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/workflows/pr-sous-chef.lock.yml b/.github/workflows/pr-sous-chef.lock.yml index f0f2e98790f..e7356322765 100644 --- a/.github/workflows/pr-sous-chef.lock.yml +++ b/.github/workflows/pr-sous-chef.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"4bbfd40cea1c2fad4fdcecf86a9fe469604e4cd1a773ff28a8fdb2d31816f84e","strict":true,"agent_id":"copilot","agent_model":"gpt-5-mini"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"f1496174ea1147d0a2b83b8d756c3a246355d211fea8553bbe1e100944ca4083","strict":true,"agent_id":"copilot","agent_model":"gpt-5-mini"} # gh-aw-manifest: 
{"version":1,"secrets":["GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GH_AW_OTEL_GRAFANA_AUTHORIZATION","GH_AW_OTEL_GRAFANA_ENDPOINT","GH_AW_OTEL_SENTRY_AUTHORIZATION","GH_AW_OTEL_SENTRY_ENDPOINT","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.46"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.46"},{"image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.25.46"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.46"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.9","digest":"sha256:64828b42a4482f58fab16509d7f8f495a6d97c972a98a68aff20543531ac0388","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.9@sha256:64828b42a4482f58fab16509d7f8f495a6d97c972a98a68aff20543531ac0388"},{"image":"ghcr.io/github/github-mcp-server:v1.0.4"},{"image":"node:lts-alpine","digest":"sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f","pinned_image":"node:lts-alpine@sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f"}]} # ___ _ _ # / _ \ | | (_) @@ -198,20 +198,20 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh" { - cat << 'GH_AW_PROMPT_76dba60faa5b02f6_EOF' + cat << 'GH_AW_PROMPT_be974995d6a3b77a_EOF' - GH_AW_PROMPT_76dba60faa5b02f6_EOF + GH_AW_PROMPT_be974995d6a3b77a_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 
'GH_AW_PROMPT_76dba60faa5b02f6_EOF' + cat << 'GH_AW_PROMPT_be974995d6a3b77a_EOF' Tools: add_comment(max:20), update_pull_request(max:10), missing_tool, missing_data, noop - GH_AW_PROMPT_76dba60faa5b02f6_EOF + GH_AW_PROMPT_be974995d6a3b77a_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/mcp_cli_tools_prompt.md" - cat << 'GH_AW_PROMPT_76dba60faa5b02f6_EOF' + cat << 'GH_AW_PROMPT_be974995d6a3b77a_EOF' The following GitHub context information is available for this workflow: {{#if github.actor}} @@ -240,13 +240,13 @@ jobs: {{/if}} - GH_AW_PROMPT_76dba60faa5b02f6_EOF + GH_AW_PROMPT_be974995d6a3b77a_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/cli_proxy_with_safeoutputs_prompt.md" - cat << 'GH_AW_PROMPT_76dba60faa5b02f6_EOF' + cat << 'GH_AW_PROMPT_be974995d6a3b77a_EOF' {{#runtime-import .github/workflows/shared/observability-otlp.md}} {{#runtime-import .github/workflows/pr-sous-chef.md}} - GH_AW_PROMPT_76dba60faa5b02f6_EOF + GH_AW_PROMPT_be974995d6a3b77a_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 @@ -448,7 +448,7 @@ jobs: if length == 0 then false else ( ((.[0].body // "" | contains($marker))) - ) end + ) end ' --arg marker "$sous_chef_nudge_marker" 2>/dev/null || echo "false" )" if [ "$last_comment_is_sous_chef" = "true" ]; then @@ -545,9 +545,9 @@ jobs: mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs" mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_76fee42fd0d18220_EOF' + cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_5629bf8aacb5044e_EOF' 
{"add_comment":{"max":20,"target":"*"},"create_report_incomplete_issue":{},"mentions":{"allowed":["copilot"]},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"report_incomplete":{},"update_pull_request":{"allow_body":true,"allow_title":false,"default_operation":"append","max":10,"target":"*","update_branch":true}} - GH_AW_SAFE_OUTPUTS_CONFIG_76fee42fd0d18220_EOF + GH_AW_SAFE_OUTPUTS_CONFIG_5629bf8aacb5044e_EOF - name: Generate Safe Outputs Tools env: GH_AW_TOOLS_META_JSON: | @@ -778,7 +778,7 @@ jobs: mkdir -p /home/runner/.copilot GH_AW_NODE=$(which node 2>/dev/null || command -v node 2>/dev/null || echo node) - cat << GH_AW_MCP_CONFIG_979baa0c23d8ed42_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs" + cat << GH_AW_MCP_CONFIG_4627e9a5951f3d51_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs" { "mcpServers": { "safeoutputs": { @@ -808,7 +808,7 @@ jobs: } } } - GH_AW_MCP_CONFIG_979baa0c23d8ed42_EOF + GH_AW_MCP_CONFIG_4627e9a5951f3d51_EOF - name: Mount MCP servers as CLIs id: mount-mcp-clis continue-on-error: true @@ -1596,3 +1596,4 @@ jobs: /tmp/gh-aw/safe-output-items.jsonl /tmp/gh-aw/temporary-id-map.json if-no-files-found: ignore + From 2c80919f9f1f25cf502088f71be38a1ac92e92c3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 16 May 2026 15:06:59 +0000 Subject: [PATCH 3/3] Optimize spec-enforcer with inline sub-agents Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/workflows/spec-enforcer.md | 125 ++++++++++++++++++----------- 1 file changed, 77 insertions(+), 48 deletions(-) diff --git a/.github/workflows/spec-enforcer.md b/.github/workflows/spec-enforcer.md index b22d030eb90..74a61917f05 100644 --- a/.github/workflows/spec-enforcer.md +++ b/.github/workflows/spec-enforcer.md @@ -134,37 +134,17 @@ You MUST NOT: } ``` -3. 
If `rotation.json` is missing or empty, recover round-robin state from the most recently merged PR with the `pkg-specifications` label: - - Use `gh pr list --repo ${{ github.repository }} --state merged --label pkg-specifications --limit 1 --json number,body` to find the latest merged PR in this repository - - Parse this line from the PR body: - - `- **Next packages in rotation**: <pkg1>, <pkg2>, <pkg3>` - - Use this matching pattern: - - `^- \*\*Next packages in rotation\*\*:\s*([A-Za-z0-9_.]+(?:-[A-Za-z0-9_.]+)*(?:\s*,\s*[A-Za-z0-9_.]+(?:-[A-Za-z0-9_.]+)*)*)\s*$` - - This is the final regex pattern (it already escapes literal `**` as `\*\*`) - - If you implement this in a string-literal context, escape backslashes as required by that language - - YAML/Markdown plain text: `\s` - - JSON string: `\\s` - - JavaScript/TypeScript string literal: `\\s` - - Expected list format: `pkg1, pkg2, pkg3` (comma-separated package directory names; the regex enforces package-name character constraints) - - Valid examples: `actionpins, cli`, `123-pkg, console` - - Invalid examples: `pkg1,,pkg2`, `pkg1, pkg two`, `pkg-, nextpkg` - - The regex requires at least one valid package token between commas, so consecutive commas are rejected - - Split the captured value by comma, trim each entry, and (defensively) discard empty entries - - Reconstruct `rotation.json` as: - - `last_packages`: recovered package list - - `last_index`: build a map of `eligible_package -> eligible_list_index`, then scan recovered packages left-to-right and keep the index for the last package in the recovered list that exists in the eligible map; if no recovered package matches, use `-1` - - Example: eligible=`[a,b,c,d]`, recovered=`[c,x,b]` → `last_index=1` (package `b`) - - `last_run`: merge date of the source PR (UTC date) - - `total_eligible`: current count of eligible packages with `README.md` - - If no such PR (or no parsable line) exists, initialize fallback state: - ```json - { - "last_index": -1, - "last_packages": [], - 
"last_run": "unknown", - "total_eligible": 0 - } - ``` +3. If `rotation.json` is missing or empty, fetch the most recently merged PR with: + - `gh pr list --repo ${{ github.repository }} --state merged --label pkg-specifications --limit 1 --json number,body,mergedAt` + Then use the `rotation-state-recoverer` agent — pass it the PR body text and the eligible package list — to produce the rotation JSON. Write the agent's output to `rotation.json` (set `last_run` to the PR's `mergedAt` UTC date). If no matching PR exists (or its body has no parsable rotation line), write fallback state, where `N` is the current count of eligible packages: + ```json + { + "last_index": -1, + "last_packages": [], + "last_run": "unknown", + "total_eligible": N + } + ``` ## Phase 1: Select Packages @@ -202,18 +182,9 @@ Determine the run mode first: For each selected package: -### Step 1: Read the README.md - -```bash -cat pkg/<package>/README.md -``` +### Step 1: Extract the specification -Extract from the specification: -- **Public API**: Functions, types, constants documented -- **Behavioral contracts**: What each function MUST do -- **Usage examples**: Expected input/output patterns -- **Design constraints**: Thread safety, error handling, etc. -- **Edge cases**: Documented limitations or special behavior +For each selected package, invoke the `readme-spec-extractor` agent in parallel — pass it the contents of `pkg/<package>/README.md`. Use the returned JSON as the source of truth when generating tests in Phase 3. ### Step 2: Minimal Source Code Reading @@ -326,7 +297,7 @@ Every test file MUST have the build tag as the first line: ## Phase 4: Validate Tests -After generating tests, validate they compile and pass: +After generating tests, run: ```bash # Check compilation @@ -336,11 +307,11 @@ go build ./pkg/<package>/... go test -v -run "TestSpec" ./pkg/<package>/ ``` -If tests fail: -1. Re-read the specification section that the test maps to -2. Verify the test matches the specification (not implementation) -3. If the specification is ambiguous, add a `// SPEC_AMBIGUITY: <description>` comment in the test -4. 
If the implementation doesn't match the specification, add a `// SPEC_MISMATCH: <description>` comment and document it in the PR body +Then pass both outputs to the `test-output-classifier` agent. Use the returned JSON to decide per failure: +1. `fix_test` → revise the test against the spec +2. `flag_spec_ambiguity` → add a `// SPEC_AMBIGUITY: <description>` comment in the test +3. `flag_spec_mismatch` → add a `// SPEC_MISMATCH: <description>` comment in the test and document it in the PR body +4. `investigate` → re-read the spec section before deciding ## Phase 5: Save Cache and Create PR @@ -429,3 +400,61 @@ All tests are derived from README.md specifications, not from implementation sou - ✅ PR created with test changes **OR** `noop` called when all tests are already up-to-date {{#runtime-import shared/noop-reminder.md}} + +## agent: `rotation-state-recoverer` +--- +description: Parse merged PR body text to recover package rotation state. +model: small +--- +You receive: +- `pr_body`: the merged PR body text +- `eligible_packages`: array of currently eligible package names + +Extract `last_packages` from this PR-body line when present: +- `- **Next packages in rotation**: <pkg1>, <pkg2>, <pkg3>` + +Use this regex exactly, matching it against each line of `pr_body` (i.e. in multiline mode): +`^- \*\*Next packages in rotation\*\*:\s*([A-Za-z0-9_.]+(?:-[A-Za-z0-9_.]+)*(?:\s*,\s*[A-Za-z0-9_.]+(?:-[A-Za-z0-9_.]+)*)*)\s*$` + +Rules: +- Parse the captured comma-separated list, trim whitespace, discard empty entries. +- Compute `last_index` by mapping `eligible_packages` to indexes, scanning parsed `last_packages` left-to-right, and keeping the index of the last package that exists in `eligible_packages`. If none match, use `-1`. +- Set `total_eligible` to `len(eligible_packages)`. +- Do not invent packages; if the line is absent or does not match the regex, use an empty `last_packages` and a `last_index` of `-1`. + +Output JSON only: +`{"last_index": <int>, "last_packages": ["..."], "total_eligible": <int>}` + +## agent: `readme-spec-extractor` +--- +description: Extract structured API contract from a Go package README.md. +model: small +--- +You are given the full contents of a single Go package README.md. 
+ +Emit a JSON object with these fields (omit any field that the README does not document): +- `public_api`: list of `{name, kind: "func"|"type"|"const", documented_signature_or_value, behavior_summary}` items +- `behavioral_contracts`: list of short bullet strings (one obligation each) +- `usage_examples`: list of `{label, input, expected_output}` items, verbatim from the README where possible +- `design_constraints`: list of short bullet strings (thread safety, error handling, etc.) +- `edge_cases`: list of short bullet strings (documented limitations) +- `ambiguities`: list of short bullet strings — any places the spec is unclear and a test will need to make assumptions + +Do not invent details that the README does not state. Output JSON only. + +## agent: `test-output-classifier` +--- +description: Classify go test/go build failures into a fixed taxonomy. +model: small +--- +You receive raw `go build` and `go test` output for a single package. + +For each failure, emit one entry in the top-level `failures` array with these fields: +- `test_or_symbol`: the test function name or compile symbol +- `category`: one of `compile_error`, `missing_symbol`, `signature_mismatch`, `assertion_failure`, `panic`, `other` +- `evidence`: one verbatim line from the output that justifies the category +- `suggested_action`: one of `fix_test`, `flag_spec_mismatch`, `flag_spec_ambiguity`, `investigate` + +Also emit a top-level `summary`: `{total_failures, by_category: {...}, all_passing: bool}`. + +Output JSON only — no prose. If the output shows all tests passing, emit `{"summary": {"total_failures": 0, "by_category": {}, "all_passing": true}, "failures": []}`.