Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/daily-cli-tools-tester.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ When problems are detected, create detailed GitHub issues with reproduction step
**MANDATORY**: Follow these rules on every tool call to keep token consumption under control.

- **`logs` calls**: Always pass `count: 3` and `max_tokens: 3000`. Never call `logs` without these limits.
- **`audit` calls**: Always pass `max_tokens: 5000`. Prefer auditing 1–2 representative runs rather than every run found.
- **`compile` calls**: Always pass `max_tokens: 5000`. Use targeted compilation of 3 representative workflows instead of bulk compilation when the goal is validation.
- **`audit` calls**: Prefer auditing 1–2 representative runs rather than every run found.
- **`compile` calls**: Use targeted compilation of 3 representative workflows instead of bulk compilation when the goal is validation.
- **Parallel batching**: Combine independent tool calls into a single turn whenever possible (e.g. run 2–3 targeted compiles in parallel rather than sequentially).
- **Skip redundant variants**: If a test variant (e.g. a second date-range filter) would produce essentially the same signal as one already run, skip it and document the skip reason.

Expand Down Expand Up @@ -338,8 +338,8 @@ Test compilation with a targeted sample of representative workflows instead of b

```
Select 3 representative workflows from Phase 1.2 (one simple, one complex, one with imports).
Use the agentic-workflows "compile" tool for each individually (max_tokens: 5000 per call).
After the targeted tests, run one bulk compile (no workflow-name specified) with max_tokens: 5000
Use the agentic-workflows "compile" tool for each individually.
After the targeted tests, run one bulk compile (no workflow-name specified)
to verify overall compilation health.
```

Expand Down
29 changes: 29 additions & 0 deletions pkg/cli/mcp_tools_output_streams_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package cli
import (
"context"
"os/exec"
"slices"
"strings"
"testing"

"github.com/modelcontextprotocol/go-sdk/mcp"
Expand Down Expand Up @@ -49,3 +51,30 @@ func TestCompileTool_UsesOnlyStdoutOnSuccess(t *testing.T) {
assert.JSONEq(t, expectedStdout, output, "compile tool should return subprocess stdout only")
assert.NotContains(t, output, stderrNoise, "compile tool output should not contain stderr noise")
}

func TestCompileTool_AcceptsDeprecatedMaxTokensParameter(t *testing.T) {
const expectedStdout = `[{"workflow":"test.md","valid":true,"errors":[],"warnings":[]}]`

var capturedArgs []string
mockExecCmd := func(ctx context.Context, args ...string) *exec.Cmd {
capturedArgs = slices.Clone(args)
return exec.CommandContext(ctx, "sh", "-c", `printf '%s' "$1"`, "sh", expectedStdout)
}

server := mcp.NewServer(&mcp.Implementation{Name: "test", Version: "1.0"}, nil)
err := registerCompileTool(server, mockExecCmd, "")
require.NoError(t, err, "registerCompileTool should succeed")

session := connectInMemory(t, server)
result, err := session.CallTool(context.Background(), &mcp.CallToolParams{
Name: "compile",
Arguments: map[string]any{
"max_tokens": 5000,
},
})
require.NoError(t, err, "compile tool should accept deprecated max_tokens parameter")

output := extractTextResult(t, result)
assert.JSONEq(t, expectedStdout, output, "compile tool should still return subprocess stdout")
assert.NotContains(t, strings.Join(capturedArgs, " "), "max_tokens", "compile command args should ignore max_tokens")
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test asserts the CLI args don’t include max_tokens, but an accidental forwarding would more likely show up as a flag like --max-tokens. Strengthen the assertion to check for the actual flag forms (e.g., --max-tokens) so the test will fail on the intended regression.

Suggested change
assert.NotContains(t, strings.Join(capturedArgs, " "), "max_tokens", "compile command args should ignore max_tokens")
joinedArgs := strings.Join(capturedArgs, " ")
assert.NotContains(t, joinedArgs, "--max-tokens", "compile command args should ignore max_tokens")
assert.NotContains(t, joinedArgs, "--max_tokens", "compile command args should ignore max_tokens")

Copilot uses AI. Check for mistakes.
}
1 change: 1 addition & 0 deletions pkg/cli/mcp_tools_privileged.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ func registerAuditTool(server *mcp.Server, execCmd execCmdFunc, actor string, va
type auditArgs struct {
RunIDOrURL string `json:"run_id_or_url" jsonschema:"GitHub Actions workflow run ID or URL. Accepts: numeric run ID (e.g., 1234567890), run URL (https://github.com/owner/repo/actions/runs/1234567890), job URL (https://github.com/owner/repo/actions/runs/1234567890/job/9876543210), or job URL with step (https://github.com/owner/repo/actions/runs/1234567890/job/9876543210#step:7:1)"`
Artifacts []string `json:"artifacts,omitempty" jsonschema:"Artifact sets to download (default: all). Valid sets: all, activation, agent, detection, firewall, github-api, mcp"`
MaxTokens int `json:"max_tokens,omitempty" jsonschema:"Deprecated: accepted for backward compatibility but ignored."`
}

// Generate schema for audit tool
Expand Down
32 changes: 32 additions & 0 deletions pkg/cli/mcp_tools_privileged_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"fmt"
"os"
"os/exec"
"slices"
"strings"
"testing"

"github.com/modelcontextprotocol/go-sdk/mcp"
Expand Down Expand Up @@ -299,6 +301,36 @@ func TestAuditToolErrorEnvelopeSetsIsErrorTrueHelperProcess(t *testing.T) {
os.Exit(1)
}

func TestAuditTool_AcceptsDeprecatedMaxTokensParameter(t *testing.T) {
const expectedStdout = `{"overview":{"run_id":"1234567890"}}`

var capturedArgs []string
mockExecCmd := func(ctx context.Context, args ...string) *exec.Cmd {
capturedArgs = slices.Clone(args)
return exec.CommandContext(ctx, "sh", "-c", `printf '%s' "$1"`, "sh", expectedStdout)
Comment on lines +304 to +310
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test uses exec.CommandContext(..., "sh", "-c", ...) to mock subprocess output, which makes the unit test shell-dependent. In the same file, other tests avoid Unix-specific commands to stay cross-platform. Consider switching this mock to the existing helper-process pattern (invoke the test binary via os.Args[0] and an env var) so the test runs on Windows too.

This issue also appears in the following locations of the same file:

  • line 328
  • line 331
Suggested change
func TestAuditTool_AcceptsDeprecatedMaxTokensParameter(t *testing.T) {
const expectedStdout = `{"overview":{"run_id":"1234567890"}}`
var capturedArgs []string
mockExecCmd := func(ctx context.Context, args ...string) *exec.Cmd {
capturedArgs = slices.Clone(args)
return exec.CommandContext(ctx, "sh", "-c", `printf '%s' "$1"`, "sh", expectedStdout)
func TestAuditTool_AcceptsDeprecatedMaxTokensParameterHelperProcess(t *testing.T) {
if os.Getenv("GH_AW_AUDIT_MAX_TOKENS_HELPER_PROCESS") != "1" {
return
}
_, _ = fmt.Fprint(os.Stdout, os.Getenv("GH_AW_AUDIT_MAX_TOKENS_HELPER_STDOUT"))
os.Exit(0)
}
func TestAuditTool_AcceptsDeprecatedMaxTokensParameter(t *testing.T) {
const expectedStdout = `{"overview":{"run_id":"1234567890"}}`
var capturedArgs []string
mockExecCmd := func(ctx context.Context, args ...string) *exec.Cmd {
capturedArgs = slices.Clone(args)
cmd := exec.CommandContext(ctx, os.Args[0], "-test.run=TestAuditTool_AcceptsDeprecatedMaxTokensParameterHelperProcess")
cmd.Env = append(
os.Environ(),
"GH_AW_AUDIT_MAX_TOKENS_HELPER_PROCESS=1",
"GH_AW_AUDIT_MAX_TOKENS_HELPER_STDOUT="+expectedStdout,
)
return cmd

Copilot uses AI. Check for mistakes.
}

server := mcp.NewServer(&mcp.Implementation{Name: "test", Version: "1.0"}, nil)
err := registerAuditTool(server, mockExecCmd, "", false)
require.NoError(t, err, "registerAuditTool should succeed")

session := connectInMemory(t, server)
result, err := session.CallTool(context.Background(), &mcp.CallToolParams{
Name: "audit",
Arguments: map[string]any{
"run_id_or_url": "1234567890",
"max_tokens": 5000,
},
})
require.NoError(t, err, "audit tool should accept deprecated max_tokens parameter")
require.NotNil(t, result, "result should not be nil")

textContent, ok := result.Content[0].(*mcp.TextContent)
require.True(t, ok, "expected text content in audit response")
assert.JSONEq(t, expectedStdout, textContent.Text, "audit tool should return subprocess stdout")
assert.NotContains(t, strings.Join(capturedArgs, " "), "max_tokens", "audit command args should ignore max_tokens")
}

func TestAuditDiffToolErrorEnvelopeSetsIsErrorTrue(t *testing.T) {
mockExecCmd := func(ctx context.Context, args ...string) *exec.Cmd {
cmd := exec.CommandContext(ctx, os.Args[0], "-test.run=TestAuditDiffToolErrorEnvelopeSetsIsErrorTrueHelperProcess")
Expand Down
1 change: 1 addition & 0 deletions pkg/cli/mcp_tools_readonly.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ func registerCompileTool(server *mcp.Server, execCmd execCmdFunc, manifestCacheF
Actionlint bool `json:"actionlint,omitempty" jsonschema:"Run actionlint linter on generated .lock.yml files"`
RunnerGuard bool `json:"runner-guard,omitempty" jsonschema:"Run runner-guard taint analysis scanner on generated .lock.yml files"`
Fix bool `json:"fix,omitempty" jsonschema:"Apply automatic codemod fixes to workflows before compiling"`
MaxTokens int `json:"max_tokens,omitempty" jsonschema:"Deprecated: accepted for backward compatibility but ignored."`
}

// Generate schema with elicitation defaults
Expand Down
Loading