From d01144d6f2e6b0d41b6125766fd1f5a6306fa5df Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer Date: Tue, 24 Mar 2026 15:44:06 +0100 Subject: [PATCH 01/12] Improve agentic audit baselines and execution observability --- .../js/generate_observability_summary.cjs | 133 ++++ .../generate_observability_summary.test.cjs | 79 +++ cmd/gh-aw/main.go | 3 + pkg/cli/audit.go | 7 + pkg/cli/audit_comparison.go | 311 +++++++++ pkg/cli/audit_comparison_test.go | 67 ++ pkg/cli/audit_report.go | 9 +- pkg/cli/audit_report_render.go | 53 ++ pkg/cli/audit_test.go | 5 + pkg/cli/logs_report.go | 11 + pkg/cli/mcp_schema_test.go | 38 ++ pkg/cli/observability_insights.go | 333 ++++++++++ pkg/cli/observability_insights_test.go | 121 ++++ pkg/cli/observability_policy.go | 250 ++++++++ pkg/cli/observability_policy_command.go | 261 ++++++++ pkg/cli/observability_policy_command_test.go | 141 ++++ pkg/cli/observability_policy_test.go | 127 ++++ pkg/cli/observability_schema_test.go | 65 ++ pkg/parser/schema_test.go | 22 + pkg/parser/schemas/main_workflow_schema.json | 12 + pkg/workflow/compiler_yaml_ai_execution.go | 48 ++ pkg/workflow/compiler_yaml_main_job.go | 3 + pkg/workflow/frontmatter_types.go | 6 + pkg/workflow/frontmatter_types_test.go | 21 + .../observability_job_summary_test.go | 85 +++ schemas/observability-policy.json | 97 +++ schemas/observability-report.json | 601 ++++++++++++++++++ 27 files changed, 2908 insertions(+), 1 deletion(-) create mode 100644 actions/setup/js/generate_observability_summary.cjs create mode 100644 actions/setup/js/generate_observability_summary.test.cjs create mode 100644 pkg/cli/audit_comparison.go create mode 100644 pkg/cli/audit_comparison_test.go create mode 100644 pkg/cli/observability_insights.go create mode 100644 pkg/cli/observability_insights_test.go create mode 100644 pkg/cli/observability_policy.go create mode 100644 pkg/cli/observability_policy_command.go create mode 100644 pkg/cli/observability_policy_command_test.go create mode 100644 
pkg/cli/observability_policy_test.go create mode 100644 pkg/cli/observability_schema_test.go create mode 100644 pkg/workflow/observability_job_summary_test.go create mode 100644 schemas/observability-policy.json create mode 100644 schemas/observability-report.json diff --git a/actions/setup/js/generate_observability_summary.cjs b/actions/setup/js/generate_observability_summary.cjs new file mode 100644 index 00000000000..2b85c9707f9 --- /dev/null +++ b/actions/setup/js/generate_observability_summary.cjs @@ -0,0 +1,133 @@ +// @ts-check +/// + +const fs = require("fs"); + +const AW_INFO_PATH = "/tmp/gh-aw/aw_info.json"; +const AGENT_OUTPUT_PATH = "/tmp/gh-aw/agent_output.json"; +const gatewayEventPaths = ["/tmp/gh-aw/mcp-logs/gateway.jsonl", "/tmp/gh-aw/mcp-logs/rpc-messages.jsonl"]; + +function readJSONIfExists(path) { + if (!fs.existsSync(path)) { + return null; + } + + try { + return JSON.parse(fs.readFileSync(path, "utf8")); + } catch { + return null; + } +} + +function countBlockedRequests() { + for (const path of gatewayEventPaths) { + if (!fs.existsSync(path)) { + continue; + } + + const content = fs.readFileSync(path, "utf8"); + return content + .split("\n") + .map(line => line.trim()) + .filter(Boolean) + .reduce((count, line) => { + try { + const entry = JSON.parse(line); + return entry && entry.type === "DIFC_FILTERED" ? count + 1 : count; + } catch { + return count; + } + }, 0); + } + + return 0; +} + +function uniqueCreatedItemTypes(items) { + const types = new Set(); + + for (const item of items) { + if (item && typeof item.type === "string" && item.type.trim() !== "") { + types.add(item.type); + } + } + + return [...types].sort(); +} + +function collectObservabilityData() { + const awInfo = readJSONIfExists(AW_INFO_PATH) || {}; + const agentOutput = readJSONIfExists(AGENT_OUTPUT_PATH) || { items: [], errors: [] }; + const items = Array.isArray(agentOutput.items) ? agentOutput.items : []; + const errors = Array.isArray(agentOutput.errors) ? 
agentOutput.errors : []; + const traceId = awInfo.context && typeof awInfo.context.workflow_call_id === "string" ? awInfo.context.workflow_call_id : ""; + + return { + workflowName: awInfo.workflow_name || "", + engineId: awInfo.engine_id || "", + traceId, + staged: awInfo.staged === true, + firewallEnabled: awInfo.firewall_enabled === true, + createdItemCount: items.length, + createdItemTypes: uniqueCreatedItemTypes(items), + outputErrorCount: errors.length, + blockedRequests: countBlockedRequests(), + }; +} + +function buildObservabilitySummary(data) { + const posture = data.createdItemCount > 0 ? "write-capable" : "read-only"; + const lines = []; + + lines.push("
"); + lines.push("Observability"); + lines.push(""); + + if (data.workflowName) { + lines.push(`- **workflow**: ${data.workflowName}`); + } + if (data.engineId) { + lines.push(`- **engine**: ${data.engineId}`); + } + if (data.traceId) { + lines.push(`- **trace id**: ${data.traceId}`); + } + + lines.push(`- **posture**: ${posture}`); + lines.push(`- **created items**: ${data.createdItemCount}`); + lines.push(`- **blocked requests**: ${data.blockedRequests}`); + lines.push(`- **agent output errors**: ${data.outputErrorCount}`); + lines.push(`- **firewall enabled**: ${data.firewallEnabled}`); + lines.push(`- **staged**: ${data.staged}`); + + if (data.createdItemTypes.length > 0) { + lines.push("- **item types**:"); + for (const itemType of data.createdItemTypes) { + lines.push(` - ${itemType}`); + } + } + + lines.push(""); + lines.push("
"); + + return lines.join("\n") + "\n"; +} + +async function main(core) { + const mode = process.env.GH_AW_OBSERVABILITY_JOB_SUMMARY || ""; + if (mode !== "on") { + core.info(`Skipping observability summary: mode=${mode || "unset"}`); + return; + } + + const data = collectObservabilityData(); + const markdown = buildObservabilitySummary(data); + await core.summary.addRaw(markdown).write(); + core.info("Generated observability summary in step summary"); +} + +module.exports = { + buildObservabilitySummary, + collectObservabilityData, + main, +}; diff --git a/actions/setup/js/generate_observability_summary.test.cjs b/actions/setup/js/generate_observability_summary.test.cjs new file mode 100644 index 00000000000..56d7a06a8c2 --- /dev/null +++ b/actions/setup/js/generate_observability_summary.test.cjs @@ -0,0 +1,79 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import fs from "fs"; + +const mockCore = { + info: vi.fn(), + summary: { + addRaw: vi.fn().mockReturnThis(), + write: vi.fn().mockResolvedValue(), + }, +}; + +global.core = mockCore; + +describe("generate_observability_summary.cjs", () => { + let module; + + beforeEach(async () => { + vi.clearAllMocks(); + fs.mkdirSync("/tmp/gh-aw/mcp-logs", { recursive: true }); + process.env.GH_AW_OBSERVABILITY_JOB_SUMMARY = "on"; + module = await import("./generate_observability_summary.cjs"); + }); + + afterEach(() => { + delete process.env.GH_AW_OBSERVABILITY_JOB_SUMMARY; + for (const path of ["/tmp/gh-aw/aw_info.json", "/tmp/gh-aw/agent_output.json", "/tmp/gh-aw/mcp-logs/gateway.jsonl", "/tmp/gh-aw/mcp-logs/rpc-messages.jsonl"]) { + if (fs.existsSync(path)) { + fs.unlinkSync(path); + } + } + }); + + it("builds summary from runtime observability files", async () => { + fs.writeFileSync( + "/tmp/gh-aw/aw_info.json", + JSON.stringify({ + workflow_name: "triage-workflow", + engine_id: "copilot", + staged: false, + firewall_enabled: true, + context: { workflow_call_id: "trace-123" }, + }) + ); + 
fs.writeFileSync( + "/tmp/gh-aw/agent_output.json", + JSON.stringify({ + items: [{ type: "create_issue" }, { type: "add_comment" }], + errors: ["validation failed"], + }) + ); + fs.writeFileSync("/tmp/gh-aw/mcp-logs/gateway.jsonl", [JSON.stringify({ type: "DIFC_FILTERED" }), JSON.stringify({ type: "REQUEST" })].join("\n")); + + await module.main(mockCore); + + expect(mockCore.summary.addRaw).toHaveBeenCalledTimes(1); + const summary = mockCore.summary.addRaw.mock.calls[0][0]; + expect(summary).toContain("Observability"); + expect(summary).toContain("- **workflow**: triage-workflow"); + expect(summary).toContain("- **engine**: copilot"); + expect(summary).toContain("- **trace id**: trace-123"); + expect(summary).toContain("- **posture**: write-capable"); + expect(summary).toContain("- **created items**: 2"); + expect(summary).toContain("- **blocked requests**: 1"); + expect(summary).toContain("- **agent output errors**: 1"); + expect(summary).toContain(" - add_comment"); + expect(summary).toContain(" - create_issue"); + expect(mockCore.summary.write).toHaveBeenCalledTimes(1); + }); + + it("skips summary generation when opt-in mode is disabled", async () => { + process.env.GH_AW_OBSERVABILITY_JOB_SUMMARY = "off"; + + await module.main(mockCore); + + expect(mockCore.summary.addRaw).not.toHaveBeenCalled(); + expect(mockCore.summary.write).not.toHaveBeenCalled(); + expect(mockCore.info).toHaveBeenCalledWith("Skipping observability summary: mode=off"); + }); +}); diff --git a/cmd/gh-aw/main.go b/cmd/gh-aw/main.go index 8a0f5d2541b..16d889e71ec 100644 --- a/cmd/gh-aw/main.go +++ b/cmd/gh-aw/main.go @@ -718,6 +718,7 @@ Use "` + string(constants.CLIExtensionPrefix) + ` help all" to show help for all logsCmd := cli.NewLogsCommand() auditCmd := cli.NewAuditCommand() healthCmd := cli.NewHealthCommand() + observabilityPolicyCmd := cli.NewObservabilityPolicyCommand() mcpServerCmd := cli.NewMCPServerCommand() prCmd := cli.NewPRCommand() secretsCmd := cli.NewSecretsCommand() @@ 
-760,6 +761,7 @@ Use "` + string(constants.CLIExtensionPrefix) + ` help all" to show help for all logsCmd.GroupID = "analysis" auditCmd.GroupID = "analysis" healthCmd.GroupID = "analysis" + observabilityPolicyCmd.GroupID = "analysis" checksCmd.GroupID = "analysis" // Utilities @@ -789,6 +791,7 @@ Use "` + string(constants.CLIExtensionPrefix) + ` help all" to show help for all rootCmd.AddCommand(logsCmd) rootCmd.AddCommand(auditCmd) rootCmd.AddCommand(healthCmd) + rootCmd.AddCommand(observabilityPolicyCmd) rootCmd.AddCommand(checksCmd) rootCmd.AddCommand(mcpCmd) rootCmd.AddCommand(mcpServerCmd) diff --git a/pkg/cli/audit.go b/pkg/cli/audit.go index 12bcc2f104b..9d6d4aa9af3 100644 --- a/pkg/cli/audit.go +++ b/pkg/cli/audit.go @@ -340,6 +340,9 @@ func AuditWorkflowRun(ctx context.Context, runID int64, owner, repo, hostname st fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to list artifacts: %v", err))) } + currentCreatedItems := extractCreatedItemsFromManifest(runOutputDir) + run.SafeItemsCount = len(currentCreatedItems) + // Create processed run for report generation processedRun := ProcessedRun{ Run: run, @@ -352,8 +355,12 @@ func AuditWorkflowRun(ctx context.Context, runID int64, owner, repo, hostname st JobDetails: jobDetails, } + currentSnapshot := buildAuditComparisonSnapshot(processedRun, currentCreatedItems) + comparison := buildAuditComparisonForRun(run, currentSnapshot, runOutputDir, owner, repo, hostname, verbose) + // Build structured audit data auditData := buildAuditData(processedRun, metrics, mcpToolUsage) + auditData.Comparison = comparison // Render output based on format preference if jsonOutput { diff --git a/pkg/cli/audit_comparison.go b/pkg/cli/audit_comparison.go new file mode 100644 index 00000000000..bf111e68d8b --- /dev/null +++ b/pkg/cli/audit_comparison.go @@ -0,0 +1,311 @@ +package cli + +import ( + "encoding/json" + "fmt" + "net/url" + "os" + "path/filepath" + "sort" + "strings" + + 
"github.com/github/gh-aw/pkg/workflow" +) + +type AuditComparisonData struct { + BaselineFound bool `json:"baseline_found"` + Baseline *AuditComparisonBaseline `json:"baseline,omitempty"` + Delta *AuditComparisonDelta `json:"delta,omitempty"` + Classification *AuditComparisonClassification `json:"classification,omitempty"` + Recommendation *AuditComparisonRecommendation `json:"recommendation,omitempty"` +} + +type AuditComparisonBaseline struct { + RunID int64 `json:"run_id"` + WorkflowName string `json:"workflow_name,omitempty"` + Conclusion string `json:"conclusion,omitempty"` + CreatedAt string `json:"created_at,omitempty"` +} + +type AuditComparisonDelta struct { + Turns AuditComparisonIntDelta `json:"turns"` + Posture AuditComparisonStringDelta `json:"posture"` + BlockedRequests AuditComparisonIntDelta `json:"blocked_requests"` + MCPFailure *AuditComparisonMCPFailureDelta `json:"mcp_failure,omitempty"` +} + +type AuditComparisonIntDelta struct { + Before int `json:"before"` + After int `json:"after"` + Changed bool `json:"changed"` +} + +type AuditComparisonStringDelta struct { + Before string `json:"before"` + After string `json:"after"` + Changed bool `json:"changed"` +} + +type AuditComparisonMCPFailureDelta struct { + Before []string `json:"before,omitempty"` + After []string `json:"after,omitempty"` + NewlyPresent bool `json:"newly_present"` +} + +type AuditComparisonClassification struct { + Label string `json:"label"` + ReasonCodes []string `json:"reason_codes,omitempty"` +} + +type AuditComparisonRecommendation struct { + Action string `json:"action"` +} + +type auditComparisonSnapshot struct { + Turns int + Posture string + BlockedRequests int + MCPFailures []string +} + +func buildAuditComparisonSnapshot(processedRun ProcessedRun, createdItems []CreatedItemReport) auditComparisonSnapshot { + blockedRequests := 0 + if processedRun.FirewallAnalysis != nil { + blockedRequests = processedRun.FirewallAnalysis.BlockedRequests + } + + return 
auditComparisonSnapshot{ + Turns: processedRun.Run.Turns, + Posture: deriveAuditPosture(createdItems), + BlockedRequests: blockedRequests, + MCPFailures: collectMCPFailureServers(processedRun.MCPFailures), + } +} + +func loadAuditComparisonSnapshotFromArtifacts(run WorkflowRun, logsPath string, verbose bool) (auditComparisonSnapshot, error) { + metrics, err := extractLogMetrics(logsPath, verbose, run.WorkflowPath) + if err != nil { + return auditComparisonSnapshot{}, fmt.Errorf("failed to extract baseline metrics: %w", err) + } + + firewallAnalysis, err := analyzeFirewallLogs(logsPath, verbose) + if err != nil { + return auditComparisonSnapshot{}, fmt.Errorf("failed to analyze baseline firewall logs: %w", err) + } + + mcpFailures, err := extractMCPFailuresFromRun(logsPath, run, verbose) + if err != nil { + return auditComparisonSnapshot{}, fmt.Errorf("failed to extract baseline MCP failures: %w", err) + } + + blockedRequests := 0 + if firewallAnalysis != nil { + blockedRequests = firewallAnalysis.BlockedRequests + } + + return auditComparisonSnapshot{ + Turns: metrics.Turns, + Posture: deriveAuditPosture(extractCreatedItemsFromManifest(logsPath)), + BlockedRequests: blockedRequests, + MCPFailures: collectMCPFailureServers(mcpFailures), + }, nil +} + +func buildAuditComparison(current auditComparisonSnapshot, baselineRun *WorkflowRun, baseline *auditComparisonSnapshot) *AuditComparisonData { + if baselineRun == nil || baseline == nil { + return &AuditComparisonData{BaselineFound: false} + } + + reasonCodes := make([]string, 0, 4) + delta := &AuditComparisonDelta{ + Turns: AuditComparisonIntDelta{ + Before: baseline.Turns, + After: current.Turns, + Changed: baseline.Turns != current.Turns, + }, + Posture: AuditComparisonStringDelta{ + Before: baseline.Posture, + After: current.Posture, + Changed: baseline.Posture != current.Posture, + }, + BlockedRequests: AuditComparisonIntDelta{ + Before: baseline.BlockedRequests, + After: current.BlockedRequests, + Changed: 
baseline.BlockedRequests != current.BlockedRequests, + }, + } + + if current.Turns > baseline.Turns { + reasonCodes = append(reasonCodes, "turns_increase") + } + if baseline.Posture != current.Posture { + reasonCodes = append(reasonCodes, "posture_changed") + } + if current.BlockedRequests > baseline.BlockedRequests { + reasonCodes = append(reasonCodes, "blocked_requests_increase") + } + + newMCPFailure := len(baseline.MCPFailures) == 0 && len(current.MCPFailures) > 0 + if newMCPFailure || len(baseline.MCPFailures) > 0 || len(current.MCPFailures) > 0 { + delta.MCPFailure = &AuditComparisonMCPFailureDelta{ + Before: baseline.MCPFailures, + After: current.MCPFailures, + NewlyPresent: newMCPFailure, + } + } + if newMCPFailure { + reasonCodes = append(reasonCodes, "new_mcp_failure") + } + + label := "stable" + switch { + case delta.Posture.Before == "read_only" && delta.Posture.After == "write_capable": + label = "risky" + case newMCPFailure: + label = "risky" + case current.BlockedRequests > baseline.BlockedRequests: + label = "risky" + case len(reasonCodes) > 0: + label = "changed" + } + + return &AuditComparisonData{ + BaselineFound: true, + Baseline: &AuditComparisonBaseline{ + RunID: baselineRun.DatabaseID, + WorkflowName: baselineRun.WorkflowName, + Conclusion: baselineRun.Conclusion, + CreatedAt: baselineRun.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), + }, + Delta: delta, + Classification: &AuditComparisonClassification{ + Label: label, + ReasonCodes: reasonCodes, + }, + Recommendation: &AuditComparisonRecommendation{ + Action: recommendAuditComparisonAction(label, delta), + }, + } +} + +func recommendAuditComparisonAction(label string, delta *AuditComparisonDelta) string { + if delta == nil || label == "stable" { + return "No action needed; this run matches the last successful baseline closely." 
+ } + + if delta.Posture.Before == "read_only" && delta.Posture.After == "write_capable" { + return "Review first-time write-capable behavior and add a guardrail before enabling by default." + } + if delta.MCPFailure != nil && delta.MCPFailure.NewlyPresent { + return "Inspect the new MCP failure and restore tool availability before relying on this workflow." + } + if delta.BlockedRequests.After > delta.BlockedRequests.Before { + return "Review network policy changes before treating the new blocked requests as normal behavior." + } + if delta.Turns.After > delta.Turns.Before { + return "Compare prompt or task-shape changes because this run needed more turns than the last successful baseline." + } + + return "Review the behavior change against the previous successful run before treating it as the new normal." +} + +func deriveAuditPosture(createdItems []CreatedItemReport) string { + if len(createdItems) > 0 { + return "write_capable" + } + return "read_only" +} + +func collectMCPFailureServers(failures []MCPFailureReport) []string { + if len(failures) == 0 { + return nil + } + + serverSet := make(map[string]struct{}, len(failures)) + for _, failure := range failures { + if strings.TrimSpace(failure.ServerName) == "" { + continue + } + serverSet[failure.ServerName] = struct{}{} + } + + servers := make([]string, 0, len(serverSet)) + for server := range serverSet { + servers = append(servers, server) + } + sort.Strings(servers) + return servers +} + +func findPreviousSuccessfulWorkflowRun(current WorkflowRun, owner, repo, hostname string, verbose bool) (*WorkflowRun, error) { + workflowID := filepath.Base(current.WorkflowPath) + if workflowID == "." 
|| workflowID == "" { + return nil, fmt.Errorf("workflow path unavailable for run %d", current.DatabaseID) + } + + encodedWorkflowID := url.PathEscape(workflowID) + var endpoint string + if owner != "" && repo != "" { + endpoint = fmt.Sprintf("repos/%s/%s/actions/workflows/%s/runs?per_page=50", owner, repo, encodedWorkflowID) + } else { + endpoint = fmt.Sprintf("repos/{owner}/{repo}/actions/workflows/%s/runs?per_page=50", encodedWorkflowID) + } + + jq := fmt.Sprintf(`[.workflow_runs[] | select(.id != %d and .conclusion == "success" and .created_at < "%s") | {databaseId: .id, number: .run_number, url: .html_url, status: .status, conclusion: .conclusion, workflowName: .name, workflowPath: .path, createdAt: .created_at, startedAt: .run_started_at, updatedAt: .updated_at, event: .event, headBranch: .head_branch, headSha: .head_sha, displayTitle: .display_title}] | .[0]`, current.DatabaseID, current.CreatedAt.Format("2006-01-02T15:04:05Z07:00")) + + args := []string{"api"} + if hostname != "" && hostname != "github.com" { + args = append(args, "--hostname", hostname) + } + args = append(args, endpoint, "--jq", jq) + + output, err := workflow.RunGHCombined("Fetching previous successful workflow run...", args...) 
+ if err != nil { + return nil, fmt.Errorf("failed to fetch previous successful workflow run: %w", err) + } + + trimmed := strings.TrimSpace(string(output)) + if trimmed == "null" || trimmed == "" { + return nil, nil + } + + var run WorkflowRun + if err := json.Unmarshal(output, &run); err != nil { + return nil, fmt.Errorf("failed to parse previous successful workflow run: %w", err) + } + + if strings.HasPrefix(run.WorkflowName, ".github/") { + if displayName := resolveWorkflowDisplayName(run.WorkflowPath, owner, repo, hostname); displayName != "" { + run.WorkflowName = displayName + } + } + + return &run, nil +} + +func buildAuditComparisonForRun(currentRun WorkflowRun, currentSnapshot auditComparisonSnapshot, outputDir string, owner, repo, hostname string, verbose bool) *AuditComparisonData { + baselineRun, err := findPreviousSuccessfulWorkflowRun(currentRun, owner, repo, hostname, verbose) + if err != nil { + auditLog.Printf("Skipping audit comparison: failed to find baseline: %v", err) + return &AuditComparisonData{BaselineFound: false} + } + if baselineRun == nil { + return &AuditComparisonData{BaselineFound: false} + } + + baselineOutputDir := filepath.Join(outputDir, fmt.Sprintf("baseline-%d", baselineRun.DatabaseID)) + if _, err := os.Stat(baselineOutputDir); err != nil { + if downloadErr := downloadRunArtifacts(baselineRun.DatabaseID, baselineOutputDir, verbose, owner, repo, hostname); downloadErr != nil { + auditLog.Printf("Skipping baseline comparison for run %d: failed to download baseline artifacts: %v", baselineRun.DatabaseID, downloadErr) + return &AuditComparisonData{BaselineFound: false} + } + } + + baselineSnapshot, err := loadAuditComparisonSnapshotFromArtifacts(*baselineRun, baselineOutputDir, verbose) + if err != nil { + auditLog.Printf("Skipping baseline comparison for run %d: failed to load baseline snapshot: %v", baselineRun.DatabaseID, err) + return &AuditComparisonData{BaselineFound: false} + } + + return 
buildAuditComparison(currentSnapshot, baselineRun, &baselineSnapshot) +} diff --git a/pkg/cli/audit_comparison_test.go b/pkg/cli/audit_comparison_test.go new file mode 100644 index 00000000000..c3952c84aa3 --- /dev/null +++ b/pkg/cli/audit_comparison_test.go @@ -0,0 +1,67 @@ +//go:build !integration + +package cli + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBuildAuditComparison_NoBaseline(t *testing.T) { + comparison := buildAuditComparison(auditComparisonSnapshot{Turns: 4, Posture: "read_only"}, nil, nil) + require.NotNil(t, comparison, "comparison should still be returned when no baseline exists") + assert.False(t, comparison.BaselineFound, "baseline should be marked unavailable") + assert.Nil(t, comparison.Baseline, "baseline details should be omitted") + assert.Nil(t, comparison.Delta, "delta should be omitted when no baseline exists") + assert.Nil(t, comparison.Classification, "classification should be omitted when no baseline exists") +} + +func TestBuildAuditComparison_RiskyChange(t *testing.T) { + baselineRun := &WorkflowRun{ + DatabaseID: 100, + WorkflowName: "triage", + Conclusion: "success", + CreatedAt: time.Date(2026, 3, 20, 12, 0, 0, 0, time.UTC), + } + + comparison := buildAuditComparison( + auditComparisonSnapshot{Turns: 11, Posture: "write_capable", BlockedRequests: 7, MCPFailures: []string{"github"}}, + baselineRun, + &auditComparisonSnapshot{Turns: 4, Posture: "read_only", BlockedRequests: 0}, + ) + + require.NotNil(t, comparison, "comparison should be built") + require.True(t, comparison.BaselineFound, "baseline should be marked available") + require.NotNil(t, comparison.Delta, "delta should be present") + require.NotNil(t, comparison.Classification, "classification should be present") + require.NotNil(t, comparison.Recommendation, "recommendation should be present") + + assert.Equal(t, "risky", comparison.Classification.Label, "write-capable transition should 
be risky") + assert.Contains(t, comparison.Classification.ReasonCodes, "turns_increase") + assert.Contains(t, comparison.Classification.ReasonCodes, "posture_changed") + assert.Contains(t, comparison.Classification.ReasonCodes, "blocked_requests_increase") + assert.Contains(t, comparison.Classification.ReasonCodes, "new_mcp_failure") + assert.Equal(t, 4, comparison.Delta.Turns.Before) + assert.Equal(t, 11, comparison.Delta.Turns.After) + assert.Equal(t, "read_only", comparison.Delta.Posture.Before) + assert.Equal(t, "write_capable", comparison.Delta.Posture.After) + assert.True(t, comparison.Delta.MCPFailure.NewlyPresent, "new MCP failure should be marked") + assert.Contains(t, comparison.Recommendation.Action, "write-capable", "recommendation should address the risky posture change") +} + +func TestBuildAuditComparison_StableRun(t *testing.T) { + baselineRun := &WorkflowRun{DatabaseID: 99, WorkflowName: "triage", Conclusion: "success", CreatedAt: time.Now().Add(-time.Hour)} + comparison := buildAuditComparison( + auditComparisonSnapshot{Turns: 4, Posture: "read_only", BlockedRequests: 0}, + baselineRun, + &auditComparisonSnapshot{Turns: 4, Posture: "read_only", BlockedRequests: 0}, + ) + + require.NotNil(t, comparison.Classification, "classification should be present") + assert.Equal(t, "stable", comparison.Classification.Label, "unchanged runs should be stable") + assert.Empty(t, comparison.Classification.ReasonCodes, "stable runs should have no reason codes") + assert.Contains(t, comparison.Recommendation.Action, "No action needed", "stable runs should produce a no-op recommendation") +} diff --git a/pkg/cli/audit_report.go b/pkg/cli/audit_report.go index 0df532c5a70..712af637231 100644 --- a/pkg/cli/audit_report.go +++ b/pkg/cli/audit_report.go @@ -20,9 +20,11 @@ var auditReportLog = logger.New("cli:audit_report") // AuditData represents the complete structured audit data for a workflow run type AuditData struct { Overview OverviewData `json:"overview"` + 
Comparison *AuditComparisonData `json:"comparison,omitempty"` Metrics MetricsData `json:"metrics"` KeyFindings []Finding `json:"key_findings,omitempty"` Recommendations []Recommendation `json:"recommendations,omitempty"` + ObservabilityInsights []ObservabilityInsight `json:"observability_insights,omitempty"` PerformanceMetrics *PerformanceMetrics `json:"performance_metrics,omitempty"` Jobs []JobData `json:"jobs,omitempty"` DownloadedFiles []FileInfo `json:"downloaded_files"` @@ -302,12 +304,16 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage toolUsage = append(toolUsage, *info) } + createdItems := extractCreatedItemsFromManifest(run.LogsPath) + // Generate key findings findings := generateFindings(processedRun, metricsData, errors, warnings) // Generate recommendations recommendations := generateRecommendations(processedRun, metricsData, findings) + observabilityInsights := buildAuditObservabilityInsights(processedRun, metricsData, toolUsage, createdItems) + // Generate performance metrics performanceMetrics := generatePerformanceMetrics(processedRun, metricsData, toolUsage) @@ -321,6 +327,7 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage Metrics: metricsData, KeyFindings: findings, Recommendations: recommendations, + ObservabilityInsights: observabilityInsights, PerformanceMetrics: performanceMetrics, Jobs: jobs, DownloadedFiles: downloadedFiles, @@ -334,7 +341,7 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage Warnings: warnings, ToolUsage: toolUsage, MCPToolUsage: mcpToolUsage, - CreatedItems: extractCreatedItemsFromManifest(run.LogsPath), + CreatedItems: createdItems, } } diff --git a/pkg/cli/audit_report_render.go b/pkg/cli/audit_report_render.go index 867a1d1b649..a9950ce8185 100644 --- a/pkg/cli/audit_report_render.go +++ b/pkg/cli/audit_report_render.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "strconv" + "strings" 
"github.com/github/gh-aw/pkg/console" "github.com/github/gh-aw/pkg/stringutil" @@ -30,6 +31,12 @@ func renderConsole(data AuditData, logsPath string) { fmt.Fprintln(os.Stderr) renderOverview(data.Overview) + if data.Comparison != nil { + fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Comparison To Last Successful Run")) + fmt.Fprintln(os.Stderr) + renderAuditComparison(data.Comparison) + } + // Key Findings Section - NEW if len(data.KeyFindings) > 0 { auditReportLog.Printf("Rendering %d key findings", len(data.KeyFindings)) @@ -46,6 +53,12 @@ func renderConsole(data AuditData, logsPath string) { renderRecommendations(data.Recommendations) } + if len(data.ObservabilityInsights) > 0 { + fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Observability Insights")) + fmt.Fprintln(os.Stderr) + renderObservabilityInsights(data.ObservabilityInsights) + } + // Performance Metrics Section - NEW if data.PerformanceMetrics != nil { fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Performance Metrics")) @@ -178,6 +191,46 @@ func renderConsole(data AuditData, logsPath string) { fmt.Fprintln(os.Stderr) } +func renderAuditComparison(comparison *AuditComparisonData) { + if comparison == nil { + return + } + + if !comparison.BaselineFound || comparison.Baseline == nil || comparison.Delta == nil || comparison.Classification == nil { + fmt.Fprintln(os.Stderr, " No previous successful run was available for baseline comparison.") + fmt.Fprintln(os.Stderr) + return + } + + fmt.Fprintf(os.Stderr, " Baseline: run %d", comparison.Baseline.RunID) + if comparison.Baseline.Conclusion != "" { + fmt.Fprintf(os.Stderr, " (%s)", comparison.Baseline.Conclusion) + } + fmt.Fprintln(os.Stderr) + fmt.Fprintf(os.Stderr, " Classification: %s\n", comparison.Classification.Label) + fmt.Fprintln(os.Stderr, " Changes:") + + if comparison.Delta.Turns.Changed { + fmt.Fprintf(os.Stderr, " - Turns: %d -> %d\n", comparison.Delta.Turns.Before, comparison.Delta.Turns.After) + } + if 
comparison.Delta.Posture.Changed { + fmt.Fprintf(os.Stderr, " - Posture: %s -> %s\n", comparison.Delta.Posture.Before, comparison.Delta.Posture.After) + } + if comparison.Delta.BlockedRequests.Changed { + fmt.Fprintf(os.Stderr, " - Blocked requests: %d -> %d\n", comparison.Delta.BlockedRequests.Before, comparison.Delta.BlockedRequests.After) + } + if comparison.Delta.MCPFailure != nil && comparison.Delta.MCPFailure.NewlyPresent { + fmt.Fprintf(os.Stderr, " - New MCP failure: %s\n", strings.Join(comparison.Delta.MCPFailure.After, ", ")) + } + if len(comparison.Classification.ReasonCodes) == 0 { + fmt.Fprintln(os.Stderr, " - No meaningful behavior change from the last successful baseline") + } + if comparison.Recommendation != nil && comparison.Recommendation.Action != "" { + fmt.Fprintf(os.Stderr, " Recommended action: %s\n", comparison.Recommendation.Action) + } + fmt.Fprintln(os.Stderr) +} + // renderOverview renders the overview section using the new rendering system func renderOverview(overview OverviewData) { // Format Status with optional Conclusion diff --git a/pkg/cli/audit_test.go b/pkg/cli/audit_test.go index b4b48842cf2..836b1d32ce0 100644 --- a/pkg/cli/audit_test.go +++ b/pkg/cli/audit_test.go @@ -130,6 +130,7 @@ func TestBuildAuditData(t *testing.T) { // Build audit data auditData := buildAuditData(processedRun, metrics, nil) + auditData.Comparison = &AuditComparisonData{BaselineFound: false} // Verify overview if auditData.Overview.RunID != 123456 { @@ -163,6 +164,10 @@ func TestBuildAuditData(t *testing.T) { t.Errorf("Expected warning count 1, got %d", auditData.Metrics.WarningCount) } + if auditData.Comparison == nil { + t.Error("Expected comparison field to be assignable on audit data") + } + // Note: Error and warning extraction was removed from buildAuditData // The error/warning counts in metrics are preserved but individual error/warning // extraction via pattern matching is no longer performed diff --git a/pkg/cli/logs_report.go 
b/pkg/cli/logs_report.go index 05984b16b7b..411c2081958 100644 --- a/pkg/cli/logs_report.go +++ b/pkg/cli/logs_report.go @@ -24,6 +24,7 @@ type LogsData struct { Runs []RunData `json:"runs" console:"title:Workflow Logs Overview"` ToolUsage []ToolUsageSummary `json:"tool_usage,omitempty" console:"title:🛠️ Tool Usage Summary,omitempty"` MCPToolUsage *MCPToolUsageSummary `json:"mcp_tool_usage,omitempty" console:"title:🔧 MCP Tool Usage,omitempty"` + Observability []ObservabilityInsight `json:"observability_insights,omitempty" console:"-"` ErrorsAndWarnings []ErrorSummary `json:"errors_and_warnings,omitempty" console:"title:Errors and Warnings,omitempty"` MissingTools []MissingToolSummary `json:"missing_tools,omitempty" console:"title:🛠️ Missing Tools Summary,omitempty"` MissingData []MissingDataSummary `json:"missing_data,omitempty" console:"title:📊 Missing Data Summary,omitempty"` @@ -246,6 +247,8 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation // Build redacted domains summary redactedDomains := buildRedactedDomainsSummary(processedRuns) + observability := buildLogsObservabilityInsights(processedRuns, toolUsage) + absOutputDir, _ := filepath.Abs(outputDir) return LogsData{ @@ -253,6 +256,7 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation Runs: runs, ToolUsage: toolUsage, MCPToolUsage: mcpToolUsage, + Observability: observability, ErrorsAndWarnings: errorsAndWarnings, MissingTools: missingTools, MissingData: missingData, @@ -942,4 +946,11 @@ func renderLogsConsole(data LogsData) { console.FormatInfoMessage("•"), len(data.ToolUsage)) } + + if len(data.Observability) > 0 { + fmt.Fprintln(os.Stderr) + fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Observability Insights")) + fmt.Fprintln(os.Stderr) + renderObservabilityInsights(data.Observability) + } } diff --git a/pkg/cli/mcp_schema_test.go b/pkg/cli/mcp_schema_test.go index 8abb8427c4c..9d449bbc8b0 100644 --- a/pkg/cli/mcp_schema_test.go +++ 
b/pkg/cli/mcp_schema_test.go @@ -474,6 +474,44 @@ func TestGeneratedSchemasValidateRealOutput(t *testing.T) { } }) + t.Run("validates ObservabilityPolicy schema can be generated", func(t *testing.T) { + schema, err := GenerateSchema[ObservabilityPolicy]() + if err != nil { + t.Fatalf("GenerateSchema failed: %v", err) + } + + resolved, err := schema.Resolve(&jsonschema.ResolveOptions{}) + if err != nil { + t.Fatalf("Schema.Resolve failed: %v", err) + } + + data := ObservabilityPolicy{ + SchemaVersion: "1.0.0", + Rules: []ObservabilityPolicyRule{{ + ID: "block-domain", + Action: "fail", + Message: "blocked domain detected", + Match: ObservabilityPolicyMatch{ + BlockedDomains: []string{"evil.example.com"}, + }, + }}, + } + + jsonBytes, err := json.Marshal(data) + if err != nil { + t.Fatalf("json.Marshal failed: %v", err) + } + + var jsonValue map[string]any + if err := json.Unmarshal(jsonBytes, &jsonValue); err != nil { + t.Fatalf("json.Unmarshal failed: %v", err) + } + + if err := resolved.Validate(jsonValue); err != nil { + t.Errorf("Schema should validate real ObservabilityPolicy output: %v", err) + } + }) + t.Run("validates WorkflowStatus schema against real data", func(t *testing.T) { // Generate schema for WorkflowStatus schema, err := GenerateSchema[WorkflowStatus]() diff --git a/pkg/cli/observability_insights.go b/pkg/cli/observability_insights.go new file mode 100644 index 00000000000..37ced823883 --- /dev/null +++ b/pkg/cli/observability_insights.go @@ -0,0 +1,333 @@ +package cli + +import ( + "fmt" + "os" + "strings" +) + +type ObservabilityInsight struct { + Category string `json:"category"` + Severity string `json:"severity"` + Title string `json:"title"` + Summary string `json:"summary"` + Evidence string `json:"evidence,omitempty"` +} + +type workflowObservabilityStats struct { + workflowName string + runs int + failures int + timedOuts int + missingTools int + mcpFailures int + missingData int + safeItems int + totalTurns int + minTurns int + maxTurns 
int + blocked int + totalNet int +} + +func buildAuditObservabilityInsights(processedRun ProcessedRun, metrics MetricsData, toolUsage []ToolUsageInfo, createdItems []CreatedItemReport) []ObservabilityInsight { + insights := make([]ObservabilityInsight, 0, 5) + toolTypes := len(toolUsage) + + switch { + case metrics.Turns >= 12 || toolTypes >= 6: + insights = append(insights, ObservabilityInsight{ + Category: "execution", + Severity: "medium", + Title: "Exploratory execution path", + Summary: fmt.Sprintf("The agent used %d turns across %d tool types, which indicates adaptive planning instead of a strictly linear path.", metrics.Turns, toolTypes), + Evidence: fmt.Sprintf("turns=%d tool_types=%d", metrics.Turns, toolTypes), + }) + case metrics.Turns >= 6 || toolTypes >= 4: + insights = append(insights, ObservabilityInsight{ + Category: "execution", + Severity: "info", + Title: "Adaptive execution path", + Summary: fmt.Sprintf("The run stayed moderately dynamic with %d turns and %d tool types.", metrics.Turns, toolTypes), + Evidence: fmt.Sprintf("turns=%d tool_types=%d", metrics.Turns, toolTypes), + }) + default: + insights = append(insights, ObservabilityInsight{ + Category: "execution", + Severity: "info", + Title: "Directed execution path", + Summary: fmt.Sprintf("The run remained relatively linear with %d turns and %d tool types.", metrics.Turns, toolTypes), + Evidence: fmt.Sprintf("turns=%d tool_types=%d", metrics.Turns, toolTypes), + }) + } + + createdCount := len(createdItems) + safeItemsCount := processedRun.Run.SafeItemsCount + if createdCount > 0 || safeItemsCount > 0 { + insights = append(insights, ObservabilityInsight{ + Category: "actuation", + Severity: "info", + Title: "Write path executed", + Summary: fmt.Sprintf("The workflow crossed from analysis into action, producing %d created item(s) and %d safe output action(s).", createdCount, safeItemsCount), + Evidence: fmt.Sprintf("created_items=%d safe_items=%d", createdCount, safeItemsCount), + }) + } else 
{ + insights = append(insights, ObservabilityInsight{ + Category: "actuation", + Severity: "info", + Title: "Read-only posture observed", + Summary: "The workflow stayed in an analysis posture and did not emit any GitHub write actions.", + Evidence: "created_items=0 safe_items=0", + }) + } + + frictionEvents := len(processedRun.MissingTools) + len(processedRun.MCPFailures) + len(processedRun.MissingData) + if frictionEvents > 0 { + severity := "medium" + if len(processedRun.MCPFailures) > 0 || frictionEvents >= 3 { + severity = "high" + } + insights = append(insights, ObservabilityInsight{ + Category: "tooling", + Severity: severity, + Title: "Capability friction detected", + Summary: fmt.Sprintf("The run hit %d capability gap event(s): %d missing tool(s), %d MCP failure(s), and %d missing data signal(s).", frictionEvents, len(processedRun.MissingTools), len(processedRun.MCPFailures), len(processedRun.MissingData)), + Evidence: fmt.Sprintf("missing_tools=%d mcp_failures=%d missing_data=%d", len(processedRun.MissingTools), len(processedRun.MCPFailures), len(processedRun.MissingData)), + }) + } + + if processedRun.FirewallAnalysis != nil && processedRun.FirewallAnalysis.TotalRequests > 0 { + blockedRate := float64(processedRun.FirewallAnalysis.BlockedRequests) / float64(processedRun.FirewallAnalysis.TotalRequests) + severity := "info" + title := "Network policy aligned" + summary := fmt.Sprintf("The firewall observed %d request(s) with %d blocked, for a %.0f%% block rate.", processedRun.FirewallAnalysis.TotalRequests, processedRun.FirewallAnalysis.BlockedRequests, blockedRate*100) + if processedRun.FirewallAnalysis.BlockedRequests > 0 { + title = "Network friction detected" + severity = "medium" + if blockedRate >= 0.5 || processedRun.FirewallAnalysis.BlockedRequests >= 10 { + severity = "high" + } + } + insights = append(insights, ObservabilityInsight{ + Category: "network", + Severity: severity, + Title: title, + Summary: summary, + Evidence: 
fmt.Sprintf("blocked=%d total=%d", processedRun.FirewallAnalysis.BlockedRequests, processedRun.FirewallAnalysis.TotalRequests), + }) + } + + if processedRun.RedactedDomainsAnalysis != nil && processedRun.RedactedDomainsAnalysis.TotalDomains > 0 { + insights = append(insights, ObservabilityInsight{ + Category: "privacy", + Severity: "info", + Title: "Sensitive destinations were redacted", + Summary: fmt.Sprintf("Observability data preserved privacy boundaries by redacting %d domain(s) from emitted logs.", processedRun.RedactedDomainsAnalysis.TotalDomains), + Evidence: fmt.Sprintf("redacted_domains=%d", processedRun.RedactedDomainsAnalysis.TotalDomains), + }) + } + + return insights +} + +func buildLogsObservabilityInsights(processedRuns []ProcessedRun, toolUsage []ToolUsageSummary) []ObservabilityInsight { + if len(processedRuns) == 0 { + return nil + } + + insights := make([]ObservabilityInsight, 0, 6) + workflowStats := make(map[string]*workflowObservabilityStats) + writeRuns := 0 + readOnlyRuns := 0 + + for _, pr := range processedRuns { + stats, exists := workflowStats[pr.Run.WorkflowName] + if !exists { + stats = &workflowObservabilityStats{ + workflowName: pr.Run.WorkflowName, + minTurns: pr.Run.Turns, + maxTurns: pr.Run.Turns, + } + workflowStats[pr.Run.WorkflowName] = stats + } + + stats.runs++ + stats.totalTurns += pr.Run.Turns + if stats.runs == 1 || pr.Run.Turns < stats.minTurns { + stats.minTurns = pr.Run.Turns + } + if pr.Run.Turns > stats.maxTurns { + stats.maxTurns = pr.Run.Turns + } + if pr.Run.Conclusion == "failure" { + stats.failures++ + } + if pr.Run.Conclusion == "timed_out" { + stats.timedOuts++ + } + stats.missingTools += len(pr.MissingTools) + stats.mcpFailures += len(pr.MCPFailures) + stats.missingData += len(pr.MissingData) + stats.safeItems += pr.Run.SafeItemsCount + if pr.Run.SafeItemsCount > 0 { + writeRuns++ + } else { + readOnlyRuns++ + } + if pr.FirewallAnalysis != nil { + stats.blocked += pr.FirewallAnalysis.BlockedRequests + 
stats.totalNet += pr.FirewallAnalysis.TotalRequests + } + } + + var failureHotspot *workflowObservabilityStats + for _, stats := range workflowStats { + if stats.failures == 0 { + continue + } + if failureHotspot == nil || stats.failures > failureHotspot.failures || (stats.failures == failureHotspot.failures && stats.workflowName < failureHotspot.workflowName) { + failureHotspot = stats + } + } + if failureHotspot != nil { + failureRate := float64(failureHotspot.failures) / float64(failureHotspot.runs) + severity := "medium" + if failureRate >= 0.5 { + severity = "high" + } + insights = append(insights, ObservabilityInsight{ + Category: "reliability", + Severity: severity, + Title: "Failure hotspot identified", + Summary: fmt.Sprintf("Workflow %q accounted for %d failure(s) across %d run(s), a %.0f%% failure rate.", failureHotspot.workflowName, failureHotspot.failures, failureHotspot.runs, failureRate*100), + Evidence: fmt.Sprintf("workflow=%s failures=%d runs=%d", failureHotspot.workflowName, failureHotspot.failures, failureHotspot.runs), + }) + } + + var driftHotspot *workflowObservabilityStats + for _, stats := range workflowStats { + if stats.runs < 2 { + continue + } + if stats.maxTurns-stats.minTurns < 4 { + continue + } + if driftHotspot == nil || (stats.maxTurns-stats.minTurns) > (driftHotspot.maxTurns-driftHotspot.minTurns) { + driftHotspot = stats + } + } + if driftHotspot != nil { + avgTurns := float64(driftHotspot.totalTurns) / float64(driftHotspot.runs) + insights = append(insights, ObservabilityInsight{ + Category: "drift", + Severity: "medium", + Title: "Execution drift observed", + Summary: fmt.Sprintf("Workflow %q varied from %d to %d turns across runs, which suggests changing task shape or unstable prompts (avg %.1f turns).", driftHotspot.workflowName, driftHotspot.minTurns, driftHotspot.maxTurns, avgTurns), + Evidence: fmt.Sprintf("workflow=%s min_turns=%d max_turns=%d", driftHotspot.workflowName, driftHotspot.minTurns, driftHotspot.maxTurns), + 
}) + } + + var toolingHotspot *workflowObservabilityStats + for _, stats := range workflowStats { + friction := stats.missingTools + stats.mcpFailures + stats.missingData + if friction == 0 { + continue + } + if toolingHotspot == nil || friction > (toolingHotspot.missingTools+toolingHotspot.mcpFailures+toolingHotspot.missingData) { + toolingHotspot = stats + } + } + if toolingHotspot != nil { + friction := toolingHotspot.missingTools + toolingHotspot.mcpFailures + toolingHotspot.missingData + severity := "medium" + if toolingHotspot.mcpFailures > 0 || friction >= 4 { + severity = "high" + } + insights = append(insights, ObservabilityInsight{ + Category: "tooling", + Severity: severity, + Title: "Capability hotspot identified", + Summary: fmt.Sprintf("Workflow %q produced the most capability friction: %d missing tool(s), %d MCP failure(s), and %d missing data signal(s).", toolingHotspot.workflowName, toolingHotspot.missingTools, toolingHotspot.mcpFailures, toolingHotspot.missingData), + Evidence: fmt.Sprintf("workflow=%s missing_tools=%d mcp_failures=%d missing_data=%d", toolingHotspot.workflowName, toolingHotspot.missingTools, toolingHotspot.mcpFailures, toolingHotspot.missingData), + }) + } + + var networkHotspot *workflowObservabilityStats + var networkRate float64 + for _, stats := range workflowStats { + if stats.totalNet == 0 || stats.blocked == 0 { + continue + } + rate := float64(stats.blocked) / float64(stats.totalNet) + if networkHotspot == nil || rate > networkRate { + networkHotspot = stats + networkRate = rate + } + } + if networkHotspot != nil { + severity := "medium" + if networkRate >= 0.5 || networkHotspot.blocked >= 10 { + severity = "high" + } + insights = append(insights, ObservabilityInsight{ + Category: "network", + Severity: severity, + Title: "Network friction hotspot identified", + Summary: fmt.Sprintf("Workflow %q had the highest firewall block pressure with %d blocked request(s) out of %d total (%.0f%%).", networkHotspot.workflowName, 
networkHotspot.blocked, networkHotspot.totalNet, networkRate*100), + Evidence: fmt.Sprintf("workflow=%s blocked=%d total=%d", networkHotspot.workflowName, networkHotspot.blocked, networkHotspot.totalNet), + }) + } + + if writeRuns > 0 || readOnlyRuns > 0 { + insights = append(insights, ObservabilityInsight{ + Category: "actuation", + Severity: "info", + Title: "Actuation mix summarized", + Summary: fmt.Sprintf("Across %d run(s), %d executed write-capable safe outputs and %d stayed read-only.", len(processedRuns), writeRuns, readOnlyRuns), + Evidence: fmt.Sprintf("write_runs=%d read_only_runs=%d", writeRuns, readOnlyRuns), + }) + } + + totalToolCalls := 0 + for _, tool := range toolUsage { + totalToolCalls += tool.TotalCalls + } + if len(toolUsage) > 0 && totalToolCalls > 0 { + topTool := toolUsage[0] + share := float64(topTool.TotalCalls) / float64(totalToolCalls) + if share >= 0.5 { + severity := "info" + if share >= 0.7 { + severity = "medium" + } + insights = append(insights, ObservabilityInsight{ + Category: "tooling", + Severity: severity, + Title: "Tool concentration observed", + Summary: fmt.Sprintf("Tool %q accounted for %.0f%% of observed tool calls, which suggests the workflow fleet depends heavily on a narrow capability path.", topTool.Name, share*100), + Evidence: fmt.Sprintf("tool=%s calls=%d total_calls=%d", topTool.Name, topTool.TotalCalls, totalToolCalls), + }) + } + } + + return insights +} + +func renderObservabilityInsights(insights []ObservabilityInsight) { + for _, insight := range insights { + icon := "[info]" + switch insight.Severity { + case "high": + icon = "[high]" + case "medium": + icon = "[medium]" + } + + fmt.Fprintf(os.Stderr, " %s %s [%s]\n", icon, insight.Title, insight.Category) + fmt.Fprintf(os.Stderr, " %s\n", insight.Summary) + if strings.TrimSpace(insight.Evidence) != "" { + fmt.Fprintf(os.Stderr, " Evidence: %s\n", insight.Evidence) + } + fmt.Fprintln(os.Stderr) + } +} diff --git a/pkg/cli/observability_insights_test.go 
b/pkg/cli/observability_insights_test.go new file mode 100644 index 00000000000..548459607d7 --- /dev/null +++ b/pkg/cli/observability_insights_test.go @@ -0,0 +1,121 @@ +//go:build !integration + +package cli + +import ( + "strings" + "testing" + "time" + + "github.com/github/gh-aw/pkg/workflow" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBuildAuditObservabilityInsights(t *testing.T) { + processedRun := ProcessedRun{ + Run: WorkflowRun{ + Turns: 11, + SafeItemsCount: 2, + }, + MissingTools: []MissingToolReport{{Tool: "terraform"}}, + MCPFailures: []MCPFailureReport{{ServerName: "github"}}, + MissingData: []MissingDataReport{{DataType: "issue_body"}}, + FirewallAnalysis: &FirewallAnalysis{ + TotalRequests: 20, + BlockedRequests: 8, + AllowedRequests: 12, + }, + RedactedDomainsAnalysis: &RedactedDomainsAnalysis{TotalDomains: 3}, + } + + metrics := MetricsData{Turns: 11} + toolUsage := []ToolUsageInfo{ + {Name: "bash", CallCount: 4}, + {Name: "github_issue_read", CallCount: 2}, + {Name: "grep", CallCount: 1}, + {Name: "sed", CallCount: 1}, + } + createdItems := []CreatedItemReport{{Type: "create_issue"}} + + insights := buildAuditObservabilityInsights(processedRun, metrics, toolUsage, createdItems) + require.Len(t, insights, 5, "expected five audit insights from the supplied signals") + + titles := make([]string, 0, len(insights)) + for _, insight := range insights { + titles = append(titles, insight.Title) + } + + assert.Contains(t, titles, "Adaptive execution path") + assert.Contains(t, titles, "Write path executed") + assert.Contains(t, titles, "Capability friction detected") + assert.Contains(t, titles, "Network friction detected") + assert.Contains(t, titles, "Sensitive destinations were redacted") +} + +func TestBuildLogsObservabilityInsights(t *testing.T) { + processedRuns := []ProcessedRun{ + { + Run: WorkflowRun{WorkflowName: "triage", Conclusion: "failure", Turns: 3, SafeItemsCount: 0}, + MissingTools: 
[]MissingToolReport{{Tool: "terraform"}}, + FirewallAnalysis: &FirewallAnalysis{TotalRequests: 10, BlockedRequests: 1}, + }, + { + Run: WorkflowRun{WorkflowName: "triage", Conclusion: "failure", Turns: 9, SafeItemsCount: 1}, + MCPFailures: []MCPFailureReport{{ServerName: "github"}}, + FirewallAnalysis: &FirewallAnalysis{TotalRequests: 10, BlockedRequests: 7}, + }, + { + Run: WorkflowRun{WorkflowName: "docs", Conclusion: "success", Turns: 2, SafeItemsCount: 1}, + }, + } + + toolUsage := []ToolUsageSummary{ + {Name: "bash", TotalCalls: 14}, + {Name: "github_issue_read", TotalCalls: 6}, + } + + insights := buildLogsObservabilityInsights(processedRuns, toolUsage) + require.NotEmpty(t, insights, "expected aggregated logs insights") + + var combined []string + for _, insight := range insights { + combined = append(combined, insight.Title+" "+insight.Summary) + } + text := strings.Join(combined, "\n") + + assert.Contains(t, text, "Failure hotspot identified") + assert.Contains(t, text, "Execution drift observed") + assert.Contains(t, text, "Capability hotspot identified") + assert.Contains(t, text, "Network friction hotspot identified") + assert.Contains(t, text, "Actuation mix summarized") + assert.Contains(t, text, "Tool concentration observed") +} + +func TestBuildAuditDataIncludesObservabilityInsights(t *testing.T) { + processedRun := ProcessedRun{ + Run: WorkflowRun{ + DatabaseID: 42, + WorkflowName: "insight-test", + Status: "completed", + Conclusion: "success", + Duration: 2 * time.Minute, + Turns: 7, + SafeItemsCount: 1, + }, + } + + metrics := workflow.LogMetrics{ + Turns: 7, + ToolCalls: []workflow.ToolCallInfo{ + {Name: "bash", CallCount: 3}, + {Name: "github_issue_read", CallCount: 2}, + {Name: "grep", CallCount: 1}, + {Name: "sed", CallCount: 1}, + }, + } + + auditData := buildAuditData(processedRun, metrics, nil) + require.NotEmpty(t, auditData.ObservabilityInsights, "audit data should expose observability insights") + assert.Equal(t, "execution", 
auditData.ObservabilityInsights[0].Category) +} diff --git a/pkg/cli/observability_policy.go b/pkg/cli/observability_policy.go new file mode 100644 index 00000000000..f7db2314606 --- /dev/null +++ b/pkg/cli/observability_policy.go @@ -0,0 +1,250 @@ +package cli + +type ObservabilityPolicy struct { + SchemaVersion string `json:"schema_version"` + Rules []ObservabilityPolicyRule `json:"rules"` +} + +type ObservabilityPolicyRule struct { + ID string `json:"id"` + Action string `json:"action"` + Message string `json:"message"` + Match ObservabilityPolicyMatch `json:"match"` +} + +type ObservabilityPolicyMatch struct { + BlockedDomains []string `json:"blocked_domains,omitempty"` + MinBlockedRequests int `json:"min_blocked_requests,omitempty"` + InsightSeverities []string `json:"insight_severities,omitempty"` + ActuationModes []string `json:"actuation_modes,omitempty"` + MCPFailureServers []string `json:"mcp_failure_servers,omitempty"` + CreatedItemTypes []string `json:"created_item_types,omitempty"` +} + +type ObservabilityPayload struct { + Overview ObservabilityPayloadOverview `json:"overview"` + Network *ObservabilityPayloadNetwork `json:"network,omitempty"` + Actuation *ObservabilityPayloadActuation `json:"actuation,omitempty"` + Tooling *ObservabilityPayloadTooling `json:"tooling,omitempty"` + Insights []ObservabilityInsight `json:"insights,omitempty"` + Lineage *ObservabilityPayloadLineage `json:"lineage,omitempty"` + Execution *ObservabilityPayloadExecution `json:"execution,omitempty"` + Reasoning *ObservabilityPayloadReasoning `json:"reasoning,omitempty"` +} + +type ObservabilityPayloadOverview struct { + WorkflowName string `json:"workflow_name,omitempty"` + RunID any `json:"run_id,omitempty"` +} + +type ObservabilityPayloadLineage struct { + TraceID string `json:"trace_id,omitempty"` + Context *AwContext `json:"aw_context,omitempty"` +} + +type ObservabilityPayloadExecution struct { + TaskStatus string `json:"task_status,omitempty"` +} + +type 
ObservabilityPayloadReasoning struct { + Mode string `json:"mode,omitempty"` +} + +type ObservabilityPayloadNetwork struct { + BlockedRequests int `json:"blocked_requests,omitempty"` + BlockedDomains []string `json:"blocked_domains,omitempty"` +} + +type ObservabilityPayloadActuation struct { + Mode string `json:"mode,omitempty"` + CreatedItems []ObservabilityCreatedItem `json:"created_items,omitempty"` +} + +type ObservabilityCreatedItem struct { + Type string `json:"type"` +} + +type ObservabilityPayloadTooling struct { + MCPFailures []ObservabilityPolicyMCPFailure `json:"mcp_failures,omitempty"` +} + +type ObservabilityPolicyMCPFailure struct { + ServerName string `json:"server_name"` +} + +type ObservabilityPolicyViolation struct { + RuleID string `json:"rule_id"` + Action string `json:"action"` + Message string `json:"message"` + Evidence string `json:"evidence,omitempty"` +} + +type ObservabilityPolicyResult struct { + Violations []ObservabilityPolicyViolation `json:"violations,omitempty"` +} + +func EvaluateObservabilityPolicy(policy ObservabilityPolicy, payload ObservabilityPayload) ObservabilityPolicyResult { + result := ObservabilityPolicyResult{Violations: []ObservabilityPolicyViolation{}} + + for _, rule := range policy.Rules { + if violation, matched := evaluateObservabilityPolicyRule(rule, payload); matched { + result.Violations = append(result.Violations, violation) + } + } + + return result +} + +func evaluateObservabilityPolicyRule(rule ObservabilityPolicyRule, payload ObservabilityPayload) (ObservabilityPolicyViolation, bool) { + evidenceParts := make([]string, 0, 4) + matched := false + + if len(rule.Match.BlockedDomains) > 0 { + matchedDomain := firstMatch(rule.Match.BlockedDomains, payloadBlockedDomains(payload)) + if matchedDomain == "" { + return ObservabilityPolicyViolation{}, false + } + matched = true + evidenceParts = append(evidenceParts, "blocked_domain="+matchedDomain) + } + + if rule.Match.MinBlockedRequests > 0 { + blocked := 
payloadBlockedRequests(payload) + if blocked < rule.Match.MinBlockedRequests { + return ObservabilityPolicyViolation{}, false + } + matched = true + evidenceParts = append(evidenceParts, "blocked_requests_gte") + } + + if len(rule.Match.InsightSeverities) > 0 { + severity := firstInsightSeverityMatch(rule.Match.InsightSeverities, payload.Insights) + if severity == "" { + return ObservabilityPolicyViolation{}, false + } + matched = true + evidenceParts = append(evidenceParts, "insight_severity="+severity) + } + + if len(rule.Match.ActuationModes) > 0 { + mode := payloadActuationMode(payload) + if !containsString(rule.Match.ActuationModes, mode) { + return ObservabilityPolicyViolation{}, false + } + matched = true + evidenceParts = append(evidenceParts, "actuation_mode="+mode) + } + + if len(rule.Match.MCPFailureServers) > 0 { + server := firstMCPFailureServerMatch(rule.Match.MCPFailureServers, payload) + if server == "" { + return ObservabilityPolicyViolation{}, false + } + matched = true + evidenceParts = append(evidenceParts, "mcp_failure_server="+server) + } + + if len(rule.Match.CreatedItemTypes) > 0 { + itemType := firstCreatedItemTypeMatch(rule.Match.CreatedItemTypes, payload) + if itemType == "" { + return ObservabilityPolicyViolation{}, false + } + matched = true + evidenceParts = append(evidenceParts, "created_item_type="+itemType) + } + + if !matched { + return ObservabilityPolicyViolation{}, false + } + + return ObservabilityPolicyViolation{ + RuleID: rule.ID, + Action: rule.Action, + Message: rule.Message, + Evidence: joinEvidence(evidenceParts), + }, true +} + +func payloadBlockedDomains(payload ObservabilityPayload) []string { + if payload.Network == nil { + return nil + } + return payload.Network.BlockedDomains +} + +func payloadBlockedRequests(payload ObservabilityPayload) int { + if payload.Network == nil { + return 0 + } + return payload.Network.BlockedRequests +} + +func payloadActuationMode(payload ObservabilityPayload) string { + if 
payload.Actuation == nil { + return "" + } + return payload.Actuation.Mode +} + +func firstMCPFailureServerMatch(allowed []string, payload ObservabilityPayload) string { + if payload.Tooling == nil { + return "" + } + for _, failure := range payload.Tooling.MCPFailures { + if containsString(allowed, failure.ServerName) { + return failure.ServerName + } + } + return "" +} + +func firstCreatedItemTypeMatch(allowed []string, payload ObservabilityPayload) string { + if payload.Actuation == nil { + return "" + } + for _, item := range payload.Actuation.CreatedItems { + if containsString(allowed, item.Type) { + return item.Type + } + } + return "" +} + +func firstInsightSeverityMatch(allowed []string, insights []ObservabilityInsight) string { + for _, insight := range insights { + if containsString(allowed, insight.Severity) { + return insight.Severity + } + } + return "" +} + +func firstMatch(allowed []string, actual []string) string { + for _, item := range actual { + if containsString(allowed, item) { + return item + } + } + return "" +} + +func containsString(items []string, target string) bool { + for _, item := range items { + if item == target { + return true + } + } + return false +} + +func joinEvidence(parts []string) string { + if len(parts) == 0 { + return "" + } + + result := parts[0] + for i := 1; i < len(parts); i++ { + result += " " + parts[i] + } + return result +} diff --git a/pkg/cli/observability_policy_command.go b/pkg/cli/observability_policy_command.go new file mode 100644 index 00000000000..ecfd6d207ff --- /dev/null +++ b/pkg/cli/observability_policy_command.go @@ -0,0 +1,261 @@ +package cli + +import ( + "encoding/json" + "fmt" + "os" + + "github.com/github/gh-aw/pkg/console" + "github.com/github/gh-aw/pkg/constants" + "github.com/github/gh-aw/pkg/logger" + "github.com/spf13/cobra" +) + +var observabilityPolicyLog = logger.New("cli:observability_policy") + +type ObservabilityPolicyEvalConfig struct { + PolicyPath string + ReportPath string + 
JSONOutput bool +} + +type ObservabilityPolicyEvaluation struct { + PolicyPath string `json:"policy_path"` + ReportPath string `json:"report_path"` + Summary ObservabilityPolicyEvaluationSummary `json:"summary"` + Violations []ObservabilityPolicyViolation `json:"violations,omitempty"` +} + +type ObservabilityPolicyEvaluationSummary struct { + Status string `json:"status"` + TotalViolations int `json:"total_violations"` + FailViolations int `json:"fail_violations"` + GateViolations int `json:"gate_violations"` + WarnViolations int `json:"warn_violations"` + Blocking bool `json:"blocking"` +} + +// NewObservabilityPolicyCommand creates the observability-policy command. +func NewObservabilityPolicyCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "observability-policy", + Short: "Evaluate observability reports against guardrail policies", + Long: `Evaluate an observability report against a policy file to surface guardrail decisions. + +This command reads two JSON files: +- A policy file that defines fail, gate, or warn rules +- An observability report payload produced for a workflow run + +The result can be rendered for people or emitted as JSON for automation. +Blocking actions (fail and gate) return a non-zero exit status. + +Examples: + ` + string(constants.CLIExtensionPrefix) + ` observability-policy eval --policy policy.json --report observability-report.json + ` + string(constants.CLIExtensionPrefix) + ` observability-policy eval --policy policy.json --report observability-report.json --json`, + } + + cmd.AddCommand(newObservabilityPolicyEvalCommand()) + + return cmd +} + +func newObservabilityPolicyEvalCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "eval", + Short: "Evaluate a policy against an observability report", + Long: `Evaluate an observability policy against a workflow observability report. 
+ +This command is intended for immediate guardrail checks in local development, +CI, or follow-up analysis after running gh aw logs or gh aw audit. + +Examples: + ` + string(constants.CLIExtensionPrefix) + ` observability-policy eval --policy policy.json --report observability-report.json + ` + string(constants.CLIExtensionPrefix) + ` observability-policy eval --policy policy.json --report observability-report.json --json`, + RunE: func(cmd *cobra.Command, args []string) error { + policyPath, _ := cmd.Flags().GetString("policy") + reportPath, _ := cmd.Flags().GetString("report") + jsonOutput, _ := cmd.Flags().GetBool("json") + + config := ObservabilityPolicyEvalConfig{ + PolicyPath: policyPath, + ReportPath: reportPath, + JSONOutput: jsonOutput, + } + + return RunObservabilityPolicyEval(config) + }, + } + + cmd.Flags().String("policy", "", "Path to the observability policy JSON file") + cmd.Flags().String("report", "", "Path to the observability report JSON file") + addJSONFlag(cmd) + _ = cmd.MarkFlagRequired("policy") + _ = cmd.MarkFlagRequired("report") + + return cmd +} + +// RunObservabilityPolicyEval executes observability policy evaluation. 
+func RunObservabilityPolicyEval(config ObservabilityPolicyEvalConfig) error { + if config.PolicyPath == "" { + return fmt.Errorf("policy path is required") + } + if config.ReportPath == "" { + return fmt.Errorf("report path is required") + } + + policy, err := readObservabilityPolicyFile(config.PolicyPath) + if err != nil { + return err + } + + payload, err := readObservabilityPayloadFile(config.ReportPath) + if err != nil { + return err + } + + observabilityPolicyLog.Printf("Evaluating policy=%s report=%s", config.PolicyPath, config.ReportPath) + + result := EvaluateObservabilityPolicy(policy, payload) + evaluation := buildObservabilityPolicyEvaluation(config, result) + + if config.JSONOutput { + output, err := json.MarshalIndent(evaluation, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal observability policy result: %w", err) + } + fmt.Println(string(output)) + } else { + renderObservabilityPolicyEvaluation(evaluation) + } + + return evaluation.summaryError() +} + +func readObservabilityPolicyFile(path string) (ObservabilityPolicy, error) { + content, err := os.ReadFile(path) + if err != nil { + return ObservabilityPolicy{}, fmt.Errorf("failed to read observability policy file: %w", err) + } + + var policy ObservabilityPolicy + if err := json.Unmarshal(content, &policy); err != nil { + return ObservabilityPolicy{}, fmt.Errorf("failed to parse observability policy file: %w", err) + } + + return policy, nil +} + +func readObservabilityPayloadFile(path string) (ObservabilityPayload, error) { + content, err := os.ReadFile(path) + if err != nil { + return ObservabilityPayload{}, fmt.Errorf("failed to read observability report file: %w", err) + } + + var payload ObservabilityPayload + if err := json.Unmarshal(content, &payload); err != nil { + return ObservabilityPayload{}, fmt.Errorf("failed to parse observability report file: %w", err) + } + + return payload, nil +} + +func buildObservabilityPolicyEvaluation(config ObservabilityPolicyEvalConfig, 
result ObservabilityPolicyResult) ObservabilityPolicyEvaluation { + summary := summarizeObservabilityPolicyResult(result) + + return ObservabilityPolicyEvaluation{ + PolicyPath: config.PolicyPath, + ReportPath: config.ReportPath, + Summary: summary, + Violations: result.Violations, + } +} + +func summarizeObservabilityPolicyResult(result ObservabilityPolicyResult) ObservabilityPolicyEvaluationSummary { + summary := ObservabilityPolicyEvaluationSummary{ + Status: "pass", + } + + for _, violation := range result.Violations { + summary.TotalViolations++ + switch violation.Action { + case "fail": + summary.FailViolations++ + case "gate": + summary.GateViolations++ + case "warn": + summary.WarnViolations++ + } + } + + summary.Blocking = summary.FailViolations > 0 || summary.GateViolations > 0 + + switch { + case summary.FailViolations > 0: + summary.Status = "fail" + case summary.GateViolations > 0: + summary.Status = "gate" + case summary.WarnViolations > 0: + summary.Status = "warn" + } + + return summary +} + +func renderObservabilityPolicyEvaluation(evaluation ObservabilityPolicyEvaluation) { + summary := evaluation.Summary + + if summary.TotalViolations == 0 { + fmt.Fprintln(os.Stderr, console.FormatSuccessMessage("No observability policy violations detected")) + return + } + + fmt.Fprintln(os.Stderr, console.FormatInfoMessage( + fmt.Sprintf("Observability policy evaluation found %d violation(s)", summary.TotalViolations), + )) + + for _, violation := range evaluation.Violations { + message := fmt.Sprintf("%s: %s", violation.RuleID, violation.Message) + if violation.Evidence != "" { + message += " (" + violation.Evidence + ")" + } + + switch violation.Action { + case "fail": + fmt.Fprintln(os.Stderr, console.FormatErrorMessage(message)) + case "gate": + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(message)) + default: + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(message)) + } + } + + if summary.FailViolations > 0 { + fmt.Fprintln(os.Stderr, 
console.FormatErrorMessage( + fmt.Sprintf("Evaluation failed with %d fail violation(s)", summary.FailViolations), + )) + return + } + + if summary.GateViolations > 0 { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage( + fmt.Sprintf("Evaluation requires approval because %d gate violation(s) matched", summary.GateViolations), + )) + return + } + + fmt.Fprintln(os.Stderr, console.FormatWarningMessage( + fmt.Sprintf("Evaluation completed with %d warning violation(s)", summary.WarnViolations), + )) +} + +func (evaluation ObservabilityPolicyEvaluation) summaryError() error { + switch evaluation.Summary.Status { + case "fail": + return fmt.Errorf("observability policy evaluation failed with %d fail violation(s)", evaluation.Summary.FailViolations) + case "gate": + return fmt.Errorf("observability policy evaluation requires approval because %d gate violation(s) matched", evaluation.Summary.GateViolations) + default: + return nil + } +} diff --git a/pkg/cli/observability_policy_command_test.go b/pkg/cli/observability_policy_command_test.go new file mode 100644 index 00000000000..29960c24f8c --- /dev/null +++ b/pkg/cli/observability_policy_command_test.go @@ -0,0 +1,141 @@ +//go:build !integration + +package cli + +import ( + "encoding/json" + "io" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSummarizeObservabilityPolicyResult(t *testing.T) { + result := ObservabilityPolicyResult{ + Violations: []ObservabilityPolicyViolation{ + {Action: "warn"}, + {Action: "gate"}, + {Action: "fail"}, + }, + } + + summary := summarizeObservabilityPolicyResult(result) + + assert.Equal(t, "fail", summary.Status, "fail should take precedence in summary status") + assert.Equal(t, 3, summary.TotalViolations, "all violations should be counted") + assert.Equal(t, 1, summary.FailViolations, "fail violations should be counted") + assert.Equal(t, 1, summary.GateViolations, "gate violations should be counted") 
+ assert.Equal(t, 1, summary.WarnViolations, "warn violations should be counted") + assert.True(t, summary.Blocking, "fail or gate should mark summary as blocking") +} + +func TestRunObservabilityPolicyEval_JSONOutput(t *testing.T) { + policyPath := writeJSONFixture(t, "policy.json", ObservabilityPolicy{ + SchemaVersion: "1.0.0", + Rules: []ObservabilityPolicyRule{ + { + ID: "warn-control-plane-failure", + Action: "warn", + Message: "GitHub MCP failed during the run", + Match: ObservabilityPolicyMatch{ + MCPFailureServers: []string{"github"}, + }, + }, + }, + }) + + reportPath := writeJSONFixture(t, "report.json", ObservabilityPayload{ + Tooling: &ObservabilityPayloadTooling{ + MCPFailures: []ObservabilityPolicyMCPFailure{{ServerName: "github"}}, + }, + }) + + stdout := captureStream(t, true, func() { + err := RunObservabilityPolicyEval(ObservabilityPolicyEvalConfig{ + PolicyPath: policyPath, + ReportPath: reportPath, + JSONOutput: true, + }) + require.NoError(t, err, "warn-only result should not return an error") + }) + + var evaluation ObservabilityPolicyEvaluation + require.NoError(t, json.Unmarshal([]byte(stdout), &evaluation), "command should emit valid JSON") + assert.Equal(t, "warn", evaluation.Summary.Status, "warn-only result should produce warn status") + assert.Equal(t, 1, evaluation.Summary.WarnViolations, "warn violations should be counted") + assert.Len(t, evaluation.Violations, 1, "one policy violation should be emitted") +} + +func TestRunObservabilityPolicyEval_GateViolationReturnsError(t *testing.T) { + policyPath := writeJSONFixture(t, "policy.json", ObservabilityPolicy{ + SchemaVersion: "1.0.0", + Rules: []ObservabilityPolicyRule{ + { + ID: "gate-write-mode", + Action: "gate", + Message: "Write-capable runs require approval", + Match: ObservabilityPolicyMatch{ + ActuationModes: []string{"write_capable"}, + }, + }, + }, + }) + + reportPath := writeJSONFixture(t, "report.json", ObservabilityPayload{ + Actuation: 
&ObservabilityPayloadActuation{Mode: "write_capable"}, + }) + + var err error + _ = captureStream(t, true, func() { + err = RunObservabilityPolicyEval(ObservabilityPolicyEvalConfig{ + PolicyPath: policyPath, + ReportPath: reportPath, + JSONOutput: true, + }) + }) + + assert.ErrorContains(t, err, "requires approval", "gate violations should return a blocking error") +} + +func writeJSONFixture(t *testing.T, name string, value any) string { + t.Helper() + + dir := t.TempDir() + path := filepath.Join(dir, name) + content, err := json.Marshal(value) + require.NoError(t, err, "fixture should marshal") + require.NoError(t, os.WriteFile(path, content, 0o644), "fixture should be written") + + return path +} + +func captureStream(t *testing.T, stdout bool, fn func()) string { + t.Helper() + + r, w, err := os.Pipe() + require.NoError(t, err, "pipe should be created") + + if stdout { + old := os.Stdout + os.Stdout = w + defer func() { + os.Stdout = old + }() + } else { + old := os.Stderr + os.Stderr = w + defer func() { + os.Stderr = old + }() + } + + fn() + require.NoError(t, w.Close(), "writer should close cleanly") + + output, readErr := io.ReadAll(r) + require.NoError(t, readErr, "captured output should be readable") + return string(output) +} diff --git a/pkg/cli/observability_policy_test.go b/pkg/cli/observability_policy_test.go new file mode 100644 index 00000000000..b1cabad4146 --- /dev/null +++ b/pkg/cli/observability_policy_test.go @@ -0,0 +1,127 @@ +//go:build !integration + +package cli + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestEvaluateObservabilityPolicy(t *testing.T) { + policy := ObservabilityPolicy{ + SchemaVersion: "1.0.0", + Rules: []ObservabilityPolicyRule{ + { + ID: "block-unapproved-domain", + Action: "fail", + Message: "Blocked domain is not allowed", + Match: ObservabilityPolicyMatch{ + BlockedDomains: []string{"evil.example.com"}, + 
}, + }, + { + ID: "gate-high-risk-write", + Action: "gate", + Message: "High severity write-capable run requires approval", + Match: ObservabilityPolicyMatch{ + InsightSeverities: []string{"high", "critical"}, + ActuationModes: []string{"write_capable", "mixed"}, + }, + }, + { + ID: "warn-control-plane-failure", + Action: "warn", + Message: "GitHub MCP failed during the run", + Match: ObservabilityPolicyMatch{ + MCPFailureServers: []string{"github"}, + }, + }, + }, + } + + payload := ObservabilityPayload{ + Network: &ObservabilityPayloadNetwork{ + BlockedRequests: 3, + BlockedDomains: []string{"evil.example.com", "unknown.example.com"}, + }, + Actuation: &ObservabilityPayloadActuation{ + Mode: "write_capable", + CreatedItems: []ObservabilityCreatedItem{ + {Type: "create_pull_request"}, + }, + }, + Tooling: &ObservabilityPayloadTooling{ + MCPFailures: []ObservabilityPolicyMCPFailure{ + {ServerName: "github"}, + }, + }, + Insights: []ObservabilityInsight{ + {Severity: "high", Title: "Network friction detected"}, + }, + } + + result := EvaluateObservabilityPolicy(policy, payload) + require.Len(t, result.Violations, 3, "expected all three rules to match") + + assert.Equal(t, "block-unapproved-domain", result.Violations[0].RuleID) + assert.Equal(t, "fail", result.Violations[0].Action) + assert.Contains(t, result.Violations[0].Evidence, "blocked_domain=evil.example.com") + + assert.Equal(t, "gate-high-risk-write", result.Violations[1].RuleID) + assert.Equal(t, "gate", result.Violations[1].Action) + assert.Contains(t, result.Violations[1].Evidence, "insight_severity=high") + assert.Contains(t, result.Violations[1].Evidence, "actuation_mode=write_capable") + + assert.Equal(t, "warn-control-plane-failure", result.Violations[2].RuleID) + assert.Equal(t, "warn", result.Violations[2].Action) + assert.Contains(t, result.Violations[2].Evidence, "mcp_failure_server=github") +} + +func TestEvaluateObservabilityPolicy_NoMatch(t *testing.T) { + policy := ObservabilityPolicy{ + 
SchemaVersion: "1.0.0", + Rules: []ObservabilityPolicyRule{ + { + ID: "no-match", + Action: "fail", + Message: "Should not trigger", + Match: ObservabilityPolicyMatch{ + BlockedDomains: []string{"evil.example.com"}, + }, + }, + }, + } + + payload := ObservabilityPayload{ + Network: &ObservabilityPayloadNetwork{ + BlockedDomains: []string{"safe.example.com"}, + }, + } + + result := EvaluateObservabilityPolicy(policy, payload) + assert.Empty(t, result.Violations, "unexpected violations for non-matching payload") +} + +func TestObservabilityPolicySchemaParsesAndHasRules(t *testing.T) { + schemaPath := filepath.Join("..", "..", "schemas", "observability-policy.json") + schemaContent, err := os.ReadFile(schemaPath) + require.NoError(t, err, "should read observability policy schema") + + var schema map[string]any + require.NoError(t, json.Unmarshal(schemaContent, &schema), "schema should parse as JSON") + + assert.Equal(t, "http://json-schema.org/draft-07/schema#", schema["$schema"]) + properties, ok := schema["properties"].(map[string]any) + require.True(t, ok, "root properties should exist") + assert.Contains(t, properties, "rules") + + defs, ok := schema["$defs"].(map[string]any) + require.True(t, ok, "schema defs should exist") + assert.Contains(t, defs, "Rule") + assert.Contains(t, defs, "Match") +} diff --git a/pkg/cli/observability_schema_test.go b/pkg/cli/observability_schema_test.go new file mode 100644 index 00000000000..cfa008a0cc7 --- /dev/null +++ b/pkg/cli/observability_schema_test.go @@ -0,0 +1,65 @@ +//go:build !integration + +package cli + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestObservabilityReportSchemaIncludesLineageAndReasoning(t *testing.T) { + schemaPath := filepath.Join("..", "..", "schemas", "observability-report.json") + schemaContent, err := os.ReadFile(schemaPath) + require.NoError(t, err, "should read observability schema") 
+ + var schema map[string]any + require.NoError(t, json.Unmarshal(schemaContent, &schema), "schema should parse as JSON") + + assert.Equal(t, "http://json-schema.org/draft-07/schema#", schema["$schema"], "schema should use Draft 7 for consistency with published schemas") + + properties, ok := schema["properties"].(map[string]any) + require.True(t, ok, "root properties should exist") + + _, hasLineage := properties["lineage"] + assert.True(t, hasLineage, "schema should include lineage section") + + _, hasReasoning := properties["reasoning"] + assert.True(t, hasReasoning, "schema should include reasoning section") + + defs, ok := schema["$defs"].(map[string]any) + require.True(t, ok, "schema defs should exist") + + awContextDef, ok := defs["AwContext"].(map[string]any) + require.True(t, ok, "AwContext definition should exist") + + awContextProps, ok := awContextDef["properties"].(map[string]any) + require.True(t, ok, "AwContext properties should exist") + assert.Contains(t, awContextProps, "repo") + assert.Contains(t, awContextProps, "run_id") + assert.Contains(t, awContextProps, "workflow_id") + assert.Contains(t, awContextProps, "workflow_call_id") + assert.Contains(t, awContextProps, "actor") + assert.Contains(t, awContextProps, "event_type") + + reasoningStepDef, ok := defs["ReasoningStep"].(map[string]any) + require.True(t, ok, "ReasoningStep definition should exist") + + reasoningStepProps, ok := reasoningStepDef["properties"].(map[string]any) + require.True(t, ok, "ReasoningStep properties should exist") + assert.Contains(t, reasoningStepProps, "kind") + assert.Contains(t, reasoningStepProps, "summary") + assert.Contains(t, reasoningStepProps, "evidence") + assert.Contains(t, reasoningStepProps, "tool_refs") + + lineageDef, ok := defs["Lineage"].(map[string]any) + require.True(t, ok, "Lineage definition should exist") + + lineageRequired, ok := lineageDef["required"].([]any) + require.True(t, ok, "Lineage required array should exist") + assert.Contains(t, 
lineageRequired, "trace_id") +} diff --git a/pkg/parser/schema_test.go b/pkg/parser/schema_test.go index 515df3720a3..0fe8adf86e7 100644 --- a/pkg/parser/schema_test.go +++ b/pkg/parser/schema_test.go @@ -194,3 +194,25 @@ func TestGetSafeOutputTypeKeys(t *testing.T) { } } } + +func TestValidateMainWorkflowFrontmatterWithSchema_AllowsObservabilityJobSummary(t *testing.T) { + frontmatter := map[string]any{ + "on": "push", + "observability": map[string]any{ + "job-summary": "on", + }, + } + + tempFile := "/tmp/gh-aw/test_observability_frontmatter.md" + if err := os.MkdirAll("/tmp/gh-aw", 0755); err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + if err := os.WriteFile(tempFile, []byte("---\non: push\nobservability:\n job-summary: on\n---\n"), 0644); err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tempFile) + + if err := ValidateMainWorkflowFrontmatterWithSchemaAndLocation(frontmatter, tempFile); err != nil { + t.Fatalf("Expected observability config to validate, got: %v", err) + } +} diff --git a/pkg/parser/schemas/main_workflow_schema.json b/pkg/parser/schemas/main_workflow_schema.json index 53a9c911a1d..e019f170641 100644 --- a/pkg/parser/schemas/main_workflow_schema.json +++ b/pkg/parser/schemas/main_workflow_schema.json @@ -8123,6 +8123,18 @@ }, "additionalProperties": false }, + "observability": { + "type": "object", + "description": "Optional observability output settings for workflow runs.", + "properties": { + "job-summary": { + "type": "string", + "enum": ["on", "off"], + "description": "If set to 'on', append a compact observability section to the GitHub Actions job summary. Defaults to off when omitted." + } + }, + "additionalProperties": false + }, "bots": { "type": "array", "description": "Allow list of bot identifiers that can trigger the workflow even if they don't meet the required role permissions. 
When the actor is in this list, the bot must be active (installed) on the repository to trigger the workflow.", diff --git a/pkg/workflow/compiler_yaml_ai_execution.go b/pkg/workflow/compiler_yaml_ai_execution.go index ae8ea049b30..320e25feb0c 100644 --- a/pkg/workflow/compiler_yaml_ai_execution.go +++ b/pkg/workflow/compiler_yaml_ai_execution.go @@ -5,6 +5,31 @@ import ( "strings" ) +func getObservabilityJobSummaryMode(data *WorkflowData) string { + if data == nil { + return "" + } + + mode := "" + if data.ParsedFrontmatter != nil && data.ParsedFrontmatter.Observability != nil { + mode = data.ParsedFrontmatter.Observability.JobSummary + } + + if mode == "" && data.RawFrontmatter != nil { + if rawObservability, ok := data.RawFrontmatter["observability"].(map[string]any); ok { + if rawMode, ok := rawObservability["job-summary"].(string); ok { + mode = rawMode + } + } + } + + if mode == "off" { + return "" + } + + return mode +} + // generateEngineExecutionSteps generates the GitHub Actions steps for executing the AI engine func (c *Compiler) generateEngineExecutionSteps(yaml *strings.Builder, data *WorkflowData, engine CodingAgentEngine, logFile string) { @@ -91,6 +116,29 @@ func (c *Compiler) generateMCPGatewayLogParsing(yaml *strings.Builder) { yaml.WriteString(" await main();\n") } +// generateObservabilitySummary generates an opt-in step that synthesizes a compact +// observability section for the GitHub Actions step summary from existing runtime files. 
+func (c *Compiler) generateObservabilitySummary(yaml *strings.Builder, data *WorkflowData) { + mode := getObservabilityJobSummaryMode(data) + if mode == "" { + return + } + + compilerYamlLog.Printf("Generating observability step summary: mode=%s", mode) + + yaml.WriteString(" - name: Generate observability summary\n") + yaml.WriteString(" if: always()\n") + fmt.Fprintf(yaml, " uses: %s\n", GetActionPin("actions/github-script")) + yaml.WriteString(" env:\n") + fmt.Fprintf(yaml, " GH_AW_OBSERVABILITY_JOB_SUMMARY: %q\n", mode) + yaml.WriteString(" with:\n") + yaml.WriteString(" script: |\n") + yaml.WriteString(" const { setupGlobals } = require('" + SetupActionDestination + "/setup_globals.cjs');\n") + yaml.WriteString(" setupGlobals(core, github, context, exec, io);\n") + yaml.WriteString(" const { main } = require('${{ runner.temp }}/gh-aw/actions/generate_observability_summary.cjs');\n") + yaml.WriteString(" await main(core);\n") +} + // generateStopMCPGateway generates a step that stops the MCP gateway process using its PID from step output // It passes the gateway port and API key to enable graceful shutdown via /close endpoint func (c *Compiler) generateStopMCPGateway(yaml *strings.Builder, data *WorkflowData) { diff --git a/pkg/workflow/compiler_yaml_main_job.go b/pkg/workflow/compiler_yaml_main_job.go index 94f82799414..1e808ff0bb0 100644 --- a/pkg/workflow/compiler_yaml_main_job.go +++ b/pkg/workflow/compiler_yaml_main_job.go @@ -468,6 +468,9 @@ func (c *Compiler) generateMainJobSteps(yaml *strings.Builder, data *WorkflowDat } } + // Optionally synthesize a compact observability section from runtime artifacts. 
+ c.generateObservabilitySummary(yaml, data) + // Collect agent stdio logs path for unified upload artifactPaths = append(artifactPaths, logFileFull) diff --git a/pkg/workflow/frontmatter_types.go b/pkg/workflow/frontmatter_types.go index f3fe8ca3af8..bc1daaebf4b 100644 --- a/pkg/workflow/frontmatter_types.go +++ b/pkg/workflow/frontmatter_types.go @@ -129,6 +129,11 @@ type RateLimitConfig struct { IgnoredRoles []string `json:"ignored-roles,omitempty"` // Roles that are exempt from rate limiting (e.g., ["admin", "maintainer"]) } +// ObservabilityConfig represents workflow observability options. +type ObservabilityConfig struct { + JobSummary string `json:"job-summary,omitempty"` +} + // FrontmatterConfig represents the structured configuration from workflow frontmatter // This provides compile-time type safety and clearer error messages compared to map[string]any type FrontmatterConfig struct { @@ -188,6 +193,7 @@ type FrontmatterConfig struct { // Metadata Metadata map[string]string `json:"metadata,omitempty"` // Custom metadata key-value pairs SecretMasking *SecretMaskingConfig `json:"secret-masking,omitempty"` + Observability *ObservabilityConfig `json:"observability,omitempty"` // Rate limiting configuration RateLimit *RateLimitConfig `json:"rate-limit,omitempty"` diff --git a/pkg/workflow/frontmatter_types_test.go b/pkg/workflow/frontmatter_types_test.go index afffbedada8..3c47a39a475 100644 --- a/pkg/workflow/frontmatter_types_test.go +++ b/pkg/workflow/frontmatter_types_test.go @@ -191,6 +191,27 @@ func TestParseFrontmatterConfig(t *testing.T) { } }) + t.Run("handles observability configuration", func(t *testing.T) { + frontmatter := map[string]any{ + "observability": map[string]any{ + "job-summary": "on", + }, + } + + config, err := ParseFrontmatterConfig(frontmatter) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if config.Observability == nil { + t.Fatal("Observability should not be nil") + } + + if config.Observability.JobSummary != 
"on" { + t.Errorf("JobSummary = %q, want %q", config.Observability.JobSummary, "on") + } + }) + t.Run("handles jobs configuration", func(t *testing.T) { frontmatter := map[string]any{ "jobs": map[string]any{ diff --git a/pkg/workflow/observability_job_summary_test.go b/pkg/workflow/observability_job_summary_test.go new file mode 100644 index 00000000000..fd2765e5e82 --- /dev/null +++ b/pkg/workflow/observability_job_summary_test.go @@ -0,0 +1,85 @@ +//go:build !integration + +package workflow + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestCompileWorkflow_IncludesObservabilitySummaryStepWhenOptedIn(t *testing.T) { + tmpDir := t.TempDir() + workflowPath := filepath.Join(tmpDir, "observability-summary.md") + content := `--- +on: push +permissions: + contents: read +observability: + job-summary: on +engine: copilot +--- + +# Test Observability Summary +` + + if err := os.WriteFile(workflowPath, []byte(content), 0o644); err != nil { + t.Fatalf("Failed to write workflow: %v", err) + } + + compiler := NewCompiler() + if err := compiler.CompileWorkflow(workflowPath); err != nil { + t.Fatalf("Unexpected compile error: %v", err) + } + + lockPath := filepath.Join(tmpDir, "observability-summary.lock.yml") + lockContent, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("Failed to read lock file: %v", err) + } + + compiled := string(lockContent) + if !strings.Contains(compiled, "- name: Generate observability summary") { + t.Fatal("Expected observability summary step to be generated") + } + if !strings.Contains(compiled, "GH_AW_OBSERVABILITY_JOB_SUMMARY: \"on\"") { + t.Fatal("Expected observability summary mode env var to be set") + } + if !strings.Contains(compiled, "require('${{ runner.temp }}/gh-aw/actions/generate_observability_summary.cjs')") { + t.Fatal("Expected generated workflow to load generate_observability_summary.cjs") + } +} + +func TestCompileWorkflow_DoesNotIncludeObservabilitySummaryStepByDefault(t *testing.T) { + tmpDir := 
t.TempDir() + workflowPath := filepath.Join(tmpDir, "no-observability-summary.md") + content := `--- +on: push +permissions: + contents: read +engine: copilot +--- + +# Test No Observability Summary +` + + if err := os.WriteFile(workflowPath, []byte(content), 0o644); err != nil { + t.Fatalf("Failed to write workflow: %v", err) + } + + compiler := NewCompiler() + if err := compiler.CompileWorkflow(workflowPath); err != nil { + t.Fatalf("Unexpected compile error: %v", err) + } + + lockPath := filepath.Join(tmpDir, "no-observability-summary.lock.yml") + lockContent, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("Failed to read lock file: %v", err) + } + + if strings.Contains(string(lockContent), "- name: Generate observability summary") { + t.Fatal("Did not expect observability summary step when feature is not configured") + } +} diff --git a/schemas/observability-policy.json b/schemas/observability-policy.json new file mode 100644 index 00000000000..a84d896f7fc --- /dev/null +++ b/schemas/observability-policy.json @@ -0,0 +1,97 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://github.com/github/gh-aw/schemas/observability-policy.json", + "title": "GitHub Agentic Workflows Observability Policy", + "description": "Schema for policy rules evaluated against observability-report.json payloads. 
These rules convert distilled observability signals into enforcement actions such as fail, gate, or warn.", + "type": "object", + "required": ["schema_version", "rules"], + "additionalProperties": false, + "properties": { + "schema_version": { + "type": "string", + "description": "Version of the observability policy schema.", + "examples": ["1.0.0"] + }, + "rules": { + "type": "array", + "description": "List of policy rules to evaluate against an observability payload.", + "minItems": 1, + "items": { + "$ref": "#/$defs/Rule" + } + } + }, + "$defs": { + "Rule": { + "type": "object", + "required": ["id", "action", "message", "match"], + "additionalProperties": false, + "properties": { + "id": { + "type": "string", + "description": "Stable identifier for the rule." + }, + "action": { + "type": "string", + "description": "Enforcement action to apply when the rule matches.", + "enum": ["fail", "gate", "warn"] + }, + "message": { + "type": "string", + "description": "Human-readable message explaining the policy violation." 
+ }, + "match": { + "$ref": "#/$defs/Match" + } + } + }, + "Match": { + "type": "object", + "additionalProperties": false, + "properties": { + "blocked_domains": { + "type": "array", + "description": "Match if any blocked domain in the payload equals one of these domains.", + "items": { + "type": "string" + } + }, + "min_blocked_requests": { + "type": "integer", + "description": "Match if blocked request count is greater than or equal to this threshold.", + "minimum": 0 + }, + "insight_severities": { + "type": "array", + "description": "Match if any distilled insight has one of these severities.", + "items": { + "type": "string", + "enum": ["critical", "high", "medium", "low", "info"] + } + }, + "actuation_modes": { + "type": "array", + "description": "Match if the payload actuation mode is one of these values.", + "items": { + "type": "string", + "enum": ["read_only", "write_capable", "mixed"] + } + }, + "mcp_failure_servers": { + "type": "array", + "description": "Match if any MCP failure references one of these server names.", + "items": { + "type": "string" + } + }, + "created_item_types": { + "type": "array", + "description": "Match if any created item type equals one of these values.", + "items": { + "type": "string" + } + } + } + } + } +} diff --git a/schemas/observability-report.json b/schemas/observability-report.json new file mode 100644 index 00000000000..766babf4e53 --- /dev/null +++ b/schemas/observability-report.json @@ -0,0 +1,601 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://github.com/github/gh-aw/schemas/observability-report.json", + "title": "GitHub Agentic Workflows Observability Report", + "description": "Schema for distilled observability payloads emitted by GitHub Agentic Workflows. 
This payload separates execution status from reasoning-oriented telemetry, including lineage from aw_context, correlation identifiers, tool activity, network posture, actuation outcomes, and synthesized insights.", + "type": "object", + "required": ["schema_version", "kind", "generated_at", "overview", "lineage", "execution", "insights"], + "additionalProperties": false, + "properties": { + "schema_version": { + "type": "string", + "description": "Version of this observability payload schema.", + "examples": ["1.0.0"] + }, + "kind": { + "type": "string", + "description": "Payload scope. 'run' is for a single execution. 'fleet' is for multi-run summaries and trend aggregation.", + "enum": ["run", "fleet"] + }, + "generated_at": { + "type": "string", + "description": "RFC 3339 timestamp when this payload was generated.", + "format": "date-time" + }, + "overview": { + "$ref": "#/$defs/Overview" + }, + "lineage": { + "$ref": "#/$defs/Lineage" + }, + "execution": { + "$ref": "#/$defs/Execution" + }, + "reasoning": { + "$ref": "#/$defs/Reasoning" + }, + "tooling": { + "$ref": "#/$defs/Tooling" + }, + "network": { + "$ref": "#/$defs/Network" + }, + "actuation": { + "$ref": "#/$defs/Actuation" + }, + "insights": { + "type": "array", + "description": "Distilled decision-oriented signals derived from the workflow run or fleet of runs.", + "items": { + "$ref": "#/$defs/Insight" + } + }, + "findings": { + "type": "array", + "description": "Human-facing findings suitable for audit or incident review.", + "items": { + "$ref": "#/$defs/Finding" + } + }, + "recommendations": { + "type": "array", + "description": "Actionable recommendations derived from the observability data.", + "items": { + "$ref": "#/$defs/Recommendation" + } + } + }, + "$defs": { + "Overview": { + "type": "object", + "required": ["workflow_name", "status"], + "additionalProperties": false, + "properties": { + "run_id": { + "type": ["integer", "string"], + "description": "Workflow run identifier for run-scoped 
payloads. May be omitted for fleet summaries." + }, + "workflow_name": { + "type": "string", + "description": "Human-readable workflow name." + }, + "workflow_path": { + "type": "string", + "description": "Workflow file path, if known." + }, + "status": { + "type": "string", + "description": "Current lifecycle status of the workflow or aggregate health state for fleet payloads.", + "examples": ["completed", "in_progress", "success", "degraded"] + }, + "conclusion": { + "type": "string", + "description": "GitHub Actions conclusion for a completed run.", + "examples": ["success", "failure", "timed_out", "cancelled"] + }, + "event": { + "type": "string", + "description": "Triggering GitHub event name." + }, + "branch": { + "type": "string", + "description": "Head branch or reference name." + }, + "url": { + "type": "string", + "description": "GitHub Actions URL for the workflow run.", + "format": "uri" + } + } + }, + "Lineage": { + "type": "object", + "required": ["trace_id"], + "additionalProperties": false, + "properties": { + "trace_id": { + "type": "string", + "description": "Stable correlation identifier for this end-to-end execution lineage. Prefer workflow_call_id when aw_context is present; otherwise use a generated correlation key." + }, + "span_id": { + "type": "string", + "description": "Identifier for the current report node or aggregation span." + }, + "parent_trace_id": { + "type": "string", + "description": "Optional parent trace identifier when this payload was derived from another execution or aggregate report." + }, + "aw_context": { + "$ref": "#/$defs/AwContext" + }, + "engine": { + "$ref": "#/$defs/Engine" + } + } + }, + "AwContext": { + "type": "object", + "required": ["repo", "run_id", "workflow_id"], + "additionalProperties": false, + "properties": { + "repo": { + "type": "string", + "description": "Calling workflow repository in owner/repo format." 
+ }, + "run_id": { + "type": "string", + "description": "GitHub Actions run ID of the calling workflow." + }, + "workflow_id": { + "type": "string", + "description": "Full workflow ref of the calling workflow, including repository, path, and ref." + }, + "workflow_call_id": { + "type": "string", + "description": "Unique call attempt identifier, typically composed from run_id and run_attempt." + }, + "time": { + "type": "string", + "description": "RFC 3339 timestamp of the dispatch or workflow call handoff.", + "format": "date-time" + }, + "actor": { + "type": "string", + "description": "GitHub actor that triggered the calling workflow." + }, + "event_type": { + "type": "string", + "description": "GitHub event name of the calling workflow." + } + } + }, + "Engine": { + "type": "object", + "additionalProperties": false, + "properties": { + "engine_id": { + "type": "string", + "description": "Stable engine identifier, such as copilot, claude, codex, or gemini." + }, + "engine_name": { + "type": "string", + "description": "Human-readable engine name." + }, + "model": { + "type": "string", + "description": "Model or engine variant used for this run." + }, + "version": { + "type": "string", + "description": "Engine or CLI version that produced the output." 
+ } + } + }, + "Execution": { + "type": "object", + "required": ["task_status"], + "additionalProperties": false, + "properties": { + "task_status": { + "type": "string", + "description": "Outcome-focused status distinct from reasoning telemetry.", + "enum": ["success", "failure", "partial", "timed_out", "cancelled", "unknown"] + }, + "duration_ms": { + "type": "integer", + "description": "Total execution duration in milliseconds.", + "minimum": 0 + }, + "turns": { + "type": "integer", + "description": "Number of decision turns used by the agent.", + "minimum": 0 + }, + "token_usage": { + "type": "integer", + "description": "Total token usage for the run.", + "minimum": 0 + }, + "estimated_cost": { + "type": "number", + "description": "Estimated USD cost for the run.", + "minimum": 0 + }, + "error_count": { + "type": "integer", + "description": "Number of errors observed in the execution.", + "minimum": 0 + }, + "warning_count": { + "type": "integer", + "description": "Number of warnings observed in the execution.", + "minimum": 0 + } + } + }, + "Reasoning": { + "type": "object", + "additionalProperties": false, + "properties": { + "mode": { + "type": "string", + "description": "High-level characterization of the reasoning posture.", + "enum": ["directed", "adaptive", "exploratory", "unknown"] + }, + "reasoning_steps": { + "type": "array", + "description": "Optional coarse-grained reasoning steps that explain decision points without storing raw private chain-of-thought.", + "items": { + "$ref": "#/$defs/ReasoningStep" + } + }, + "drift_signal": { + "type": "string", + "description": "Optional summary of execution drift compared with prior runs.", + "examples": ["stable", "rising_turn_count", "volatile_tool_path"] + } + } + }, + "ReasoningStep": { + "type": "object", + "required": ["id", "kind", "summary"], + "additionalProperties": false, + "properties": { + "id": { + "type": "string", + "description": "Stable identifier for the reasoning step within the payload." 
+ }, + "parent_id": { + "type": "string", + "description": "Optional parent step identifier for hierarchical plans." + }, + "kind": { + "type": "string", + "description": "Type of reasoning step.", + "enum": ["plan", "observe", "decide", "act", "verify", "handoff"] + }, + "summary": { + "type": "string", + "description": "Short explanation of the decision or transition." + }, + "evidence": { + "type": "array", + "description": "Structured evidence references supporting the reasoning step.", + "items": { + "$ref": "#/$defs/EvidenceRef" + } + }, + "tool_refs": { + "type": "array", + "description": "Tool calls associated with this reasoning step.", + "items": { + "type": "string" + } + }, + "outcome": { + "type": "string", + "description": "Observed result of this reasoning step.", + "examples": ["confirmed", "blocked", "needs-human-review"] + } + } + }, + "EvidenceRef": { + "type": "object", + "required": ["type", "value"], + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "description": "Evidence type.", + "enum": ["file", "log", "tool_call", "metric", "network_domain", "safe_output", "external"] + }, + "value": { + "type": "string", + "description": "Opaque identifier or locator for the evidence source." + }, + "label": { + "type": "string", + "description": "Human-readable description of the evidence." 
+ } + } + }, + "Tooling": { + "type": "object", + "additionalProperties": false, + "properties": { + "tool_types": { + "type": "integer", + "description": "Number of unique tool types used during the run or across the aggregation.", + "minimum": 0 + }, + "tool_calls": { + "type": "array", + "description": "Observed tool call summaries.", + "items": { + "$ref": "#/$defs/ToolCall" + } + }, + "missing_tools": { + "type": "array", + "description": "Tools requested by the agent but not available.", + "items": { + "$ref": "#/$defs/MissingTool" + } + }, + "mcp_failures": { + "type": "array", + "description": "MCP server failures encountered by the run.", + "items": { + "$ref": "#/$defs/MCPFailure" + } + }, + "missing_data": { + "type": "array", + "description": "Signals that required data was missing during execution.", + "items": { + "$ref": "#/$defs/MissingData" + } + } + } + }, + "ToolCall": { + "type": "object", + "required": ["name", "call_count"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Tool name." + }, + "server_name": { + "type": "string", + "description": "MCP server name for MCP-hosted tools." 
+ }, + "call_count": { + "type": "integer", + "description": "Number of times the tool was called.", + "minimum": 0 + }, + "max_input_size": { + "type": "integer", + "description": "Maximum observed input size.", + "minimum": 0 + }, + "max_output_size": { + "type": "integer", + "description": "Maximum observed output size.", + "minimum": 0 + }, + "max_duration_ms": { + "type": "integer", + "description": "Maximum observed execution duration in milliseconds.", + "minimum": 0 + }, + "status": { + "type": "string", + "description": "Aggregate outcome for the tool across the observed scope.", + "examples": ["success", "partial", "error"] + } + } + }, + "MissingTool": { + "type": "object", + "required": ["tool", "reason"], + "additionalProperties": false, + "properties": { + "tool": { + "type": "string" + }, + "reason": { + "type": "string" + }, + "alternatives": { + "type": "string" + } + } + }, + "MCPFailure": { + "type": "object", + "required": ["server_name", "status"], + "additionalProperties": false, + "properties": { + "server_name": { + "type": "string" + }, + "status": { + "type": "string" + }, + "timestamp": { + "type": "string", + "format": "date-time" + } + } + }, + "MissingData": { + "type": "object", + "required": ["data_type", "reason"], + "additionalProperties": false, + "properties": { + "data_type": { + "type": "string" + }, + "reason": { + "type": "string" + }, + "context": { + "type": "string" + } + } + }, + "Network": { + "type": "object", + "additionalProperties": false, + "properties": { + "total_requests": { + "type": "integer", + "minimum": 0 + }, + "allowed_requests": { + "type": "integer", + "minimum": 0 + }, + "blocked_requests": { + "type": "integer", + "minimum": 0 + }, + "allowed_domains": { + "type": "array", + "items": { + "type": "string" + } + }, + "blocked_domains": { + "type": "array", + "items": { + "type": "string" + } + }, + "redacted_domains": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "Actuation": { 
+ "type": "object", + "additionalProperties": false, + "properties": { + "mode": { + "type": "string", + "description": "Whether the execution remained read-only or crossed into write-capable actions.", + "enum": ["read_only", "write_capable", "mixed"] + }, + "safe_items_count": { + "type": "integer", + "minimum": 0 + }, + "created_items": { + "type": "array", + "items": { + "$ref": "#/$defs/CreatedItem" + } + } + } + }, + "CreatedItem": { + "type": "object", + "required": ["type"], + "additionalProperties": false, + "properties": { + "type": { + "type": "string" + }, + "repo": { + "type": "string" + }, + "number": { + "type": "integer", + "minimum": 0 + }, + "url": { + "type": "string", + "format": "uri" + }, + "temporary_id": { + "type": "string" + }, + "timestamp": { + "type": "string", + "format": "date-time" + } + } + }, + "Insight": { + "type": "object", + "required": ["category", "severity", "title", "summary"], + "additionalProperties": false, + "properties": { + "category": { + "type": "string" + }, + "severity": { + "type": "string", + "enum": ["critical", "high", "medium", "low", "info"] + }, + "title": { + "type": "string" + }, + "summary": { + "type": "string" + }, + "evidence": { + "type": "string" + } + } + }, + "Finding": { + "type": "object", + "required": ["category", "severity", "title", "description"], + "additionalProperties": false, + "properties": { + "category": { + "type": "string" + }, + "severity": { + "type": "string" + }, + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "impact": { + "type": "string" + } + } + }, + "Recommendation": { + "type": "object", + "required": ["priority", "action", "reason"], + "additionalProperties": false, + "properties": { + "priority": { + "type": "string", + "enum": ["high", "medium", "low"] + }, + "action": { + "type": "string" + }, + "reason": { + "type": "string" + }, + "example": { + "type": "string" + } + } + } + } +} From 5abdb234213461cfea9f5cb82dc3ff5f81685aba Mon 
Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 18:05:55 +0000 Subject: [PATCH 02/12] fix: address review comments for observability audit improvements Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> Agent-Logs-Url: https://github.com/github/gh-aw/sessions/3c9ee3d6-0a27-4f53-810e-025fc6e26234 --- .../js/generate_observability_summary.cjs | 28 +++++++++---------- pkg/cli/audit_comparison.go | 13 +++++++++ pkg/cli/observability_insights.go | 4 +++ pkg/cli/observability_policy.go | 4 ++- pkg/cli/observability_policy_command.go | 2 ++ 5 files changed, 36 insertions(+), 15 deletions(-) diff --git a/actions/setup/js/generate_observability_summary.cjs b/actions/setup/js/generate_observability_summary.cjs index 2b85c9707f9..0f28bb12a48 100644 --- a/actions/setup/js/generate_observability_summary.cjs +++ b/actions/setup/js/generate_observability_summary.cjs @@ -20,27 +20,27 @@ function readJSONIfExists(path) { } function countBlockedRequests() { + let total = 0; + for (const path of gatewayEventPaths) { if (!fs.existsSync(path)) { continue; } - const content = fs.readFileSync(path, "utf8"); - return content - .split("\n") - .map(line => line.trim()) - .filter(Boolean) - .reduce((count, line) => { - try { - const entry = JSON.parse(line); - return entry && entry.type === "DIFC_FILTERED" ? 
count + 1 : count; - } catch { - return count; - } - }, 0); + const lines = fs.readFileSync(path, "utf8").split("\n"); + for (const raw of lines) { + const line = raw.trim(); + if (!line) continue; + try { + const entry = JSON.parse(line); + if (entry && entry.type === "DIFC_FILTERED") total++; + } catch { + // skip malformed lines + } + } } - return 0; + return total; } function uniqueCreatedItemTypes(items) { diff --git a/pkg/cli/audit_comparison.go b/pkg/cli/audit_comparison.go index bf111e68d8b..c8db2c622dc 100644 --- a/pkg/cli/audit_comparison.go +++ b/pkg/cli/audit_comparison.go @@ -137,15 +137,20 @@ func buildAuditComparison(current auditComparisonSnapshot, baselineRun *Workflow if current.Turns > baseline.Turns { reasonCodes = append(reasonCodes, "turns_increase") + } else if current.Turns < baseline.Turns { + reasonCodes = append(reasonCodes, "turns_decrease") } if baseline.Posture != current.Posture { reasonCodes = append(reasonCodes, "posture_changed") } if current.BlockedRequests > baseline.BlockedRequests { reasonCodes = append(reasonCodes, "blocked_requests_increase") + } else if current.BlockedRequests < baseline.BlockedRequests { + reasonCodes = append(reasonCodes, "blocked_requests_decrease") } newMCPFailure := len(baseline.MCPFailures) == 0 && len(current.MCPFailures) > 0 + mcpFailuresResolved := len(baseline.MCPFailures) > 0 && len(current.MCPFailures) == 0 if newMCPFailure || len(baseline.MCPFailures) > 0 || len(current.MCPFailures) > 0 { delta.MCPFailure = &AuditComparisonMCPFailureDelta{ Before: baseline.MCPFailures, @@ -155,6 +160,8 @@ func buildAuditComparison(current auditComparisonSnapshot, baselineRun *Workflow } if newMCPFailure { reasonCodes = append(reasonCodes, "new_mcp_failure") + } else if mcpFailuresResolved { + reasonCodes = append(reasonCodes, "mcp_failures_resolved") } label := "stable" @@ -165,6 +172,12 @@ func buildAuditComparison(current auditComparisonSnapshot, baselineRun *Workflow label = "risky" case 
current.BlockedRequests > baseline.BlockedRequests: label = "risky" + case delta.Posture.Before != "" && delta.Posture.After != "" && delta.Posture.Before != delta.Posture.After: + label = "changed" + case mcpFailuresResolved: + label = "changed" + case current.BlockedRequests < baseline.BlockedRequests: + label = "changed" case len(reasonCodes) > 0: label = "changed" } diff --git a/pkg/cli/observability_insights.go b/pkg/cli/observability_insights.go index 37ced823883..b2cb45cee2b 100644 --- a/pkg/cli/observability_insights.go +++ b/pkg/cli/observability_insights.go @@ -317,10 +317,14 @@ func renderObservabilityInsights(insights []ObservabilityInsight) { for _, insight := range insights { icon := "[info]" switch insight.Severity { + case "critical": + icon = "[critical]" case "high": icon = "[high]" case "medium": icon = "[medium]" + case "low": + icon = "[low]" } fmt.Fprintf(os.Stderr, " %s %s [%s]\n", icon, insight.Title, insight.Category) diff --git a/pkg/cli/observability_policy.go b/pkg/cli/observability_policy.go index f7db2314606..b95408a10e4 100644 --- a/pkg/cli/observability_policy.go +++ b/pkg/cli/observability_policy.go @@ -1,5 +1,7 @@ package cli +import "fmt" + type ObservabilityPolicy struct { SchemaVersion string `json:"schema_version"` Rules []ObservabilityPolicyRule `json:"rules"` @@ -114,7 +116,7 @@ func evaluateObservabilityPolicyRule(rule ObservabilityPolicyRule, payload Obser return ObservabilityPolicyViolation{}, false } matched = true - evidenceParts = append(evidenceParts, "blocked_requests_gte") + evidenceParts = append(evidenceParts, fmt.Sprintf("blocked_requests_gte=%d actual=%d", rule.Match.MinBlockedRequests, blocked)) } if len(rule.Match.InsightSeverities) > 0 { diff --git a/pkg/cli/observability_policy_command.go b/pkg/cli/observability_policy_command.go index ecfd6d207ff..70587fb9f43 100644 --- a/pkg/cli/observability_policy_command.go +++ b/pkg/cli/observability_policy_command.go @@ -225,6 +225,8 @@ func 
renderObservabilityPolicyEvaluation(evaluation ObservabilityPolicyEvaluatio fmt.Fprintln(os.Stderr, console.FormatErrorMessage(message)) case "gate": fmt.Fprintln(os.Stderr, console.FormatWarningMessage(message)) + case "warn": + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(message)) default: fmt.Fprintln(os.Stderr, console.FormatInfoMessage(message)) } From d32e410b012d01b630c57876965064ce0862a731 Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer Date: Tue, 24 Mar 2026 19:52:48 +0100 Subject: [PATCH 03/12] enhance audit comparison and reporting with task domain & behavior fingerprint --- ...agentic-observability-central-kit.lock.yml | 1195 +++++++++++++++++ .../agentic-observability-central-kit.md | 179 +++ .../agentic-observability-kit.lock.yml | 1191 ++++++++++++++++ .../workflows/agentic-observability-kit.md | 172 +++ .../docs/guides/agentic-observability-kit.md | 112 ++ docs/src/content/docs/patterns/monitoring.md | 4 + pkg/cli/audit.go | 11 +- pkg/cli/audit_agentic_analysis.go | 346 +++++ pkg/cli/audit_agentic_analysis_test.go | 107 ++ pkg/cli/audit_comparison.go | 282 +++- pkg/cli/audit_comparison_test.go | 70 + pkg/cli/audit_report.go | 56 +- pkg/cli/audit_report_render.go | 80 +- pkg/cli/logs_json_test.go | 66 + pkg/cli/logs_models.go | 42 +- pkg/cli/logs_orchestrator.go | 30 + pkg/cli/logs_report.go | 112 +- pkg/cli/logs_summary_test.go | 27 + 18 files changed, 3936 insertions(+), 146 deletions(-) create mode 100644 .github/workflows/agentic-observability-central-kit.lock.yml create mode 100644 .github/workflows/agentic-observability-central-kit.md create mode 100644 .github/workflows/agentic-observability-kit.lock.yml create mode 100644 .github/workflows/agentic-observability-kit.md create mode 100644 docs/src/content/docs/guides/agentic-observability-kit.md create mode 100644 pkg/cli/audit_agentic_analysis.go create mode 100644 pkg/cli/audit_agentic_analysis_test.go diff --git a/.github/workflows/agentic-observability-central-kit.lock.yml 
b/.github/workflows/agentic-observability-central-kit.lock.yml new file mode 100644 index 00000000000..9f30f696780 --- /dev/null +++ b/.github/workflows/agentic-observability-central-kit.lock.yml @@ -0,0 +1,1195 @@ +# ___ _ _ +# / _ \ | | (_) +# | |_| | __ _ ___ _ __ | |_ _ ___ +# | _ |/ _` |/ _ \ '_ \| __| |/ __| +# | | | | (_| | __/ | | | |_| | (__ +# \_| |_/\__, |\___|_| |_|\__|_|\___| +# __/ | +# _ _ |___/ +# | | | | / _| | +# | | | | ___ _ __ _ __| |_| | _____ ____ +# | |/\| |/ _ \ '__| |/ /| _| |/ _ \ \ /\ / / ___| +# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ +# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ +# +# This file was automatically generated by gh-aw. DO NOT EDIT. +# +# To update this file, edit the corresponding .md file and run: +# gh aw compile +# Not all edits will cause changes to this file. +# +# For more information: https://github.github.com/gh-aw/introduction/overview/ +# +# Central reporting variant of the agentic observability kit for platform repositories +# +# Resolved workflow manifest: +# Imports: +# - shared/reporting.md +# +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"70afa9d30cf87e41cfe6cdba9b2ab2902c336e9766219b10e44c9c05634796d5","strict":true,"agent_id":"copilot"} + +name: "Agentic Observability Central Kit" +"on": + schedule: + - cron: "52 8 * * 1" + # Friendly format: weekly on monday around 08:30 (scattered) + workflow_dispatch: + inputs: + aw_context: + default: "" + description: Agent caller context (used internally by Agentic Workflows). 
+ required: false + type: string + +permissions: {} + +concurrency: + group: "gh-aw-${{ github.workflow }}" + +run-name: "Agentic Observability Central Kit" + +env: + REPORT_REPOSITORY: ${{ vars.REPORT_REPOSITORY || github.repository }} + +jobs: + activation: + runs-on: ubuntu-slim + permissions: + contents: read + outputs: + comment_id: "" + comment_repo: "" + lockdown_check_failed: ${{ steps.generate_aw_info.outputs.lockdown_check_failed == 'true' }} + model: ${{ steps.generate_aw_info.outputs.model }} + secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Generate agentic run info + id: generate_aw_info + env: + GH_AW_INFO_ENGINE_ID: "copilot" + GH_AW_INFO_ENGINE_NAME: "GitHub Copilot CLI" + GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }} + GH_AW_INFO_VERSION: "latest" + GH_AW_INFO_AGENT_VERSION: "latest" + GH_AW_INFO_WORKFLOW_NAME: "Agentic Observability Central Kit" + GH_AW_INFO_EXPERIMENTAL: "false" + GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" + GH_AW_INFO_STAGED: "false" + GH_AW_INFO_ALLOWED_DOMAINS: '["defaults"]' + GH_AW_INFO_FIREWALL_ENABLED: "true" + GH_AW_INFO_AWF_VERSION: "v0.25.0" + GH_AW_INFO_AWMG_VERSION: "" + GH_AW_INFO_FIREWALL_TYPE: "squid" + GH_AW_COMPILED_STRICT: "true" + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/generate_aw_info.cjs'); + await main(core, context); + - name: Validate COPILOT_GITHUB_TOKEN secret + id: 
validate-secret + run: ${RUNNER_TEMP}/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + - name: Checkout .github and .agents folders + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + sparse-checkout: | + .github + .agents + sparse-checkout-cone-mode: true + fetch-depth: 1 + - name: Check workflow file timestamps + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_WORKFLOW_FILE: "agentic-observability-central-kit.lock.yml" + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/check_workflow_timestamp_api.cjs'); + await main(); + - name: Create prompt with built-in context + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ runner.temp }}/gh-aw/safeoutputs/outputs.jsonl + GH_AW_ENV_REPORT_REPOSITORY: ${{ env.REPORT_REPOSITORY }} + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + # poutine:ignore untrusted_checkout_exec + run: | + bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh + { + cat << 'GH_AW_PROMPT_EOF' + + GH_AW_PROMPT_EOF + cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" + cat 
"${RUNNER_TEMP}/gh-aw/prompts/markdown.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/agentic_workflows_guide.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" + cat << 'GH_AW_PROMPT_EOF' + + Tools: create_issue(max:10), create_discussion, missing_tool, missing_data, noop + + + The following GitHub context information is available for this workflow: + {{#if __GH_AW_GITHUB_ACTOR__ }} + - **actor**: __GH_AW_GITHUB_ACTOR__ + {{/if}} + {{#if __GH_AW_GITHUB_REPOSITORY__ }} + - **repository**: __GH_AW_GITHUB_REPOSITORY__ + {{/if}} + {{#if __GH_AW_GITHUB_WORKSPACE__ }} + - **workspace**: __GH_AW_GITHUB_WORKSPACE__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ }} + - **issue-number**: #__GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ }} + - **discussion-number**: #__GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ }} + - **pull-request-number**: #__GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_COMMENT_ID__ }} + - **comment-id**: __GH_AW_GITHUB_EVENT_COMMENT_ID__ + {{/if}} + {{#if __GH_AW_GITHUB_RUN_ID__ }} + - **workflow-run-id**: __GH_AW_GITHUB_RUN_ID__ + {{/if}} + + + GH_AW_PROMPT_EOF + cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" + cat << 'GH_AW_PROMPT_EOF' + + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' + {{#runtime-import .github/workflows/shared/reporting.md}} + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' + {{#runtime-import .github/workflows/agentic-observability-central-kit.md}} + GH_AW_PROMPT_EOF + } > "$GH_AW_PROMPT" + - name: Interpolate variables and render templates + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_ENV_REPORT_REPOSITORY: ${{ env.REPORT_REPOSITORY }} + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + 
setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/interpolate_prompt.cjs'); + await main(); + - name: Substitute placeholders + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_ENV_REPORT_REPOSITORY: ${{ env.REPORT_REPOSITORY }} + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + + const substitutePlaceholders = require('${{ runner.temp }}/gh-aw/actions/substitute_placeholders.cjs'); + + // Call the substitution function + return await substitutePlaceholders({ + file: process.env.GH_AW_PROMPT, + substitutions: { + GH_AW_ENV_REPORT_REPOSITORY: process.env.GH_AW_ENV_REPORT_REPOSITORY, + GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR, + GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID, + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER, + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: process.env.GH_AW_GITHUB_EVENT_ISSUE_NUMBER, + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER, + GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY, + GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID, + GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE + } + }); + - name: Validate prompt placeholders + env: + GH_AW_PROMPT: 
/tmp/gh-aw/aw-prompts/prompt.txt + # poutine:ignore untrusted_checkout_exec + run: bash ${RUNNER_TEMP}/gh-aw/actions/validate_prompt_placeholders.sh + - name: Print prompt + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + # poutine:ignore untrusted_checkout_exec + run: bash ${RUNNER_TEMP}/gh-aw/actions/print_prompt_summary.sh + - name: Upload activation artifact + if: success() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: activation + path: | + /tmp/gh-aw/aw_info.json + /tmp/gh-aw/aw-prompts/prompt.txt + retention-days: 1 + + agent: + needs: activation + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + discussions: read + issues: read + pull-requests: read + concurrency: + group: "gh-aw-copilot-${{ github.workflow }}" + env: + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + GH_AW_ASSETS_ALLOWED_EXTS: "" + GH_AW_ASSETS_BRANCH: "" + GH_AW_ASSETS_MAX_SIZE_KB: 0 + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + GH_AW_WORKFLOW_ID_SANITIZED: agenticobservabilitycentralkit + outputs: + checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} + detection_conclusion: ${{ steps.detection_conclusion.outputs.conclusion }} + detection_success: ${{ steps.detection_conclusion.outputs.success }} + has_patch: ${{ steps.collect_output.outputs.has_patch }} + inference_access_error: ${{ steps.detect-inference-error.outputs.inference_access_error || 'false' }} + model: ${{ needs.activation.outputs.model }} + output: ${{ steps.collect_output.outputs.output }} + output_types: ${{ steps.collect_output.outputs.output_types }} + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Set runtime 
paths + id: set-runtime-paths + run: | + echo "GH_AW_SAFE_OUTPUTS=${RUNNER_TEMP}/gh-aw/safeoutputs/outputs.jsonl" >> "$GITHUB_OUTPUT" + echo "GH_AW_SAFE_OUTPUTS_CONFIG_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" >> "$GITHUB_OUTPUT" + echo "GH_AW_SAFE_OUTPUTS_TOOLS_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/tools.json" >> "$GITHUB_OUTPUT" + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Setup Go for CLI build + uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 + with: + go-version-file: go.mod + cache: true + - name: Build gh-aw CLI + run: | + echo "Building gh-aw CLI for linux/amd64..." + mkdir -p dist + VERSION=$(git describe --tags --always --dirty) + CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ + -ldflags "-s -w -X main.version=${VERSION}" \ + -o dist/gh-aw-linux-amd64 \ + ./cmd/gh-aw + # Copy binary to root for direct execution in user-defined steps + cp dist/gh-aw-linux-amd64 ./gh-aw + chmod +x ./gh-aw + echo "✓ Built gh-aw CLI successfully" + - name: Setup Docker Buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4 + - name: Build gh-aw Docker image + uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7 + with: + context: . 
+ platforms: linux/amd64 + push: false + load: true + tags: localhost/gh-aw:dev + build-args: | + BINARY=dist/gh-aw-linux-amd64 + - name: Create gh-aw temp directory + run: bash ${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh + - name: Configure gh CLI for GitHub Enterprise + run: bash ${RUNNER_TEMP}/gh-aw/actions/configure_gh_for_ghe.sh + env: + GH_TOKEN: ${{ github.token }} + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + git config --global am.keepcr true + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Checkout PR branch + id: checkout-pr + if: | + github.event.pull_request || github.event.issue.pull_request + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/checkout_pr_branch.cjs'); + await main(); + - name: Install GitHub Copilot CLI + run: ${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh latest + env: + GH_HOST: github.com + - name: Install AWF binary + run: bash ${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh v0.25.0 + - name: Parse integrity filter lists + id: parse-guard-vars + env: + GH_AW_BLOCKED_USERS_VAR: ${{ 
vars.GH_AW_GITHUB_BLOCKED_USERS || '' }} + GH_AW_APPROVAL_LABELS_VAR: ${{ vars.GH_AW_GITHUB_APPROVAL_LABELS || '' }} + run: bash ${RUNNER_TEMP}/gh-aw/actions/parse_guard_list.sh + - name: Download container images + run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.25.0 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.0 ghcr.io/github/gh-aw-firewall/squid:0.25.0 ghcr.io/github/gh-aw-mcpg:v0.2.3 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine + - name: Install gh-aw extension + env: + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + run: | + # Check if gh-aw extension is already installed + if gh extension list | grep -q "github/gh-aw"; then + echo "gh-aw extension already installed, upgrading..." + gh extension upgrade gh-aw || true + else + echo "Installing gh-aw extension..." + gh extension install github/gh-aw + fi + gh aw --version + # Copy the gh-aw binary to ${RUNNER_TEMP}/gh-aw for MCP server containerization + mkdir -p ${RUNNER_TEMP}/gh-aw + GH_AW_BIN=$(which gh-aw 2>/dev/null || find ~/.local/share/gh/extensions/gh-aw -name 'gh-aw' -type f 2>/dev/null | head -1) + if [ -n "$GH_AW_BIN" ] && [ -f "$GH_AW_BIN" ]; then + cp "$GH_AW_BIN" ${RUNNER_TEMP}/gh-aw/gh-aw + chmod +x ${RUNNER_TEMP}/gh-aw/gh-aw + echo "Copied gh-aw binary to ${RUNNER_TEMP}/gh-aw/gh-aw" + else + echo "::error::Failed to find gh-aw binary for MCP server" + exit 1 + fi + - name: Write Safe Outputs Config + run: | + mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_EOF' + {"create_discussion":{"expires":168,"max":1},"create_issue":{"group":true,"max":10},"missing_data":{},"missing_tool":{},"noop":{"max":1}} + GH_AW_SAFE_OUTPUTS_CONFIG_EOF + - name: Write Safe Outputs Tools + run: | + cat > 
${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_EOF' + { + "description_suffixes": { + "create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[observability central] \". Discussions will be created in category \"audits\". Discussions will be created in repository \"${{ env.REPORT_REPOSITORY }}\".", + "create_issue": " CONSTRAINTS: Maximum 10 issue(s) can be created. Labels [\"agentics\" \"warning\" \"platform\"] will be automatically added. Issues will be created in repository \"${{ env.REPORT_REPOSITORY }}\"." + }, + "repo_params": {}, + "dynamic_tools": [] + } + GH_AW_SAFE_OUTPUTS_TOOLS_META_EOF + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_EOF' + { + "create_discussion": { + "defaultMax": 1, + "fields": { + "body": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + }, + "category": { + "type": "string", + "sanitize": true, + "maxLength": 128 + }, + "repo": { + "type": "string", + "maxLength": 256 + }, + "title": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "create_issue": { + "defaultMax": 1, + "fields": { + "body": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + }, + "labels": { + "type": "array", + "itemType": "string", + "itemSanitize": true, + "itemMaxLength": 128 + }, + "parent": { + "issueOrPRNumber": true + }, + "repo": { + "type": "string", + "maxLength": 256 + }, + "temporary_id": { + "type": "string" + }, + "title": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "missing_data": { + "defaultMax": 20, + "fields": { + "alternatives": { + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "context": { + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "data_type": { + "type": "string", + "sanitize": true, + "maxLength": 128 + }, + 
"reason": { + "type": "string", + "sanitize": true, + "maxLength": 256 + } + } + }, + "missing_tool": { + "defaultMax": 20, + "fields": { + "alternatives": { + "type": "string", + "sanitize": true, + "maxLength": 512 + }, + "reason": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "tool": { + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "noop": { + "defaultMax": 1, + "fields": { + "message": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + } + } + } + } + GH_AW_SAFE_OUTPUTS_VALIDATION_EOF + node ${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs + - name: Generate Safe Outputs MCP Server Config + id: safe-outputs-config + run: | + # Generate a secure random API key (360 bits of entropy, 40+ chars) + # Mask immediately to prevent timing vulnerabilities + API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${API_KEY}" + + PORT=3001 + + # Set outputs for next steps + { + echo "safe_outputs_api_key=${API_KEY}" + echo "safe_outputs_port=${PORT}" + } >> "$GITHUB_OUTPUT" + + echo "Safe Outputs MCP server will run on port ${PORT}" + + - name: Start Safe Outputs MCP HTTP Server + id: safe-outputs-start + env: + DEBUG: '*' + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-config.outputs.safe_outputs_port }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-config.outputs.safe_outputs_api_key }} + GH_AW_SAFE_OUTPUTS_TOOLS_PATH: ${{ runner.temp }}/gh-aw/safeoutputs/tools.json + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: ${{ runner.temp }}/gh-aw/safeoutputs/config.json + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + run: | + # Environment variables are set above to prevent template injection + export DEBUG + export GH_AW_SAFE_OUTPUTS_PORT + export GH_AW_SAFE_OUTPUTS_API_KEY + export GH_AW_SAFE_OUTPUTS_TOOLS_PATH + export GH_AW_SAFE_OUTPUTS_CONFIG_PATH + export GH_AW_MCP_LOG_DIR + + bash ${RUNNER_TEMP}/gh-aw/actions/start_safe_outputs_server.sh + + - 
name: Start MCP Gateway + id: start-mcp-gateway + env: + GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-start.outputs.api_key }} + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-start.outputs.port }} + GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -eo pipefail + mkdir -p /tmp/gh-aw/mcp-config + + # Export gateway environment variables for MCP config and gateway script + export MCP_GATEWAY_PORT="80" + export MCP_GATEWAY_DOMAIN="host.docker.internal" + MCP_GATEWAY_API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${MCP_GATEWAY_API_KEY}" + export MCP_GATEWAY_API_KEY + export MCP_GATEWAY_PAYLOAD_DIR="/tmp/gh-aw/mcp-payloads" + mkdir -p "${MCP_GATEWAY_PAYLOAD_DIR}" + export MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD="524288" + export DEBUG="*" + + export GH_AW_ENGINE="copilot" + export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e 
GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.3' + + mkdir -p /home/runner/.copilot + cat << GH_AW_MCP_CONFIG_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh + { + "mcpServers": { + "agenticworkflows": { + "type": "stdio", + "container": "localhost/gh-aw:dev", + "mounts": ["\${GITHUB_WORKSPACE}:\${GITHUB_WORKSPACE}:rw", "/tmp/gh-aw:/tmp/gh-aw:rw"], + "args": ["--network", "host", "-w", "\${GITHUB_WORKSPACE}"], + "env": { + "DEBUG": "*", + "GITHUB_TOKEN": "\${GITHUB_TOKEN}", + "GITHUB_ACTOR": "\${GITHUB_ACTOR}", + "GITHUB_REPOSITORY": "\${GITHUB_REPOSITORY}" + }, + "guard-policies": { + "write-sink": { + "accept": [ + "*" + ] + } + } + }, + "github": { + "type": "stdio", + "container": "ghcr.io/github/github-mcp-server:v0.32.0", + "env": { + "GITHUB_HOST": "\${GITHUB_SERVER_URL}", + "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}", + "GITHUB_READ_ONLY": "1", + "GITHUB_TOOLSETS": "context,repos,issues,pull_requests,discussions" + }, + "guard-policies": { + "allow-only": { + "approval-labels": ${{ steps.parse-guard-vars.outputs.approval_labels }}, + "blocked-users": ${{ steps.parse-guard-vars.outputs.blocked_users }}, + "min-integrity": "merged", + "repos": "all" + } + } + }, + "safeoutputs": { + "type": "http", + "url": "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT", + "headers": { + "Authorization": "\${GH_AW_SAFE_OUTPUTS_API_KEY}" + }, + "guard-policies": { + "write-sink": { + "accept": [ + "*" + ] + } + } + } + }, + "gateway": { + "port": $MCP_GATEWAY_PORT, + "domain": "${MCP_GATEWAY_DOMAIN}", + "apiKey": "${MCP_GATEWAY_API_KEY}", + "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" + } + } + GH_AW_MCP_CONFIG_EOF + - name: Download activation artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c 
# v8.0.1 + with: + name: activation + path: /tmp/gh-aw + - name: Clean git credentials + continue-on-error: true + run: bash ${RUNNER_TEMP}/gh-aw/actions/clean_git_credentials.sh + - name: Execute GitHub Copilot CLI + id: agentic_execution + # Copilot CLI tool arguments (sorted): + timeout-minutes: 30 + run: | + set -o pipefail + touch /tmp/gh-aw/agent-step-summary.md + # shellcheck disable=SC1003 + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.25.0 --skip-pull --enable-api-proxy \ + -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-all-tools --allow-all-paths --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + 
COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + COPILOT_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || '' }} + GH_AW_MCP_CONFIG: /home/runner/.copilot/mcp-config.json + GH_AW_PHASE: agent + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} + GH_AW_VERSION: dev + GITHUB_API_URL: ${{ github.api_url }} + GITHUB_AW: true + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_STEP_SUMMARY: /tmp/gh-aw/agent-step-summary.md + GITHUB_WORKSPACE: ${{ github.workspace }} + GIT_AUTHOR_EMAIL: github-actions[bot]@users.noreply.github.com + GIT_AUTHOR_NAME: github-actions[bot] + GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com + GIT_COMMITTER_NAME: github-actions[bot] + XDG_CONFIG_HOME: /home/runner + - name: Detect inference access error + id: detect-inference-error + if: always() + continue-on-error: true + run: bash ${RUNNER_TEMP}/gh-aw/actions/detect_inference_access_error.sh + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + git config --global am.keepcr true + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Copy Copilot session state files to logs + if: always() + continue-on-error: true + run: | + # Copy Copilot session state files to logs folder for artifact collection + # This ensures they are in /tmp/gh-aw/ where secret redaction 
can scan them + SESSION_STATE_DIR="$HOME/.copilot/session-state" + LOGS_DIR="/tmp/gh-aw/sandbox/agent/logs" + + if [ -d "$SESSION_STATE_DIR" ]; then + echo "Copying Copilot session state files from $SESSION_STATE_DIR to $LOGS_DIR" + mkdir -p "$LOGS_DIR" + cp -v "$SESSION_STATE_DIR"/*.jsonl "$LOGS_DIR/" 2>/dev/null || true + echo "Session state files copied successfully" + else + echo "No session-state directory found at $SESSION_STATE_DIR" + fi + - name: Stop MCP Gateway + if: always() + continue-on-error: true + env: + MCP_GATEWAY_PORT: ${{ steps.start-mcp-gateway.outputs.gateway-port }} + MCP_GATEWAY_API_KEY: ${{ steps.start-mcp-gateway.outputs.gateway-api-key }} + GATEWAY_PID: ${{ steps.start-mcp-gateway.outputs.gateway-pid }} + run: | + bash ${RUNNER_TEMP}/gh-aw/actions/stop_mcp_gateway.sh "$GATEWAY_PID" + - name: Redact secrets in logs + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/redact_secrets.cjs'); + await main(); + env: + GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN' + SECRET_COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + SECRET_GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + SECRET_GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + SECRET_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Append agent step summary + if: always() + run: bash ${RUNNER_TEMP}/gh-aw/actions/append_agent_step_summary.sh + - name: Copy Safe Outputs + if: always() + env: + GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} + run: | + mkdir -p /tmp/gh-aw + cp "$GH_AW_SAFE_OUTPUTS" /tmp/gh-aw/safeoutputs.jsonl 2>/dev/null || true + - name: Ingest agent output + id: collect_output + if: 
always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} + GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_API_URL: ${{ github.api_url }} + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/collect_ndjson_output.cjs'); + await main(); + - name: Parse agent logs for step summary + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/ + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_copilot_log.cjs'); + await main(); + - name: Parse MCP Gateway logs for step summary + if: always() + uses: 
actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_mcp_gateway_log.cjs'); + await main(); + - name: Print firewall logs + if: always() + continue-on-error: true + env: + AWF_LOGS_DIR: /tmp/gh-aw/sandbox/firewall/logs + run: | + # Fix permissions on firewall logs so they can be uploaded as artifacts + # AWF runs with sudo, creating files owned by root + sudo chmod -R a+r /tmp/gh-aw/sandbox/firewall/logs 2>/dev/null || true + # Only run awf logs summary if awf command exists (it may not be installed if workflow failed before install step) + if command -v awf &> /dev/null; then + awf logs summary | tee -a "$GITHUB_STEP_SUMMARY" + else + echo 'AWF binary not installed, skipping firewall log summary' + fi + - name: Write agent output placeholder if missing + if: always() + run: | + if [ ! 
-f /tmp/gh-aw/agent_output.json ]; then + echo '{"items":[]}' > /tmp/gh-aw/agent_output.json + fi + - name: Upload agent artifacts + if: always() + continue-on-error: true + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: agent + path: | + /tmp/gh-aw/aw-prompts/prompt.txt + /tmp/gh-aw/sandbox/agent/logs/ + /tmp/gh-aw/redacted-urls.log + /tmp/gh-aw/mcp-logs/ + /tmp/gh-aw/proxy-logs/ + /tmp/gh-aw/agent-stdio.log + /tmp/gh-aw/agent/ + /tmp/gh-aw/safeoutputs.jsonl + /tmp/gh-aw/agent_output.json + if-no-files-found: ignore + - name: Upload firewall audit logs + if: always() + continue-on-error: true + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: firewall-audit-logs + path: /tmp/gh-aw/sandbox/firewall/logs/ + if-no-files-found: ignore + # --- Threat Detection (inline) --- + - name: Check if detection needed + id: detection_guard + if: always() + env: + OUTPUT_TYPES: ${{ steps.collect_output.outputs.output_types }} + HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }} + run: | + if [[ -n "$OUTPUT_TYPES" || "$HAS_PATCH" == "true" ]]; then + echo "run_detection=true" >> "$GITHUB_OUTPUT" + echo "Detection will run: output_types=$OUTPUT_TYPES, has_patch=$HAS_PATCH" + else + echo "run_detection=false" >> "$GITHUB_OUTPUT" + echo "Detection skipped: no agent outputs or patches to analyze" + fi + - name: Clear MCP configuration for detection + if: always() && steps.detection_guard.outputs.run_detection == 'true' + run: | + rm -f /tmp/gh-aw/mcp-config/mcp-servers.json + rm -f /home/runner/.copilot/mcp-config.json + rm -f "$GITHUB_WORKSPACE/.gemini/settings.json" + - name: Prepare threat detection files + if: always() && steps.detection_guard.outputs.run_detection == 'true' + run: | + mkdir -p /tmp/gh-aw/threat-detection/aw-prompts + cp /tmp/gh-aw/aw-prompts/prompt.txt /tmp/gh-aw/threat-detection/aw-prompts/prompt.txt 2>/dev/null || true + cp /tmp/gh-aw/agent_output.json 
/tmp/gh-aw/threat-detection/agent_output.json 2>/dev/null || true + for f in /tmp/gh-aw/aw-*.patch; do + [ -f "$f" ] && cp "$f" /tmp/gh-aw/threat-detection/ 2>/dev/null || true + done + echo "Prepared threat detection files:" + ls -la /tmp/gh-aw/threat-detection/ 2>/dev/null || true + - name: Setup threat detection + if: always() && steps.detection_guard.outputs.run_detection == 'true' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + WORKFLOW_NAME: "Agentic Observability Central Kit" + WORKFLOW_DESCRIPTION: "Central reporting variant of the agentic observability kit for platform repositories" + HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }} + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/setup_threat_detection.cjs'); + await main(); + - name: Ensure threat-detection directory and log + if: always() && steps.detection_guard.outputs.run_detection == 'true' + run: | + mkdir -p /tmp/gh-aw/threat-detection + touch /tmp/gh-aw/threat-detection/detection.log + - name: Execute GitHub Copilot CLI + if: always() && steps.detection_guard.outputs.run_detection == 'true' + id: detection_agentic_execution + # Copilot CLI tool arguments (sorted): + # --allow-tool shell(cat) + # --allow-tool shell(grep) + # --allow-tool shell(head) + # --allow-tool shell(jq) + # --allow-tool shell(ls) + # --allow-tool shell(tail) + # --allow-tool shell(wc) + timeout-minutes: 20 + run: | + set -o pipefail + touch /tmp/gh-aw/agent-step-summary.md + # shellcheck disable=SC1003 + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains 
"api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,raw.githubusercontent.com,registry.npmjs.org,telemetry.enterprise.githubcopilot.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.25.0 --skip-pull --enable-api-proxy \ + -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-tool '\''shell(cat)'\'' --allow-tool '\''shell(grep)'\'' --allow-tool '\''shell(head)'\'' --allow-tool '\''shell(jq)'\'' --allow-tool '\''shell(ls)'\'' --allow-tool '\''shell(tail)'\'' --allow-tool '\''shell(wc)'\'' --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + COPILOT_MODEL: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} + GH_AW_PHASE: detection + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_VERSION: dev + GITHUB_API_URL: ${{ github.api_url }} + GITHUB_AW: true + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_STEP_SUMMARY: /tmp/gh-aw/agent-step-summary.md + GITHUB_WORKSPACE: ${{ github.workspace }} + GIT_AUTHOR_EMAIL: github-actions[bot]@users.noreply.github.com + GIT_AUTHOR_NAME: github-actions[bot] + GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com + GIT_COMMITTER_NAME: github-actions[bot] + XDG_CONFIG_HOME: /home/runner + - name: Parse threat detection results + id: parse_detection_results + if: always() && steps.detection_guard.outputs.run_detection == 'true' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('${{ runner.temp 
}}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_threat_detection_results.cjs'); + await main(); + - name: Upload threat detection log + if: always() && steps.detection_guard.outputs.run_detection == 'true' + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: detection + path: /tmp/gh-aw/threat-detection/detection.log + if-no-files-found: ignore + - name: Set detection conclusion + id: detection_conclusion + if: always() + env: + RUN_DETECTION: ${{ steps.detection_guard.outputs.run_detection }} + DETECTION_SUCCESS: ${{ steps.parse_detection_results.outputs.success }} + run: | + if [[ "$RUN_DETECTION" != "true" ]]; then + echo "conclusion=skipped" >> "$GITHUB_OUTPUT" + echo "success=true" >> "$GITHUB_OUTPUT" + echo "Detection was not needed, marking as skipped" + elif [[ "$DETECTION_SUCCESS" == "true" ]]; then + echo "conclusion=success" >> "$GITHUB_OUTPUT" + echo "success=true" >> "$GITHUB_OUTPUT" + echo "Detection passed successfully" + else + echo "conclusion=failure" >> "$GITHUB_OUTPUT" + echo "success=false" >> "$GITHUB_OUTPUT" + echo "Detection found issues" + fi + + conclusion: + needs: + - activation + - agent + - safe_outputs + if: always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true') + runs-on: ubuntu-slim + permissions: + contents: read + discussions: write + issues: write + concurrency: + group: "gh-aw-conclusion-agentic-observability-central-kit" + cancel-in-progress: false + outputs: + noop_message: ${{ steps.noop.outputs.noop_message }} + tools_reported: ${{ steps.missing_tool.outputs.tools_reported }} + total_count: ${{ steps.missing_tool.outputs.total_count }} + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + 
persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Download agent output artifact + id: download-agent-output + continue-on-error: true + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw/ + - name: Setup agent output environment variable + id: setup-agent-output-env + if: steps.download-agent-output.outcome == 'success' + run: | + mkdir -p /tmp/gh-aw/ + find "/tmp/gh-aw/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" + - name: Process No-Op Messages + id: noop + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_NOOP_MAX: "1" + GH_AW_WORKFLOW_NAME: "Agentic Observability Central Kit" + GH_AW_TRACKER_ID: "agentic-observability-central-kit" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/noop.cjs'); + await main(); + - name: Record Missing Tool + id: missing_tool + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Agentic Observability Central Kit" + GH_AW_TRACKER_ID: "agentic-observability-central-kit" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/missing_tool.cjs'); + await main(); + - name: Handle Agent Failure + 
id: handle_agent_failure + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Agentic Observability Central Kit" + GH_AW_TRACKER_ID: "agentic-observability-central-kit" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_WORKFLOW_ID: "agentic-observability-central-kit" + GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.activation.outputs.secret_verification_result }} + GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} + GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.agent.outputs.inference_access_error }} + GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }} + GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }} + GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }} + GH_AW_GROUP_REPORTS: "false" + GH_AW_FAILURE_REPORT_AS_ISSUE: "true" + GH_AW_TIMEOUT_MINUTES: "30" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_agent_failure.cjs'); + await main(); + - name: Handle No-Op Message + id: handle_noop_message + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Agentic Observability Central Kit" + GH_AW_TRACKER_ID: "agentic-observability-central-kit" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} 
+ GH_AW_NOOP_MESSAGE: ${{ steps.noop.outputs.noop_message }} + GH_AW_NOOP_REPORT_AS_ISSUE: "false" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs'); + await main(); + + safe_outputs: + needs: agent + if: (!cancelled()) && needs.agent.result != 'skipped' && needs.agent.outputs.detection_success == 'true' + runs-on: ubuntu-slim + permissions: + contents: read + discussions: write + issues: write + timeout-minutes: 15 + env: + GH_AW_CALLER_WORKFLOW_ID: "${{ github.repository }}/agentic-observability-central-kit" + GH_AW_ENGINE_ID: "copilot" + GH_AW_ENGINE_MODEL: ${{ needs.agent.outputs.model }} + GH_AW_TRACKER_ID: "agentic-observability-central-kit" + GH_AW_WORKFLOW_ID: "agentic-observability-central-kit" + GH_AW_WORKFLOW_NAME: "Agentic Observability Central Kit" + outputs: + code_push_failure_count: ${{ steps.process_safe_outputs.outputs.code_push_failure_count }} + code_push_failure_errors: ${{ steps.process_safe_outputs.outputs.code_push_failure_errors }} + create_discussion_error_count: ${{ steps.process_safe_outputs.outputs.create_discussion_error_count }} + create_discussion_errors: ${{ steps.process_safe_outputs.outputs.create_discussion_errors }} + created_issue_number: ${{ steps.process_safe_outputs.outputs.created_issue_number }} + created_issue_url: ${{ steps.process_safe_outputs.outputs.created_issue_url }} + process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }} + process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + 
persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Download agent output artifact + id: download-agent-output + continue-on-error: true + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw/ + - name: Setup agent output environment variable + id: setup-agent-output-env + if: steps.download-agent-output.outcome == 'success' + run: | + mkdir -p /tmp/gh-aw/ + find "/tmp/gh-aw/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" + - name: Configure GH_HOST for enterprise compatibility + id: ghes-host-config + shell: bash + run: | + # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct + # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op. + GH_HOST="${GITHUB_SERVER_URL#https://}" + GH_HOST="${GH_HOST#http://}" + echo "GH_HOST=${GH_HOST}" >> "$GITHUB_OUTPUT" + - name: Process Safe Outputs + id: process_safe_outputs + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_ALLOWED_DOMAINS: 
"api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_API_URL: ${{ github.api_url }} + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"audits\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"target-repo\":\"${{ env.REPORT_REPOSITORY }}\",\"title_prefix\":\"[observability central] \"},\"create_issue\":{\"group\":true,\"labels\":[\"agentics\",\"warning\",\"platform\"],\"max\":10,\"target-repo\":\"${{ env.REPORT_REPOSITORY }}\"},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"}}" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/safe_output_handler_manager.cjs'); + await main(); + - name: Upload Safe Output Items + if: always() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: safe-output-items + path: 
/tmp/gh-aw/safe-output-items.jsonl + if-no-files-found: ignore + diff --git a/.github/workflows/agentic-observability-central-kit.md b/.github/workflows/agentic-observability-central-kit.md new file mode 100644 index 00000000000..90678ea5be2 --- /dev/null +++ b/.github/workflows/agentic-observability-central-kit.md @@ -0,0 +1,179 @@ +--- +description: Central reporting variant of the agentic observability kit for platform repositories +on: + schedule: weekly on monday around 08:30 + workflow_dispatch: +permissions: + contents: read + actions: read + issues: read + pull-requests: read + discussions: read +env: + REPORT_REPOSITORY: ${{ vars.REPORT_REPOSITORY || github.repository }} +engine: copilot +strict: true +tracker-id: agentic-observability-central-kit +tools: + agentic-workflows: + github: + toolsets: [default, discussions] + allowed-repos: all + min-integrity: merged +safe-outputs: + create-discussion: + target-repo: ${{ env.REPORT_REPOSITORY }} + expires: 7d + category: "audits" + title-prefix: "[observability central] " + max: 1 + close-older-discussions: true + create-issue: + target-repo: ${{ env.REPORT_REPOSITORY }} + labels: [agentics, warning, platform] + max: 10 + group: true + noop: + report-as-issue: false +timeout-minutes: 30 +imports: + - shared/reporting.md +--- + +# Agentic Observability Central Kit + +You are the central reporting variant of the agentic observability kit. Analyze recent agentic workflow runs for the current repository, but publish the portfolio report and warning issues into the central reporting repository defined by `${{ env.REPORT_REPOSITORY }}`. + +## Mission + +Produce one platform-readable report and a small number of targeted warning issues so that a central workflow operations team can monitor many repositories from one place. + +Focus on: + +1. repeated drift away from a successful baseline +2. repeated risky behavior changes such as new write posture, new MCP failures, or more blocked requests +3. 
repeated resource-heavy or weak-control patterns +4. low-value agentic workflows that should be simplified later +5. workflows that do not form stable cohorts and therefore resist trustworthy comparison + +Always create a discussion report in the central reporting repository. Create issues only for repeated, actionable patterns. + +## Data Collection Rules + +- Use the `agentic-workflows` MCP tool, not shell commands. +- Start with the `logs` tool over the last 14 days. +- Leave `workflow_name` empty so you analyze the full repository. +- Use `count` large enough to cover the repository, typically `300`. +- Use the `audit` tool only for up to 3 runs that need deeper inspection. +- If there are very few runs, still create a report and explain the limitation. + +## Signals To Use + +Prefer the built-in agentic signals from logs and audit data: + +- `task_domain.name` and `task_domain.label` +- `behavior_fingerprint.execution_style` +- `behavior_fingerprint.tool_breadth` +- `behavior_fingerprint.actuation_style` +- `behavior_fingerprint.resource_profile` +- `behavior_fingerprint.dispatch_mode` +- `agentic_assessments[].kind` +- `agentic_assessments[].severity` +- `comparison.baseline.selection` +- `comparison.baseline.matched_on[]` +- `comparison.classification.label` +- `comparison.classification.reason_codes[]` +- `comparison.recommendation.action` + +## Reporting Requirements + +The discussion is for a platform team that may not know the local repository well, so every highlighted workflow must include repository context. + +### Visible Summary + +Keep these sections visible: + +1. `### Executive Summary` +2. `### Repository Summary` +3. `### Highest Risk Workflows` +4. `### Platform Actions` + +Include: + +- repository name +- date range analyzed +- workflows analyzed +- runs analyzed +- risky runs +- repeated warning candidates +- deterministic candidates + +### Details + +Put verbose per-workflow breakdowns inside `
` blocks. + +### Central Routing Expectations + +Because the outputs land in a central repository: + +- mention the analyzed source repository explicitly in the discussion title or opening paragraph +- name the source repository in every warning issue +- include up to 3 representative run links +- avoid repo-local language like "this repo" without naming it + +## Warning Thresholds + +Create at most one warning issue per workflow when, in the last 14 days: + +1. two or more runs for the same workflow have `comparison.classification.label == "risky"` +2. two or more runs contain `new_mcp_failure` or `blocked_requests_increase` +3. two or more runs contain a medium or high severity `resource_heavy_for_domain` +4. two or more runs contain a medium or high severity `poor_agentic_control` + +Do not open issues for single-run anomalies. + +## Optimization Candidates + +Keep these in the report unless they are severe and repeated: + +- repeated `overkill_for_agentic` +- workflows that remain `lean`, `directed`, and `narrow` +- workflows whose comparisons keep falling back to `latest_success` + +These are platform portfolio decisions, not immediate incidents. + +## Use Of Audit + +Use `audit` only to deepen the top few warnings. Good candidates are: + +- the newest risky run for a repeatedly warning workflow +- a run with a new MCP failure +- a run that changed from read-only to write-capable posture + +Fold audit evidence back into the report and issues. Do not dump raw audit output. 
+ +## Output Requirements + +### Discussion + +Always create one discussion in `${{ env.REPORT_REPOSITORY }}` that includes: + +- the source repository name +- the date range analyzed +- the clearest repeated risk patterns +- the most common assessment kinds +- deterministic candidates +- workflows that need owner attention now + +### Issues + +When creating a warning issue in `${{ env.REPORT_REPOSITORY }}`: + +- name both the source repository and the workflow +- explain the repeated evidence with run counts and specific reason codes or assessment kinds +- include the most relevant recommendation from the comparison or assessment data +- link up to 3 representative runs + +### No-op + +If the repository has no recent runs or no report can be produced, call `noop` with a short explanation. Otherwise do not use `noop`. diff --git a/.github/workflows/agentic-observability-kit.lock.yml b/.github/workflows/agentic-observability-kit.lock.yml new file mode 100644 index 00000000000..3d956becca9 --- /dev/null +++ b/.github/workflows/agentic-observability-kit.lock.yml @@ -0,0 +1,1191 @@ +# ___ _ _ +# / _ \ | | (_) +# | |_| | __ _ ___ _ __ | |_ _ ___ +# | _ |/ _` |/ _ \ '_ \| __| |/ __| +# | | | | (_| | __/ | | | |_| | (__ +# \_| |_/\__, |\___|_| |_|\__|_|\___| +# __/ | +# _ _ |___/ +# | | | | / _| | +# | | | | ___ _ __ _ __| |_| | _____ ____ +# | |/\| |/ _ \ '__| |/ /| _| |/ _ \ \ /\ / / ___| +# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ +# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ +# +# This file was automatically generated by gh-aw. DO NOT EDIT. +# +# To update this file, edit the corresponding .md file and run: +# gh aw compile +# Not all edits will cause changes to this file. 
+# +# For more information: https://github.github.com/gh-aw/introduction/overview/ +# +# Drop-in observability kit for repositories using agentic workflows +# +# Resolved workflow manifest: +# Imports: +# - shared/reporting.md +# +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"10a830eaf5cf3094ef3a9310d0dbb615f44717f0750cda14c4c543076df1a0c6","strict":true,"agent_id":"copilot"} + +name: "Agentic Observability Kit" +"on": + schedule: + - cron: "7 8 * * 1" + # Friendly format: weekly on monday around 08:00 (scattered) + workflow_dispatch: + inputs: + aw_context: + default: "" + description: Agent caller context (used internally by Agentic Workflows). + required: false + type: string + +permissions: {} + +concurrency: + group: "gh-aw-${{ github.workflow }}" + +run-name: "Agentic Observability Kit" + +jobs: + activation: + runs-on: ubuntu-slim + permissions: + contents: read + outputs: + comment_id: "" + comment_repo: "" + lockdown_check_failed: ${{ steps.generate_aw_info.outputs.lockdown_check_failed == 'true' }} + model: ${{ steps.generate_aw_info.outputs.model }} + secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Generate agentic run info + id: generate_aw_info + env: + GH_AW_INFO_ENGINE_ID: "copilot" + GH_AW_INFO_ENGINE_NAME: "GitHub Copilot CLI" + GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }} + GH_AW_INFO_VERSION: "latest" + GH_AW_INFO_AGENT_VERSION: "latest" + GH_AW_INFO_WORKFLOW_NAME: "Agentic Observability Kit" + GH_AW_INFO_EXPERIMENTAL: "false" + GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" + GH_AW_INFO_STAGED: "false" + GH_AW_INFO_ALLOWED_DOMAINS: '["defaults"]' + 
GH_AW_INFO_FIREWALL_ENABLED: "true" + GH_AW_INFO_AWF_VERSION: "v0.25.0" + GH_AW_INFO_AWMG_VERSION: "" + GH_AW_INFO_FIREWALL_TYPE: "squid" + GH_AW_COMPILED_STRICT: "true" + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/generate_aw_info.cjs'); + await main(core, context); + - name: Validate COPILOT_GITHUB_TOKEN secret + id: validate-secret + run: ${RUNNER_TEMP}/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + - name: Checkout .github and .agents folders + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + sparse-checkout: | + .github + .agents + sparse-checkout-cone-mode: true + fetch-depth: 1 + - name: Check workflow file timestamps + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_WORKFLOW_FILE: "agentic-observability-kit.lock.yml" + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/check_workflow_timestamp_api.cjs'); + await main(); + - name: Create prompt with built-in context + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ runner.temp }}/gh-aw/safeoutputs/outputs.jsonl + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + 
GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + # poutine:ignore untrusted_checkout_exec + run: | + bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh + { + cat << 'GH_AW_PROMPT_EOF' + + GH_AW_PROMPT_EOF + cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/agentic_workflows_guide.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" + cat << 'GH_AW_PROMPT_EOF' + + Tools: create_issue(max:5), create_discussion, missing_tool, missing_data, noop + + + The following GitHub context information is available for this workflow: + {{#if __GH_AW_GITHUB_ACTOR__ }} + - **actor**: __GH_AW_GITHUB_ACTOR__ + {{/if}} + {{#if __GH_AW_GITHUB_REPOSITORY__ }} + - **repository**: __GH_AW_GITHUB_REPOSITORY__ + {{/if}} + {{#if __GH_AW_GITHUB_WORKSPACE__ }} + - **workspace**: __GH_AW_GITHUB_WORKSPACE__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ }} + - **issue-number**: #__GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ }} + - **discussion-number**: #__GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ }} + - **pull-request-number**: #__GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_COMMENT_ID__ }} + - **comment-id**: __GH_AW_GITHUB_EVENT_COMMENT_ID__ + {{/if}} + {{#if __GH_AW_GITHUB_RUN_ID__ }} + - **workflow-run-id**: __GH_AW_GITHUB_RUN_ID__ + {{/if}} + + + GH_AW_PROMPT_EOF + cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" + cat << 'GH_AW_PROMPT_EOF' + + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' + {{#runtime-import .github/workflows/shared/reporting.md}} + GH_AW_PROMPT_EOF + cat << 
'GH_AW_PROMPT_EOF' + {{#runtime-import .github/workflows/agentic-observability-kit.md}} + GH_AW_PROMPT_EOF + } > "$GH_AW_PROMPT" + - name: Interpolate variables and render templates + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/interpolate_prompt.cjs'); + await main(); + - name: Substitute placeholders + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + + const substitutePlaceholders = require('${{ runner.temp }}/gh-aw/actions/substitute_placeholders.cjs'); + + // Call the substitution function + return await substitutePlaceholders({ + file: process.env.GH_AW_PROMPT, + substitutions: { + GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR, + GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID, + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER, + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: process.env.GH_AW_GITHUB_EVENT_ISSUE_NUMBER, + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: 
process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER, + GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY, + GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID, + GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE + } + }); + - name: Validate prompt placeholders + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + # poutine:ignore untrusted_checkout_exec + run: bash ${RUNNER_TEMP}/gh-aw/actions/validate_prompt_placeholders.sh + - name: Print prompt + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + # poutine:ignore untrusted_checkout_exec + run: bash ${RUNNER_TEMP}/gh-aw/actions/print_prompt_summary.sh + - name: Upload activation artifact + if: success() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: activation + path: | + /tmp/gh-aw/aw_info.json + /tmp/gh-aw/aw-prompts/prompt.txt + retention-days: 1 + + agent: + needs: activation + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + discussions: read + issues: read + pull-requests: read + concurrency: + group: "gh-aw-copilot-${{ github.workflow }}" + env: + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + GH_AW_ASSETS_ALLOWED_EXTS: "" + GH_AW_ASSETS_BRANCH: "" + GH_AW_ASSETS_MAX_SIZE_KB: 0 + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + GH_AW_WORKFLOW_ID_SANITIZED: agenticobservabilitykit + outputs: + checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} + detection_conclusion: ${{ steps.detection_conclusion.outputs.conclusion }} + detection_success: ${{ steps.detection_conclusion.outputs.success }} + has_patch: ${{ steps.collect_output.outputs.has_patch }} + inference_access_error: ${{ steps.detect-inference-error.outputs.inference_access_error || 'false' }} + model: ${{ needs.activation.outputs.model }} + output: ${{ steps.collect_output.outputs.output }} + output_types: ${{ steps.collect_output.outputs.output_types }} + steps: + - name: Checkout actions folder + 
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Set runtime paths + id: set-runtime-paths + run: | + echo "GH_AW_SAFE_OUTPUTS=${RUNNER_TEMP}/gh-aw/safeoutputs/outputs.jsonl" >> "$GITHUB_OUTPUT" + echo "GH_AW_SAFE_OUTPUTS_CONFIG_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" >> "$GITHUB_OUTPUT" + echo "GH_AW_SAFE_OUTPUTS_TOOLS_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/tools.json" >> "$GITHUB_OUTPUT" + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Setup Go for CLI build + uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 + with: + go-version-file: go.mod + cache: true + - name: Build gh-aw CLI + run: | + echo "Building gh-aw CLI for linux/amd64..." + mkdir -p dist + VERSION=$(git describe --tags --always --dirty) + CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ + -ldflags "-s -w -X main.version=${VERSION}" \ + -o dist/gh-aw-linux-amd64 \ + ./cmd/gh-aw + # Copy binary to root for direct execution in user-defined steps + cp dist/gh-aw-linux-amd64 ./gh-aw + chmod +x ./gh-aw + echo "✓ Built gh-aw CLI successfully" + - name: Setup Docker Buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4 + - name: Build gh-aw Docker image + uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7 + with: + context: . 
+ platforms: linux/amd64 + push: false + load: true + tags: localhost/gh-aw:dev + build-args: | + BINARY=dist/gh-aw-linux-amd64 + - name: Create gh-aw temp directory + run: bash ${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh + - name: Configure gh CLI for GitHub Enterprise + run: bash ${RUNNER_TEMP}/gh-aw/actions/configure_gh_for_ghe.sh + env: + GH_TOKEN: ${{ github.token }} + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + git config --global am.keepcr true + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Checkout PR branch + id: checkout-pr + if: | + github.event.pull_request || github.event.issue.pull_request + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/checkout_pr_branch.cjs'); + await main(); + - name: Install GitHub Copilot CLI + run: ${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh latest + env: + GH_HOST: github.com + - name: Install AWF binary + run: bash ${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh v0.25.0 + - name: Determine automatic lockdown mode for GitHub MCP Server + id: determine-automatic-lockdown + uses: 
actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + with: + script: | + const determineAutomaticLockdown = require('${{ runner.temp }}/gh-aw/actions/determine_automatic_lockdown.cjs'); + await determineAutomaticLockdown(github, context, core); + - name: Download container images + run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.25.0 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.0 ghcr.io/github/gh-aw-firewall/squid:0.25.0 ghcr.io/github/gh-aw-mcpg:v0.2.3 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine + - name: Install gh-aw extension + env: + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + run: | + # Check if gh-aw extension is already installed + if gh extension list | grep -q "github/gh-aw"; then + echo "gh-aw extension already installed, upgrading..." + gh extension upgrade gh-aw || true + else + echo "Installing gh-aw extension..." 
+ gh extension install github/gh-aw + fi + gh aw --version + # Copy the gh-aw binary to ${RUNNER_TEMP}/gh-aw for MCP server containerization + mkdir -p ${RUNNER_TEMP}/gh-aw + GH_AW_BIN=$(which gh-aw 2>/dev/null || find ~/.local/share/gh/extensions/gh-aw -name 'gh-aw' -type f 2>/dev/null | head -1) + if [ -n "$GH_AW_BIN" ] && [ -f "$GH_AW_BIN" ]; then + cp "$GH_AW_BIN" ${RUNNER_TEMP}/gh-aw/gh-aw + chmod +x ${RUNNER_TEMP}/gh-aw/gh-aw + echo "Copied gh-aw binary to ${RUNNER_TEMP}/gh-aw/gh-aw" + else + echo "::error::Failed to find gh-aw binary for MCP server" + exit 1 + fi + - name: Write Safe Outputs Config + run: | + mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_EOF' + {"create_discussion":{"expires":168,"max":1},"create_issue":{"group":true,"max":5},"missing_data":{},"missing_tool":{},"noop":{"max":1}} + GH_AW_SAFE_OUTPUTS_CONFIG_EOF + - name: Write Safe Outputs Tools + run: | + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_EOF' + { + "description_suffixes": { + "create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[observability] \". Discussions will be created in category \"audits\".", + "create_issue": " CONSTRAINTS: Maximum 5 issue(s) can be created. Labels [\"agentics\" \"warning\"] will be automatically added." 
+ }, + "repo_params": {}, + "dynamic_tools": [] + } + GH_AW_SAFE_OUTPUTS_TOOLS_META_EOF + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_EOF' + { + "create_discussion": { + "defaultMax": 1, + "fields": { + "body": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + }, + "category": { + "type": "string", + "sanitize": true, + "maxLength": 128 + }, + "repo": { + "type": "string", + "maxLength": 256 + }, + "title": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "create_issue": { + "defaultMax": 1, + "fields": { + "body": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + }, + "labels": { + "type": "array", + "itemType": "string", + "itemSanitize": true, + "itemMaxLength": 128 + }, + "parent": { + "issueOrPRNumber": true + }, + "repo": { + "type": "string", + "maxLength": 256 + }, + "temporary_id": { + "type": "string" + }, + "title": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "missing_data": { + "defaultMax": 20, + "fields": { + "alternatives": { + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "context": { + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "data_type": { + "type": "string", + "sanitize": true, + "maxLength": 128 + }, + "reason": { + "type": "string", + "sanitize": true, + "maxLength": 256 + } + } + }, + "missing_tool": { + "defaultMax": 20, + "fields": { + "alternatives": { + "type": "string", + "sanitize": true, + "maxLength": 512 + }, + "reason": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "tool": { + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "noop": { + "defaultMax": 1, + "fields": { + "message": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + } + } + } + } + GH_AW_SAFE_OUTPUTS_VALIDATION_EOF + node 
${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs + - name: Generate Safe Outputs MCP Server Config + id: safe-outputs-config + run: | + # Generate a secure random API key (360 bits of entropy, 40+ chars) + # Mask immediately to prevent timing vulnerabilities + API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${API_KEY}" + + PORT=3001 + + # Set outputs for next steps + { + echo "safe_outputs_api_key=${API_KEY}" + echo "safe_outputs_port=${PORT}" + } >> "$GITHUB_OUTPUT" + + echo "Safe Outputs MCP server will run on port ${PORT}" + + - name: Start Safe Outputs MCP HTTP Server + id: safe-outputs-start + env: + DEBUG: '*' + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-config.outputs.safe_outputs_port }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-config.outputs.safe_outputs_api_key }} + GH_AW_SAFE_OUTPUTS_TOOLS_PATH: ${{ runner.temp }}/gh-aw/safeoutputs/tools.json + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: ${{ runner.temp }}/gh-aw/safeoutputs/config.json + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + run: | + # Environment variables are set above to prevent template injection + export DEBUG + export GH_AW_SAFE_OUTPUTS_PORT + export GH_AW_SAFE_OUTPUTS_API_KEY + export GH_AW_SAFE_OUTPUTS_TOOLS_PATH + export GH_AW_SAFE_OUTPUTS_CONFIG_PATH + export GH_AW_MCP_LOG_DIR + + bash ${RUNNER_TEMP}/gh-aw/actions/start_safe_outputs_server.sh + + - name: Start MCP Gateway + id: start-mcp-gateway + env: + GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-start.outputs.api_key }} + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-start.outputs.port }} + GITHUB_MCP_GUARD_MIN_INTEGRITY: ${{ steps.determine-automatic-lockdown.outputs.min_integrity }} + GITHUB_MCP_GUARD_REPOS: ${{ steps.determine-automatic-lockdown.outputs.repos }} + GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + 
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -eo pipefail + mkdir -p /tmp/gh-aw/mcp-config + + # Export gateway environment variables for MCP config and gateway script + export MCP_GATEWAY_PORT="80" + export MCP_GATEWAY_DOMAIN="host.docker.internal" + MCP_GATEWAY_API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${MCP_GATEWAY_API_KEY}" + export MCP_GATEWAY_API_KEY + export MCP_GATEWAY_PAYLOAD_DIR="/tmp/gh-aw/mcp-payloads" + mkdir -p "${MCP_GATEWAY_PAYLOAD_DIR}" + export MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD="524288" + export DEBUG="*" + + export GH_AW_ENGINE="copilot" + export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.3' + + mkdir -p /home/runner/.copilot + cat << GH_AW_MCP_CONFIG_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh + { + "mcpServers": { + 
"agenticworkflows": { + "type": "stdio", + "container": "localhost/gh-aw:dev", + "mounts": ["\${GITHUB_WORKSPACE}:\${GITHUB_WORKSPACE}:rw", "/tmp/gh-aw:/tmp/gh-aw:rw"], + "args": ["--network", "host", "-w", "\${GITHUB_WORKSPACE}"], + "env": { + "DEBUG": "*", + "GITHUB_TOKEN": "\${GITHUB_TOKEN}", + "GITHUB_ACTOR": "\${GITHUB_ACTOR}", + "GITHUB_REPOSITORY": "\${GITHUB_REPOSITORY}" + }, + "guard-policies": { + "write-sink": { + "accept": [ + "*" + ] + } + } + }, + "github": { + "type": "stdio", + "container": "ghcr.io/github/github-mcp-server:v0.32.0", + "env": { + "GITHUB_HOST": "\${GITHUB_SERVER_URL}", + "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}", + "GITHUB_READ_ONLY": "1", + "GITHUB_TOOLSETS": "context,repos,issues,pull_requests,discussions" + }, + "guard-policies": { + "allow-only": { + "min-integrity": "$GITHUB_MCP_GUARD_MIN_INTEGRITY", + "repos": "$GITHUB_MCP_GUARD_REPOS" + } + } + }, + "safeoutputs": { + "type": "http", + "url": "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT", + "headers": { + "Authorization": "\${GH_AW_SAFE_OUTPUTS_API_KEY}" + }, + "guard-policies": { + "write-sink": { + "accept": [ + "*" + ] + } + } + } + }, + "gateway": { + "port": $MCP_GATEWAY_PORT, + "domain": "${MCP_GATEWAY_DOMAIN}", + "apiKey": "${MCP_GATEWAY_API_KEY}", + "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" + } + } + GH_AW_MCP_CONFIG_EOF + - name: Download activation artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: activation + path: /tmp/gh-aw + - name: Clean git credentials + continue-on-error: true + run: bash ${RUNNER_TEMP}/gh-aw/actions/clean_git_credentials.sh + - name: Execute GitHub Copilot CLI + id: agentic_execution + # Copilot CLI tool arguments (sorted): + timeout-minutes: 30 + run: | + set -o pipefail + touch /tmp/gh-aw/agent-step-summary.md + # shellcheck disable=SC1003 + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount 
"${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.25.0 --skip-pull --enable-api-proxy \ + -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-all-tools --allow-all-paths --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + COPILOT_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || '' }} + GH_AW_MCP_CONFIG: /home/runner/.copilot/mcp-config.json + GH_AW_PHASE: agent + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} + GH_AW_VERSION: dev + GITHUB_API_URL: ${{ github.api_url }} + GITHUB_AW: true + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_MCP_SERVER_TOKEN: ${{ 
secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_STEP_SUMMARY: /tmp/gh-aw/agent-step-summary.md + GITHUB_WORKSPACE: ${{ github.workspace }} + GIT_AUTHOR_EMAIL: github-actions[bot]@users.noreply.github.com + GIT_AUTHOR_NAME: github-actions[bot] + GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com + GIT_COMMITTER_NAME: github-actions[bot] + XDG_CONFIG_HOME: /home/runner + - name: Detect inference access error + id: detect-inference-error + if: always() + continue-on-error: true + run: bash ${RUNNER_TEMP}/gh-aw/actions/detect_inference_access_error.sh + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + git config --global am.keepcr true + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Copy Copilot session state files to logs + if: always() + continue-on-error: true + run: | + # Copy Copilot session state files to logs folder for artifact collection + # This ensures they are in /tmp/gh-aw/ where secret redaction can scan them + SESSION_STATE_DIR="$HOME/.copilot/session-state" + LOGS_DIR="/tmp/gh-aw/sandbox/agent/logs" + + if [ -d "$SESSION_STATE_DIR" ]; then + echo "Copying Copilot session state files from $SESSION_STATE_DIR to $LOGS_DIR" + mkdir -p "$LOGS_DIR" + cp -v "$SESSION_STATE_DIR"/*.jsonl "$LOGS_DIR/" 2>/dev/null || true + echo "Session state files copied successfully" + else + echo "No session-state directory found at $SESSION_STATE_DIR" + fi + - name: Stop MCP Gateway + if: 
always() + continue-on-error: true + env: + MCP_GATEWAY_PORT: ${{ steps.start-mcp-gateway.outputs.gateway-port }} + MCP_GATEWAY_API_KEY: ${{ steps.start-mcp-gateway.outputs.gateway-api-key }} + GATEWAY_PID: ${{ steps.start-mcp-gateway.outputs.gateway-pid }} + run: | + bash ${RUNNER_TEMP}/gh-aw/actions/stop_mcp_gateway.sh "$GATEWAY_PID" + - name: Redact secrets in logs + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/redact_secrets.cjs'); + await main(); + env: + GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN' + SECRET_COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + SECRET_GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + SECRET_GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + SECRET_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Append agent step summary + if: always() + run: bash ${RUNNER_TEMP}/gh-aw/actions/append_agent_step_summary.sh + - name: Copy Safe Outputs + if: always() + env: + GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} + run: | + mkdir -p /tmp/gh-aw + cp "$GH_AW_SAFE_OUTPUTS" /tmp/gh-aw/safeoutputs.jsonl 2>/dev/null || true + - name: Ingest agent output + id: collect_output + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} + GH_AW_ALLOWED_DOMAINS: 
"api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_API_URL: ${{ github.api_url }} + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/collect_ndjson_output.cjs'); + await main(); + - name: Parse agent logs for step summary + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/ + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_copilot_log.cjs'); + await main(); + - name: Parse MCP Gateway logs for step summary + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + 
const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_mcp_gateway_log.cjs'); + await main(); + - name: Print firewall logs + if: always() + continue-on-error: true + env: + AWF_LOGS_DIR: /tmp/gh-aw/sandbox/firewall/logs + run: | + # Fix permissions on firewall logs so they can be uploaded as artifacts + # AWF runs with sudo, creating files owned by root + sudo chmod -R a+r /tmp/gh-aw/sandbox/firewall/logs 2>/dev/null || true + # Only run awf logs summary if awf command exists (it may not be installed if workflow failed before install step) + if command -v awf &> /dev/null; then + awf logs summary | tee -a "$GITHUB_STEP_SUMMARY" + else + echo 'AWF binary not installed, skipping firewall log summary' + fi + - name: Write agent output placeholder if missing + if: always() + run: | + if [ ! -f /tmp/gh-aw/agent_output.json ]; then + echo '{"items":[]}' > /tmp/gh-aw/agent_output.json + fi + - name: Upload agent artifacts + if: always() + continue-on-error: true + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: agent + path: | + /tmp/gh-aw/aw-prompts/prompt.txt + /tmp/gh-aw/sandbox/agent/logs/ + /tmp/gh-aw/redacted-urls.log + /tmp/gh-aw/mcp-logs/ + /tmp/gh-aw/agent-stdio.log + /tmp/gh-aw/agent/ + /tmp/gh-aw/safeoutputs.jsonl + /tmp/gh-aw/agent_output.json + if-no-files-found: ignore + - name: Upload firewall audit logs + if: always() + continue-on-error: true + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: firewall-audit-logs + path: /tmp/gh-aw/sandbox/firewall/logs/ + if-no-files-found: ignore + # --- Threat Detection (inline) --- + - name: Check if detection needed + id: detection_guard + if: always() + env: + OUTPUT_TYPES: ${{ steps.collect_output.outputs.output_types }} + HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }} + run: | + if [[ -n "$OUTPUT_TYPES" || "$HAS_PATCH" == "true" ]]; then + echo "run_detection=true" >> "$GITHUB_OUTPUT" + echo "Detection will 
run: output_types=$OUTPUT_TYPES, has_patch=$HAS_PATCH" + else + echo "run_detection=false" >> "$GITHUB_OUTPUT" + echo "Detection skipped: no agent outputs or patches to analyze" + fi + - name: Clear MCP configuration for detection + if: always() && steps.detection_guard.outputs.run_detection == 'true' + run: | + rm -f /tmp/gh-aw/mcp-config/mcp-servers.json + rm -f /home/runner/.copilot/mcp-config.json + rm -f "$GITHUB_WORKSPACE/.gemini/settings.json" + - name: Prepare threat detection files + if: always() && steps.detection_guard.outputs.run_detection == 'true' + run: | + mkdir -p /tmp/gh-aw/threat-detection/aw-prompts + cp /tmp/gh-aw/aw-prompts/prompt.txt /tmp/gh-aw/threat-detection/aw-prompts/prompt.txt 2>/dev/null || true + cp /tmp/gh-aw/agent_output.json /tmp/gh-aw/threat-detection/agent_output.json 2>/dev/null || true + for f in /tmp/gh-aw/aw-*.patch; do + [ -f "$f" ] && cp "$f" /tmp/gh-aw/threat-detection/ 2>/dev/null || true + done + echo "Prepared threat detection files:" + ls -la /tmp/gh-aw/threat-detection/ 2>/dev/null || true + - name: Setup threat detection + if: always() && steps.detection_guard.outputs.run_detection == 'true' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + WORKFLOW_NAME: "Agentic Observability Kit" + WORKFLOW_DESCRIPTION: "Drop-in observability kit for repositories using agentic workflows" + HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }} + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/setup_threat_detection.cjs'); + await main(); + - name: Ensure threat-detection directory and log + if: always() && steps.detection_guard.outputs.run_detection == 'true' + run: | + mkdir -p /tmp/gh-aw/threat-detection + touch /tmp/gh-aw/threat-detection/detection.log + - name: Execute GitHub Copilot CLI + if: always() && 
steps.detection_guard.outputs.run_detection == 'true' + id: detection_agentic_execution + # Copilot CLI tool arguments (sorted): + # --allow-tool shell(cat) + # --allow-tool shell(grep) + # --allow-tool shell(head) + # --allow-tool shell(jq) + # --allow-tool shell(ls) + # --allow-tool shell(tail) + # --allow-tool shell(wc) + timeout-minutes: 20 + run: | + set -o pipefail + touch /tmp/gh-aw/agent-step-summary.md + # shellcheck disable=SC1003 + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,raw.githubusercontent.com,registry.npmjs.org,telemetry.enterprise.githubcopilot.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.25.0 --skip-pull --enable-api-proxy \ + -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-tool '\''shell(cat)'\'' --allow-tool '\''shell(grep)'\'' --allow-tool '\''shell(head)'\'' --allow-tool '\''shell(jq)'\'' --allow-tool '\''shell(ls)'\'' --allow-tool '\''shell(tail)'\'' --allow-tool '\''shell(wc)'\'' --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + COPILOT_MODEL: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} + GH_AW_PHASE: detection + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_VERSION: dev + GITHUB_API_URL: ${{ github.api_url }} + GITHUB_AW: true + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_SERVER_URL: ${{ github.server_url }} + 
GITHUB_STEP_SUMMARY: /tmp/gh-aw/agent-step-summary.md + GITHUB_WORKSPACE: ${{ github.workspace }} + GIT_AUTHOR_EMAIL: github-actions[bot]@users.noreply.github.com + GIT_AUTHOR_NAME: github-actions[bot] + GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com + GIT_COMMITTER_NAME: github-actions[bot] + XDG_CONFIG_HOME: /home/runner + - name: Parse threat detection results + id: parse_detection_results + if: always() && steps.detection_guard.outputs.run_detection == 'true' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_threat_detection_results.cjs'); + await main(); + - name: Upload threat detection log + if: always() && steps.detection_guard.outputs.run_detection == 'true' + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: detection + path: /tmp/gh-aw/threat-detection/detection.log + if-no-files-found: ignore + - name: Set detection conclusion + id: detection_conclusion + if: always() + env: + RUN_DETECTION: ${{ steps.detection_guard.outputs.run_detection }} + DETECTION_SUCCESS: ${{ steps.parse_detection_results.outputs.success }} + run: | + if [[ "$RUN_DETECTION" != "true" ]]; then + echo "conclusion=skipped" >> "$GITHUB_OUTPUT" + echo "success=true" >> "$GITHUB_OUTPUT" + echo "Detection was not needed, marking as skipped" + elif [[ "$DETECTION_SUCCESS" == "true" ]]; then + echo "conclusion=success" >> "$GITHUB_OUTPUT" + echo "success=true" >> "$GITHUB_OUTPUT" + echo "Detection passed successfully" + else + echo "conclusion=failure" >> "$GITHUB_OUTPUT" + echo "success=false" >> "$GITHUB_OUTPUT" + echo "Detection found issues" + fi + + conclusion: + needs: + - activation + - agent + - safe_outputs + if: always() && (needs.agent.result != 'skipped' || 
needs.activation.outputs.lockdown_check_failed == 'true') + runs-on: ubuntu-slim + permissions: + contents: read + discussions: write + issues: write + concurrency: + group: "gh-aw-conclusion-agentic-observability-kit" + cancel-in-progress: false + outputs: + noop_message: ${{ steps.noop.outputs.noop_message }} + tools_reported: ${{ steps.missing_tool.outputs.tools_reported }} + total_count: ${{ steps.missing_tool.outputs.total_count }} + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Download agent output artifact + id: download-agent-output + continue-on-error: true + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw/ + - name: Setup agent output environment variable + id: setup-agent-output-env + if: steps.download-agent-output.outcome == 'success' + run: | + mkdir -p /tmp/gh-aw/ + find "/tmp/gh-aw/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" + - name: Process No-Op Messages + id: noop + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_NOOP_MAX: "1" + GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" + GH_AW_TRACKER_ID: "agentic-observability-kit" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/noop.cjs'); + await main(); + - name: Record Missing Tool + id: missing_tool + uses: 
actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" + GH_AW_TRACKER_ID: "agentic-observability-kit" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/missing_tool.cjs'); + await main(); + - name: Handle Agent Failure + id: handle_agent_failure + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" + GH_AW_TRACKER_ID: "agentic-observability-kit" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_WORKFLOW_ID: "agentic-observability-kit" + GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.activation.outputs.secret_verification_result }} + GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} + GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.agent.outputs.inference_access_error }} + GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }} + GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }} + GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }} + GH_AW_GROUP_REPORTS: "false" + GH_AW_FAILURE_REPORT_AS_ISSUE: "true" + GH_AW_TIMEOUT_MINUTES: "30" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); 
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_agent_failure.cjs'); + await main(); + - name: Handle No-Op Message + id: handle_noop_message + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" + GH_AW_TRACKER_ID: "agentic-observability-kit" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_NOOP_MESSAGE: ${{ steps.noop.outputs.noop_message }} + GH_AW_NOOP_REPORT_AS_ISSUE: "false" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs'); + await main(); + + safe_outputs: + needs: agent + if: (!cancelled()) && needs.agent.result != 'skipped' && needs.agent.outputs.detection_success == 'true' + runs-on: ubuntu-slim + permissions: + contents: read + discussions: write + issues: write + timeout-minutes: 15 + env: + GH_AW_CALLER_WORKFLOW_ID: "${{ github.repository }}/agentic-observability-kit" + GH_AW_ENGINE_ID: "copilot" + GH_AW_ENGINE_MODEL: ${{ needs.agent.outputs.model }} + GH_AW_TRACKER_ID: "agentic-observability-kit" + GH_AW_WORKFLOW_ID: "agentic-observability-kit" + GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" + outputs: + code_push_failure_count: ${{ steps.process_safe_outputs.outputs.code_push_failure_count }} + code_push_failure_errors: ${{ steps.process_safe_outputs.outputs.code_push_failure_errors }} + create_discussion_error_count: ${{ steps.process_safe_outputs.outputs.create_discussion_error_count }} + create_discussion_errors: ${{ steps.process_safe_outputs.outputs.create_discussion_errors }} + 
created_issue_number: ${{ steps.process_safe_outputs.outputs.created_issue_number }} + created_issue_url: ${{ steps.process_safe_outputs.outputs.created_issue_url }} + process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }} + process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Download agent output artifact + id: download-agent-output + continue-on-error: true + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw/ + - name: Setup agent output environment variable + id: setup-agent-output-env + if: steps.download-agent-output.outcome == 'success' + run: | + mkdir -p /tmp/gh-aw/ + find "/tmp/gh-aw/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" + - name: Configure GH_HOST for enterprise compatibility + id: ghes-host-config + shell: bash + run: | + # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct + # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op. 
+ GH_HOST="${GITHUB_SERVER_URL#https://}" + GH_HOST="${GH_HOST#http://}" + echo "GH_HOST=${GH_HOST}" >> "$GITHUB_OUTPUT" + - name: Process Safe Outputs + id: process_safe_outputs + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_API_URL: ${{ github.api_url }} + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"audits\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[observability] \"},\"create_issue\":{\"group\":true,\"labels\":[\"agentics\",\"warning\"],\"max\":5},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"}}" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ 
runner.temp }}/gh-aw/actions/safe_output_handler_manager.cjs'); + await main(); + - name: Upload Safe Output Items + if: always() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: safe-output-items + path: /tmp/gh-aw/safe-output-items.jsonl + if-no-files-found: ignore + diff --git a/.github/workflows/agentic-observability-kit.md b/.github/workflows/agentic-observability-kit.md new file mode 100644 index 00000000000..98b1f43cdc3 --- /dev/null +++ b/.github/workflows/agentic-observability-kit.md @@ -0,0 +1,172 @@ +--- +description: Drop-in observability kit for repositories using agentic workflows +on: + schedule: weekly on monday around 08:00 + workflow_dispatch: +permissions: + contents: read + actions: read + issues: read + pull-requests: read + discussions: read +engine: copilot +strict: true +tracker-id: agentic-observability-kit +tools: + agentic-workflows: + github: + toolsets: [default, discussions] +safe-outputs: + create-discussion: + expires: 7d + category: "audits" + title-prefix: "[observability] " + max: 1 + close-older-discussions: true + create-issue: + labels: [agentics, warning] + max: 5 + group: true + noop: + report-as-issue: false +timeout-minutes: 30 +imports: + - shared/reporting.md +--- + +# Agentic Observability Kit + +You are an agentic workflow observability analyst. Produce one executive report that teams can read quickly, and create targeted warning issues only when repeated patterns show that a workflow needs intervention. + +## Mission + +Review recent agentic workflow runs and surface the signals that matter operationally: + +1. Repeated drift away from a successful baseline +2. Weak control patterns such as new write posture, new MCP failures, or more blocked requests +3. Resource-heavy runs that are expensive for the domain they serve +4. Stable but low-value agentic runs that may be better as deterministic automation +5. 
Delegated workflows that lost continuity or are no longer behaving like a consistent cohort + +Always create a discussion with the report. Create issues only for repeated, actionable problems. + +## Data Collection Rules + +- Use the `agentic-workflows` MCP tool, not shell commands. +- Start with the `logs` tool over the last 14 days. +- Leave `workflow_name` empty so you analyze the full repository. +- Use `count` large enough to cover the repository, typically `300`. +- Use the `audit` tool only for up to 3 runs that need deeper inspection. +- If there are very few runs, still produce a report and explain the limitation. + +## Signals To Use + +The logs JSON already contains the main agentic signals. Prefer these fields over ad hoc heuristics: + +- `task_domain.name` and `task_domain.label` +- `behavior_fingerprint.execution_style` +- `behavior_fingerprint.tool_breadth` +- `behavior_fingerprint.actuation_style` +- `behavior_fingerprint.resource_profile` +- `behavior_fingerprint.dispatch_mode` +- `agentic_assessments[].kind` +- `agentic_assessments[].severity` +- `comparison.baseline.selection` +- `comparison.baseline.matched_on[]` +- `comparison.classification.label` +- `comparison.classification.reason_codes[]` +- `comparison.recommendation.action` + +Treat these values as the canonical signals for reporting. + +## Reporting Model + +The discussion must stay concise and operator-friendly. + +### Visible Summary + +Keep these sections visible: + +1. `### Executive Summary` +2. `### Key Metrics` +3. `### Highest Risk Workflows` +4. `### Recommended Actions` + +Include small numeric summaries such as: + +- workflows analyzed +- runs analyzed +- runs with `comparison.classification.label == "risky"` +- runs with medium or high `agentic_assessments` +- workflows with repeated `overkill_for_agentic` +- workflows whose comparisons mostly fell back to `latest_success` + +### Details + +Put detailed per-workflow breakdowns inside `
<details>` blocks. + +### What Good Reporting Looks Like + +For each highlighted workflow, explain: + +- what domain it appears to belong to +- what its behavioral fingerprint looks like +- whether it is stable against a cohort match or only compared to latest success +- whether the risky behavior is new, repeated, or likely intentional +- what a team should change next + +## Warning Thresholds + +Create an issue only when a workflow crosses one of these thresholds in the last 14 days: + +1. Two or more runs for the same workflow have `comparison.classification.label == "risky"`. +2. Two or more runs for the same workflow contain `new_mcp_failure` or `blocked_requests_increase` in `comparison.classification.reason_codes`. +3. Two or more runs for the same workflow contain a medium or high severity `resource_heavy_for_domain` assessment. +4. Two or more runs for the same workflow contain a medium or high severity `poor_agentic_control` assessment. + +Do not open duplicate issues for the same workflow in the same run. Create at most one issue per workflow. + +## Optimization Candidates + +Do not create issues for these by default. Report them in the discussion unless they are severe and repeated: + +- repeated `overkill_for_agentic` +- workflows that are consistently `lean`, `directed`, and `narrow` +- workflows that are always compared using `latest_success` instead of `cohort_match` + +These are portfolio cleanup opportunities, not immediate incidents. + +## Use Of Audit + +Use `audit` only when the logs summary is not enough to explain a top problem. Good audit candidates are: + +- the newest risky run for a workflow with repeated warnings +- a run with a new MCP failure +- a run that changed from read-only to write-capable posture + +When you use `audit`, fold the extra evidence back into the report instead of dumping raw output. 
+ +## Output Requirements + +### Discussion + +Always create one discussion that includes: + +- the date range analyzed +- the workflows with the clearest repeated risk +- the most common assessment kinds +- a short list of deterministic candidates +- a short list of workflows that need owner attention now + +### Issues + +When creating a warning issue: + +- use a concrete title naming the workflow and the repeated pattern +- explain the evidence with run counts and the specific assessment or comparison reason codes +- include the most relevant recommendation from the comparison or assessment data +- link up to 3 representative runs + +### No-op + +If the repository has no recent runs or no report can be produced, call `noop` with a short explanation. Otherwise do not use `noop`. diff --git a/docs/src/content/docs/guides/agentic-observability-kit.md b/docs/src/content/docs/guides/agentic-observability-kit.md new file mode 100644 index 00000000000..123480fd067 --- /dev/null +++ b/docs/src/content/docs/guides/agentic-observability-kit.md @@ -0,0 +1,112 @@ +--- +title: How to Add the Agentic Observability Kit +description: Add a drop-in workflow that turns gh aw logs and audit signals into recurring observability reports and warning issues. +--- + +Use this guide when a repository already has agentic workflows and needs a supported starter workflow for run-behavior reporting. + +The kit reviews recent runs, publishes a recurring discussion report, and opens warning issues only when a workflow shows repeated risk. Use [Projects & Monitoring](/gh-aw/patterns/monitoring/) instead when building a custom project board or status-update workflow. 
+ +There are two variants: + +- `agentic-observability-kit` publishes into the same repository it analyzes +- `agentic-observability-central-kit` publishes into a central reporting repository + +## Add the workflow + +Run: + +```bash wrap +gh aw add github/gh-aw/agentic-observability-kit +``` + +This adds the workflow source file to `.github/workflows` so it can be reviewed and customized like any other workflow. + +## Add the central variant + +Use the central variant when a platform or workflow-operations repository should collect reports from many repositories. + +Run: + +```bash wrap +gh aw add github/gh-aw/agentic-observability-central-kit +``` + +Then set the `REPORT_REPOSITORY` repository variable to the destination repository in `owner/repo` format. + +Example: + +```text +acme/workflow-operations +``` + +If `REPORT_REPOSITORY` is not set, the workflow falls back to the current repository. + +## Review the default outputs + +By default, the workflow creates: + +- one discussion report per run in the `audits` category +- up to five warning issues when a workflow shows repeated risky behavior + +The default issue labels are `agentics` and `warning`. + +If the repository uses a different discussion category or labeling convention, edit the `safe-outputs` section after adding the workflow. + +## Compile the workflow + +After reviewing the file, compile it: + +```bash wrap +gh aw compile .github/workflows/agentic-observability-kit.md +``` + +If the repository already uses a bulk compile step, run that instead. + +For the central variant: + +```bash wrap +gh aw compile .github/workflows/agentic-observability-central-kit.md +``` + +## What counts as a warning + +The kit opens issues only for repeated, actionable patterns in the last 14 days. By default, that means one workflow crossed the same threshold in at least two runs. 
+ +The default warning conditions are: + +- repeated `risky` comparison classifications +- repeated `new_mcp_failure` or `blocked_requests_increase` comparison reasons +- repeated medium or high `resource_heavy_for_domain` +- repeated medium or high `poor_agentic_control` + +## What stays in the report instead of opening an issue + +Some findings stay in the discussion report instead of opening an issue because they are usually optimization candidates rather than incidents: + +- repeated `overkill_for_agentic` +- workflows that remain `lean`, `directed`, and `narrow` across successful runs +- workflows that can only be compared to `latest_success` and never find a meaningful cohort match + +## Customizing the kit + +The starter workflow is designed to be modified after import. + +Common changes are: + +- widen the analysis window from 14 days to 30 days +- change labels to match internal triage processes +- route discussions to a central reporting repository +- route warning issues to a platform or workflow-operations repository +- tighten or relax warning thresholds depending on run volume + +If the organization wants one central place for reports, update the `create-discussion` and `create-issue` safe outputs to target that repository. + +If a central platform repository is already the operating model, prefer `agentic-observability-central-kit` instead of manually rewriting the single-repo starter. 
+ +## Related documentation + +- [Debugging Workflows](/gh-aw/troubleshooting/debugging/) +- [GH-AW as an MCP Server](/gh-aw/reference/gh-aw-as-mcp-server/) +- [Projects & Monitoring](/gh-aw/patterns/monitoring/) +- [CentralRepoOps](/gh-aw/patterns/central-repo-ops/) diff --git a/docs/src/content/docs/patterns/monitoring.md b/docs/src/content/docs/patterns/monitoring.md index 0b5fb2b5c87..294ee979615 100644 --- a/docs/src/content/docs/patterns/monitoring.md +++ b/docs/src/content/docs/patterns/monitoring.md @@ -5,6 +5,8 @@ description: Use GitHub Projects + safe-outputs to track and monitor workflow wo Use this pattern when you want a durable “source of truth” for what your agentic workflows discovered, decided, and did. +This page is about composing a custom monitoring layer with GitHub Projects and safe outputs. If the goal is to add a ready-made weekly reporting workflow for run behavior, use the [agentic observability kit](/gh-aw/guides/agentic-observability-kit/). + ## What this pattern is - **Projects** are the dashboard: a GitHub Projects v2 board holds issues/PRs and custom fields. @@ -112,4 +114,6 @@ See the full reference: [/reference/safe-outputs/#no-op-logging-noop](/gh-aw/ref - Use `gh aw status` to see which workflows are enabled and their latest run state. - Use `gh aw logs` and `gh aw audit` to inspect tool usage, errors, MCP failures, and network patterns. +Use the [agentic observability kit](/gh-aw/guides/agentic-observability-kit/) when you want a supported starter workflow that turns recent `gh aw logs` and `gh aw audit` signals into recurring reports and warning issues. Stay on this page when you need a custom board, status-update workflow, or issue/project correlation model. 
+ See: [/setup/cli/](/gh-aw/setup/cli/) diff --git a/pkg/cli/audit.go b/pkg/cli/audit.go index 9d6d4aa9af3..a5180b3a8f0 100644 --- a/pkg/cli/audit.go +++ b/pkg/cli/audit.go @@ -354,9 +354,14 @@ func AuditWorkflowRun(ctx context.Context, runID int64, owner, repo, hostname st MCPFailures: mcpFailures, JobDetails: jobDetails, } + awContext, _, _, taskDomain, behaviorFingerprint, agenticAssessments := deriveRunAgenticAnalysis(processedRun, metrics) + processedRun.AwContext = awContext + processedRun.TaskDomain = taskDomain + processedRun.BehaviorFingerprint = behaviorFingerprint + processedRun.AgenticAssessments = agenticAssessments currentSnapshot := buildAuditComparisonSnapshot(processedRun, currentCreatedItems) - comparison := buildAuditComparisonForRun(run, currentSnapshot, runOutputDir, owner, repo, hostname, verbose) + comparison := buildAuditComparisonForRun(processedRun, currentSnapshot, runOutputDir, owner, repo, hostname, verbose) // Build structured audit data auditData := buildAuditData(processedRun, metrics, mcpToolUsage) @@ -420,6 +425,10 @@ func AuditWorkflowRun(ctx context.Context, runID int64, owner, repo, hostname st ProcessedAt: time.Now(), Run: run, Metrics: metrics, + AwContext: processedRun.AwContext, + TaskDomain: processedRun.TaskDomain, + BehaviorFingerprint: processedRun.BehaviorFingerprint, + AgenticAssessments: processedRun.AgenticAssessments, AccessAnalysis: accessAnalysis, FirewallAnalysis: firewallAnalysis, RedactedDomainsAnalysis: redactedDomainsAnalysis, diff --git a/pkg/cli/audit_agentic_analysis.go b/pkg/cli/audit_agentic_analysis.go new file mode 100644 index 00000000000..9674a2120f0 --- /dev/null +++ b/pkg/cli/audit_agentic_analysis.go @@ -0,0 +1,346 @@ +package cli + +import ( + "fmt" + "path/filepath" + "slices" + "strings" + "time" + + "github.com/github/gh-aw/pkg/timeutil" + "github.com/github/gh-aw/pkg/workflow" +) + +// TaskDomainInfo describes the dominant task type inferred for a workflow run. 
+type TaskDomainInfo struct { + Name string `json:"name"` + Label string `json:"label"` + Reason string `json:"reason,omitempty"` +} + +// BehaviorFingerprint summarizes the run's execution profile in compact dimensions. +type BehaviorFingerprint struct { + ExecutionStyle string `json:"execution_style"` + ToolBreadth string `json:"tool_breadth"` + ActuationStyle string `json:"actuation_style"` + ResourceProfile string `json:"resource_profile"` + DispatchMode string `json:"dispatch_mode"` +} + +// AgenticAssessment captures an actionable judgment about the run's behavior. +type AgenticAssessment struct { + Kind string `json:"kind"` + Severity string `json:"severity"` + Summary string `json:"summary"` + Evidence string `json:"evidence,omitempty"` + Recommendation string `json:"recommendation,omitempty"` +} + +func buildToolUsageInfo(metrics LogMetrics) []ToolUsageInfo { + toolStats := make(map[string]*ToolUsageInfo) + + for _, toolCall := range metrics.ToolCalls { + displayKey := workflow.PrettifyToolName(toolCall.Name) + if existing, exists := toolStats[displayKey]; exists { + existing.CallCount += toolCall.CallCount + if toolCall.MaxInputSize > existing.MaxInputSize { + existing.MaxInputSize = toolCall.MaxInputSize + } + if toolCall.MaxOutputSize > existing.MaxOutputSize { + existing.MaxOutputSize = toolCall.MaxOutputSize + } + if toolCall.MaxDuration > 0 { + maxDuration := timeutil.FormatDuration(toolCall.MaxDuration) + if existing.MaxDuration == "" || toolCall.MaxDuration > parseDurationString(existing.MaxDuration) { + existing.MaxDuration = maxDuration + } + } + continue + } + + info := &ToolUsageInfo{ + Name: displayKey, + CallCount: toolCall.CallCount, + MaxInputSize: toolCall.MaxInputSize, + MaxOutputSize: toolCall.MaxOutputSize, + } + if toolCall.MaxDuration > 0 { + info.MaxDuration = timeutil.FormatDuration(toolCall.MaxDuration) + } + toolStats[displayKey] = info + } + + toolUsage := make([]ToolUsageInfo, 0, len(toolStats)) + for _, info := range toolStats 
{ + toolUsage = append(toolUsage, *info) + } + + slices.SortFunc(toolUsage, func(a, b ToolUsageInfo) int { + if a.CallCount != b.CallCount { + return b.CallCount - a.CallCount + } + return strings.Compare(a.Name, b.Name) + }) + + return toolUsage +} + +func deriveRunAgenticAnalysis(processedRun ProcessedRun, metrics LogMetrics) (*AwContext, []ToolUsageInfo, []CreatedItemReport, *TaskDomainInfo, *BehaviorFingerprint, []AgenticAssessment) { + var awContext *AwContext + if processedRun.AwContext != nil { + awContext = processedRun.AwContext + } else if processedRun.Run.LogsPath != "" { + awInfoPath := filepath.Join(processedRun.Run.LogsPath, "aw_info.json") + if info, err := parseAwInfo(awInfoPath, false); err == nil && info != nil { + awContext = info.Context + } + } + + toolUsage := buildToolUsageInfo(metrics) + createdItems := extractCreatedItemsFromManifest(processedRun.Run.LogsPath) + metricsData := MetricsData{ + TokenUsage: processedRun.Run.TokenUsage, + EstimatedCost: processedRun.Run.EstimatedCost, + Turns: processedRun.Run.Turns, + ErrorCount: processedRun.Run.ErrorCount, + WarningCount: processedRun.Run.WarningCount, + } + + taskDomain := detectTaskDomain(processedRun, createdItems, toolUsage, awContext) + behaviorFingerprint := buildBehaviorFingerprint(processedRun, metricsData, toolUsage, createdItems, awContext) + agenticAssessments := buildAgenticAssessments(processedRun, metricsData, toolUsage, createdItems, taskDomain, behaviorFingerprint, awContext) + + return awContext, toolUsage, createdItems, taskDomain, behaviorFingerprint, agenticAssessments +} + +func detectTaskDomain(processedRun ProcessedRun, createdItems []CreatedItemReport, toolUsage []ToolUsageInfo, awContext *AwContext) *TaskDomainInfo { + combined := strings.ToLower(strings.Join([]string{ + processedRun.Run.WorkflowName, + processedRun.Run.WorkflowPath, + processedRun.Run.Event, + }, " ")) + + createdTypes := make([]string, 0, len(createdItems)) + for _, item := range createdItems { + 
createdTypes = append(createdTypes, strings.ToLower(item.Type)) + } + createdJoined := strings.Join(createdTypes, " ") + + toolNames := make([]string, 0, len(toolUsage)) + for _, tool := range toolUsage { + toolNames = append(toolNames, strings.ToLower(tool.Name)) + } + toolJoined := strings.Join(toolNames, " ") + + switch { + case containsAny(combined, "release", "deploy", "publish", "backport", "changelog"): + return &TaskDomainInfo{Name: "release_ops", Label: "Release / Ops", Reason: "Workflow metadata matches release or operational automation."} + case containsAny(combined, "research", "investigat", "analysis", "analy", "report", "audit"): + return &TaskDomainInfo{Name: "research", Label: "Research", Reason: "Workflow naming and instructions suggest exploratory analysis or reporting."} + case containsAny(combined, "triage", "label", "classif", "route") || containsAny(createdJoined, "add_labels", "remove_labels", "set_issue_type"): + return &TaskDomainInfo{Name: "triage", Label: "Triage", Reason: "The run focused on classification, routing, or issue state updates."} + case containsAny(combined, "fix", "patch", "repair", "refactor", "swe", "code", "review") || containsAny(createdJoined, "create_pull_request_review_comment", "submit_pull_request_review"): + return &TaskDomainInfo{Name: "code_fix", Label: "Code Fix", Reason: "The workflow appears oriented toward code changes or pull request review."} + case containsAny(combined, "cleanup", "maint", "update", "deps", "sync", "housekeeping"): + return &TaskDomainInfo{Name: "repo_maintenance", Label: "Repo Maintenance", Reason: "Workflow metadata matches repository maintenance or update work."} + case containsAny(combined, "issue", "discussion", "comment", "support", "reply") || containsAny(createdJoined, "add_comment", "create_discussion"): + return &TaskDomainInfo{Name: "issue_response", Label: "Issue Response", Reason: "The run is primarily interacting with issue, discussion, or comment threads."} + case awContext 
!= nil: + return &TaskDomainInfo{Name: "delegated_automation", Label: "Delegated Automation", Reason: "The run was dispatched from an upstream workflow and is acting as a delegated task."} + case containsAny(toolJoined, "github_issue_read", "github-discussion-query"): + return &TaskDomainInfo{Name: "issue_response", Label: "Issue Response", Reason: "Tool usage centers on repository conversations and issue context."} + default: + return &TaskDomainInfo{Name: "general_automation", Label: "General Automation", Reason: "The run does not strongly match a narrower workflow domain yet."} + } +} + +func buildBehaviorFingerprint(processedRun ProcessedRun, metrics MetricsData, toolUsage []ToolUsageInfo, createdItems []CreatedItemReport, awContext *AwContext) *BehaviorFingerprint { + toolTypes := len(toolUsage) + writeCount := len(createdItems) + processedRun.Run.SafeItemsCount + + executionStyle := "directed" + switch { + case metrics.Turns >= 10 || toolTypes >= 6: + executionStyle = "exploratory" + case metrics.Turns >= 5 || toolTypes >= 4: + executionStyle = "adaptive" + } + + toolBreadth := "narrow" + switch { + case toolTypes >= 6: + toolBreadth = "broad" + case toolTypes >= 3: + toolBreadth = "moderate" + } + + actuationStyle := "read_only" + switch { + case writeCount >= 6: + actuationStyle = "write_heavy" + case writeCount > 0: + actuationStyle = "selective_write" + } + + resourceProfile := "lean" + switch { + case processedRun.Run.Duration >= 15*time.Minute || metrics.Turns >= 12 || toolTypes >= 6 || writeCount >= 8: + resourceProfile = "heavy" + case processedRun.Run.Duration >= 5*time.Minute || metrics.Turns >= 6 || toolTypes >= 4 || writeCount >= 3: + resourceProfile = "moderate" + } + + dispatchMode := "standalone" + if awContext != nil { + dispatchMode = "delegated" + } + + return &BehaviorFingerprint{ + ExecutionStyle: executionStyle, + ToolBreadth: toolBreadth, + ActuationStyle: actuationStyle, + ResourceProfile: resourceProfile, + DispatchMode: dispatchMode, 
+ } +} + +func buildAgenticAssessments(processedRun ProcessedRun, metrics MetricsData, toolUsage []ToolUsageInfo, createdItems []CreatedItemReport, domain *TaskDomainInfo, fingerprint *BehaviorFingerprint, awContext *AwContext) []AgenticAssessment { + if domain == nil || fingerprint == nil { + return nil + } + + assessments := make([]AgenticAssessment, 0, 4) + toolTypes := len(toolUsage) + frictionEvents := len(processedRun.MissingTools) + len(processedRun.MCPFailures) + len(processedRun.MissingData) + writeCount := len(createdItems) + processedRun.Run.SafeItemsCount + + if fingerprint.ResourceProfile == "heavy" { + severity := "medium" + if metrics.Turns >= 14 || toolTypes >= 7 || processedRun.Run.Duration >= 20*time.Minute { + severity = "high" + } + assessments = append(assessments, AgenticAssessment{ + Kind: "resource_heavy_for_domain", + Severity: severity, + Summary: fmt.Sprintf("This %s run consumed a heavy execution profile for its task shape.", domain.Label), + Evidence: fmt.Sprintf("turns=%d tool_types=%d duration=%s write_actions=%d", metrics.Turns, toolTypes, formatAssessmentDuration(processedRun.Run.Duration), writeCount), + Recommendation: "Compare this run to similar successful runs and trim unnecessary turns, tools, or write actions.", + }) + } + + if (domain.Name == "triage" || domain.Name == "repo_maintenance" || domain.Name == "issue_response") && fingerprint.ResourceProfile == "lean" && fingerprint.ExecutionStyle == "directed" && fingerprint.ToolBreadth == "narrow" { + assessments = append(assessments, AgenticAssessment{ + Kind: "overkill_for_agentic", + Severity: "low", + Summary: fmt.Sprintf("This %s run looks stable enough that deterministic automation may be a simpler fit.", domain.Label), + Evidence: fmt.Sprintf("turns=%d tool_types=%d actuation=%s", metrics.Turns, toolTypes, fingerprint.ActuationStyle), + Recommendation: "Consider whether a scripted rule or deterministic workflow step could replace this agentic path.", + }) + } + + if 
frictionEvents >= 3 || (frictionEvents > 0 && writeCount >= 3) || ((domain.Name == "triage" || domain.Name == "repo_maintenance" || domain.Name == "issue_response") && fingerprint.ExecutionStyle == "exploratory") { + severity := "medium" + if frictionEvents >= 4 || (frictionEvents > 0 && fingerprint.ActuationStyle == "write_heavy") { + severity = "high" + } + assessments = append(assessments, AgenticAssessment{ + Kind: "poor_agentic_control", + Severity: severity, + Summary: "The run showed signs of broad or weakly controlled agentic behavior.", + Evidence: fmt.Sprintf("friction=%d execution=%s actuation=%s", frictionEvents, fingerprint.ExecutionStyle, fingerprint.ActuationStyle), + Recommendation: "Tighten instructions, reduce unnecessary tools, or delay write actions until the workflow has stronger evidence.", + }) + } + + if awContext != nil { + assessments = append(assessments, AgenticAssessment{ + Kind: "delegated_context_present", + Severity: "info", + Summary: "The run preserved upstream dispatch context, which helps trace multi-workflow episodes.", + Evidence: fmt.Sprintf("workflow_call_id=%s event_type=%s", awContext.WorkflowCallID, awContext.EventType), + Recommendation: "Use this context when comparing downstream runs so follow-up workflows are evaluated as part of one task chain.", + }) + } + + return assessments +} + +func generateAgenticAssessmentFindings(assessments []AgenticAssessment) []Finding { + findings := make([]Finding, 0, len(assessments)) + for _, assessment := range assessments { + category := "agentic" + impact := "Review recommended" + switch assessment.Kind { + case "resource_heavy_for_domain": + category = "performance" + impact = "Higher cost and latency than a comparable well-behaved run" + case "overkill_for_agentic": + category = "optimization" + impact = "A deterministic implementation may be cheaper and easier to govern" + case "poor_agentic_control": + category = "agentic" + impact = "Broad or weakly controlled behavior can 
reduce trust even when the run succeeds" + case "delegated_context_present": + category = "coordination" + impact = "Context continuity improves downstream debugging and auditability" + } + findings = append(findings, Finding{ + Category: category, + Severity: assessment.Severity, + Title: prettifyAssessmentKind(assessment.Kind), + Description: assessment.Summary, + Impact: impact, + }) + } + return findings +} + +func generateAgenticAssessmentRecommendations(assessments []AgenticAssessment) []Recommendation { + recommendations := make([]Recommendation, 0, len(assessments)) + for _, assessment := range assessments { + if assessment.Recommendation == "" || assessment.Severity == "info" { + continue + } + priority := "medium" + if assessment.Severity == "high" { + priority = "high" + } + recommendations = append(recommendations, Recommendation{ + Priority: priority, + Action: assessment.Recommendation, + Reason: assessment.Summary, + }) + } + return recommendations +} + +func containsAny(value string, terms ...string) bool { + for _, term := range terms { + if strings.Contains(value, term) { + return true + } + } + return false +} + +func prettifyAssessmentKind(kind string) string { + switch kind { + case "resource_heavy_for_domain": + return "Resource Heavy For Domain" + case "overkill_for_agentic": + return "Potential Deterministic Alternative" + case "poor_agentic_control": + return "Weak Agentic Control" + case "delegated_context_present": + return "Dispatch Context Preserved" + default: + return strings.ReplaceAll(kind, "_", " ") + } +} + +func formatAssessmentDuration(duration time.Duration) string { + if duration <= 0 { + return "n/a" + } + return duration.String() +} diff --git a/pkg/cli/audit_agentic_analysis_test.go b/pkg/cli/audit_agentic_analysis_test.go new file mode 100644 index 00000000000..96fa46fd7d8 --- /dev/null +++ b/pkg/cli/audit_agentic_analysis_test.go @@ -0,0 +1,107 @@ +//go:build !integration + +package cli + +import ( + "testing" + "time" + 
+ "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDetectTaskDomain(t *testing.T) { + processedRun := ProcessedRun{ + Run: WorkflowRun{ + WorkflowName: "Weekly Research Report", + WorkflowPath: ".github/workflows/weekly-research.yml", + Event: "schedule", + }, + } + + domain := detectTaskDomain(processedRun, nil, nil, nil) + require.NotNil(t, domain, "domain should be detected") + assert.Equal(t, "research", domain.Name) + assert.Equal(t, "Research", domain.Label) +} + +func TestBuildAgenticAssessmentsFlagsPotentialDeterministicAlternative(t *testing.T) { + processedRun := ProcessedRun{ + Run: WorkflowRun{ + WorkflowName: "Issue Triage", + Turns: 2, + Duration: 2 * time.Minute, + }, + } + metrics := MetricsData{Turns: 2} + toolUsage := []ToolUsageInfo{{Name: "github_issue_read", CallCount: 1}} + domain := &TaskDomainInfo{Name: "triage", Label: "Triage"} + fingerprint := &BehaviorFingerprint{ + ExecutionStyle: "directed", + ToolBreadth: "narrow", + ActuationStyle: "read_only", + ResourceProfile: "lean", + DispatchMode: "standalone", + } + + assessments := buildAgenticAssessments(processedRun, metrics, toolUsage, nil, domain, fingerprint, nil) + require.NotEmpty(t, assessments) + assert.Equal(t, "overkill_for_agentic", assessments[0].Kind) +} + +func TestBuildAgenticAssessmentsFlagsResourceHeavyRun(t *testing.T) { + processedRun := ProcessedRun{ + Run: WorkflowRun{ + WorkflowName: "Deep Research", + Turns: 15, + Duration: 22 * time.Minute, + SafeItemsCount: 4, + }, + } + metrics := MetricsData{Turns: 15} + toolUsage := []ToolUsageInfo{ + {Name: "bash", CallCount: 4}, + {Name: "grep", CallCount: 3}, + {Name: "gh", CallCount: 2}, + {Name: "github_issue_read", CallCount: 2}, + {Name: "sed", CallCount: 1}, + {Name: "cat", CallCount: 1}, + {Name: "jq", CallCount: 1}, + } + domain := &TaskDomainInfo{Name: "research", Label: "Research"} + fingerprint := buildBehaviorFingerprint(processedRun, metrics, toolUsage, 
[]CreatedItemReport{{Type: "create_issue"}}, nil) + + assessments := buildAgenticAssessments(processedRun, metrics, toolUsage, []CreatedItemReport{{Type: "create_issue"}}, domain, fingerprint, nil) + + var found bool + for _, assessment := range assessments { + if assessment.Kind == "resource_heavy_for_domain" { + found = true + assert.Equal(t, "high", assessment.Severity) + } + } + assert.True(t, found, "resource heavy assessment should be present") +} + +func TestBuildAuditDataIncludesAgenticAnalysis(t *testing.T) { + processedRun := ProcessedRun{ + Run: WorkflowRun{ + DatabaseID: 7, + WorkflowName: "Issue Triage", + WorkflowPath: ".github/workflows/issue-triage.yml", + Status: "completed", + Conclusion: "success", + Duration: 3 * time.Minute, + Turns: 3, + Event: "issues", + LogsPath: t.TempDir(), + }, + } + metrics := LogMetrics{Turns: 3} + + auditData := buildAuditData(processedRun, metrics, nil) + require.NotNil(t, auditData.TaskDomain, "task domain should be present") + require.NotNil(t, auditData.BehaviorFingerprint, "behavioral fingerprint should be present") + assert.NotEmpty(t, auditData.AgenticAssessments, "agentic assessments should be present") + assert.Equal(t, "triage", auditData.TaskDomain.Name) +} diff --git a/pkg/cli/audit_comparison.go b/pkg/cli/audit_comparison.go index c8db2c622dc..d76600b4f54 100644 --- a/pkg/cli/audit_comparison.go +++ b/pkg/cli/audit_comparison.go @@ -6,6 +6,7 @@ import ( "net/url" "os" "path/filepath" + "slices" "sort" "strings" @@ -21,10 +22,12 @@ type AuditComparisonData struct { } type AuditComparisonBaseline struct { - RunID int64 `json:"run_id"` - WorkflowName string `json:"workflow_name,omitempty"` - Conclusion string `json:"conclusion,omitempty"` - CreatedAt string `json:"created_at,omitempty"` + RunID int64 `json:"run_id"` + WorkflowName string `json:"workflow_name,omitempty"` + Conclusion string `json:"conclusion,omitempty"` + CreatedAt string `json:"created_at,omitempty"` + Selection string 
`json:"selection,omitempty"` + MatchedOn []string `json:"matched_on,omitempty"` } type AuditComparisonDelta struct { @@ -68,6 +71,18 @@ type auditComparisonSnapshot struct { MCPFailures []string } +type auditComparisonCandidate struct { + Run WorkflowRun + Snapshot auditComparisonSnapshot + TaskDomain *TaskDomainInfo + BehaviorFingerprint *BehaviorFingerprint + Selection string + MatchedOn []string + Score int +} + +const maxAuditComparisonCandidates = 10 + func buildAuditComparisonSnapshot(processedRun ProcessedRun, createdItems []CreatedItemReport) auditComparisonSnapshot { blockedRequests := 0 if processedRun.FirewallAnalysis != nil { @@ -111,6 +126,193 @@ func loadAuditComparisonSnapshotFromArtifacts(run WorkflowRun, logsPath string, }, nil } +func buildAuditComparisonCandidateFromSummary(summary *RunSummary, logsPath string) auditComparisonCandidate { + posture := "read_only" + if summary.Run.SafeItemsCount > 0 || len(extractCreatedItemsFromManifest(logsPath)) > 0 { + posture = "write_capable" + } + + blockedRequests := 0 + if summary.FirewallAnalysis != nil { + blockedRequests = summary.FirewallAnalysis.BlockedRequests + } + + return auditComparisonCandidate{ + Run: summary.Run, + Snapshot: auditComparisonSnapshot{ + Turns: summary.Metrics.Turns, + Posture: posture, + BlockedRequests: blockedRequests, + MCPFailures: collectMCPFailureServers(summary.MCPFailures), + }, + TaskDomain: summary.TaskDomain, + BehaviorFingerprint: summary.BehaviorFingerprint, + } +} + +func buildAuditComparisonCandidateFromProcessedRun(processedRun ProcessedRun) auditComparisonCandidate { + return auditComparisonCandidate{ + Run: processedRun.Run, + Snapshot: buildAuditComparisonSnapshot(processedRun, extractCreatedItemsFromManifest(processedRun.Run.LogsPath)), + TaskDomain: processedRun.TaskDomain, + BehaviorFingerprint: processedRun.BehaviorFingerprint, + } +} + +func loadAuditComparisonCandidate(run WorkflowRun, logsPath string, verbose bool) (auditComparisonCandidate, error) { + 
if summary, ok := loadRunSummary(logsPath, false); ok && summary != nil { + candidate := buildAuditComparisonCandidateFromSummary(summary, logsPath) + candidate.Run = run + return candidate, nil + } + + snapshot, err := loadAuditComparisonSnapshotFromArtifacts(run, logsPath, verbose) + if err != nil { + return auditComparisonCandidate{}, err + } + + processedRun := ProcessedRun{Run: run} + metrics, metricsErr := extractLogMetrics(logsPath, verbose, run.WorkflowPath) + if metricsErr == nil { + processedRun.Run.TokenUsage = metrics.TokenUsage + processedRun.Run.EstimatedCost = metrics.EstimatedCost + processedRun.Run.Turns = metrics.Turns + } + if firewallAnalysis, firewallErr := analyzeFirewallLogs(logsPath, verbose); firewallErr == nil { + processedRun.FirewallAnalysis = firewallAnalysis + } + if mcpFailures, mcpErr := extractMCPFailuresFromRun(logsPath, run, verbose); mcpErr == nil { + processedRun.MCPFailures = mcpFailures + } + awContext, _, _, taskDomain, behaviorFingerprint, _ := deriveRunAgenticAnalysis(processedRun, metrics) + processedRun.AwContext = awContext + + return auditComparisonCandidate{ + Run: run, + Snapshot: snapshot, + TaskDomain: taskDomain, + BehaviorFingerprint: behaviorFingerprint, + Selection: "latest_success", + MatchedOn: nil, + Score: 0, + }, nil +} + +func scoreAuditComparisonCandidate(current ProcessedRun, candidate *auditComparisonCandidate) { + if candidate == nil { + return + } + + score := 0 + matchedOn := make([]string, 0, 6) + + if current.Run.Event != "" && current.Run.Event == candidate.Run.Event { + score += 5 + matchedOn = append(matchedOn, "event") + } + + if current.TaskDomain != nil && candidate.TaskDomain != nil && current.TaskDomain.Name == candidate.TaskDomain.Name { + score += 50 + matchedOn = append(matchedOn, "task_domain") + } + + if current.BehaviorFingerprint != nil && candidate.BehaviorFingerprint != nil { + if current.BehaviorFingerprint.ExecutionStyle == candidate.BehaviorFingerprint.ExecutionStyle { + score 
+= 20 + matchedOn = append(matchedOn, "execution_style") + } + if current.BehaviorFingerprint.ResourceProfile == candidate.BehaviorFingerprint.ResourceProfile { + score += 25 + matchedOn = append(matchedOn, "resource_profile") + } + if current.BehaviorFingerprint.ActuationStyle == candidate.BehaviorFingerprint.ActuationStyle { + score += 10 + matchedOn = append(matchedOn, "actuation_style") + } + if current.BehaviorFingerprint.DispatchMode == candidate.BehaviorFingerprint.DispatchMode { + score += 5 + matchedOn = append(matchedOn, "dispatch_mode") + } + if current.BehaviorFingerprint.ToolBreadth == candidate.BehaviorFingerprint.ToolBreadth { + score += 2 + matchedOn = append(matchedOn, "tool_breadth") + } + } + + candidate.Score = score + if slices.Contains(matchedOn, "task_domain") || slices.Contains(matchedOn, "execution_style") || slices.Contains(matchedOn, "resource_profile") || slices.Contains(matchedOn, "actuation_style") { + candidate.Selection = "cohort_match" + candidate.MatchedOn = matchedOn + return + } + + candidate.Selection = "latest_success" + candidate.MatchedOn = nil +} + +func selectAuditComparisonBaseline(current ProcessedRun, candidates []auditComparisonCandidate) *auditComparisonCandidate { + if len(candidates) == 0 { + return nil + } + + for index := range candidates { + scoreAuditComparisonCandidate(current, &candidates[index]) + } + + sort.SliceStable(candidates, func(left, right int) bool { + if candidates[left].Score != candidates[right].Score { + return candidates[left].Score > candidates[right].Score + } + return candidates[left].Run.CreatedAt.After(candidates[right].Run.CreatedAt) + }) + + return &candidates[0] +} + +func sameAuditComparisonWorkflow(left WorkflowRun, right WorkflowRun) bool { + if left.WorkflowPath != "" && right.WorkflowPath != "" { + return left.WorkflowPath == right.WorkflowPath + } + if left.WorkflowName != "" && right.WorkflowName != "" { + return left.WorkflowName == right.WorkflowName + } + return false +} + 
+func buildAuditComparisonForProcessedRuns(currentRun ProcessedRun, processedRuns []ProcessedRun) *AuditComparisonData { + currentSnapshot := buildAuditComparisonSnapshot(currentRun, extractCreatedItemsFromManifest(currentRun.Run.LogsPath)) + candidates := make([]auditComparisonCandidate, 0, len(processedRuns)) + + for _, candidateRun := range processedRuns { + if candidateRun.Run.DatabaseID == currentRun.Run.DatabaseID { + continue + } + if candidateRun.Run.Conclusion != "success" { + continue + } + if !candidateRun.Run.CreatedAt.Before(currentRun.Run.CreatedAt) { + continue + } + if !sameAuditComparisonWorkflow(currentRun.Run, candidateRun.Run) { + continue + } + + candidates = append(candidates, buildAuditComparisonCandidateFromProcessedRun(candidateRun)) + } + + selected := selectAuditComparisonBaseline(currentRun, candidates) + if selected == nil { + return &AuditComparisonData{BaselineFound: false} + } + + comparison := buildAuditComparison(currentSnapshot, &selected.Run, &selected.Snapshot) + if comparison != nil && comparison.Baseline != nil { + comparison.Baseline.Selection = selected.Selection + comparison.Baseline.MatchedOn = selected.MatchedOn + } + return comparison +} + func buildAuditComparison(current auditComparisonSnapshot, baselineRun *WorkflowRun, baseline *auditComparisonSnapshot) *AuditComparisonData { if baselineRun == nil || baseline == nil { return &AuditComparisonData{BaselineFound: false} @@ -189,6 +391,7 @@ func buildAuditComparison(current auditComparisonSnapshot, baselineRun *Workflow WorkflowName: baselineRun.WorkflowName, Conclusion: baselineRun.Conclusion, CreatedAt: baselineRun.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), + Selection: "latest_success", }, Delta: delta, Classification: &AuditComparisonClassification{ @@ -203,7 +406,7 @@ func buildAuditComparison(current auditComparisonSnapshot, baselineRun *Workflow func recommendAuditComparisonAction(label string, delta *AuditComparisonDelta) string { if delta == nil || label == 
"stable" { - return "No action needed; this run matches the last successful baseline closely." + return "No action needed; this run matches the selected successful baseline closely." } if delta.Posture.Before == "read_only" && delta.Posture.After == "write_capable" { @@ -216,10 +419,10 @@ func recommendAuditComparisonAction(label string, delta *AuditComparisonDelta) s return "Review network policy changes before treating the new blocked requests as normal behavior." } if delta.Turns.After > delta.Turns.Before { - return "Compare prompt or task-shape changes because this run needed more turns than the last successful baseline." + return "Compare prompt or task-shape changes because this run needed more turns than the selected successful baseline." } - return "Review the behavior change against the previous successful run before treating it as the new normal." + return "Review the behavior change against the selected successful baseline before treating it as the new normal." } func deriveAuditPosture(createdItems []CreatedItemReport) string { @@ -250,7 +453,8 @@ func collectMCPFailureServers(failures []MCPFailureReport) []string { return servers } -func findPreviousSuccessfulWorkflowRun(current WorkflowRun, owner, repo, hostname string, verbose bool) (*WorkflowRun, error) { +func findPreviousSuccessfulWorkflowRuns(current WorkflowRun, owner, repo, hostname string, verbose bool) ([]WorkflowRun, error) { + _ = verbose workflowID := filepath.Base(current.WorkflowPath) if workflowID == "." 
|| workflowID == "" { return nil, fmt.Errorf("workflow path unavailable for run %d", current.DatabaseID) @@ -259,12 +463,12 @@ func findPreviousSuccessfulWorkflowRun(current WorkflowRun, owner, repo, hostnam encodedWorkflowID := url.PathEscape(workflowID) var endpoint string if owner != "" && repo != "" { - endpoint = fmt.Sprintf("repos/%s/%s/actions/workflows/%s/runs?per_page=50", owner, repo, encodedWorkflowID) + endpoint = fmt.Sprintf("repos/%s/%s/actions/workflows/%s/runs?per_page=%d", owner, repo, encodedWorkflowID, maxAuditComparisonCandidates) } else { - endpoint = fmt.Sprintf("repos/{owner}/{repo}/actions/workflows/%s/runs?per_page=50", encodedWorkflowID) + endpoint = fmt.Sprintf("repos/{owner}/{repo}/actions/workflows/%s/runs?per_page=%d", encodedWorkflowID, maxAuditComparisonCandidates) } - jq := fmt.Sprintf(`[.workflow_runs[] | select(.id != %d and .conclusion == "success" and .created_at < "%s") | {databaseId: .id, number: .run_number, url: .html_url, status: .status, conclusion: .conclusion, workflowName: .name, workflowPath: .path, createdAt: .created_at, startedAt: .run_started_at, updatedAt: .updated_at, event: .event, headBranch: .head_branch, headSha: .head_sha, displayTitle: .display_title}] | .[0]`, current.DatabaseID, current.CreatedAt.Format("2006-01-02T15:04:05Z07:00")) + jq := fmt.Sprintf(`[.workflow_runs[] | select(.id != %d and .conclusion == "success" and .created_at < "%s") | {databaseId: .id, number: .run_number, url: .html_url, status: .status, conclusion: .conclusion, workflowName: .name, workflowPath: .path, createdAt: .created_at, startedAt: .run_started_at, updatedAt: .updated_at, event: .event, headBranch: .head_branch, headSha: .head_sha, displayTitle: .display_title}]`, current.DatabaseID, current.CreatedAt.Format("2006-01-02T15:04:05Z07:00")) args := []string{"api"} if hostname != "" && hostname != "github.com" { @@ -278,47 +482,63 @@ func findPreviousSuccessfulWorkflowRun(current WorkflowRun, owner, repo, hostnam } trimmed := 
strings.TrimSpace(string(output)) - if trimmed == "null" || trimmed == "" { + if trimmed == "null" || trimmed == "" || trimmed == "[]" { return nil, nil } - var run WorkflowRun - if err := json.Unmarshal(output, &run); err != nil { - return nil, fmt.Errorf("failed to parse previous successful workflow run: %w", err) + var runs []WorkflowRun + if err := json.Unmarshal(output, &runs); err != nil { + return nil, fmt.Errorf("failed to parse previous successful workflow runs: %w", err) } - if strings.HasPrefix(run.WorkflowName, ".github/") { - if displayName := resolveWorkflowDisplayName(run.WorkflowPath, owner, repo, hostname); displayName != "" { - run.WorkflowName = displayName + for index := range runs { + if strings.HasPrefix(runs[index].WorkflowName, ".github/") { + if displayName := resolveWorkflowDisplayName(runs[index].WorkflowPath, owner, repo, hostname); displayName != "" { + runs[index].WorkflowName = displayName + } } } - return &run, nil + return runs, nil } -func buildAuditComparisonForRun(currentRun WorkflowRun, currentSnapshot auditComparisonSnapshot, outputDir string, owner, repo, hostname string, verbose bool) *AuditComparisonData { - baselineRun, err := findPreviousSuccessfulWorkflowRun(currentRun, owner, repo, hostname, verbose) +func buildAuditComparisonForRun(currentRun ProcessedRun, currentSnapshot auditComparisonSnapshot, outputDir string, owner, repo, hostname string, verbose bool) *AuditComparisonData { + baselineRuns, err := findPreviousSuccessfulWorkflowRuns(currentRun.Run, owner, repo, hostname, verbose) if err != nil { auditLog.Printf("Skipping audit comparison: failed to find baseline: %v", err) return &AuditComparisonData{BaselineFound: false} } - if baselineRun == nil { + if len(baselineRuns) == 0 { return &AuditComparisonData{BaselineFound: false} } - baselineOutputDir := filepath.Join(outputDir, fmt.Sprintf("baseline-%d", baselineRun.DatabaseID)) - if _, err := os.Stat(baselineOutputDir); err != nil { - if downloadErr := 
downloadRunArtifacts(baselineRun.DatabaseID, baselineOutputDir, verbose, owner, repo, hostname); downloadErr != nil { - auditLog.Printf("Skipping baseline comparison for run %d: failed to download baseline artifacts: %v", baselineRun.DatabaseID, downloadErr) - return &AuditComparisonData{BaselineFound: false} + candidates := make([]auditComparisonCandidate, 0, len(baselineRuns)) + for _, baselineRun := range baselineRuns { + baselineOutputDir := filepath.Join(outputDir, fmt.Sprintf("baseline-%d", baselineRun.DatabaseID)) + if _, err := os.Stat(baselineOutputDir); err != nil { + if downloadErr := downloadRunArtifacts(baselineRun.DatabaseID, baselineOutputDir, verbose, owner, repo, hostname); downloadErr != nil { + auditLog.Printf("Skipping candidate baseline for run %d: failed to download baseline artifacts: %v", baselineRun.DatabaseID, downloadErr) + continue + } } + + candidate, candidateErr := loadAuditComparisonCandidate(baselineRun, baselineOutputDir, verbose) + if candidateErr != nil { + auditLog.Printf("Skipping candidate baseline for run %d: failed to load baseline snapshot: %v", baselineRun.DatabaseID, candidateErr) + continue + } + candidates = append(candidates, candidate) } - baselineSnapshot, err := loadAuditComparisonSnapshotFromArtifacts(*baselineRun, baselineOutputDir, verbose) - if err != nil { - auditLog.Printf("Skipping baseline comparison for run %d: failed to load baseline snapshot: %v", baselineRun.DatabaseID, err) + selected := selectAuditComparisonBaseline(currentRun, candidates) + if selected == nil { return &AuditComparisonData{BaselineFound: false} } - return buildAuditComparison(currentSnapshot, baselineRun, &baselineSnapshot) + comparison := buildAuditComparison(currentSnapshot, &selected.Run, &selected.Snapshot) + if comparison != nil && comparison.Baseline != nil { + comparison.Baseline.Selection = selected.Selection + comparison.Baseline.MatchedOn = selected.MatchedOn + } + return comparison } diff --git 
a/pkg/cli/audit_comparison_test.go b/pkg/cli/audit_comparison_test.go index c3952c84aa3..3296140dc0a 100644 --- a/pkg/cli/audit_comparison_test.go +++ b/pkg/cli/audit_comparison_test.go @@ -65,3 +65,73 @@ func TestBuildAuditComparison_StableRun(t *testing.T) { assert.Empty(t, comparison.Classification.ReasonCodes, "stable runs should have no reason codes") assert.Contains(t, comparison.Recommendation.Action, "No action needed", "stable runs should produce a no-op recommendation") } + +func TestSelectAuditComparisonBaselinePrefersCohortMatchOverRecency(t *testing.T) { + current := ProcessedRun{ + Run: WorkflowRun{ + Event: "issues", + }, + TaskDomain: &TaskDomainInfo{Name: "triage", Label: "Triage"}, + BehaviorFingerprint: &BehaviorFingerprint{ + ExecutionStyle: "directed", + ToolBreadth: "narrow", + ActuationStyle: "read_only", + ResourceProfile: "lean", + DispatchMode: "standalone", + }, + } + + candidates := []auditComparisonCandidate{ + { + Run: WorkflowRun{ + DatabaseID: 200, + CreatedAt: time.Date(2026, 3, 23, 12, 0, 0, 0, time.UTC), + Event: "push", + }, + TaskDomain: &TaskDomainInfo{Name: "release_ops", Label: "Release / Ops"}, + BehaviorFingerprint: &BehaviorFingerprint{ + ExecutionStyle: "adaptive", + ToolBreadth: "moderate", + ActuationStyle: "selective_write", + ResourceProfile: "moderate", + DispatchMode: "standalone", + }, + }, + { + Run: WorkflowRun{ + DatabaseID: 150, + CreatedAt: time.Date(2026, 3, 22, 12, 0, 0, 0, time.UTC), + Event: "issues", + }, + TaskDomain: &TaskDomainInfo{Name: "triage", Label: "Triage"}, + BehaviorFingerprint: &BehaviorFingerprint{ + ExecutionStyle: "directed", + ToolBreadth: "narrow", + ActuationStyle: "read_only", + ResourceProfile: "lean", + DispatchMode: "standalone", + }, + }, + } + + selected := selectAuditComparisonBaseline(current, candidates) + require.NotNil(t, selected, "baseline should be selected") + assert.Equal(t, int64(150), selected.Run.DatabaseID, "cohort-matching run should beat the more recent but 
behaviorally different run") + assert.Equal(t, "cohort_match", selected.Selection) + assert.Contains(t, selected.MatchedOn, "task_domain") + assert.Contains(t, selected.MatchedOn, "resource_profile") + assert.Positive(t, selected.Score, "cohort match should have a positive score") +} + +func TestScoreAuditComparisonCandidateFallsBackToLatestSuccess(t *testing.T) { + current := ProcessedRun{Run: WorkflowRun{Event: "issues"}} + candidate := auditComparisonCandidate{ + Run: WorkflowRun{DatabaseID: 300, CreatedAt: time.Date(2026, 3, 21, 12, 0, 0, 0, time.UTC), Event: "push"}, + } + + scoreAuditComparisonCandidate(current, &candidate) + + assert.Equal(t, 0, candidate.Score) + assert.Equal(t, "latest_success", candidate.Selection) + assert.Nil(t, candidate.MatchedOn) +} diff --git a/pkg/cli/audit_report.go b/pkg/cli/audit_report.go index 90bde3a7635..01703b68737 100644 --- a/pkg/cli/audit_report.go +++ b/pkg/cli/audit_report.go @@ -13,7 +13,6 @@ import ( "github.com/github/gh-aw/pkg/logger" "github.com/github/gh-aw/pkg/sliceutil" "github.com/github/gh-aw/pkg/timeutil" - "github.com/github/gh-aw/pkg/workflow" ) var auditReportLog = logger.New("cli:audit_report") @@ -22,6 +21,9 @@ var auditReportLog = logger.New("cli:audit_report") type AuditData struct { Overview OverviewData `json:"overview"` Comparison *AuditComparisonData `json:"comparison,omitempty"` + TaskDomain *TaskDomainInfo `json:"task_domain,omitempty"` + BehaviorFingerprint *BehaviorFingerprint `json:"behavior_fingerprint,omitempty"` + AgenticAssessments []AgenticAssessment `json:"agentic_assessments,omitempty"` Metrics MetricsData `json:"metrics"` KeyFindings []Finding `json:"key_findings,omitempty"` Recommendations []Recommendation `json:"recommendations,omitempty"` @@ -219,14 +221,6 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage overview.LogsPath = run.LogsPath } - // Parse aw_info.json to extract aw_context if present - if run.LogsPath != "" { - awInfoPath := 
filepath.Join(run.LogsPath, "aw_info.json") - if info, err := parseAwInfo(awInfoPath, false); err == nil && info != nil { - overview.AwContext = info.Context - } - } - if run.Duration > 0 { overview.Duration = timeutil.FormatDuration(run.Duration) } @@ -268,49 +262,16 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage } } - // Build tool usage - var toolUsage []ToolUsageInfo - toolStats := make(map[string]*ToolUsageInfo) - for _, toolCall := range metrics.ToolCalls { - displayKey := workflow.PrettifyToolName(toolCall.Name) - if existing, exists := toolStats[displayKey]; exists { - existing.CallCount += toolCall.CallCount - if toolCall.MaxInputSize > existing.MaxInputSize { - existing.MaxInputSize = toolCall.MaxInputSize - } - if toolCall.MaxOutputSize > existing.MaxOutputSize { - existing.MaxOutputSize = toolCall.MaxOutputSize - } - if toolCall.MaxDuration > 0 { - maxDur := timeutil.FormatDuration(toolCall.MaxDuration) - if existing.MaxDuration == "" || toolCall.MaxDuration > parseDurationString(existing.MaxDuration) { - existing.MaxDuration = maxDur - } - } - } else { - info := &ToolUsageInfo{ - Name: displayKey, - CallCount: toolCall.CallCount, - MaxInputSize: toolCall.MaxInputSize, - MaxOutputSize: toolCall.MaxOutputSize, - } - if toolCall.MaxDuration > 0 { - info.MaxDuration = timeutil.FormatDuration(toolCall.MaxDuration) - } - toolStats[displayKey] = info - } - } - for _, info := range toolStats { - toolUsage = append(toolUsage, *info) - } - - createdItems := extractCreatedItemsFromManifest(run.LogsPath) + awContext, toolUsage, createdItems, taskDomain, behaviorFingerprint, agenticAssessments := deriveRunAgenticAnalysis(processedRun, metrics) + overview.AwContext = awContext // Generate key findings findings := generateFindings(processedRun, metricsData, errors, warnings) + findings = append(findings, generateAgenticAssessmentFindings(agenticAssessments)...) 
// Generate recommendations recommendations := generateRecommendations(processedRun, metricsData, findings) + recommendations = append(recommendations, generateAgenticAssessmentRecommendations(agenticAssessments)...) observabilityInsights := buildAuditObservabilityInsights(processedRun, metricsData, toolUsage, createdItems) @@ -324,6 +285,9 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage return AuditData{ Overview: overview, + TaskDomain: taskDomain, + BehaviorFingerprint: behaviorFingerprint, + AgenticAssessments: agenticAssessments, Metrics: metricsData, KeyFindings: findings, Recommendations: recommendations, diff --git a/pkg/cli/audit_report_render.go b/pkg/cli/audit_report_render.go index 87fab938ce3..b621d1b610a 100644 --- a/pkg/cli/audit_report_render.go +++ b/pkg/cli/audit_report_render.go @@ -33,11 +33,29 @@ func renderConsole(data AuditData, logsPath string) { renderOverview(data.Overview) if data.Comparison != nil { - fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Comparison To Last Successful Run")) + fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Comparison To Similar Successful Run")) fmt.Fprintln(os.Stderr) renderAuditComparison(data.Comparison) } + if data.TaskDomain != nil { + fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Detected Task Domain")) + fmt.Fprintln(os.Stderr) + renderTaskDomain(data.TaskDomain) + } + + if data.BehaviorFingerprint != nil { + fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Behavioral Fingerprint")) + fmt.Fprintln(os.Stderr) + renderBehaviorFingerprint(data.BehaviorFingerprint) + } + + if len(data.AgenticAssessments) > 0 { + fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Agentic Assessment")) + fmt.Fprintln(os.Stderr) + renderAgenticAssessments(data.AgenticAssessments) + } + // Key Findings Section - NEW if len(data.KeyFindings) > 0 { auditReportLog.Printf("Rendering %d key findings", len(data.KeyFindings)) @@ -198,7 +216,7 @@ func 
renderAuditComparison(comparison *AuditComparisonData) { } if !comparison.BaselineFound || comparison.Baseline == nil || comparison.Delta == nil || comparison.Classification == nil { - fmt.Fprintln(os.Stderr, " No previous successful run was available for baseline comparison.") + fmt.Fprintln(os.Stderr, " No suitable successful run was available for baseline comparison.") fmt.Fprintln(os.Stderr) return } @@ -208,6 +226,12 @@ func renderAuditComparison(comparison *AuditComparisonData) { fmt.Fprintf(os.Stderr, " (%s)", comparison.Baseline.Conclusion) } fmt.Fprintln(os.Stderr) + if comparison.Baseline.Selection != "" { + fmt.Fprintf(os.Stderr, " Selection: %s\n", strings.ReplaceAll(comparison.Baseline.Selection, "_", " ")) + } + if len(comparison.Baseline.MatchedOn) > 0 { + fmt.Fprintf(os.Stderr, " Matched on: %s\n", strings.Join(comparison.Baseline.MatchedOn, ", ")) + } fmt.Fprintf(os.Stderr, " Classification: %s\n", comparison.Classification.Label) fmt.Fprintln(os.Stderr, " Changes:") @@ -224,7 +248,7 @@ func renderAuditComparison(comparison *AuditComparisonData) { fmt.Fprintf(os.Stderr, " - New MCP failure: %s\n", strings.Join(comparison.Delta.MCPFailure.After, ", ")) } if len(comparison.Classification.ReasonCodes) == 0 { - fmt.Fprintln(os.Stderr, " - No meaningful behavior change from the last successful baseline") + fmt.Fprintln(os.Stderr, " - No meaningful behavior change from the selected successful baseline") } if comparison.Recommendation != nil && comparison.Recommendation.Action != "" { fmt.Fprintf(os.Stderr, " Recommended action: %s\n", comparison.Recommendation.Action) @@ -259,6 +283,56 @@ func renderMetrics(metrics MetricsData) { fmt.Fprint(os.Stderr, console.RenderStruct(metrics)) } +type taskDomainDisplay struct { + Domain string `console:"header:Domain"` + Reason string `console:"header:Reason"` +} + +type behaviorFingerprintDisplay struct { + Execution string `console:"header:Execution"` + Tools string `console:"header:Tools"` + Actuation string 
`console:"header:Actuation"` + Resource string `console:"header:Resources"` + Dispatch string `console:"header:Dispatch"` +} + +func renderTaskDomain(domain *TaskDomainInfo) { + if domain == nil { + return + } + fmt.Fprint(os.Stderr, console.RenderStruct(taskDomainDisplay{ + Domain: domain.Label, + Reason: domain.Reason, + })) +} + +func renderBehaviorFingerprint(fingerprint *BehaviorFingerprint) { + if fingerprint == nil { + return + } + fmt.Fprint(os.Stderr, console.RenderStruct(behaviorFingerprintDisplay{ + Execution: fingerprint.ExecutionStyle, + Tools: fingerprint.ToolBreadth, + Actuation: fingerprint.ActuationStyle, + Resource: fingerprint.ResourceProfile, + Dispatch: fingerprint.DispatchMode, + })) +} + +func renderAgenticAssessments(assessments []AgenticAssessment) { + for _, assessment := range assessments { + severity := strings.ToUpper(assessment.Severity) + fmt.Fprintf(os.Stderr, " [%s] %s\n", severity, assessment.Summary) + if assessment.Evidence != "" { + fmt.Fprintf(os.Stderr, " Evidence: %s\n", assessment.Evidence) + } + if assessment.Recommendation != "" { + fmt.Fprintf(os.Stderr, " Recommendation: %s\n", assessment.Recommendation) + } + fmt.Fprintln(os.Stderr) + } +} + // renderJobsTable renders the jobs as a table using console.RenderTable func renderJobsTable(jobs []JobData) { auditReportLog.Printf("Rendering jobs table with %d jobs", len(jobs)) diff --git a/pkg/cli/logs_json_test.go b/pkg/cli/logs_json_test.go index 5f205797a03..26edf14ff32 100644 --- a/pkg/cli/logs_json_test.go +++ b/pkg/cli/logs_json_test.go @@ -23,6 +23,7 @@ func TestBuildLogsData(t *testing.T) { DatabaseID: 12345, Number: 1, WorkflowName: "Test Workflow", + WorkflowPath: ".github/workflows/test-workflow.yml", Status: "completed", Conclusion: "success", Duration: 5 * time.Minute, @@ -38,6 +39,24 @@ func TestBuildLogsData(t *testing.T) { Event: "push", HeadBranch: "main", }, + TaskDomain: &TaskDomainInfo{ + Name: "triage", + Label: "Triage", + }, + BehaviorFingerprint: 
&BehaviorFingerprint{ + ExecutionStyle: "directed", + ToolBreadth: "narrow", + ActuationStyle: "read_only", + ResourceProfile: "lean", + DispatchMode: "standalone", + }, + AgenticAssessments: []AgenticAssessment{ + { + Kind: "overkill_for_agentic", + Severity: "low", + Summary: "Deterministic automation may be a better fit.", + }, + }, MissingTools: []MissingToolReport{}, MCPFailures: []MCPFailureReport{}, }, @@ -46,6 +65,7 @@ func TestBuildLogsData(t *testing.T) { DatabaseID: 12346, Number: 2, WorkflowName: "Test Workflow", + WorkflowPath: ".github/workflows/test-workflow.yml", Status: "completed", Conclusion: "failure", Duration: 3 * time.Minute, @@ -61,6 +81,17 @@ func TestBuildLogsData(t *testing.T) { Event: "pull_request", HeadBranch: "feature", }, + TaskDomain: &TaskDomainInfo{ + Name: "triage", + Label: "Triage", + }, + BehaviorFingerprint: &BehaviorFingerprint{ + ExecutionStyle: "directed", + ToolBreadth: "narrow", + ActuationStyle: "read_only", + ResourceProfile: "lean", + DispatchMode: "standalone", + }, MissingTools: []MissingToolReport{ { Tool: "github_search", @@ -109,6 +140,30 @@ func TestBuildLogsData(t *testing.T) { if logsData.Runs[0].DatabaseID != 12345 { t.Errorf("Expected DatabaseID 12345, got %d", logsData.Runs[0].DatabaseID) } + if logsData.Runs[0].TaskDomain == nil || logsData.Runs[0].TaskDomain.Name != "triage" { + t.Fatalf("Expected first run to include task domain, got %+v", logsData.Runs[0].TaskDomain) + } + if logsData.Runs[0].BehaviorFingerprint == nil || logsData.Runs[0].BehaviorFingerprint.ResourceProfile != "lean" { + t.Fatalf("Expected first run to include behavior fingerprint, got %+v", logsData.Runs[0].BehaviorFingerprint) + } + if len(logsData.Runs[0].AgenticAssessments) != 1 { + t.Fatalf("Expected first run to include 1 agentic assessment, got %d", len(logsData.Runs[0].AgenticAssessments)) + } + if logsData.Runs[0].Comparison == nil { + t.Fatal("Expected first run to include comparison payload") + } + if 
logsData.Runs[0].Comparison.BaselineFound { + t.Fatal("Expected oldest run to have no baseline in logs comparison") + } + if logsData.Runs[1].Comparison == nil || !logsData.Runs[1].Comparison.BaselineFound { + t.Fatalf("Expected newer run to include a baseline comparison, got %+v", logsData.Runs[1].Comparison) + } + if logsData.Runs[1].Comparison.Baseline == nil || logsData.Runs[1].Comparison.Baseline.Selection != "cohort_match" { + t.Fatalf("Expected newer run to use cohort_match baseline, got %+v", logsData.Runs[1].Comparison.Baseline) + } + if logsData.Runs[1].Comparison.Baseline == nil || logsData.Runs[1].Comparison.Baseline.RunID != 12345 { + t.Fatalf("Expected newer run baseline to point to run 12345, got %+v", logsData.Runs[1].Comparison.Baseline) + } // Duration format from formatDuration is "5.0m", not "5m0s" if logsData.Runs[0].Duration == "" { t.Errorf("Expected non-empty Duration, got empty string") @@ -157,6 +212,14 @@ func TestRenderLogsJSON(t *testing.T) { LogsPath: filepath.Join(tmpDir, "run-12345"), Event: "push", Branch: "main", + Comparison: &AuditComparisonData{ + BaselineFound: true, + Baseline: &AuditComparisonBaseline{ + RunID: 12000, + Selection: "cohort_match", + MatchedOn: []string{"task_domain", "resource_profile"}, + }, + }, }, }, LogsLocation: tmpDir, @@ -197,6 +260,9 @@ func TestRenderLogsJSON(t *testing.T) { if len(parsedData.Runs) != 1 { t.Errorf("Expected 1 run in JSON, got %d", len(parsedData.Runs)) } + if parsedData.Runs[0].Comparison == nil || parsedData.Runs[0].Comparison.Baseline == nil || parsedData.Runs[0].Comparison.Baseline.Selection != "cohort_match" { + t.Fatalf("Expected comparison metadata to survive JSON round-trip, got %+v", parsedData.Runs[0].Comparison) + } } // TestBuildMissingToolsSummary tests missing tools aggregation diff --git a/pkg/cli/logs_models.go b/pkg/cli/logs_models.go index 912da697645..76bce722ffb 100644 --- a/pkg/cli/logs_models.go +++ b/pkg/cli/logs_models.go @@ -69,6 +69,10 @@ type LogMetrics = 
workflow.LogMetrics // ProcessedRun represents a workflow run with its associated analysis type ProcessedRun struct { Run WorkflowRun + AwContext *AwContext + TaskDomain *TaskDomainInfo + BehaviorFingerprint *BehaviorFingerprint + AgenticAssessments []AgenticAssessment AccessAnalysis *DomainAnalysis FirewallAnalysis *FirewallAnalysis RedactedDomainsAnalysis *RedactedDomainsAnalysis @@ -174,27 +178,35 @@ var ErrNoArtifacts = errors.New("no artifacts found for this run") // - If the CLI version in the summary doesn't match the current version, the run is reprocessed // - This ensures that bug fixes and improvements in log parsing are automatically applied type RunSummary struct { - CLIVersion string `json:"cli_version"` // CLI version used to process this run - RunID int64 `json:"run_id"` // Workflow run database ID - ProcessedAt time.Time `json:"processed_at"` // When this summary was created - Run WorkflowRun `json:"run"` // Full workflow run metadata - Metrics LogMetrics `json:"metrics"` // Extracted log metrics - AccessAnalysis *DomainAnalysis `json:"access_analysis"` // Network access analysis - FirewallAnalysis *FirewallAnalysis `json:"firewall_analysis"` // Firewall log analysis - RedactedDomainsAnalysis *RedactedDomainsAnalysis `json:"redacted_domains_analysis"` // Redacted URL domains analysis - MissingTools []MissingToolReport `json:"missing_tools"` // Missing tool reports - MissingData []MissingDataReport `json:"missing_data"` // Missing data reports - Noops []NoopReport `json:"noops"` // Noop messages - MCPFailures []MCPFailureReport `json:"mcp_failures"` // MCP server failures - MCPToolUsage *MCPToolUsageData `json:"mcp_tool_usage,omitempty"` // MCP tool usage data - ArtifactsList []string `json:"artifacts_list"` // List of downloaded artifact files - JobDetails []JobInfoWithDuration `json:"job_details"` // Job execution details + CLIVersion string `json:"cli_version"` // CLI version used to process this run + RunID int64 `json:"run_id"` // Workflow run 
database ID + ProcessedAt time.Time `json:"processed_at"` // When this summary was created + Run WorkflowRun `json:"run"` // Full workflow run metadata + Metrics LogMetrics `json:"metrics"` // Extracted log metrics + AwContext *AwContext `json:"context,omitempty"` // aw_context data from aw_info.json + TaskDomain *TaskDomainInfo `json:"task_domain,omitempty"` // Inferred workflow task domain + BehaviorFingerprint *BehaviorFingerprint `json:"behavior_fingerprint,omitempty"` // Compact execution profile + AgenticAssessments []AgenticAssessment `json:"agentic_assessments,omitempty"` // Derived agentic judgments + AccessAnalysis *DomainAnalysis `json:"access_analysis"` // Network access analysis + FirewallAnalysis *FirewallAnalysis `json:"firewall_analysis"` // Firewall log analysis + RedactedDomainsAnalysis *RedactedDomainsAnalysis `json:"redacted_domains_analysis"` // Redacted URL domains analysis + MissingTools []MissingToolReport `json:"missing_tools"` // Missing tool reports + MissingData []MissingDataReport `json:"missing_data"` // Missing data reports + Noops []NoopReport `json:"noops"` // Noop messages + MCPFailures []MCPFailureReport `json:"mcp_failures"` // MCP server failures + MCPToolUsage *MCPToolUsageData `json:"mcp_tool_usage,omitempty"` // MCP tool usage data + ArtifactsList []string `json:"artifacts_list"` // List of downloaded artifact files + JobDetails []JobInfoWithDuration `json:"job_details"` // Job execution details } // DownloadResult represents the result of downloading and processing a workflow run type DownloadResult struct { Run WorkflowRun Metrics LogMetrics + AwContext *AwContext + TaskDomain *TaskDomainInfo + BehaviorFingerprint *BehaviorFingerprint + AgenticAssessments []AgenticAssessment AccessAnalysis *DomainAnalysis FirewallAnalysis *FirewallAnalysis RedactedDomainsAnalysis *RedactedDomainsAnalysis diff --git a/pkg/cli/logs_orchestrator.go b/pkg/cli/logs_orchestrator.go index b1893a08e8d..39703d81235 100644 --- 
a/pkg/cli/logs_orchestrator.go +++ b/pkg/cli/logs_orchestrator.go @@ -350,6 +350,10 @@ func DownloadWorkflowLogs(ctx context.Context, workflowName string, count int, s processedRun := ProcessedRun{ Run: run, + AwContext: result.AwContext, + TaskDomain: result.TaskDomain, + BehaviorFingerprint: result.BehaviorFingerprint, + AgenticAssessments: result.AgenticAssessments, AccessAnalysis: result.AccessAnalysis, FirewallAnalysis: result.FirewallAnalysis, RedactedDomainsAnalysis: result.RedactedDomainsAnalysis, @@ -609,6 +613,10 @@ func downloadRunArtifactsConcurrent(ctx context.Context, runs []WorkflowRun, out result := DownloadResult{ Run: summary.Run, Metrics: summary.Metrics, + AwContext: summary.AwContext, + TaskDomain: summary.TaskDomain, + BehaviorFingerprint: summary.BehaviorFingerprint, + AgenticAssessments: summary.AgenticAssessments, AccessAnalysis: summary.AccessAnalysis, FirewallAnalysis: summary.FirewallAnalysis, RedactedDomainsAnalysis: summary.RedactedDomainsAnalysis, @@ -762,6 +770,24 @@ func downloadRunArtifactsConcurrent(ctx context.Context, runs []WorkflowRun, out } } + processedRun := ProcessedRun{ + Run: result.Run, + AccessAnalysis: accessAnalysis, + FirewallAnalysis: firewallAnalysis, + RedactedDomainsAnalysis: redactedDomainsAnalysis, + MissingTools: missingTools, + MissingData: missingData, + Noops: noops, + MCPFailures: mcpFailures, + MCPToolUsage: mcpToolUsage, + JobDetails: jobDetails, + } + awContext, _, _, taskDomain, behaviorFingerprint, agenticAssessments := deriveRunAgenticAnalysis(processedRun, metrics) + result.AwContext = awContext + result.TaskDomain = taskDomain + result.BehaviorFingerprint = behaviorFingerprint + result.AgenticAssessments = agenticAssessments + // Create and save run summary summary := &RunSummary{ CLIVersion: GetVersion(), @@ -769,6 +795,10 @@ func downloadRunArtifactsConcurrent(ctx context.Context, runs []WorkflowRun, out ProcessedAt: time.Now(), Run: result.Run, Metrics: metrics, + AwContext: result.AwContext, + 
TaskDomain: result.TaskDomain, + BehaviorFingerprint: result.BehaviorFingerprint, + AgenticAssessments: result.AgenticAssessments, AccessAnalysis: accessAnalysis, FirewallAnalysis: firewallAnalysis, RedactedDomainsAnalysis: redactedDomainsAnalysis, diff --git a/pkg/cli/logs_report.go b/pkg/cli/logs_report.go index 411c2081958..b8ac59c5a02 100644 --- a/pkg/cli/logs_report.go +++ b/pkg/cli/logs_report.go @@ -66,30 +66,34 @@ type LogsSummary struct { // RunData contains information about a single workflow run type RunData struct { - DatabaseID int64 `json:"database_id" console:"header:Run ID"` - Number int `json:"number" console:"-"` - WorkflowName string `json:"workflow_name" console:"header:Workflow"` - WorkflowPath string `json:"workflow_path" console:"-"` - Agent string `json:"agent,omitempty" console:"header:Agent,omitempty"` - Status string `json:"status" console:"header:Status"` - Conclusion string `json:"conclusion,omitempty" console:"-"` - Duration string `json:"duration,omitempty" console:"header:Duration,omitempty"` - TokenUsage int `json:"token_usage,omitempty" console:"header:Tokens,format:number,omitempty"` - EstimatedCost float64 `json:"estimated_cost,omitempty" console:"header:Cost ($),format:cost,omitempty"` - Turns int `json:"turns,omitempty" console:"header:Turns,omitempty"` - ErrorCount int `json:"error_count" console:"header:Errors"` - WarningCount int `json:"warning_count" console:"header:Warnings"` - MissingToolCount int `json:"missing_tool_count" console:"header:Missing Tools"` - MissingDataCount int `json:"missing_data_count" console:"header:Missing Data"` - SafeItemsCount int `json:"safe_items_count,omitempty" console:"header:Safe Items,omitempty"` - CreatedAt time.Time `json:"created_at" console:"header:Created"` - StartedAt time.Time `json:"started_at,omitzero" console:"-"` - UpdatedAt time.Time `json:"updated_at,omitzero" console:"-"` - URL string `json:"url" console:"-"` - LogsPath string `json:"logs_path" console:"header:Logs Path"` - 
Event string `json:"event" console:"-"` - Branch string `json:"branch" console:"-"` - AwContext *AwContext `json:"context,omitempty" console:"-"` // aw_context data from aw_info.json + DatabaseID int64 `json:"database_id" console:"header:Run ID"` + Number int `json:"number" console:"-"` + WorkflowName string `json:"workflow_name" console:"header:Workflow"` + WorkflowPath string `json:"workflow_path" console:"-"` + Agent string `json:"agent,omitempty" console:"header:Agent,omitempty"` + Status string `json:"status" console:"header:Status"` + Conclusion string `json:"conclusion,omitempty" console:"-"` + Duration string `json:"duration,omitempty" console:"header:Duration,omitempty"` + TokenUsage int `json:"token_usage,omitempty" console:"header:Tokens,format:number,omitempty"` + EstimatedCost float64 `json:"estimated_cost,omitempty" console:"header:Cost ($),format:cost,omitempty"` + Turns int `json:"turns,omitempty" console:"header:Turns,omitempty"` + ErrorCount int `json:"error_count" console:"header:Errors"` + WarningCount int `json:"warning_count" console:"header:Warnings"` + MissingToolCount int `json:"missing_tool_count" console:"header:Missing Tools"` + MissingDataCount int `json:"missing_data_count" console:"header:Missing Data"` + SafeItemsCount int `json:"safe_items_count,omitempty" console:"header:Safe Items,omitempty"` + CreatedAt time.Time `json:"created_at" console:"header:Created"` + StartedAt time.Time `json:"started_at,omitzero" console:"-"` + UpdatedAt time.Time `json:"updated_at,omitzero" console:"-"` + URL string `json:"url" console:"-"` + LogsPath string `json:"logs_path" console:"header:Logs Path"` + Event string `json:"event" console:"-"` + Branch string `json:"branch" console:"-"` + Comparison *AuditComparisonData `json:"comparison,omitempty" console:"-"` + TaskDomain *TaskDomainInfo `json:"task_domain,omitempty" console:"-"` + BehaviorFingerprint *BehaviorFingerprint `json:"behavior_fingerprint,omitempty" console:"-"` + AgenticAssessments 
[]AgenticAssessment `json:"agentic_assessments,omitempty" console:"-"` + AwContext *AwContext `json:"context,omitempty" console:"-"` // aw_context data from aw_info.json } // ToolUsageSummary contains aggregated tool usage statistics @@ -167,39 +171,47 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation totalMissingData += run.MissingDataCount totalSafeItems += run.SafeItemsCount - // Extract agent/engine ID and aw_context from aw_info.json + // Extract agent/engine ID from aw_info.json and only fall back to the file for aw_context. agentID := "" - var awContext *AwContext + awContext := pr.AwContext awInfoPath := filepath.Join(run.LogsPath, "aw_info.json") if info, err := parseAwInfo(awInfoPath, false); err == nil && info != nil { agentID = info.EngineID - awContext = info.Context + if awContext == nil { + awContext = info.Context + } } + comparison := buildAuditComparisonForProcessedRuns(pr, processedRuns) + runData := RunData{ - DatabaseID: run.DatabaseID, - Number: run.Number, - WorkflowName: run.WorkflowName, - WorkflowPath: run.WorkflowPath, - Agent: agentID, - Status: run.Status, - Conclusion: run.Conclusion, - TokenUsage: run.TokenUsage, - EstimatedCost: run.EstimatedCost, - Turns: run.Turns, - ErrorCount: run.ErrorCount, - WarningCount: run.WarningCount, - MissingToolCount: run.MissingToolCount, - MissingDataCount: run.MissingDataCount, - SafeItemsCount: run.SafeItemsCount, - CreatedAt: run.CreatedAt, - StartedAt: run.StartedAt, - UpdatedAt: run.UpdatedAt, - URL: run.URL, - LogsPath: run.LogsPath, - Event: run.Event, - Branch: run.HeadBranch, - AwContext: awContext, + DatabaseID: run.DatabaseID, + Number: run.Number, + WorkflowName: run.WorkflowName, + WorkflowPath: run.WorkflowPath, + Agent: agentID, + Status: run.Status, + Conclusion: run.Conclusion, + TokenUsage: run.TokenUsage, + EstimatedCost: run.EstimatedCost, + Turns: run.Turns, + ErrorCount: run.ErrorCount, + WarningCount: run.WarningCount, + MissingToolCount: 
run.MissingToolCount, + MissingDataCount: run.MissingDataCount, + SafeItemsCount: run.SafeItemsCount, + CreatedAt: run.CreatedAt, + StartedAt: run.StartedAt, + UpdatedAt: run.UpdatedAt, + URL: run.URL, + LogsPath: run.LogsPath, + Event: run.Event, + Branch: run.HeadBranch, + Comparison: comparison, + TaskDomain: pr.TaskDomain, + BehaviorFingerprint: pr.BehaviorFingerprint, + AgenticAssessments: pr.AgenticAssessments, + AwContext: awContext, } if run.Duration > 0 { runData.Duration = timeutil.FormatDuration(run.Duration) diff --git a/pkg/cli/logs_summary_test.go b/pkg/cli/logs_summary_test.go index e1f4df9fc4d..4e409012ae9 100644 --- a/pkg/cli/logs_summary_test.go +++ b/pkg/cli/logs_summary_test.go @@ -44,6 +44,24 @@ func TestSaveAndLoadRunSummary(t *testing.T) { EstimatedCost: 0.05, Turns: 5, }, + TaskDomain: &TaskDomainInfo{ + Name: "research", + Label: "Research", + }, + BehaviorFingerprint: &BehaviorFingerprint{ + ExecutionStyle: "adaptive", + ToolBreadth: "moderate", + ActuationStyle: "selective_write", + ResourceProfile: "moderate", + DispatchMode: "delegated", + }, + AgenticAssessments: []AgenticAssessment{ + { + Kind: "delegated_context_present", + Severity: "info", + Summary: "The run preserved upstream dispatch context.", + }, + }, MissingTools: []MissingToolReport{ { Tool: "test_tool", @@ -86,6 +104,15 @@ func TestSaveAndLoadRunSummary(t *testing.T) { if loadedSummary.Metrics.TokenUsage != testSummary.Metrics.TokenUsage { t.Errorf("Metrics.TokenUsage mismatch: got %d, want %d", loadedSummary.Metrics.TokenUsage, testSummary.Metrics.TokenUsage) } + if loadedSummary.TaskDomain == nil || loadedSummary.TaskDomain.Name != testSummary.TaskDomain.Name { + t.Fatalf("TaskDomain mismatch: got %+v, want %+v", loadedSummary.TaskDomain, testSummary.TaskDomain) + } + if loadedSummary.BehaviorFingerprint == nil || loadedSummary.BehaviorFingerprint.DispatchMode != testSummary.BehaviorFingerprint.DispatchMode { + t.Fatalf("BehaviorFingerprint mismatch: got %+v, want %+v", 
loadedSummary.BehaviorFingerprint, testSummary.BehaviorFingerprint) + } + if len(loadedSummary.AgenticAssessments) != len(testSummary.AgenticAssessments) { + t.Fatalf("AgenticAssessments length mismatch: got %d, want %d", len(loadedSummary.AgenticAssessments), len(testSummary.AgenticAssessments)) + } if len(loadedSummary.MissingTools) != len(testSummary.MissingTools) { t.Errorf("MissingTools length mismatch: got %d, want %d", len(loadedSummary.MissingTools), len(testSummary.MissingTools)) } From 89f4ad98473887c52a3c0fda1da3f49c8895bebd Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer Date: Tue, 24 Mar 2026 20:01:34 +0100 Subject: [PATCH 04/12] simplify string checks and error handling --- pkg/cli/observability_policy.go | 23 +++++++---------------- pkg/cli/observability_policy_command.go | 5 +++-- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/pkg/cli/observability_policy.go b/pkg/cli/observability_policy.go index b95408a10e4..e8c55083644 100644 --- a/pkg/cli/observability_policy.go +++ b/pkg/cli/observability_policy.go @@ -1,6 +1,10 @@ package cli -import "fmt" +import ( + "fmt" + "slices" + "strings" +) type ObservabilityPolicy struct { SchemaVersion string `json:"schema_version"` @@ -231,22 +235,9 @@ func firstMatch(allowed []string, actual []string) string { } func containsString(items []string, target string) bool { - for _, item := range items { - if item == target { - return true - } - } - return false + return slices.Contains(items, target) } func joinEvidence(parts []string) string { - if len(parts) == 0 { - return "" - } - - result := parts[0] - for i := 1; i < len(parts); i++ { - result += " " + parts[i] - } - return result + return strings.Join(parts, " ") } diff --git a/pkg/cli/observability_policy_command.go b/pkg/cli/observability_policy_command.go index 70587fb9f43..5c1d4b2f355 100644 --- a/pkg/cli/observability_policy_command.go +++ b/pkg/cli/observability_policy_command.go @@ -2,6 +2,7 @@ package cli import ( "encoding/json" + 
"errors" "fmt" "os" @@ -98,10 +99,10 @@ Examples: // RunObservabilityPolicyEval executes observability policy evaluation. func RunObservabilityPolicyEval(config ObservabilityPolicyEvalConfig) error { if config.PolicyPath == "" { - return fmt.Errorf("policy path is required") + return errors.New("policy path is required") } if config.ReportPath == "" { - return fmt.Errorf("report path is required") + return errors.New("report path is required") } policy, err := readObservabilityPolicyFile(config.PolicyPath) From 97cf92790b9151c7f41b8ab2e6797a8e7c60a3d6 Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer Date: Wed, 25 Mar 2026 07:35:34 +0100 Subject: [PATCH 05/12] rm observability policy cmd and related --- ...agentic-observability-central-kit.lock.yml | 1195 ----------------- .../agentic-observability-central-kit.md | 179 --- .serena/memories/project_overview.md | 1 + .serena/memories/style_and_conventions.md | 1 + .serena/memories/suggested_commands.md | 1 + .serena/memories/task_completion.md | 1 + .serena/project.yml | 23 + cmd/gh-aw/main.go | 3 - pkg/cli/audit_comparison.go | 6 +- pkg/cli/logs_models.go | 2 +- pkg/cli/mcp_schema_test.go | 38 - pkg/cli/observability_policy.go | 243 ---- pkg/cli/observability_policy_command.go | 264 ---- pkg/cli/observability_policy_command_test.go | 141 -- pkg/cli/observability_policy_test.go | 127 -- pkg/cli/observability_schema_test.go | 65 - schemas/observability-policy.json | 97 -- schemas/observability-report.json | 601 --------- 18 files changed, 30 insertions(+), 2958 deletions(-) delete mode 100644 .github/workflows/agentic-observability-central-kit.lock.yml delete mode 100644 .github/workflows/agentic-observability-central-kit.md create mode 100644 .serena/memories/project_overview.md create mode 100644 .serena/memories/style_and_conventions.md create mode 100644 .serena/memories/suggested_commands.md create mode 100644 .serena/memories/task_completion.md delete mode 100644 pkg/cli/observability_policy.go delete mode 
100644 pkg/cli/observability_policy_command.go delete mode 100644 pkg/cli/observability_policy_command_test.go delete mode 100644 pkg/cli/observability_policy_test.go delete mode 100644 pkg/cli/observability_schema_test.go delete mode 100644 schemas/observability-policy.json delete mode 100644 schemas/observability-report.json diff --git a/.github/workflows/agentic-observability-central-kit.lock.yml b/.github/workflows/agentic-observability-central-kit.lock.yml deleted file mode 100644 index 9f30f696780..00000000000 --- a/.github/workflows/agentic-observability-central-kit.lock.yml +++ /dev/null @@ -1,1195 +0,0 @@ -# ___ _ _ -# / _ \ | | (_) -# | |_| | __ _ ___ _ __ | |_ _ ___ -# | _ |/ _` |/ _ \ '_ \| __| |/ __| -# | | | | (_| | __/ | | | |_| | (__ -# \_| |_/\__, |\___|_| |_|\__|_|\___| -# __/ | -# _ _ |___/ -# | | | | / _| | -# | | | | ___ _ __ _ __| |_| | _____ ____ -# | |/\| |/ _ \ '__| |/ /| _| |/ _ \ \ /\ / / ___| -# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ -# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ -# -# This file was automatically generated by gh-aw. DO NOT EDIT. -# -# To update this file, edit the corresponding .md file and run: -# gh aw compile -# Not all edits will cause changes to this file. -# -# For more information: https://github.github.com/gh-aw/introduction/overview/ -# -# Central reporting variant of the agentic observability kit for platform repositories -# -# Resolved workflow manifest: -# Imports: -# - shared/reporting.md -# -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"70afa9d30cf87e41cfe6cdba9b2ab2902c336e9766219b10e44c9c05634796d5","strict":true,"agent_id":"copilot"} - -name: "Agentic Observability Central Kit" -"on": - schedule: - - cron: "52 8 * * 1" - # Friendly format: weekly on monday around 08:30 (scattered) - workflow_dispatch: - inputs: - aw_context: - default: "" - description: Agent caller context (used internally by Agentic Workflows). 
- required: false - type: string - -permissions: {} - -concurrency: - group: "gh-aw-${{ github.workflow }}" - -run-name: "Agentic Observability Central Kit" - -env: - REPORT_REPOSITORY: ${{ vars.REPORT_REPOSITORY || github.repository }} - -jobs: - activation: - runs-on: ubuntu-slim - permissions: - contents: read - outputs: - comment_id: "" - comment_repo: "" - lockdown_check_failed: ${{ steps.generate_aw_info.outputs.lockdown_check_failed == 'true' }} - model: ${{ steps.generate_aw_info.outputs.model }} - secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} - steps: - - name: Checkout actions folder - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: github/gh-aw - sparse-checkout: | - actions - persist-credentials: false - - name: Setup Scripts - uses: ./actions/setup - with: - destination: ${{ runner.temp }}/gh-aw/actions - - name: Generate agentic run info - id: generate_aw_info - env: - GH_AW_INFO_ENGINE_ID: "copilot" - GH_AW_INFO_ENGINE_NAME: "GitHub Copilot CLI" - GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }} - GH_AW_INFO_VERSION: "latest" - GH_AW_INFO_AGENT_VERSION: "latest" - GH_AW_INFO_WORKFLOW_NAME: "Agentic Observability Central Kit" - GH_AW_INFO_EXPERIMENTAL: "false" - GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" - GH_AW_INFO_STAGED: "false" - GH_AW_INFO_ALLOWED_DOMAINS: '["defaults"]' - GH_AW_INFO_FIREWALL_ENABLED: "true" - GH_AW_INFO_AWF_VERSION: "v0.25.0" - GH_AW_INFO_AWMG_VERSION: "" - GH_AW_INFO_FIREWALL_TYPE: "squid" - GH_AW_COMPILED_STRICT: "true" - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/generate_aw_info.cjs'); - await main(core, context); - - name: Validate COPILOT_GITHUB_TOKEN secret - id: 
validate-secret - run: ${RUNNER_TEMP}/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default - env: - COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} - - name: Checkout .github and .agents folders - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - sparse-checkout: | - .github - .agents - sparse-checkout-cone-mode: true - fetch-depth: 1 - - name: Check workflow file timestamps - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_WORKFLOW_FILE: "agentic-observability-central-kit.lock.yml" - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/check_workflow_timestamp_api.cjs'); - await main(); - - name: Create prompt with built-in context - env: - GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - GH_AW_SAFE_OUTPUTS: ${{ runner.temp }}/gh-aw/safeoutputs/outputs.jsonl - GH_AW_ENV_REPORT_REPOSITORY: ${{ env.REPORT_REPOSITORY }} - GH_AW_GITHUB_ACTOR: ${{ github.actor }} - GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} - GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} - GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} - GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} - GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} - GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} - GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} - # poutine:ignore untrusted_checkout_exec - run: | - bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh - { - cat << 'GH_AW_PROMPT_EOF' - - GH_AW_PROMPT_EOF - cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" - cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" - cat 
"${RUNNER_TEMP}/gh-aw/prompts/markdown.md" - cat "${RUNNER_TEMP}/gh-aw/prompts/agentic_workflows_guide.md" - cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 'GH_AW_PROMPT_EOF' - - Tools: create_issue(max:10), create_discussion, missing_tool, missing_data, noop - - - The following GitHub context information is available for this workflow: - {{#if __GH_AW_GITHUB_ACTOR__ }} - - **actor**: __GH_AW_GITHUB_ACTOR__ - {{/if}} - {{#if __GH_AW_GITHUB_REPOSITORY__ }} - - **repository**: __GH_AW_GITHUB_REPOSITORY__ - {{/if}} - {{#if __GH_AW_GITHUB_WORKSPACE__ }} - - **workspace**: __GH_AW_GITHUB_WORKSPACE__ - {{/if}} - {{#if __GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ }} - - **issue-number**: #__GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ - {{/if}} - {{#if __GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ }} - - **discussion-number**: #__GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ - {{/if}} - {{#if __GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ }} - - **pull-request-number**: #__GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ - {{/if}} - {{#if __GH_AW_GITHUB_EVENT_COMMENT_ID__ }} - - **comment-id**: __GH_AW_GITHUB_EVENT_COMMENT_ID__ - {{/if}} - {{#if __GH_AW_GITHUB_RUN_ID__ }} - - **workflow-run-id**: __GH_AW_GITHUB_RUN_ID__ - {{/if}} - - - GH_AW_PROMPT_EOF - cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" - cat << 'GH_AW_PROMPT_EOF' - - GH_AW_PROMPT_EOF - cat << 'GH_AW_PROMPT_EOF' - {{#runtime-import .github/workflows/shared/reporting.md}} - GH_AW_PROMPT_EOF - cat << 'GH_AW_PROMPT_EOF' - {{#runtime-import .github/workflows/agentic-observability-central-kit.md}} - GH_AW_PROMPT_EOF - } > "$GH_AW_PROMPT" - - name: Interpolate variables and render templates - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - GH_AW_ENV_REPORT_REPOSITORY: ${{ env.REPORT_REPOSITORY }} - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - 
setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/interpolate_prompt.cjs'); - await main(); - - name: Substitute placeholders - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - GH_AW_ENV_REPORT_REPOSITORY: ${{ env.REPORT_REPOSITORY }} - GH_AW_GITHUB_ACTOR: ${{ github.actor }} - GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} - GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} - GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} - GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} - GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} - GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} - GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - - const substitutePlaceholders = require('${{ runner.temp }}/gh-aw/actions/substitute_placeholders.cjs'); - - // Call the substitution function - return await substitutePlaceholders({ - file: process.env.GH_AW_PROMPT, - substitutions: { - GH_AW_ENV_REPORT_REPOSITORY: process.env.GH_AW_ENV_REPORT_REPOSITORY, - GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR, - GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID, - GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER, - GH_AW_GITHUB_EVENT_ISSUE_NUMBER: process.env.GH_AW_GITHUB_EVENT_ISSUE_NUMBER, - GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER, - GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY, - GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID, - GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE - } - }); - - name: Validate prompt placeholders - env: - GH_AW_PROMPT: 
/tmp/gh-aw/aw-prompts/prompt.txt - # poutine:ignore untrusted_checkout_exec - run: bash ${RUNNER_TEMP}/gh-aw/actions/validate_prompt_placeholders.sh - - name: Print prompt - env: - GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - # poutine:ignore untrusted_checkout_exec - run: bash ${RUNNER_TEMP}/gh-aw/actions/print_prompt_summary.sh - - name: Upload activation artifact - if: success() - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 - with: - name: activation - path: | - /tmp/gh-aw/aw_info.json - /tmp/gh-aw/aw-prompts/prompt.txt - retention-days: 1 - - agent: - needs: activation - runs-on: ubuntu-latest - permissions: - actions: read - contents: read - discussions: read - issues: read - pull-requests: read - concurrency: - group: "gh-aw-copilot-${{ github.workflow }}" - env: - DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} - GH_AW_ASSETS_ALLOWED_EXTS: "" - GH_AW_ASSETS_BRANCH: "" - GH_AW_ASSETS_MAX_SIZE_KB: 0 - GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs - GH_AW_WORKFLOW_ID_SANITIZED: agenticobservabilitycentralkit - outputs: - checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} - detection_conclusion: ${{ steps.detection_conclusion.outputs.conclusion }} - detection_success: ${{ steps.detection_conclusion.outputs.success }} - has_patch: ${{ steps.collect_output.outputs.has_patch }} - inference_access_error: ${{ steps.detect-inference-error.outputs.inference_access_error || 'false' }} - model: ${{ needs.activation.outputs.model }} - output: ${{ steps.collect_output.outputs.output }} - output_types: ${{ steps.collect_output.outputs.output_types }} - steps: - - name: Checkout actions folder - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: github/gh-aw - sparse-checkout: | - actions - persist-credentials: false - - name: Setup Scripts - uses: ./actions/setup - with: - destination: ${{ runner.temp }}/gh-aw/actions - - name: Set runtime 
paths - id: set-runtime-paths - run: | - echo "GH_AW_SAFE_OUTPUTS=${RUNNER_TEMP}/gh-aw/safeoutputs/outputs.jsonl" >> "$GITHUB_OUTPUT" - echo "GH_AW_SAFE_OUTPUTS_CONFIG_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" >> "$GITHUB_OUTPUT" - echo "GH_AW_SAFE_OUTPUTS_TOOLS_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/tools.json" >> "$GITHUB_OUTPUT" - - name: Checkout repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - name: Setup Go for CLI build - uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 - with: - go-version-file: go.mod - cache: true - - name: Build gh-aw CLI - run: | - echo "Building gh-aw CLI for linux/amd64..." - mkdir -p dist - VERSION=$(git describe --tags --always --dirty) - CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ - -ldflags "-s -w -X main.version=${VERSION}" \ - -o dist/gh-aw-linux-amd64 \ - ./cmd/gh-aw - # Copy binary to root for direct execution in user-defined steps - cp dist/gh-aw-linux-amd64 ./gh-aw - chmod +x ./gh-aw - echo "✓ Built gh-aw CLI successfully" - - name: Setup Docker Buildx - uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4 - - name: Build gh-aw Docker image - uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7 - with: - context: . 
- platforms: linux/amd64 - push: false - load: true - tags: localhost/gh-aw:dev - build-args: | - BINARY=dist/gh-aw-linux-amd64 - - name: Create gh-aw temp directory - run: bash ${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh - - name: Configure gh CLI for GitHub Enterprise - run: bash ${RUNNER_TEMP}/gh-aw/actions/configure_gh_for_ghe.sh - env: - GH_TOKEN: ${{ github.token }} - - name: Configure Git credentials - env: - REPO_NAME: ${{ github.repository }} - SERVER_URL: ${{ github.server_url }} - run: | - git config --global user.email "github-actions[bot]@users.noreply.github.com" - git config --global user.name "github-actions[bot]" - git config --global am.keepcr true - # Re-authenticate git with GitHub token - SERVER_URL_STRIPPED="${SERVER_URL#https://}" - git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" - echo "Git configured with standard GitHub Actions identity" - - name: Checkout PR branch - id: checkout-pr - if: | - github.event.pull_request || github.event.issue.pull_request - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - with: - github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/checkout_pr_branch.cjs'); - await main(); - - name: Install GitHub Copilot CLI - run: ${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh latest - env: - GH_HOST: github.com - - name: Install AWF binary - run: bash ${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh v0.25.0 - - name: Parse integrity filter lists - id: parse-guard-vars - env: - GH_AW_BLOCKED_USERS_VAR: ${{ 
vars.GH_AW_GITHUB_BLOCKED_USERS || '' }} - GH_AW_APPROVAL_LABELS_VAR: ${{ vars.GH_AW_GITHUB_APPROVAL_LABELS || '' }} - run: bash ${RUNNER_TEMP}/gh-aw/actions/parse_guard_list.sh - - name: Download container images - run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.25.0 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.0 ghcr.io/github/gh-aw-firewall/squid:0.25.0 ghcr.io/github/gh-aw-mcpg:v0.2.3 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine - - name: Install gh-aw extension - env: - GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - run: | - # Check if gh-aw extension is already installed - if gh extension list | grep -q "github/gh-aw"; then - echo "gh-aw extension already installed, upgrading..." - gh extension upgrade gh-aw || true - else - echo "Installing gh-aw extension..." - gh extension install github/gh-aw - fi - gh aw --version - # Copy the gh-aw binary to ${RUNNER_TEMP}/gh-aw for MCP server containerization - mkdir -p ${RUNNER_TEMP}/gh-aw - GH_AW_BIN=$(which gh-aw 2>/dev/null || find ~/.local/share/gh/extensions/gh-aw -name 'gh-aw' -type f 2>/dev/null | head -1) - if [ -n "$GH_AW_BIN" ] && [ -f "$GH_AW_BIN" ]; then - cp "$GH_AW_BIN" ${RUNNER_TEMP}/gh-aw/gh-aw - chmod +x ${RUNNER_TEMP}/gh-aw/gh-aw - echo "Copied gh-aw binary to ${RUNNER_TEMP}/gh-aw/gh-aw" - else - echo "::error::Failed to find gh-aw binary for MCP server" - exit 1 - fi - - name: Write Safe Outputs Config - run: | - mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs - mkdir -p /tmp/gh-aw/safeoutputs - mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_EOF' - {"create_discussion":{"expires":168,"max":1},"create_issue":{"group":true,"max":10},"missing_data":{},"missing_tool":{},"noop":{"max":1}} - GH_AW_SAFE_OUTPUTS_CONFIG_EOF - - name: Write Safe Outputs Tools - run: | - cat > 
${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_EOF' - { - "description_suffixes": { - "create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[observability central] \". Discussions will be created in category \"audits\". Discussions will be created in repository \"${{ env.REPORT_REPOSITORY }}\".", - "create_issue": " CONSTRAINTS: Maximum 10 issue(s) can be created. Labels [\"agentics\" \"warning\" \"platform\"] will be automatically added. Issues will be created in repository \"${{ env.REPORT_REPOSITORY }}\"." - }, - "repo_params": {}, - "dynamic_tools": [] - } - GH_AW_SAFE_OUTPUTS_TOOLS_META_EOF - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_EOF' - { - "create_discussion": { - "defaultMax": 1, - "fields": { - "body": { - "required": true, - "type": "string", - "sanitize": true, - "maxLength": 65000 - }, - "category": { - "type": "string", - "sanitize": true, - "maxLength": 128 - }, - "repo": { - "type": "string", - "maxLength": 256 - }, - "title": { - "required": true, - "type": "string", - "sanitize": true, - "maxLength": 128 - } - } - }, - "create_issue": { - "defaultMax": 1, - "fields": { - "body": { - "required": true, - "type": "string", - "sanitize": true, - "maxLength": 65000 - }, - "labels": { - "type": "array", - "itemType": "string", - "itemSanitize": true, - "itemMaxLength": 128 - }, - "parent": { - "issueOrPRNumber": true - }, - "repo": { - "type": "string", - "maxLength": 256 - }, - "temporary_id": { - "type": "string" - }, - "title": { - "required": true, - "type": "string", - "sanitize": true, - "maxLength": 128 - } - } - }, - "missing_data": { - "defaultMax": 20, - "fields": { - "alternatives": { - "type": "string", - "sanitize": true, - "maxLength": 256 - }, - "context": { - "type": "string", - "sanitize": true, - "maxLength": 256 - }, - "data_type": { - "type": "string", - "sanitize": true, - "maxLength": 128 - }, - 
"reason": { - "type": "string", - "sanitize": true, - "maxLength": 256 - } - } - }, - "missing_tool": { - "defaultMax": 20, - "fields": { - "alternatives": { - "type": "string", - "sanitize": true, - "maxLength": 512 - }, - "reason": { - "required": true, - "type": "string", - "sanitize": true, - "maxLength": 256 - }, - "tool": { - "type": "string", - "sanitize": true, - "maxLength": 128 - } - } - }, - "noop": { - "defaultMax": 1, - "fields": { - "message": { - "required": true, - "type": "string", - "sanitize": true, - "maxLength": 65000 - } - } - } - } - GH_AW_SAFE_OUTPUTS_VALIDATION_EOF - node ${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs - - name: Generate Safe Outputs MCP Server Config - id: safe-outputs-config - run: | - # Generate a secure random API key (360 bits of entropy, 40+ chars) - # Mask immediately to prevent timing vulnerabilities - API_KEY=$(openssl rand -base64 45 | tr -d '/+=') - echo "::add-mask::${API_KEY}" - - PORT=3001 - - # Set outputs for next steps - { - echo "safe_outputs_api_key=${API_KEY}" - echo "safe_outputs_port=${PORT}" - } >> "$GITHUB_OUTPUT" - - echo "Safe Outputs MCP server will run on port ${PORT}" - - - name: Start Safe Outputs MCP HTTP Server - id: safe-outputs-start - env: - DEBUG: '*' - GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-config.outputs.safe_outputs_port }} - GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-config.outputs.safe_outputs_api_key }} - GH_AW_SAFE_OUTPUTS_TOOLS_PATH: ${{ runner.temp }}/gh-aw/safeoutputs/tools.json - GH_AW_SAFE_OUTPUTS_CONFIG_PATH: ${{ runner.temp }}/gh-aw/safeoutputs/config.json - GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs - run: | - # Environment variables are set above to prevent template injection - export DEBUG - export GH_AW_SAFE_OUTPUTS_PORT - export GH_AW_SAFE_OUTPUTS_API_KEY - export GH_AW_SAFE_OUTPUTS_TOOLS_PATH - export GH_AW_SAFE_OUTPUTS_CONFIG_PATH - export GH_AW_MCP_LOG_DIR - - bash ${RUNNER_TEMP}/gh-aw/actions/start_safe_outputs_server.sh - - - 
name: Start MCP Gateway - id: start-mcp-gateway - env: - GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} - GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-start.outputs.api_key }} - GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-start.outputs.port }} - GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - set -eo pipefail - mkdir -p /tmp/gh-aw/mcp-config - - # Export gateway environment variables for MCP config and gateway script - export MCP_GATEWAY_PORT="80" - export MCP_GATEWAY_DOMAIN="host.docker.internal" - MCP_GATEWAY_API_KEY=$(openssl rand -base64 45 | tr -d '/+=') - echo "::add-mask::${MCP_GATEWAY_API_KEY}" - export MCP_GATEWAY_API_KEY - export MCP_GATEWAY_PAYLOAD_DIR="/tmp/gh-aw/mcp-payloads" - mkdir -p "${MCP_GATEWAY_PAYLOAD_DIR}" - export MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD="524288" - export DEBUG="*" - - export GH_AW_ENGINE="copilot" - export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e 
GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.3' - - mkdir -p /home/runner/.copilot - cat << GH_AW_MCP_CONFIG_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh - { - "mcpServers": { - "agenticworkflows": { - "type": "stdio", - "container": "localhost/gh-aw:dev", - "mounts": ["\${GITHUB_WORKSPACE}:\${GITHUB_WORKSPACE}:rw", "/tmp/gh-aw:/tmp/gh-aw:rw"], - "args": ["--network", "host", "-w", "\${GITHUB_WORKSPACE}"], - "env": { - "DEBUG": "*", - "GITHUB_TOKEN": "\${GITHUB_TOKEN}", - "GITHUB_ACTOR": "\${GITHUB_ACTOR}", - "GITHUB_REPOSITORY": "\${GITHUB_REPOSITORY}" - }, - "guard-policies": { - "write-sink": { - "accept": [ - "*" - ] - } - } - }, - "github": { - "type": "stdio", - "container": "ghcr.io/github/github-mcp-server:v0.32.0", - "env": { - "GITHUB_HOST": "\${GITHUB_SERVER_URL}", - "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}", - "GITHUB_READ_ONLY": "1", - "GITHUB_TOOLSETS": "context,repos,issues,pull_requests,discussions" - }, - "guard-policies": { - "allow-only": { - "approval-labels": ${{ steps.parse-guard-vars.outputs.approval_labels }}, - "blocked-users": ${{ steps.parse-guard-vars.outputs.blocked_users }}, - "min-integrity": "merged", - "repos": "all" - } - } - }, - "safeoutputs": { - "type": "http", - "url": "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT", - "headers": { - "Authorization": "\${GH_AW_SAFE_OUTPUTS_API_KEY}" - }, - "guard-policies": { - "write-sink": { - "accept": [ - "*" - ] - } - } - } - }, - "gateway": { - "port": $MCP_GATEWAY_PORT, - "domain": "${MCP_GATEWAY_DOMAIN}", - "apiKey": "${MCP_GATEWAY_API_KEY}", - "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" - } - } - GH_AW_MCP_CONFIG_EOF - - name: Download activation artifact - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c 
# v8.0.1 - with: - name: activation - path: /tmp/gh-aw - - name: Clean git credentials - continue-on-error: true - run: bash ${RUNNER_TEMP}/gh-aw/actions/clean_git_credentials.sh - - name: Execute GitHub Copilot CLI - id: agentic_execution - # Copilot CLI tool arguments (sorted): - timeout-minutes: 30 - run: | - set -o pipefail - touch /tmp/gh-aw/agent-step-summary.md - # shellcheck disable=SC1003 - sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.25.0 --skip-pull --enable-api-proxy \ - -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-all-tools --allow-all-paths --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log - env: - COPILOT_AGENT_RUNNER_TYPE: STANDALONE - 
COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} - COPILOT_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || '' }} - GH_AW_MCP_CONFIG: /home/runner/.copilot/mcp-config.json - GH_AW_PHASE: agent - GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} - GH_AW_VERSION: dev - GITHUB_API_URL: ${{ github.api_url }} - GITHUB_AW: true - GITHUB_HEAD_REF: ${{ github.head_ref }} - GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - GITHUB_REF_NAME: ${{ github.ref_name }} - GITHUB_SERVER_URL: ${{ github.server_url }} - GITHUB_STEP_SUMMARY: /tmp/gh-aw/agent-step-summary.md - GITHUB_WORKSPACE: ${{ github.workspace }} - GIT_AUTHOR_EMAIL: github-actions[bot]@users.noreply.github.com - GIT_AUTHOR_NAME: github-actions[bot] - GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com - GIT_COMMITTER_NAME: github-actions[bot] - XDG_CONFIG_HOME: /home/runner - - name: Detect inference access error - id: detect-inference-error - if: always() - continue-on-error: true - run: bash ${RUNNER_TEMP}/gh-aw/actions/detect_inference_access_error.sh - - name: Configure Git credentials - env: - REPO_NAME: ${{ github.repository }} - SERVER_URL: ${{ github.server_url }} - run: | - git config --global user.email "github-actions[bot]@users.noreply.github.com" - git config --global user.name "github-actions[bot]" - git config --global am.keepcr true - # Re-authenticate git with GitHub token - SERVER_URL_STRIPPED="${SERVER_URL#https://}" - git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" - echo "Git configured with standard GitHub Actions identity" - - name: Copy Copilot session state files to logs - if: always() - continue-on-error: true - run: | - # Copy Copilot session state files to logs folder for artifact collection - # This ensures they are in /tmp/gh-aw/ where secret redaction 
can scan them - SESSION_STATE_DIR="$HOME/.copilot/session-state" - LOGS_DIR="/tmp/gh-aw/sandbox/agent/logs" - - if [ -d "$SESSION_STATE_DIR" ]; then - echo "Copying Copilot session state files from $SESSION_STATE_DIR to $LOGS_DIR" - mkdir -p "$LOGS_DIR" - cp -v "$SESSION_STATE_DIR"/*.jsonl "$LOGS_DIR/" 2>/dev/null || true - echo "Session state files copied successfully" - else - echo "No session-state directory found at $SESSION_STATE_DIR" - fi - - name: Stop MCP Gateway - if: always() - continue-on-error: true - env: - MCP_GATEWAY_PORT: ${{ steps.start-mcp-gateway.outputs.gateway-port }} - MCP_GATEWAY_API_KEY: ${{ steps.start-mcp-gateway.outputs.gateway-api-key }} - GATEWAY_PID: ${{ steps.start-mcp-gateway.outputs.gateway-pid }} - run: | - bash ${RUNNER_TEMP}/gh-aw/actions/stop_mcp_gateway.sh "$GATEWAY_PID" - - name: Redact secrets in logs - if: always() - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/redact_secrets.cjs'); - await main(); - env: - GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN' - SECRET_COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} - SECRET_GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} - SECRET_GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} - SECRET_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Append agent step summary - if: always() - run: bash ${RUNNER_TEMP}/gh-aw/actions/append_agent_step_summary.sh - - name: Copy Safe Outputs - if: always() - env: - GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} - run: | - mkdir -p /tmp/gh-aw - cp "$GH_AW_SAFE_OUTPUTS" /tmp/gh-aw/safeoutputs.jsonl 2>/dev/null || true - - name: Ingest agent output - id: collect_output - if: 
always() - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} - GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" - GITHUB_SERVER_URL: ${{ github.server_url }} - GITHUB_API_URL: ${{ github.api_url }} - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/collect_ndjson_output.cjs'); - await main(); - - name: Parse agent logs for step summary - if: always() - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/ - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_copilot_log.cjs'); - await main(); - - name: Parse MCP Gateway logs for step summary - if: always() - uses: 
actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_mcp_gateway_log.cjs'); - await main(); - - name: Print firewall logs - if: always() - continue-on-error: true - env: - AWF_LOGS_DIR: /tmp/gh-aw/sandbox/firewall/logs - run: | - # Fix permissions on firewall logs so they can be uploaded as artifacts - # AWF runs with sudo, creating files owned by root - sudo chmod -R a+r /tmp/gh-aw/sandbox/firewall/logs 2>/dev/null || true - # Only run awf logs summary if awf command exists (it may not be installed if workflow failed before install step) - if command -v awf &> /dev/null; then - awf logs summary | tee -a "$GITHUB_STEP_SUMMARY" - else - echo 'AWF binary not installed, skipping firewall log summary' - fi - - name: Write agent output placeholder if missing - if: always() - run: | - if [ ! 
-f /tmp/gh-aw/agent_output.json ]; then - echo '{"items":[]}' > /tmp/gh-aw/agent_output.json - fi - - name: Upload agent artifacts - if: always() - continue-on-error: true - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 - with: - name: agent - path: | - /tmp/gh-aw/aw-prompts/prompt.txt - /tmp/gh-aw/sandbox/agent/logs/ - /tmp/gh-aw/redacted-urls.log - /tmp/gh-aw/mcp-logs/ - /tmp/gh-aw/proxy-logs/ - /tmp/gh-aw/agent-stdio.log - /tmp/gh-aw/agent/ - /tmp/gh-aw/safeoutputs.jsonl - /tmp/gh-aw/agent_output.json - if-no-files-found: ignore - - name: Upload firewall audit logs - if: always() - continue-on-error: true - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 - with: - name: firewall-audit-logs - path: /tmp/gh-aw/sandbox/firewall/logs/ - if-no-files-found: ignore - # --- Threat Detection (inline) --- - - name: Check if detection needed - id: detection_guard - if: always() - env: - OUTPUT_TYPES: ${{ steps.collect_output.outputs.output_types }} - HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }} - run: | - if [[ -n "$OUTPUT_TYPES" || "$HAS_PATCH" == "true" ]]; then - echo "run_detection=true" >> "$GITHUB_OUTPUT" - echo "Detection will run: output_types=$OUTPUT_TYPES, has_patch=$HAS_PATCH" - else - echo "run_detection=false" >> "$GITHUB_OUTPUT" - echo "Detection skipped: no agent outputs or patches to analyze" - fi - - name: Clear MCP configuration for detection - if: always() && steps.detection_guard.outputs.run_detection == 'true' - run: | - rm -f /tmp/gh-aw/mcp-config/mcp-servers.json - rm -f /home/runner/.copilot/mcp-config.json - rm -f "$GITHUB_WORKSPACE/.gemini/settings.json" - - name: Prepare threat detection files - if: always() && steps.detection_guard.outputs.run_detection == 'true' - run: | - mkdir -p /tmp/gh-aw/threat-detection/aw-prompts - cp /tmp/gh-aw/aw-prompts/prompt.txt /tmp/gh-aw/threat-detection/aw-prompts/prompt.txt 2>/dev/null || true - cp /tmp/gh-aw/agent_output.json 
/tmp/gh-aw/threat-detection/agent_output.json 2>/dev/null || true - for f in /tmp/gh-aw/aw-*.patch; do - [ -f "$f" ] && cp "$f" /tmp/gh-aw/threat-detection/ 2>/dev/null || true - done - echo "Prepared threat detection files:" - ls -la /tmp/gh-aw/threat-detection/ 2>/dev/null || true - - name: Setup threat detection - if: always() && steps.detection_guard.outputs.run_detection == 'true' - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - WORKFLOW_NAME: "Agentic Observability Central Kit" - WORKFLOW_DESCRIPTION: "Central reporting variant of the agentic observability kit for platform repositories" - HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }} - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/setup_threat_detection.cjs'); - await main(); - - name: Ensure threat-detection directory and log - if: always() && steps.detection_guard.outputs.run_detection == 'true' - run: | - mkdir -p /tmp/gh-aw/threat-detection - touch /tmp/gh-aw/threat-detection/detection.log - - name: Execute GitHub Copilot CLI - if: always() && steps.detection_guard.outputs.run_detection == 'true' - id: detection_agentic_execution - # Copilot CLI tool arguments (sorted): - # --allow-tool shell(cat) - # --allow-tool shell(grep) - # --allow-tool shell(head) - # --allow-tool shell(jq) - # --allow-tool shell(ls) - # --allow-tool shell(tail) - # --allow-tool shell(wc) - timeout-minutes: 20 - run: | - set -o pipefail - touch /tmp/gh-aw/agent-step-summary.md - # shellcheck disable=SC1003 - sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains 
"api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,raw.githubusercontent.com,registry.npmjs.org,telemetry.enterprise.githubcopilot.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.25.0 --skip-pull --enable-api-proxy \ - -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-tool '\''shell(cat)'\'' --allow-tool '\''shell(grep)'\'' --allow-tool '\''shell(head)'\'' --allow-tool '\''shell(jq)'\'' --allow-tool '\''shell(ls)'\'' --allow-tool '\''shell(tail)'\'' --allow-tool '\''shell(wc)'\'' --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log - env: - COPILOT_AGENT_RUNNER_TYPE: STANDALONE - COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} - COPILOT_MODEL: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} - GH_AW_PHASE: detection - GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - GH_AW_VERSION: dev - GITHUB_API_URL: ${{ github.api_url }} - GITHUB_AW: true - GITHUB_HEAD_REF: ${{ github.head_ref }} - GITHUB_REF_NAME: ${{ github.ref_name }} - GITHUB_SERVER_URL: ${{ github.server_url }} - GITHUB_STEP_SUMMARY: /tmp/gh-aw/agent-step-summary.md - GITHUB_WORKSPACE: ${{ github.workspace }} - GIT_AUTHOR_EMAIL: github-actions[bot]@users.noreply.github.com - GIT_AUTHOR_NAME: github-actions[bot] - GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com - GIT_COMMITTER_NAME: github-actions[bot] - XDG_CONFIG_HOME: /home/runner - - name: Parse threat detection results - id: parse_detection_results - if: always() && steps.detection_guard.outputs.run_detection == 'true' - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - with: - script: | - const { setupGlobals } = require('${{ runner.temp 
}}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_threat_detection_results.cjs'); - await main(); - - name: Upload threat detection log - if: always() && steps.detection_guard.outputs.run_detection == 'true' - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 - with: - name: detection - path: /tmp/gh-aw/threat-detection/detection.log - if-no-files-found: ignore - - name: Set detection conclusion - id: detection_conclusion - if: always() - env: - RUN_DETECTION: ${{ steps.detection_guard.outputs.run_detection }} - DETECTION_SUCCESS: ${{ steps.parse_detection_results.outputs.success }} - run: | - if [[ "$RUN_DETECTION" != "true" ]]; then - echo "conclusion=skipped" >> "$GITHUB_OUTPUT" - echo "success=true" >> "$GITHUB_OUTPUT" - echo "Detection was not needed, marking as skipped" - elif [[ "$DETECTION_SUCCESS" == "true" ]]; then - echo "conclusion=success" >> "$GITHUB_OUTPUT" - echo "success=true" >> "$GITHUB_OUTPUT" - echo "Detection passed successfully" - else - echo "conclusion=failure" >> "$GITHUB_OUTPUT" - echo "success=false" >> "$GITHUB_OUTPUT" - echo "Detection found issues" - fi - - conclusion: - needs: - - activation - - agent - - safe_outputs - if: always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true') - runs-on: ubuntu-slim - permissions: - contents: read - discussions: write - issues: write - concurrency: - group: "gh-aw-conclusion-agentic-observability-central-kit" - cancel-in-progress: false - outputs: - noop_message: ${{ steps.noop.outputs.noop_message }} - tools_reported: ${{ steps.missing_tool.outputs.tools_reported }} - total_count: ${{ steps.missing_tool.outputs.total_count }} - steps: - - name: Checkout actions folder - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: github/gh-aw - sparse-checkout: | - actions - 
persist-credentials: false - - name: Setup Scripts - uses: ./actions/setup - with: - destination: ${{ runner.temp }}/gh-aw/actions - - name: Download agent output artifact - id: download-agent-output - continue-on-error: true - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - with: - name: agent - path: /tmp/gh-aw/ - - name: Setup agent output environment variable - id: setup-agent-output-env - if: steps.download-agent-output.outcome == 'success' - run: | - mkdir -p /tmp/gh-aw/ - find "/tmp/gh-aw/" -type f -print - echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" - - name: Process No-Op Messages - id: noop - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} - GH_AW_NOOP_MAX: "1" - GH_AW_WORKFLOW_NAME: "Agentic Observability Central Kit" - GH_AW_TRACKER_ID: "agentic-observability-central-kit" - with: - github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/noop.cjs'); - await main(); - - name: Record Missing Tool - id: missing_tool - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} - GH_AW_WORKFLOW_NAME: "Agentic Observability Central Kit" - GH_AW_TRACKER_ID: "agentic-observability-central-kit" - with: - github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/missing_tool.cjs'); - await main(); - - name: Handle Agent Failure - 
id: handle_agent_failure - if: always() - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} - GH_AW_WORKFLOW_NAME: "Agentic Observability Central Kit" - GH_AW_TRACKER_ID: "agentic-observability-central-kit" - GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} - GH_AW_WORKFLOW_ID: "agentic-observability-central-kit" - GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.activation.outputs.secret_verification_result }} - GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} - GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.agent.outputs.inference_access_error }} - GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }} - GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }} - GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }} - GH_AW_GROUP_REPORTS: "false" - GH_AW_FAILURE_REPORT_AS_ISSUE: "true" - GH_AW_TIMEOUT_MINUTES: "30" - with: - github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_agent_failure.cjs'); - await main(); - - name: Handle No-Op Message - id: handle_noop_message - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} - GH_AW_WORKFLOW_NAME: "Agentic Observability Central Kit" - GH_AW_TRACKER_ID: "agentic-observability-central-kit" - GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} 
- GH_AW_NOOP_MESSAGE: ${{ steps.noop.outputs.noop_message }} - GH_AW_NOOP_REPORT_AS_ISSUE: "false" - with: - github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs'); - await main(); - - safe_outputs: - needs: agent - if: (!cancelled()) && needs.agent.result != 'skipped' && needs.agent.outputs.detection_success == 'true' - runs-on: ubuntu-slim - permissions: - contents: read - discussions: write - issues: write - timeout-minutes: 15 - env: - GH_AW_CALLER_WORKFLOW_ID: "${{ github.repository }}/agentic-observability-central-kit" - GH_AW_ENGINE_ID: "copilot" - GH_AW_ENGINE_MODEL: ${{ needs.agent.outputs.model }} - GH_AW_TRACKER_ID: "agentic-observability-central-kit" - GH_AW_WORKFLOW_ID: "agentic-observability-central-kit" - GH_AW_WORKFLOW_NAME: "Agentic Observability Central Kit" - outputs: - code_push_failure_count: ${{ steps.process_safe_outputs.outputs.code_push_failure_count }} - code_push_failure_errors: ${{ steps.process_safe_outputs.outputs.code_push_failure_errors }} - create_discussion_error_count: ${{ steps.process_safe_outputs.outputs.create_discussion_error_count }} - create_discussion_errors: ${{ steps.process_safe_outputs.outputs.create_discussion_errors }} - created_issue_number: ${{ steps.process_safe_outputs.outputs.created_issue_number }} - created_issue_url: ${{ steps.process_safe_outputs.outputs.created_issue_url }} - process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }} - process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} - steps: - - name: Checkout actions folder - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: github/gh-aw - sparse-checkout: | - actions - 
persist-credentials: false - - name: Setup Scripts - uses: ./actions/setup - with: - destination: ${{ runner.temp }}/gh-aw/actions - - name: Download agent output artifact - id: download-agent-output - continue-on-error: true - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - with: - name: agent - path: /tmp/gh-aw/ - - name: Setup agent output environment variable - id: setup-agent-output-env - if: steps.download-agent-output.outcome == 'success' - run: | - mkdir -p /tmp/gh-aw/ - find "/tmp/gh-aw/" -type f -print - echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" - - name: Configure GH_HOST for enterprise compatibility - id: ghes-host-config - shell: bash - run: | - # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct - # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op. - GH_HOST="${GITHUB_SERVER_URL#https://}" - GH_HOST="${GH_HOST#http://}" - echo "GH_HOST=${GH_HOST}" >> "$GITHUB_OUTPUT" - - name: Process Safe Outputs - id: process_safe_outputs - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} - GH_AW_ALLOWED_DOMAINS: 
"api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" - GITHUB_SERVER_URL: ${{ github.server_url }} - GITHUB_API_URL: ${{ github.api_url }} - GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"audits\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"target-repo\":\"${{ env.REPORT_REPOSITORY }}\",\"title_prefix\":\"[observability central] \"},\"create_issue\":{\"group\":true,\"labels\":[\"agentics\",\"warning\",\"platform\"],\"max\":10,\"target-repo\":\"${{ env.REPORT_REPOSITORY }}\"},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"}}" - with: - github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/safe_output_handler_manager.cjs'); - await main(); - - name: Upload Safe Output Items - if: always() - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 - with: - name: safe-output-items - path: 
/tmp/gh-aw/safe-output-items.jsonl - if-no-files-found: ignore - diff --git a/.github/workflows/agentic-observability-central-kit.md b/.github/workflows/agentic-observability-central-kit.md deleted file mode 100644 index 90678ea5be2..00000000000 --- a/.github/workflows/agentic-observability-central-kit.md +++ /dev/null @@ -1,179 +0,0 @@ ---- -description: Central reporting variant of the agentic observability kit for platform repositories -on: - schedule: weekly on monday around 08:30 - workflow_dispatch: -permissions: - contents: read - actions: read - issues: read - pull-requests: read - discussions: read -env: - REPORT_REPOSITORY: ${{ vars.REPORT_REPOSITORY || github.repository }} -engine: copilot -strict: true -tracker-id: agentic-observability-central-kit -tools: - agentic-workflows: - github: - toolsets: [default, discussions] - allowed-repos: all - min-integrity: merged -safe-outputs: - create-discussion: - target-repo: ${{ env.REPORT_REPOSITORY }} - expires: 7d - category: "audits" - title-prefix: "[observability central] " - max: 1 - close-older-discussions: true - create-issue: - target-repo: ${{ env.REPORT_REPOSITORY }} - labels: [agentics, warning, platform] - max: 10 - group: true - noop: - report-as-issue: false -timeout-minutes: 30 -imports: - - shared/reporting.md ---- - -# Agentic Observability Central Kit - -You are the central reporting variant of the agentic observability kit. Analyze recent agentic workflow runs for the current repository, but publish the portfolio report and warning issues into the central reporting repository defined by `${{ env.REPORT_REPOSITORY }}`. - -## Mission - -Produce one platform-readable report and a small number of targeted warning issues so that a central workflow operations team can monitor many repositories from one place. - -Focus on: - -1. repeated drift away from a successful baseline -2. repeated risky behavior changes such as new write posture, new MCP failures, or more blocked requests -3. 
repeated resource-heavy or weak-control patterns -4. low-value agentic workflows that should be simplified later -5. workflows that do not form stable cohorts and therefore resist trustworthy comparison - -Always create a discussion report in the central reporting repository. Create issues only for repeated, actionable patterns. - -## Data Collection Rules - -- Use the `agentic-workflows` MCP tool, not shell commands. -- Start with the `logs` tool over the last 14 days. -- Leave `workflow_name` empty so you analyze the full repository. -- Use `count` large enough to cover the repository, typically `300`. -- Use the `audit` tool only for up to 3 runs that need deeper inspection. -- If there are very few runs, still create a report and explain the limitation. - -## Signals To Use - -Prefer the built-in agentic signals from logs and audit data: - -- `task_domain.name` and `task_domain.label` -- `behavior_fingerprint.execution_style` -- `behavior_fingerprint.tool_breadth` -- `behavior_fingerprint.actuation_style` -- `behavior_fingerprint.resource_profile` -- `behavior_fingerprint.dispatch_mode` -- `agentic_assessments[].kind` -- `agentic_assessments[].severity` -- `comparison.baseline.selection` -- `comparison.baseline.matched_on[]` -- `comparison.classification.label` -- `comparison.classification.reason_codes[]` -- `comparison.recommendation.action` - -## Reporting Requirements - -The discussion is for a platform team that may not know the local repository well, so every highlighted workflow must include repository context. - -### Visible Summary - -Keep these sections visible: - -1. `### Executive Summary` -2. `### Repository Summary` -3. `### Highest Risk Workflows` -4. `### Platform Actions` - -Include: - -- repository name -- date range analyzed -- workflows analyzed -- runs analyzed -- risky runs -- repeated warning candidates -- deterministic candidates - -### Details - -Put verbose per-workflow breakdowns inside `
` blocks. - -### Central Routing Expectations - -Because the outputs land in a central repository: - -- mention the analyzed source repository explicitly in the discussion title or opening paragraph -- name the source repository in every warning issue -- include up to 3 representative run links -- avoid repo-local language like "this repo" without naming it - -## Warning Thresholds - -Create at most one warning issue per workflow when, in the last 14 days: - -1. two or more runs for the same workflow have `comparison.classification.label == "risky"` -2. two or more runs contain `new_mcp_failure` or `blocked_requests_increase` -3. two or more runs contain a medium or high severity `resource_heavy_for_domain` -4. two or more runs contain a medium or high severity `poor_agentic_control` - -Do not open issues for single-run anomalies. - -## Optimization Candidates - -Keep these in the report unless they are severe and repeated: - -- repeated `overkill_for_agentic` -- workflows that remain `lean`, `directed`, and `narrow` -- workflows whose comparisons keep falling back to `latest_success` - -These are platform portfolio decisions, not immediate incidents. - -## Use Of Audit - -Use `audit` only to deepen the top few warnings. Good candidates are: - -- the newest risky run for a repeatedly warning workflow -- a run with a new MCP failure -- a run that changed from read-only to write-capable posture - -Fold audit evidence back into the report and issues. Do not dump raw audit output. 
- -## Output Requirements - -### Discussion - -Always create one discussion in `${{ env.REPORT_REPOSITORY }}` that includes: - -- the source repository name -- the date range analyzed -- the clearest repeated risk patterns -- the most common assessment kinds -- deterministic candidates -- workflows that need owner attention now - -### Issues - -When creating a warning issue in `${{ env.REPORT_REPOSITORY }}`: - -- name both the source repository and the workflow -- explain the repeated evidence with run counts and specific reason codes or assessment kinds -- include the most relevant recommendation from the comparison or assessment data -- link up to 3 representative runs - -### No-op - -If the repository has no recent runs or no report can be produced, call `noop` with a short explanation. Otherwise do not use `noop`. diff --git a/.serena/memories/project_overview.md b/.serena/memories/project_overview.md new file mode 100644 index 00000000000..b1ff0f29782 --- /dev/null +++ b/.serena/memories/project_overview.md @@ -0,0 +1 @@ +gh-aw is a Go-based GitHub CLI extension (`gh aw`) for authoring agentic workflows in Markdown and compiling/running them as GitHub Actions. Primary stack: Go, some JS/CommonJS under actions/setup/js, bash scripts, docs in Astro/Starlight. Core areas: cmd/gh-aw entrypoint, pkg/cli commands, pkg/parser frontmatter/schema parsing, pkg/workflow compilation, .github/workflows sample workflows. \ No newline at end of file diff --git a/.serena/memories/style_and_conventions.md b/.serena/memories/style_and_conventions.md new file mode 100644 index 00000000000..f5156d9f1cb --- /dev/null +++ b/.serena/memories/style_and_conventions.md @@ -0,0 +1 @@ +Prefer small focused files. Use console formatting helpers for CLI output and send diagnostic output to stderr. Use logger.New with pkg:file namespaces for debug logging. Use `any` instead of `interface{}`. Add build tags to every Go test file. 
For JS in actions/setup/js use GitHub Actions core APIs and run fmt/lint for CJS. Recompile workflow .md changes into tracked .lock.yml files. Avoid legacy/fallback support unless requested. \ No newline at end of file diff --git a/.serena/memories/suggested_commands.md b/.serena/memories/suggested_commands.md new file mode 100644 index 00000000000..97e530bebf4 --- /dev/null +++ b/.serena/memories/suggested_commands.md @@ -0,0 +1 @@ +Key commands: `make fmt`, `make lint`, `make test-unit`, `make recompile`, `make build`, `make agent-finish`. Prefer selective tests like `go test -v -run "TestName" ./pkg/cli/`. Run CLI locally with `./gh-aw --help`, `./gh-aw compile`, `./gh-aw audit `, `./gh-aw logs`. Standard macOS utilities available: git, ls, cd, grep/rg, find, sed, awk. \ No newline at end of file diff --git a/.serena/memories/task_completion.md b/.serena/memories/task_completion.md new file mode 100644 index 00000000000..0f672fe6a22 --- /dev/null +++ b/.serena/memories/task_completion.md @@ -0,0 +1 @@ +Before commit always run `make agent-finish` (or at least `make fmt` and relevant selective tests). After modifying Go files run `make fmt`; after workflow markdown changes run `make recompile`; after schema changes run `make build`; after JS changes run `make fmt-cjs` and `make lint-cjs`. Do not leave workflow markdown changes uncompiled. \ No newline at end of file diff --git a/.serena/project.yml b/.serena/project.yml index 8da1c65e531..ba912e71ac9 100644 --- a/.serena/project.yml +++ b/.serena/project.yml @@ -118,3 +118,26 @@ symbol_info_budget: # Note: the backend is fixed at startup. If a project with a different backend # is activated post-init, an error will be returned. language_backend: + +# line ending convention to use when writing source files. +# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default) +# This does not affect Serena's own files (e.g. 
memories and configuration files), which always use native line endings. +line_ending: + +# list of regex patterns which, when matched, mark a memory entry as read‑only. +# Extends the list from the global configuration, merging the two lists. +read_only_memory_patterns: [] + +# list of regex patterns for memories to completely ignore. +# Matching memories will not appear in list_memories or activate_project output +# and cannot be accessed via read_memory or write_memory. +# To access ignored memory files, use the read_file tool on the raw file path. +# Extends the list from the global configuration, merging the two lists. +# Example: ["_archive/.*", "_episodes/.*"] +ignored_memory_patterns: [] + +# advanced configuration option allowing to configure language server-specific options. +# Maps the language key to the options. +# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available. +# No documentation on options means no options are available. 
+ls_specific_settings: {} diff --git a/cmd/gh-aw/main.go b/cmd/gh-aw/main.go index dfeb20d4149..b348ec1607e 100644 --- a/cmd/gh-aw/main.go +++ b/cmd/gh-aw/main.go @@ -716,7 +716,6 @@ Use "` + string(constants.CLIExtensionPrefix) + ` help all" to show help for all logsCmd := cli.NewLogsCommand() auditCmd := cli.NewAuditCommand() healthCmd := cli.NewHealthCommand() - observabilityPolicyCmd := cli.NewObservabilityPolicyCommand() mcpServerCmd := cli.NewMCPServerCommand() prCmd := cli.NewPRCommand() secretsCmd := cli.NewSecretsCommand() @@ -759,7 +758,6 @@ Use "` + string(constants.CLIExtensionPrefix) + ` help all" to show help for all logsCmd.GroupID = "analysis" auditCmd.GroupID = "analysis" healthCmd.GroupID = "analysis" - observabilityPolicyCmd.GroupID = "analysis" checksCmd.GroupID = "analysis" // Utilities @@ -789,7 +787,6 @@ Use "` + string(constants.CLIExtensionPrefix) + ` help all" to show help for all rootCmd.AddCommand(logsCmd) rootCmd.AddCommand(auditCmd) rootCmd.AddCommand(healthCmd) - rootCmd.AddCommand(observabilityPolicyCmd) rootCmd.AddCommand(checksCmd) rootCmd.AddCommand(mcpCmd) rootCmd.AddCommand(mcpServerCmd) diff --git a/pkg/cli/audit_comparison.go b/pkg/cli/audit_comparison.go index d76600b4f54..aedcabe3f77 100644 --- a/pkg/cli/audit_comparison.go +++ b/pkg/cli/audit_comparison.go @@ -127,10 +127,8 @@ func loadAuditComparisonSnapshotFromArtifacts(run WorkflowRun, logsPath string, } func buildAuditComparisonCandidateFromSummary(summary *RunSummary, logsPath string) auditComparisonCandidate { - posture := "read_only" - if summary.Run.SafeItemsCount > 0 || len(extractCreatedItemsFromManifest(logsPath)) > 0 { - posture = "write_capable" - } + createdItems := extractCreatedItemsFromManifest(logsPath) + posture := deriveAuditPosture(createdItems) blockedRequests := 0 if summary.FirewallAnalysis != nil { diff --git a/pkg/cli/logs_models.go b/pkg/cli/logs_models.go index b9e9d2c888f..3e96fc05cd1 100644 --- a/pkg/cli/logs_models.go +++ 
b/pkg/cli/logs_models.go @@ -190,7 +190,7 @@ type RunSummary struct { AgenticAssessments []AgenticAssessment `json:"agentic_assessments,omitempty"` // Derived agentic judgments AccessAnalysis *DomainAnalysis `json:"access_analysis"` // Network access analysis FirewallAnalysis *FirewallAnalysis `json:"firewall_analysis"` // Firewall log analysis - PolicyAnalysis *PolicyAnalysis `json:"policy_analysis,omitempty"` // Firewall policy rule attribution + PolicyAnalysis *PolicyAnalysis `json:"policy_analysis,omitempty"` // Firewall policy rule attribution RedactedDomainsAnalysis *RedactedDomainsAnalysis `json:"redacted_domains_analysis"` // Redacted URL domains analysis MissingTools []MissingToolReport `json:"missing_tools"` // Missing tool reports MissingData []MissingDataReport `json:"missing_data"` // Missing data reports diff --git a/pkg/cli/mcp_schema_test.go b/pkg/cli/mcp_schema_test.go index 9d449bbc8b0..8abb8427c4c 100644 --- a/pkg/cli/mcp_schema_test.go +++ b/pkg/cli/mcp_schema_test.go @@ -474,44 +474,6 @@ func TestGeneratedSchemasValidateRealOutput(t *testing.T) { } }) - t.Run("validates ObservabilityPolicy schema can be generated", func(t *testing.T) { - schema, err := GenerateSchema[ObservabilityPolicy]() - if err != nil { - t.Fatalf("GenerateSchema failed: %v", err) - } - - resolved, err := schema.Resolve(&jsonschema.ResolveOptions{}) - if err != nil { - t.Fatalf("Schema.Resolve failed: %v", err) - } - - data := ObservabilityPolicy{ - SchemaVersion: "1.0.0", - Rules: []ObservabilityPolicyRule{{ - ID: "block-domain", - Action: "fail", - Message: "blocked domain detected", - Match: ObservabilityPolicyMatch{ - BlockedDomains: []string{"evil.example.com"}, - }, - }}, - } - - jsonBytes, err := json.Marshal(data) - if err != nil { - t.Fatalf("json.Marshal failed: %v", err) - } - - var jsonValue map[string]any - if err := json.Unmarshal(jsonBytes, &jsonValue); err != nil { - t.Fatalf("json.Unmarshal failed: %v", err) - } - - if err := resolved.Validate(jsonValue); 
err != nil { - t.Errorf("Schema should validate real ObservabilityPolicy output: %v", err) - } - }) - t.Run("validates WorkflowStatus schema against real data", func(t *testing.T) { // Generate schema for WorkflowStatus schema, err := GenerateSchema[WorkflowStatus]() diff --git a/pkg/cli/observability_policy.go b/pkg/cli/observability_policy.go deleted file mode 100644 index e8c55083644..00000000000 --- a/pkg/cli/observability_policy.go +++ /dev/null @@ -1,243 +0,0 @@ -package cli - -import ( - "fmt" - "slices" - "strings" -) - -type ObservabilityPolicy struct { - SchemaVersion string `json:"schema_version"` - Rules []ObservabilityPolicyRule `json:"rules"` -} - -type ObservabilityPolicyRule struct { - ID string `json:"id"` - Action string `json:"action"` - Message string `json:"message"` - Match ObservabilityPolicyMatch `json:"match"` -} - -type ObservabilityPolicyMatch struct { - BlockedDomains []string `json:"blocked_domains,omitempty"` - MinBlockedRequests int `json:"min_blocked_requests,omitempty"` - InsightSeverities []string `json:"insight_severities,omitempty"` - ActuationModes []string `json:"actuation_modes,omitempty"` - MCPFailureServers []string `json:"mcp_failure_servers,omitempty"` - CreatedItemTypes []string `json:"created_item_types,omitempty"` -} - -type ObservabilityPayload struct { - Overview ObservabilityPayloadOverview `json:"overview"` - Network *ObservabilityPayloadNetwork `json:"network,omitempty"` - Actuation *ObservabilityPayloadActuation `json:"actuation,omitempty"` - Tooling *ObservabilityPayloadTooling `json:"tooling,omitempty"` - Insights []ObservabilityInsight `json:"insights,omitempty"` - Lineage *ObservabilityPayloadLineage `json:"lineage,omitempty"` - Execution *ObservabilityPayloadExecution `json:"execution,omitempty"` - Reasoning *ObservabilityPayloadReasoning `json:"reasoning,omitempty"` -} - -type ObservabilityPayloadOverview struct { - WorkflowName string `json:"workflow_name,omitempty"` - RunID any `json:"run_id,omitempty"` -} 
- -type ObservabilityPayloadLineage struct { - TraceID string `json:"trace_id,omitempty"` - Context *AwContext `json:"aw_context,omitempty"` -} - -type ObservabilityPayloadExecution struct { - TaskStatus string `json:"task_status,omitempty"` -} - -type ObservabilityPayloadReasoning struct { - Mode string `json:"mode,omitempty"` -} - -type ObservabilityPayloadNetwork struct { - BlockedRequests int `json:"blocked_requests,omitempty"` - BlockedDomains []string `json:"blocked_domains,omitempty"` -} - -type ObservabilityPayloadActuation struct { - Mode string `json:"mode,omitempty"` - CreatedItems []ObservabilityCreatedItem `json:"created_items,omitempty"` -} - -type ObservabilityCreatedItem struct { - Type string `json:"type"` -} - -type ObservabilityPayloadTooling struct { - MCPFailures []ObservabilityPolicyMCPFailure `json:"mcp_failures,omitempty"` -} - -type ObservabilityPolicyMCPFailure struct { - ServerName string `json:"server_name"` -} - -type ObservabilityPolicyViolation struct { - RuleID string `json:"rule_id"` - Action string `json:"action"` - Message string `json:"message"` - Evidence string `json:"evidence,omitempty"` -} - -type ObservabilityPolicyResult struct { - Violations []ObservabilityPolicyViolation `json:"violations,omitempty"` -} - -func EvaluateObservabilityPolicy(policy ObservabilityPolicy, payload ObservabilityPayload) ObservabilityPolicyResult { - result := ObservabilityPolicyResult{Violations: []ObservabilityPolicyViolation{}} - - for _, rule := range policy.Rules { - if violation, matched := evaluateObservabilityPolicyRule(rule, payload); matched { - result.Violations = append(result.Violations, violation) - } - } - - return result -} - -func evaluateObservabilityPolicyRule(rule ObservabilityPolicyRule, payload ObservabilityPayload) (ObservabilityPolicyViolation, bool) { - evidenceParts := make([]string, 0, 4) - matched := false - - if len(rule.Match.BlockedDomains) > 0 { - matchedDomain := firstMatch(rule.Match.BlockedDomains, 
payloadBlockedDomains(payload)) - if matchedDomain == "" { - return ObservabilityPolicyViolation{}, false - } - matched = true - evidenceParts = append(evidenceParts, "blocked_domain="+matchedDomain) - } - - if rule.Match.MinBlockedRequests > 0 { - blocked := payloadBlockedRequests(payload) - if blocked < rule.Match.MinBlockedRequests { - return ObservabilityPolicyViolation{}, false - } - matched = true - evidenceParts = append(evidenceParts, fmt.Sprintf("blocked_requests_gte=%d actual=%d", rule.Match.MinBlockedRequests, blocked)) - } - - if len(rule.Match.InsightSeverities) > 0 { - severity := firstInsightSeverityMatch(rule.Match.InsightSeverities, payload.Insights) - if severity == "" { - return ObservabilityPolicyViolation{}, false - } - matched = true - evidenceParts = append(evidenceParts, "insight_severity="+severity) - } - - if len(rule.Match.ActuationModes) > 0 { - mode := payloadActuationMode(payload) - if !containsString(rule.Match.ActuationModes, mode) { - return ObservabilityPolicyViolation{}, false - } - matched = true - evidenceParts = append(evidenceParts, "actuation_mode="+mode) - } - - if len(rule.Match.MCPFailureServers) > 0 { - server := firstMCPFailureServerMatch(rule.Match.MCPFailureServers, payload) - if server == "" { - return ObservabilityPolicyViolation{}, false - } - matched = true - evidenceParts = append(evidenceParts, "mcp_failure_server="+server) - } - - if len(rule.Match.CreatedItemTypes) > 0 { - itemType := firstCreatedItemTypeMatch(rule.Match.CreatedItemTypes, payload) - if itemType == "" { - return ObservabilityPolicyViolation{}, false - } - matched = true - evidenceParts = append(evidenceParts, "created_item_type="+itemType) - } - - if !matched { - return ObservabilityPolicyViolation{}, false - } - - return ObservabilityPolicyViolation{ - RuleID: rule.ID, - Action: rule.Action, - Message: rule.Message, - Evidence: joinEvidence(evidenceParts), - }, true -} - -func payloadBlockedDomains(payload ObservabilityPayload) []string { - if 
payload.Network == nil { - return nil - } - return payload.Network.BlockedDomains -} - -func payloadBlockedRequests(payload ObservabilityPayload) int { - if payload.Network == nil { - return 0 - } - return payload.Network.BlockedRequests -} - -func payloadActuationMode(payload ObservabilityPayload) string { - if payload.Actuation == nil { - return "" - } - return payload.Actuation.Mode -} - -func firstMCPFailureServerMatch(allowed []string, payload ObservabilityPayload) string { - if payload.Tooling == nil { - return "" - } - for _, failure := range payload.Tooling.MCPFailures { - if containsString(allowed, failure.ServerName) { - return failure.ServerName - } - } - return "" -} - -func firstCreatedItemTypeMatch(allowed []string, payload ObservabilityPayload) string { - if payload.Actuation == nil { - return "" - } - for _, item := range payload.Actuation.CreatedItems { - if containsString(allowed, item.Type) { - return item.Type - } - } - return "" -} - -func firstInsightSeverityMatch(allowed []string, insights []ObservabilityInsight) string { - for _, insight := range insights { - if containsString(allowed, insight.Severity) { - return insight.Severity - } - } - return "" -} - -func firstMatch(allowed []string, actual []string) string { - for _, item := range actual { - if containsString(allowed, item) { - return item - } - } - return "" -} - -func containsString(items []string, target string) bool { - return slices.Contains(items, target) -} - -func joinEvidence(parts []string) string { - return strings.Join(parts, " ") -} diff --git a/pkg/cli/observability_policy_command.go b/pkg/cli/observability_policy_command.go deleted file mode 100644 index 5c1d4b2f355..00000000000 --- a/pkg/cli/observability_policy_command.go +++ /dev/null @@ -1,264 +0,0 @@ -package cli - -import ( - "encoding/json" - "errors" - "fmt" - "os" - - "github.com/github/gh-aw/pkg/console" - "github.com/github/gh-aw/pkg/constants" - "github.com/github/gh-aw/pkg/logger" - "github.com/spf13/cobra" 
-) - -var observabilityPolicyLog = logger.New("cli:observability_policy") - -type ObservabilityPolicyEvalConfig struct { - PolicyPath string - ReportPath string - JSONOutput bool -} - -type ObservabilityPolicyEvaluation struct { - PolicyPath string `json:"policy_path"` - ReportPath string `json:"report_path"` - Summary ObservabilityPolicyEvaluationSummary `json:"summary"` - Violations []ObservabilityPolicyViolation `json:"violations,omitempty"` -} - -type ObservabilityPolicyEvaluationSummary struct { - Status string `json:"status"` - TotalViolations int `json:"total_violations"` - FailViolations int `json:"fail_violations"` - GateViolations int `json:"gate_violations"` - WarnViolations int `json:"warn_violations"` - Blocking bool `json:"blocking"` -} - -// NewObservabilityPolicyCommand creates the observability-policy command. -func NewObservabilityPolicyCommand() *cobra.Command { - cmd := &cobra.Command{ - Use: "observability-policy", - Short: "Evaluate observability reports against guardrail policies", - Long: `Evaluate an observability report against a policy file to surface guardrail decisions. - -This command reads two JSON files: -- A policy file that defines fail, gate, or warn rules -- An observability report payload produced for a workflow run - -The result can be rendered for people or emitted as JSON for automation. -Blocking actions (fail and gate) return a non-zero exit status. 
- -Examples: - ` + string(constants.CLIExtensionPrefix) + ` observability-policy eval --policy policy.json --report observability-report.json - ` + string(constants.CLIExtensionPrefix) + ` observability-policy eval --policy policy.json --report observability-report.json --json`, - } - - cmd.AddCommand(newObservabilityPolicyEvalCommand()) - - return cmd -} - -func newObservabilityPolicyEvalCommand() *cobra.Command { - cmd := &cobra.Command{ - Use: "eval", - Short: "Evaluate a policy against an observability report", - Long: `Evaluate an observability policy against a workflow observability report. - -This command is intended for immediate guardrail checks in local development, -CI, or follow-up analysis after running gh aw logs or gh aw audit. - -Examples: - ` + string(constants.CLIExtensionPrefix) + ` observability-policy eval --policy policy.json --report observability-report.json - ` + string(constants.CLIExtensionPrefix) + ` observability-policy eval --policy policy.json --report observability-report.json --json`, - RunE: func(cmd *cobra.Command, args []string) error { - policyPath, _ := cmd.Flags().GetString("policy") - reportPath, _ := cmd.Flags().GetString("report") - jsonOutput, _ := cmd.Flags().GetBool("json") - - config := ObservabilityPolicyEvalConfig{ - PolicyPath: policyPath, - ReportPath: reportPath, - JSONOutput: jsonOutput, - } - - return RunObservabilityPolicyEval(config) - }, - } - - cmd.Flags().String("policy", "", "Path to the observability policy JSON file") - cmd.Flags().String("report", "", "Path to the observability report JSON file") - addJSONFlag(cmd) - _ = cmd.MarkFlagRequired("policy") - _ = cmd.MarkFlagRequired("report") - - return cmd -} - -// RunObservabilityPolicyEval executes observability policy evaluation. 
-func RunObservabilityPolicyEval(config ObservabilityPolicyEvalConfig) error { - if config.PolicyPath == "" { - return errors.New("policy path is required") - } - if config.ReportPath == "" { - return errors.New("report path is required") - } - - policy, err := readObservabilityPolicyFile(config.PolicyPath) - if err != nil { - return err - } - - payload, err := readObservabilityPayloadFile(config.ReportPath) - if err != nil { - return err - } - - observabilityPolicyLog.Printf("Evaluating policy=%s report=%s", config.PolicyPath, config.ReportPath) - - result := EvaluateObservabilityPolicy(policy, payload) - evaluation := buildObservabilityPolicyEvaluation(config, result) - - if config.JSONOutput { - output, err := json.MarshalIndent(evaluation, "", " ") - if err != nil { - return fmt.Errorf("failed to marshal observability policy result: %w", err) - } - fmt.Println(string(output)) - } else { - renderObservabilityPolicyEvaluation(evaluation) - } - - return evaluation.summaryError() -} - -func readObservabilityPolicyFile(path string) (ObservabilityPolicy, error) { - content, err := os.ReadFile(path) - if err != nil { - return ObservabilityPolicy{}, fmt.Errorf("failed to read observability policy file: %w", err) - } - - var policy ObservabilityPolicy - if err := json.Unmarshal(content, &policy); err != nil { - return ObservabilityPolicy{}, fmt.Errorf("failed to parse observability policy file: %w", err) - } - - return policy, nil -} - -func readObservabilityPayloadFile(path string) (ObservabilityPayload, error) { - content, err := os.ReadFile(path) - if err != nil { - return ObservabilityPayload{}, fmt.Errorf("failed to read observability report file: %w", err) - } - - var payload ObservabilityPayload - if err := json.Unmarshal(content, &payload); err != nil { - return ObservabilityPayload{}, fmt.Errorf("failed to parse observability report file: %w", err) - } - - return payload, nil -} - -func buildObservabilityPolicyEvaluation(config ObservabilityPolicyEvalConfig, 
result ObservabilityPolicyResult) ObservabilityPolicyEvaluation { - summary := summarizeObservabilityPolicyResult(result) - - return ObservabilityPolicyEvaluation{ - PolicyPath: config.PolicyPath, - ReportPath: config.ReportPath, - Summary: summary, - Violations: result.Violations, - } -} - -func summarizeObservabilityPolicyResult(result ObservabilityPolicyResult) ObservabilityPolicyEvaluationSummary { - summary := ObservabilityPolicyEvaluationSummary{ - Status: "pass", - } - - for _, violation := range result.Violations { - summary.TotalViolations++ - switch violation.Action { - case "fail": - summary.FailViolations++ - case "gate": - summary.GateViolations++ - case "warn": - summary.WarnViolations++ - } - } - - summary.Blocking = summary.FailViolations > 0 || summary.GateViolations > 0 - - switch { - case summary.FailViolations > 0: - summary.Status = "fail" - case summary.GateViolations > 0: - summary.Status = "gate" - case summary.WarnViolations > 0: - summary.Status = "warn" - } - - return summary -} - -func renderObservabilityPolicyEvaluation(evaluation ObservabilityPolicyEvaluation) { - summary := evaluation.Summary - - if summary.TotalViolations == 0 { - fmt.Fprintln(os.Stderr, console.FormatSuccessMessage("No observability policy violations detected")) - return - } - - fmt.Fprintln(os.Stderr, console.FormatInfoMessage( - fmt.Sprintf("Observability policy evaluation found %d violation(s)", summary.TotalViolations), - )) - - for _, violation := range evaluation.Violations { - message := fmt.Sprintf("%s: %s", violation.RuleID, violation.Message) - if violation.Evidence != "" { - message += " (" + violation.Evidence + ")" - } - - switch violation.Action { - case "fail": - fmt.Fprintln(os.Stderr, console.FormatErrorMessage(message)) - case "gate": - fmt.Fprintln(os.Stderr, console.FormatWarningMessage(message)) - case "warn": - fmt.Fprintln(os.Stderr, console.FormatWarningMessage(message)) - default: - fmt.Fprintln(os.Stderr, console.FormatInfoMessage(message)) 
- } - } - - if summary.FailViolations > 0 { - fmt.Fprintln(os.Stderr, console.FormatErrorMessage( - fmt.Sprintf("Evaluation failed with %d fail violation(s)", summary.FailViolations), - )) - return - } - - if summary.GateViolations > 0 { - fmt.Fprintln(os.Stderr, console.FormatWarningMessage( - fmt.Sprintf("Evaluation requires approval because %d gate violation(s) matched", summary.GateViolations), - )) - return - } - - fmt.Fprintln(os.Stderr, console.FormatWarningMessage( - fmt.Sprintf("Evaluation completed with %d warning violation(s)", summary.WarnViolations), - )) -} - -func (evaluation ObservabilityPolicyEvaluation) summaryError() error { - switch evaluation.Summary.Status { - case "fail": - return fmt.Errorf("observability policy evaluation failed with %d fail violation(s)", evaluation.Summary.FailViolations) - case "gate": - return fmt.Errorf("observability policy evaluation requires approval because %d gate violation(s) matched", evaluation.Summary.GateViolations) - default: - return nil - } -} diff --git a/pkg/cli/observability_policy_command_test.go b/pkg/cli/observability_policy_command_test.go deleted file mode 100644 index 29960c24f8c..00000000000 --- a/pkg/cli/observability_policy_command_test.go +++ /dev/null @@ -1,141 +0,0 @@ -//go:build !integration - -package cli - -import ( - "encoding/json" - "io" - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestSummarizeObservabilityPolicyResult(t *testing.T) { - result := ObservabilityPolicyResult{ - Violations: []ObservabilityPolicyViolation{ - {Action: "warn"}, - {Action: "gate"}, - {Action: "fail"}, - }, - } - - summary := summarizeObservabilityPolicyResult(result) - - assert.Equal(t, "fail", summary.Status, "fail should take precedence in summary status") - assert.Equal(t, 3, summary.TotalViolations, "all violations should be counted") - assert.Equal(t, 1, summary.FailViolations, "fail violations should be counted") - 
assert.Equal(t, 1, summary.GateViolations, "gate violations should be counted") - assert.Equal(t, 1, summary.WarnViolations, "warn violations should be counted") - assert.True(t, summary.Blocking, "fail or gate should mark summary as blocking") -} - -func TestRunObservabilityPolicyEval_JSONOutput(t *testing.T) { - policyPath := writeJSONFixture(t, "policy.json", ObservabilityPolicy{ - SchemaVersion: "1.0.0", - Rules: []ObservabilityPolicyRule{ - { - ID: "warn-control-plane-failure", - Action: "warn", - Message: "GitHub MCP failed during the run", - Match: ObservabilityPolicyMatch{ - MCPFailureServers: []string{"github"}, - }, - }, - }, - }) - - reportPath := writeJSONFixture(t, "report.json", ObservabilityPayload{ - Tooling: &ObservabilityPayloadTooling{ - MCPFailures: []ObservabilityPolicyMCPFailure{{ServerName: "github"}}, - }, - }) - - stdout := captureStream(t, true, func() { - err := RunObservabilityPolicyEval(ObservabilityPolicyEvalConfig{ - PolicyPath: policyPath, - ReportPath: reportPath, - JSONOutput: true, - }) - require.NoError(t, err, "warn-only result should not return an error") - }) - - var evaluation ObservabilityPolicyEvaluation - require.NoError(t, json.Unmarshal([]byte(stdout), &evaluation), "command should emit valid JSON") - assert.Equal(t, "warn", evaluation.Summary.Status, "warn-only result should produce warn status") - assert.Equal(t, 1, evaluation.Summary.WarnViolations, "warn violations should be counted") - assert.Len(t, evaluation.Violations, 1, "one policy violation should be emitted") -} - -func TestRunObservabilityPolicyEval_GateViolationReturnsError(t *testing.T) { - policyPath := writeJSONFixture(t, "policy.json", ObservabilityPolicy{ - SchemaVersion: "1.0.0", - Rules: []ObservabilityPolicyRule{ - { - ID: "gate-write-mode", - Action: "gate", - Message: "Write-capable runs require approval", - Match: ObservabilityPolicyMatch{ - ActuationModes: []string{"write_capable"}, - }, - }, - }, - }) - - reportPath := writeJSONFixture(t, 
"report.json", ObservabilityPayload{ - Actuation: &ObservabilityPayloadActuation{Mode: "write_capable"}, - }) - - var err error - _ = captureStream(t, true, func() { - err = RunObservabilityPolicyEval(ObservabilityPolicyEvalConfig{ - PolicyPath: policyPath, - ReportPath: reportPath, - JSONOutput: true, - }) - }) - - assert.ErrorContains(t, err, "requires approval", "gate violations should return a blocking error") -} - -func writeJSONFixture(t *testing.T, name string, value any) string { - t.Helper() - - dir := t.TempDir() - path := filepath.Join(dir, name) - content, err := json.Marshal(value) - require.NoError(t, err, "fixture should marshal") - require.NoError(t, os.WriteFile(path, content, 0o644), "fixture should be written") - - return path -} - -func captureStream(t *testing.T, stdout bool, fn func()) string { - t.Helper() - - r, w, err := os.Pipe() - require.NoError(t, err, "pipe should be created") - - if stdout { - old := os.Stdout - os.Stdout = w - defer func() { - os.Stdout = old - }() - } else { - old := os.Stderr - os.Stderr = w - defer func() { - os.Stderr = old - }() - } - - fn() - require.NoError(t, w.Close(), "writer should close cleanly") - - output, readErr := io.ReadAll(r) - require.NoError(t, readErr, "captured output should be readable") - return string(output) -} diff --git a/pkg/cli/observability_policy_test.go b/pkg/cli/observability_policy_test.go deleted file mode 100644 index b1cabad4146..00000000000 --- a/pkg/cli/observability_policy_test.go +++ /dev/null @@ -1,127 +0,0 @@ -//go:build !integration - -package cli - -import ( - "encoding/json" - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestEvaluateObservabilityPolicy(t *testing.T) { - policy := ObservabilityPolicy{ - SchemaVersion: "1.0.0", - Rules: []ObservabilityPolicyRule{ - { - ID: "block-unapproved-domain", - Action: "fail", - Message: "Blocked domain is not allowed", - Match: 
ObservabilityPolicyMatch{ - BlockedDomains: []string{"evil.example.com"}, - }, - }, - { - ID: "gate-high-risk-write", - Action: "gate", - Message: "High severity write-capable run requires approval", - Match: ObservabilityPolicyMatch{ - InsightSeverities: []string{"high", "critical"}, - ActuationModes: []string{"write_capable", "mixed"}, - }, - }, - { - ID: "warn-control-plane-failure", - Action: "warn", - Message: "GitHub MCP failed during the run", - Match: ObservabilityPolicyMatch{ - MCPFailureServers: []string{"github"}, - }, - }, - }, - } - - payload := ObservabilityPayload{ - Network: &ObservabilityPayloadNetwork{ - BlockedRequests: 3, - BlockedDomains: []string{"evil.example.com", "unknown.example.com"}, - }, - Actuation: &ObservabilityPayloadActuation{ - Mode: "write_capable", - CreatedItems: []ObservabilityCreatedItem{ - {Type: "create_pull_request"}, - }, - }, - Tooling: &ObservabilityPayloadTooling{ - MCPFailures: []ObservabilityPolicyMCPFailure{ - {ServerName: "github"}, - }, - }, - Insights: []ObservabilityInsight{ - {Severity: "high", Title: "Network friction detected"}, - }, - } - - result := EvaluateObservabilityPolicy(policy, payload) - require.Len(t, result.Violations, 3, "expected all three rules to match") - - assert.Equal(t, "block-unapproved-domain", result.Violations[0].RuleID) - assert.Equal(t, "fail", result.Violations[0].Action) - assert.Contains(t, result.Violations[0].Evidence, "blocked_domain=evil.example.com") - - assert.Equal(t, "gate-high-risk-write", result.Violations[1].RuleID) - assert.Equal(t, "gate", result.Violations[1].Action) - assert.Contains(t, result.Violations[1].Evidence, "insight_severity=high") - assert.Contains(t, result.Violations[1].Evidence, "actuation_mode=write_capable") - - assert.Equal(t, "warn-control-plane-failure", result.Violations[2].RuleID) - assert.Equal(t, "warn", result.Violations[2].Action) - assert.Contains(t, result.Violations[2].Evidence, "mcp_failure_server=github") -} - -func 
TestEvaluateObservabilityPolicy_NoMatch(t *testing.T) { - policy := ObservabilityPolicy{ - SchemaVersion: "1.0.0", - Rules: []ObservabilityPolicyRule{ - { - ID: "no-match", - Action: "fail", - Message: "Should not trigger", - Match: ObservabilityPolicyMatch{ - BlockedDomains: []string{"evil.example.com"}, - }, - }, - }, - } - - payload := ObservabilityPayload{ - Network: &ObservabilityPayloadNetwork{ - BlockedDomains: []string{"safe.example.com"}, - }, - } - - result := EvaluateObservabilityPolicy(policy, payload) - assert.Empty(t, result.Violations, "unexpected violations for non-matching payload") -} - -func TestObservabilityPolicySchemaParsesAndHasRules(t *testing.T) { - schemaPath := filepath.Join("..", "..", "schemas", "observability-policy.json") - schemaContent, err := os.ReadFile(schemaPath) - require.NoError(t, err, "should read observability policy schema") - - var schema map[string]any - require.NoError(t, json.Unmarshal(schemaContent, &schema), "schema should parse as JSON") - - assert.Equal(t, "http://json-schema.org/draft-07/schema#", schema["$schema"]) - properties, ok := schema["properties"].(map[string]any) - require.True(t, ok, "root properties should exist") - assert.Contains(t, properties, "rules") - - defs, ok := schema["$defs"].(map[string]any) - require.True(t, ok, "schema defs should exist") - assert.Contains(t, defs, "Rule") - assert.Contains(t, defs, "Match") -} diff --git a/pkg/cli/observability_schema_test.go b/pkg/cli/observability_schema_test.go deleted file mode 100644 index cfa008a0cc7..00000000000 --- a/pkg/cli/observability_schema_test.go +++ /dev/null @@ -1,65 +0,0 @@ -//go:build !integration - -package cli - -import ( - "encoding/json" - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestObservabilityReportSchemaIncludesLineageAndReasoning(t *testing.T) { - schemaPath := filepath.Join("..", "..", "schemas", "observability-report.json") - 
schemaContent, err := os.ReadFile(schemaPath) - require.NoError(t, err, "should read observability schema") - - var schema map[string]any - require.NoError(t, json.Unmarshal(schemaContent, &schema), "schema should parse as JSON") - - assert.Equal(t, "http://json-schema.org/draft-07/schema#", schema["$schema"], "schema should use Draft 7 for consistency with published schemas") - - properties, ok := schema["properties"].(map[string]any) - require.True(t, ok, "root properties should exist") - - _, hasLineage := properties["lineage"] - assert.True(t, hasLineage, "schema should include lineage section") - - _, hasReasoning := properties["reasoning"] - assert.True(t, hasReasoning, "schema should include reasoning section") - - defs, ok := schema["$defs"].(map[string]any) - require.True(t, ok, "schema defs should exist") - - awContextDef, ok := defs["AwContext"].(map[string]any) - require.True(t, ok, "AwContext definition should exist") - - awContextProps, ok := awContextDef["properties"].(map[string]any) - require.True(t, ok, "AwContext properties should exist") - assert.Contains(t, awContextProps, "repo") - assert.Contains(t, awContextProps, "run_id") - assert.Contains(t, awContextProps, "workflow_id") - assert.Contains(t, awContextProps, "workflow_call_id") - assert.Contains(t, awContextProps, "actor") - assert.Contains(t, awContextProps, "event_type") - - reasoningStepDef, ok := defs["ReasoningStep"].(map[string]any) - require.True(t, ok, "ReasoningStep definition should exist") - - reasoningStepProps, ok := reasoningStepDef["properties"].(map[string]any) - require.True(t, ok, "ReasoningStep properties should exist") - assert.Contains(t, reasoningStepProps, "kind") - assert.Contains(t, reasoningStepProps, "summary") - assert.Contains(t, reasoningStepProps, "evidence") - assert.Contains(t, reasoningStepProps, "tool_refs") - - lineageDef, ok := defs["Lineage"].(map[string]any) - require.True(t, ok, "Lineage definition should exist") - - lineageRequired, ok := 
lineageDef["required"].([]any) - require.True(t, ok, "Lineage required array should exist") - assert.Contains(t, lineageRequired, "trace_id") -} diff --git a/schemas/observability-policy.json b/schemas/observability-policy.json deleted file mode 100644 index a84d896f7fc..00000000000 --- a/schemas/observability-policy.json +++ /dev/null @@ -1,97 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://github.com/github/gh-aw/schemas/observability-policy.json", - "title": "GitHub Agentic Workflows Observability Policy", - "description": "Schema for policy rules evaluated against observability-report.json payloads. These rules convert distilled observability signals into enforcement actions such as fail, gate, or warn.", - "type": "object", - "required": ["schema_version", "rules"], - "additionalProperties": false, - "properties": { - "schema_version": { - "type": "string", - "description": "Version of the observability policy schema.", - "examples": ["1.0.0"] - }, - "rules": { - "type": "array", - "description": "List of policy rules to evaluate against an observability payload.", - "minItems": 1, - "items": { - "$ref": "#/$defs/Rule" - } - } - }, - "$defs": { - "Rule": { - "type": "object", - "required": ["id", "action", "message", "match"], - "additionalProperties": false, - "properties": { - "id": { - "type": "string", - "description": "Stable identifier for the rule." - }, - "action": { - "type": "string", - "description": "Enforcement action to apply when the rule matches.", - "enum": ["fail", "gate", "warn"] - }, - "message": { - "type": "string", - "description": "Human-readable message explaining the policy violation." 
- }, - "match": { - "$ref": "#/$defs/Match" - } - } - }, - "Match": { - "type": "object", - "additionalProperties": false, - "properties": { - "blocked_domains": { - "type": "array", - "description": "Match if any blocked domain in the payload equals one of these domains.", - "items": { - "type": "string" - } - }, - "min_blocked_requests": { - "type": "integer", - "description": "Match if blocked request count is greater than or equal to this threshold.", - "minimum": 0 - }, - "insight_severities": { - "type": "array", - "description": "Match if any distilled insight has one of these severities.", - "items": { - "type": "string", - "enum": ["critical", "high", "medium", "low", "info"] - } - }, - "actuation_modes": { - "type": "array", - "description": "Match if the payload actuation mode is one of these values.", - "items": { - "type": "string", - "enum": ["read_only", "write_capable", "mixed"] - } - }, - "mcp_failure_servers": { - "type": "array", - "description": "Match if any MCP failure references one of these server names.", - "items": { - "type": "string" - } - }, - "created_item_types": { - "type": "array", - "description": "Match if any created item type equals one of these values.", - "items": { - "type": "string" - } - } - } - } - } -} diff --git a/schemas/observability-report.json b/schemas/observability-report.json deleted file mode 100644 index 766babf4e53..00000000000 --- a/schemas/observability-report.json +++ /dev/null @@ -1,601 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://github.com/github/gh-aw/schemas/observability-report.json", - "title": "GitHub Agentic Workflows Observability Report", - "description": "Schema for distilled observability payloads emitted by GitHub Agentic Workflows. 
This payload separates execution status from reasoning-oriented telemetry, including lineage from aw_context, correlation identifiers, tool activity, network posture, actuation outcomes, and synthesized insights.", - "type": "object", - "required": ["schema_version", "kind", "generated_at", "overview", "lineage", "execution", "insights"], - "additionalProperties": false, - "properties": { - "schema_version": { - "type": "string", - "description": "Version of this observability payload schema.", - "examples": ["1.0.0"] - }, - "kind": { - "type": "string", - "description": "Payload scope. 'run' is for a single execution. 'fleet' is for multi-run summaries and trend aggregation.", - "enum": ["run", "fleet"] - }, - "generated_at": { - "type": "string", - "description": "RFC 3339 timestamp when this payload was generated.", - "format": "date-time" - }, - "overview": { - "$ref": "#/$defs/Overview" - }, - "lineage": { - "$ref": "#/$defs/Lineage" - }, - "execution": { - "$ref": "#/$defs/Execution" - }, - "reasoning": { - "$ref": "#/$defs/Reasoning" - }, - "tooling": { - "$ref": "#/$defs/Tooling" - }, - "network": { - "$ref": "#/$defs/Network" - }, - "actuation": { - "$ref": "#/$defs/Actuation" - }, - "insights": { - "type": "array", - "description": "Distilled decision-oriented signals derived from the workflow run or fleet of runs.", - "items": { - "$ref": "#/$defs/Insight" - } - }, - "findings": { - "type": "array", - "description": "Human-facing findings suitable for audit or incident review.", - "items": { - "$ref": "#/$defs/Finding" - } - }, - "recommendations": { - "type": "array", - "description": "Actionable recommendations derived from the observability data.", - "items": { - "$ref": "#/$defs/Recommendation" - } - } - }, - "$defs": { - "Overview": { - "type": "object", - "required": ["workflow_name", "status"], - "additionalProperties": false, - "properties": { - "run_id": { - "type": ["integer", "string"], - "description": "Workflow run identifier for run-scoped 
payloads. May be omitted for fleet summaries." - }, - "workflow_name": { - "type": "string", - "description": "Human-readable workflow name." - }, - "workflow_path": { - "type": "string", - "description": "Workflow file path, if known." - }, - "status": { - "type": "string", - "description": "Current lifecycle status of the workflow or aggregate health state for fleet payloads.", - "examples": ["completed", "in_progress", "success", "degraded"] - }, - "conclusion": { - "type": "string", - "description": "GitHub Actions conclusion for a completed run.", - "examples": ["success", "failure", "timed_out", "cancelled"] - }, - "event": { - "type": "string", - "description": "Triggering GitHub event name." - }, - "branch": { - "type": "string", - "description": "Head branch or reference name." - }, - "url": { - "type": "string", - "description": "GitHub Actions URL for the workflow run.", - "format": "uri" - } - } - }, - "Lineage": { - "type": "object", - "required": ["trace_id"], - "additionalProperties": false, - "properties": { - "trace_id": { - "type": "string", - "description": "Stable correlation identifier for this end-to-end execution lineage. Prefer workflow_call_id when aw_context is present; otherwise use a generated correlation key." - }, - "span_id": { - "type": "string", - "description": "Identifier for the current report node or aggregation span." - }, - "parent_trace_id": { - "type": "string", - "description": "Optional parent trace identifier when this payload was derived from another execution or aggregate report." - }, - "aw_context": { - "$ref": "#/$defs/AwContext" - }, - "engine": { - "$ref": "#/$defs/Engine" - } - } - }, - "AwContext": { - "type": "object", - "required": ["repo", "run_id", "workflow_id"], - "additionalProperties": false, - "properties": { - "repo": { - "type": "string", - "description": "Calling workflow repository in owner/repo format." 
- }, - "run_id": { - "type": "string", - "description": "GitHub Actions run ID of the calling workflow." - }, - "workflow_id": { - "type": "string", - "description": "Full workflow ref of the calling workflow, including repository, path, and ref." - }, - "workflow_call_id": { - "type": "string", - "description": "Unique call attempt identifier, typically composed from run_id and run_attempt." - }, - "time": { - "type": "string", - "description": "RFC 3339 timestamp of the dispatch or workflow call handoff.", - "format": "date-time" - }, - "actor": { - "type": "string", - "description": "GitHub actor that triggered the calling workflow." - }, - "event_type": { - "type": "string", - "description": "GitHub event name of the calling workflow." - } - } - }, - "Engine": { - "type": "object", - "additionalProperties": false, - "properties": { - "engine_id": { - "type": "string", - "description": "Stable engine identifier, such as copilot, claude, codex, or gemini." - }, - "engine_name": { - "type": "string", - "description": "Human-readable engine name." - }, - "model": { - "type": "string", - "description": "Model or engine variant used for this run." - }, - "version": { - "type": "string", - "description": "Engine or CLI version that produced the output." 
- } - } - }, - "Execution": { - "type": "object", - "required": ["task_status"], - "additionalProperties": false, - "properties": { - "task_status": { - "type": "string", - "description": "Outcome-focused status distinct from reasoning telemetry.", - "enum": ["success", "failure", "partial", "timed_out", "cancelled", "unknown"] - }, - "duration_ms": { - "type": "integer", - "description": "Total execution duration in milliseconds.", - "minimum": 0 - }, - "turns": { - "type": "integer", - "description": "Number of decision turns used by the agent.", - "minimum": 0 - }, - "token_usage": { - "type": "integer", - "description": "Total token usage for the run.", - "minimum": 0 - }, - "estimated_cost": { - "type": "number", - "description": "Estimated USD cost for the run.", - "minimum": 0 - }, - "error_count": { - "type": "integer", - "description": "Number of errors observed in the execution.", - "minimum": 0 - }, - "warning_count": { - "type": "integer", - "description": "Number of warnings observed in the execution.", - "minimum": 0 - } - } - }, - "Reasoning": { - "type": "object", - "additionalProperties": false, - "properties": { - "mode": { - "type": "string", - "description": "High-level characterization of the reasoning posture.", - "enum": ["directed", "adaptive", "exploratory", "unknown"] - }, - "reasoning_steps": { - "type": "array", - "description": "Optional coarse-grained reasoning steps that explain decision points without storing raw private chain-of-thought.", - "items": { - "$ref": "#/$defs/ReasoningStep" - } - }, - "drift_signal": { - "type": "string", - "description": "Optional summary of execution drift compared with prior runs.", - "examples": ["stable", "rising_turn_count", "volatile_tool_path"] - } - } - }, - "ReasoningStep": { - "type": "object", - "required": ["id", "kind", "summary"], - "additionalProperties": false, - "properties": { - "id": { - "type": "string", - "description": "Stable identifier for the reasoning step within the payload." 
- }, - "parent_id": { - "type": "string", - "description": "Optional parent step identifier for hierarchical plans." - }, - "kind": { - "type": "string", - "description": "Type of reasoning step.", - "enum": ["plan", "observe", "decide", "act", "verify", "handoff"] - }, - "summary": { - "type": "string", - "description": "Short explanation of the decision or transition." - }, - "evidence": { - "type": "array", - "description": "Structured evidence references supporting the reasoning step.", - "items": { - "$ref": "#/$defs/EvidenceRef" - } - }, - "tool_refs": { - "type": "array", - "description": "Tool calls associated with this reasoning step.", - "items": { - "type": "string" - } - }, - "outcome": { - "type": "string", - "description": "Observed result of this reasoning step.", - "examples": ["confirmed", "blocked", "needs-human-review"] - } - } - }, - "EvidenceRef": { - "type": "object", - "required": ["type", "value"], - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "description": "Evidence type.", - "enum": ["file", "log", "tool_call", "metric", "network_domain", "safe_output", "external"] - }, - "value": { - "type": "string", - "description": "Opaque identifier or locator for the evidence source." - }, - "label": { - "type": "string", - "description": "Human-readable description of the evidence." 
- } - } - }, - "Tooling": { - "type": "object", - "additionalProperties": false, - "properties": { - "tool_types": { - "type": "integer", - "description": "Number of unique tool types used during the run or across the aggregation.", - "minimum": 0 - }, - "tool_calls": { - "type": "array", - "description": "Observed tool call summaries.", - "items": { - "$ref": "#/$defs/ToolCall" - } - }, - "missing_tools": { - "type": "array", - "description": "Tools requested by the agent but not available.", - "items": { - "$ref": "#/$defs/MissingTool" - } - }, - "mcp_failures": { - "type": "array", - "description": "MCP server failures encountered by the run.", - "items": { - "$ref": "#/$defs/MCPFailure" - } - }, - "missing_data": { - "type": "array", - "description": "Signals that required data was missing during execution.", - "items": { - "$ref": "#/$defs/MissingData" - } - } - } - }, - "ToolCall": { - "type": "object", - "required": ["name", "call_count"], - "additionalProperties": false, - "properties": { - "name": { - "type": "string", - "description": "Tool name." - }, - "server_name": { - "type": "string", - "description": "MCP server name for MCP-hosted tools." 
- }, - "call_count": { - "type": "integer", - "description": "Number of times the tool was called.", - "minimum": 0 - }, - "max_input_size": { - "type": "integer", - "description": "Maximum observed input size.", - "minimum": 0 - }, - "max_output_size": { - "type": "integer", - "description": "Maximum observed output size.", - "minimum": 0 - }, - "max_duration_ms": { - "type": "integer", - "description": "Maximum observed execution duration in milliseconds.", - "minimum": 0 - }, - "status": { - "type": "string", - "description": "Aggregate outcome for the tool across the observed scope.", - "examples": ["success", "partial", "error"] - } - } - }, - "MissingTool": { - "type": "object", - "required": ["tool", "reason"], - "additionalProperties": false, - "properties": { - "tool": { - "type": "string" - }, - "reason": { - "type": "string" - }, - "alternatives": { - "type": "string" - } - } - }, - "MCPFailure": { - "type": "object", - "required": ["server_name", "status"], - "additionalProperties": false, - "properties": { - "server_name": { - "type": "string" - }, - "status": { - "type": "string" - }, - "timestamp": { - "type": "string", - "format": "date-time" - } - } - }, - "MissingData": { - "type": "object", - "required": ["data_type", "reason"], - "additionalProperties": false, - "properties": { - "data_type": { - "type": "string" - }, - "reason": { - "type": "string" - }, - "context": { - "type": "string" - } - } - }, - "Network": { - "type": "object", - "additionalProperties": false, - "properties": { - "total_requests": { - "type": "integer", - "minimum": 0 - }, - "allowed_requests": { - "type": "integer", - "minimum": 0 - }, - "blocked_requests": { - "type": "integer", - "minimum": 0 - }, - "allowed_domains": { - "type": "array", - "items": { - "type": "string" - } - }, - "blocked_domains": { - "type": "array", - "items": { - "type": "string" - } - }, - "redacted_domains": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "Actuation": { 
- "type": "object", - "additionalProperties": false, - "properties": { - "mode": { - "type": "string", - "description": "Whether the execution remained read-only or crossed into write-capable actions.", - "enum": ["read_only", "write_capable", "mixed"] - }, - "safe_items_count": { - "type": "integer", - "minimum": 0 - }, - "created_items": { - "type": "array", - "items": { - "$ref": "#/$defs/CreatedItem" - } - } - } - }, - "CreatedItem": { - "type": "object", - "required": ["type"], - "additionalProperties": false, - "properties": { - "type": { - "type": "string" - }, - "repo": { - "type": "string" - }, - "number": { - "type": "integer", - "minimum": 0 - }, - "url": { - "type": "string", - "format": "uri" - }, - "temporary_id": { - "type": "string" - }, - "timestamp": { - "type": "string", - "format": "date-time" - } - } - }, - "Insight": { - "type": "object", - "required": ["category", "severity", "title", "summary"], - "additionalProperties": false, - "properties": { - "category": { - "type": "string" - }, - "severity": { - "type": "string", - "enum": ["critical", "high", "medium", "low", "info"] - }, - "title": { - "type": "string" - }, - "summary": { - "type": "string" - }, - "evidence": { - "type": "string" - } - } - }, - "Finding": { - "type": "object", - "required": ["category", "severity", "title", "description"], - "additionalProperties": false, - "properties": { - "category": { - "type": "string" - }, - "severity": { - "type": "string" - }, - "title": { - "type": "string" - }, - "description": { - "type": "string" - }, - "impact": { - "type": "string" - } - } - }, - "Recommendation": { - "type": "object", - "required": ["priority", "action", "reason"], - "additionalProperties": false, - "properties": { - "priority": { - "type": "string", - "enum": ["high", "medium", "low"] - }, - "action": { - "type": "string" - }, - "reason": { - "type": "string" - }, - "example": { - "type": "string" - } - } - } - } -} From 3a1c0b39385c8a63d1927ca60f417f3644052e6b Mon 
Sep 17 00:00:00 2001 From: Mara Nikola Kiefer Date: Wed, 25 Mar 2026 07:53:35 +0100 Subject: [PATCH 06/12] clean up --- .serena/memories/project_overview.md | 1 - .serena/memories/style_and_conventions.md | 1 - .serena/memories/suggested_commands.md | 1 - .serena/memories/task_completion.md | 1 - .serena/project.yml | 23 ----------------------- 5 files changed, 27 deletions(-) delete mode 100644 .serena/memories/project_overview.md delete mode 100644 .serena/memories/style_and_conventions.md delete mode 100644 .serena/memories/suggested_commands.md delete mode 100644 .serena/memories/task_completion.md diff --git a/.serena/memories/project_overview.md b/.serena/memories/project_overview.md deleted file mode 100644 index b1ff0f29782..00000000000 --- a/.serena/memories/project_overview.md +++ /dev/null @@ -1 +0,0 @@ -gh-aw is a Go-based GitHub CLI extension (`gh aw`) for authoring agentic workflows in Markdown and compiling/running them as GitHub Actions. Primary stack: Go, some JS/CommonJS under actions/setup/js, bash scripts, docs in Astro/Starlight. Core areas: cmd/gh-aw entrypoint, pkg/cli commands, pkg/parser frontmatter/schema parsing, pkg/workflow compilation, .github/workflows sample workflows. \ No newline at end of file diff --git a/.serena/memories/style_and_conventions.md b/.serena/memories/style_and_conventions.md deleted file mode 100644 index f5156d9f1cb..00000000000 --- a/.serena/memories/style_and_conventions.md +++ /dev/null @@ -1 +0,0 @@ -Prefer small focused files. Use console formatting helpers for CLI output and send diagnostic output to stderr. Use logger.New with pkg:file namespaces for debug logging. Use `any` instead of `interface{}`. Add build tags to every Go test file. For JS in actions/setup/js use GitHub Actions core APIs and run fmt/lint for CJS. Recompile workflow .md changes into tracked .lock.yml files. Avoid legacy/fallback support unless requested. 
\ No newline at end of file diff --git a/.serena/memories/suggested_commands.md b/.serena/memories/suggested_commands.md deleted file mode 100644 index 97e530bebf4..00000000000 --- a/.serena/memories/suggested_commands.md +++ /dev/null @@ -1 +0,0 @@ -Key commands: `make fmt`, `make lint`, `make test-unit`, `make recompile`, `make build`, `make agent-finish`. Prefer selective tests like `go test -v -run "TestName" ./pkg/cli/`. Run CLI locally with `./gh-aw --help`, `./gh-aw compile`, `./gh-aw audit `, `./gh-aw logs`. Standard macOS utilities available: git, ls, cd, grep/rg, find, sed, awk. \ No newline at end of file diff --git a/.serena/memories/task_completion.md b/.serena/memories/task_completion.md deleted file mode 100644 index 0f672fe6a22..00000000000 --- a/.serena/memories/task_completion.md +++ /dev/null @@ -1 +0,0 @@ -Before commit always run `make agent-finish` (or at least `make fmt` and relevant selective tests). After modifying Go files run `make fmt`; after workflow markdown changes run `make recompile`; after schema changes run `make build`; after JS changes run `make fmt-cjs` and `make lint-cjs`. Do not leave workflow markdown changes uncompiled. \ No newline at end of file diff --git a/.serena/project.yml b/.serena/project.yml index ba912e71ac9..8da1c65e531 100644 --- a/.serena/project.yml +++ b/.serena/project.yml @@ -118,26 +118,3 @@ symbol_info_budget: # Note: the backend is fixed at startup. If a project with a different backend # is activated post-init, an error will be returned. language_backend: - -# line ending convention to use when writing source files. -# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default) -# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings. -line_ending: - -# list of regex patterns which, when matched, mark a memory entry as read‑only. -# Extends the list from the global configuration, merging the two lists. 
-read_only_memory_patterns: [] - -# list of regex patterns for memories to completely ignore. -# Matching memories will not appear in list_memories or activate_project output -# and cannot be accessed via read_memory or write_memory. -# To access ignored memory files, use the read_file tool on the raw file path. -# Extends the list from the global configuration, merging the two lists. -# Example: ["_archive/.*", "_episodes/.*"] -ignored_memory_patterns: [] - -# advanced configuration option allowing to configure language server-specific options. -# Maps the language key to the options. -# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available. -# No documentation on options means no options are available. -ls_specific_settings: {} From 7baf503a021c21c9e56bcb1440651b7d087cf7d8 Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer Date: Wed, 25 Mar 2026 07:59:36 +0100 Subject: [PATCH 07/12] rm docs --- .../docs/guides/agentic-observability-kit.md | 112 ------------------ docs/src/content/docs/patterns/monitoring.md | 4 - 2 files changed, 116 deletions(-) delete mode 100644 docs/src/content/docs/guides/agentic-observability-kit.md diff --git a/docs/src/content/docs/guides/agentic-observability-kit.md b/docs/src/content/docs/guides/agentic-observability-kit.md deleted file mode 100644 index 123480fd067..00000000000 --- a/docs/src/content/docs/guides/agentic-observability-kit.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: How to Add the Agentic Observability Kit -description: Add a drop-in workflow that turns gh aw logs and audit signals into recurring observability reports and warning issues. ---- - -Use this guide when a repository already has agentic workflows and needs a supported starter workflow for run-behavior reporting. - -The kit reviews recent runs, publishes a recurring discussion report, and opens warning issues only when a workflow shows repeated risk. 
Use [Projects & Monitoring](/gh-aw/patterns/monitoring/) instead when building a custom project board or status-update workflow. - -There are two variants: - -- `agentic-observability-kit` publishes into the same repository it analyzes -- `agentic-observability-central-kit` publishes into a central reporting repository - -## Add the workflow - -Run: - -```bash wrap -gh aw add github/gh-aw/agentic-observability-kit -``` - -This adds the workflow source file to `.github/workflows` so it can be reviewed and customized like any other workflow. - -## Add the central variant - -Use the central variant when a platform or workflow-operations repository should collect reports from many repositories. - -Run: - -```bash wrap -gh aw add github/gh-aw/agentic-observability-central-kit -``` - -Then set the `REPORT_REPOSITORY` repository variable to the destination repository in `owner/repo` format. - -Example: - -```text -acme/workflow-operations -``` - -If `REPORT_REPOSITORY` is not set, the workflow falls back to the current repository. - -## Review the default outputs - -By default, the workflow creates: - -- one discussion report per run in the `audits` category -- up to five warning issues when a workflow shows repeated risky behavior - -The default issue labels are `agentics` and `warning`. - -If the repository uses a different discussion category or labeling convention, edit the `safe-outputs` section after adding the workflow. - -## Compile the workflow - -After reviewing the file, compile it: - -```bash wrap -gh aw compile .github/workflows/agentic-observability-kit.md -``` - -If the repository already uses a bulk compile step, run that instead. - -For the central variant: - -```bash wrap -gh aw compile .github/workflows/agentic-observability-central-kit.md -``` - -## What counts as a warning - -The kit opens issues only for repeated, actionable patterns in the last 14 days. By default, that means one workflow crossed the same threshold in at least two runs. 
- -The default warning conditions are: - -- repeated `risky` comparison classifications -- repeated `new_mcp_failure` or `blocked_requests_increase` comparison reasons -- repeated medium or high `resource_heavy_for_domain` -- repeated medium or high `poor_agentic_control` - -## What stays in the report instead of opening an issue - -Some findings stay in the discussion report instead of opening an issue because they are usually optimization candidates rather than incidents: - -- repeated `overkill_for_agentic` -- workflows that remain `lean`, `directed`, and `narrow` across successful runs -- workflows that can only be compared to `latest_success` and never find a meaningful cohort match - -## Customizing the kit - -The starter workflow is designed to be modified after import. - -Common changes are: - -- widen the analysis window from 14 days to 30 days -- change labels to match internal triage processes -- route discussions to a central reporting repository -- route warning issues to a platform or workflow-operations repository -- tighten or relax warning thresholds depending on run volume - -If the organization wants one central place for reports, update the `create-discussion` and `create-issue` safe outputs to target that repository. - -If a central platform repository is already the operating model, prefer `agentic-observability-central-kit` instead of manually rewriting the single-repo starter. 
- -## Related documentation - -- [Debugging Workflows](/gh-aw/troubleshooting/debugging/) -- [GH-AW as an MCP Server](/gh-aw/reference/gh-aw-as-mcp-server/) -- [Projects & Monitoring](/gh-aw/patterns/monitoring/) -- [CentralRepoOps](/gh-aw/patterns/central-repo-ops/) diff --git a/docs/src/content/docs/patterns/monitoring.md b/docs/src/content/docs/patterns/monitoring.md index 294ee979615..0b5fb2b5c87 100644 --- a/docs/src/content/docs/patterns/monitoring.md +++ b/docs/src/content/docs/patterns/monitoring.md @@ -5,8 +5,6 @@ description: Use GitHub Projects + safe-outputs to track and monitor workflow wo Use this pattern when you want a durable “source of truth” for what your agentic workflows discovered, decided, and did. -This page is about composing a custom monitoring layer with GitHub Projects and safe outputs. If the goal is to add a ready-made weekly reporting workflow for run behavior, use the [agentic observability kit](/gh-aw/guides/agentic-observability-kit/). - ## What this pattern is - **Projects** are the dashboard: a GitHub Projects v2 board holds issues/PRs and custom fields. @@ -114,6 +112,4 @@ See the full reference: [/reference/safe-outputs/#no-op-logging-noop](/gh-aw/ref - Use `gh aw status` to see which workflows are enabled and their latest run state. - Use `gh aw logs` and `gh aw audit` to inspect tool usage, errors, MCP failures, and network patterns. -Use the [agentic observability kit](/gh-aw/guides/agentic-observability-kit/) when you want a supported starter workflow that turns recent `gh aw logs` and `gh aw audit` signals into recurring reports and warning issues. Stay on this page when you need a custom board, status-update workflow, or issue/project correlation model. 
- See: [/setup/cli/](/gh-aw/setup/cli/) From c1ac9aaf5a7790882a895a4fe2ce173efca137e1 Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer Date: Wed, 25 Mar 2026 08:16:58 +0100 Subject: [PATCH 08/12] avoid unnecessary refactoring --- pkg/cli/audit_report.go | 51 +++++++++++++++++++++++++++++++++++++++-- pkg/cli/logs_report.go | 11 +++++---- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/pkg/cli/audit_report.go b/pkg/cli/audit_report.go index 7452219f1cf..e6de59671ce 100644 --- a/pkg/cli/audit_report.go +++ b/pkg/cli/audit_report.go @@ -13,6 +13,7 @@ import ( "github.com/github/gh-aw/pkg/logger" "github.com/github/gh-aw/pkg/sliceutil" "github.com/github/gh-aw/pkg/timeutil" + "github.com/github/gh-aw/pkg/workflow" ) var auditReportLog = logger.New("cli:audit_report") @@ -235,6 +236,13 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage overview.Duration = timeutil.FormatDuration(run.Duration) } + if run.LogsPath != "" { + awInfoPath := filepath.Join(run.LogsPath, "aw_info.json") + if awInfo, err := parseAwInfo(awInfoPath, false); err == nil && awInfo != nil { + overview.AwContext = awInfo.Context + } + } + // Build metrics metricsData := MetricsData{ TokenUsage: run.TokenUsage, @@ -272,8 +280,47 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage } } - awContext, toolUsage, createdItems, taskDomain, behaviorFingerprint, agenticAssessments := deriveRunAgenticAnalysis(processedRun, metrics) - overview.AwContext = awContext + toolStats := make(map[string]*ToolUsageInfo) + for _, toolCall := range metrics.ToolCalls { + displayKey := workflow.PrettifyToolName(toolCall.Name) + if existing, exists := toolStats[displayKey]; exists { + existing.CallCount += toolCall.CallCount + if toolCall.MaxInputSize > existing.MaxInputSize { + existing.MaxInputSize = toolCall.MaxInputSize + } + if toolCall.MaxOutputSize > existing.MaxOutputSize { + existing.MaxOutputSize = toolCall.MaxOutputSize + } + if 
toolCall.MaxDuration > 0 { + maxDuration := timeutil.FormatDuration(toolCall.MaxDuration) + if existing.MaxDuration == "" || toolCall.MaxDuration > parseDurationString(existing.MaxDuration) { + existing.MaxDuration = maxDuration + } + } + continue + } + + toolInfo := &ToolUsageInfo{ + Name: displayKey, + CallCount: toolCall.CallCount, + MaxInputSize: toolCall.MaxInputSize, + MaxOutputSize: toolCall.MaxOutputSize, + } + if toolCall.MaxDuration > 0 { + toolInfo.MaxDuration = timeutil.FormatDuration(toolCall.MaxDuration) + } + toolStats[displayKey] = toolInfo + } + + toolUsage := make([]ToolUsageInfo, 0, len(toolStats)) + for _, info := range toolStats { + toolUsage = append(toolUsage, *info) + } + + createdItems := extractCreatedItemsFromManifest(run.LogsPath) + taskDomain := detectTaskDomain(processedRun, createdItems, toolUsage, overview.AwContext) + behaviorFingerprint := buildBehaviorFingerprint(processedRun, metricsData, toolUsage, createdItems, overview.AwContext) + agenticAssessments := buildAgenticAssessments(processedRun, metricsData, toolUsage, createdItems, taskDomain, behaviorFingerprint, overview.AwContext) // Generate key findings findings := generateFindings(processedRun, metricsData, errors, warnings) diff --git a/pkg/cli/logs_report.go b/pkg/cli/logs_report.go index b8ac59c5a02..adf036ad09c 100644 --- a/pkg/cli/logs_report.go +++ b/pkg/cli/logs_report.go @@ -171,15 +171,16 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation totalMissingData += run.MissingDataCount totalSafeItems += run.SafeItemsCount - // Extract agent/engine ID from aw_info.json and only fall back to the file for aw_context. + // Extract agent/engine ID and aw_context from aw_info.json. 
agentID := "" - awContext := pr.AwContext + var awContext *AwContext awInfoPath := filepath.Join(run.LogsPath, "aw_info.json") if info, err := parseAwInfo(awInfoPath, false); err == nil && info != nil { agentID = info.EngineID - if awContext == nil { - awContext = info.Context - } + awContext = info.Context + } + if awContext == nil { + awContext = pr.AwContext } comparison := buildAuditComparisonForProcessedRuns(pr, processedRuns) From 80b689384c88c324fba9b035e02e5ac4e7a47b49 Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer Date: Wed, 25 Mar 2026 08:44:02 +0100 Subject: [PATCH 09/12] update agentic observability kit --- .../agentic-observability-kit.lock.yml | 99 +++++-------------- .../workflows/agentic-observability-kit.md | 38 ++++--- 2 files changed, 50 insertions(+), 87 deletions(-) diff --git a/.github/workflows/agentic-observability-kit.lock.yml b/.github/workflows/agentic-observability-kit.lock.yml index 3d956becca9..b834e367db3 100644 --- a/.github/workflows/agentic-observability-kit.lock.yml +++ b/.github/workflows/agentic-observability-kit.lock.yml @@ -12,7 +12,7 @@ # \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ # \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ # -# This file was automatically generated by gh-aw. DO NOT EDIT. +# This file was automatically generated by gh-aw (v0.63.1). DO NOT EDIT. 
# # To update this file, edit the corresponding .md file and run: # gh aw compile @@ -26,12 +26,12 @@ # Imports: # - shared/reporting.md # -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"10a830eaf5cf3094ef3a9310d0dbb615f44717f0750cda14c4c543076df1a0c6","strict":true,"agent_id":"copilot"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"a8b23dcad0059913cb1a28b0793d0e220cf27f9ecd567523892d6c11a3e5868b","compiler_version":"v0.63.1","strict":true,"agent_id":"copilot"} name: "Agentic Observability Kit" "on": schedule: - - cron: "7 8 * * 1" + - cron: "18 8 * * 1" # Friendly format: weekly on monday around 08:00 (scattered) workflow_dispatch: inputs: @@ -60,15 +60,8 @@ jobs: model: ${{ steps.generate_aw_info.outputs.model }} secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} steps: - - name: Checkout actions folder - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: github/gh-aw - sparse-checkout: | - actions - persist-credentials: false - name: Setup Scripts - uses: ./actions/setup + uses: github/gh-aw-actions/setup@53e09ec0be6271e81a69f51ef93f37212c8834b0 # v0.63.1 with: destination: ${{ runner.temp }}/gh-aw/actions - name: Generate agentic run info @@ -79,6 +72,7 @@ jobs: GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }} GH_AW_INFO_VERSION: "latest" GH_AW_INFO_AGENT_VERSION: "latest" + GH_AW_INFO_CLI_VERSION: "v0.63.1" GH_AW_INFO_WORKFLOW_NAME: "Agentic Observability Kit" GH_AW_INFO_EXPERIMENTAL: "false" GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" @@ -146,7 +140,7 @@ jobs: cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" cat << 'GH_AW_PROMPT_EOF' - Tools: create_issue(max:5), create_discussion, missing_tool, missing_data, noop + Tools: create_issue, create_discussion, missing_tool, missing_data, noop The following GitHub context information is available for this workflow: @@ -279,15 +273,8 @@ jobs: output: ${{ 
steps.collect_output.outputs.output }} output_types: ${{ steps.collect_output.outputs.output_types }} steps: - - name: Checkout actions folder - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: github/gh-aw - sparse-checkout: | - actions - persist-credentials: false - name: Setup Scripts - uses: ./actions/setup + uses: github/gh-aw-actions/setup@53e09ec0be6271e81a69f51ef93f37212c8834b0 # v0.63.1 with: destination: ${{ runner.temp }}/gh-aw/actions - name: Set runtime paths @@ -300,36 +287,6 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - - name: Setup Go for CLI build - uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 - with: - go-version-file: go.mod - cache: true - - name: Build gh-aw CLI - run: | - echo "Building gh-aw CLI for linux/amd64..." - mkdir -p dist - VERSION=$(git describe --tags --always --dirty) - CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ - -ldflags "-s -w -X main.version=${VERSION}" \ - -o dist/gh-aw-linux-amd64 \ - ./cmd/gh-aw - # Copy binary to root for direct execution in user-defined steps - cp dist/gh-aw-linux-amd64 ./gh-aw - chmod +x ./gh-aw - echo "✓ Built gh-aw CLI successfully" - - name: Setup Docker Buildx - uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4 - - name: Build gh-aw Docker image - uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7 - with: - context: . 
- platforms: linux/amd64 - push: false - load: true - tags: localhost/gh-aw:dev - build-args: | - BINARY=dist/gh-aw-linux-amd64 - name: Create gh-aw temp directory run: bash ${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh - name: Configure gh CLI for GitHub Enterprise @@ -379,7 +336,7 @@ jobs: const determineAutomaticLockdown = require('${{ runner.temp }}/gh-aw/actions/determine_automatic_lockdown.cjs'); await determineAutomaticLockdown(github, context, core); - name: Download container images - run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.25.0 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.0 ghcr.io/github/gh-aw-firewall/squid:0.25.0 ghcr.io/github/gh-aw-mcpg:v0.2.3 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine + run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh alpine:latest ghcr.io/github/gh-aw-firewall/agent:0.25.0 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.0 ghcr.io/github/gh-aw-firewall/squid:0.25.0 ghcr.io/github/gh-aw-mcpg:v0.2.4 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine - name: Install gh-aw extension env: GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} @@ -410,7 +367,7 @@ jobs: mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_EOF' - {"create_discussion":{"expires":168,"max":1},"create_issue":{"group":true,"max":5},"missing_data":{},"missing_tool":{},"noop":{"max":1}} + {"create_discussion":{"category":"audits","close_older_discussions":true,"expires":168,"fallback_to_issue":true,"max":1,"title_prefix":"[observability] "},"create_issue":{"close_older_issues":true,"labels":["agentics","warning","observability"],"max":1,"title_prefix":"[observability escalation] "},"mentions":{"enabled":false},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"false"}} 
GH_AW_SAFE_OUTPUTS_CONFIG_EOF - name: Write Safe Outputs Tools run: | @@ -418,7 +375,7 @@ jobs: { "description_suffixes": { "create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[observability] \". Discussions will be created in category \"audits\".", - "create_issue": " CONSTRAINTS: Maximum 5 issue(s) can be created. Labels [\"agentics\" \"warning\"] will be automatically added." + "create_issue": " CONSTRAINTS: Maximum 1 issue(s) can be created. Title will be prefixed with \"[observability escalation] \". Labels [\"agentics\" \"warning\" \"observability\"] will be automatically added." }, "repo_params": {}, "dynamic_tools": [] @@ -609,7 +566,7 @@ jobs: export DEBUG="*" export GH_AW_ENGINE="copilot" - export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw 
ghcr.io/github/gh-aw-mcpg:v0.2.3' + export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.4' mkdir -p /home/runner/.copilot cat << GH_AW_MCP_CONFIG_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh @@ -617,8 +574,10 @@ jobs: "mcpServers": { "agenticworkflows": { "type": "stdio", - "container": "localhost/gh-aw:dev", - "mounts": ["\${GITHUB_WORKSPACE}:\${GITHUB_WORKSPACE}:rw", "/tmp/gh-aw:/tmp/gh-aw:rw"], + "container": "alpine:latest", + "entrypoint": "${RUNNER_TEMP}/gh-aw/gh-aw", + "entrypointArgs": ["mcp-server", "--validate-actor"], + "mounts": ["${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro", "/usr/bin/gh:/usr/bin/gh:ro", "\${GITHUB_WORKSPACE}:\${GITHUB_WORKSPACE}:rw", "/tmp/gh-aw:/tmp/gh-aw:rw"], "args": ["--network", "host", "-w", "\${GITHUB_WORKSPACE}"], "env": { "DEBUG": "*", @@ 
-699,7 +658,7 @@ jobs: GH_AW_PHASE: agent GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} - GH_AW_VERSION: dev + GH_AW_VERSION: v0.63.1 GITHUB_API_URL: ${{ github.api_url }} GITHUB_AW: true GITHUB_HEAD_REF: ${{ github.head_ref }} @@ -788,6 +747,7 @@ jobs: env: GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" + GH_AW_ALLOWED_GITHUB_REFS: "" GITHUB_SERVER_URL: ${{ github.server_url }} GITHUB_API_URL: ${{ github.api_url }} with: @@ -935,7 +895,7 @@ jobs: COPILOT_MODEL: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} GH_AW_PHASE: detection GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - GH_AW_VERSION: dev + GH_AW_VERSION: v0.63.1 GITHUB_API_URL: ${{ github.api_url }} GITHUB_AW: true GITHUB_HEAD_REF: ${{ github.head_ref }} @@ -1005,15 +965,8 @@ jobs: tools_reported: ${{ steps.missing_tool.outputs.tools_reported }} total_count: ${{ steps.missing_tool.outputs.total_count }} steps: - - name: Checkout actions folder - uses: 
actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: github/gh-aw - sparse-checkout: | - actions - persist-credentials: false - name: Setup Scripts - uses: ./actions/setup + uses: github/gh-aw-actions/setup@53e09ec0be6271e81a69f51ef93f37212c8834b0 # v0.63.1 with: destination: ${{ runner.temp }}/gh-aw/actions - name: Download agent output artifact @@ -1113,6 +1066,9 @@ jobs: contents: read discussions: write issues: write + concurrency: + group: "agentic-observability-kit-safe-outputs" + cancel-in-progress: false timeout-minutes: 15 env: GH_AW_CALLER_WORKFLOW_ID: "${{ github.repository }}/agentic-observability-kit" @@ -1131,15 +1087,8 @@ jobs: process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }} process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} steps: - - name: Checkout actions folder - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: github/gh-aw - sparse-checkout: | - actions - persist-credentials: false - name: Setup Scripts - uses: ./actions/setup + uses: github/gh-aw-actions/setup@53e09ec0be6271e81a69f51ef93f37212c8834b0 # v0.63.1 with: destination: ${{ runner.temp }}/gh-aw/actions - name: Download agent output artifact @@ -1173,7 +1122,7 @@ jobs: GH_AW_ALLOWED_DOMAINS: 
"api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" GITHUB_SERVER_URL: ${{ github.server_url }} GITHUB_API_URL: ${{ github.api_url }} - GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"audits\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[observability] \"},\"create_issue\":{\"group\":true,\"labels\":[\"agentics\",\"warning\"],\"max\":5},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"}}" + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"audits\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[observability] \"},\"create_issue\":{\"close_older_issues\":true,\"labels\":[\"agentics\",\"warning\",\"observability\"],\"max\":1,\"title_prefix\":\"[observability escalation] \"},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"}}" with: github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} script: | diff --git a/.github/workflows/agentic-observability-kit.md 
b/.github/workflows/agentic-observability-kit.md index 98b1f43cdc3..7fc0e20875b 100644 --- a/.github/workflows/agentic-observability-kit.md +++ b/.github/workflows/agentic-observability-kit.md @@ -17,6 +17,9 @@ tools: github: toolsets: [default, discussions] safe-outputs: + mentions: false + allowed-github-references: [] + concurrency-group: "agentic-observability-kit-safe-outputs" create-discussion: expires: 7d category: "audits" @@ -24,9 +27,10 @@ safe-outputs: max: 1 close-older-discussions: true create-issue: - labels: [agentics, warning] - max: 5 - group: true + title-prefix: "[observability escalation] " + labels: [agentics, warning, observability] + close-older-issues: true + max: 1 noop: report-as-issue: false timeout-minutes: 30 @@ -36,7 +40,7 @@ imports: # Agentic Observability Kit -You are an agentic workflow observability analyst. Produce one executive report that teams can read quickly, and create targeted warning issues only when repeated patterns show that a workflow needs intervention. +You are an agentic workflow observability analyst. Produce one executive report that teams can read quickly, and create at most one escalation issue only when repeated patterns show that repository owners need to take action. ## Mission @@ -48,7 +52,7 @@ Review recent agentic workflow runs and surface the signals that matter operatio 4. Stable but low-value agentic runs that may be better as deterministic automation 5. Delegated workflows that lost continuity or are no longer behaving like a consistent cohort -Always create a discussion with the report. Create issues only for repeated, actionable problems. +Always create a discussion with the full report. Create an escalation issue only when repeated, actionable problems need durable owner follow-up. 
## Data Collection Rules @@ -115,16 +119,20 @@ For each highlighted workflow, explain: - whether the risky behavior is new, repeated, or likely intentional - what a team should change next -## Warning Thresholds +## Escalation Thresholds -Create an issue only when a workflow crosses one of these thresholds in the last 14 days: +Use the discussion as the complete source of truth for all qualifying workflows. Only create an escalation issue when one or more workflows cross these thresholds in the last 14 days: 1. Two or more runs for the same workflow have `comparison.classification.label == "risky"`. 2. Two or more runs for the same workflow contain `new_mcp_failure` or `blocked_requests_increase` in `comparison.classification.reason_codes`. 3. Two or more runs for the same workflow contain a medium or high severity `resource_heavy_for_domain` assessment. 4. Two or more runs for the same workflow contain a medium or high severity `poor_agentic_control` assessment. -Do not open duplicate issues for the same workflow in the same run. Create at most one issue per workflow. +Do not open one issue per workflow. Create at most one escalation issue for the whole run. + +If no workflow crosses these thresholds, do not create an escalation issue. + +If one or more workflows do cross these thresholds, create a single escalation issue that groups the highest-value follow-up work for repository owners. The escalation issue should summarize the workflows that need attention now, why they crossed the thresholds, and what change is recommended first. 
## Optimization Candidates @@ -153,19 +161,25 @@ When you use `audit`, fold the extra evidence back into the report instead of du Always create one discussion that includes: - the date range analyzed +- all workflows that crossed the escalation thresholds - the workflows with the clearest repeated risk - the most common assessment kinds - a short list of deterministic candidates - a short list of workflows that need owner attention now +The discussion should cover all qualifying workflows even when no escalation issue is created. + ### Issues -When creating a warning issue: +Only create an escalation issue when at least one workflow crossed the escalation thresholds. When you do: -- use a concrete title naming the workflow and the repeated pattern +- create one issue for the whole run, not one issue per workflow +- use a concrete title that signals repository-level owner attention is needed +- group the escalated workflows in priority order - explain the evidence with run counts and the specific assessment or comparison reason codes -- include the most relevant recommendation from the comparison or assessment data -- link up to 3 representative runs +- include the most relevant recommendation for each escalated workflow +- link up to 3 representative runs across the highest-priority workflows +- make the issue concise enough to function as a backlog item, with the full detail living in the discussion ### No-op From 65499a9572523b60a4417c48fe4868caccb184c6 Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer Date: Wed, 25 Mar 2026 08:53:57 +0100 Subject: [PATCH 10/12] add episode and DAG model details --- .../agentic-observability-kit.lock.yml | 377 +++++++++++------- .../workflows/agentic-observability-kit.md | 37 ++ 2 files changed, 277 insertions(+), 137 deletions(-) diff --git a/.github/workflows/agentic-observability-kit.lock.yml b/.github/workflows/agentic-observability-kit.lock.yml index b834e367db3..d5f9bcbfa14 100644 --- 
a/.github/workflows/agentic-observability-kit.lock.yml +++ b/.github/workflows/agentic-observability-kit.lock.yml @@ -12,7 +12,7 @@ # \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ # \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ # -# This file was automatically generated by gh-aw (v0.63.1). DO NOT EDIT. +# This file was automatically generated by gh-aw. DO NOT EDIT. # # To update this file, edit the corresponding .md file and run: # gh aw compile @@ -26,12 +26,12 @@ # Imports: # - shared/reporting.md # -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"a8b23dcad0059913cb1a28b0793d0e220cf27f9ecd567523892d6c11a3e5868b","compiler_version":"v0.63.1","strict":true,"agent_id":"copilot"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"a8b23dcad0059913cb1a28b0793d0e220cf27f9ecd567523892d6c11a3e5868b","strict":true,"agent_id":"copilot"} name: "Agentic Observability Kit" "on": schedule: - - cron: "18 8 * * 1" + - cron: "7 8 * * 1" # Friendly format: weekly on monday around 08:00 (scattered) workflow_dispatch: inputs: @@ -60,8 +60,15 @@ jobs: model: ${{ steps.generate_aw_info.outputs.model }} secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false - name: Setup Scripts - uses: github/gh-aw-actions/setup@53e09ec0be6271e81a69f51ef93f37212c8834b0 # v0.63.1 + uses: ./actions/setup with: destination: ${{ runner.temp }}/gh-aw/actions - name: Generate agentic run info @@ -72,7 +79,6 @@ jobs: GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }} GH_AW_INFO_VERSION: "latest" GH_AW_INFO_AGENT_VERSION: "latest" - GH_AW_INFO_CLI_VERSION: "v0.63.1" GH_AW_INFO_WORKFLOW_NAME: "Agentic Observability Kit" GH_AW_INFO_EXPERIMENTAL: "false" GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" @@ -265,16 +271,21 @@ jobs: 
GH_AW_WORKFLOW_ID_SANITIZED: agenticobservabilitykit outputs: checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} - detection_conclusion: ${{ steps.detection_conclusion.outputs.conclusion }} - detection_success: ${{ steps.detection_conclusion.outputs.success }} has_patch: ${{ steps.collect_output.outputs.has_patch }} inference_access_error: ${{ steps.detect-inference-error.outputs.inference_access_error || 'false' }} model: ${{ needs.activation.outputs.model }} output: ${{ steps.collect_output.outputs.output }} output_types: ${{ steps.collect_output.outputs.output_types }} steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false - name: Setup Scripts - uses: github/gh-aw-actions/setup@53e09ec0be6271e81a69f51ef93f37212c8834b0 # v0.63.1 + uses: ./actions/setup with: destination: ${{ runner.temp }}/gh-aw/actions - name: Set runtime paths @@ -287,6 +298,36 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false + - name: Setup Go for CLI build + uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 + with: + go-version-file: go.mod + cache: true + - name: Build gh-aw CLI + run: | + echo "Building gh-aw CLI for linux/amd64..." 
+ mkdir -p dist + VERSION=$(git describe --tags --always --dirty) + CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ + -ldflags "-s -w -X main.version=${VERSION}" \ + -o dist/gh-aw-linux-amd64 \ + ./cmd/gh-aw + # Copy binary to root for direct execution in user-defined steps + cp dist/gh-aw-linux-amd64 ./gh-aw + chmod +x ./gh-aw + echo "✓ Built gh-aw CLI successfully" + - name: Setup Docker Buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4 + - name: Build gh-aw Docker image + uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7 + with: + context: . + platforms: linux/amd64 + push: false + load: true + tags: localhost/gh-aw:dev + build-args: | + BINARY=dist/gh-aw-linux-amd64 - name: Create gh-aw temp directory run: bash ${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh - name: Configure gh CLI for GitHub Enterprise @@ -336,7 +377,7 @@ jobs: const determineAutomaticLockdown = require('${{ runner.temp }}/gh-aw/actions/determine_automatic_lockdown.cjs'); await determineAutomaticLockdown(github, context, core); - name: Download container images - run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh alpine:latest ghcr.io/github/gh-aw-firewall/agent:0.25.0 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.0 ghcr.io/github/gh-aw-firewall/squid:0.25.0 ghcr.io/github/gh-aw-mcpg:v0.2.4 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine + run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.25.0 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.0 ghcr.io/github/gh-aw-firewall/squid:0.25.0 ghcr.io/github/gh-aw-mcpg:v0.2.6 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine - name: Install gh-aw extension env: GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} @@ -566,7 +607,7 @@ jobs: export DEBUG="*" export GH_AW_ENGINE="copilot" - export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network 
host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.4' + export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e 
GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.6' mkdir -p /home/runner/.copilot cat << GH_AW_MCP_CONFIG_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh @@ -574,10 +615,8 @@ jobs: "mcpServers": { "agenticworkflows": { "type": "stdio", - "container": "alpine:latest", - "entrypoint": "${RUNNER_TEMP}/gh-aw/gh-aw", - "entrypointArgs": ["mcp-server", "--validate-actor"], - "mounts": ["${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro", "/usr/bin/gh:/usr/bin/gh:ro", "\${GITHUB_WORKSPACE}:\${GITHUB_WORKSPACE}:rw", "/tmp/gh-aw:/tmp/gh-aw:rw"], + "container": "localhost/gh-aw:dev", + "mounts": ["\${GITHUB_WORKSPACE}:\${GITHUB_WORKSPACE}:rw", "/tmp/gh-aw:/tmp/gh-aw:rw"], "args": ["--network", "host", "-w", "\${GITHUB_WORKSPACE}"], "env": { "DEBUG": "*", @@ -648,7 +687,7 @@ jobs: set -o pipefail touch /tmp/gh-aw/agent-step-summary.md # shellcheck disable=SC1003 - sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains 
"api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.25.0 --skip-pull --enable-api-proxy \ + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains 
"api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.0 --skip-pull --enable-api-proxy \ -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-all-tools --allow-all-paths --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log env: COPILOT_AGENT_RUNNER_TYPE: STANDALONE @@ -658,7 +697,7 @@ jobs: GH_AW_PHASE: agent GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} - GH_AW_VERSION: v0.63.1 + GH_AW_VERSION: dev GITHUB_API_URL: ${{ github.api_url }} GITHUB_AW: true GITHUB_HEAD_REF: ${{ github.head_ref }} @@ -819,15 +858,175 @@ jobs: uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 with: name: firewall-audit-logs - path: /tmp/gh-aw/sandbox/firewall/logs/ + path: | + /tmp/gh-aw/sandbox/firewall/logs/ + /tmp/gh-aw/sandbox/firewall/audit/ 
if-no-files-found: ignore - # --- Threat Detection (inline) --- + + conclusion: + needs: + - activation + - agent + - detection + - safe_outputs + if: always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true') + runs-on: ubuntu-slim + permissions: + contents: read + discussions: write + issues: write + concurrency: + group: "gh-aw-conclusion-agentic-observability-kit" + cancel-in-progress: false + outputs: + noop_message: ${{ steps.noop.outputs.noop_message }} + tools_reported: ${{ steps.missing_tool.outputs.tools_reported }} + total_count: ${{ steps.missing_tool.outputs.total_count }} + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Download agent output artifact + id: download-agent-output + continue-on-error: true + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw/ + - name: Setup agent output environment variable + id: setup-agent-output-env + if: steps.download-agent-output.outcome == 'success' + run: | + mkdir -p /tmp/gh-aw/ + find "/tmp/gh-aw/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" + - name: Process No-Op Messages + id: noop + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_NOOP_MAX: "1" + GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" + GH_AW_TRACKER_ID: "agentic-observability-kit" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, 
github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/noop.cjs'); + await main(); + - name: Record Missing Tool + id: missing_tool + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" + GH_AW_TRACKER_ID: "agentic-observability-kit" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/missing_tool.cjs'); + await main(); + - name: Handle Agent Failure + id: handle_agent_failure + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" + GH_AW_TRACKER_ID: "agentic-observability-kit" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_WORKFLOW_ID: "agentic-observability-kit" + GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.activation.outputs.secret_verification_result }} + GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} + GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.agent.outputs.inference_access_error }} + GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }} + GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }} + GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }} + GH_AW_GROUP_REPORTS: "false" + GH_AW_FAILURE_REPORT_AS_ISSUE: "true" + GH_AW_TIMEOUT_MINUTES: "30" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || 
secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_agent_failure.cjs'); + await main(); + - name: Handle No-Op Message + id: handle_noop_message + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" + GH_AW_TRACKER_ID: "agentic-observability-kit" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_NOOP_MESSAGE: ${{ steps.noop.outputs.noop_message }} + GH_AW_NOOP_REPORT_AS_ISSUE: "false" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs'); + await main(); + + detection: + needs: agent + if: always() && needs.agent.result != 'skipped' + runs-on: ubuntu-latest + permissions: + contents: read + outputs: + detection_conclusion: ${{ steps.detection_conclusion.outputs.conclusion }} + detection_success: ${{ steps.detection_conclusion.outputs.success }} + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Download agent output artifact + id: download-agent-output + continue-on-error: true + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + 
name: agent + path: /tmp/gh-aw/ + - name: Setup agent output environment variable + id: setup-agent-output-env + if: steps.download-agent-output.outcome == 'success' + run: | + mkdir -p /tmp/gh-aw/ + find "/tmp/gh-aw/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" + # --- Threat Detection --- + - name: Download container images + run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.25.0 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.0 ghcr.io/github/gh-aw-firewall/squid:0.25.0 - name: Check if detection needed id: detection_guard if: always() env: - OUTPUT_TYPES: ${{ steps.collect_output.outputs.output_types }} - HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }} + OUTPUT_TYPES: ${{ needs.agent.outputs.output_types }} + HAS_PATCH: ${{ needs.agent.outputs.has_patch }} run: | if [[ -n "$OUTPUT_TYPES" || "$HAS_PATCH" == "true" ]]; then echo "run_detection=true" >> "$GITHUB_OUTPUT" @@ -859,7 +1058,7 @@ jobs: env: WORKFLOW_NAME: "Agentic Observability Kit" WORKFLOW_DESCRIPTION: "Drop-in observability kit for repositories using agentic workflows" - HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }} + HAS_PATCH: ${{ needs.agent.outputs.has_patch }} with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -871,6 +1070,12 @@ jobs: run: | mkdir -p /tmp/gh-aw/threat-detection touch /tmp/gh-aw/threat-detection/detection.log + - name: Install GitHub Copilot CLI + run: ${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh latest + env: + GH_HOST: github.com + - name: Install AWF binary + run: bash ${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh v0.25.0 - name: Execute GitHub Copilot CLI if: always() && steps.detection_guard.outputs.run_detection == 'true' id: detection_agentic_execution @@ -887,7 +1092,7 @@ jobs: set -o pipefail touch /tmp/gh-aw/agent-step-summary.md # shellcheck disable=SC1003 - sudo -E awf --env-all 
--container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,raw.githubusercontent.com,registry.npmjs.org,telemetry.enterprise.githubcopilot.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.25.0 --skip-pull --enable-api-proxy \ + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,raw.githubusercontent.com,registry.npmjs.org,telemetry.enterprise.githubcopilot.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.0 --skip-pull --enable-api-proxy \ -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-tool '\''shell(cat)'\'' --allow-tool '\''shell(grep)'\'' --allow-tool '\''shell(head)'\'' --allow-tool '\''shell(jq)'\'' --allow-tool '\''shell(ls)'\'' --allow-tool '\''shell(tail)'\'' --allow-tool '\''shell(wc)'\'' --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log env: COPILOT_AGENT_RUNNER_TYPE: STANDALONE @@ -895,7 +1100,7 @@ jobs: COPILOT_MODEL: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} GH_AW_PHASE: detection GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - GH_AW_VERSION: v0.63.1 + GH_AW_VERSION: dev GITHUB_API_URL: ${{ github.api_url }} 
GITHUB_AW: true GITHUB_HEAD_REF: ${{ github.head_ref }} @@ -944,123 +1149,14 @@ jobs: echo "conclusion=failure" >> "$GITHUB_OUTPUT" echo "success=false" >> "$GITHUB_OUTPUT" echo "Detection found issues" + exit 1 fi - conclusion: + safe_outputs: needs: - - activation - agent - - safe_outputs - if: always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true') - runs-on: ubuntu-slim - permissions: - contents: read - discussions: write - issues: write - concurrency: - group: "gh-aw-conclusion-agentic-observability-kit" - cancel-in-progress: false - outputs: - noop_message: ${{ steps.noop.outputs.noop_message }} - tools_reported: ${{ steps.missing_tool.outputs.tools_reported }} - total_count: ${{ steps.missing_tool.outputs.total_count }} - steps: - - name: Setup Scripts - uses: github/gh-aw-actions/setup@53e09ec0be6271e81a69f51ef93f37212c8834b0 # v0.63.1 - with: - destination: ${{ runner.temp }}/gh-aw/actions - - name: Download agent output artifact - id: download-agent-output - continue-on-error: true - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - with: - name: agent - path: /tmp/gh-aw/ - - name: Setup agent output environment variable - id: setup-agent-output-env - if: steps.download-agent-output.outcome == 'success' - run: | - mkdir -p /tmp/gh-aw/ - find "/tmp/gh-aw/" -type f -print - echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" - - name: Process No-Op Messages - id: noop - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} - GH_AW_NOOP_MAX: "1" - GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" - GH_AW_TRACKER_ID: "agentic-observability-kit" - with: - github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, 
github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/noop.cjs'); - await main(); - - name: Record Missing Tool - id: missing_tool - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} - GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" - GH_AW_TRACKER_ID: "agentic-observability-kit" - with: - github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/missing_tool.cjs'); - await main(); - - name: Handle Agent Failure - id: handle_agent_failure - if: always() - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} - GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" - GH_AW_TRACKER_ID: "agentic-observability-kit" - GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} - GH_AW_WORKFLOW_ID: "agentic-observability-kit" - GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.activation.outputs.secret_verification_result }} - GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} - GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.agent.outputs.inference_access_error }} - GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }} - GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }} - GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }} - GH_AW_GROUP_REPORTS: "false" - GH_AW_FAILURE_REPORT_AS_ISSUE: "true" - GH_AW_TIMEOUT_MINUTES: "30" - with: - github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || 
secrets.GITHUB_TOKEN }} - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_agent_failure.cjs'); - await main(); - - name: Handle No-Op Message - id: handle_noop_message - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 - env: - GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} - GH_AW_WORKFLOW_NAME: "Agentic Observability Kit" - GH_AW_TRACKER_ID: "agentic-observability-kit" - GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} - GH_AW_NOOP_MESSAGE: ${{ steps.noop.outputs.noop_message }} - GH_AW_NOOP_REPORT_AS_ISSUE: "false" - with: - github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs'); - await main(); - - safe_outputs: - needs: agent - if: (!cancelled()) && needs.agent.result != 'skipped' && needs.agent.outputs.detection_success == 'true' + - detection + if: (!cancelled()) && needs.agent.result != 'skipped' && needs.detection.result == 'success' runs-on: ubuntu-slim permissions: contents: read @@ -1087,8 +1183,15 @@ jobs: process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }} process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false - name: Setup Scripts - uses: 
github/gh-aw-actions/setup@53e09ec0be6271e81a69f51ef93f37212c8834b0 # v0.63.1 + uses: ./actions/setup with: destination: ${{ runner.temp }}/gh-aw/actions - name: Download agent output artifact diff --git a/.github/workflows/agentic-observability-kit.md b/.github/workflows/agentic-observability-kit.md index 7fc0e20875b..53cee8da40d 100644 --- a/.github/workflows/agentic-observability-kit.md +++ b/.github/workflows/agentic-observability-kit.md @@ -63,6 +63,23 @@ Always create a discussion with the full report. Create an escalation issue only - Use the `audit` tool only for up to 3 runs that need deeper inspection. - If there are very few runs, still produce a report and explain the limitation. +## Episode And DAG Model + +Treat agentic workflows as execution DAGs, not just a flat list of independent runs. + +- A single run is a node. +- Trigger, delegation, and orchestration relationships are edges. +- A related set of runs can represent one higher-level episode of work. +- Cost, ownership, and risk may belong to the episode, not just to one run. + +Prefer native lineage when the platform provides it: + +- Treat `workflow_call` relationships as the strongest continuity signal because they preserve actor and billing attribution. +- Treat `workflow_run` relationships as native but weaker edges that show one workflow reacting to another workflow completion. +- Treat `dispatch-workflow` relationships as custom-correlated edges that may require `context.*` metadata to reconstruct continuity. + +When continuity markers are present, avoid judging delegated worker runs in isolation until you check whether they are part of a larger orchestrated episode. + ## Signals To Use The logs JSON already contains the main agentic signals. Prefer these fields over ad hoc heuristics: @@ -75,6 +92,11 @@ The logs JSON already contains the main agentic signals. 
Prefer these fields ove - `behavior_fingerprint.dispatch_mode` - `agentic_assessments[].kind` - `agentic_assessments[].severity` +- `context.repo` +- `context.run_id` +- `context.workflow_id` +- `context.workflow_call_id` +- `context.event_type` - `comparison.baseline.selection` - `comparison.baseline.matched_on[]` - `comparison.classification.label` @@ -83,6 +105,17 @@ The logs JSON already contains the main agentic signals. Prefer these fields ove Treat these values as the canonical signals for reporting. +## Continuity Rules + +Use the following rules when interpreting orchestrated or delegated runs: + +- Group runs into one episode when they share strong continuity markers such as `context.workflow_call_id`, or when the downstream run clearly points to the same upstream `context.run_id` and `context.workflow_id`. +- If a run has `behavior_fingerprint.dispatch_mode == "delegated"`, treat that as evidence that the run may be one node in a larger DAG. +- Do not over-penalize a worker run for being write-capable or resource-heavy if that behavior appears intentional within a larger orchestrator-to-worker sequence. +- If downstream runs lost continuity markers when you would expect them, report that as an observability problem. +- When multiple risky-looking runs appear to belong to one episode, summarize them together before escalating. +- If lineage is ambiguous, say so explicitly instead of inventing a chain. + ## Reporting Model The discussion must stay concise and operator-friendly. 
@@ -100,6 +133,7 @@ Include small numeric summaries such as: - workflows analyzed - runs analyzed +- inferred episodes analyzed - runs with `comparison.classification.label == "risky"` - runs with medium or high `agentic_assessments` - workflows with repeated `overkill_for_agentic` @@ -115,6 +149,8 @@ For each highlighted workflow, explain: - what domain it appears to belong to - what its behavioral fingerprint looks like +- whether it appears to participate in an orchestrated DAG or delegated episode +- whether the actor, cost, and risk seem to belong to the workflow itself or to a larger chain - whether it is stable against a cohort match or only compared to latest success - whether the risky behavior is new, repeated, or likely intentional - what a team should change next @@ -161,6 +197,7 @@ When you use `audit`, fold the extra evidence back into the report instead of du Always create one discussion that includes: - the date range analyzed +- any important orchestrator, worker, or workflow_run chains that materially change interpretation - all workflows that crossed the escalation thresholds - the workflows with the clearest repeated risk - the most common assessment kinds From 2532822670858ef6737876189610dd3c9775b7b8 Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer Date: Wed, 25 Mar 2026 09:06:26 +0100 Subject: [PATCH 11/12] add deterministic episode model and related fields to logs --- .../workflows/agentic-observability-kit.md | 67 ++-- pkg/cli/logs_ci_scenario_test.go | 23 ++ pkg/cli/logs_episode.go | 297 ++++++++++++++++++ pkg/cli/logs_json_test.go | 129 +++++++- pkg/cli/logs_report.go | 34 +- 5 files changed, 506 insertions(+), 44 deletions(-) create mode 100644 pkg/cli/logs_episode.go diff --git a/.github/workflows/agentic-observability-kit.md b/.github/workflows/agentic-observability-kit.md index 53cee8da40d..8b4f3408ca8 100644 --- a/.github/workflows/agentic-observability-kit.md +++ b/.github/workflows/agentic-observability-kit.md @@ -63,27 +63,40 @@ 
Always create a discussion with the full report. Create an escalation issue only - Use the `audit` tool only for up to 3 runs that need deeper inspection. - If there are very few runs, still produce a report and explain the limitation. -## Episode And DAG Model +## Deterministic Episode Model -Treat agentic workflows as execution DAGs, not just a flat list of independent runs. +The logs JSON now includes deterministic lineage fields: -- A single run is a node. -- Trigger, delegation, and orchestration relationships are edges. -- A related set of runs can represent one higher-level episode of work. -- Cost, ownership, and risk may belong to the episode, not just to one run. +- `episodes[]` for aggregated execution episodes +- `edges[]` for lineage edges between runs -Prefer native lineage when the platform provides it: +Treat those structures as the primary source of truth for graph shape, confidence, and episode rollups. -- Treat `workflow_call` relationships as the strongest continuity signal because they preserve actor and billing attribution. -- Treat `workflow_run` relationships as native but weaker edges that show one workflow reacting to another workflow completion. -- Treat `dispatch-workflow` relationships as custom-correlated edges that may require `context.*` metadata to reconstruct continuity. - -When continuity markers are present, avoid judging delegated worker runs in isolation until you check whether they are part of a larger orchestrated episode. +Prefer `episodes[]` and `edges[]` over reconstructing DAGs from raw runs in prompt space. Only fall back to per-run interpretation when episode data is absent or clearly incomplete. ## Signals To Use The logs JSON already contains the main agentic signals. 
Prefer these fields over ad hoc heuristics: +- `episodes[].episode_id` +- `episodes[].kind` +- `episodes[].confidence` +- `episodes[].reasons[]` +- `episodes[].root_run_id` +- `episodes[].run_ids[]` +- `episodes[].workflow_names[]` +- `episodes[].total_runs` +- `episodes[].total_tokens` +- `episodes[].total_estimated_cost` +- `episodes[].total_duration` +- `episodes[].risky_node_count` +- `episodes[].write_capable_node_count` +- `episodes[].mcp_failure_count` +- `episodes[].blocked_request_count` +- `episodes[].risk_distribution` +- `edges[].edge_type` +- `edges[].confidence` +- `edges[].reasons[]` - `task_domain.name` and `task_domain.label` - `behavior_fingerprint.execution_style` - `behavior_fingerprint.tool_breadth` @@ -105,16 +118,13 @@ The logs JSON already contains the main agentic signals. Prefer these fields ove Treat these values as the canonical signals for reporting. -## Continuity Rules - -Use the following rules when interpreting orchestrated or delegated runs: +## Interpretation Rules -- Group runs into one episode when they share strong continuity markers such as `context.workflow_call_id`, or when the downstream run clearly points to the same upstream `context.run_id` and `context.workflow_id`. -- If a run has `behavior_fingerprint.dispatch_mode == "delegated"`, treat that as evidence that the run may be one node in a larger DAG. -- Do not over-penalize a worker run for being write-capable or resource-heavy if that behavior appears intentional within a larger orchestrator-to-worker sequence. -- If downstream runs lost continuity markers when you would expect them, report that as an observability problem. -- When multiple risky-looking runs appear to belong to one episode, summarize them together before escalating. -- If lineage is ambiguous, say so explicitly instead of inventing a chain. +- Use episode-level analysis first. Do not treat connected runs as unrelated when `episodes[]` already groups them. 
+- Use per-run detail only to explain which nodes contributed to an episode-level problem. +- If an episode has low confidence, say so explicitly and avoid overconfident causal claims. +- If delegated workers look risky in isolation but the enclosing episode looks intentional and well-controlled, say that. +- If the deterministic episode model appears incomplete or missing expected lineage, report that as an observability finding. ## Reporting Model @@ -126,14 +136,15 @@ Keep these sections visible: 1. `### Executive Summary` 2. `### Key Metrics` -3. `### Highest Risk Workflows` +3. `### Highest Risk Episodes` 4. `### Recommended Actions` Include small numeric summaries such as: - workflows analyzed - runs analyzed -- inferred episodes analyzed +- episodes analyzed +- high-confidence episodes analyzed - runs with `comparison.classification.label == "risky"` - runs with medium or high `agentic_assessments` - workflows with repeated `overkill_for_agentic` @@ -145,19 +156,20 @@ Put detailed per-workflow breakdowns inside `
` blocks. ### What Good Reporting Looks Like -For each highlighted workflow, explain: +For each highlighted episode or workflow, explain: - what domain it appears to belong to - what its behavioral fingerprint looks like -- whether it appears to participate in an orchestrated DAG or delegated episode +- whether the deterministic graph shows an orchestrated DAG or delegated episode - whether the actor, cost, and risk seem to belong to the workflow itself or to a larger chain +- what the episode confidence level is and why - whether it is stable against a cohort match or only compared to latest success - whether the risky behavior is new, repeated, or likely intentional - what a team should change next ## Escalation Thresholds -Use the discussion as the complete source of truth for all qualifying workflows. Only create an escalation issue when one or more workflows cross these thresholds in the last 14 days: +Use the discussion as the complete source of truth for all qualifying workflows and episodes. Only create an escalation issue when one or more episodes or workflows cross these thresholds in the last 14 days: 1. Two or more runs for the same workflow have `comparison.classification.label == "risky"`. 2. Two or more runs for the same workflow contain `new_mcp_failure` or `blocked_requests_increase` in `comparison.classification.reason_codes`. @@ -170,6 +182,8 @@ If no workflow crosses these thresholds, do not create an escalation issue. If one or more workflows do cross these thresholds, create a single escalation issue that groups the highest-value follow-up work for repository owners. The escalation issue should summarize the workflows that need attention now, why they crossed the thresholds, and what change is recommended first. +Prefer escalating at the episode level when multiple risky runs are part of one coherent DAG. Only fall back to workflow-level escalation when no broader episode can be established with acceptable confidence. 
+ ## Optimization Candidates Do not create issues for these by default. Report them in the discussion unless they are severe and repeated: @@ -198,6 +212,7 @@ Always create one discussion that includes: - the date range analyzed - any important orchestrator, worker, or workflow_run chains that materially change interpretation +- the most important inferred episodes and their confidence levels - all workflows that crossed the escalation thresholds - the workflows with the clearest repeated risk - the most common assessment kinds diff --git a/pkg/cli/logs_ci_scenario_test.go b/pkg/cli/logs_ci_scenario_test.go index 607d4f5d6f2..04cd6dae56d 100644 --- a/pkg/cli/logs_ci_scenario_test.go +++ b/pkg/cli/logs_ci_scenario_test.go @@ -248,12 +248,19 @@ func TestLogsJSONOutputStructure(t *testing.T) { if _, exists := parsed["runs"]; !exists { t.Error("Missing 'runs' field in JSON output") } + if _, exists := parsed["episodes"]; !exists { + t.Error("Missing 'episodes' field in JSON output") + } + if _, exists := parsed["edges"]; !exists { + t.Error("Missing 'edges' field in JSON output") + } // Verify summary has all required fields summary := parsed["summary"].(map[string]any) requiredFields := []string{ "total_runs", "total_duration", "total_tokens", "total_cost", "total_turns", "total_errors", "total_warnings", "total_missing_tools", + "total_episodes", "high_confidence_episodes", } for _, field := range requiredFields { @@ -270,6 +277,22 @@ func TestLogsJSONOutputStructure(t *testing.T) { if len(runs) != 0 { t.Errorf("Expected empty runs array, got %d runs", len(runs)) } + + episodes, ok := parsed["episodes"].([]any) + if !ok { + t.Errorf("Expected 'episodes' to be an array, got %T", parsed["episodes"]) + } + if len(episodes) != 0 { + t.Errorf("Expected empty episodes array, got %d episodes", len(episodes)) + } + + edges, ok := parsed["edges"].([]any) + if !ok { + t.Errorf("Expected 'edges' to be an array, got %T", parsed["edges"]) + } + if len(edges) != 0 { + 
t.Errorf("Expected empty edges array, got %d edges", len(edges)) + } } // TestSummaryFileWrittenWithNoRuns verifies that the summary.json file is created diff --git a/pkg/cli/logs_episode.go b/pkg/cli/logs_episode.go new file mode 100644 index 00000000000..d66d54c90fe --- /dev/null +++ b/pkg/cli/logs_episode.go @@ -0,0 +1,297 @@ +package cli + +import ( + "cmp" + "fmt" + "slices" + "strconv" + "time" + + "github.com/github/gh-aw/pkg/timeutil" +) + +// EpisodeEdge represents a deterministic lineage edge between two workflow runs. +type EpisodeEdge struct { + SourceRunID int64 `json:"source_run_id"` + TargetRunID int64 `json:"target_run_id"` + EdgeType string `json:"edge_type"` + Confidence string `json:"confidence"` + Reasons []string `json:"reasons,omitempty"` + SourceRepo string `json:"source_repo,omitempty"` + SourceRef string `json:"source_ref,omitempty"` + EventType string `json:"event_type,omitempty"` + EpisodeID string `json:"episode_id,omitempty"` +} + +// EpisodeData represents a deterministic episode rollup derived from workflow runs. 
+type EpisodeData struct { + EpisodeID string `json:"episode_id"` + Kind string `json:"kind"` + Confidence string `json:"confidence"` + Reasons []string `json:"reasons,omitempty"` + RootRunID int64 `json:"root_run_id,omitempty"` + RunIDs []int64 `json:"run_ids"` + WorkflowNames []string `json:"workflow_names"` + TotalRuns int `json:"total_runs"` + TotalTokens int `json:"total_tokens"` + TotalEstimatedCost float64 `json:"total_estimated_cost"` + TotalDuration string `json:"total_duration"` + RiskyNodeCount int `json:"risky_node_count"` + WriteCapableNodeCount int `json:"write_capable_node_count"` + MissingToolCount int `json:"missing_tool_count"` + MCPFailureCount int `json:"mcp_failure_count"` + BlockedRequestCount int `json:"blocked_request_count"` + RiskDistribution string `json:"risk_distribution"` +} + +type episodeAccumulator struct { + metadata EpisodeData + duration time.Duration + runSet map[int64]bool + nameSet map[string]bool + rootTime time.Time +} + +type episodeSeed struct { + EpisodeID string + Kind string + Confidence string + Reasons []string +} + +func buildEpisodeData(runs []RunData, processedRuns []ProcessedRun) ([]EpisodeData, []EpisodeEdge) { + runsByID := make(map[int64]RunData, len(runs)) + processedByID := make(map[int64]ProcessedRun, len(processedRuns)) + seedsByRunID := make(map[int64]episodeSeed, len(runs)) + parents := make(map[int64]int64, len(runs)) + for _, run := range runs { + runsByID[run.DatabaseID] = run + episodeID, kind, confidence, reasons := classifyEpisode(run) + seedsByRunID[run.DatabaseID] = episodeSeed{EpisodeID: episodeID, Kind: kind, Confidence: confidence, Reasons: append([]string(nil), reasons...)} + parents[run.DatabaseID] = run.DatabaseID + } + for _, processedRun := range processedRuns { + processedByID[processedRun.Run.DatabaseID] = processedRun + } + + edges := make([]EpisodeEdge, 0) + for _, run := range runs { + if edge, ok := buildEpisodeEdge(run, seedsByRunID[run.DatabaseID].EpisodeID, runsByID); ok { + edges 
= append(edges, edge) + unionEpisodes(parents, edge.SourceRunID, edge.TargetRunID) + } + } + + episodeMap := make(map[string]*episodeAccumulator) + rootMetadata := make(map[int64]episodeSeed) + for _, run := range runs { + root := findEpisodeParent(parents, run.DatabaseID) + seed := seedsByRunID[run.DatabaseID] + best, exists := rootMetadata[root] + if !exists || compareEpisodeSeeds(seed, best) > 0 { + rootMetadata[root] = seed + } + } + + for _, run := range runs { + root := findEpisodeParent(parents, run.DatabaseID) + selectedSeed := rootMetadata[root] + episodeID, kind, confidence, reasons := selectedSeed.EpisodeID, selectedSeed.Kind, selectedSeed.Confidence, selectedSeed.Reasons + acc, exists := episodeMap[episodeID] + if !exists { + acc = &episodeAccumulator{ + metadata: EpisodeData{ + EpisodeID: episodeID, + Kind: kind, + Confidence: confidence, + Reasons: append([]string(nil), reasons...), + RunIDs: []int64{}, + WorkflowNames: []string{}, + RiskDistribution: "none", + }, + runSet: make(map[int64]bool), + nameSet: make(map[string]bool), + rootTime: run.CreatedAt, + } + episodeMap[episodeID] = acc + } + + if !acc.runSet[run.DatabaseID] { + acc.runSet[run.DatabaseID] = true + acc.metadata.RunIDs = append(acc.metadata.RunIDs, run.DatabaseID) + } + if run.WorkflowName != "" && !acc.nameSet[run.WorkflowName] { + acc.nameSet[run.WorkflowName] = true + acc.metadata.WorkflowNames = append(acc.metadata.WorkflowNames, run.WorkflowName) + } + + acc.metadata.TotalRuns++ + acc.metadata.TotalTokens += run.TokenUsage + acc.metadata.TotalEstimatedCost += run.EstimatedCost + if run.Comparison != nil && run.Comparison.Classification != nil && run.Comparison.Classification.Label == "risky" { + acc.metadata.RiskyNodeCount++ + } + if run.BehaviorFingerprint != nil && run.BehaviorFingerprint.ActuationStyle != "read_only" { + acc.metadata.WriteCapableNodeCount++ + } + acc.metadata.MissingToolCount += run.MissingToolCount + if pr, ok := processedByID[run.DatabaseID]; ok { + 
acc.metadata.MCPFailureCount += len(pr.MCPFailures) + if pr.FirewallAnalysis != nil { + acc.metadata.BlockedRequestCount += pr.FirewallAnalysis.BlockedRequests + } + } + if !run.CreatedAt.IsZero() && (acc.metadata.RootRunID == 0 || run.CreatedAt.Before(acc.rootTime)) { + acc.rootTime = run.CreatedAt + acc.metadata.RootRunID = run.DatabaseID + } + if run.StartedAt.IsZero() && run.UpdatedAt.IsZero() { + acc.duration += run.CreatedAt.Sub(run.CreatedAt) + } else if !run.StartedAt.IsZero() && !run.UpdatedAt.IsZero() && run.UpdatedAt.After(run.StartedAt) { + acc.duration += run.UpdatedAt.Sub(run.StartedAt) + } else if pr, ok := processedByID[run.DatabaseID]; ok && pr.Run.Duration > 0 { + acc.duration += pr.Run.Duration + } + } + + for index := range edges { + root := findEpisodeParent(parents, edges[index].TargetRunID) + if selectedSeed, ok := rootMetadata[root]; ok { + edges[index].EpisodeID = selectedSeed.EpisodeID + } + } + + episodes := make([]EpisodeData, 0, len(episodeMap)) + for _, acc := range episodeMap { + slices.Sort(acc.metadata.RunIDs) + slices.Sort(acc.metadata.WorkflowNames) + if acc.duration > 0 { + acc.metadata.TotalDuration = timeutil.FormatDuration(acc.duration) + } + switch { + case acc.metadata.RiskyNodeCount == 0: + acc.metadata.RiskDistribution = "none" + case acc.metadata.RiskyNodeCount == 1: + acc.metadata.RiskDistribution = "concentrated" + default: + acc.metadata.RiskDistribution = "distributed" + } + episodes = append(episodes, acc.metadata) + } + + slices.SortFunc(episodes, func(a, b EpisodeData) int { + if a.RootRunID != b.RootRunID { + return cmp.Compare(a.RootRunID, b.RootRunID) + } + return cmp.Compare(a.EpisodeID, b.EpisodeID) + }) + slices.SortFunc(edges, func(a, b EpisodeEdge) int { + if a.SourceRunID != b.SourceRunID { + return cmp.Compare(a.SourceRunID, b.SourceRunID) + } + return cmp.Compare(a.TargetRunID, b.TargetRunID) + }) + + return episodes, edges +} + +func findEpisodeParent(parents map[int64]int64, runID int64) int64 { + 
parent, exists := parents[runID] + if !exists || parent == runID { + return runID + } + root := findEpisodeParent(parents, parent) + parents[runID] = root + return root +} + +func unionEpisodes(parents map[int64]int64, leftRunID, rightRunID int64) { + leftRoot := findEpisodeParent(parents, leftRunID) + rightRoot := findEpisodeParent(parents, rightRunID) + if leftRoot == rightRoot { + return + } + parents[leftRoot] = rightRoot +} + +func compareEpisodeSeeds(left, right episodeSeed) int { + if left.Kind != right.Kind { + return cmp.Compare(seedKindRank(left.Kind), seedKindRank(right.Kind)) + } + if left.Confidence != right.Confidence { + return cmp.Compare(seedConfidenceRank(left.Confidence), seedConfidenceRank(right.Confidence)) + } + return cmp.Compare(left.EpisodeID, right.EpisodeID) +} + +func seedKindRank(kind string) int { + switch kind { + case "workflow_call": + return 4 + case "dispatch_workflow": + return 3 + case "workflow_run": + return 2 + default: + return 1 + } +} + +func seedConfidenceRank(confidence string) int { + switch confidence { + case "high": + return 3 + case "medium": + return 2 + default: + return 1 + } +} + +func classifyEpisode(run RunData) (string, string, string, []string) { + if run.AwContext != nil { + if run.AwContext.WorkflowCallID != "" { + return fmt.Sprintf("dispatch:%s", run.AwContext.WorkflowCallID), "dispatch_workflow", "high", []string{"context.workflow_call_id"} + } + if run.AwContext.RunID != "" && run.AwContext.WorkflowID != "" { + return fmt.Sprintf("dispatch:%s:%s:%s", run.AwContext.Repo, run.AwContext.RunID, run.AwContext.WorkflowID), "dispatch_workflow", "medium", []string{"context.run_id", "context.workflow_id"} + } + } + if run.Event == "workflow_run" { + return fmt.Sprintf("workflow_run:%d", run.DatabaseID), "workflow_run", "low", []string{"event=workflow_run", "upstream run metadata unavailable in logs summary"} + } + return fmt.Sprintf("standalone:%d", run.DatabaseID), "standalone", "high", 
[]string{"no_shared_lineage_markers"} +} + +func buildEpisodeEdge(run RunData, episodeID string, runsByID map[int64]RunData) (EpisodeEdge, bool) { + if run.AwContext == nil || run.AwContext.RunID == "" { + return EpisodeEdge{}, false + } + sourceRunID, err := strconv.ParseInt(run.AwContext.RunID, 10, 64) + if err != nil { + return EpisodeEdge{}, false + } + if _, ok := runsByID[sourceRunID]; !ok { + return EpisodeEdge{}, false + } + confidence := "medium" + reasons := []string{"context.run_id"} + if run.AwContext.WorkflowCallID != "" { + confidence = "high" + reasons = append(reasons, "context.workflow_call_id") + } + if run.AwContext.WorkflowID != "" { + reasons = append(reasons, "context.workflow_id") + } + return EpisodeEdge{ + SourceRunID: sourceRunID, + TargetRunID: run.DatabaseID, + EdgeType: "dispatch_workflow", + Confidence: confidence, + Reasons: reasons, + SourceRepo: run.AwContext.Repo, + SourceRef: run.AwContext.WorkflowID, + EventType: run.AwContext.EventType, + EpisodeID: episodeID, + }, true +} diff --git a/pkg/cli/logs_json_test.go b/pkg/cli/logs_json_test.go index 26edf14ff32..fcfd9f70824 100644 --- a/pkg/cli/logs_json_test.go +++ b/pkg/cli/logs_json_test.go @@ -130,11 +130,23 @@ func TestBuildLogsData(t *testing.T) { if logsData.Summary.TotalMissingTools != 1 { t.Errorf("Expected TotalMissingTools to be 1, got %d", logsData.Summary.TotalMissingTools) } + if logsData.Summary.TotalEpisodes != 2 { + t.Errorf("Expected TotalEpisodes to be 2, got %d", logsData.Summary.TotalEpisodes) + } + if logsData.Summary.HighConfidenceEpisodes != 2 { + t.Errorf("Expected HighConfidenceEpisodes to be 2, got %d", logsData.Summary.HighConfidenceEpisodes) + } // Verify runs data if len(logsData.Runs) != 2 { t.Errorf("Expected 2 runs, got %d", len(logsData.Runs)) } + if len(logsData.Episodes) != 2 { + t.Fatalf("Expected 2 episodes, got %d", len(logsData.Episodes)) + } + if len(logsData.Edges) != 0 { + t.Fatalf("Expected 0 edges for standalone runs, got %d", 
len(logsData.Edges)) + } // Verify first run if logsData.Runs[0].DatabaseID != 12345 { @@ -185,14 +197,16 @@ func TestRenderLogsJSON(t *testing.T) { // Create sample logs data logsData := LogsData{ Summary: LogsSummary{ - TotalRuns: 2, - TotalDuration: "8m0s", - TotalTokens: 1500, - TotalCost: 0.075, - TotalTurns: 5, - TotalErrors: 1, - TotalWarnings: 1, - TotalMissingTools: 1, + TotalRuns: 2, + TotalDuration: "8m0s", + TotalTokens: 1500, + TotalCost: 0.075, + TotalTurns: 5, + TotalErrors: 1, + TotalWarnings: 1, + TotalMissingTools: 1, + TotalEpisodes: 1, + HighConfidenceEpisodes: 1, }, Runs: []RunData{ { @@ -222,6 +236,19 @@ func TestRenderLogsJSON(t *testing.T) { }, }, }, + Episodes: []EpisodeData{ + { + EpisodeID: "standalone:12345", + Kind: "standalone", + Confidence: "high", + RunIDs: []int64{12345}, + WorkflowNames: []string{"Test Workflow"}, + TotalRuns: 1, + TotalTokens: 1000, + TotalEstimatedCost: 0.05, + }, + }, + Edges: []EpisodeEdge{}, LogsLocation: tmpDir, } @@ -257,6 +284,9 @@ func TestRenderLogsJSON(t *testing.T) { if parsedData.Summary.TotalTokens != 1500 { t.Errorf("Expected TotalTokens 1500, got %d", parsedData.Summary.TotalTokens) } + if parsedData.Summary.TotalEpisodes != 1 { + t.Errorf("Expected TotalEpisodes 1, got %d", parsedData.Summary.TotalEpisodes) + } if len(parsedData.Runs) != 1 { t.Errorf("Expected 1 run in JSON, got %d", len(parsedData.Runs)) } @@ -265,6 +295,89 @@ func TestRenderLogsJSON(t *testing.T) { } } +func TestBuildLogsDataAggregatesDispatchEpisode(t *testing.T) { + tmpDir := testutil.TempDir(t, "test-episode-*") + processedRuns := []ProcessedRun{ + { + Run: WorkflowRun{ + DatabaseID: 2001, + WorkflowName: "orchestrator", + WorkflowPath: ".github/workflows/orchestrator.yml", + Status: "completed", + Conclusion: "success", + Duration: 2 * time.Minute, + TokenUsage: 300, + EstimatedCost: 0.01, + CreatedAt: time.Date(2024, 2, 1, 12, 0, 0, 0, time.UTC), + StartedAt: time.Date(2024, 2, 1, 12, 0, 0, 0, time.UTC), + UpdatedAt: 
time.Date(2024, 2, 1, 12, 2, 0, 0, time.UTC), + LogsPath: filepath.Join(tmpDir, "run-2001"), + }, + }, + { + Run: WorkflowRun{ + DatabaseID: 2002, + WorkflowName: "worker", + WorkflowPath: ".github/workflows/worker.yml", + Status: "completed", + Conclusion: "success", + Duration: 4 * time.Minute, + TokenUsage: 700, + EstimatedCost: 0.03, + MissingToolCount: 1, + CreatedAt: time.Date(2024, 2, 1, 12, 3, 0, 0, time.UTC), + StartedAt: time.Date(2024, 2, 1, 12, 3, 0, 0, time.UTC), + UpdatedAt: time.Date(2024, 2, 1, 12, 7, 0, 0, time.UTC), + LogsPath: filepath.Join(tmpDir, "run-2002"), + }, + AwContext: &AwContext{ + Repo: "github/gh-aw", + RunID: "2001", + WorkflowID: "github/gh-aw/.github/workflows/orchestrator.yml@refs/heads/main", + WorkflowCallID: "2001-1", + EventType: "workflow_dispatch", + }, + BehaviorFingerprint: &BehaviorFingerprint{ActuationStyle: "selective_write"}, + MCPFailures: []MCPFailureReport{{ServerName: "github", Status: "failed"}}, + }, + } + + logsData := buildLogsData(processedRuns, tmpDir, nil) + + if logsData.Summary.TotalEpisodes != 1 { + t.Fatalf("Expected 1 episode, got %d", logsData.Summary.TotalEpisodes) + } + if logsData.Summary.HighConfidenceEpisodes != 1 { + t.Fatalf("Expected 1 high-confidence episode, got %d", logsData.Summary.HighConfidenceEpisodes) + } + if len(logsData.Edges) != 1 { + t.Fatalf("Expected 1 edge, got %d", len(logsData.Edges)) + } + edge := logsData.Edges[0] + if edge.SourceRunID != 2001 || edge.TargetRunID != 2002 { + t.Fatalf("Expected edge 2001->2002, got %d->%d", edge.SourceRunID, edge.TargetRunID) + } + if edge.EdgeType != "dispatch_workflow" { + t.Fatalf("Expected dispatch_workflow edge, got %s", edge.EdgeType) + } + episode := logsData.Episodes[0] + if episode.Kind != "dispatch_workflow" { + t.Fatalf("Expected dispatch_workflow episode, got %s", episode.Kind) + } + if episode.TotalRuns != 2 { + t.Fatalf("Expected episode TotalRuns 2, got %d", episode.TotalRuns) + } + if episode.TotalTokens != 1000 { + 
t.Fatalf("Expected episode TotalTokens 1000, got %d", episode.TotalTokens) + } + if episode.MCPFailureCount != 1 { + t.Fatalf("Expected episode MCPFailureCount 1, got %d", episode.MCPFailureCount) + } + if episode.WriteCapableNodeCount != 1 { + t.Fatalf("Expected episode WriteCapableNodeCount 1, got %d", episode.WriteCapableNodeCount) + } +} + // TestBuildMissingToolsSummary tests missing tools aggregation func TestBuildMissingToolsSummary(t *testing.T) { processedRuns := []ProcessedRun{ diff --git a/pkg/cli/logs_report.go b/pkg/cli/logs_report.go index adf036ad09c..ef6bbde0f33 100644 --- a/pkg/cli/logs_report.go +++ b/pkg/cli/logs_report.go @@ -22,6 +22,8 @@ var reportLog = logger.New("cli:logs_report") type LogsData struct { Summary LogsSummary `json:"summary" console:"title:Workflow Logs Summary"` Runs []RunData `json:"runs" console:"title:Workflow Logs Overview"` + Episodes []EpisodeData `json:"episodes" console:"-"` + Edges []EpisodeEdge `json:"edges" console:"-"` ToolUsage []ToolUsageSummary `json:"tool_usage,omitempty" console:"title:🛠️ Tool Usage Summary,omitempty"` MCPToolUsage *MCPToolUsageSummary `json:"mcp_tool_usage,omitempty" console:"title:🔧 MCP Tool Usage,omitempty"` Observability []ObservabilityInsight `json:"observability_insights,omitempty" console:"-"` @@ -52,16 +54,18 @@ type ContinuationData struct { // LogsSummary contains aggregate metrics across all runs type LogsSummary struct { - TotalRuns int `json:"total_runs" console:"header:Total Runs"` - TotalDuration string `json:"total_duration" console:"header:Total Duration"` - TotalTokens int `json:"total_tokens" console:"header:Total Tokens,format:number"` - TotalCost float64 `json:"total_cost" console:"header:Total Cost,format:cost"` - TotalTurns int `json:"total_turns" console:"header:Total Turns"` - TotalErrors int `json:"total_errors" console:"header:Total Errors"` - TotalWarnings int `json:"total_warnings" console:"header:Total Warnings"` - TotalMissingTools int `json:"total_missing_tools" 
console:"header:Total Missing Tools"` - TotalMissingData int `json:"total_missing_data" console:"header:Total Missing Data"` - TotalSafeItems int `json:"total_safe_items" console:"header:Total Safe Items"` + TotalRuns int `json:"total_runs" console:"header:Total Runs"` + TotalDuration string `json:"total_duration" console:"header:Total Duration"` + TotalTokens int `json:"total_tokens" console:"header:Total Tokens,format:number"` + TotalCost float64 `json:"total_cost" console:"header:Total Cost,format:cost"` + TotalTurns int `json:"total_turns" console:"header:Total Turns"` + TotalErrors int `json:"total_errors" console:"header:Total Errors"` + TotalWarnings int `json:"total_warnings" console:"header:Total Warnings"` + TotalMissingTools int `json:"total_missing_tools" console:"header:Total Missing Tools"` + TotalMissingData int `json:"total_missing_data" console:"header:Total Missing Data"` + TotalSafeItems int `json:"total_safe_items" console:"header:Total Safe Items"` + TotalEpisodes int `json:"total_episodes" console:"header:Total Episodes"` + HighConfidenceEpisodes int `json:"high_confidence_episodes" console:"header:High Confidence Episodes"` } // RunData contains information about a single workflow run @@ -233,6 +237,14 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation TotalSafeItems: totalSafeItems, } + episodes, edges := buildEpisodeData(runs, processedRuns) + for _, episode := range episodes { + summary.TotalEpisodes++ + if episode.Confidence == "high" { + summary.HighConfidenceEpisodes++ + } + } + // Build tool usage summary toolUsage := buildToolUsageSummary(processedRuns) @@ -267,6 +279,8 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation return LogsData{ Summary: summary, Runs: runs, + Episodes: episodes, + Edges: edges, ToolUsage: toolUsage, MCPToolUsage: mcpToolUsage, Observability: observability, From ee077e2d515879eb8a6450a491fed778faf8a56e Mon Sep 17 00:00:00 2001 From: Mara Nikola 
Kiefer Date: Wed, 25 Mar 2026 09:15:20 +0100 Subject: [PATCH 12/12] fix lint error --- pkg/cli/logs_episode.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/cli/logs_episode.go b/pkg/cli/logs_episode.go index d66d54c90fe..17b2ef3d072 100644 --- a/pkg/cli/logs_episode.go +++ b/pkg/cli/logs_episode.go @@ -168,10 +168,10 @@ func buildEpisodeData(runs []RunData, processedRuns []ProcessedRun) ([]EpisodeDa if acc.duration > 0 { acc.metadata.TotalDuration = timeutil.FormatDuration(acc.duration) } - switch { - case acc.metadata.RiskyNodeCount == 0: + switch acc.metadata.RiskyNodeCount { + case 0: acc.metadata.RiskDistribution = "none" - case acc.metadata.RiskyNodeCount == 1: + case 1: acc.metadata.RiskDistribution = "concentrated" default: acc.metadata.RiskDistribution = "distributed" @@ -251,7 +251,7 @@ func seedConfidenceRank(confidence string) int { func classifyEpisode(run RunData) (string, string, string, []string) { if run.AwContext != nil { if run.AwContext.WorkflowCallID != "" { - return fmt.Sprintf("dispatch:%s", run.AwContext.WorkflowCallID), "dispatch_workflow", "high", []string{"context.workflow_call_id"} + return "dispatch:" + run.AwContext.WorkflowCallID, "dispatch_workflow", "high", []string{"context.workflow_call_id"} } if run.AwContext.RunID != "" && run.AwContext.WorkflowID != "" { return fmt.Sprintf("dispatch:%s:%s:%s", run.AwContext.Repo, run.AwContext.RunID, run.AwContext.WorkflowID), "dispatch_workflow", "medium", []string{"context.run_id", "context.workflow_id"}