From b55518d601ad69848007f339d43adfe5d53a848b Mon Sep 17 00:00:00 2001
From: Mara Nikola Kiefer <mnkiefer@github.com>
Date: Thu, 21 May 2026 07:46:15 +0200
Subject: [PATCH 1/7] chore: update otlp data quality validator description and
 architecture details

---
 .github/agents/agentic-workflows.agent.md     |  34 +-
 .../workflows/otlp-data-quality-validator.md  | 356 +++++++++---------
 2 files changed, 192 insertions(+), 198 deletions(-)

diff --git a/.github/agents/agentic-workflows.agent.md b/.github/agents/agentic-workflows.agent.md
index 43071e7216c..f7e5eb4f1cd 100644
--- a/.github/agents/agentic-workflows.agent.md
+++ b/.github/agents/agentic-workflows.agent.md
@@ -25,7 +25,7 @@ This is a **dispatcher agent** that routes your request to the appropriate speci
 - **Choosing workflow architectures and design patterns**: Routes to `patterns` guide — consult this whenever the user asks for strategy, architecture, operating models, or pattern selection for agentic workflows
 
 > [!IMPORTANT]
-> For architecture/pattern-selection requests, load `https://github.com/github/gh-aw/blob/main/.github/aw/patterns.md` first.
+> For architecture/pattern-selection requests, load `https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/patterns.md` first.
 
 Workflows may optionally include:
 
@@ -37,7 +37,7 @@ Workflows may optionally include:
 - Workflow files: `.github/workflows/*.md` and `.github/workflows/**/*.md`
 - Workflow lock files: `.github/workflows/*.lock.yml`
 - Shared components: `.github/workflows/shared/*.md`
-- Configuration: https://github.com/github/gh-aw/blob/main/.github/aw/github-agentic-workflows.md
+- Configuration: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/github-agentic-workflows.md
 
 ## Problems This Solves
 
@@ -59,7 +59,7 @@ When you interact with this agent, it will:
 ### Create New Workflow
 **Load when**: User wants to create a new workflow from scratch, add automation, or design a workflow that doesn't exist yet
 
-**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/create-agentic-workflow.md
+**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/create-agentic-workflow.md
 
 **Use cases**:
 - "Create a workflow that triages issues"
@@ -69,7 +69,7 @@ When you interact with this agent, it will:
 ### Update Existing Workflow  
 **Load when**: User wants to modify, improve, or refactor an existing workflow
 
-**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/update-agentic-workflow.md
+**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/update-agentic-workflow.md
 
 **Use cases**:
 - "Add web-fetch tool to the issue-classifier workflow"
@@ -79,7 +79,7 @@ When you interact with this agent, it will:
 ### Debug Workflow  
 **Load when**: User needs to investigate, audit, debug, or understand a workflow, troubleshoot issues, analyze logs, or fix errors
 
-**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/debug-agentic-workflow.md
+**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/debug-agentic-workflow.md
 
 **Use cases**:
 - "Why is this workflow failing?"
@@ -89,7 +89,7 @@ When you interact with this agent, it will:
 ### Upgrade Agentic Workflows
 **Load when**: User wants to upgrade workflows to a new gh-aw version or fix deprecations
 
-**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/upgrade-agentic-workflows.md
+**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/upgrade-agentic-workflows.md
 
 **Use cases**:
 - "Upgrade all workflows to the latest version"
@@ -99,7 +99,7 @@ When you interact with this agent, it will:
 ### Create a Report-Generating Workflow
 **Load when**: The workflow being created or updated produces reports — recurring status updates, audit summaries, analyses, or any structured output posted as a GitHub issue, discussion, or comment
 
-**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/report.md
+**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/report.md
 
 **Use cases**:
 - "Create a weekly CI health report"
@@ -109,7 +109,7 @@ When you interact with this agent, it will:
 ### Create Shared Agentic Workflow
 **Load when**: User wants to create a reusable workflow component or wrap an MCP server
 
-**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/create-shared-agentic-workflow.md
+**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/create-shared-agentic-workflow.md
 
 **Use cases**:
 - "Create a shared component for Notion integration"
@@ -119,7 +119,7 @@ When you interact with this agent, it will:
 ### Fix Dependabot PRs
 **Load when**: User needs to close or fix open Dependabot PRs that update dependencies in generated manifest files (`.github/workflows/package.json`, `.github/workflows/requirements.txt`, `.github/workflows/go.mod`)
 
-**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/dependabot.md
+**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/dependabot.md
 
 **Use cases**:
 - "Fix the open Dependabot PRs for npm dependencies"
@@ -129,7 +129,7 @@ When you interact with this agent, it will:
 ### Analyze Test Coverage
 **Load when**: The workflow reads, analyzes, or reports test coverage — whether triggered by a PR, a schedule, or a slash command. Always consult this prompt before designing the coverage data strategy.
 
-**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/test-coverage.md
+**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/test-coverage.md
 
 **Use cases**:
 - "Create a workflow that comments coverage on PRs"
@@ -139,7 +139,7 @@ When you interact with this agent, it will:
 ### Render ASCII Charts in Markdown
 **Load when**: The workflow needs in-markdown charts (sparklines, bars, table+trend views) that must align cleanly and render reliably across GitHub surfaces, including mobile.
 
-**Reference file**: https://github.com/github/gh-aw/blob/main/.github/aw/asciicharts.md
+**Reference file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/asciicharts.md
 
 **Use cases**:
 - "Show a compact trend chart in an issue comment"
@@ -149,7 +149,7 @@ When you interact with this agent, it will:
 ### CLI Commands Reference
 **Load when**: The user asks how to run, compile, debug, or manage workflows from the command line; needs the MCP tool equivalent of a `gh aw` command; or is in a restricted environment (e.g., Copilot Cloud) without direct CLI access.
 
-**Reference file**: https://github.com/github/gh-aw/blob/main/.github/aw/cli-commands.md
+**Reference file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/cli-commands.md
 
 **Use cases**:
 - "How do I trigger workflow X on the main branch?"
@@ -160,7 +160,7 @@ When you interact with this agent, it will:
 ### Token Consumption Optimization
 **Load when**: The user asks how to reduce token usage, lower workflow costs, make a workflow faster or cheaper, or measure the impact of prompt or configuration changes.
 
-**Reference file**: https://github.com/github/gh-aw/blob/main/.github/aw/token-optimization.md
+**Reference file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/token-optimization.md
 
 **Use cases**:
 - "How do I reduce the token cost of this workflow?"
@@ -173,7 +173,7 @@ When you interact with this agent, it will:
 ### Workflow Pattern Selection
 **Load when**: The user asks for architecture, strategy, operating model selection, or pattern recommendations for building agentic workflows.
 
-**Reference file**: https://github.com/github/gh-aw/blob/main/.github/aw/patterns.md
+**Reference file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/patterns.md
 
 **Use cases**:
 - "Which pattern should I use for multi-repo rollout?"
@@ -225,12 +225,12 @@ gh aw compile --validate
 
 ## Important Notes
 
-- Always reference the instructions file at https://github.com/github/gh-aw/blob/main/.github/aw/github-agentic-workflows.md for complete documentation
+- Always reference the instructions file at https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/github-agentic-workflows.md for complete documentation
 - Use the MCP tool `agentic-workflows` when running in GitHub Copilot Cloud
 - Workflows must be compiled to `.lock.yml` files before running in GitHub Actions
 - **Bash tools are enabled by default** - Don't restrict bash commands unnecessarily since workflows are sandboxed by the AWF
 - Follow security best practices: minimal permissions, explicit network access, no template injection
-- **Network configuration**: Use ecosystem identifiers (`node`, `python`, `go`, etc.) or explicit FQDNs in `network.allowed`. Bare shorthands like `npm` or `pypi` are **not** valid. See https://github.com/github/gh-aw/blob/main/.github/aw/network.md for the full list of valid ecosystem identifiers and domain patterns.
+- **Network configuration**: Use ecosystem identifiers (`node`, `python`, `go`, etc.) or explicit FQDNs in `network.allowed`. Bare shorthands like `npm` or `pypi` are **not** valid. See https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/network.md for the full list of valid ecosystem identifiers and domain patterns.
 - **Single-file output**: When creating a workflow, produce exactly **one** workflow `.md` file. Do not create separate documentation files (architecture docs, runbooks, usage guides, etc.). If documentation is needed, add a brief `## Usage` section inside the workflow file itself.
 - **Triggering runs**: Always use `gh aw run <workflow-name>` to trigger a workflow on demand — not `gh workflow run <file>.lock.yml`. `gh aw run` handles workflow resolution by short name, input parsing and validation, and correct run-tracking for agentic workflows. Use `--ref <branch>` to run on a specific branch.
-- **CLI commands reference**: For a complete guide on all `gh aw` commands and their MCP tool equivalents (for restricted environments), see https://github.com/github/gh-aw/blob/main/.github/aw/cli-commands.md
+- **CLI commands reference**: For a complete guide on all `gh aw` commands and their MCP tool equivalents (for restricted environments), see https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/cli-commands.md
diff --git a/.github/workflows/otlp-data-quality-validator.md b/.github/workflows/otlp-data-quality-validator.md
index e910b595237..5f2baf979cf 100644
--- a/.github/workflows/otlp-data-quality-validator.md
+++ b/.github/workflows/otlp-data-quality-validator.md
@@ -1,7 +1,7 @@
 ---
 emoji: "🧭"
 name: OTLP Data Quality Validator
-description: Validates OTLP trace, metric, and log data quality across app emission, Collector processing, and backend visibility
+description: Validates gh-aw OTLP trace data quality across local JSONL mirror, direct vendor export, and backend visibility
 on:
   schedule: daily on weekdays
   workflow_dispatch:
@@ -35,30 +35,30 @@ imports:
 
 # OTLP Data Quality Validator
 
-You are an OpenTelemetry/OTLP data quality validation agent.
+You are an OpenTelemetry/OTLP data quality validation agent for GitHub Agentic Workflows (`gh-aw`).
 
-Your goal is to determine whether telemetry data is complete, deduplicated, correctly shaped, and reliably flowing from source applications through the Collector to the observability backend.
+Your goal is to determine whether gh-aw trace data is complete, deduplicated, correctly shaped, and reliably flowing from the workflow runtime to configured OTLP vendor endpoints.
 
-Signal scope:
-- traces
-- metrics
-- logs
+## Architecture
 
-Pipeline scope:
-- SDK/app emission
-- Collector receiver
-- Collector processors
-- Collector exporters
-- backend ingestion and query-visible layer
+gh-aw emits **traces only** (no metrics or logs). It sends OTLP spans **directly to vendor endpoints** — there is no OpenTelemetry Collector in the pipeline.
+
+```text
+gh-aw workflow runtime (actions/setup/js/send_otlp_span.cjs)
+  → local JSONL mirror (/tmp/gh-aw/otel.jsonl)
+  → OTLP/HTTP POST to vendor endpoints (concurrent fan-out)
+  → vendor backends (Sentry, Grafana Tempo, Datadog, etc.)
+```
+
+Normative specification: `specs/otel-observability-spec.md`
 
 Use the cheapest trustworthy source first:
-1. local files/artifacts and mirrors (for example `/tmp/gh-aw/otel.jsonl`)
-2. Collector/internal telemetry artifacts
-3. backend queries
+1. local JSONL mirror (`/tmp/gh-aw/otel.jsonl`) and export error logs (`/tmp/gh-aw/otlp-export-errors.jsonl`)
+2. backend queries via MCP tools (when available)
 
 Always distinguish:
-- emitted vs ingested vs query-visible
-- true loss vs expected sampling or visibility delay
+- emitted (in JSONL mirror) vs exported (HTTP response) vs query-visible (backend)
+- true loss vs expected visibility delay
 - suspected cause vs proven cause
 
 If required evidence is unavailable, continue and mark confidence/uncertainty explicitly.
@@ -69,156 +69,150 @@ If required evidence is unavailable, continue and mark confidence/uncertainty ex
 
 Define and report:
 - validation time window (start/end)
-- expected services, environments, namespaces, and signal types
-
-When synthetic fields exist, prefer exact matching using:
-- `validation.run_id`
-- `validation.sequence_id`
-- `validation.expected_count`
+- expected `service.name` values (format: `gh-aw.<workflow-id>` or `gh-aw`)
+- expected job names and span operations (setup, conclusion, agent)
 
-If synthetic fields do not exist, infer expectations from:
-- source-side counters
-- Collector receiver counts
-- backend ingestion/query counts
+Infer expectations from:
+- local JSONL mirror span count
+- `github.run_id` from resource attributes
+- export error count from `/tmp/gh-aw/otlp-export-errors.count`
 
 ### Step 2: Validate trace completeness and integrity
 
-Compute and report:
-- unique `trace_id` count
+From the local JSONL mirror (`/tmp/gh-aw/otel.jsonl`), compute and report:
+- unique `trace_id` count (expect 1 per workflow run)
 - unique span identity count using `trace_id + span_id`
 - duplicate spans with same `trace_id + span_id`
 
-When expected per-trace span counts exist, compare expected vs observed.
-
-Validate structure:
-- every non-root span must reference an existing `parent_span_id` in the same trace
-- root spans must not have `parent_span_id`
+Validate the expected span hierarchy per the spec (§9.3):
+- all setup spans share a single global `parent_span_id`
+- each conclusion span parents under its job's setup span
+- agent spans parent under the conclusion span
+- the global parent shared by the setup spans is the root and has no parent of its own
 
 Validate required fields per span:
-- `trace_id`
-- `span_id`
-- `name`
-- `kind`
-- `start_time`
-- `end_time`
-- `service.name`
-- resource attributes
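+- `trace_id` (32-char hex; JSON key `traceId` in the mirror)
+- `span_id` (16-char hex; JSON key `spanId` in the mirror)
+- `name` (must match pattern `gh-aw.<job-name>.<operation>`)
+- `kind` (INTERNAL=1 for setup/conclusion, CLIENT=3 for agent)
+- `start_time_unix_nano` (JSON key `startTimeUnixNano` in the mirror)
+- `end_time_unix_nano` (JSON key `endTimeUnixNano` in the mirror)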
 
 Flag timestamp issues:
 - `start_time > end_time`
 - far-future timestamps
 - timestamps far outside the validation window
 
-### Step 3: Validate metric completeness and quality
-
-Report:
-- observed metric names
-- diff between observed names and expected metric inventory
-
-Count metric points by:
-- metric name
-- resource identity
-- scope/instrumentation library
-- datapoint attributes
-- timestamp
-
-Detect duplicate datapoints using:
-`resource identity + scope + metric name + datapoint attributes + timestamp`
-
-Validate temporality:
-- cumulative counters should not reset unexpectedly
-- delta counters must not be interpreted as cumulative
-
-Flag suspicious behavior:
-- missing datapoints
-- counter decreases without reset evidence
-- unexpected zero values
-- cardinality spikes
-- missing required dimensions
-
-### Step 4: Validate log completeness and correlation
-
-Report total log records in the validation window.
-
-Detect duplicates using stable fingerprint:
-`timestamp + observed timestamp + body hash + severity + trace_id + span_id + resource identity`
-
-If `validation.sequence_id` exists:
-- identify missing sequence IDs
-- identify duplicate sequence IDs
-
-Validate required fields:
-- `timestamp`
-- `body`
-- `severity` or `severity_text`
-- `service.name`
-- resource attributes
-
-Check trace correlation:
-- logs emitted inside traces should contain both `trace_id` and `span_id`
-
-### Step 5: Check Collector health
-
-Inspect and report Collector internal telemetry. Use actual metric names when version-specific names differ.
-
-Cover:
-- accepted records by receiver
-- refused records by receiver
-- dropped records by processor
-- sent records by exporter
-- failed sends by exporter
-- retry counts
-- queue size/capacity
-- memory limiter drops
-- batch behavior
-- timeout/rate-limit exporter errors
-
-Pay special attention to metrics such as:
-- `otelcol_receiver_accepted_spans`
-- `otelcol_receiver_refused_spans`
-- `otelcol_processor_dropped_spans`
-- `otelcol_exporter_sent_spans`
-- `otelcol_exporter_send_failed_spans`
-- `otelcol_receiver_accepted_metric_points`
-- `otelcol_processor_dropped_metric_points`
-- `otelcol_exporter_sent_metric_points`
-- `otelcol_receiver_accepted_log_records`
-- `otelcol_processor_dropped_log_records`
-- `otelcol_exporter_sent_log_records`
-
-### Step 6: Reconcile pipeline stages
-
-For traces, metrics, and logs independently, reconcile:
-
-app emitted
-→ Collector received
-→ Collector processed
-→ Collector exported
-→ backend ingested
-→ backend query-visible
-
-For each mismatch, identify the most likely stage of loss, duplication, or transformation.
-
-Do not claim data loss unless cross-stage evidence supports it.
+```bash
+# Example: Extract span summary from JSONL mirror
+jq -c '.resourceSpans[].scopeSpans[].spans[] | {name, traceId, spanId, parentSpanId, kind, status}' /tmp/gh-aw/otel.jsonl
+```
+
+### Step 3: Validate span attribute contract
+
+Check setup spans for required attributes (spec §10.1):
+- `gh-aw.job.name`
+- `gh-aw.workflow.name`
+- `gh-aw.run.id`
+- `gh-aw.run.attempt`
+- `gh-aw.run.actor`
+- `gh-aw.repository`
+- `gh-aw.staged`
+
+Check conclusion spans for required attributes (spec §10.2):
+- `gh-aw.run.status` (must be `success`, `failure`, `timeout`, or `cancelled`)
+- `gh-aw.error_count`
+- `gh-aw.warning_count`
+- `gh-aw.action_minutes`
+- `gh-aw.output.item_count`
+- `gh-aw.otlp.export_errors`
+
+Check agent spans for GenAI semantic conventions (spec §10.3):
+- `gen_ai.system`
+- `gen_ai.request.model`
+- `gen_ai.operation.name` (must be `"chat"`)
+- `gen_ai.usage.input_tokens`
+- `gen_ai.usage.output_tokens`
+
+```bash
+# Example: Check required attributes on setup spans
+jq -c '.resourceSpans[].scopeSpans[].spans[] | select(.name | endswith(".setup")) | {name, attrs: [.attributes[]? | {(.key): .value}] | add}' /tmp/gh-aw/otel.jsonl
+```
+
+### Step 4: Validate resource attributes
+
+Check all spans for required resource attributes (spec §11.1):
+- `service.name` (format: `gh-aw.<workflow-id>` or `gh-aw`)
+- `service.version`
+- `github.repository`
+- `github.run_id`
+- `github.run_attempt`
+- `github.actions.run_url`
+
+Check instrumentation scope:
+- `scope.name` must be `gh-aw`
+- `scope.version` should match `service.version`
+
+```bash
+# Example: Extract resource attributes
+jq -c '.resourceSpans[].resource.attributes[] | {(.key): .value}' /tmp/gh-aw/otel.jsonl | sort -u
+```
+
+### Step 5: Validate trace ID propagation and export errors
+
+Verify trace ID consistency across jobs (spec §12):
+- all spans in a single workflow run share the same `trace_id`
+- setup spans across different jobs share the same global `parent_span_id`
+- the JSONL mirror `trace_id` matches the value in `GITHUB_AW_OTEL_TRACE_ID`
+
+If export errors exist, check `/tmp/gh-aw/otlp-export-errors.jsonl`:
+- which endpoints failed
+- HTTP status codes
+- whether failures are transient (retryable) or permanent
+
+```bash
+# Example: Check trace ID consistency
+jq -r '.resourceSpans[].scopeSpans[].spans[].traceId' /tmp/gh-aw/otel.jsonl | sort -u | wc -l
+# Expected: 1 (single trace ID per run)
+
+# Example: Check export errors
+cat /tmp/gh-aw/otlp-export-errors.jsonl 2>/dev/null || echo "No export errors"
+cat /tmp/gh-aw/otlp-export-errors.count 2>/dev/null || echo "0"
+```
+
+### Step 6: Reconcile local mirror vs backend visibility
+
+For each configured OTLP endpoint, reconcile:
+
+```text
+local JSONL mirror (emitted)
+  → OTLP/HTTP export (sent)
+  → vendor backend (query-visible)
+```
+
+Check:
+- span count in JSONL mirror vs backend
+- whether all span names from the mirror appear in the backend
+- whether resource attributes survived backend ingestion
+- whether `trace_id` is searchable in the backend
+
+For multi-endpoint fan-out, validate each endpoint independently. Failure on one endpoint SHOULD NOT affect others.
+
+Do not claim data loss unless cross-stage evidence supports it. Distinguish ingestion delay from actual loss.
 
 ### Step 7: Root-cause hypotheses
 
-Evaluate likely causes, including:
-- SDK not flushing on shutdown
-- sampling misconfiguration
-- duplicate exporters in app config
-- duplicate flow through both agent and gateway
-- multiple Collectors scraping same source
-- retry behavior causing duplicate ingestion
-- filelog receiver offset rereads
-- batch timeout/size effects
-- memory limiter drops
-- exporter queue overflow
-- backend rate limits
-- resource attribute mutation/overwrite
-- OTLP gRPC/HTTP protocol mismatch
-- wrong endpoint/path
-- metrics temporality mismatch
+Evaluate likely causes for any issues found, including:
+- OTLP endpoint misconfiguration (wrong URL, missing `/v1/traces` suffix)
+- authentication failures (expired API key, wrong header name)
+- Sentry header rewrite not applied (`Authorization` should become `x-sentry-auth`)
+- network allowlist missing vendor hostname
+- `if-missing: error` blocking gateway OTLP when secrets are unresolved
+- retry exhaustion (3 attempts with exponential backoff)
+- OTLP/HTTP JSON vs OTLP/HTTP protobuf mismatch
+- vendor rate limits or ingestion delays
+- span attribute redaction removing useful diagnostic data
+- proxy configuration interfering with `fetch`-based export
 
 Rank hypotheses by evidence strength and include alternatives.
 
@@ -231,31 +225,32 @@ Create exactly one issue with these sections in order:
 - main risks
 - most likely root cause (if any)
 
-### B. Completeness results
-Per signal (traces/metrics/logs):
-- expected count
-- observed count
-- missing count
-- duplicate count
+### B. Trace completeness
+- expected span count (from JSONL mirror)
+- observed span count (in backend)
+- missing spans
+- duplicate spans
+- trace ID consistency (single trace per run)
 - confidence level
 
-### C. Duplicate analysis
-- duplicate keys
-- affected services
-- affected windows
-- sample duplicate records
-
-### D. Schema and quality issues
-- missing fields
-- invalid timestamps
-- missing resource attributes
-- cardinality problems
-- trace/log correlation gaps
-
-### E. Pipeline health
-- Collector receiver/processor/exporter counters
-- dropped/refused/failed signals
-- queue/retry indicators
+### C. Span hierarchy validation
+- setup spans share global parent: pass/fail
+- conclusion spans parent under setup: pass/fail
+- agent spans parent under conclusion: pass/fail
+- span naming pattern `gh-aw.<job-name>.<operation>`: pass/fail
+
+### D. Attribute contract validation
+- setup span required attributes: present/missing list
+- conclusion span required attributes: present/missing list
+- agent span GenAI attributes: present/missing list
+- resource attributes: present/missing list
+- instrumentation scope: correct/incorrect
+
+### E. Export and fan-out health
+- per-endpoint export status (success/fail/partial)
+- export error count and details
+- JSONL mirror write status
+- multi-endpoint fan-out independence
 
 ### F. Root-cause hypothesis
 - likely cause
@@ -263,18 +258,17 @@ Per signal (traces/metrics/logs):
 - alternative explanations
 
 ### G. Recommended fixes (prioritized)
-1. stop data loss
-2. stop duplication
-3. fix schema/resource attributes
-4. improve observability and alerts
+1. fix data loss or export failures
+2. fix missing required attributes
+3. fix span hierarchy or naming issues
+4. improve diagnostic coverage
 
 ### H. Validation queries or commands
-Provide concrete queries/commands/pseudocode used.
+Provide the concrete jq/bash commands used against the JSONL mirror and the queries used against the backend.
 
 Rules:
 - Never assume missing equals lost without cross-stage evidence.
 - Always distinguish ingestion completeness from query visibility.
-- Treat sampled traces as intentionally incomplete only when sampling config is verified.
-- Do not flag legitimate metric resets as errors when reset metadata or restart evidence exists.
-- Prefer exact validation keyed by `validation.run_id` and `validation.sequence_id` when available.
+- Do not flag visibility delays under 5 minutes as data loss.
 - Be explicit about uncertainty.
+- Reference the normative spec (`specs/otel-observability-spec.md`) section numbers when reporting violations.

From b0f9068bf75a9f41c919de7c5f3ad5efcde7f024 Mon Sep 17 00:00:00 2001
From: Mara Nikola Kiefer <mnkiefer@github.com>
Date: Thu, 21 May 2026 07:59:05 +0200
Subject: [PATCH 2/7] update otel observability spec

---
 .github/agents/agentic-workflows.agent.md |  34 +-
 specs/otel-observability-spec.md          | 385 +++++++++++++++++++++-
 2 files changed, 390 insertions(+), 29 deletions(-)

diff --git a/.github/agents/agentic-workflows.agent.md b/.github/agents/agentic-workflows.agent.md
index f7e5eb4f1cd..43071e7216c 100644
--- a/.github/agents/agentic-workflows.agent.md
+++ b/.github/agents/agentic-workflows.agent.md
@@ -25,7 +25,7 @@ This is a **dispatcher agent** that routes your request to the appropriate speci
 - **Choosing workflow architectures and design patterns**: Routes to `patterns` guide — consult this whenever the user asks for strategy, architecture, operating models, or pattern selection for agentic workflows
 
 > [!IMPORTANT]
-> For architecture/pattern-selection requests, load `https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/patterns.md` first.
+> For architecture/pattern-selection requests, load `https://github.com/github/gh-aw/blob/main/.github/aw/patterns.md` first.
 
 Workflows may optionally include:
 
@@ -37,7 +37,7 @@ Workflows may optionally include:
 - Workflow files: `.github/workflows/*.md` and `.github/workflows/**/*.md`
 - Workflow lock files: `.github/workflows/*.lock.yml`
 - Shared components: `.github/workflows/shared/*.md`
-- Configuration: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/github-agentic-workflows.md
+- Configuration: https://github.com/github/gh-aw/blob/main/.github/aw/github-agentic-workflows.md
 
 ## Problems This Solves
 
@@ -59,7 +59,7 @@ When you interact with this agent, it will:
 ### Create New Workflow
 **Load when**: User wants to create a new workflow from scratch, add automation, or design a workflow that doesn't exist yet
 
-**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/create-agentic-workflow.md
+**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/create-agentic-workflow.md
 
 **Use cases**:
 - "Create a workflow that triages issues"
@@ -69,7 +69,7 @@ When you interact with this agent, it will:
 ### Update Existing Workflow  
 **Load when**: User wants to modify, improve, or refactor an existing workflow
 
-**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/update-agentic-workflow.md
+**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/update-agentic-workflow.md
 
 **Use cases**:
 - "Add web-fetch tool to the issue-classifier workflow"
@@ -79,7 +79,7 @@ When you interact with this agent, it will:
 ### Debug Workflow  
 **Load when**: User needs to investigate, audit, debug, or understand a workflow, troubleshoot issues, analyze logs, or fix errors
 
-**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/debug-agentic-workflow.md
+**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/debug-agentic-workflow.md
 
 **Use cases**:
 - "Why is this workflow failing?"
@@ -89,7 +89,7 @@ When you interact with this agent, it will:
 ### Upgrade Agentic Workflows
 **Load when**: User wants to upgrade workflows to a new gh-aw version or fix deprecations
 
-**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/upgrade-agentic-workflows.md
+**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/upgrade-agentic-workflows.md
 
 **Use cases**:
 - "Upgrade all workflows to the latest version"
@@ -99,7 +99,7 @@ When you interact with this agent, it will:
 ### Create a Report-Generating Workflow
 **Load when**: The workflow being created or updated produces reports — recurring status updates, audit summaries, analyses, or any structured output posted as a GitHub issue, discussion, or comment
 
-**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/report.md
+**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/report.md
 
 **Use cases**:
 - "Create a weekly CI health report"
@@ -109,7 +109,7 @@ When you interact with this agent, it will:
 ### Create Shared Agentic Workflow
 **Load when**: User wants to create a reusable workflow component or wrap an MCP server
 
-**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/create-shared-agentic-workflow.md
+**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/create-shared-agentic-workflow.md
 
 **Use cases**:
 - "Create a shared component for Notion integration"
@@ -119,7 +119,7 @@ When you interact with this agent, it will:
 ### Fix Dependabot PRs
 **Load when**: User needs to close or fix open Dependabot PRs that update dependencies in generated manifest files (`.github/workflows/package.json`, `.github/workflows/requirements.txt`, `.github/workflows/go.mod`)
 
-**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/dependabot.md
+**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/dependabot.md
 
 **Use cases**:
 - "Fix the open Dependabot PRs for npm dependencies"
@@ -129,7 +129,7 @@ When you interact with this agent, it will:
 ### Analyze Test Coverage
 **Load when**: The workflow reads, analyzes, or reports test coverage — whether triggered by a PR, a schedule, or a slash command. Always consult this prompt before designing the coverage data strategy.
 
-**Prompt file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/test-coverage.md
+**Prompt file**: https://github.com/github/gh-aw/blob/main/.github/aw/test-coverage.md
 
 **Use cases**:
 - "Create a workflow that comments coverage on PRs"
@@ -139,7 +139,7 @@ When you interact with this agent, it will:
 ### Render ASCII Charts in Markdown
 **Load when**: The workflow needs in-markdown charts (sparklines, bars, table+trend views) that must align cleanly and render reliably across GitHub surfaces, including mobile.
 
-**Reference file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/asciicharts.md
+**Reference file**: https://github.com/github/gh-aw/blob/main/.github/aw/asciicharts.md
 
 **Use cases**:
 - "Show a compact trend chart in an issue comment"
@@ -149,7 +149,7 @@ When you interact with this agent, it will:
 ### CLI Commands Reference
 **Load when**: The user asks how to run, compile, debug, or manage workflows from the command line; needs the MCP tool equivalent of a `gh aw` command; or is in a restricted environment (e.g., Copilot Cloud) without direct CLI access.
 
-**Reference file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/cli-commands.md
+**Reference file**: https://github.com/github/gh-aw/blob/main/.github/aw/cli-commands.md
 
 **Use cases**:
 - "How do I trigger workflow X on the main branch?"
@@ -160,7 +160,7 @@ When you interact with this agent, it will:
 ### Token Consumption Optimization
 **Load when**: The user asks how to reduce token usage, lower workflow costs, make a workflow faster or cheaper, or measure the impact of prompt or configuration changes.
 
-**Reference file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/token-optimization.md
+**Reference file**: https://github.com/github/gh-aw/blob/main/.github/aw/token-optimization.md
 
 **Use cases**:
 - "How do I reduce the token cost of this workflow?"
@@ -173,7 +173,7 @@ When you interact with this agent, it will:
 ### Workflow Pattern Selection
 **Load when**: The user asks for architecture, strategy, operating model selection, or pattern recommendations for building agentic workflows.
 
-**Reference file**: https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/patterns.md
+**Reference file**: https://github.com/github/gh-aw/blob/main/.github/aw/patterns.md
 
 **Use cases**:
 - "Which pattern should I use for multi-repo rollout?"
@@ -225,12 +225,12 @@ gh aw compile --validate
 
 ## Important Notes
 
-- Always reference the instructions file at https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/github-agentic-workflows.md for complete documentation
+- Always reference the instructions file at https://github.com/github/gh-aw/blob/main/.github/aw/github-agentic-workflows.md for complete documentation
 - Use the MCP tool `agentic-workflows` when running in GitHub Copilot Cloud
 - Workflows must be compiled to `.lock.yml` files before running in GitHub Actions
 - **Bash tools are enabled by default** - Don't restrict bash commands unnecessarily since workflows are sandboxed by the AWF
 - Follow security best practices: minimal permissions, explicit network access, no template injection
-- **Network configuration**: Use ecosystem identifiers (`node`, `python`, `go`, etc.) or explicit FQDNs in `network.allowed`. Bare shorthands like `npm` or `pypi` are **not** valid. See https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/network.md for the full list of valid ecosystem identifiers and domain patterns.
+- **Network configuration**: Use ecosystem identifiers (`node`, `python`, `go`, etc.) or explicit FQDNs in `network.allowed`. Bare shorthands like `npm` or `pypi` are **not** valid. See https://github.com/github/gh-aw/blob/main/.github/aw/network.md for the full list of valid ecosystem identifiers and domain patterns.
 - **Single-file output**: When creating a workflow, produce exactly **one** workflow `.md` file. Do not create separate documentation files (architecture docs, runbooks, usage guides, etc.). If documentation is needed, add a brief `## Usage` section inside the workflow file itself.
 - **Triggering runs**: Always use `gh aw run <workflow-name>` to trigger a workflow on demand — not `gh workflow run <file>.lock.yml`. `gh aw run` handles workflow resolution by short name, input parsing and validation, and correct run-tracking for agentic workflows. Use `--ref <branch>` to run on a specific branch.
-- **CLI commands reference**: For a complete guide on all `gh aw` commands and their MCP tool equivalents (for restricted environments), see https://github.com/github/gh-aw/blob/v0.74.8/.github/aw/cli-commands.md
+- **CLI commands reference**: For a complete guide on all `gh aw` commands and their MCP tool equivalents (for restricted environments), see https://github.com/github/gh-aw/blob/main/.github/aw/cli-commands.md
diff --git a/specs/otel-observability-spec.md b/specs/otel-observability-spec.md
index 375aa0e9797..236ec0cc920 100644
--- a/specs/otel-observability-spec.md
+++ b/specs/otel-observability-spec.md
@@ -1,9 +1,9 @@
 ---
 title: OTel Observability Specification
-version: 0.1.0
+version: 0.2.0
 status: Working Draft
 date: 2026-05-19
-last_updated: 2026-05-19
+last_updated: 2026-05-21
 editors:
   - GitHub gh-aw Team
 ---
@@ -38,10 +38,14 @@ Changes to `observability.otlp`, OTLP environment injection, MCP gateway tracing
 6. [Export and Gateway Integration](#6-export-and-gateway-integration)
 7. [Local Mirrors and Artifacts](#7-local-mirrors-and-artifacts)
 8. [Security and Privacy Requirements](#8-security-and-privacy-requirements)
-9. [Implementation Mapping](#9-implementation-mapping)
-10. [Compliance Testing](#10-compliance-testing)
-11. [References](#11-references)
-12. [Change Log](#12-change-log)
+9. [Trace Model](#9-trace-model)
+10. [Span Attribute Contract](#10-span-attribute-contract)
+11. [Resource Attributes](#11-resource-attributes)
+12. [Trace ID Propagation and Lookup](#12-trace-id-propagation-and-lookup)
+13. [Implementation Mapping](#13-implementation-mapping)
+14. [Compliance Testing](#14-compliance-testing)
+15. [References](#15-references)
+16. [Change Log](#16-change-log)
 
 ---
 
@@ -85,7 +89,7 @@ The following documents are informative companions and do not override this spec
 
 ## 2. Conformance
 
-An implementation conforms to this specification if it satisfies all MUST and MUST NOT requirements in Sections 4 through 10.
+An implementation conforms to this specification if it satisfies all MUST and MUST NOT requirements in Sections 4 through 14.
 
 The key words **MUST**, **MUST NOT**, **SHOULD**, **SHOULD NOT**, and **MAY** are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119).
 
@@ -97,7 +101,7 @@ This specification defines three conformance levels:
 |---|---|
 | **Level 1 - Config** | Correct parsing and normalization of `observability.otlp` and workflow environment injection as defined in Sections 4 and 5. |
 | **Level 2 - Runtime** | Level 1 plus MCP gateway integration and degraded-mode export behavior from Section 6. |
-| **Level 3 - Complete** | Level 2 plus local mirror, artifact, implementation-mapping, and compliance obligations in Sections 7 through 10. |
+| **Level 3 - Complete** | Level 2 plus local mirror, artifact, trace model, span attribute contract, resource attributes, trace ID propagation, implementation-mapping, and compliance obligations in Sections 7 through 14. |
 
 ---
 
@@ -267,7 +271,326 @@ The JavaScript OTLP helper layer SHOULD remain non-fatal:
 
 ---
 
-## 9. Implementation Mapping
+## 9. Trace Model
+
+### 9.1 Overview
+
+gh-aw emits OpenTelemetry trace spans directly to configured OTLP-compatible vendor endpoints. gh-aw does **not** require or run an OpenTelemetry Collector. All transformation, batching, retry, endpoint selection, and authentication happens in-process before sending to the vendor OTLP endpoint.
+
+Tracing is best-effort. Export failures MUST NOT fail the workflow.
+
+### 9.2 Span Naming Convention
+
+All gh-aw span names MUST follow the pattern: `gh-aw.<job-name>.<operation>`.
+
+When no job name is available, the fallback `job` MUST be used, yielding names such as `gh-aw.job.setup`.
+
+### 9.3 Span Hierarchy
+
+A single trace ID is shared across all jobs in a workflow run. All setup spans share a global parent span ID so they render as siblings in OTLP backends.
+
+```text
+Single Trace: trace_id (32-char hex, shared across all jobs in a run)
+├── Root Setup Parent: parent_span_id (global, shared across all jobs)
+│
+├── Activation Job
+│   ├── gh-aw.activation.setup        (parent: root setup parent)
+│   └── gh-aw.activation.conclusion   (parent: activation setup span)
+│
+├── Agent Job
+│   ├── gh-aw.agent.setup             (parent: root setup parent)
+│   ├── gh-aw.agent.conclusion         (parent: agent setup span)
+│   │   └── gh-aw.agent.agent          (parent: agent conclusion span)
+│   │       [dedicated AI latency measurement]
+│   │
+│
+└── Other Jobs
+    ├── gh-aw.<job-name>.setup         (parent: root setup parent)
+    └── gh-aw.<job-name>.conclusion    (parent: job setup span)
+```
+
+### 9.4 Span Kinds
+
+Span kind assignments MUST follow these rules:
+
+| Span | OTLP `kind` | Rationale |
+|---|---|---|
+| `gh-aw.*.setup` | `SPAN_KIND_INTERNAL` (1) | Internal job lifecycle |
+| `gh-aw.*.conclusion` | `SPAN_KIND_INTERNAL` (1) | Internal job lifecycle |
+| `gh-aw.*.agent` | `SPAN_KIND_CLIENT` (3) | Outbound AI model request |
+
+### 9.5 Span Status
+
+Conclusion spans MUST set `status.code` based on the job outcome:
+
+| Outcome | `status.code` |
+|---|---|
+| `success` | `OK` (1) |
+| `failure`, `timeout`, `cancelled` | `ERROR` (2) |
+
+### 9.6 Exception Events
+
+When errors are present in `agent_output.json`, the conclusion span MUST emit OTel exception events:
+
+```json
+{
+  "timeUnixNano": "...",
+  "name": "exception",
+  "attributes": [
+    {"key": "exception.type", "value": {"stringValue": "gh-aw.<ErrorType>"}},
+    {"key": "exception.message", "value": {"stringValue": "Error description"}}
+  ]
+}
+```
+
+Exception type resolution:
+
+1. If the error message matches the format `type:message`, use `gh-aw.<type>` as the exception type.
+2. Otherwise, derive the type from the run status: `gh-aw.AgentError`, `gh-aw.AgentFailed`, `gh-aw.AgentTimedOut`, or `gh-aw.AgentCancelled`.
+
+---
+
+## 10. Span Attribute Contract
+
+This section defines the attributes each span type MUST or MAY carry.
+
+### 10.1 Setup Span Attributes
+
+**Required attributes** (MUST be present on every setup span):
+
+| Attribute | Type | Description |
+|---|---|---|
+| `gh-aw.job.name` | string | Job name from action input |
+| `gh-aw.workflow.name` | string | Workflow name or ID |
+| `gh-aw.run.id` | string | GitHub Actions run ID |
+| `gh-aw.run.attempt` | string | Run attempt number |
+| `gh-aw.run.actor` | string | User or bot initiating the run |
+| `gh-aw.repository` | string | `owner/repo` |
+| `gh-aw.staged` | boolean | Whether this is a staging deployment |
+
+**Conditional attributes** (MUST be present when the value is available):
+
+| Attribute | Type | Description |
+|---|---|---|
+| `gen_ai.system` | string | Mapped AI system name (e.g., `github_models`, `anthropic`, `openai`) |
+| `gh-aw.engine.id` | string | Raw engine identifier (`copilot`, `claude`, `codex`, `gemini`, custom) |
+| `gh-aw.event_name` | string | GitHub event type |
+| `gh-aw.trigger.item_type` | string | Triggering item (`issue`, `pull_request`, `discussion`, etc.) |
+| `gh-aw.trigger.item_number` | string | Triggering item ID/number |
+| `gh-aw.trigger.label` | string | Label on triggering item |
+| `gh-aw.trigger.comment_id` | string | Comment ID on triggering item |
+| `gh-aw.episode.id` | string | Episode/session ID for cross-run correlation |
+| `gh-aw.episode.kind` | string | `run` or `workflow_call` |
+| `gh-aw.hop.id` | string | Current workflow invocation ID |
+| `gh-aw.hop.parent_id` | string | Parent workflow invocation ID |
+| `gh-aw.origin.event` | string | Origin event type |
+| `gh-aw.root.repo` | string | Root repository (for dispatched workflows) |
+| `gh-aw.root.workflow_id` | string | Root workflow ID |
+| `gh-aw.frontmatter.source` | string | Frontmatter source type |
+| `gh-aw.frontmatter.emoji` | string | Frontmatter emoji |
+| `gh-aw.frontmatter.body_modified` | boolean | Whether body was edited |
+| `gh-aw.experiment.<name>` | string | Per-experiment variant assignment |
+| `gh-aw.experiments` | string | Compact JSON of all experiment assignments |
+| `gh-aw.deployment.state` | string | Deployment status |
+| `gh-aw.workflow_run.conclusion` | string | Workflow-level outcome |
+
+### 10.2 Conclusion Span Attributes
+
+**Required attributes** (MUST be present on every conclusion span):
+
+| Attribute | Type | Description |
+|---|---|---|
+| `gh-aw.workflow.name` | string | Workflow name |
+| `gh-aw.run.id` | string | Run ID |
+| `gh-aw.run.attempt` | string | Attempt number |
+| `gh-aw.run.actor` | string | Actor |
+| `gh-aw.repository` | string | Repository |
+| `gh-aw.run.status` | string | Run outcome (`success`, `failure`, `timeout`, `cancelled`) |
+| `gh-aw.error_count` | int | Number of errors |
+| `gh-aw.warning_count` | int | Number of warnings |
+| `gh-aw.action_minutes` | double | Duration in minutes |
+| `gh-aw.output.item_count` | int | Safe output items produced |
+| `gh-aw.otlp.export_errors` | int | Count of OTLP export failures during this run |
+
+**Conditional attributes** (MUST be present when the value is available):
+
+| Attribute | Type | Description |
+|---|---|---|
+| `gh-aw.job.name` | string | Job name |
+| `gen_ai.system` | string | AI system |
+| `gh-aw.engine.id` | string | Engine ID |
+| `gen_ai.request.model` | string | Requested model name |
+| `gh-aw.tracker.id` | string | Tracker identifier |
+| `gh-aw.event_name` | string | Event type |
+| `gh-aw.staged` | boolean | Staging flag |
+| `gh-aw.trigger.*` | string | Trigger context (same fields as setup span) |
+| `gh-aw.frontmatter.*` | string | Frontmatter metadata (same fields as setup span) |
+| `gh-aw.effective_tokens` | int | Effective token count |
+| `gh-aw.turns` | int | Number of agent turns |
+| `gh-aw.estimated_cost_usd` | double | Estimated cost |
+| `gh-aw.agent.conclusion` | string | Agent job outcome |
+| `gh-aw.detection.conclusion` | string | Threat detection outcome |
+| `gh-aw.detection.reason` | string | Detection reasoning |
+| `gh-aw.otlp.export_error_details` | string | Export failure details |
+| `gh-aw.error.count` | int | Output error count |
+| `gh-aw.error.messages` | string | Error messages joined by ` \| ` |
+| `gh-aw.output.item_types` | string | Comma-separated types of safe output items |
+| `gh-aw.github.rate_limit.remaining` | int | API rate limit remaining |
+| `gh-aw.github.rate_limit.limit` | int | API rate limit total |
+| `gh-aw.github.rate_limit.used` | int | API rate limit used |
+| `gh-aw.github.rate_limit.resource` | string | Rate limit resource category |
+| `gh-aw.github.rate_limit.reset` | string | ISO 8601 rate limit reset time |
+| `gh-aw.outcome.total` | int | Total outcomes |
+| `gh-aw.outcome.accepted` | int | Accepted outcomes |
+| `gh-aw.outcome.rejected` | int | Rejected outcomes |
+| `gh-aw.outcome.pending` | int | Pending outcomes |
+| `gh-aw.outcome.ignored` | int | Ignored outcomes |
+| `gh-aw.outcome.acceptance_rate` | double | Acceptance rate |
+| `gh-aw.outcome.waste_rate` | double | Waste rate |
+
+### 10.3 Agent Span Attributes
+
+The dedicated agent span (`gh-aw.*.agent`) follows OpenTelemetry [GenAI semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/).
+
+**Required attributes** (MUST be present when available from the AI engine):
+
+| Attribute | Type | Description |
+|---|---|---|
+| `gen_ai.system` | string | Mapped AI system name |
+| `gen_ai.request.model` | string | Requested model |
+| `gen_ai.response.model` | string | Resolved runtime model |
+| `gen_ai.operation.name` | string | Always `"chat"` |
+| `gen_ai.workflow.name` | string | Workflow name |
+| `gen_ai.usage.input_tokens` | int | Input tokens consumed |
+| `gen_ai.usage.output_tokens` | int | Output tokens generated |
+| `gen_ai.usage.total_tokens` | int | Total tokens (input + output, excluding cache) |
+| `gen_ai.response.finish_reasons` | string[] | Stop reasons (e.g., `["stop"]`, `["length"]`, `["timeout"]`) |
+
+**Optional attributes** (MAY be present):
+
+| Attribute | Type | Description |
+|---|---|---|
+| `gen_ai.usage.cache_read.input_tokens` | int | Cache read tokens |
+| `gen_ai.usage.cache_creation.input_tokens` | int | Cache write tokens |
+
+---
+
+## 11. Resource Attributes
+
+Resource attributes are applied to all OTLP spans and describe the service and execution environment.
+
+### 11.1 Required Resource Attributes
+
+A conforming implementation MUST include these resource attributes on every exported span:
+
+| Attribute | Type | Description | Example |
+|---|---|---|---|
+| `service.name` | string | `gh-aw.<workflow-id>` or `gh-aw` | `gh-aw.daily-report` |
+| `service.version` | string | gh-aw CLI version or commit SHA | `v0.23.4` |
+| `github.repository` | string | `owner/repo` | `github/gh-aw` |
+| `github.run_id` | string | GitHub Actions run ID | `12345678` |
+| `github.run_attempt` | string | Run attempt number | `1` |
+| `github.actions.run_url` | string | URL to the run | `https://github.com/owner/repo/actions/runs/123` |
+
+### 11.2 Conditional Resource Attributes
+
+These resource attributes MUST be included when the corresponding value is available:
+
+| Attribute | Type | Description |
+|---|---|---|
+| `github.event_name` | string | Event type (e.g., `push`, `pull_request`) |
+| `github.ref` | string | Git ref (branch/tag) |
+| `github.ref_name` | string | Ref name |
+| `github.head_ref` | string | Head ref (for PRs) |
+| `github.sha` | string | Commit SHA |
+| `github.job` | string | Job name |
+| `github.workflow_ref` | string | Workflow ref |
+| `github.actor_id` | string | Actor ID |
+| `runner.os` | string | Runner OS (`Linux`, `Windows`, `macOS`) |
+| `runner.arch` | string | Runner architecture (`X64`, `ARM64`) |
+| `runner.name` | string | Runner name/label |
+| `runner.environment` | string | Runner environment |
+| `gh-aw.awf.version` | string | Agent Workflow Firewall (AWF) version |
+| `gh-aw.awmg.version` | string | Agentic Workflows MCP Gateway (awmg) version |
+| `deployment.environment` | string | `staging` or `production` |
+
+### 11.3 Instrumentation Scope
+
+All gh-aw spans MUST be emitted under an instrumentation scope with:
+
+| Field | Value |
+|---|---|
+| `scope.name` | `gh-aw` |
+| `scope.version` | The gh-aw CLI version |
+
+---
+
+## 12. Trace ID Propagation and Lookup
+
+### 12.1 Trace ID Format
+
+The OTLP trace ID is a 32-character lowercase hexadecimal string (16 random bytes). The span ID is a 16-character lowercase hexadecimal string (8 random bytes).
+
+Do **not** confuse the OTLP trace ID with `workflow_call_id`, which is derived from the GitHub run ID and attempt number. The OTLP trace ID is the value to search for in vendor backends (Sentry, Honeycomb, Datadog, Grafana Tempo, etc.).
+
+### 12.2 Trace ID Resolution Order
+
+The setup span MUST resolve the trace ID using the following priority order:
+
+1. **Explicit option** — `options.traceId` passed to the setup function (used for activation job reuse).
+2. **Action input** — `INPUT_TRACE_ID` environment variable (from `trace-id` action input, used for cross-job propagation).
+3. **Parent context** — `aw_info.context.otel_trace_id` (propagated from parent workflow via `aw_context`).
+4. **Generate new** — 32-character random hex string via `randomBytes(16).toString("hex")`.
+
+The conclusion span MUST resolve the trace ID using:
+
+1. **Job environment** — `GITHUB_AW_OTEL_TRACE_ID` (set by this job's setup step).
+2. **Parent context** — `aw_info.context.otel_trace_id` (inherited from parent).
+3. **Legacy fallback** — `aw_info.context.workflow_call_id` (converted to hex).
+4. **Generate new** — 32-character random hex string.
+
+### 12.3 Trace ID Storage
+
+After generating or resolving a trace ID, the setup step MUST:
+
+1. **Write to `$GITHUB_OUTPUT`** so downstream jobs can access:
+   - `trace-id` — 32-char hex trace ID
+   - `span-id` — 16-char hex setup span ID
+   - `parent-span-id` — 16-char hex global parent span ID
+
+2. **Write to `$GITHUB_ENV`** so downstream steps in the same job can access:
+   - `GITHUB_AW_OTEL_TRACE_ID` — Trace ID
+   - `GITHUB_AW_OTEL_PARENT_SPAN_ID` — Setup span ID (parent for conclusion span)
+   - `GITHUB_AW_OTEL_JOB_START_MS` — Epoch milliseconds when setup completed
+
+### 12.4 Cross-Job Propagation
+
+The compiler MUST wire setup outputs through the job dependency graph so all jobs in a run share a single trace ID. Downstream jobs receive `needs.<setup-job>.outputs.trace-id` and `needs.<setup-job>.outputs.parent-span-id` as action inputs.
+
+### 12.5 Dispatch and Composite Action Propagation
+
+When a workflow dispatches a child workflow or composite action, parent trace context MUST be passed via `aw_context`:
+
+- `aw_context.otel_trace_id` → child inherits parent trace ID
+- `aw_context.otel_parent_span_id` → child setup span parents under parent's setup span
+
+This context is written to `/tmp/gh-aw/aw_info.json` and propagated through action inputs.
+
+### 12.6 Trace ID Lookup
+
+To find a trace in an OTLP backend:
+
+1. Locate the OTLP trace ID from the GitHub Actions job summary or the `trace-id` output.
+2. Search the backend by trace ID (32-char hex string).
+3. For local debugging, query the JSONL mirror:
+
+```bash
+jq '.resourceSpans[].scopeSpans[].spans[] | {name, traceId, spanId, status}' /tmp/gh-aw/otel.jsonl
+```
+
+---
+
+## 13. Implementation Mapping
 
 This section maps the normative behavior in this specification to the current `gh-aw` implementation. These mappings MUST be kept in sync when behavior changes.
 
@@ -280,12 +603,16 @@ This section maps the normative behavior in this specification to the current `g
 | §6.5 | Trace Context Variables | `actions/setup/js/action_setup_otlp.cjs`, `actions/setup/js/aw_context.cjs` |
 | §7 | Local Mirrors and Artifacts | `actions/setup/js/send_otlp_span.cjs`, `actions/setup/js/constants.cjs`, `actions/setup/post.js` |
 | §8 | Security and Privacy Requirements | `pkg/workflow/observability_otlp.go`, `pkg/workflow/mcp_renderer.go`, `pkg/workflow/mcp_setup_generator.go`, `actions/setup/js/send_otlp_span.cjs` |
+| §9 | Trace Model | `actions/setup/js/send_otlp_span.cjs`, `actions/setup/js/action_setup_otlp.cjs`, `actions/setup/js/action_conclusion_otlp.cjs` |
+| §10 | Span Attribute Contract | `actions/setup/js/action_setup_otlp.cjs`, `actions/setup/js/action_conclusion_otlp.cjs`, `actions/setup/js/send_otlp_span.cjs` |
+| §11 | Resource Attributes | `actions/setup/js/action_setup_otlp.cjs`, `actions/setup/js/send_otlp_span.cjs` |
+| §12 | Trace ID Propagation | `actions/setup/js/action_setup_otlp.cjs`, `actions/setup/js/aw_context.cjs`, `pkg/workflow/compiler_yaml.go` |
 
 When behavior changes in any mapped file, this table SHOULD be updated in the same change set.
 
 ---
 
-## 10. Compliance Testing
+## 14. Compliance Testing
 
 A conforming implementation MUST include automated coverage for the following behaviors.
 
@@ -301,12 +628,28 @@ A conforming implementation MUST include automated coverage for the following be
 | `T-OTEL-OBS-008` | Local mirror persistence | Helper emission writes `/tmp/gh-aw/otel.jsonl` even when OTLP export fails or is absent. | `actions/setup/js/send_otlp_span.test.cjs` |
 | `T-OTEL-OBS-009` | Trace context propagation | Setup writes valid trace and parent span IDs into runtime environment. | `actions/setup/js/action_setup_otlp.test.cjs`, `actions/setup/js/otlp.test.cjs` |
 | `T-OTEL-OBS-010` | Artifact inclusion | Observability artifacts include the OTEL JSONL mirror when artifact collection is enabled. | `pkg/workflow/compiled_lock_files_test.go` |
+| `T-OTEL-OBS-011` | Span naming convention | All emitted span names follow `gh-aw.<job-name>.<operation>` pattern. | `actions/setup/js/send_otlp_span.test.cjs` |
+| `T-OTEL-OBS-012` | Span hierarchy | Setup spans share a global parent span ID; conclusion spans parent under the setup span. | `actions/setup/js/action_setup_otlp.test.cjs`, `actions/setup/js/action_conclusion_otlp.test.cjs` |
+| `T-OTEL-OBS-013` | Span attribute contract | Setup and conclusion spans contain all required attributes from §10. | `actions/setup/js/action_setup_otlp.test.cjs`, `actions/setup/js/action_conclusion_otlp.test.cjs` |
+| `T-OTEL-OBS-014` | Resource attributes | All exported spans include required resource attributes from §11. | `actions/setup/js/send_otlp_span.test.cjs` |
+| `T-OTEL-OBS-015` | Trace ID resolution order | Trace ID follows the priority chain: explicit option → action input → parent context → generate new. | `actions/setup/js/action_setup_otlp.test.cjs` |
 
 Additional tests SHOULD be added when new helper APIs, new OTLP normalization rules, or new runtime sinks become normative.
 
+### 14.1 Runtime Conformance Workflows
+
+The following agentic workflows provide runtime conformance validation:
+
+| Workflow | Purpose | Coverage |
+|---|---|---|
+| [`smoke-otel-backends.md`](../.github/workflows/smoke-otel-backends.md) | End-to-end OTLP smoke test | Local mirror + Sentry/Grafana/Datadog visibility |
+| [`daily-otel-instrumentation-advisor.md`](../.github/workflows/daily-otel-instrumentation-advisor.md) | Daily code review + live data validation | Sentry + Grafana backend data |
+| [`daily-grafana-otel-instrumentation-advisor.md`](../.github/workflows/daily-grafana-otel-instrumentation-advisor.md) | Grafana-only variant | Grafana Tempo data |
+| [`otlp-data-quality-validator.md`](../.github/workflows/otlp-data-quality-validator.md) | OTLP data quality validation | JSONL + vendor traces + attribute contract |
+
 ---
 
-## 11. References
+## 15. References
 
 ### Normative References
 
@@ -321,9 +664,27 @@ Additional tests SHOULD be added when new helper APIs, new OTLP normalization ru
 - [specs/aw-harness.md](./aw-harness.md)
 - [specs/safe-output-outcome-evaluation.md](./safe-output-outcome-evaluation.md)
 
+### Runtime Conformance Workflows
+
+- [.github/workflows/smoke-otel-backends.md](../.github/workflows/smoke-otel-backends.md) — End-to-end OTLP smoke test
+- [.github/workflows/daily-otel-instrumentation-advisor.md](../.github/workflows/daily-otel-instrumentation-advisor.md) — Daily code review + live data validation
+- [.github/workflows/daily-grafana-otel-instrumentation-advisor.md](../.github/workflows/daily-grafana-otel-instrumentation-advisor.md) — Grafana-only variant
+- [.github/workflows/otlp-data-quality-validator.md](../.github/workflows/otlp-data-quality-validator.md) — OTLP data quality validation
+
 ---
 
-## 12. Change Log
+## 16. Change Log
+
+### Version 0.2.0 (Working Draft)
+
+- Added §9 Trace Model: span naming, hierarchy, kinds, status, exception events
+- Added §10 Span Attribute Contract: required and conditional attributes for setup, conclusion, and agent spans
+- Added §11 Resource Attributes: required and conditional resource attributes, instrumentation scope
+- Added §12 Trace ID Propagation and Lookup: resolution order, storage, cross-job and dispatch propagation
+- Added §14.1 Runtime Conformance Workflows
+- Added compliance tests T-OTEL-OBS-011 through T-OTEL-OBS-015
+- Updated implementation mapping table with §9–§12 entries
+- Renumbered the former §9–§12 (Implementation Mapping, Compliance Testing, References, Change Log) to §13–§16
 
 ### Version 0.1.0 (Working Draft)
 

From 469f81380e24943687c80946a6ee2cc3d3401f22 Mon Sep 17 00:00:00 2001
From: Mara Nikola Kiefer <mnkiefer@github.com>
Date: Thu, 21 May 2026 08:41:05 +0200
Subject: [PATCH 3/7] enhance outcome evaluation with additional attributes and
 metrics

---
 actions/setup/js/emit_outcome_spans.cjs | 35 ++++++++++----
 actions/setup/js/evaluate_outcomes.cjs  | 61 +++++++++++++++++++++++++
 specs/otel-observability-spec.md        | 56 ++++++++++++++++++++++-
 3 files changed, 142 insertions(+), 10 deletions(-)

diff --git a/actions/setup/js/emit_outcome_spans.cjs b/actions/setup/js/emit_outcome_spans.cjs
index c857223e899..344770bb841 100644
--- a/actions/setup/js/emit_outcome_spans.cjs
+++ b/actions/setup/js/emit_outcome_spans.cjs
@@ -148,6 +148,11 @@ async function main() {
     const changedFiles = typeof eval_.changed_files === "number" ? eval_.changed_files : null;
     const additions = typeof eval_.additions === "number" ? eval_.additions : null;
     const deletions = typeof eval_.deletions === "number" ? eval_.deletions : null;
+    const reactionsTotal = typeof eval_.reactions_total === "number" ? eval_.reactions_total : null;
+    const reactionsPositive = typeof eval_.reactions_positive === "number" ? eval_.reactions_positive : null;
+    const reactionsNegative = typeof eval_.reactions_negative === "number" ? eval_.reactions_negative : null;
+    const comments = typeof eval_.comments === "number" ? eval_.comments : null;
+    const zeroTouch = eval_.zero_touch === true;
 
     const attributes = [
       buildAttr("gh-aw.exporter.name", "outcome-collector"),
@@ -168,6 +173,11 @@ async function main() {
     if (changedFiles !== null) attributes.push(buildAttr("gh-aw.outcome.changed_files", changedFiles));
     if (additions !== null) attributes.push(buildAttr("gh-aw.outcome.additions", additions));
     if (deletions !== null) attributes.push(buildAttr("gh-aw.outcome.deletions", deletions));
+    if (reactionsTotal !== null) attributes.push(buildAttr("gh-aw.outcome.reactions_total", reactionsTotal));
+    if (reactionsPositive !== null) attributes.push(buildAttr("gh-aw.outcome.reactions_positive", reactionsPositive));
+    if (reactionsNegative !== null) attributes.push(buildAttr("gh-aw.outcome.reactions_negative", reactionsNegative));
+    if (comments !== null) attributes.push(buildAttr("gh-aw.outcome.comments", comments));
+    if (zeroTouch) attributes.push(buildAttr("gh-aw.outcome.zero_touch", true));
 
     // Map result to OTLP status: accepted=OK, rejected=ERROR, noop=UNSET, pending/ignored=UNSET
     const statusCode = result === "rejected" ? 2 : result === "accepted" ? 1 : 0;
@@ -205,6 +215,8 @@ async function main() {
     buildAttr("gh-aw.outcome.acceptance_rate", getSummaryNumber("acceptance_rate", 0)),
     buildAttr("gh-aw.outcome.waste_rate", getSummaryNumber("waste_rate", 0)),
     buildAttr("gh-aw.outcome.noop_rate", getSummaryNumber("noop_rate", 0)),
+    buildAttr("gh-aw.outcome.zero_touch", getSummaryNumber("zero_touch", 0)),
+    buildAttr("gh-aw.outcome.zero_touch_rate", getSummaryNumber("zero_touch_rate", 0)),
     buildAttr("gh-aw.outcome.item_count", evaluations.length),
   ];
 
@@ -212,15 +224,20 @@ async function main() {
     summaryAttributes.push(buildAttr("gh-aw.outcome.date", summary.date));
   }
 
-  // Median time-to-resolution for resolved items
-  const resolutionTimes = evaluations
-    .filter(e => typeof e.resolution_sec === "number" && e.resolution_sec > 0)
-    .map(e => e.resolution_sec)
-    .sort((a, b) => a - b);
-  if (resolutionTimes.length > 0) {
-    const mid = Math.floor(resolutionTimes.length / 2);
-    const median = resolutionTimes.length % 2 !== 0 ? resolutionTimes[mid] : Math.round((resolutionTimes[mid - 1] + resolutionTimes[mid]) / 2);
-    summaryAttributes.push(buildAttr("gh-aw.outcome.median_resolution_sec", median));
+  // Median time-to-resolution: prefer summary value, fall back to local computation
+  const summaryMedian = summary && typeof summary.median_resolution_sec === "number" ? summary.median_resolution_sec : null;
+  if (summaryMedian !== null) {
+    summaryAttributes.push(buildAttr("gh-aw.outcome.median_resolution_sec", summaryMedian));
+  } else {
+    const resolutionTimes = evaluations
+      .filter(e => typeof e.resolution_sec === "number" && e.resolution_sec > 0)
+      .map(e => e.resolution_sec)
+      .sort((a, b) => a - b);
+    if (resolutionTimes.length > 0) {
+      const mid = Math.floor(resolutionTimes.length / 2);
+      const median = resolutionTimes.length % 2 !== 0 ? resolutionTimes[mid] : Math.round((resolutionTimes[mid - 1] + resolutionTimes[mid]) / 2);
+      summaryAttributes.push(buildAttr("gh-aw.outcome.median_resolution_sec", median));
+    }
   }
 
   // Trigger type distribution
diff --git a/actions/setup/js/evaluate_outcomes.cjs b/actions/setup/js/evaluate_outcomes.cjs
index eef8a218129..1addf3239fe 100644
--- a/actions/setup/js/evaluate_outcomes.cjs
+++ b/actions/setup/js/evaluate_outcomes.cjs
@@ -163,6 +163,11 @@ function secondsBetween(from, to) {
  * @property {number | null} changed_files
  * @property {number | null} additions
  * @property {number | null} deletions
+ * @property {number | null} reactions_total
+ * @property {number | null} reactions_positive
+ * @property {number | null} reactions_negative
+ * @property {number | null} comments
+ * @property {boolean} zero_touch
  */
 
 /**
@@ -186,6 +191,11 @@ function evaluateItem(item, defaultRepo) {
     changed_files: null,
     additions: null,
     deletions: null,
+    reactions_total: null,
+    reactions_positive: null,
+    reactions_negative: null,
+    comments: null,
+    zero_touch: false,
   };
 
   if (!url) {
@@ -206,6 +216,18 @@ function evaluateItem(item, defaultRepo) {
     }
     out.result = "accepted";
     out.detail = data.state;
+    out.comments = typeof data.comments === "number" ? data.comments : null;
+
+    // Reactions on issues
+    if (data.reactions && typeof data.reactions === "object") {
+      const r = data.reactions;
+      const positive = (r["+1"] || 0) + (r.heart || 0) + (r.hooray || 0) + (r.rocket || 0);
+      const negative = (r["-1"] || 0) + (r.confused || 0);
+      out.reactions_total = (r.total_count != null) ? r.total_count : positive + negative + (r.laugh || 0) + (r.eyes || 0);
+      out.reactions_positive = positive;
+      out.reactions_negative = negative;
+    }
+
     if (data.state === "closed" && data.created_at && data.closed_at) {
       out.resolution_sec = secondsBetween(data.created_at, data.closed_at);
     }
@@ -228,6 +250,22 @@ function evaluateItem(item, defaultRepo) {
     out.changed_files = typeof data.changed_files === "number" ? data.changed_files : null;
     out.additions = typeof data.additions === "number" ? data.additions : null;
     out.deletions = typeof data.deletions === "number" ? data.deletions : null;
+    out.comments = typeof data.comments === "number" ? data.comments : null;
+
+    // Reactions
+    if (data.reactions && typeof data.reactions === "object") {
+      const r = data.reactions;
+      const positive = (r["+1"] || 0) + (r.heart || 0) + (r.hooray || 0) + (r.rocket || 0);
+      const negative = (r["-1"] || 0) + (r.confused || 0);
+      out.reactions_total = (r.total_count != null) ? r.total_count : positive + negative + (r.laugh || 0) + (r.eyes || 0);
+      out.reactions_positive = positive;
+      out.reactions_negative = negative;
+    }
+
+    // Zero-touch: merged with no human review comments and no issue-level comments
+    if (data.merged === true && (out.review_comments === 0 || out.review_comments === null) && (out.comments === 0 || out.comments === null)) {
+      out.zero_touch = true;
+    }
 
     if (data.merged === true) {
       out.result = "accepted";
@@ -420,6 +458,11 @@ function main() {
           changed_files: evalResult.changed_files,
           additions: evalResult.additions,
           deletions: evalResult.deletions,
+          reactions_total: evalResult.reactions_total,
+          reactions_positive: evalResult.reactions_positive,
+          reactions_negative: evalResult.reactions_negative,
+          comments: evalResult.comments,
+          zero_touch: evalResult.zero_touch || false,
         }) + "\n"
       );
     }
@@ -442,6 +485,21 @@ function main() {
   const wasteRate = total > 0 ? rejected / total : 0;
   const noopRate = total + noop > 0 ? noop / (total + noop) : 0;
 
+  // Economics: zero-touch rate and median time-to-outcome
+  const allEvals = readJSONL(EVAL_JSONL);
+  const acceptedEvals = allEvals.filter(e => e.result === "accepted");
+  const zeroTouchCount = acceptedEvals.filter(e => e.zero_touch === true).length;
+  const zeroTouchRate = acceptedEvals.length > 0 ? zeroTouchCount / acceptedEvals.length : 0;
+  const resolutionTimes = allEvals
+    .filter(e => typeof e.resolution_sec === "number" && e.resolution_sec > 0)
+    .map(e => e.resolution_sec)
+    .sort((a, b) => a - b);
+  let medianResolutionSec = null;
+  if (resolutionTimes.length > 0) {
+    const mid = Math.floor(resolutionTimes.length / 2);
+    medianResolutionSec = resolutionTimes.length % 2 !== 0 ? resolutionTimes[mid] : Math.round((resolutionTimes[mid - 1] + resolutionTimes[mid]) / 2);
+  }
+
   writeJSONAtomic(SUMMARY_PATH, {
     runs_checked: checked,
     total_outcomes: total,
@@ -453,6 +511,9 @@ function main() {
     acceptance_rate: Math.round(acceptanceRate * 10000) / 10000,
     waste_rate: Math.round(wasteRate * 10000) / 10000,
     noop_rate: Math.round(noopRate * 10000) / 10000,
+    zero_touch: zeroTouchCount,
+    zero_touch_rate: Math.round(zeroTouchRate * 10000) / 10000,
+    median_resolution_sec: medianResolutionSec,
     date: new Date().toISOString().slice(0, 10),
   });
 
diff --git a/specs/otel-observability-spec.md b/specs/otel-observability-spec.md
index 236ec0cc920..626efd5ca0b 100644
--- a/specs/otel-observability-spec.md
+++ b/specs/otel-observability-spec.md
@@ -473,6 +473,58 @@ The dedicated agent span (`gh-aw.*.agent`) follows OpenTelemetry [GenAI semantic
 | `gen_ai.usage.cache_read.input_tokens` | int | Cache read tokens |
 | `gen_ai.usage.cache_creation.input_tokens` | int | Cache write tokens |
 
+### 10.4 Outcome Evaluation Span Attributes
+
+Per-item outcome evaluation spans (`gh-aw.outcome.evaluation`) are emitted by the outcome-collector workflow. Each span represents one safe output item evaluated against the GitHub API.
+
+| Attribute | Type | Condition | Description |
+|---|---|---|---|
+| `gh-aw.outcome.type` | string | Required | Safe output type (e.g., `create_pull_request`, `create_issue`) |
+| `gh-aw.outcome.result` | string | Required | `accepted`, `rejected`, `pending`, `ignored`, `noop` |
+| `gh-aw.outcome.workflow` | string | Required | Source workflow name |
+| `gh-aw.outcome.run_id` | int | Required | Source run ID |
+| `gh-aw.outcome.repo` | string | Required | Repository |
+| `gh-aw.outcome.url` | string | When available | URL to the created object |
+| `gh-aw.outcome.detail` | string | When available | Result detail (e.g., `merged`, `closed`, `open`) |
+| `gh-aw.outcome.created_at` | string | When available | Item creation timestamp |
+| `gh-aw.outcome.event` | string | When available | Triggering event type |
+| `gh-aw.outcome.resolution_sec` | int | When resolved | Seconds from creation to resolution |
+| `gh-aw.outcome.pending_age_sec` | int | When pending | Seconds since creation |
+| `gh-aw.outcome.review_comments` | int | PRs only | Number of review comments |
+| `gh-aw.outcome.comments` | int | When available | Number of issue-level comments |
+| `gh-aw.outcome.changed_files` | int | PRs only | Files changed |
+| `gh-aw.outcome.additions` | int | PRs only | Lines added |
+| `gh-aw.outcome.deletions` | int | PRs only | Lines deleted |
+| `gh-aw.outcome.reactions_total` | int | When available | Total reaction count |
+| `gh-aw.outcome.reactions_positive` | int | When available | Positive reactions (+1, heart, hooray, rocket) |
+| `gh-aw.outcome.reactions_negative` | int | When available | Negative reactions (-1, confused) |
+| `gh-aw.outcome.zero_touch` | boolean | When true | Pull request merged with zero review comments and zero issue-level comments |
+
+### 10.5 Outcome Summary Span Attributes
+
+The fleet summary span (`gh-aw.outcome.summary`) aggregates all evaluated outcomes into a single span with economics metrics.
+
+| Attribute | Type | Description |
+|---|---|---|
+| `gh-aw.outcome.runs_checked` | int | Number of runs evaluated |
+| `gh-aw.outcome.total` | int | Total actionable outcomes |
+| `gh-aw.outcome.accepted` | int | Accepted outcomes |
+| `gh-aw.outcome.rejected` | int | Rejected outcomes |
+| `gh-aw.outcome.ignored` | int | Ignored outcomes |
+| `gh-aw.outcome.pending` | int | Pending outcomes |
+| `gh-aw.outcome.noop` | int | Noop outcomes |
+| `gh-aw.outcome.acceptance_rate` | double | Accepted / (accepted + rejected) |
+| `gh-aw.outcome.waste_rate` | double | Rejected / total |
+| `gh-aw.outcome.noop_rate` | double | Noop / (total + noop) |
+| `gh-aw.outcome.zero_touch` | int | Count of zero-touch accepted outcomes |
+| `gh-aw.outcome.zero_touch_rate` | double | Zero-touch / accepted |
+| `gh-aw.outcome.median_resolution_sec` | int | Median seconds from creation to resolution |
+| `gh-aw.outcome.item_count` | int | Number of per-item spans emitted |
+| `gh-aw.outcome.date` | string | Evaluation date (YYYY-MM-DD) |
+| `gh-aw.outcome.events` | string | Comma-separated distinct trigger events |
+| `gh-aw.outcome.workflows` | string | Comma-separated distinct workflow names |
+| `gh-aw.outcome.types` | string | Comma-separated distinct outcome types |
+
 ---
 
 ## 11. Resource Attributes
@@ -604,7 +656,7 @@ This section maps the normative behavior in this specification to the current `g
 | §7 | Local Mirrors and Artifacts | `actions/setup/js/send_otlp_span.cjs`, `actions/setup/js/constants.cjs`, `actions/setup/post.js` |
 | §8 | Security and Privacy Requirements | `pkg/workflow/observability_otlp.go`, `pkg/workflow/mcp_renderer.go`, `pkg/workflow/mcp_setup_generator.go`, `actions/setup/js/send_otlp_span.cjs` |
 | §9 | Trace Model | `actions/setup/js/send_otlp_span.cjs`, `actions/setup/js/action_setup_otlp.cjs`, `actions/setup/js/action_conclusion_otlp.cjs` |
-| §10 | Span Attribute Contract | `actions/setup/js/action_setup_otlp.cjs`, `actions/setup/js/action_conclusion_otlp.cjs`, `actions/setup/js/send_otlp_span.cjs` |
+| §10 | Span Attribute Contract | `actions/setup/js/action_setup_otlp.cjs`, `actions/setup/js/action_conclusion_otlp.cjs`, `actions/setup/js/send_otlp_span.cjs`, `actions/setup/js/evaluate_outcomes.cjs`, `actions/setup/js/emit_outcome_spans.cjs` |
 | §11 | Resource Attributes | `actions/setup/js/action_setup_otlp.cjs`, `actions/setup/js/send_otlp_span.cjs` |
 | §12 | Trace ID Propagation | `actions/setup/js/action_setup_otlp.cjs`, `actions/setup/js/aw_context.cjs`, `pkg/workflow/compiler_yaml.go` |
 
@@ -679,6 +731,8 @@ The following agentic workflows provide runtime conformance validation:
 
 - Added §9 Trace Model: span naming, hierarchy, kinds, status, exception events
 - Added §10 Span Attribute Contract: required and conditional attributes for setup, conclusion, and agent spans
+- Added §10.4 Outcome Evaluation Span Attributes: reactions, zero-touch, comments
+- Added §10.5 Outcome Summary Span Attributes: zero-touch rate, median resolution, economics metrics
 - Added §11 Resource Attributes: required and conditional resource attributes, instrumentation scope
 - Added §12 Trace ID Propagation and Lookup: resolution order, storage, cross-job and dispatch propagation
 - Added §14.1 Runtime Conformance Workflows

From ed15447b73bba3d67d2349713c27637b19fb8f26 Mon Sep 17 00:00:00 2001
From: Mara Nikola Kiefer <mnkiefer@github.com>
Date: Thu, 21 May 2026 08:41:58 +0200
Subject: [PATCH 4/7] refactor outcome report structure to enhance clarity and
 actionable insights

---
 .github/workflows/outcome-collector.md | 73 +++++++++++++++++---------
 1 file changed, 49 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/outcome-collector.md b/.github/workflows/outcome-collector.md
index 0a69a87a464..c33c757f2e0 100644
--- a/.github/workflows/outcome-collector.md
+++ b/.github/workflows/outcome-collector.md
@@ -86,44 +86,69 @@ Use h3 (`###`) or lower for all headers in your report. Never use h1 (`#`) or h2
 Wrap long sections in `<details><summary><b>Section Name</b></summary>` tags to improve readability and reduce scrolling. Keep critical summaries and key metrics always visible.
 
 Suggested structure:
-- Brief summary (always visible)
-- Key metrics or highlights (always visible)
-- Detailed analysis (in `<details>` tags)
-- Recommendations (always visible)
+- Scorecard with economics metrics (always visible)
+- Actionable recommendations with specific next steps (always visible)
+- Per-workflow breakdown (in `<details>` tags)
+- Detailed per-run data (in `<details>` tags)
 
 ```markdown
-## Safe Output Outcomes — {date}
+### Outcome Scorecard — {date}
 
-### Fleet Summary
+| Metric | Value | Status |
+|--------|-------|--------|
+| **Acceptance rate** | **{acceptance_rate}%** | 🟢 >80% / 🟡 60-80% / 🔴 <60% |
+| **Zero-touch rate** | **{zero_touch_rate}%** | 🟢 >50% / 🟡 25-50% / 🔴 <25% |
+| **Waste rate** | {waste_rate}% | 🟢 <10% / 🟡 10-25% / 🔴 >25% |
+| **Median time to resolution** | {median_resolution} | — |
+| Accepted | {accepted} / {total_outcomes} | — |
+| Rejected | {rejected} | — |
+| Zero-touch | {zero_touch} / {accepted} | — |
+| Pending | {pending} | — |
+| Runs checked | {runs_checked} | — |
 
-| Metric | Value |
-|--------|-------|
-| Runs checked | {runs_checked} |
-| Total outcomes | {total_outcomes} |
-| Accepted | {accepted} |
-| Rejected | {rejected} |
-| Ignored | {ignored} |
-| Pending | {pending} |
-| **Acceptance rate** | **{acceptance_rate}%** |
-| Waste rate | {waste_rate}% |
+### 🔴 Action Items
+
+List concrete actions the team should take based on the data:
+
+1. **Highest-waste workflows** — Name the top 2-3 workflows by waste rate. If waste rate >25%, recommend reviewing the prompt or safe-output configuration.
+2. **Stuck pending items** — List any items pending >48 hours. These need human review or the workflow needs a timeout.
+3. **Low zero-touch workflows** — Workflows whose accepted items almost always attract review comments or issue comments (zero-touch rate near 0%) indicate that the agent's output quality needs improvement.
+4. **Negative reactions** — Items with negative reactions (👎, confused) signal user dissatisfaction even on "accepted" items.
 
 ### Per-Workflow Breakdown
 
-For each workflow with outcomes, show:
-- Workflow name
-- Outcomes: accepted / rejected / ignored
-- Acceptance rate
+For each workflow with outcomes, show a mini-scorecard:
+
+| Workflow | Accepted | Rejected | Pending | Acceptance | Zero-touch | Reactions 👍/👎 |
+|----------|----------|----------|---------|------------|------------|----------------|
+
+Sort by waste rate descending (worst first).
+
+### Reaction Summary
+
+If any items have reactions, summarize:
+- Items with positive reactions (👍, heart, rocket, hooray): these workflows are producing valued output
+- Items with negative reactions (👎, confused): these need prompt or quality improvements
+- Items with zero reactions: no signal yet
+
+### Trend Signal
 
-### Key Observations
+Compare today's acceptance rate, zero-touch rate, and waste rate against the previous report in cache-memory (if available). Flag:
+- ⬆️ Improving: acceptance rate up >5pp or zero-touch rate up >10pp
+- ⬇️ Regressing: acceptance rate down >5pp or waste rate up >5pp
+- ➡️ Stable: within 5pp of previous
 
-- Which workflows have the highest acceptance rate?
-- Which workflows have the highest waste rate?
-- Any workflows with all outcomes ignored (noise signal)?
+If no previous data exists, skip this section.
 ```
 
 ## Guidelines
 
 - Keep the report factual — numbers only, no speculation
 - Do not re-evaluate outcomes — use the pre-computed data
+- Sort workflows by waste rate descending so the worst performers are at the top
+- Flag any workflow with acceptance rate <60% as needing attention
+- Flag any item pending >48 hours
+- If reactions data is available, include it in the per-workflow breakdown
+- Save this report's key metrics to cache-memory for trend comparison in the next run
 - If no outcomes exist, use `noop`
 - Stop immediately after creating the issue

From cedba0c7e69a2f25c09be0f34d0228cff3f62320 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 May 2026 06:46:14 +0000
Subject: [PATCH 5/7] chore: outline workflow recompilation plan

Co-authored-by: mnkiefer <8320933+mnkiefer@users.noreply.github.com>
---
 .../otlp-data-quality-validator.lock.yml      | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/otlp-data-quality-validator.lock.yml b/.github/workflows/otlp-data-quality-validator.lock.yml
index 6d4286d5a00..46b54f639e5 100644
--- a/.github/workflows/otlp-data-quality-validator.lock.yml
+++ b/.github/workflows/otlp-data-quality-validator.lock.yml
@@ -1,4 +1,4 @@
-# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"17dcabe392f10a701b05312a2a2a544024a389a44bbf590159964c1892c52074","strict":true,"agent_id":"copilot"}
+# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"759e69cd162496de334aa3b7220316b6485908c3c63e4436e2a2963728bf6146","strict":true,"agent_id":"copilot"}
 # gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GH_AW_OTEL_GRAFANA_AUTHORIZATION","GH_AW_OTEL_GRAFANA_ENDPOINT","GH_AW_OTEL_SENTRY_AUTHORIZATION","GH_AW_OTEL_SENTRY_ENDPOINT","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.49"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.49"},{"image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.25.49"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.49"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.9","digest":"sha256:64828b42a4482f58fab16509d7f8f495a6d97c972a98a68aff20543531ac0388","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.9@sha256:64828b42a4482f58fab16509d7f8f495a6d97c972a98a68aff20543531ac0388"},{"image":"ghcr.io/github/github-mcp-server:v1.0.4"},{"image":"node:lts-alpine","digest":"sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f","pinned_image":"node:lts-alpine@sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f"}]}
 #    ___                   _   _      
 #   / _ \                 | | (_)     
@@ -22,7 +22,7 @@
 #
 # For more information: https://github.github.com/gh-aw/introduction/overview/
 #
-# Validates OTLP trace, metric, and log data quality across app emission, Collector processing, and backend visibility
+# Validates gh-aw OTLP trace data quality across local JSONL mirror, direct vendor export, and backend visibility
 #
 # Resolved workflow manifest:
 #   Imports:
@@ -202,20 +202,20 @@ jobs:
         run: |
           bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh"
           {
-          cat << 'GH_AW_PROMPT_7de7fa5e3739b47b_EOF'
+          cat << 'GH_AW_PROMPT_bc29e1568146c495_EOF'
           <system>
-          GH_AW_PROMPT_7de7fa5e3739b47b_EOF
+          GH_AW_PROMPT_bc29e1568146c495_EOF
           cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
           cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
           cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
           cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
-          cat << 'GH_AW_PROMPT_7de7fa5e3739b47b_EOF'
+          cat << 'GH_AW_PROMPT_bc29e1568146c495_EOF'
           <safe-output-tools>
           Tools: create_issue, missing_tool, missing_data, noop
           </safe-output-tools>
-          GH_AW_PROMPT_7de7fa5e3739b47b_EOF
+          GH_AW_PROMPT_bc29e1568146c495_EOF
           cat "${RUNNER_TEMP}/gh-aw/prompts/mcp_cli_tools_prompt.md"
-          cat << 'GH_AW_PROMPT_7de7fa5e3739b47b_EOF'
+          cat << 'GH_AW_PROMPT_bc29e1568146c495_EOF'
           <github-context>
           The following GitHub context information is available for this workflow:
           {{#if github.actor}}
@@ -244,14 +244,14 @@ jobs:
           {{/if}}
           </github-context>
           
-          GH_AW_PROMPT_7de7fa5e3739b47b_EOF
+          GH_AW_PROMPT_bc29e1568146c495_EOF
           cat "${RUNNER_TEMP}/gh-aw/prompts/cli_proxy_with_safeoutputs_prompt.md"
-          cat << 'GH_AW_PROMPT_7de7fa5e3739b47b_EOF'
+          cat << 'GH_AW_PROMPT_bc29e1568146c495_EOF'
           </system>
           {{#runtime-import .github/workflows/shared/otlp.md}}
           {{#runtime-import .github/workflows/shared/otel-queries.md}}
           {{#runtime-import .github/workflows/otlp-data-quality-validator.md}}
-          GH_AW_PROMPT_7de7fa5e3739b47b_EOF
+          GH_AW_PROMPT_bc29e1568146c495_EOF
           } > "$GH_AW_PROMPT"
       - name: Interpolate variables and render templates
         uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
@@ -466,9 +466,9 @@ jobs:
           mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs"
           mkdir -p /tmp/gh-aw/safeoutputs
           mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
-          cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_4c77f8b71cbb283e_EOF'
+          cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_53860d35baa3701f_EOF'
           {"create_issue":{"close_older_issues":true,"expires":168,"labels":["observability","telemetry","report"],"max":1,"title_prefix":"[OTLP Validation] "},"create_report_incomplete_issue":{},"max_bot_mentions":1,"mentions":{"enabled":false},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"report_incomplete":{}}
-          GH_AW_SAFE_OUTPUTS_CONFIG_4c77f8b71cbb283e_EOF
+          GH_AW_SAFE_OUTPUTS_CONFIG_53860d35baa3701f_EOF
       - name: Generate Safe Outputs Tools
         env:
           GH_AW_TOOLS_META_JSON: |
@@ -673,7 +673,7 @@ jobs:
           
           mkdir -p /home/runner/.copilot
           GH_AW_NODE=$(which node 2>/dev/null || command -v node 2>/dev/null || echo node)
-          cat << GH_AW_MCP_CONFIG_14247717b4285c48_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs"
+          cat << GH_AW_MCP_CONFIG_2c6df0af9284b001_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs"
           {
             "mcpServers": {
               "safeoutputs": {
@@ -703,7 +703,7 @@ jobs:
               }
             }
           }
-          GH_AW_MCP_CONFIG_14247717b4285c48_EOF
+          GH_AW_MCP_CONFIG_2c6df0af9284b001_EOF
       - name: Mount MCP servers as CLIs
         id: mount-mcp-clis
         continue-on-error: true
@@ -1214,7 +1214,7 @@ jobs:
         uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
         env:
           WORKFLOW_NAME: "OTLP Data Quality Validator"
-          WORKFLOW_DESCRIPTION: "Validates OTLP trace, metric, and log data quality across app emission, Collector processing, and backend visibility"
+          WORKFLOW_DESCRIPTION: "Validates gh-aw OTLP trace data quality across local JSONL mirror, direct vendor export, and backend visibility"
           HAS_PATCH: ${{ needs.agent.outputs.has_patch }}
         with:
           script: |

From f913f0d5c11cbf5753c5fc560e731b464536c209 Mon Sep 17 00:00:00 2001
From: Mara Nikola Kiefer <8320933+mnkiefer@users.noreply.github.com>
Date: Thu, 21 May 2026 08:54:07 +0200
Subject: [PATCH 6/7] Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 actions/setup/js/evaluate_outcomes.cjs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/actions/setup/js/evaluate_outcomes.cjs b/actions/setup/js/evaluate_outcomes.cjs
index 1addf3239fe..6abce70d250 100644
--- a/actions/setup/js/evaluate_outcomes.cjs
+++ b/actions/setup/js/evaluate_outcomes.cjs
@@ -263,7 +263,7 @@ function evaluateItem(item, defaultRepo) {
     }
 
     // Zero-touch: merged with no human review comments and no issue-level comments
-    if (data.merged === true && (out.review_comments === 0 || out.review_comments === null) && (out.comments === 0 || out.comments === null)) {
+    if (data.merged === true && out.review_comments === 0 && out.comments === 0) {
       out.zero_touch = true;
     }
 

From 15ceccdb3aa096c3768aab39044f6e90b667775c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 May 2026 07:01:12 +0000
Subject: [PATCH 7/7] fix: address remaining OTLP outcome review feedback

Co-authored-by: mnkiefer <8320933+mnkiefer@users.noreply.github.com>
---
 .../workflows/otlp-data-quality-validator.md  | 16 ++++-----
 actions/setup/js/emit_outcome_spans.cjs       |  2 +-
 actions/setup/js/emit_outcome_spans.test.cjs  | 33 +++++++++++++++++++
 actions/setup/js/evaluate_outcomes.cjs        | 19 ++++++-----
 specs/otel-observability-spec.md              |  4 +--
 5 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/otlp-data-quality-validator.md b/.github/workflows/otlp-data-quality-validator.md
index 5f2baf979cf..aba848a7090 100644
--- a/.github/workflows/otlp-data-quality-validator.md
+++ b/.github/workflows/otlp-data-quality-validator.md
@@ -80,23 +80,23 @@ Infer expectations from:
 ### Step 2: Validate trace completeness and integrity
 
 From the local JSONL mirror (`/tmp/gh-aw/otel.jsonl`), compute and report:
-- unique `trace_id` count (expect 1 per workflow run)
-- unique span identity count using `trace_id + span_id`
-- duplicate spans with same `trace_id + span_id`
+- unique `traceId` count (expect 1 per workflow run)
+- unique span identity count using `traceId + spanId`
+- duplicate spans with same `traceId + spanId`
 
 Validate the expected span hierarchy per the spec (§9.3):
-- all setup spans share a single global `parent_span_id`
+- all setup spans share a single global `parentSpanId`
 - each conclusion span parents under its job's setup span
 - agent spans parent under the conclusion span
 - root setup parent has no parent
 
 Validate required fields per span:
-- `trace_id` (32-char hex)
-- `span_id` (16-char hex)
+- `traceId` (32-char hex)
+- `spanId` (16-char hex)
 - `name` (must match pattern `gh-aw.<job-name>.<operation>`)
 - `kind` (INTERNAL=1 for setup/conclusion, CLIENT=3 for agent)
-- `start_time_unix_nano`
-- `end_time_unix_nano`
+- `startTimeUnixNano`
+- `endTimeUnixNano`
 
 Flag timestamp issues:
 - `start_time > end_time`
diff --git a/actions/setup/js/emit_outcome_spans.cjs b/actions/setup/js/emit_outcome_spans.cjs
index 344770bb841..2d7eccaf95e 100644
--- a/actions/setup/js/emit_outcome_spans.cjs
+++ b/actions/setup/js/emit_outcome_spans.cjs
@@ -215,7 +215,7 @@ async function main() {
     buildAttr("gh-aw.outcome.acceptance_rate", getSummaryNumber("acceptance_rate", 0)),
     buildAttr("gh-aw.outcome.waste_rate", getSummaryNumber("waste_rate", 0)),
     buildAttr("gh-aw.outcome.noop_rate", getSummaryNumber("noop_rate", 0)),
-    buildAttr("gh-aw.outcome.zero_touch", getSummaryNumber("zero_touch", 0)),
+    buildAttr("gh-aw.outcome.zero_touch_count", getSummaryNumber("zero_touch", 0)),
     buildAttr("gh-aw.outcome.zero_touch_rate", getSummaryNumber("zero_touch_rate", 0)),
     buildAttr("gh-aw.outcome.item_count", evaluations.length),
   ];
diff --git a/actions/setup/js/emit_outcome_spans.test.cjs b/actions/setup/js/emit_outcome_spans.test.cjs
index a590f91f948..f13bb36f64e 100644
--- a/actions/setup/js/emit_outcome_spans.test.cjs
+++ b/actions/setup/js/emit_outcome_spans.test.cjs
@@ -182,6 +182,11 @@ describe("emit_outcome_spans.cjs", () => {
       rejected: 1,
       ignored: 0,
       pending: 0,
+      noop: 0,
+      noop_rate: 0,
+      zero_touch: 1,
+      zero_touch_rate: 1,
+      median_resolution_sec: 42,
       acceptance_rate: 0.5,
       waste_rate: 0.5,
       date: "2026-05-13",
@@ -198,6 +203,15 @@ describe("emit_outcome_spans.cjs", () => {
           url: "https://github.com/github/gh-aw/issues/1",
           repo: "github/gh-aw",
           timestamp: "2026-05-13T09:00:00Z",
+          review_comments: 0,
+          changed_files: 3,
+          additions: 10,
+          deletions: 2,
+          reactions_total: 5,
+          reactions_positive: 4,
+          reactions_negative: 1,
+          comments: 0,
+          zero_touch: true,
         }),
         JSON.stringify({
           type: "comment",
@@ -263,10 +277,29 @@ describe("emit_outcome_spans.cjs", () => {
 
     expect(summarySpan.attributes).toContainEqual({ key: "gh-aw.exporter.name", value: "outcome-collector" });
     expect(summarySpan.attributes).toContainEqual({ key: "gh-aw.outcome.date", value: "2026-05-13" });
+    expect(summarySpan.attributes).toContainEqual({ key: "gh-aw.outcome.zero_touch_count", value: 1 });
     expect(spans[1].attributes).toContainEqual({ key: "gh-aw.exporter.name", value: "outcome-collector" });
     expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.url", value: "https://github.com/github/gh-aw/issues/1" });
     expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.detail", value: "created item" });
     expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.created_at", value: "2026-05-13T09:00:00Z" });
+    expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.review_comments", value: 0 });
+    expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.changed_files", value: 3 });
+    expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.additions", value: 10 });
+    expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.deletions", value: 2 });
+    expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.reactions_total", value: 5 });
+    expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.reactions_positive", value: 4 });
+    expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.reactions_negative", value: 1 });
+    expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.comments", value: 0 });
+    expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.zero_touch", value: true });
+    expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.review_comments")).toBeUndefined();
+    expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.changed_files")).toBeUndefined();
+    expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.additions")).toBeUndefined();
+    expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.deletions")).toBeUndefined();
+    expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.reactions_total")).toBeUndefined();
+    expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.reactions_positive")).toBeUndefined();
+    expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.reactions_negative")).toBeUndefined();
+    expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.comments")).toBeUndefined();
+    expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.zero_touch")).toBeUndefined();
 
     expect(mockAppendToOTLPJSONL).toHaveBeenCalledOnce();
     expect(mockSendOTLPToAllEndpoints).not.toHaveBeenCalled();
diff --git a/actions/setup/js/evaluate_outcomes.cjs b/actions/setup/js/evaluate_outcomes.cjs
index 6abce70d250..349d34b9e0e 100644
--- a/actions/setup/js/evaluate_outcomes.cjs
+++ b/actions/setup/js/evaluate_outcomes.cjs
@@ -353,6 +353,9 @@ function main() {
   let pending = 0;
   let total = 0;
   let noop = 0;
+  let zeroTouchCount = 0;
+  /** @type {number[]} */
+  const resolutionTimes = [];
 
   // Clear the evaluations file
   fs.writeFileSync(EVAL_JSONL, "");
@@ -431,6 +434,9 @@ function main() {
       switch (evalResult.result) {
         case "accepted":
           accepted++;
+          if (evalResult.zero_touch === true) {
+            zeroTouchCount++;
+          }
           break;
         case "rejected":
           rejected++;
@@ -439,6 +445,9 @@ function main() {
           pending++;
           break;
       }
+      if (typeof evalResult.resolution_sec === "number" && evalResult.resolution_sec > 0) {
+        resolutionTimes.push(evalResult.resolution_sec);
+      }
 
       fs.appendFileSync(
         EVAL_JSONL,
@@ -486,14 +495,8 @@ function main() {
   const noopRate = total + noop > 0 ? noop / (total + noop) : 0;
 
   // Economics: zero-touch rate and median time-to-outcome
-  const allEvals = readJSONL(EVAL_JSONL);
-  const acceptedEvals = allEvals.filter(e => e.result === "accepted");
-  const zeroTouchCount = acceptedEvals.filter(e => e.zero_touch === true).length;
-  const zeroTouchRate = acceptedEvals.length > 0 ? zeroTouchCount / acceptedEvals.length : 0;
-  const resolutionTimes = allEvals
-    .filter(e => typeof e.resolution_sec === "number" && e.resolution_sec > 0)
-    .map(e => e.resolution_sec)
-    .sort((a, b) => a - b);
+  const zeroTouchRate = accepted > 0 ? zeroTouchCount / accepted : 0;
+  resolutionTimes.sort((a, b) => a - b);
   let medianResolutionSec = null;
   if (resolutionTimes.length > 0) {
     const mid = Math.floor(resolutionTimes.length / 2);
diff --git a/specs/otel-observability-spec.md b/specs/otel-observability-spec.md
index 626efd5ca0b..d0a2989f31b 100644
--- a/specs/otel-observability-spec.md
+++ b/specs/otel-observability-spec.md
@@ -516,7 +516,7 @@ The fleet summary span (`gh-aw.outcome.summary`) aggregates all evaluated outcom
 | `gh-aw.outcome.acceptance_rate` | double | Accepted / (accepted + rejected) |
 | `gh-aw.outcome.waste_rate` | double | Rejected / total |
 | `gh-aw.outcome.noop_rate` | double | Noop / (total + noop) |
-| `gh-aw.outcome.zero_touch` | int | Count of zero-touch accepted outcomes |
+| `gh-aw.outcome.zero_touch_count` | int | Count of zero-touch accepted outcomes |
 | `gh-aw.outcome.zero_touch_rate` | double | Zero-touch / accepted |
 | `gh-aw.outcome.median_resolution_sec` | int | Median seconds from creation to resolution |
 | `gh-aw.outcome.item_count` | int | Number of per-item spans emitted |
@@ -744,4 +744,4 @@ The following agentic workflows provide runtime conformance validation:
 
 - Initial repository-level OTel observability specification
 - Defined the normative `observability.otlp` contract for compiler and runtime behavior
-- Added gateway-integration, local-mirror, implementation-mapping, and conformance-test sections
\ No newline at end of file
+- Added gateway-integration, local-mirror, implementation-mapping, and conformance-test sections