github · pelikhan · May 24, 2026 · May 24, 2026 · May 24, 2026 · May 24, 2026
diff --git a/.github/workflows/daily-malicious-code-scan.lock.yml b/.github/workflows/daily-malicious-code-scan.lock.yml
diff --git a/docs/src/content/docs/reference/effective-tokens-specification.md b/docs/src/content/docs/reference/effective-tokens-specification.md
@@ -423,6 +423,14 @@ Implementations MAY:
 
 Extensions MUST NOT alter the core ET definition or the default weight values without disclosure.
 
+**ET-EXT-01**: Extensions MUST NOT redefine the default weight values (`w_in`, `w_cache`, `w_out`, `w_reason`) without incrementing the specification version. Any implementation that ships with non-default weight values MUST declare a version bump and MUST update the Compliance Checklist in §10.2 to reflect the changed defaults.
+
+**ET-EXT-02**: Extensions MUST NOT introduce new mandatory fields into the invocation node schema (§6.1) without a corresponding revision to the conformance requirements in §2.3. New fields MAY be added as optional extensions, but implementations MUST NOT reject conforming payloads that omit optional extension fields.
+
+**ET-EXT-03**: Extensions that add new token classes MUST assign unique, non-conflicting class names and MUST NOT reuse the reserved names `input`, `cached_input`, `output`, or `reasoning`. Extension token classes MUST NOT be included in the default `base_weighted_tokens` formula unless a new specification version explicitly incorporates them.
+
+For implementation files that exercise extensibility paths, see the Sync Notes section.
+
 ---
 
 ## 10. Compliance Testing
@@ -694,6 +702,34 @@ The `version` field in `model_multipliers.json` corresponds to the registry sche
 
 ## Sync Notes
 
+### §4–§8 Implementation File Mapping
+
+The table below maps the normative sections of this specification to the implementation files that realize each requirement. Use this mapping to identify which files must be updated when specification sections change.
+
+| Spec Section | Description | Implementation File(s) |
+|---|---|---|
+| §4 Token Accounting Model | Per-invocation ET computation (`base_weighted_tokens`, ET formula) | `pkg/cli/effective_tokens.go` (`populateEffectiveTokens`, `computeBaseWeightedTokens`) |
+| §5 Multi-Invocation Aggregation | `ET_total`, `raw_total_tokens`, `total_invocations` | `pkg/cli/effective_tokens.go` (`AggregateEffectiveTokens`) |
+| §6 Execution Graph Requirements | Node schema, root/sub-agent linkage, graph traversal | `pkg/cli/logs_models.go`, `pkg/cli/logs_episode.go`, `pkg/cli/logs_orchestrator.go` |
+| §7 Reporting | Console and JSON output of ET summaries and per-model breakdowns | `pkg/cli/audit_report.go`, `pkg/cli/audit_report_render_tools.go`, `pkg/cli/audit_diff.go`, `pkg/cli/logs_report.go` |
+| §7.1 OTel Attribute Requirements | OpenTelemetry span attribute emission for ET metrics | `pkg/cli/token_usage.go`, `pkg/cli/logs_run_processor.go` |
+| §8 Implementation Requirements | Completeness, determinism, versioning, partial visibility safeguards | `pkg/cli/effective_tokens.go`, `pkg/cli/forecast_montecarlo.go` |
+
+### §4–§8 Sync Procedure
+
+To keep the specification and implementation synchronized:
+
+1. When changing the ET formula or token class weights (§4), update `pkg/cli/effective_tokens.go` and update the Compliance Checklist in §10.2.
+2. When changing aggregation semantics (§5), update `pkg/cli/effective_tokens.go` and rerun tests `T-ET-010–T-ET-012` and `T-ET-006`.
+3. When changing the execution graph node schema (§6), update `pkg/cli/logs_models.go` and `pkg/cli/logs_episode.go` in the same change.
+4. When changing reporting format or field names (§7), update the affected render files in `pkg/cli/` and run `go test ./pkg/cli/ -run TestAudit`.
+5. When changing OTel attribute names (§7.1), update `pkg/cli/token_usage.go` and verify attribute names with `grep -r "effective_tokens" pkg/`.
+6. After any §8 change affecting determinism or partial visibility, re-run `go test ./pkg/cli/ -run TestEffectiveTokens` and `go test ./pkg/cli/ -run TestRunMonteCarlo`.
+
+Run `grep -r "effective_tokens" pkg/` to confirm all implementation files are captured in the table above.
+
+### Model Multiplier Registry Sync
+
 The Effective Tokens registry is maintained in `pkg/cli/data/model_multipliers.json` and loaded by `pkg/cli/effective_tokens.go`.
 
 To keep specification and implementation synchronized:

diff --git a/docs/src/content/docs/reference/forecast-specification.md b/docs/src/content/docs/reference/forecast-specification.md
@@ -962,12 +962,14 @@ Sync procedure:
 
 Sync follow-up tasks:
 
+- **[Resolved]** Expand forecast fixtures to cover invalid/non-finite `λ` derivation paths and
+  zero-projection fallback behavior. Resolved in `pkg/cli/forecast_montecarlo_test.go` via
+  `TestRunMonteCarloNonFiniteLambda` and `TestRunMonteCarloZeroLambdaFallback`.
 - Add an implementation-level assertion that verbose diagnostics and JSON output are derived from the
-  same `λ` value used by the Monte Carlo engine.
-- Expand forecast fixtures to cover invalid/non-finite `λ` derivation paths and zero-projection
-  fallback behavior.
+  same `λ` value used by the Monte Carlo engine. Track in
+  [#31984](https://github.com/github/gh-aw/issues/31984).
 - Re-review Appendix B whenever the Poisson branch threshold or `observed_runs_per_period`
-  calculation changes.
+  calculation changes. Track in [#31985](https://github.com/github/gh-aw/issues/31985).
 
 ---
 

diff --git a/docs/src/content/docs/reference/mcp-scripts-specification.md b/docs/src/content/docs/reference/mcp-scripts-specification.md
@@ -759,20 +759,24 @@ Implementations MUST provide:
 3. **Resource Limits**: Containers enforce CPU, memory, and filesystem limits
 4. **Network Restrictions**: Network access controlled by workflow configuration
 
-JavaScript tools SHOULD provide:
+JavaScript tools MUST provide:
 
 1. **Module Isolation**: Tools execute in isolated module scope
 2. **Limited Execution**: Use V8 isolates or similar for CPU/memory limits
 3. **No Server Access**: Tools cannot access server internals or other tools
 
+**SM-JS-01**: JavaScript tools MUST execute in a sandboxed V8 context with restricted global scope. Implementations MUST NOT expose Node.js global objects (e.g., `process`, `require`, `__dirname`) to tool scripts unless explicitly permitted by the tool configuration.
+
 ### 7.3 Input Sanitization
 
 Implementations MUST:
 
 1. Validate input types against schema before execution
 2. Reject inputs that do not conform to schema
 3. Prevent code injection via input validation
-4. Apply length limits to string inputs (SHOULD be at least 10KB)
+4. Apply length limits to string inputs (MUST enforce a maximum input string length of at least 10KB)
+
+**SM-IS-01**: Implementations MUST enforce a maximum input string length of at least 10KB for each string-typed input parameter. Inputs exceeding the configured maximum MUST be rejected with a validation error before the tool script is invoked. Implementations MUST NOT silently truncate oversized inputs.
 
 ### 7.4 Output Sanitization
 

diff --git a/pkg/cli/compile_schedule_calendar_test.go b/pkg/cli/compile_schedule_calendar_test.go
@@ -4,9 +4,12 @@ package cli
 
 import (
 	"bytes"
+	"fmt"
 	"os"
+	"strings"
 	"testing"
 
+	"github.com/github/gh-aw/pkg/parser"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -301,3 +304,91 @@ func TestDisplayScheduleCalendar_ContainsAllHourHeaders(t *testing.T) {
 		assert.Contains(t, output, h, "hour header %q should appear in output", h)
 	}
 }
+
+// ---------------------------------------------------------------------------
+// End-to-end: fuzzy schedule → ScatterSchedule → compile_schedule_calendar
+// ---------------------------------------------------------------------------
+
+// TestFuzzyScheduleEndToEnd exercises a fuzzy cron expression through the full
+// pipeline: ScatterSchedule (pkg/parser) → parseCronSchedule → buildScheduleGrid
+// → displayScheduleCalendar. It asserts that:
+//  1. A fuzzy expression is scattered to a valid 5-field cron string.
+//  2. The scattered cron is accepted by parseCronSchedule without error.
+//  3. buildScheduleGrid registers at least one non-zero slot for the workflow.
+//  4. displayScheduleCalendar produces output that contains the workflow name.
+func TestFuzzyScheduleEndToEnd(t *testing.T) {
+fuzzyExpressions := []struct {
+fuzzyCron      string
+workflowID     string
+expectedHours  int // how many distinct hour values we expect (1 for DAILY patterns)
+}{
+{"FUZZY:DAILY * * *", "ci-doctor", 1},
+{"FUZZY:DAILY_WEEKDAYS * * *", "daily-planner", 1},
+{"FUZZY:DAILY_AROUND:14:0 * * *", "weekly-audit", 1},
+}
+
+for _, tt := range fuzzyExpressions {
+t.Run(fmt.Sprintf("%s/%s", tt.fuzzyCron, tt.workflowID), func(t *testing.T) {
+// Step 1: scatter the fuzzy expression to a real cron string.
+scatteredCron, err := parser.ScatterSchedule(tt.fuzzyCron, tt.workflowID)
+require.NoError(t, err, "ScatterSchedule should not error for %s", tt.fuzzyCron)
+require.NotEmpty(t, scatteredCron, "ScatterSchedule should return a non-empty cron")
+require.False(t, strings.HasPrefix(scatteredCron, "FUZZY:"),
+"scattered result must not be a fuzzy expression: %s", scatteredCron)
+
+fields := strings.Fields(scatteredCron)
+require.Len(t, fields, 5,
+"scattered cron %q must have exactly 5 fields", scatteredCron)
+
+// Step 2: parse the scattered cron with parseCronSchedule.
+hours, daysOfWeek, err := parseCronSchedule(scatteredCron)
+require.NoError(t, err,
+"parseCronSchedule should accept scattered cron %q", scatteredCron)
+assert.Len(t, hours, tt.expectedHours,
+"expected %d distinct hour(s) for %s", tt.expectedHours, tt.fuzzyCron)
+assert.NotEmpty(t, daysOfWeek,
+"daysOfWeek should be non-empty for %s", tt.fuzzyCron)
+
+// Step 3: buildScheduleGrid should register at least one slot.
+lockName := tt.workflowID + ".lock.yml"
+statsList := []*WorkflowStats{
+{Workflow: lockName, Schedules: []string{scatteredCron}},
+}
+grid := buildScheduleGrid(statsList)
+require.NotNil(t, grid, "buildScheduleGrid should return non-nil grid")
+
+totalSlots := 0
+for _, day := range grid {
+for _, count := range day {
+totalSlots += count
+}
+}
+assert.Greater(t, totalSlots, 0,
+"grid should contain at least one scheduled slot for %s", scatteredCron)
+
+// Step 4: displayScheduleCalendar should produce output referencing the hour.
+oldStderr := os.Stderr
+r, w, pipeErr := os.Pipe()
+require.NoError(t, pipeErr)
+os.Stderr = w
+
+displayScheduleCalendar(statsList)
+
+w.Close()
+os.Stderr = oldStderr
+
+var buf bytes.Buffer
+_, _ = buf.ReadFrom(r)
+output := buf.String()
+
+assert.Contains(t, output, "Schedule Heatmap",
+"output should contain Schedule Heatmap header")
+// The hour from the scattered cron should appear in the output.
+for _, h := range hours {
+hourStr := fmt.Sprintf("%02d", h)
+assert.Contains(t, output, hourStr,
+"output should contain hour %s from scattered cron %s", hourStr, scatteredCron)
+}
+})
+}
+}
diff --git a/pkg/cli/forecast_montecarlo.go b/pkg/cli/forecast_montecarlo.go
@@ -93,7 +93,7 @@ type ForecastMonteCarloSummary struct {
 // Returns nil when etObservations is empty or observedRunsPerPeriod ≤ 0.
 func runMonteCarlo(etObservations []int, successCount int, observedRunsPerPeriod float64, rng *rand.Rand) *ForecastMonteCarloSummary {
 	n := len(etObservations)
-	if n == 0 || observedRunsPerPeriod <= 0 {
+	if n == 0 || observedRunsPerPeriod <= 0 || math.IsNaN(observedRunsPerPeriod) || math.IsInf(observedRunsPerPeriod, 0) {
 		forecastMonteCarloLog.Printf("Skipping Monte Carlo: observations=%d, runs_per_period=%.2f", n, observedRunsPerPeriod)
 		return nil
 	}

diff --git a/pkg/cli/forecast_montecarlo_test.go b/pkg/cli/forecast_montecarlo_test.go
@@ -100,6 +100,85 @@ func TestRunMonteCarloNilOnEmpty(t *testing.T) {
 	assert.Nil(t, runMonteCarlo([]int{100, 200}, 2, -1.0, rng), "negative lambda")
 }
 
+// TestRunMonteCarloNonFiniteLambda verifies that runMonteCarlo returns nil for
+// non-finite λ inputs (NaN and +Inf) without hanging or panicking.
+// Specification reference: R-MC-001 requires graceful handling of degenerate λ values.
+func TestRunMonteCarloNonFiniteLambda(t *testing.T) {
+	obs := []int{1000, 2000, 3000}
+
+	tests := []struct {
+		name   string
+		lambda float64
+	}{
+		{"NaN lambda", math.NaN()},
+		{"+Inf lambda", math.Inf(1)},
+		{"-Inf lambda", math.Inf(-1)},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			rng := deterministicRNG()
+			result := runMonteCarlo(obs, len(obs), tt.lambda, rng)
+			assert.Nil(t, result, "non-finite λ=%v should return nil (zero-projection fallback)", tt.lambda)
+		})
+	}
+}
+
+// TestRunMonteCarloZeroLambdaFallback verifies the zero-projection fallback behaviour
+// (R-MC-001): when λ = 0 (observedRunsPerPeriod = 0), runMonteCarlo MUST return nil
+// rather than producing a summary with zero projections, signalling to the caller that
+// there are no runs to project.
+func TestRunMonteCarloZeroLambdaFallback(t *testing.T) {
+	tests := []struct {
+		name                  string
+		etObs                 []int
+		successCount          int
+		observedRunsPerPeriod float64
+		wantNil               bool
+	}{
+		{
+			name:                  "zero observedRunsPerPeriod returns nil",
+			etObs:                 []int{1000, 2000, 3000},
+			successCount:          3,
+			observedRunsPerPeriod: 0.0,
+			wantNil:               true,
+		},
+		{
+			name:                  "negative observedRunsPerPeriod returns nil",
+			etObs:                 []int{1000, 2000, 3000},
+			successCount:          3,
+			observedRunsPerPeriod: -0.001,
+			wantNil:               true,
+		},
+		{
+			name:                  "empty observations returns nil regardless of lambda",
+			etObs:                 []int{},
+			successCount:          0,
+			observedRunsPerPeriod: 5.0,
+			wantNil:               true,
+		},
+		{
+			name:                  "positive lambda with observations returns non-nil",
+			etObs:                 []int{1000, 2000, 3000},
+			successCount:          3,
+			observedRunsPerPeriod: 1.0,
+			wantNil:               false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			rng := deterministicRNG()
+			result := runMonteCarlo(tt.etObs, tt.successCount, tt.observedRunsPerPeriod, rng)
+			if tt.wantNil {
+				assert.Nil(t, result, "expected nil for λ=%.4f with %d observations", tt.observedRunsPerPeriod, len(tt.etObs))
+			} else {
+				assert.NotNil(t, result, "expected non-nil for λ=%.4f with %d observations", tt.observedRunsPerPeriod, len(tt.etObs))
+			}
+		})
+	}
+}
+
 // TestRunMonteCarloBasicProperties checks that the Monte Carlo summary satisfies
 // statistical invariants (P10 ≤ P50 ≤ P90, mean ≥ 0, stddev ≥ 0).
 func TestRunMonteCarloBasicProperties(t *testing.T) {

diff --git a/pkg/cli/forecast_test.go b/pkg/cli/forecast_test.go
@@ -3,6 +3,7 @@
 package cli
 
 import (
+	"math/rand"
 	"testing"
 	"time"
 
@@ -114,3 +115,57 @@ func TestDurationEnrichment(t *testing.T) {
 
 	assert.Equal(t, 5*time.Minute, r.Duration)
 }
+
+// TestObservedRunsPerPeriodConsistency verifies that the λ value stored in the
+// JSON-serialisable ForecastWorkflowResult.ObservedRunsPerPeriod field is the same
+// value that would be passed to runMonteCarlo (R-MC-002).
+//
+// This is a structural test: it constructs a result whose ObservedRunsPerPeriod is
+// set by the same arithmetic used in forecastWorkflow, then calls runMonteCarlo with
+// that field directly and asserts the simulation produces sensible output — confirming
+// that no intermediate recalculation or mutation of λ occurs between JSON output and
+// Monte Carlo execution.
+func TestObservedRunsPerPeriodConsistency(t *testing.T) {
+// Reproduce the λ calculation from forecastWorkflow.
+const (
+historyDays   = 30
+sampledRuns   = 15
+projectedDays = 30 // "month" period
+)
+observedRunsPerPeriod := float64(sampledRuns) / float64(historyDays) * float64(projectedDays)
+
+// Populate a ForecastWorkflowResult the same way forecastWorkflow does.
+result := ForecastWorkflowResult{
+WorkflowID:            "ci-doctor",
+Period:                "month",
+SampledRuns:           sampledRuns,
+HistoryDays:           historyDays,
+ObservedRunsPerPeriod: observedRunsPerPeriod,
+}
+
+// Build deterministic ET observations.
+etObs := make([]int, sampledRuns)
+for i := range etObs {
+etObs[i] = 10_000 + i*500
+}
+successCount := sampledRuns
+
+// runMonteCarlo uses result.ObservedRunsPerPeriod as λ — the same field that
+// appears in JSON output. Verify both the field value and the simulation are
+// consistent (non-nil, same λ).
+rng := rand.New(rand.NewSource(99)) //nolint:gosec
+mc := runMonteCarlo(etObs, successCount, result.ObservedRunsPerPeriod, rng)
+require.NotNil(t, mc, "runMonteCarlo must return non-nil for positive ObservedRunsPerPeriod")
+
+// The field exposed in JSON output must equal what was used for MC.
+assert.Equal(t, observedRunsPerPeriod, result.ObservedRunsPerPeriod,
+"ObservedRunsPerPeriod JSON field must equal the λ passed to runMonteCarlo")
+
+// Sanity-check simulation output is plausible for the given λ.
+assert.Greater(t, mc.P50ProjectedEffectiveTokens, 0,
+"P50 should be positive when success rate is 100%%")
+assert.LessOrEqual(t, mc.P10ProjectedEffectiveTokens, mc.P50ProjectedEffectiveTokens,
+"P10 ≤ P50")
+assert.LessOrEqual(t, mc.P50ProjectedEffectiveTokens, mc.P90ProjectedEffectiveTokens,
+"P50 ≤ P90")
+}