From f4f02486cda0cf126c6002de65adcc90a1706c98 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 3 May 2026 19:05:03 +0000 Subject: [PATCH 1/4] feat: add model alias validation enforcing the Model Alias Format spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements compile-time enforcement of the Model Alias Format Specification (docs/src/content/docs/reference/model-alias-specification.md): - V-MAF-001/006: Reject model identifiers not conforming to the ABNF grammar; error message names the offending character and segment type. - V-MAF-002: Reject effort parameter values outside {low, medium, high}. - V-MAF-003: Reject temperature parameter values outside [0.0, 2.0]. - V-MAF-004: Reject glob patterns in engine.model. - V-MAF-005: Reject alias keys containing '/', '?', or '&'. - V-MAF-010: Detect and report circular alias references (DFS) at compile time; abort compilation on any detected cycle. - V-MAF-011: Emit a warning for unrecognised parameter keys. New files: - pkg/workflow/model_identifier.go — ABNF grammar parser - pkg/workflow/model_alias_validation.go — compile-time validation - pkg/workflow/model_alias_validation_test.go — 35 tests covering all rules Wired into ParseWorkflowFile in compiler_orchestrator_workflow.go. Agent-Logs-Url: https://github.com/github/gh-aw/sessions/2f4eea04-b395-4344-947b-11e22a690c20 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../compiler_orchestrator_workflow.go | 21 + pkg/workflow/model_alias_validation.go | 299 ++++++++++++ pkg/workflow/model_alias_validation_test.go | 455 ++++++++++++++++++ pkg/workflow/model_identifier.go | 372 ++++++++++++++ 4 files changed, 1147 insertions(+) create mode 100644 pkg/workflow/model_alias_validation.go create mode 100644 pkg/workflow/model_alias_validation_test.go create mode 100644 pkg/workflow/model_identifier.go diff --git a/pkg/workflow/compiler_orchestrator_workflow.go b/pkg/workflow/compiler_orchestrator_workflow.go index 281d4a41b1..f77f358aac 100644 --- a/pkg/workflow/compiler_orchestrator_workflow.go +++ b/pkg/workflow/compiler_orchestrator_workflow.go @@ -66,6 +66,27 @@ func (c *Compiler) ParseWorkflowFile(markdownPath string) (*WorkflowData, error) // Store a stable workflow identifier derived from the file name. workflowData.WorkflowID = GetWorkflowIDFromPath(cleanPath) + // Validate model alias map: identifier syntax, parameter values, glob-in-engine.model, + // alias key format, and circular references (V-MAF-001..006, V-MAF-010, V-MAF-011). + { + var frontmatterModels map[string][]string + if toolsResult.parsedFrontmatter != nil { + frontmatterModels = toolsResult.parsedFrontmatter.Models + } + var engineModel string + if workflowData.EngineConfig != nil { + engineModel = workflowData.EngineConfig.Model + } + if err := c.validateModelAliasMap( + workflowData.ModelMappings, + frontmatterModels, + engineModel, + cleanPath, + ); err != nil { + return nil, err + } + } + // Validate run-install-scripts setting (warning in non-strict mode, error in strict mode) if err := c.validateRunInstallScripts(workflowData); err != nil { return nil, fmt.Errorf("%s: %w", cleanPath, err) diff --git a/pkg/workflow/model_alias_validation.go b/pkg/workflow/model_alias_validation.go new file mode 100644 index 0000000000..f88e6ba74f --- /dev/null +++ b/pkg/workflow/model_alias_validation.go @@ -0,0 +1,299 @@ +// This file implements compile-time validation of the Model Alias Format (MAF) +// as specified in docs/src/content/docs/reference/model-alias-specification.md. +// +// # Validation Rules Implemented +// +// - V-MAF-001: Reject model identifiers not conforming to the grammar (Section 4.1). +// - V-MAF-002: Reject effort values not in {low, medium, high}. +// - V-MAF-003: Reject temperature values outside [0.0, 2.0]. +// - V-MAF-004: Reject glob patterns in engine.model. +// - V-MAF-005: Reject alias keys containing "/", "?", or "&". +// - V-MAF-006: Reject identifiers with characters outside the allowed set; +// error message MUST name the offending character and segment type. +// - V-MAF-010: Detect and report circular alias references (DFS, compile time). +// - V-MAF-011: Emit a warning for unrecognised parameter keys. +// +// # Entry Point +// +// - validateModelAliasMap() is called from ParseWorkflowFile (compiler_orchestrator_workflow.go) +// after ModelMappings is populated. + +package workflow + +import ( + "fmt" + "os" + "sort" + "strings" + + "github.com/github/gh-aw/pkg/console" +) + +var modelAliasValidationLog = newValidationLogger("model_alias") + +// validateModelAliasMap is the main entry point for compile-time model-alias validation. +// It validates: +// - The user-supplied alias map entries in frontmatterModels (V-MAF-001..006, V-MAF-011). +// - The engine.model value (V-MAF-001, V-MAF-004, V-MAF-006). +// - Circular references across the fully-merged alias map (V-MAF-010). +// +// frontmatterModels contains only the aliases declared in the main workflow's +// frontmatter (not builtins or imports). Cycle detection runs over the full +// mergedAliasMap so that cycles spanning multiple layers are also caught. +// +// Returns a non-nil error (causing compilation to abort) for hard violations. +// Warnings are printed to stderr via the compiler's warning counter. +func (c *Compiler) validateModelAliasMap( + mergedAliasMap map[string][]string, + frontmatterModels map[string][]string, + engineModel string, + markdownPath string, +) error { + modelAliasValidationLog.Printf("Validating model alias map: %d merged entries, %d frontmatter entries, engine.model=%q", + len(mergedAliasMap), len(frontmatterModels), engineModel) + + // V-MAF-004: engine.model MUST NOT be a glob pattern. + // GitHub Actions expressions (${{ … }}) are exempt from syntax checks. + if engineModel != "" && !isExpression(engineModel) { + if err := validateEngineModelNotGlob(engineModel, markdownPath); err != nil { + return err + } + // V-MAF-001 + V-MAF-006: validate syntax of engine.model. + if errs := validateModelIdentifierStrings([]string{engineModel}, "engine.model"); len(errs) > 0 { + return formatCompilerError(markdownPath, "error", errs[0], nil) + } + } + + // Validate user-supplied frontmatter aliases only (builtins are pre-validated). + for key, entries := range frontmatterModels { + // V-MAF-005: alias keys MUST NOT contain "/", "?", or "&". + if err := validateAliasKey(key, markdownPath); err != nil { + return err + } + + // V-MAF-001 + V-MAF-002 + V-MAF-003 + V-MAF-006: validate each entry string. + if errs := validateModelIdentifierStrings(entries, "models."+displayKey(key)); len(errs) > 0 { + return formatCompilerError(markdownPath, "error", errs[0], nil) + } + + // V-MAF-011: warn about unrecognised parameter keys in each entry. + c.warnUnrecognizedModelParams(entries, markdownPath) + } + + // V-MAF-010: detect circular alias references across the merged map. + if err := detectCircularModelAliases(mergedAliasMap, markdownPath); err != nil { + return err + } + + modelAliasValidationLog.Print("Model alias map validation passed") + return nil +} + +// ─── V-MAF-004: glob check for engine.model ─────────────────────────────────── + +// validateEngineModelNotGlob returns an error if the engine.model value is a glob +// pattern (contains "*"), which is prohibited in engine.model (V-MAF-004). +func validateEngineModelNotGlob(engineModel, markdownPath string) error { + if strings.Contains(engineModel, "*") { + return formatCompilerError(markdownPath, "error", + fmt.Sprintf("engine.model: glob patterns are not allowed in engine.model; "+ + "got %q — glob patterns may only appear in models alias list entries (V-MAF-004)", engineModel), + nil) + } + return nil +} + +// ─── V-MAF-005: alias key validation ───────────────────────────────────────── + +// validateAliasKey validates a single alias map key (V-MAF-005). +// The empty string key ("") is allowed (default policy). +func validateAliasKey(key, markdownPath string) error { + if key == "" { + return nil // empty string is the default policy — permitted + } + for _, forbidden := range []string{"/", "?", "&"} { + if strings.Contains(key, forbidden) { + return formatCompilerError(markdownPath, "error", + fmt.Sprintf("models: alias key %q must not contain %q (V-MAF-005)", key, forbidden), + nil) + } + } + return nil +} + +// ─── V-MAF-001, 002, 003, 006: identifier syntax & param validation ─────────── + +// validateModelIdentifierStrings validates a slice of model identifier strings. +// Returns a slice of error messages (not wrapped errors) so the caller can +// decide how to report them. +func validateModelIdentifierStrings(identifiers []string, context string) []string { + var errs []string + for _, id := range identifiers { + if id == "" { + errs = append(errs, context+": model identifier must not be empty") + continue + } + // Skip GitHub Actions expressions — they are resolved at runtime. + if isExpression(id) { + continue + } + p, err := ParseModelIdentifier(id) + if err != nil { + // V-MAF-001 / V-MAF-006 + errs = append(errs, fmt.Sprintf("%s: %s", context, err.Error())) + continue + } + // V-MAF-002 and V-MAF-003: validate known parameter values. + if err := ValidateKnownParams(p.Params); err != nil { + errs = append(errs, fmt.Sprintf("%s: %s", context, err.Error())) + } + } + return errs +} + +// ─── V-MAF-011: unknown parameter warning ───────────────────────────────────── + +// warnUnrecognizedModelParams emits a compiler warning for each unrecognised +// parameter key found in the given model identifier strings (V-MAF-011). +func (c *Compiler) warnUnrecognizedModelParams(identifiers []string, markdownPath string) { + for _, id := range identifiers { + if id == "" || isExpression(id) { + continue + } + p, err := ParseModelIdentifier(id) + if err != nil { + continue // syntax errors are reported elsewhere + } + for _, k := range UnrecognizedParams(p.Params) { + msg := fmt.Sprintf("models: unrecognised parameter key %q in %q — "+ + "known parameters are: effort, temperature (V-MAF-011)", k, id) + fmt.Fprintln(os.Stderr, console.FormatWarningMessage( + formatCompilerMessage(markdownPath, "warning", msg))) + c.IncrementWarningCount() + } + } +} + +// ─── V-MAF-010: circular alias detection ───────────────────────────────────── + +// detectCircularModelAliases performs a full DFS cycle check over the merged +// alias map and returns an error naming every alias in the cycle (V-MAF-010). +// +// Algorithm (Section 8.6.1): +// +// For each alias key, perform a depth-first traversal of its list entries. +// Maintain a set of alias names on the current DFS path. +// If any traversal reaches an alias key already on the current path, a cycle +// is detected and MUST be reported as a compile-time error. +func detectCircularModelAliases(aliasMap map[string][]string, markdownPath string) error { + modelAliasValidationLog.Printf("Checking for circular alias references in %d aliases", len(aliasMap)) + + // visited tracks keys for which all DFS descendants have been fully explored + // (no cycle detected from that key). + visited := map[string]bool{} + + // Iterate keys in deterministic order for reproducible error messages. + keys := make([]string, 0, len(aliasMap)) + for k := range aliasMap { + keys = append(keys, k) + } + sort.Strings(keys) + + for _, key := range keys { + if visited[key] { + continue + } + path := []string{} // current DFS path (ordered) + if cycle := dfsCycleCheck(key, aliasMap, visited, path); cycle != nil { + // Format cycle chain for a clear error message. + chain := strings.Join(append(cycle, cycle[0]), " → ") + return formatCompilerError(markdownPath, "error", + fmt.Sprintf("circular alias reference detected: %s\n\n"+ + "Circular alias references are prohibited. Remove or rewrite the cycle in the 'models:' "+ + "frontmatter section (V-MAF-010).", chain), + nil) + } + } + + return nil +} + +// dfsCycleCheck performs a depth-first traversal starting at start. +// onPath holds the set of alias names currently on the active DFS path. +// Returns the cycle chain (slice of alias names forming the loop) or nil. +func dfsCycleCheck( + start string, + aliasMap map[string][]string, + visited map[string]bool, + path []string, +) []string { + // Build a set of aliases on the current path for O(1) membership tests. + onPath := map[string]bool{} + for _, n := range path { + onPath[n] = true + } + + // Inner recursive helper — mutates onPath and path via closure. + var dfs func(node string) []string + dfs = func(node string) []string { + if visited[node] { + return nil + } + if onPath[node] { + // Cycle found — return the chain from node back around. + idx := 0 + for i, n := range path { + if n == node { + idx = i + break + } + } + return path[idx:] + } + + onPath[node] = true + path = append(path, node) + + entries, inMap := aliasMap[node] + if inMap { + for _, entry := range entries { + // Extract base (strip params). + base, _, _ := strings.Cut(entry, "?") + // Only follow bare alias references (not provider-scoped names or globs). + if isAliasReference(base, aliasMap) { + if cycle := dfs(base); cycle != nil { + return cycle + } + } + } + } + + path = path[:len(path)-1] + onPath[node] = false + visited[node] = true + return nil + } + + return dfs(start) +} + +// isAliasReference reports whether base is a bare identifier that refers to +// another alias key in the alias map (as opposed to a provider-scoped name or glob). +func isAliasReference(base string, aliasMap map[string][]string) bool { + if strings.Contains(base, "/") || strings.Contains(base, "*") { + return false + } + _, exists := aliasMap[base] + return exists +} + +// ─── Utilities ──────────────────────────────────────────────────────────────── + +// displayKey returns a human-readable representation of an alias key for use in +// error messages. The empty-string key (default policy) is shown as `""`. +func displayKey(key string) string { + if key == "" { + return `""` + } + return key +} diff --git a/pkg/workflow/model_alias_validation_test.go b/pkg/workflow/model_alias_validation_test.go new file mode 100644 index 0000000000..76a7465f91 --- /dev/null +++ b/pkg/workflow/model_alias_validation_test.go @@ -0,0 +1,455 @@ +//go:build !integration + +package workflow + +import ( + "maps" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ─── T-MAF-001 through T-MAF-009: ParseModelIdentifier (Section 12.1.1) ────── + +// TestParseModelIdentifier_T_MAF_001 – bare alias name. +func TestParseModelIdentifier_T_MAF_001(t *testing.T) { + p, err := ParseModelIdentifier("sonnet") + require.NoError(t, err, "bare alias name should parse without error") + assert.Equal(t, "sonnet", p.Base, "base should be 'sonnet'") + assert.Empty(t, p.Provider, "provider should be empty for bare name") + assert.Empty(t, p.Params, "params should be empty when no query string") + assert.False(t, p.IsGlob, "bare name should not be a glob") +} + +// TestParseModelIdentifier_T_MAF_002 – bare alias name with effort parameter. +func TestParseModelIdentifier_T_MAF_002(t *testing.T) { + p, err := ParseModelIdentifier("opus?effort=high") + require.NoError(t, err, "bare name with effort param should parse") + assert.Equal(t, "opus", p.Base, "base should be 'opus'") + assert.Equal(t, "high", p.Params["effort"], "effort param should be 'high'") +} + +// TestParseModelIdentifier_T_MAF_003 – provider-scoped exact name. +func TestParseModelIdentifier_T_MAF_003(t *testing.T) { + p, err := ParseModelIdentifier("copilot/gpt-5") + require.NoError(t, err, "provider-scoped name should parse") + assert.Equal(t, "copilot", p.Provider, "provider should be 'copilot'") + assert.Equal(t, "gpt-5", p.ModelToken, "model token should be 'gpt-5'") + assert.False(t, p.IsGlob, "exact name should not be a glob") +} + +// TestParseModelIdentifier_T_MAF_004 – provider-scoped with multiple parameters. +func TestParseModelIdentifier_T_MAF_004(t *testing.T) { + p, err := ParseModelIdentifier("openai/o3?effort=low&temperature=0.2") + require.NoError(t, err, "provider-scoped with multiple params should parse") + assert.Equal(t, "openai", p.Provider, "provider should be 'openai'") + assert.Equal(t, "o3", p.ModelToken, "model token should be 'o3'") + assert.Equal(t, "low", p.Params["effort"], "effort should be 'low'") + assert.Equal(t, "0.2", p.Params["temperature"], "temperature should be '0.2'") +} + +// TestParseModelIdentifier_T_MAF_005 – glob pattern in engine.model must be rejected. +func TestParseModelIdentifier_T_MAF_005(t *testing.T) { + compiler := NewCompiler() + // Glob patterns are allowed in alias entries but NOT in engine.model. + err := compiler.validateModelAliasMap( + BuiltinModelAliases(), + nil, + "copilot/*sonnet*", + "/fake/path/workflow.md", + ) + require.Error(t, err, "glob pattern in engine.model should be rejected (V-MAF-004)") + assert.Contains(t, err.Error(), "V-MAF-004", "error should reference V-MAF-004") +} + +// TestParseModelIdentifier_T_MAF_006 – invalid effort value must be rejected. +func TestParseModelIdentifier_T_MAF_006(t *testing.T) { + _, err := ParseModelIdentifier("opus?effort=extreme") + require.NoError(t, err, "syntax parsing should succeed for unknown values") + + // Known-param validation rejects invalid effort values. + p, _ := ParseModelIdentifier("opus?effort=extreme") + err = ValidateKnownParams(p.Params) + require.Error(t, err, "effort=extreme should be rejected (V-MAF-002)") + assert.Contains(t, err.Error(), "V-MAF-002", "error should reference V-MAF-002") +} + +// TestParseModelIdentifier_T_MAF_007 – temperature out of range must be rejected. +func TestParseModelIdentifier_T_MAF_007(t *testing.T) { + p, err := ParseModelIdentifier("gpt-5?temperature=3.0") + require.NoError(t, err, "syntax parsing should succeed") + + err = ValidateKnownParams(p.Params) + require.Error(t, err, "temperature=3.0 should be rejected (V-MAF-003)") + assert.Contains(t, err.Error(), "V-MAF-003", "error should reference V-MAF-003") +} + +// TestParseModelIdentifier_T_MAF_008 – whitespace in identifier must be rejected. +func TestParseModelIdentifier_T_MAF_008(t *testing.T) { + _, err := ParseModelIdentifier("my model") + require.Error(t, err, "whitespace in model identifier should be rejected (V-MAF-006)") + assert.Contains(t, err.Error(), "segment type", "error should name the segment type (V-MAF-006)") +} + +// TestParseModelIdentifier_T_MAF_009 – colon in identifier must be rejected; error must name the char. +func TestParseModelIdentifier_T_MAF_009(t *testing.T) { + _, err := ParseModelIdentifier("my:model") + require.Error(t, err, "colon in model identifier should be rejected (V-MAF-006)") + assert.Contains(t, err.Error(), ":", "error message must identify the offending character (V-MAF-006)") + assert.Contains(t, err.Error(), "segment type", "error must name the segment type (V-MAF-006)") +} + +// ─── Additional syntax tests ────────────────────────────────────────────────── + +func TestParseModelIdentifier_ProviderScopedWithVersion(t *testing.T) { + p, err := ParseModelIdentifier("copilot/claude-opus-4.5") + require.NoError(t, err, "provider-scoped name with version should parse") + assert.Equal(t, "copilot", p.Provider) + assert.Equal(t, "claude-opus-4.5", p.ModelToken) +} + +func TestParseModelIdentifier_GlobPattern(t *testing.T) { + p, err := ParseModelIdentifier("copilot/*sonnet*") + require.NoError(t, err, "glob pattern should parse") + assert.Equal(t, "copilot", p.Provider) + assert.True(t, p.IsGlob, "identifier with * should be a glob") +} + +func TestParseModelIdentifier_BareNameStartDot(t *testing.T) { + _, err := ParseModelIdentifier(".hidden") + require.Error(t, err, "bare name starting with '.' must be rejected") +} + +func TestParseModelIdentifier_BareNameStartDash(t *testing.T) { + _, err := ParseModelIdentifier("-model") + require.Error(t, err, "bare name starting with '-' must be rejected") +} + +func TestParseModelIdentifier_ProviderEndsDash(t *testing.T) { + _, err := ParseModelIdentifier("copilot-/model") + require.Error(t, err, "provider token ending with '-' must be rejected") +} + +func TestParseModelIdentifier_EmptyModelToken(t *testing.T) { + _, err := ParseModelIdentifier("copilot/") + require.Error(t, err, "empty model token must be rejected") +} + +func TestParseModelIdentifier_EmptyString(t *testing.T) { + _, err := ParseModelIdentifier("") + require.Error(t, err, "empty string must be rejected") +} + +func TestParseModelIdentifier_ParamMissingValue(t *testing.T) { + _, err := ParseModelIdentifier("opus?effort=") + require.Error(t, err, "param with empty value must be rejected") +} + +func TestParseModelIdentifier_ParamMissingSeparator(t *testing.T) { + _, err := ParseModelIdentifier("opus?effortonly") + require.Error(t, err, "param without '=' separator must be rejected") +} + +func TestParseModelIdentifier_ParamKeyStartsDigit(t *testing.T) { + _, err := ParseModelIdentifier("opus?1key=val") + require.Error(t, err, "param key starting with digit must be rejected") +} + +func TestParseModelIdentifier_AtSign(t *testing.T) { + _, err := ParseModelIdentifier("my@model") + require.Error(t, err, "@ sign should be rejected (V-MAF-006)") +} + +func TestParseModelIdentifier_ExclamationMark(t *testing.T) { + _, err := ParseModelIdentifier("my!model") + require.Error(t, err, "! sign should be rejected") +} + +// ─── ValidateEffortParam ────────────────────────────────────────────────────── + +func TestValidateEffortParam(t *testing.T) { + tests := []struct { + value string + wantErr bool + }{ + {"low", false}, + {"medium", false}, + {"high", false}, + {"extreme", true}, + {"HIGH", true}, + {"", true}, + } + for _, tt := range tests { + t.Run(tt.value, func(t *testing.T) { + err := ValidateEffortParam(tt.value) + if tt.wantErr { + assert.Error(t, err, "effort=%q should fail validation", tt.value) + } else { + assert.NoError(t, err, "effort=%q should pass validation", tt.value) + } + }) + } +} + +// ─── ValidateTemperatureParam ───────────────────────────────────────────────── + +func TestValidateTemperatureParam(t *testing.T) { + tests := []struct { + value string + wantErr bool + }{ + {"0.0", false}, + {"0.7", false}, + {"1.0", false}, + {"2.0", false}, + {"0", false}, + {"2", false}, + {"-0.1", true}, + {"2.1", true}, + {"3.0", true}, + {"abc", true}, + {"", true}, + } + for _, tt := range tests { + t.Run(tt.value, func(t *testing.T) { + err := ValidateTemperatureParam(tt.value) + if tt.wantErr { + assert.Error(t, err, "temperature=%q should fail validation", tt.value) + } else { + assert.NoError(t, err, "temperature=%q should pass validation", tt.value) + } + }) + } +} + +// ─── UnrecognizedParams ─────────────────────────────────────────────────────── + +func TestUnrecognizedParams(t *testing.T) { + t.Run("known params produce no warnings", func(t *testing.T) { + unknown := UnrecognizedParams(map[string]string{"effort": "high", "temperature": "0.5"}) + assert.Empty(t, unknown, "known params should not be reported as unknown") + }) + + t.Run("unknown param is detected", func(t *testing.T) { + unknown := UnrecognizedParams(map[string]string{"foo": "bar"}) + assert.Contains(t, unknown, "foo", "unrecognised param 'foo' should be reported") + }) + + t.Run("mixed known and unknown", func(t *testing.T) { + unknown := UnrecognizedParams(map[string]string{"effort": "medium", "custom": "value"}) + assert.Contains(t, unknown, "custom", "unrecognised 'custom' should be reported") + assert.NotContains(t, unknown, "effort", "'effort' is known and should not appear") + }) +} + +// ─── V-MAF-005: alias key validation ───────────────────────────────────────── + +func TestValidateAliasKey(t *testing.T) { + tests := []struct { + key string + wantErr bool + }{ + {"sonnet", false}, + {"my-alias", false}, + {"", false}, // empty string = default policy, always allowed + {"a/b", true}, // V-MAF-005: slash + {"a?b", true}, // V-MAF-005: question mark + {"a&b", true}, // V-MAF-005: ampersand + } + for _, tt := range tests { + t.Run(tt.key, func(t *testing.T) { + err := validateAliasKey(tt.key, "/fake/path.md") + if tt.wantErr { + require.Error(t, err, "alias key %q should fail validation (V-MAF-005)", tt.key) + assert.Contains(t, err.Error(), "V-MAF-005", "error should reference V-MAF-005") + } else { + assert.NoError(t, err, "alias key %q should pass validation", tt.key) + } + }) + } +} + +// ─── V-MAF-010: circular alias detection ───────────────────────────────────── + +// T-MAF-040: direct 2-node cycle must be detected. +func TestDetectCircularModelAliases_T_MAF_040(t *testing.T) { + aliasMap := map[string][]string{ + "a": {"b"}, + "b": {"a"}, + } + err := detectCircularModelAliases(aliasMap, "/fake/path.md") + require.Error(t, err, "2-node cycle a → b → a must be detected (T-MAF-040)") + assert.Contains(t, err.Error(), "V-MAF-010", "error should reference V-MAF-010") +} + +// T-MAF-041: longer 3-node cycle must be detected; error message names all aliases. +func TestDetectCircularModelAliases_T_MAF_041(t *testing.T) { + aliasMap := map[string][]string{ + "a": {"b"}, + "b": {"c"}, + "c": {"a"}, + } + err := detectCircularModelAliases(aliasMap, "/fake/path.md") + require.Error(t, err, "3-node cycle a → b → c → a must be detected (T-MAF-041)") + // Error message must name all three aliases. + assert.Contains(t, err.Error(), "a", "cycle error should name alias 'a'") + assert.Contains(t, err.Error(), "b", "cycle error should name alias 'b'") + assert.Contains(t, err.Error(), "c", "cycle error should name alias 'c'") +} + +// Acyclic map should not produce an error. +func TestDetectCircularModelAliases_Acyclic(t *testing.T) { + aliasMap := map[string][]string{ + "small": {"mini"}, + "mini": {"haiku"}, + "haiku": {"copilot/*haiku*"}, + } + err := detectCircularModelAliases(aliasMap, "/fake/path.md") + assert.NoError(t, err, "acyclic alias chain should not be reported as cyclic") +} + +// Provider-scoped entries must NOT be treated as alias references. +func TestDetectCircularModelAliases_ProviderScopedNotFollowed(t *testing.T) { + // "sonnet" has a provider-scoped pattern; there is no "copilot" alias → no cycle. + aliasMap := map[string][]string{ + "sonnet": {"copilot/*sonnet*"}, + } + err := detectCircularModelAliases(aliasMap, "/fake/path.md") + assert.NoError(t, err, "provider-scoped entries should not be treated as alias references") +} + +// ─── validateModelAliasMap (integration) ───────────────────────────────────── + +func TestValidateModelAliasMap_ValidWorkflow(t *testing.T) { + compiler := NewCompiler() + err := compiler.validateModelAliasMap( + BuiltinModelAliases(), + map[string][]string{ + "deep-think": {"opus?effort=high", "gpt-5?effort=high"}, + "": {"deep-think", "sonnet"}, + }, + "copilot/gpt-5", + "/fake/path/workflow.md", + ) + assert.NoError(t, err, "valid alias map should pass validation") +} + +func TestValidateModelAliasMap_InvalidEffortInFrontmatter(t *testing.T) { + compiler := NewCompiler() + err := compiler.validateModelAliasMap( + BuiltinModelAliases(), + map[string][]string{ + "bad": {"opus?effort=extreme"}, + }, + "", + "/fake/path/workflow.md", + ) + require.Error(t, err, "invalid effort value in frontmatter should fail (V-MAF-002)") +} + +func TestValidateModelAliasMap_InvalidTemperatureInFrontmatter(t *testing.T) { + compiler := NewCompiler() + err := compiler.validateModelAliasMap( + BuiltinModelAliases(), + map[string][]string{ + "hot": {"gpt-5?temperature=5.0"}, + }, + "", + "/fake/path/workflow.md", + ) + require.Error(t, err, "out-of-range temperature in frontmatter should fail (V-MAF-003)") +} + +func TestValidateModelAliasMap_CircularFrontmatter(t *testing.T) { + mergedWithCycle := map[string][]string{ + "a": {"b"}, + "b": {"a"}, + } + // Add builtins (non-cyclic) + maps.Copy(mergedWithCycle, BuiltinModelAliases()) + + compiler := NewCompiler() + err := compiler.validateModelAliasMap( + mergedWithCycle, + map[string][]string{ + "a": {"b"}, + "b": {"a"}, + }, + "", + "/fake/path/workflow.md", + ) + require.Error(t, err, "cycle in frontmatter aliases should fail (V-MAF-010)") +} + +func TestValidateModelAliasMap_GlobInEngineModel(t *testing.T) { + compiler := NewCompiler() + err := compiler.validateModelAliasMap( + BuiltinModelAliases(), + nil, + "copilot/*gpt*", + "/fake/path/workflow.md", + ) + require.Error(t, err, "glob in engine.model should fail (V-MAF-004)") +} + +func TestValidateModelAliasMap_InvalidAliasKey(t *testing.T) { + compiler := NewCompiler() + err := compiler.validateModelAliasMap( + BuiltinModelAliases(), + map[string][]string{ + "bad/key": {"copilot/some-model"}, + }, + "", + "/fake/path/workflow.md", + ) + require.Error(t, err, "alias key with '/' should fail (V-MAF-005)") +} + +func TestValidateModelAliasMap_ExpressionModelSkipped(t *testing.T) { + // GitHub Actions expressions are exempt from syntax validation. + compiler := NewCompiler() + err := compiler.validateModelAliasMap( + BuiltinModelAliases(), + nil, + "${{ inputs.model }}", + "/fake/path/workflow.md", + ) + assert.NoError(t, err, "GitHub Actions expression in engine.model should be skipped") +} + +func TestValidateModelAliasMap_NoEngineModel(t *testing.T) { + compiler := NewCompiler() + err := compiler.validateModelAliasMap( + BuiltinModelAliases(), + nil, + "", + "/fake/path/workflow.md", + ) + assert.NoError(t, err, "empty engine.model should pass validation") +} + +// ─── T-MAF-030/031/032/033: merge precedence tests (already covered in model_aliases_test.go) +// but also exercised here via validateModelAliasMap for confidence. + +func TestValidateModelAliasMap_BuiltinCyclePreventedBySpec(t *testing.T) { + // The builtin aliases are guaranteed acyclic by the spec. Verify no false positive. + compiler := NewCompiler() + err := compiler.validateModelAliasMap( + BuiltinModelAliases(), + nil, + "", + "/fake/path/workflow.md", + ) + assert.NoError(t, err, "builtin alias map must be acyclic") +} + +// isAliasReference helper tests. +func TestIsAliasReference(t *testing.T) { + aliasMap := map[string][]string{"sonnet": {"copilot/*sonnet*"}} + + assert.True(t, isAliasReference("sonnet", aliasMap), "bare alias key should be detected as alias reference") + assert.False(t, isAliasReference("copilot/model", aliasMap), "provider-scoped entry should not be alias reference") + assert.False(t, isAliasReference("copilot/*sonnet*", aliasMap), "glob entry should not be alias reference") + assert.False(t, isAliasReference("unknown", aliasMap), "unknown bare name is not an alias reference") +} diff --git a/pkg/workflow/model_identifier.go b/pkg/workflow/model_identifier.go new file mode 100644 index 0000000000..e7305c9a53 --- /dev/null +++ b/pkg/workflow/model_identifier.go @@ -0,0 +1,372 @@ +// This file implements the Model Alias Format (MAF) identifier parser as defined in +// the Model Alias Format Specification (docs/src/content/docs/reference/model-alias-specification.md). +// +// # Model Identifier Syntax (Section 4) +// +// A model identifier string takes one of the following forms: +// +// bare-name e.g. "sonnet", "auto" +// provider/model-token e.g. "copilot/gpt-5" +// provider/model-glob-token e.g. "copilot/*sonnet*" +// any of the above + "?" params e.g. "opus?effort=high" +// +// # ABNF Grammar (Section 4.1) +// +// model-identifier = base-identifier [ "?" query-string ] +// +// base-identifier = bare-name +// / provider-scoped +// / glob-pattern +// +// bare-name = 1*( ALPHA / DIGIT / "-" / "_" / "." ) +// ; MUST NOT start with "-" or "." +// +// provider-scoped = provider-token "/" model-token +// +// provider-token = ALPHA 0*( ALPHA / DIGIT / "-" ) +// ; starts with a letter; hyphens allowed but not at end +// +// model-token = model-char 0*( model-char / "." model-char ) +// +// model-char = ALPHA / DIGIT / "-" / "_" +// +// glob-pattern = provider-token "/" model-glob-token +// +// model-glob-token = 1*( model-char / "." / "*" ) +// +// query-string = param *( "&" param ) +// +// param = param-key "=" param-value +// +// param-key = ALPHA 0*( ALPHA / DIGIT / "-" ) +// +// param-value = 1*( ALPHA / DIGIT / "-" / "_" / "." ) + +package workflow + +import ( + "errors" + "fmt" + "regexp" + "strconv" + "strings" +) + +// ParsedModelIdentifier holds the components of a parsed model identifier. +type ParsedModelIdentifier struct { + // Raw is the original unparsed string. + Raw string + // Base is the base identifier (before "?"). + Base string + // Provider is the provider token (empty for bare identifiers). + Provider string + // ModelToken is the model-token portion after the "/" (empty for bare identifiers). + ModelToken string + // IsGlob reports whether the model token contains a "*" wildcard. + IsGlob bool + // Params holds the URL-style query parameters (key → value). + Params map[string]string +} + +// IsBare reports whether the identifier is a bare name (no provider prefix). +func (p *ParsedModelIdentifier) IsBare() bool { return p.Provider == "" } + +// Defined parameter keys recognised by the spec (Section 6). +const ( + modelParamEffort = "effort" + modelParamTemperature = "temperature" +) + +// knownModelParams is the set of parameter keys defined in Section 6. +var knownModelParams = map[string]struct{}{ + modelParamEffort: {}, + modelParamTemperature: {}, +} + +// ─── Character-class helpers ───────────────────────────────────────────────── + +// regex patterns derived directly from the ABNF grammar. +var ( + // provider-token: starts with ALPHA, followed by ALPHA/DIGIT/"-"; must not end with "-" + reProviderToken = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9-]*$`) + + // model-char: ALPHA / DIGIT / "-" / "_" + // model-token: model-char segments separated by "."; each segment starts with ALPHA or DIGIT + reModelToken = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9_-]*(\.[A-Za-z0-9][A-Za-z0-9_-]*)*$`) + + // bare-name: starts with ALPHA/DIGIT; contains ALPHA/DIGIT/"-"/"_"/"." + reBareName = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9._-]*$`) + + // param-key: starts with ALPHA; followed by ALPHA/DIGIT/"-" + reParamKey = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9-]*$`) + + // param-value: 1*(ALPHA / DIGIT / "-" / "_" / ".") + reParamValue = regexp.MustCompile(`^[A-Za-z0-9._-]+$`) +) + +// firstForbiddenChar returns the first character in s that is outside the +// allowed set for the given segment type, or 0 if s is clean. +// This satisfies V-MAF-006: the implementation MUST name the offending character +// and the segment type. +func firstForbiddenChar(s, allowedPattern string) rune { + allowed := regexp.MustCompile(`^[` + allowedPattern + `]$`) + for _, r := range s { + if !allowed.MatchString(string(r)) { + return r + } + } + return 0 +} + +// ParseModelIdentifier parses a model identifier string into its components. +// +// The identifier format follows the ABNF grammar in Section 4.1 of the spec. +// Returns a non-nil *ParsedModelIdentifier on success. +// Returns an error (satisfying V-MAF-001 and V-MAF-006) on syntax violations. +func ParseModelIdentifier(s string) (*ParsedModelIdentifier, error) { + if s == "" { + return nil, errors.New("model identifier must not be empty") + } + + // Split on the first "?" to separate base from query string. + base, rawQuery, _ := strings.Cut(s, "?") + + p := &ParsedModelIdentifier{ + Raw: s, + Base: base, + Params: map[string]string{}, + } + + // ── Validate base identifier ───────────────────────────────────────────── + if strings.Contains(base, "/") { + // Provider-scoped or glob pattern. + provider, modelPart, _ := strings.Cut(base, "/") + + if err := validateProviderToken(provider); err != nil { + return nil, err + } + + p.Provider = provider + + if strings.Contains(modelPart, "*") { + // Glob pattern. + if err := validateModelGlobToken(modelPart); err != nil { + return nil, err + } + p.ModelToken = modelPart + p.IsGlob = true + } else { + // Exact provider-scoped name. + if err := validateModelToken(modelPart); err != nil { + return nil, err + } + p.ModelToken = modelPart + } + } else { + // Bare name. + if err := validateBareName(base); err != nil { + return nil, err + } + } + + // ── Parse and validate query string ───────────────────────────────────── + if rawQuery != "" { + params, err := parseQueryString(rawQuery) + if err != nil { + return nil, err + } + p.Params = params + } + + return p, nil +} + +// ─── Segment validators ─────────────────────────────────────────────────────── + +// validateProviderToken validates a provider token per the ABNF grammar. +func validateProviderToken(token string) error { + if token == "" { + return errors.New("model identifier: provider token must not be empty (segment type: provider)") + } + if !reProviderToken.MatchString(token) { + if r := firstForbiddenChar(token, `A-Za-z0-9-`); r != 0 { + return fmt.Errorf("model identifier: character %q is not allowed in provider token %q (segment type: provider)", r, token) + } + if !isASCIILetter(rune(token[0])) { + return fmt.Errorf("model identifier: provider token %q must start with a letter (segment type: provider)", token) + } + } + // Provider token must not end with "-". + if strings.HasSuffix(token, "-") { + return fmt.Errorf("model identifier: provider token %q must not end with '-' (segment type: provider)", token) + } + return nil +} + +// validateModelToken validates a model token (no wildcards) per the ABNF grammar. +func validateModelToken(token string) error { + if token == "" { + return errors.New("model identifier: model token must not be empty (segment type: model)") + } + if !reModelToken.MatchString(token) { + if r := firstForbiddenChar(token, `A-Za-z0-9_.-`); r != 0 { + return fmt.Errorf("model identifier: character %q is not allowed in model token %q (segment type: model)", r, token) + } + return fmt.Errorf("model identifier: model token %q is syntactically invalid (segment type: model)", token) + } + return nil +} + +// validateModelGlobToken validates a model-glob-token (may contain "*"). +func validateModelGlobToken(token string) error { + if token == "" { + return errors.New("model identifier: model glob token must not be empty (segment type: model)") + } + for _, r := range token { + switch { + case r >= 'A' && r <= 'Z', r >= 'a' && r <= 'z', r >= '0' && r <= '9': + // ALPHA / DIGIT — always allowed + case r == '-', r == '_', r == '.', r == '*': + // allowed in glob tokens + default: + return fmt.Errorf("model identifier: character %q is not allowed in glob model token %q (segment type: model)", r, token) + } + } + return nil +} + +// validateBareName validates a bare identifier name per the ABNF grammar. +func validateBareName(name string) error { + if name == "" { + return errors.New("model identifier: bare name must not be empty (segment type: alias)") + } + // Must not start with "-" or ".". + if name[0] == '-' || name[0] == '.' { + return fmt.Errorf("model identifier: bare name %q must not start with '-' or '.' (segment type: alias)", name) + } + if !reBareName.MatchString(name) { + if r := firstForbiddenChar(name, `A-Za-z0-9._-`); r != 0 { + return fmt.Errorf("model identifier: character %q is not allowed in bare name %q (segment type: alias)", r, name) + } + return fmt.Errorf("model identifier: bare name %q is syntactically invalid (segment type: alias)", name) + } + return nil +} + +// ─── Parameter parsing ──────────────────────────────────────────────────────── + +// parseQueryString parses a query string of the form "key=value&key=value…". +// Returns an error if any key or value violates the grammar or if known parameters +// have invalid values. +func parseQueryString(raw string) (map[string]string, error) { + params := map[string]string{} + for pair := range strings.SplitSeq(raw, "&") { + if pair == "" { + return nil, fmt.Errorf("model identifier: empty parameter pair in query string %q", raw) + } + k, v, found := strings.Cut(pair, "=") + if !found { + return nil, fmt.Errorf("model identifier: parameter %q is missing '=' separator", pair) + } + if err := validateParamKey(k); err != nil { + return nil, err + } + if err := validateParamValue(v); err != nil { + return nil, err + } + params[k] = v + } + return params, nil +} + +// validateParamKey validates a parameter key per the ABNF grammar. +func validateParamKey(key string) error { + if key == "" { + return errors.New("model identifier: parameter key must not be empty (segment type: parameter key)") + } + if !reParamKey.MatchString(key) { + if r := firstForbiddenChar(key, `A-Za-z0-9-`); r != 0 { + return fmt.Errorf("model identifier: character %q is not allowed in parameter key %q (segment type: parameter key)", r, key) + } + if !isASCIILetter(rune(key[0])) { + return fmt.Errorf("model identifier: parameter key %q must start with a letter (segment type: parameter key)", key) + } + } + return nil +} + +// validateParamValue validates a parameter value per the ABNF grammar. +func validateParamValue(value string) error { + if value == "" { + return errors.New("model identifier: parameter value must not be empty (segment type: parameter value)") + } + if !reParamValue.MatchString(value) { + if r := firstForbiddenChar(value, `A-Za-z0-9._-`); r != 0 { + return fmt.Errorf("model identifier: character %q is not allowed in parameter value %q (segment type: parameter value)", r, value) + } + } + return nil +} + +// ─── Known-parameter validation ─────────────────────────────────────────────── + +// ValidateEffortParam validates the "effort" parameter value (V-MAF-002). +// Allowed values: low, medium, high. +func ValidateEffortParam(value string) error { + switch value { + case "low", "medium", "high": + return nil + default: + return fmt.Errorf("model parameter 'effort': value %q is not valid; allowed values are: low, medium, high (V-MAF-002)", value) + } +} + +// ValidateTemperatureParam validates the "temperature" parameter value (V-MAF-003). +// Must be a decimal float in [0.0, 2.0]. +func ValidateTemperatureParam(value string) error { + f, err := strconv.ParseFloat(value, 64) + if err != nil { + return fmt.Errorf("model parameter 'temperature': value %q cannot be parsed as a decimal float (V-MAF-003)", value) + } + if f < 0.0 || f > 2.0 { + return fmt.Errorf("model parameter 'temperature': value %q is out of range; must be in [0.0, 2.0] (V-MAF-003)", value) + } + return nil +} + +// ValidateKnownParams validates the known parameters in a parsed identifier. +// Unknown parameters are tolerated (V-MAF-011 emits a warning, not an error). +// Returns an error if a known parameter has an invalid value. +func ValidateKnownParams(params map[string]string) error { + if v, ok := params[modelParamEffort]; ok { + if err := ValidateEffortParam(v); err != nil { + return err + } + } + if v, ok := params[modelParamTemperature]; ok { + if err := ValidateTemperatureParam(v); err != nil { + return err + } + } + return nil +} + +// UnrecognizedParams returns the list of parameter keys in params that are not +// defined in Section 6 (i.e., not effort or temperature). +// Reserved future parameters (Section 6.3) are returned here as well since they +// are not yet defined. +func UnrecognizedParams(params map[string]string) []string { + var unknown []string + for k := range params { + if _, known := knownModelParams[k]; !known { + unknown = append(unknown, k) + } + } + return unknown +} + +// ─── Utility ────────────────────────────────────────────────────────────────── + +func isASCIILetter(r rune) bool { + return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') +} From 36ac0d080f113c26e5a8c38799824ba44471c2e2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 3 May 2026 19:13:16 +0000 Subject: [PATCH 2/4] refactor: address code review - replace regex closure with dedicated helpers and struct-based DFS Agent-Logs-Url: https://github.com/github/gh-aw/sessions/2f4eea04-b395-4344-947b-11e22a690c20 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/model_alias_validation.go | 79 +++++++++++++------------- pkg/workflow/model_identifier.go | 68 ++++++++++++++++------ 2 files changed, 89 insertions(+), 58 deletions(-) diff --git a/pkg/workflow/model_alias_validation.go b/pkg/workflow/model_alias_validation.go index f88e6ba74f..17074c286e 100644 --- a/pkg/workflow/model_alias_validation.go +++ b/pkg/workflow/model_alias_validation.go @@ -219,7 +219,7 @@ func detectCircularModelAliases(aliasMap map[string][]string, markdownPath strin } // dfsCycleCheck performs a depth-first traversal starting at start. -// onPath holds the set of alias names currently on the active DFS path. +// visited tracks fully explored nodes (no cycle reachable from there). // Returns the cycle chain (slice of alias names forming the loop) or nil. func dfsCycleCheck( start string, @@ -227,54 +227,53 @@ func dfsCycleCheck( visited map[string]bool, path []string, ) []string { - // Build a set of aliases on the current path for O(1) membership tests. - onPath := map[string]bool{} - for _, n := range path { - onPath[n] = true + state := &dfsState{ + aliasMap: aliasMap, + visited: visited, + onPath: map[string]bool{}, + path: path, } + return state.dfs(start) +} - // Inner recursive helper — mutates onPath and path via closure. - var dfs func(node string) []string - dfs = func(node string) []string { - if visited[node] { - return nil - } - if onPath[node] { - // Cycle found — return the chain from node back around. - idx := 0 - for i, n := range path { - if n == node { - idx = i - break - } +// dfsState holds the mutable state for a single DFS traversal. +type dfsState struct { + aliasMap map[string][]string + visited map[string]bool + onPath map[string]bool + path []string +} + +func (s *dfsState) dfs(node string) []string { + if s.visited[node] { + return nil + } + if s.onPath[node] { + // Cycle found — return the chain from node back around. + for i, n := range s.path { + if n == node { + return s.path[i:] } - return path[idx:] } + return s.path // fallback: should not happen + } + + s.onPath[node] = true + s.path = append(s.path, node) - onPath[node] = true - path = append(path, node) - - entries, inMap := aliasMap[node] - if inMap { - for _, entry := range entries { - // Extract base (strip params). - base, _, _ := strings.Cut(entry, "?") - // Only follow bare alias references (not provider-scoped names or globs). - if isAliasReference(base, aliasMap) { - if cycle := dfs(base); cycle != nil { - return cycle - } - } + for _, entry := range s.aliasMap[node] { + base, _, _ := strings.Cut(entry, "?") + if isAliasReference(base, s.aliasMap) { + if cycle := s.dfs(base); cycle != nil { + return cycle } } - - path = path[:len(path)-1] - onPath[node] = false - visited[node] = true - return nil } - return dfs(start) + s.path = s.path[:len(s.path)-1] + s.onPath[node] = false + s.visited[node] = true + return nil } // isAliasReference reports whether base is a bare identifier that refers to diff --git a/pkg/workflow/model_identifier.go b/pkg/workflow/model_identifier.go index e7305c9a53..338d1842be 100644 --- a/pkg/workflow/model_identifier.go +++ b/pkg/workflow/model_identifier.go @@ -104,20 +104,56 @@ var ( reParamValue = regexp.MustCompile(`^[A-Za-z0-9._-]+$`) ) -// firstForbiddenChar returns the first character in s that is outside the -// allowed set for the given segment type, or 0 if s is clean. -// This satisfies V-MAF-006: the implementation MUST name the offending character -// and the segment type. -func firstForbiddenChar(s, allowedPattern string) rune { - allowed := regexp.MustCompile(`^[` + allowedPattern + `]$`) +// firstForbiddenCharInProviderOrParam returns the first character in s that is not in +// [A-Za-z0-9-], or 0 if all characters are allowed. +// Used for provider tokens and parameter keys (V-MAF-006). +func firstForbiddenCharInProviderOrParam(s string) rune { for _, r := range s { - if !allowed.MatchString(string(r)) { + if !isAlpha(r) && !isDigit(r) && r != '-' { return r } } return 0 } +// firstForbiddenCharInModelToken returns the first character in s that is not in +// [A-Za-z0-9_.-], or 0 if all characters are allowed. +// Used for model tokens (V-MAF-006). +func firstForbiddenCharInModelToken(s string) rune { + for _, r := range s { + if !isAlpha(r) && !isDigit(r) && r != '_' && r != '.' && r != '-' { + return r + } + } + return 0 +} + +// firstForbiddenCharInBareName returns the first character in s that is not in +// [A-Za-z0-9._-], or 0 if all characters are allowed. +// Used for bare names (V-MAF-006). +func firstForbiddenCharInBareName(s string) rune { + for _, r := range s { + if !isAlpha(r) && !isDigit(r) && r != '_' && r != '.' && r != '-' { + return r + } + } + return 0 +} + +// firstForbiddenCharInParamValue returns the first character in s that is not in +// [A-Za-z0-9._-], or 0 if all characters are allowed. +// Used for parameter values (V-MAF-006). +func firstForbiddenCharInParamValue(s string) rune { + for _, r := range s { + if !isAlpha(r) && !isDigit(r) && r != '_' && r != '.' && r != '-' { + return r + } + } + return 0 +} + +func isAlpha(r rune) bool { return isLetter(r) } + // ParseModelIdentifier parses a model identifier string into its components. // // The identifier format follows the ABNF grammar in Section 4.1 of the spec. @@ -189,10 +225,10 @@ func validateProviderToken(token string) error { return errors.New("model identifier: provider token must not be empty (segment type: provider)") } if !reProviderToken.MatchString(token) { - if r := firstForbiddenChar(token, `A-Za-z0-9-`); r != 0 { + if r := firstForbiddenCharInProviderOrParam(token); r != 0 { return fmt.Errorf("model identifier: character %q is not allowed in provider token %q (segment type: provider)", r, token) } - if !isASCIILetter(rune(token[0])) { + if !isAlpha(rune(token[0])) { return fmt.Errorf("model identifier: provider token %q must start with a letter (segment type: provider)", token) } } @@ -209,7 +245,7 @@ func validateModelToken(token string) error { return errors.New("model identifier: model token must not be empty (segment type: model)") } if !reModelToken.MatchString(token) { - if r := firstForbiddenChar(token, `A-Za-z0-9_.-`); r != 0 { + if r := firstForbiddenCharInModelToken(token); r != 0 { return fmt.Errorf("model identifier: character %q is not allowed in model token %q (segment type: model)", r, token) } return fmt.Errorf("model identifier: model token %q is syntactically invalid (segment type: model)", token) @@ -245,7 +281,7 @@ func validateBareName(name string) error { return fmt.Errorf("model identifier: bare name %q must not start with '-' or '.' (segment type: alias)", name) } if !reBareName.MatchString(name) { - if r := firstForbiddenChar(name, `A-Za-z0-9._-`); r != 0 { + if r := firstForbiddenCharInBareName(name); r != 0 { return fmt.Errorf("model identifier: character %q is not allowed in bare name %q (segment type: alias)", r, name) } return fmt.Errorf("model identifier: bare name %q is syntactically invalid (segment type: alias)", name) @@ -285,10 +321,10 @@ func validateParamKey(key string) error { return errors.New("model identifier: parameter key must not be empty (segment type: parameter key)") } if !reParamKey.MatchString(key) { - if r := firstForbiddenChar(key, `A-Za-z0-9-`); r != 0 { + if r := firstForbiddenCharInProviderOrParam(key); r != 0 { return fmt.Errorf("model identifier: character %q is not allowed in parameter key %q (segment type: parameter key)", r, key) } - if !isASCIILetter(rune(key[0])) { + if !isAlpha(rune(key[0])) { return fmt.Errorf("model identifier: parameter key %q must start with a letter (segment type: parameter key)", key) } } @@ -301,7 +337,7 @@ func validateParamValue(value string) error { return errors.New("model identifier: parameter value must not be empty (segment type: parameter value)") } if !reParamValue.MatchString(value) { - if r := firstForbiddenChar(value, `A-Za-z0-9._-`); r != 0 { + if r := firstForbiddenCharInParamValue(value); r != 0 { return fmt.Errorf("model identifier: character %q is not allowed in parameter value %q (segment type: parameter value)", r, value) } } @@ -366,7 +402,3 @@ func UnrecognizedParams(params map[string]string) []string { } // ─── Utility ────────────────────────────────────────────────────────────────── - -func isASCIILetter(r rune) bool { - return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') -} From 0ea3f101a019a2d438eec432a1e455f3fec9e4a0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 3 May 2026 19:23:59 +0000 Subject: [PATCH 3/4] docs(adr): add draft ADR-29995 for compile-time model alias format validation Generated by the Design Decision Gate workflow as a draft for PR #29995. --- ...pile-time-model-alias-format-validation.md | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 docs/adr/29995-compile-time-model-alias-format-validation.md diff --git a/docs/adr/29995-compile-time-model-alias-format-validation.md b/docs/adr/29995-compile-time-model-alias-format-validation.md new file mode 100644 index 0000000000..a21fbbcb4b --- /dev/null +++ b/docs/adr/29995-compile-time-model-alias-format-validation.md @@ -0,0 +1,94 @@ +# ADR-29995: Compile-Time Validation of the Model Alias Format Specification + +**Date**: 2026-05-03 +**Status**: Draft +**Deciders**: Unknown + +--- + +## Part 1 — Narrative (Human-Friendly) + +### Context + +The repository defines a Model Alias Format (MAF) specification (`docs/src/content/docs/reference/model-alias-specification.md`) that governs the syntax of model identifier strings used in workflow frontmatter (`models:` map) and the `engine.model` field. Prior to this change, the spec existed as documentation only: invalid identifiers (bad characters, circular aliases, out-of-range parameters, globs where forbidden) would either be silently accepted at compile time and fail at runtime, or propagate as confusing downstream errors. The workflow compiler already had a validation extension point (`ParseWorkflowFile`), making it a natural site for enforcement. The spec itself defines discrete, numbered validation rules (V-MAF-001 through V-MAF-013), making a rule-by-rule implementation tractable. + +### Decision + +We will enforce the Model Alias Format specification at compile time by wiring an ABNF grammar parser (`ParseModelIdentifier`) and a set of validation functions (`validateModelAliasMap`) into `ParseWorkflowFile`, immediately after the model mappings are populated. Violations of hard rules (V-MAF-001 through V-MAF-010) abort compilation with a specific error message naming the offending character and rule code. Soft rules (V-MAF-011) emit warnings. GitHub Actions expression strings (`${{ … }}`) are exempt from syntax checks because they are resolved at runtime. This approach front-loads error detection at the earliest possible phase, providing actionable feedback before any agent work begins. + +### Alternatives Considered + +#### Alternative 1: Runtime Validation at Agent Invocation + +Validate model identifiers only when an agent is actually dispatched and the model is resolved. This would catch all the same errors but only after the workflow has started executing, meaning partial work may have already been done (steps executed, tokens consumed) before the bad identifier surfaces. It was rejected because the compiler already parses the full workflow graph before execution; shifting validation left is strictly better for the operator experience and aligns with the existing compiler's role as the semantic gatekeeper. + +#### Alternative 2: JSON Schema / YAML Schema Validation + +Encode the MAF grammar constraints as a JSON Schema attached to the workflow file format and validate at schema-validation time. This approach is declarative and tooling-friendly (editors can surface errors inline) but cannot express recursive constraints such as circular alias detection (V-MAF-010) or contextual rules such as "globs are allowed in alias entries but not in `engine.model`" (V-MAF-004). It was rejected because the MAF spec's constraint set exceeds what JSON Schema can express without custom keywords. + +#### Alternative 3: No Enforcement — Spec as Documentation Only + +Leave the spec as normative documentation and rely on code review and convention to prevent violations. This was rejected because experience showed that undocumented or leniently enforced identifier formats accumulate technical debt: callers begin passing provider-scoped globs to `engine.model`, or create circular aliases that are only caught in production, or use undocumented parameter keys that silently have no effect. + +### Consequences + +#### Positive +- Malformed model identifiers are caught at the earliest possible phase, before any agent work begins, producing clear error messages that name the offending character and the violated rule code (e.g., V-MAF-004). +- Circular alias chains — a subtle class of infinite-loop bugs — are now detected deterministically at compile time via DFS cycle detection. +- The spec's numbered validation rules map 1:1 to code, making it straightforward to verify coverage and add new rules as the spec evolves. + +#### Negative +- Workflows that previously relied on leniently accepted (but technically invalid) model identifiers will now fail to compile; operators must update their frontmatter to conform to the spec. +- Adding a validation pass to `ParseWorkflowFile` increases compile-time latency proportional to the number of alias entries; for very large alias maps this may be measurable. + +#### Neutral +- GitHub Actions expressions in model identifier fields (`${{ inputs.model }}`) must be explicitly detected and skipped, adding a dependency on an `isExpression` helper that must be kept in sync with the GHA expression syntax. +- Validation rules V-MAF-012 and V-MAF-013 (effort warning for non-reasoning models; runtime cycle guard) are explicitly out of scope for compile-time enforcement and must remain as runtime/agent-side checks. + +--- + +## Part 2 — Normative Specification (RFC 2119) + +> The key words **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, **SHALL NOT**, **SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, and **OPTIONAL** in this section are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119). + +### Model Identifier Parsing + +1. Implementations **MUST** parse every model identifier string using the ABNF grammar defined in Section 4.1 of the Model Alias Format Specification before the identifier is used in any compilation or execution context. +2. Implementations **MUST NOT** accept a model identifier that contains a character outside the allowed set for its segment type; the resulting error message **MUST** name the offending character and the segment type (V-MAF-001, V-MAF-006). +3. Implementations **MUST NOT** accept an empty model identifier string. +4. Implementations **SHOULD** exempt strings matching the GitHub Actions expression pattern (`${{ … }}`) from syntax validation, as their content is resolved at runtime. + +### Parameter Value Validation + +1. Implementations **MUST** reject any model identifier whose `effort` parameter value is not one of `low`, `medium`, or `high` (V-MAF-002). +2. Implementations **MUST** reject any model identifier whose `temperature` parameter value cannot be parsed as a decimal float in the range `[0.0, 2.0]` inclusive (V-MAF-003). +3. Implementations **SHOULD** emit a warning for each unrecognised parameter key encountered in a model identifier; implementations **MUST NOT** treat an unrecognised parameter key as a hard error (V-MAF-011). + +### Alias Key Constraints + +1. Implementations **MUST NOT** accept an alias map key that contains the characters `/`, `?`, or `&` (V-MAF-005). +2. Implementations **MAY** accept the empty string as an alias key; it is interpreted as the default policy alias. + +### Engine Model Constraints + +1. Implementations **MUST NOT** accept a glob pattern (an identifier containing `*`) as the value of `engine.model` (V-MAF-004). +2. Implementations **MUST** validate the syntax of a non-empty `engine.model` value using the same ABNF grammar applied to alias entries. + +### Circular Alias Detection + +1. Implementations **MUST** perform a depth-first cycle check over the fully merged alias map after all alias layers (builtins, imports, frontmatter) have been combined (V-MAF-010). +2. Implementations **MUST** abort compilation if a cycle is detected; the error message **MUST** name every alias key involved in the cycle. +3. Implementations **MUST NOT** follow provider-scoped names (identifiers containing `/`) or glob patterns as alias references during cycle detection. + +### Compile-Time Integration + +1. Implementations **MUST** invoke model alias validation within `ParseWorkflowFile` immediately after `ModelMappings` is populated, before returning the compiled `WorkflowData`. +2. Implementations **MUST** propagate any hard-validation error as the return value of `ParseWorkflowFile`, preventing further compilation. + +### Conformance + +An implementation is considered conformant with this ADR if it satisfies all **MUST** and **MUST NOT** requirements above. Failure to meet any **MUST** or **MUST NOT** requirement constitutes non-conformance. + +--- + +*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/25288249489) workflow. The PR author must review, complete, and finalize this document before the PR can merge.* From e5ea993986e2455ba4b64acdf3a56e38ac9c8fdc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 3 May 2026 19:30:36 +0000 Subject: [PATCH 4/4] fix: address review comments - NaN/Inf temp rejection, engine.model V-MAF-011, fuzz tests - ValidateTemperatureParam: reject NaN and infinite values (math.IsNaN/IsInf check) - parseQueryString doc comment: corrects claim about known-param validation (happens in ValidateKnownParams, not here) - validateModelAliasMap: apply V-MAF-011 warning to engine.model unrecognised params - Add NaN/Inf test cases to TestValidateTemperatureParam - Add TestValidateModelAliasMap_EngineModelUnknownParamEmitsWarning - Add model_identifier_fuzz_test.go with FuzzParseModelIdentifier and FuzzValidateTemperatureParam Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c6fe1e8d-74bb-48d3-968f-c0e4c9dd68a9 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/model_alias_validation.go | 2 + pkg/workflow/model_alias_validation_test.go | 20 +++ pkg/workflow/model_identifier.go | 10 +- pkg/workflow/model_identifier_fuzz_test.go | 146 ++++++++++++++++++++ 4 files changed, 175 insertions(+), 3 deletions(-) create mode 100644 pkg/workflow/model_identifier_fuzz_test.go diff --git a/pkg/workflow/model_alias_validation.go b/pkg/workflow/model_alias_validation.go index 17074c286e..15da312457 100644 --- a/pkg/workflow/model_alias_validation.go +++ b/pkg/workflow/model_alias_validation.go @@ -62,6 +62,8 @@ func (c *Compiler) validateModelAliasMap( if errs := validateModelIdentifierStrings([]string{engineModel}, "engine.model"); len(errs) > 0 { return formatCompilerError(markdownPath, "error", errs[0], nil) } + // V-MAF-011: warn about unrecognised parameter keys in engine.model. + c.warnUnrecognizedModelParams([]string{engineModel}, markdownPath) } // Validate user-supplied frontmatter aliases only (builtins are pre-validated). diff --git a/pkg/workflow/model_alias_validation_test.go b/pkg/workflow/model_alias_validation_test.go index 76a7465f91..79c938e15c 100644 --- a/pkg/workflow/model_alias_validation_test.go +++ b/pkg/workflow/model_alias_validation_test.go @@ -210,6 +210,12 @@ func TestValidateTemperatureParam(t *testing.T) { {"3.0", true}, {"abc", true}, {"", true}, + // NaN and Inf must be rejected (strconv.ParseFloat accepts them) + {"NaN", true}, + {"nan", true}, + {"+Inf", true}, + {"-Inf", true}, + {"Inf", true}, } for _, tt := range tests { t.Run(tt.value, func(t *testing.T) { @@ -444,6 +450,20 @@ func TestValidateModelAliasMap_BuiltinCyclePreventedBySpec(t *testing.T) { assert.NoError(t, err, "builtin alias map must be acyclic") } +func TestValidateModelAliasMap_EngineModelUnknownParamEmitsWarning(t *testing.T) { + // V-MAF-011: engine.model with an unrecognised query parameter should not cause an + // error but should increment the warning counter. + compiler := NewCompiler() + err := compiler.validateModelAliasMap( + BuiltinModelAliases(), + nil, + "opus?unknownparam=value", + "/fake/path/workflow.md", + ) + require.NoError(t, err, "unknown param in engine.model should not be a hard error") + assert.Positive(t, compiler.GetWarningCount(), "unknown param in engine.model should emit a V-MAF-011 warning") +} + // isAliasReference helper tests. func TestIsAliasReference(t *testing.T) { aliasMap := map[string][]string{"sonnet": {"copilot/*sonnet*"}} diff --git a/pkg/workflow/model_identifier.go b/pkg/workflow/model_identifier.go index 338d1842be..a282548eee 100644 --- a/pkg/workflow/model_identifier.go +++ b/pkg/workflow/model_identifier.go @@ -47,6 +47,7 @@ package workflow import ( "errors" "fmt" + "math" "regexp" "strconv" "strings" @@ -292,8 +293,8 @@ func validateBareName(name string) error { // ─── Parameter parsing ──────────────────────────────────────────────────────── // parseQueryString parses a query string of the form "key=value&key=value…". -// Returns an error if any key or value violates the grammar or if known parameters -// have invalid values. +// Returns an error if any key or value violates the grammar (syntax only). +// Known-parameter value validation is performed separately by ValidateKnownParams. func parseQueryString(raw string) (map[string]string, error) { params := map[string]string{} for pair := range strings.SplitSeq(raw, "&") { @@ -358,12 +359,15 @@ func ValidateEffortParam(value string) error { } // ValidateTemperatureParam validates the "temperature" parameter value (V-MAF-003). -// Must be a decimal float in [0.0, 2.0]. +// Must be a finite decimal float in [0.0, 2.0]. func ValidateTemperatureParam(value string) error { f, err := strconv.ParseFloat(value, 64) if err != nil { return fmt.Errorf("model parameter 'temperature': value %q cannot be parsed as a decimal float (V-MAF-003)", value) } + if math.IsNaN(f) || math.IsInf(f, 0) { + return fmt.Errorf("model parameter 'temperature': value %q is not a finite number (V-MAF-003)", value) + } if f < 0.0 || f > 2.0 { return fmt.Errorf("model parameter 'temperature': value %q is out of range; must be in [0.0, 2.0] (V-MAF-003)", value) } diff --git a/pkg/workflow/model_identifier_fuzz_test.go b/pkg/workflow/model_identifier_fuzz_test.go new file mode 100644 index 0000000000..771e56bddd --- /dev/null +++ b/pkg/workflow/model_identifier_fuzz_test.go @@ -0,0 +1,146 @@ +//go:build !integration + +package workflow + +import ( + "math" + "strconv" + "testing" +) + +// FuzzParseModelIdentifier fuzz tests the MAF identifier parser (Section 4.1 of the spec). +// +// The fuzzer validates that: +// 1. The parser never panics on any input. +// 2. A successfully parsed identifier has a non-empty Base. +// 3. p.Raw always equals the original input on success. +// 4. Known-parameter validation and UnrecognizedParams never panic on the parsed output. +// +// To run the fuzzer: +// +// go test -v -fuzz=FuzzParseModelIdentifier -fuzztime=30s ./pkg/workflow +func FuzzParseModelIdentifier(f *testing.F) { + // Valid bare names. + f.Add("sonnet") + f.Add("auto") + f.Add("gpt-5") + f.Add("my_model") + f.Add("model.v2") + + // Valid provider-scoped names. + f.Add("copilot/gpt-5") + f.Add("openai/o3") + f.Add("anthropic/claude-opus-4.5") + f.Add("google/gemini-pro") + + // Valid glob patterns. + f.Add("copilot/*sonnet*") + f.Add("copilot/*") + f.Add("openai/gpt-*") + + // Valid identifiers with parameters. + f.Add("opus?effort=high") + f.Add("gpt-5?temperature=0.7") + f.Add("openai/o3?effort=low&temperature=0.2") + f.Add("sonnet?effort=medium") + f.Add("sonnet?temperature=2.0") + f.Add("sonnet?temperature=0.0") + + // Edge cases that the parser must handle without panicking. + f.Add("") + f.Add("a") + f.Add("a/b") + f.Add("a?b=c") + + // Inputs that should be rejected. + f.Add(".hidden") + f.Add("-model") + f.Add("my model") + f.Add("copilot/") + f.Add("copilot-/model") + f.Add("?effort=high") + f.Add("opus?effort=") + f.Add("opus?=value") + f.Add("opus?effort") + f.Add("my@model") + f.Add("a:b") + f.Add("a\x00b") + f.Add("a\nb") + + f.Fuzz(func(t *testing.T, input string) { + // Must never panic. + p, err := ParseModelIdentifier(input) + if err != nil { + // Rejected input — nothing further to check. + return + } + + // A successful parse must preserve the original input. + if p.Raw != input { + t.Errorf("ParseModelIdentifier(%q): Raw=%q, want %q", input, p.Raw, input) + } + // Base must be non-empty on success. + if p.Base == "" { + t.Errorf("ParseModelIdentifier(%q): Base must not be empty on success", input) + } + + // These helpers must not panic on the parsed output. + _ = ValidateKnownParams(p.Params) + _ = UnrecognizedParams(p.Params) + }) +} + +// FuzzValidateTemperatureParam fuzz tests that ValidateTemperatureParam: +// 1. Never panics on any string input. +// 2. Rejects NaN and infinite values. +// 3. Accepts only finite values in [0.0, 2.0]. +// +// To run: +// +// go test -v -fuzz=FuzzValidateTemperatureParam -fuzztime=30s ./pkg/workflow +func FuzzValidateTemperatureParam(f *testing.F) { + // Valid values. + f.Add("0.0") + f.Add("0.7") + f.Add("1.0") + f.Add("2.0") + f.Add("0") + f.Add("2") + f.Add("1.5") + // Boundary violations. + f.Add("-0.1") + f.Add("2.1") + f.Add("3.0") + // Special float strings accepted by strconv.ParseFloat but not by the spec. + f.Add("NaN") + f.Add("nan") + f.Add("+Inf") + f.Add("-Inf") + f.Add("Inf") + f.Add("inf") + // Non-numeric strings. + f.Add("") + f.Add("abc") + f.Add("1.0a") + f.Add("1,0") + + f.Fuzz(func(t *testing.T, input string) { + // Must never panic. + err := ValidateTemperatureParam(input) + + if err == nil { + // If accepted, the value must be a finite float in [0.0, 2.0]. + f64, parseErr := strconv.ParseFloat(input, 64) + if parseErr != nil { + t.Errorf("ValidateTemperatureParam(%q): accepted value that cannot be re-parsed as float64", input) + return + } + if math.IsNaN(f64) || math.IsInf(f64, 0) { + t.Errorf("ValidateTemperatureParam(%q): accepted non-finite value", input) + } + if f64 < 0.0 || f64 > 2.0 { + t.Errorf("ValidateTemperatureParam(%q): accepted out-of-range value %v", input, f64) + } + } + }) +}