diff --git a/.changeset/patch-model-inventory-2026-05-22.md b/.changeset/patch-model-inventory-2026-05-22.md new file mode 100644 index 00000000000..20a7dd0cd28 --- /dev/null +++ b/.changeset/patch-model-inventory-2026-05-22.md @@ -0,0 +1,5 @@ +--- +"gh-aw": patch +--- + +Model inventory update: remove `deprecated_models` metadata from multiplier payloads; keep complete model history by default; update daily model inventory guidance and effective tokens registry requirements to retain unseen models unless explicitly removed by maintainers. diff --git a/.github/workflows/daily-model-inventory.md b/.github/workflows/daily-model-inventory.md index 2956f7aa16c..1ae17460af7 100644 --- a/.github/workflows/daily-model-inventory.md +++ b/.github/workflows/daily-model-inventory.md @@ -492,7 +492,8 @@ For each provider's enriched data, attempt to infer or validate the ET multiplie Produce a consolidated multiplier gap table listing: - Models present in the live inventory but **missing** from `model_multipliers.json` — include the provider name for each model (e.g. "openai", "anthropic", "gemini", "copilot") -- Models in `model_multipliers.json` that are **no longer returned** by any API (stale) +- Models in `model_multipliers.json` that are **not currently returned** by live APIs; keep these + in the payload as historical entries (do not propose automatic removals) - Models where the **inferred multiplier** differs from the stored one ### Step 4: Identify New or Updated Model Families @@ -570,9 +571,11 @@ in the enriched `models.json` artifact (context limits, capabilities, billing fi |----------|----------|--------------------:|-------| | ... | ... | ... | ... | -#### Stale entries (no longer returned by any API) +#### Historical entries not currently returned List model IDs that appear in `model_multipliers.json` but are absent from all live inventories. +Treat these as historical records that should remain in the payload unless a human explicitly +decides to delete them. #### Inferred vs stored discrepancies @@ -596,7 +599,7 @@ List the complete sorted model IDs for each provider. ### Notes -Any caveats, stale patterns removed, or aliases that are already well-covered. +Any caveats, historical entries retained, or aliases that are already well-covered. ``` If no updates are needed (all live models are already covered by existing aliases, all diff --git a/actions/setup/js/model_multipliers.json b/actions/setup/js/model_multipliers.json index fb442e292f3..059f2042bca 100644 --- a/actions/setup/js/model_multipliers.json +++ b/actions/setup/js/model_multipliers.json @@ -1,6 +1,6 @@ { "version": "1", - "description": "Effective Tokens (ET) computation data per the gh-aw Effective Tokens Specification v0.2.0. Token class weights are applied first to normalize across token classes, then the per-model multiplier scales the result relative to the reference model. Model lifecycle: deprecated models must carry a deprecated marker for one minor version before removal (R-REG-009).", + "description": "Effective Tokens (ET) computation data per the gh-aw Effective Tokens Specification v0.2.0. Token class weights are applied first to normalize across token classes, then the per-model multiplier scales the result relative to the reference model. The registry keeps complete model history; entries are removed only by explicit manual deletion.", "reference_model": "claude-sonnet-4.5", "token_class_weights": { "input": 1.0, @@ -143,35 +143,5 @@ "gemma-4-31b-it": 0.2, "grok-code-fast-1": 0.33, "raptor-mini": 0.33 - }, - "deprecated_models": { - "claude-3-5-haiku": true, - "claude-3-5-opus": true, - "claude-3-5-sonnet": true, - "claude-3-7-sonnet": true, - "claude-3-haiku": true, - "claude-3-opus": true, - "claude-3-sonnet": true, - "claude-haiku-4-5": true, - "claude-haiku-4.5": true, - "claude-opus-4": true, - "claude-opus-4-1": true, - "claude-opus-4-5": true, - "claude-opus-4.5": true, - "claude-opus-4.6": true, - "claude-sonnet-4": true, - "claude-sonnet-4-5": true, - "claude-sonnet-4.5": true, - "claude-sonnet-4.6": true, - "gemini-1.5-flash": true, - "gemini-1.5-pro": true, - "gemini-2.5-flash-native-audio-preview-12-2025": true, - "gpt-4": true, - "gpt-4-turbo": true, - "gpt-5": true, - "gpt-5-chat-latest": true, - "gpt-5-mini": true, - "gpt-5-nano": true, - "gpt-5-pro": true } } diff --git a/docs/src/content/docs/reference/effective-tokens-specification.md b/docs/src/content/docs/reference/effective-tokens-specification.md index 612806b491b..fe888d70124 100644 --- a/docs/src/content/docs/reference/effective-tokens-specification.md +++ b/docs/src/content/docs/reference/effective-tokens-specification.md @@ -684,7 +684,7 @@ This file is embedded at compile time into the `gh-aw` binary using a Go `//go:e **R-REG-008**: When adding support for a new model, maintainers MUST register the model in `pkg/cli/data/model_multipliers.json` with a concrete numeric multiplier before release. If calibration is incomplete, the model MUST be omitted from the registry and the implementation fallback behavior in R-REG-005 applies. -**R-REG-009**: When a model is scheduled for removal from the registry, it MUST remain in `pkg/cli/data/model_multipliers.json` with a `deprecated` marker in a comment or companion metadata field for at least one minor version before it is deleted. Implementations SHOULD emit a warning when a `deprecated` model is encountered at runtime, advising callers to migrate to a supported model. A model entry MUST NOT be silently removed between consecutive minor versions; removal without the one-version deprecation notice is a breaking change and MUST be accompanied by a major version bump of the registry `version` field. +**R-REG-009**: The registry MUST preserve complete model history. Models that are no longer returned by current provider inventories MUST remain in `pkg/cli/data/model_multipliers.json` unless maintainers manually delete them in an explicit change. ### Registry Versioning @@ -700,7 +700,7 @@ To keep specification and implementation synchronized: 1. Update this specification's registry requirements when adding, removing, or re-scaling model multipliers. 2. Update `pkg/cli/data/model_multipliers.json` in the same change. -3. When deprecating a model, add a `deprecated` comment alongside the entry and keep it in the registry for at least one minor version before removal (R-REG-009). Update the registry `version` field on removal. +3. Keep historical model entries in the registry by default. Only remove entries via explicit manual deletion when needed (R-REG-009), and update the registry `version` field on removal. 4. Verify loading and fallback behavior in `pkg/cli/effective_tokens_test.go` (`TestModelMultipliersJSONEmbedded`, `TestResolveEffectiveWeightsDefault`, and inventory checks). 5. Run `make build` so the embedded registry is rebuilt into the `gh-aw` binary. 6. Re-run registry validation coverage after any registry edit so malformed multiplier entries fail @@ -728,7 +728,7 @@ Conforming releases SHOULD include a test assertion for newly added model multip ### Version 0.3.0 (Draft) - **Added**: Model Multiplier Registry section with normative requirements R-REG-001 through R-REG-009 -- **Added**: R-REG-009: model deprecation/sunset lifecycle norm (models must carry a `deprecated` marker for one minor version before removal) +- **Updated**: R-REG-009 to require complete model history retention and explicit manual deletion instead of deprecated-model lifecycle markers - **Added**: Compliance test skeleton file `pkg/cli/effective_tokens_compliance_test.go` with Go test stubs for T-ET-001..T-ET-031 - **Added**: T-ET-032 requirement for deterministic post-order aggregation in deep (3+ level) partially observed execution graphs - **Updated**: Compliance checklist §10.2 status column from "Required" to "Implemented" for all test IDs T-ET-001–T-ET-031 (all tests now implemented and passing) diff --git a/pkg/cli/data/model_multipliers.json b/pkg/cli/data/model_multipliers.json index eefada9cd8f..f50af45e1a7 100644 --- a/pkg/cli/data/model_multipliers.json +++ b/pkg/cli/data/model_multipliers.json @@ -1,6 +1,6 @@ { "version": "1", - "description": "Effective Tokens (ET) computation data per the gh-aw Effective Tokens Specification v0.2.0. Token class weights are applied first to normalize across token classes, then the per-model multiplier scales the result relative to the reference model. Model lifecycle: deprecated models must carry a deprecated marker for one minor version before removal (R-REG-009).", + "description": "Effective Tokens (ET) computation data per the gh-aw Effective Tokens Specification v0.2.0. Token class weights are applied first to normalize across token classes, then the per-model multiplier scales the result relative to the reference model. The registry keeps complete model history; entries are removed only by explicit manual deletion.", "reference_model": "claude-sonnet-4.5", "token_class_weights": { "input": 1.0, @@ -146,41 +146,5 @@ "gemma-4-31b-it": 0.2, "grok-code-fast-1": 0.33, "raptor-mini": 0.33 - }, - "deprecated_models": { - "claude-3-5-haiku": true, - "claude-3-5-opus": true, - "claude-3-5-sonnet": true, - "claude-3-7-sonnet": true, - "claude-3-haiku": true, - "claude-3-opus": true, - "claude-3-sonnet": true, - "claude-haiku-4-5": true, - "claude-haiku-4.5": true, - "claude-opus-4": true, - "claude-opus-4-1": true, - "claude-opus-4-5": true, - "claude-opus-4.5": true, - "claude-opus-4.6": true, - "claude-sonnet-4": true, - "claude-sonnet-4-5": true, - "claude-sonnet-4.5": true, - "claude-sonnet-4.6": true, - "gemini-1.5-flash": true, - "gemini-1.5-pro": true, - "gemini-2.5-computer-use-preview": true, - "gemini-2.5-flash-native-audio-preview-12-2025": true, - "gemini-3.1-flash-live-preview": true, - "gpt-4": true, - "gpt-4-turbo": true, - "gpt-41-copilot": true, - "gpt-5": true, - "gpt-5-chat-latest": true, - "gpt-5-mini": true, - "gpt-5-nano": true, - "gpt-5-pro": true, - "grok-code-fast-1": true, - "o1-mini": true, - "raptor-mini": true } } diff --git a/pkg/cli/effective_tokens_test.go b/pkg/cli/effective_tokens_test.go index 924f09d9aee..9cc76597d5c 100644 --- a/pkg/cli/effective_tokens_test.go +++ b/pkg/cli/effective_tokens_test.go @@ -3,7 +3,6 @@ package cli import ( - "encoding/json" "testing" "github.com/github/gh-aw/pkg/types" @@ -135,25 +134,6 @@ func TestModelMultipliersInventoryUpdate20260521(t *testing.T) { assert.InDelta(t, 6.0, loadedMultipliers["gemini-3.1-pro"], 1e-9, "gemini-3.1-pro should be present with official billing multiplier") } -func TestModelMultipliersDeprecatedMetadata20260521(t *testing.T) { - var raw map[string]any - require.NoError(t, json.Unmarshal(modelMultipliersJSON, &raw), "embedded model multipliers JSON should parse") - - deprecated, ok := raw["deprecated_models"].(map[string]any) - require.True(t, ok, "deprecated_models metadata should be present as a map") - assert.Len(t, deprecated, 34, "deprecated_models metadata should list all currently stale models") - assert.Equal(t, true, deprecated["gpt-5"], "gpt-5 should be marked deprecated in metadata") - assert.Equal(t, true, deprecated["claude-sonnet-4.5"], "claude-sonnet-4.5 should be marked deprecated in metadata") - assert.Equal(t, true, deprecated["gemini-1.5-pro"], "gemini-1.5-pro should be marked deprecated in metadata") - assert.Equal(t, true, deprecated["gpt-4"], "gpt-4 should be marked deprecated in metadata") - assert.Equal(t, true, deprecated["gpt-41-copilot"], "gpt-41-copilot should be marked deprecated in metadata") - assert.Equal(t, true, deprecated["grok-code-fast-1"], "grok-code-fast-1 should be marked deprecated in metadata") - assert.Equal(t, true, deprecated["o1-mini"], "o1-mini should be marked deprecated in metadata") - assert.Equal(t, true, deprecated["raptor-mini"], "raptor-mini should be marked deprecated in metadata") - assert.Equal(t, true, deprecated["gemini-2.5-computer-use-preview"], "gemini-2.5-computer-use-preview should be marked deprecated in metadata") - assert.Equal(t, true, deprecated["gemini-3.1-flash-live-preview"], "gemini-3.1-flash-live-preview should be marked deprecated in metadata") -} - func TestPopulateEffectiveTokensWithCustomWeights(t *testing.T) { loadedMultipliers = nil