diff --git a/.github/workflows/smoke-copilot.lock.yml b/.github/workflows/smoke-copilot.lock.yml index 141fe13e7f..c267eb36e0 100644 --- a/.github/workflows/smoke-copilot.lock.yml +++ b/.github/workflows/smoke-copilot.lock.yml @@ -241,7 +241,7 @@ jobs: id: pick-experiment uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: - GH_AW_EXPERIMENT_SPEC: '{"caveman":["yes","no"]}' + GH_AW_EXPERIMENT_SPEC: '{"caveman":{"variants":["yes","no"]}}' GH_AW_EXPERIMENT_STATE_FILE: /tmp/gh-aw/experiments/state.json GH_AW_EXPERIMENT_STATE_DIR: /tmp/gh-aw/experiments with: diff --git a/actions/setup/js/pick_experiment.cjs b/actions/setup/js/pick_experiment.cjs index c6a50a6540..2d0095e3ac 100644 --- a/actions/setup/js/pick_experiment.cjs +++ b/actions/setup/js/pick_experiment.cjs @@ -7,25 +7,22 @@ * Selects A/B experiment variants for the current workflow run. * * Environment variables (set by the compiled workflow step): - * GH_AW_EXPERIMENT_SPEC - JSON object mapping experiment name → array of variant strings - * e.g. '{"feature1":["A","B"],"style":["concise","detailed"]}' + * GH_AW_EXPERIMENT_SPEC - JSON object mapping experiment name → variant config. + * Each value is either a legacy bare array of strings + * or a new object with a 'variants' field and optional + * metadata: weight, start_date, end_date, description, metric. + * e.g. '{"feature1":["A","B"],"style":{"variants":["concise","detailed"],"weight":[70,30]}}' * GH_AW_EXPERIMENT_STATE_FILE - Absolute path to the JSON state file to read/write * e.g. /tmp/gh-aw/experiments/state.json * GH_AW_EXPERIMENT_STATE_DIR - Directory that holds the state file (created if missing) * e.g. /tmp/gh-aw/experiments * * Algorithm: - * For each experiment the function maintains a counter per variant in the state file. - * The variant with the lowest invocation count is selected next (ties are broken by - * variant order, yielding a deterministic round-robin across runs). 
- * This ensures that across N runs every variant is used approximately N/K times where - * K is the number of variants, satisfying basic A/B statistical balance. - * - * Outputs: - * - Sets core.setOutput(name, selected) for each experiment (e.g. caveman=yes). - * - Sets core.setOutput('experiments', JSON.stringify(assignments)) for the full map. - * - Writes the updated counter state back to GH_AW_EXPERIMENT_STATE_FILE. - * - Appends a Markdown step summary with the assignment table and cumulative counts. + * When weight is provided the variant is chosen by weighted-random selection. + * Otherwise the variant with the lowest invocation count is selected next (ties are + * broken by variant order, yielding a deterministic round-robin across runs). + * When start_date or end_date is provided and today falls outside that window the + * control variant (first variant) is used and no counter is incremented. */ const fs = require("fs"); @@ -37,6 +34,31 @@ const path = require("path"); * Maps experiment name → variant → cumulative invocation count. */ +/** + * @typedef {Object} ExperimentConfig + * @property {string[]} variants - Array of variant values (length >= 2) + * @property {number[]|undefined} weight - Optional per-variant weights (same length as variants) + * @property {string|undefined} start_date - ISO-8601 date; inactive before this date + * @property {string|undefined} end_date - ISO-8601 date; inactive after this date + * @property {string|undefined} description + * @property {string|undefined} metric + * @property {number|undefined} issue + */ + +/** + * Normalize a raw spec entry (either a legacy bare array or the new object form) into + * an ExperimentConfig object. + * + * @param {string[]|ExperimentConfig} raw + * @returns {ExperimentConfig} + */ +function normalizeConfig(raw) { + if (Array.isArray(raw)) { + return { variants: raw }; + } + return raw; +} + /** * Load and parse the state JSON file. 
Returns an empty state if the file does not exist * or cannot be parsed (e.g. first run or corrupted cache). @@ -69,6 +91,26 @@ function saveState(stateFile, state) { fs.writeFileSync(stateFile, JSON.stringify(state, null, 2) + "\n", "utf8"); } +/** + * Return true when today (UTC) falls within the optional [start_date, end_date] window. + * A missing date is treated as unbounded (open interval). + * + * @param {string|undefined} startDate - YYYY-MM-DD or undefined + * @param {string|undefined} endDate - YYYY-MM-DD or undefined + * @param {string} [todayOverride] - Override today's date for testing (YYYY-MM-DD) + * @returns {boolean} + */ +function isWithinDateWindow(startDate, endDate, todayOverride) { + const today = todayOverride || new Date().toISOString().slice(0, 10); + if (startDate && today < startDate) { + return false; + } + if (endDate && today > endDate) { + return false; + } + return true; +} + /** * Pick the variant for one experiment using a balanced least-used selection. * The variant with the lowest cumulative count is chosen; ties are broken by @@ -93,6 +135,35 @@ function pickVariant(name, variants, state) { return selected; } +/** + * Pick the variant for one experiment using weighted random selection. + * Each variant is chosen with probability proportional to its weight. + * Zero-weight variants are never selected. + * + * @param {string[]} variants - Array of variant values (length >= 2) + * @param {number[]} weight - Per-variant weights (same length as variants, all >= 0) + * @returns {string} The selected variant + */ +function pickVariantWeighted(variants, weight) { + const total = weight.reduce((a, b) => a + b, 0); + if (total <= 0) { + // All weights are zero – fall back to first variant (control). 
+ return variants[0]; + } + let rnd = Math.random() * total; + for (let i = 0; i < variants.length; i++) { + rnd -= weight[i]; + if (rnd <= 0) { + return variants[i]; + } + } + // Floating-point rounding guard: return last non-zero-weight variant. + for (let i = variants.length - 1; i >= 0; i--) { + if (weight[i] > 0) return variants[i]; + } + return variants[0]; +} + /** * Increment the counter for the chosen variant. * @@ -111,22 +182,22 @@ function recordVariant(name, variant, state) { * Append a Markdown step summary describing the experiment assignments. * * @param {Record} assignments - Maps experiment name → selected variant - * @param {Record} spec - Maps experiment name → variants array + * @param {Record} configs - Normalized config per experiment * @param {ExperimentState} state - Updated state (post-selection) * @param {any} core - @actions/core */ -async function writeSummary(assignments, spec, state, core) { +async function writeSummary(assignments, configs, state, core) { const names = Object.keys(assignments).sort(); const lines = ["## 🧪 A/B Experiment Assignments", "", "| Experiment | Selected Variant | All Variants | Cumulative Counts |", "| --- | --- | --- | --- |"]; for (const name of names) { const selected = assignments[name]; - const variants = spec[name] || []; + const variants = configs[name]?.variants || []; const counts = state.counts[name] || {}; const countsStr = variants.map(v => `${v}: ${counts[v] || 0}`).join(", "); lines.push(`| \`${name}\` | **${selected}** | ${variants.join(", ")} | ${countsStr} |`); } lines.push(""); - lines.push("_Variants are selected by balanced round-robin to ensure statistical relevance across runs._"); + lines.push("_Variants are selected by balanced round-robin (or weighted) to ensure statistical relevance across runs._"); await core.summary.addRaw(lines.join("\n")).write(); } @@ -138,21 +209,28 @@ async function main() { const stateFile = process.env.GH_AW_EXPERIMENT_STATE_FILE || 
"/tmp/gh-aw/experiments/state.json"; const stateDir = process.env.GH_AW_EXPERIMENT_STATE_DIR || "/tmp/gh-aw/experiments"; - /** @type {Record} */ - let spec; + /** @type {Record} */ + let rawSpec; try { - spec = JSON.parse(specRaw); + rawSpec = JSON.parse(specRaw); } catch (e) { core.setFailed(`Failed to parse GH_AW_EXPERIMENT_SPEC: ${e.message}`); return; } - const experimentNames = Object.keys(spec).sort(); + const experimentNames = Object.keys(rawSpec).sort(); if (experimentNames.length === 0) { core.info("No experiments defined – nothing to do."); return; } + // Normalize all spec entries to ExperimentConfig objects. + /** @type {Record} */ + const configs = {}; + for (const name of experimentNames) { + configs[name] = normalizeConfig(rawSpec[name]); + } + // Ensure the state directory exists so that the cache-save step can find it. fs.mkdirSync(stateDir, { recursive: true }); @@ -162,12 +240,28 @@ async function main() { const assignments = {}; for (const name of experimentNames) { - const variants = spec[name]; + const cfg = configs[name]; + const variants = cfg.variants; if (!Array.isArray(variants) || variants.length < 2) { core.warning(`Experiment "${name}" has fewer than 2 variants – skipping.`); continue; } - const selected = pickVariant(name, variants, state); + + // Date-window check: use control variant (first variant) when outside the window. + if (!isWithinDateWindow(cfg.start_date, cfg.end_date)) { + const control = variants[0]; + assignments[name] = control; + core.setOutput(name, control); + core.info(`Experiment "${name}": outside date window – using control variant "${control}"`); + continue; + } + + let selected; + if (cfg.weight && cfg.weight.length === variants.length) { + selected = pickVariantWeighted(variants, cfg.weight); + } else { + selected = pickVariant(name, variants, state); + } recordVariant(name, selected, state); assignments[name] = selected; @@ -197,7 +291,7 @@ async function main() { } // Write step summary. 
- await writeSummary(assignments, spec, state, core); + await writeSummary(assignments, configs, state, core); } -module.exports = { main, pickVariant, loadState, saveState, recordVariant }; +module.exports = { main, pickVariant, pickVariantWeighted, loadState, saveState, recordVariant, isWithinDateWindow, normalizeConfig }; diff --git a/actions/setup/js/pick_experiment.test.cjs b/actions/setup/js/pick_experiment.test.cjs index 86b1cd8d05..a82008f8f1 100644 --- a/actions/setup/js/pick_experiment.test.cjs +++ b/actions/setup/js/pick_experiment.test.cjs @@ -19,7 +19,7 @@ const mockCore = { global.core = mockCore; -const { pickVariant, loadState, saveState, recordVariant, main } = await import("./pick_experiment.cjs"); +const { pickVariant, pickVariantWeighted, loadState, saveState, recordVariant, isWithinDateWindow, normalizeConfig, main } = await import("./pick_experiment.cjs"); describe("pick_experiment", () => { /** @type {string} */ @@ -256,5 +256,132 @@ describe("pick_experiment", () => { expect(mockCore.setFailed).toHaveBeenCalled(); }); + + it("accepts new object-form spec and picks variant", async () => { + const stateFile = path.join(tmpDir, "state.json"); + process.env.GH_AW_EXPERIMENT_SPEC = JSON.stringify({ + style: { variants: ["concise", "verbose"] }, + }); + process.env.GH_AW_EXPERIMENT_STATE_FILE = stateFile; + process.env.GH_AW_EXPERIMENT_STATE_DIR = tmpDir; + + await main(); + + expect(mockCore.setOutput).toHaveBeenCalledWith("style", "concise"); + expect(mockCore.setFailed).not.toHaveBeenCalled(); + }); + + it("uses control variant when today is before start_date", async () => { + const stateFile = path.join(tmpDir, "state.json"); + // Use a far-future start_date to ensure we're always before it. 
+ process.env.GH_AW_EXPERIMENT_SPEC = JSON.stringify({ + style: { variants: ["concise", "verbose"], start_date: "2099-01-01" }, + }); + process.env.GH_AW_EXPERIMENT_STATE_FILE = stateFile; + process.env.GH_AW_EXPERIMENT_STATE_DIR = tmpDir; + + await main(); + + // Should use control variant (first: concise) without recording a count. + expect(mockCore.setOutput).toHaveBeenCalledWith("style", "concise"); + // Counter for 'style' should NOT have been incremented. + const state = loadState(stateFile); + expect(state.counts["style"]).toBeUndefined(); + }); + + it("uses control variant when today is after end_date", async () => { + const stateFile = path.join(tmpDir, "state.json"); + process.env.GH_AW_EXPERIMENT_SPEC = JSON.stringify({ + style: { variants: ["concise", "verbose"], end_date: "2000-01-01" }, + }); + process.env.GH_AW_EXPERIMENT_STATE_FILE = stateFile; + process.env.GH_AW_EXPERIMENT_STATE_DIR = tmpDir; + + await main(); + + expect(mockCore.setOutput).toHaveBeenCalledWith("style", "concise"); + }); + }); + + // ── pickVariantWeighted ──────────────────────────────────────────────────── + + describe("pickVariantWeighted", () => { + it("always selects the only non-zero-weight variant when one weight is 100", () => { + // With weight [0, 100] the second variant must always be selected. 
+ for (let i = 0; i < 20; i++) { + expect(pickVariantWeighted(["A", "B"], [0, 100])).toBe("B"); + } + }); + + it("always selects the only non-zero-weight variant when one weight is 0", () => { + for (let i = 0; i < 20; i++) { + expect(pickVariantWeighted(["A", "B"], [100, 0])).toBe("A"); + } + }); + + it("falls back to first variant when all weights are zero", () => { + expect(pickVariantWeighted(["A", "B"], [0, 0])).toBe("A"); + }); + + it("distributes variants proportionally across many runs", () => { + const counts = { A: 0, B: 0 }; + const N = 1000; + for (let i = 0; i < N; i++) { + const v = pickVariantWeighted(["A", "B"], [70, 30]); + counts[v]++; + } + // With weights 70:30 we expect ~70% A and ~30% B. Allow 10% absolute tolerance. + expect(counts["A"] / N).toBeCloseTo(0.7, 1); + expect(counts["B"] / N).toBeCloseTo(0.3, 1); + }); + }); + + // ── isWithinDateWindow ──────────────────────────────────────────────────── + + describe("isWithinDateWindow", () => { + it("returns true when no dates are specified", () => { + expect(isWithinDateWindow(undefined, undefined, "2026-06-01")).toBe(true); + }); + + it("returns true when today equals start_date", () => { + expect(isWithinDateWindow("2026-06-01", undefined, "2026-06-01")).toBe(true); + }); + + it("returns false when today is before start_date", () => { + expect(isWithinDateWindow("2026-06-01", undefined, "2026-05-31")).toBe(false); + }); + + it("returns true when today equals end_date", () => { + expect(isWithinDateWindow(undefined, "2026-06-30", "2026-06-30")).toBe(true); + }); + + it("returns false when today is after end_date", () => { + expect(isWithinDateWindow(undefined, "2026-06-30", "2026-07-01")).toBe(false); + }); + + it("returns true when today is within [start_date, end_date]", () => { + expect(isWithinDateWindow("2026-05-01", "2026-06-30", "2026-06-01")).toBe(true); + }); + + it("returns false when today is before the window", () => { + expect(isWithinDateWindow("2026-05-01", "2026-06-30", 
"2026-04-30")).toBe(false); + }); + + it("returns false when today is after the window", () => { + expect(isWithinDateWindow("2026-05-01", "2026-06-30", "2026-07-01")).toBe(false); + }); + }); + + // ── normalizeConfig ─────────────────────────────────────────────────────── + + describe("normalizeConfig", () => { + it("wraps a bare array in a variants object", () => { + expect(normalizeConfig(["A", "B"])).toEqual({ variants: ["A", "B"] }); + }); + + it("passes through an object-form config unchanged", () => { + const cfg = { variants: ["A", "B"], weight: [70, 30] }; + expect(normalizeConfig(cfg)).toBe(cfg); + }); }); }); diff --git a/docs/adr/29618-rich-experiment-metadata-schema-extension.md b/docs/adr/29618-rich-experiment-metadata-schema-extension.md new file mode 100644 index 0000000000..b3e59f919f --- /dev/null +++ b/docs/adr/29618-rich-experiment-metadata-schema-extension.md @@ -0,0 +1,92 @@ +# ADR-29618: Rich Experiment Metadata Schema Extension with Weighted Selection and Date Gating + +**Date**: 2026-05-01 +**Status**: Draft +**Deciders**: pelikhan, copilot-swe-agent + +--- + +## Part 1 — Narrative (Human-Friendly) + +### Context + +ADR-29534 introduced the `experiments:` frontmatter field with a bare-array form (`caveman: [yes, no]`) and a least-used round-robin selection algorithm. In practice, teams running A/B experiments needed capabilities the bare-array form could not express: non-uniform probability splits (e.g., 70/30 for a high-risk variant), automatic deactivation after an end date, machine-readable metadata (description, linked issue, metric name) for governance tooling, and backward compatibility with the many existing workflows already using the bare-array form. Extending the schema to support these requirements while keeping the existing syntax working required an explicit decision about how to version the schema and how the selection algorithm should adapt to the richer spec. 
+ +### Decision + +We will extend the `experiments:` frontmatter schema to accept two mutually exclusive forms via a JSON Schema `oneOf`: (1) the legacy bare-array form, unchanged, and (2) a new object form whose only required field is `variants`, plus optional fields `description`, `metric`, `weight`, `issue`, `start_date`, and `end_date`. The runtime normalizes both forms into a typed `ExperimentConfig` struct at parse time. When a `weight` array of the same length as `variants` is provided, the JS runtime uses weighted-random selection instead of round-robin; when `start_date` or `end_date` is provided and today falls outside the window, the control variant (first variant) is returned without incrementing any counter. This decision extends ADR-29534; its Rule 5 ("implementations **MUST** select the variant with the lowest cumulative invocation count") now applies only when no `weight` is provided. + +### Alternatives Considered + +#### Alternative 1: Separate Top-Level `experiments_config` Map + +Add a parallel top-level key (`experiments_config:`) for metadata while keeping `experiments:` as bare arrays. This would avoid schema `oneOf` complexity and keep the selection algorithm single-mode. It was rejected because splitting variant lists from their metadata across two top-level keys makes frontmatter harder to read, breaks co-location, and doubles the number of keys a reader must reconcile to understand a single experiment. + +#### Alternative 2: Break Backward Compatibility and Require the Object Form + +Require all experiments to use the new object form (dropping bare-array support). This produces a simpler `additionalProperties` schema (no `oneOf`) and eliminates the normalization layer in the compiler. It was rejected because there are existing compiled lock files and live workflows using the bare-array form; a breaking change would require a coordinated migration of all callers without delivering user-visible value. 
+ +#### Alternative 3: Embed Weights in a Separate Env Var at Compile Time + +Pass weights and metadata as additional env vars (e.g., `GH_AW_EXPERIMENT_WEIGHTS_<NAME>`) rather than embedding them in the existing `GH_AW_EXPERIMENT_SPEC` JSON blob. This avoids touching the spec format but multiplies the number of env vars injected per experiment and complicates the JS runtime which must rejoin them. It was rejected in favour of enriching the existing spec JSON, which is already a structured object. + +### Consequences + +#### Positive +- Full backward compatibility: all existing bare-array workflows continue to work without modification. +- Weighted selection enables statistically-designed experiments where one variant carries greater risk and should be shown less frequently. +- Date-range gating automates experiment lifecycle — no manual intervention needed to deactivate an experiment after its end date. +- Machine-readable metadata (`description`, `metric`, `issue`) enables governance tooling to discover and audit experiments without reading compiler internals. + +#### Negative +- The `oneOf` in the JSON schema adds complexity to schema validation errors: consumers receive less precise error messages when the value matches neither branch. +- The compiler now calls `extractExperimentConfigsFromFrontmatter` and `extractExperimentsFromFrontmatter` separately, adding a redundant parse pass over the same frontmatter map (two passes instead of one). +- Rule 5 of ADR-29534 is partially superseded: "must use least-used selection" is now conditional on the absence of `weight`. This inconsistency between the two ADRs must be resolved before either is marked Accepted. +- Weighted random selection is non-deterministic across runs (unlike round-robin), which may produce statistically unbalanced results over small sample sizes without the experimenter understanding the difference. 
+ +#### Neutral +- The `normalizeConfig()` function in `pick_experiment.cjs` encapsulates the coercion from bare array to object form; callers in `main()` operate only on `ExperimentConfig` objects after this point. +- The `ExperimentConfig` struct is added to `frontmatter_types.go` alongside `FrontmatterConfig` and `WorkflowData`; its JSON tags use snake_case to match the YAML field names and the JS runtime's property names. +- `FrontmatterConfig.ExperimentConfigs` is tagged `json:"-"` so it does not appear in any serialized frontmatter output. + +--- + +## Part 2 — Normative Specification (RFC 2119) + +> The key words **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, **SHALL NOT**, **SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, and **OPTIONAL** in this section are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119). + +### Frontmatter Schema (extends ADR-29534 §Frontmatter Schema) + +1. Each value in the `experiments` map **MUST** conform to exactly one of two forms: (a) a bare array of two or more variant strings, or (b) an object with a required `variants` field (array of ≥ 2 strings) and the optional fields defined below. +2. The `variants` field in the object form **MUST** satisfy the same constraints as the bare-array form: it **MUST** contain at least two string entries. +3. If present, the `weight` field **MUST** be an array of non-negative integers whose length equals the length of `variants`; any other length **MUST** be treated as absent (i.e., fall back to round-robin). +4. If present, `start_date` and `end_date` **MUST** be ISO-8601 date strings matching the pattern `YYYY-MM-DD`; non-conforming values **SHOULD** be ignored (treated as absent) rather than causing a hard error. +5. The `description`, `metric`, and `issue` fields are **OPTIONAL** and carry no runtime effect on variant selection; implementations **MAY** surface them in step summaries or artifact metadata. 
+ +### Variant Selection (amends ADR-29534 Rule 5) + +6. When `weight` is provided and its length equals the length of `variants`, implementations **MUST** use weighted-random selection: each variant is chosen with probability proportional to its weight value. +7. When all weight values are zero, implementations **MUST** fall back to the first variant (control) rather than erroring. +8. When `weight` is absent or its length does not match `variants`, implementations **MUST** use the least-used (round-robin) selection algorithm defined in ADR-29534 Rule 5. +9. Weighted-random selection **MUST NOT** increment any variant counter; counter state is only updated by round-robin selection. + +### Date-Range Gating + +10. When `start_date` is provided and the current date (UTC, `YYYY-MM-DD`) is strictly before `start_date`, implementations **MUST** return the control variant (first entry in `variants`) and **MUST NOT** increment any counter. +11. When `end_date` is provided and the current date (UTC, `YYYY-MM-DD`) is strictly after `end_date`, implementations **MUST** return the control variant and **MUST NOT** increment any counter. +12. Date comparison **MUST** use UTC date; local timezone offsets **MUST NOT** affect the result. +13. When both `start_date` and `end_date` are provided and the current date is within `[start_date, end_date]` (both endpoints inclusive), the experiment is active and normal selection applies. + +### Compiler Integration + +14. The compiler **MUST** parse both bare-array and object-form experiments in a single pass and expose the result via `WorkflowData.ExperimentConfigs` (a `map[string]*ExperimentConfig`) in addition to the existing `WorkflowData.Experiments` (`map[string][]string`). +15. `buildExperimentSpecJSON` **MUST** embed the full `ExperimentConfig` JSON object (including metadata fields) when a config is available, so that the JS runtime receives all fields in `GH_AW_EXPERIMENT_SPEC`. +16. 
When no config is available for a name (legacy code path), `buildExperimentSpecJSON` **MUST** fall back to emitting a bare variants array for backward compatibility. + +### Conformance + +An implementation is considered conformant with this ADR if it satisfies all **MUST** and **MUST NOT** requirements above. Failure to meet any **MUST** or **MUST NOT** requirement constitutes non-conformance. This ADR amends ADR-29534; in case of conflict between the two, this ADR takes precedence for the `weight` and date-range fields, and ADR-29534 governs all other aspects of the experiments feature. + +--- + +*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/25232335913) workflow. The PR author must review, complete, and finalize this document before the PR can merge.* diff --git a/pkg/parser/schemas/main_workflow_schema.json b/pkg/parser/schemas/main_workflow_schema.json index 3ff7a9d8b6..f0d83fd97f 100644 --- a/pkg/parser/schemas/main_workflow_schema.json +++ b/pkg/parser/schemas/main_workflow_schema.json @@ -2731,24 +2731,85 @@ ] }, "experiments": { - "description": "A/B testing experiments. Each key is an experiment name; the value is an array of two or more variant strings. At runtime the activation job picks a variant using actions/cache to maintain consistent assignment across runs. Use ${{ experiments. }} in the workflow prompt to reference the selected variant. When multiple experiments are declared, assignments are statistically balanced using a counter that round-robins across variants.", + "description": "A/B testing experiments. Each key is an experiment name; the value is either an array of two or more variant strings (bare-array form) or an object with a 'variants' field plus optional metadata fields (description, metric, weight, issue, start_date, end_date). At runtime the activation job picks a variant using actions/cache to maintain consistent assignment across runs. Use ${{ experiments. 
}} in the workflow prompt to reference the selected variant. When multiple experiments are declared, assignments are statistically balanced using a counter that round-robins across variants (or weighted when 'weight' is provided).", "type": "object", "propertyNames": { "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*$", "description": "Experiment names must be valid identifiers: start with a letter or underscore, followed by letters, digits, or underscores." }, "additionalProperties": { - "type": "array", - "items": { "type": "string" }, - "minItems": 2, - "description": "Array of variant values for this experiment. Must contain at least two variants." + "oneOf": [ + { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 2, + "description": "Bare-array form: list of variant values for this experiment. Must contain at least two variants." + }, + { + "type": "object", + "required": ["variants"], + "properties": { + "variants": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 2, + "description": "Array of variant values for this experiment. Must contain at least two variants." + }, + "description": { + "type": "string", + "description": "Human-readable description of what this experiment tests." + }, + "metric": { + "type": "string", + "description": "Primary metric to observe (e.g. 'effective_tokens')." + }, + "weight": { + "type": "array", + "items": { + "type": "integer", + "minimum": 0 + }, + "description": "Per-variant probability weights (relative, need not sum to 100). Length must equal the number of variants." + }, + "issue": { + "type": "integer", + "minimum": 1, + "description": "GitHub issue number tracking this experiment." + }, + "start_date": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}$", + "description": "ISO-8601 date (YYYY-MM-DD). Experiment is inactive before this date; control variant is used." 
+ }, + "end_date": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}$", + "description": "ISO-8601 date (YYYY-MM-DD). Experiment is inactive after this date; control variant is used." + } + }, + "additionalProperties": false, + "description": "Object form: experiment config with variants and optional metadata." + } + ] }, "examples": [ { "feature1": ["A", "B"] }, { - "prompt_style": ["concise", "detailed", "structured"], + "prompt_style": { + "variants": ["concise", "verbose"], + "description": "Test whether concise vs verbose prompts reduce token consumption", + "metric": "effective_tokens", + "weight": [50, 50], + "issue": 1234, + "start_date": "2026-05-01", + "end_date": "2026-06-15" + }, "model_temp": ["low", "high"] } ] diff --git a/pkg/workflow/compiler_experiments.go b/pkg/workflow/compiler_experiments.go index 4600ff0872..ce37bcc33f 100644 --- a/pkg/workflow/compiler_experiments.go +++ b/pkg/workflow/compiler_experiments.go @@ -19,15 +19,37 @@ const experimentsCacheDir = "/tmp/gh-aw/experiments" // experimentStateFile is the path to the experiment state JSON written by pick_experiment.cjs. const experimentStateFile = experimentsCacheDir + "/state.json" -// extractExperimentsFromFrontmatter reads the "experiments" map from a raw frontmatter map. -// Each key is an experiment name; each value must be a []string (or []any of strings) of -// variant values. Invalid entries are silently skipped. -// Experiment names must match [a-zA-Z_][a-zA-Z0-9_]* (identifier style) so they can be used +// experimentNamePattern validates experiment names as identifier-style keys. +// Experiment names must match [a-zA-Z_][a-zA-Z0-9_]* so they can be used // as GitHub Actions step output names and in ${{ experiments. }} expressions without // bracket notation. Names that do not match are skipped with a warning. 
var experimentNamePattern = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]*$`) +// extractExperimentsFromFrontmatter reads the "experiments" map from a raw frontmatter map. +// Both the bare-array form and the new object form with metadata fields are accepted. +// Invalid entries (bad name pattern, missing/insufficient variants) are skipped with a +// warning logged to the debug logger. func extractExperimentsFromFrontmatter(frontmatter map[string]any) map[string][]string { + return experimentVariantsFromConfigs(extractExperimentConfigsFromFrontmatter(frontmatter)) +} + +// experimentVariantsFromConfigs derives the simple name→variants map from a configs map. +// Returns nil when configs is empty so callers can use len-checks without special-casing. +func experimentVariantsFromConfigs(configs map[string]*ExperimentConfig) map[string][]string { + if len(configs) == 0 { + return nil + } + result := make(map[string][]string, len(configs)) + for name, cfg := range configs { + result[name] = cfg.Variants + } + return result +} + +// extractExperimentConfigsFromFrontmatter reads the "experiments" map and returns +// fully-typed ExperimentConfig objects. Both the bare-array form and the new object +// form are accepted. 
+func extractExperimentConfigsFromFrontmatter(frontmatter map[string]any) map[string]*ExperimentConfig { raw, ok := frontmatter["experiments"] if !ok || raw == nil { return nil @@ -36,33 +58,113 @@ func extractExperimentsFromFrontmatter(frontmatter map[string]any) map[string][] if !ok { return nil } - result := make(map[string][]string, len(rawMap)) + result := make(map[string]*ExperimentConfig, len(rawMap)) for name, val := range rawMap { if !experimentNamePattern.MatchString(name) { experimentsLog.Printf("Skipping experiment %q: name must match [a-zA-Z_][a-zA-Z0-9_]*", name) continue } - switch v := val.(type) { - case []string: - if len(v) >= 2 { - result[name] = v + cfg := extractOneExperimentConfig(name, val) + if cfg != nil { + result[name] = cfg + } + } + if len(result) == 0 { + return nil + } + return result +} + +// extractOneExperimentConfig converts a single raw experiment value into an ExperimentConfig. +// Returns nil when the value is invalid (e.g. fewer than two variants). +func extractOneExperimentConfig(name string, val any) *ExperimentConfig { + switch v := val.(type) { + case []string: + if len(v) >= 2 { + return &ExperimentConfig{Variants: v} + } + case []any: + var variants []string + for _, item := range v { + if s, ok := item.(string); ok { + variants = append(variants, s) } + } + if len(variants) >= 2 { + return &ExperimentConfig{Variants: variants} + } + case map[string]any: + // New object form: extract variants and optional metadata fields. 
+ cfg := &ExperimentConfig{} + varRaw, ok := v["variants"] + if !ok { + experimentsLog.Printf("Skipping experiment %q: object form requires 'variants' field", name) + return nil + } + switch vv := varRaw.(type) { + case []string: + cfg.Variants = vv case []any: - var variants []string - for _, item := range v { + for _, item := range vv { if s, ok := item.(string); ok { - variants = append(variants, s) + cfg.Variants = append(cfg.Variants, s) } } - if len(variants) >= 2 { - result[name] = variants + } + if len(cfg.Variants) < 2 { + experimentsLog.Printf("Skipping experiment %q: must have at least 2 variants", name) + return nil + } + if d, ok := v["description"].(string); ok { + cfg.Description = d + } + if m, ok := v["metric"].(string); ok { + cfg.Metric = m + } + if sd, ok := v["start_date"].(string); ok { + cfg.StartDate = sd + } + if ed, ok := v["end_date"].(string); ok { + cfg.EndDate = ed + } + if issue, ok := v["issue"]; ok { + switch n := issue.(type) { + case int: + cfg.Issue = n + case int64: + cfg.Issue = int(n) + case float64: + cfg.Issue = int(n) } } + if weightRaw, ok := v["weight"]; ok { + cfg.Weight = extractIntSlice(weightRaw) + } + return cfg } - if len(result) == 0 { - return nil + return nil +} + +// extractIntSlice converts a raw value to a []int, accepting []any of numeric values. +func extractIntSlice(raw any) []int { + switch v := raw.(type) { + case []int: + return v + case []any: + var result []int + for _, item := range v { + switch n := item.(type) { + case int: + result = append(result, n) + case int64: + result = append(result, int(n)) + case float64: + result = append(result, int(n)) + } + } + return result } - return result + return nil } // generateExperimentSteps creates the steps that pick and upload A/B experiment variants. 
@@ -98,8 +200,8 @@ func (c *Compiler) generateExperimentSteps(data *WorkflowData) []string { ) // ── Step 2: Pick experiment variants ────────────────────────────────────── - // Build the JSON spec: {"feature1":["A","B"],...} - specJSON := buildExperimentSpecJSON(data.Experiments, experimentNames) + // Build the JSON spec including full metadata when available. + specJSON := buildExperimentSpecJSON(data.Experiments, data.ExperimentConfigs, experimentNames) steps = append(steps, " - name: Pick experiment variants\n", @@ -144,14 +246,14 @@ func (c *Compiler) generateExperimentSteps(data *WorkflowData) []string { } // buildExperimentSpecJSON builds a compact JSON object from the experiments map. +// When configs is non-nil and contains an entry for a name, the full ExperimentConfig +// (variants + metadata) is embedded so that pick_experiment.cjs can use weighted +// selection, date-range gating, and other metadata. +// When no config is available a bare variants array is emitted for backward compatibility. // Uses encoding/json for proper escaping of all special characters. -// Caller is responsible for escaping single quotes (” in YAML) when embedding the -// result in a YAML single-quoted scalar, since JSON string values may contain literal -// single quotes (e.g. "Bob's"). -func buildExperimentSpecJSON(experiments map[string][]string, names []string) string { - // Build JSON manually with encoding/json for individual values to ensure - // correct escaping of all special characters. We iterate names (a sorted slice) - // rather than the map directly to produce deterministic output. +// Caller is responsible for escaping single quotes when embedding the result in a YAML +// single-quoted scalar (each ' must be doubled to ” per YAML spec §7.3.3). 
+func buildExperimentSpecJSON(experiments map[string][]string, configs map[string]*ExperimentConfig, names []string) string { var sb strings.Builder sb.WriteString("{") for i, name := range names { @@ -159,10 +261,18 @@ func buildExperimentSpecJSON(experiments map[string][]string, names []string) st sb.WriteString(",") } keyBytes, _ := json.Marshal(name) - varBytes, _ := json.Marshal(experiments[name]) sb.Write(keyBytes) sb.WriteString(":") - sb.Write(varBytes) + + // Use the full config when available so the JS can consume metadata. + if cfg, ok := configs[name]; ok && cfg != nil { + cfgBytes, _ := json.Marshal(cfg) + sb.Write(cfgBytes) + } else { + // Fallback: bare variants array (legacy behaviour). + varBytes, _ := json.Marshal(experiments[name]) + sb.Write(varBytes) + } } sb.WriteString("}") return sb.String() diff --git a/pkg/workflow/compiler_experiments_test.go b/pkg/workflow/compiler_experiments_test.go index fa3d6e980b..2643308314 100644 --- a/pkg/workflow/compiler_experiments_test.go +++ b/pkg/workflow/compiler_experiments_test.go @@ -100,6 +100,53 @@ func TestExtractExperimentsFromFrontmatter(t *testing.T) { }, want: map[string][]string{"valid": {"X", "Y"}}, }, + { + name: "new object form with variants only", + frontmatter: map[string]any{ + "experiments": map[string]any{ + "style": map[string]any{ + "variants": []any{"concise", "verbose"}, + }, + }, + }, + want: map[string][]string{"style": {"concise", "verbose"}}, + }, + { + name: "new object form with full metadata", + frontmatter: map[string]any{ + "experiments": map[string]any{ + "prompt_style": map[string]any{ + "variants": []any{"concise", "verbose"}, + "description": "Test prompt styles", + "weight": []any{50.0, 50.0}, + "start_date": "2026-01-01", + "end_date": "2026-12-31", + "issue": float64(1234), + }, + }, + }, + want: map[string][]string{"prompt_style": {"concise", "verbose"}}, + }, + { + name: "new object form skips when variants missing", + frontmatter: map[string]any{ + "experiments": 
map[string]any{ + "bad": map[string]any{"description": "no variants"}, + "good": []any{"A", "B"}, + }, + }, + want: map[string][]string{"good": {"A", "B"}}, + }, + { + name: "new object form skips when fewer than two variants", + frontmatter: map[string]any{ + "experiments": map[string]any{ + "bad": map[string]any{"variants": []any{"only-one"}}, + "good": []any{"A", "B"}, + }, + }, + want: map[string][]string{"good": {"A", "B"}}, + }, } for _, tt := range tests { @@ -130,16 +177,40 @@ func TestBuildExperimentSpecJSON(t *testing.T) { "style": {"concise", "detailed"}, } names := []string{"feature1", "style"} - got := buildExperimentSpecJSON(experiments, names) + // Without configs, bare-array fallback is used. + got := buildExperimentSpecJSON(experiments, nil, names) assert.JSONEq(t, `{"feature1":["A","B"],"style":["concise","detailed"]}`, got, "JSON spec should match expected structure") } +func TestBuildExperimentSpecJSONWithConfigs(t *testing.T) { + experiments := map[string][]string{ + "style": {"concise", "detailed"}, + } + configs := map[string]*ExperimentConfig{ + "style": { + Variants: []string{"concise", "detailed"}, + Description: "Test prompt style", + Weight: []int{70, 30}, + StartDate: "2026-01-01", + EndDate: "2026-12-31", + }, + } + names := []string{"style"} + got := buildExperimentSpecJSON(experiments, configs, names) + // Full config object should be embedded. 
+ assert.Contains(t, got, `"variants"`, "should include variants key") + assert.Contains(t, got, `"weight"`, "should include weight key") + assert.Contains(t, got, `"start_date"`, "should include start_date key") + assert.Contains(t, got, `"end_date"`, "should include end_date key") + assert.Contains(t, got, "concise", "should include variant value") +} + func TestBuildExperimentSpecJSONEscaping(t *testing.T) { experiments := map[string][]string{ `quote"test`: {`val"1`, `val\2`}, } names := []string{`quote"test`} - got := buildExperimentSpecJSON(experiments, names) + got := buildExperimentSpecJSON(experiments, nil, names) assert.Contains(t, got, `\"`, "double quotes should be escaped in JSON") } @@ -251,3 +322,112 @@ func TestBuildExperimentArtifactDownloadStep_NoPrefix(t *testing.T) { // Artifact name should be just the base name (no prefix) assert.Contains(t, joined, " name: experiment\n", "artifact name should be unqualified for non-workflow_call") } + +// ── extractExperimentConfigsFromFrontmatter ─────────────────────────────── + +func TestExtractExperimentConfigsFromFrontmatter(t *testing.T) { + tests := []struct { + name string + frontmatter map[string]any + check func(t *testing.T, got map[string]*ExperimentConfig) + }{ + { + name: "nil returns nil", + frontmatter: map[string]any{}, + check: func(t *testing.T, got map[string]*ExperimentConfig) { + assert.Nil(t, got, "nil when no experiments") + }, + }, + { + name: "bare array form returns config with variants only", + frontmatter: map[string]any{ + "experiments": map[string]any{ + "feature1": []any{"A", "B"}, + }, + }, + check: func(t *testing.T, got map[string]*ExperimentConfig) { + require.NotNil(t, got, "config should exist") + cfg := got["feature1"] + require.NotNil(t, cfg, "feature1 config should exist") + assert.Equal(t, []string{"A", "B"}, cfg.Variants, "variants should match") + assert.Empty(t, cfg.Description, "no description") + assert.Nil(t, cfg.Weight, "no weight") + }, + }, + { + name: "object 
form with all metadata fields", + frontmatter: map[string]any{ + "experiments": map[string]any{ + "prompt_style": map[string]any{ + "variants": []any{"concise", "verbose"}, + "description": "Test prompt styles", + "metric": "effective_tokens", + "weight": []any{60.0, 40.0}, + "issue": float64(1234), + "start_date": "2026-05-01", + "end_date": "2026-06-15", + }, + }, + }, + check: func(t *testing.T, got map[string]*ExperimentConfig) { + require.NotNil(t, got, "config should exist") + cfg := got["prompt_style"] + require.NotNil(t, cfg, "prompt_style config should exist") + assert.Equal(t, []string{"concise", "verbose"}, cfg.Variants, "variants should match") + assert.Equal(t, "Test prompt styles", cfg.Description, "description should match") + assert.Equal(t, "effective_tokens", cfg.Metric, "metric should match") + assert.Equal(t, []int{60, 40}, cfg.Weight, "weight should match") + assert.Equal(t, 1234, cfg.Issue, "issue should match") + assert.Equal(t, "2026-05-01", cfg.StartDate, "start_date should match") + assert.Equal(t, "2026-06-15", cfg.EndDate, "end_date should match") + }, + }, + { + name: "mixed bare array and object form in same map", + frontmatter: map[string]any{ + "experiments": map[string]any{ + "bare": []any{"X", "Y"}, + "object": map[string]any{"variants": []any{"P", "Q"}, "weight": []any{30.0, 70.0}}, + }, + }, + check: func(t *testing.T, got map[string]*ExperimentConfig) { + require.NotNil(t, got, "configs should exist") + require.Len(t, got, 2, "two experiments") + assert.Equal(t, []string{"X", "Y"}, got["bare"].Variants, "bare variants") + assert.Equal(t, []string{"P", "Q"}, got["object"].Variants, "object variants") + assert.Equal(t, []int{30, 70}, got["object"].Weight, "object weight") + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := extractExperimentConfigsFromFrontmatter(tt.frontmatter) + tt.check(t, got) + }) + } +} + +// ── generateExperimentSteps with ExperimentConfigs ──────────────────────── + 
+func TestGenerateExperimentSteps_WithConfigs(t *testing.T) { + c := &Compiler{} + data := &WorkflowData{ + Experiments: map[string][]string{ + "style": {"concise", "detailed"}, + }, + ExperimentConfigs: map[string]*ExperimentConfig{ + "style": { + Variants: []string{"concise", "detailed"}, + Weight: []int{70, 30}, + StartDate: "2026-01-01", + }, + }, + } + steps := c.generateExperimentSteps(data) + joined := strings.Join(steps, "") + // The spec should embed the full config object. + assert.Contains(t, joined, `"variants"`, "spec should include variants key") + assert.Contains(t, joined, `"weight"`, "spec should include weight key") + assert.Contains(t, joined, `"start_date"`, "spec should include start_date key") +} diff --git a/pkg/workflow/compiler_orchestrator_workflow.go b/pkg/workflow/compiler_orchestrator_workflow.go index 268aad5546..7e9598f173 100644 --- a/pkg/workflow/compiler_orchestrator_workflow.go +++ b/pkg/workflow/compiler_orchestrator_workflow.go @@ -383,8 +383,9 @@ func (c *Compiler) extractAdditionalConfigurations( // This runs last so that all section-specific configurations have been resolved first. applyTopLevelGitHubAppFallbacks(workflowData) - // Extract experiments configuration - workflowData.Experiments = extractExperimentsFromFrontmatter(frontmatter) + // Extract experiments configuration once; derive the simple variants map from the configs. 
+ workflowData.ExperimentConfigs = extractExperimentConfigsFromFrontmatter(frontmatter) + workflowData.Experiments = experimentVariantsFromConfigs(workflowData.ExperimentConfigs) return nil } diff --git a/pkg/workflow/compiler_types.go b/pkg/workflow/compiler_types.go index cd6b77d8bf..8a41fce343 100644 --- a/pkg/workflow/compiler_types.go +++ b/pkg/workflow/compiler_types.go @@ -513,6 +513,7 @@ type WorkflowData struct { ConcurrencyGroupExpr string // cached concurrency group expression extracted from Concurrency YAML (for performance optimization); populated by applyDefaults CachedConcurrencyGroupExprErr error // cached result of validateConcurrencyGroupExpression(ConcurrencyGroupExpr); nil = valid; populated by applyDefaults Experiments map[string][]string // A/B testing experiments: maps experiment name to variant list (from frontmatter) + ExperimentConfigs map[string]*ExperimentConfig // Full A/B experiment metadata (populated alongside Experiments) CachedConcurrencyGroupExprSet bool // true once CachedConcurrencyGroupExprErr has been populated; distinguishes "valid (nil)" from "not yet computed" CachedParsedToolsets []string // cached result of ParseGitHubToolsets for the GitHub tool (for performance optimization); populated by applyDefaults CachedAllowedDomainsStr string // cached allowed-domains string for sanitization (for performance optimization); computed once and reused across multiple compilation steps diff --git a/pkg/workflow/frontmatter_parsing.go b/pkg/workflow/frontmatter_parsing.go index eabc2938b5..1dbb6b2872 100644 --- a/pkg/workflow/frontmatter_parsing.go +++ b/pkg/workflow/frontmatter_parsing.go @@ -69,6 +69,11 @@ func ParseFrontmatterConfig(frontmatter map[string]any) (*FrontmatterConfig, err } } + // Populate typed ExperimentConfigs from the raw frontmatter map so that both the + // legacy bare-array form and the new object form are available as ExperimentConfig + // structs without callers needing to type-assert config.Experiments 
entries. + config.ExperimentConfigs = extractExperimentConfigsFromFrontmatter(frontmatter) + frontmatterTypesLog.Printf("Successfully parsed frontmatter config: name=%s, engine=%v", config.Name, config.Engine) return &config, nil } diff --git a/pkg/workflow/frontmatter_types.go b/pkg/workflow/frontmatter_types.go index 1f90bb1c9d..51e6c26df7 100644 --- a/pkg/workflow/frontmatter_types.go +++ b/pkg/workflow/frontmatter_types.go @@ -105,6 +105,36 @@ type PermissionsConfig struct { GitHubAppPermissionsConfig } +// ExperimentConfig represents the rich metadata for a single A/B experiment. +// The bare-array form (e.g. prompt_style: [concise, verbose]) is normalized to this +// struct with only the Variants field populated. +type ExperimentConfig struct { + // Variants is the ordered list of variant strings for this experiment (required, ≥ 2). + Variants []string `json:"variants"` + + // Description is a human-readable explanation of what the experiment tests. + Description string `json:"description,omitempty"` + + // Metric names the primary metric that should be observed (e.g. "effective_tokens"). + Metric string `json:"metric,omitempty"` + + // Weight holds an optional per-variant probability weight. When provided its length + // must equal the length of Variants. Values are relative (they need not sum to 100). + Weight []int `json:"weight,omitempty"` + + // Issue is an optional GitHub issue number that tracks this experiment. + Issue int `json:"issue,omitempty"` + + // StartDate is an optional ISO-8601 date (YYYY-MM-DD) before which the experiment + // is not active. When today is before this date the control variant (first variant) + // is used. + StartDate string `json:"start_date,omitempty"` + + // EndDate is an optional ISO-8601 date (YYYY-MM-DD) after which the experiment is + // no longer active. When today is after this date the control variant is used. 
+ EndDate string `json:"end_date,omitempty"` +} + // RateLimitConfig represents rate limiting configuration for workflow triggers // Limits how many times a user can trigger a workflow within a time window type RateLimitConfig struct { @@ -207,10 +237,15 @@ type FrontmatterConfig struct { SecretMasking *SecretMaskingConfig `json:"secret-masking,omitempty"` Observability *ObservabilityConfig `json:"observability,omitempty"` - // A/B testing experiments: maps experiment name to a list of variant values. - // Variants are picked at runtime using actions/cache to maintain state across runs. - // Use ${{ experiments.name }} in the workflow prompt to reference the selected variant. - Experiments map[string][]string `json:"experiments,omitempty"` + // A/B testing experiments: maps experiment name to either a bare variant array or an + // object-form ExperimentConfig. Typed as map[string]any so JSON unmarshaling succeeds + // for both the legacy bare-array form and the new object form; use ExperimentConfigs for + // typed access. See ExperimentConfig and extractExperimentConfigsFromFrontmatter. + Experiments map[string]any `json:"experiments,omitempty"` + + // ExperimentConfigs holds the fully-typed experiment metadata, populated alongside + // Experiments during frontmatter parsing. Keys match those of Experiments. 
+ ExperimentConfigs map[string]*ExperimentConfig `json:"-"` // Rate limiting configuration RateLimit *RateLimitConfig `json:"rate-limit,omitempty"` diff --git a/pkg/workflow/frontmatter_types_test.go b/pkg/workflow/frontmatter_types_test.go index 7b1aa1e3d0..2a32b45576 100644 --- a/pkg/workflow/frontmatter_types_test.go +++ b/pkg/workflow/frontmatter_types_test.go @@ -444,6 +444,42 @@ func TestFrontmatterConfigFieldExtraction(t *testing.T) { t.Error("node runtime should exist") } }) + + t.Run("parses object-form experiments without unmarshal error", func(t *testing.T) { + frontmatter := map[string]any{ + "experiments": map[string]any{ + // Object form: must not cause json.Unmarshal to fail. + "prompt_style": map[string]any{ + "variants": []any{"concise", "verbose"}, + "weight": []any{70.0, 30.0}, + }, + // Bare-array form: must still work alongside the object form. + "caveman": []any{"yes", "no"}, + }, + } + + config, err := ParseFrontmatterConfig(frontmatter) + if err != nil { + t.Fatalf("ParseFrontmatterConfig should not fail on object-form experiments: %v", err) + } + + if config.ExperimentConfigs == nil { + t.Fatal("ExperimentConfigs should be populated") + } + if len(config.ExperimentConfigs) != 2 { + t.Errorf("expected 2 experiment configs, got %d", len(config.ExperimentConfigs)) + } + ps := config.ExperimentConfigs["prompt_style"] + if ps == nil { + t.Fatal("prompt_style config should exist") + } + if len(ps.Variants) != 2 || ps.Variants[0] != "concise" { + t.Errorf("unexpected variants: %v", ps.Variants) + } + if len(ps.Weight) != 2 || ps.Weight[0] != 70 { + t.Errorf("unexpected weight: %v", ps.Weight) + } + }) } func TestFrontmatterConfigBackwardCompatibility(t *testing.T) {