github · pelikhan · May 3, 2026 · May 3, 2026 · May 3, 2026 · May 3, 2026
diff --git a/actions/setup/js/pick_experiment.cjs b/actions/setup/js/pick_experiment.cjs
@@ -20,7 +20,8 @@
  * Algorithm:
  *   When weight is provided the variant is chosen by weighted-random selection.
  *   Otherwise the variant with the lowest invocation count is selected next (ties are
- *   broken by variant order, yielding a deterministic round-robin across runs).
+ *   broken by random selection, ensuring no variant is systematically favoured on the
+ *   first run or whenever counts are equal).
  *   When start_date or end_date is provided and today falls outside that window the
  *   control variant (first variant) is used and no counter is incremented.
  */
@@ -124,8 +125,10 @@ function isWithinDateWindow(startDate, endDate, todayOverride) {
 
 /**
  * Pick the variant for one experiment using a balanced least-used selection.
- * The variant with the lowest cumulative count is chosen; ties are broken by
- * the order of the variants array so selection is deterministic.
+ * The variant with the lowest cumulative count is chosen; when multiple variants
+ * share the lowest count (including the initial empty-cache state where all counts
+ * are zero), one is selected at random to avoid systematically favouring the first
+ * declared variant.
  *
  * @param {string} name       - Experiment name
  * @param {string[]} variants - Array of variant values (length >= 2)
@@ -135,15 +138,17 @@ function isWithinDateWindow(startDate, endDate, todayOverride) {
 function pickVariant(name, variants, state) {
   const counts = state.counts[name] || {};
   let minCount = Infinity;
-  let selected = variants[0];
+  let tied = [];
   for (const variant of variants) {
     const c = counts[variant] || 0;
     if (c < minCount) {
       minCount = c;
-      selected = variant;
+      tied = [variant];
+    } else if (c === minCount) {
+      tied.push(variant);
     }
   }
-  return selected;
+  return tied[Math.floor(Math.random() * tied.length)];
 }
 
 /**
@@ -279,7 +284,7 @@ async function writeSummary(assignments, configs, state, core) {
     }
   }
 
-  lines.push("_Variants are selected by balanced round-robin (or weighted) to ensure statistical relevance across runs._");
+  lines.push("_Variants are selected by balanced round-robin (or weighted) to ensure statistical relevance across runs. Ties are broken randomly so no variant is systematically favoured on the first run._");
   await core.summary.addRaw(lines.join("\n")).write();
 }
 

diff --git a/actions/setup/js/pick_experiment.test.cjs b/actions/setup/js/pick_experiment.test.cjs
@@ -33,9 +33,17 @@ describe("pick_experiment", () => {
   // ── pickVariant ────────────────────────────────────────────────────────────
 
   describe("pickVariant", () => {
-    it("selects the first variant when counts are equal", () => {
+    it("breaks ties randomly for two-variant experiment when counts are equal", () => {
       const state = { counts: {} };
+      // Math.floor(0 * 2) = 0 → tied[0] = "A"
+      vi.spyOn(Math, "random").mockReturnValueOnce(0);
       expect(pickVariant("f", ["A", "B"], state)).toBe("A");
+
+      // Math.floor(0.5 * 2) = 1 → tied[1] = "B"
+      vi.spyOn(Math, "random").mockReturnValueOnce(0.5);
+      expect(pickVariant("f", ["A", "B"], state)).toBe("B");
+
+      vi.restoreAllMocks();
     });
 
     it("selects the least-used variant", () => {
@@ -48,14 +56,32 @@ describe("pick_experiment", () => {
       expect(pickVariant("f", ["A", "B", "C"], state)).toBe("C");
     });
 
-    it("returns the first variant when all counts are equal (tie-break by order)", () => {
+    it("randomly selects from all tied variants when all counts are equal", () => {
       const state = { counts: { f: { A: 1, B: 1, C: 1 } } };
+      // All three variants are tied; verify the random index is respected.
+      // Math.floor(0   * 3) = 0 → tied[0] = "A"
+      // Math.floor(0.4 * 3) = 1 → tied[1] = "B"  (0.4*3=1.2)
+      // Math.floor(0.7 * 3) = 2 → tied[2] = "C"  (0.7*3=2.1)
+      vi.spyOn(Math, "random").mockReturnValueOnce(0).mockReturnValueOnce(0.4).mockReturnValueOnce(0.7);
       expect(pickVariant("f", ["A", "B", "C"], state)).toBe("A");
+      expect(pickVariant("f", ["A", "B", "C"], state)).toBe("B");
+      expect(pickVariant("f", ["A", "B", "C"], state)).toBe("C");
+
+      vi.restoreAllMocks();
     });
 
-    it("handles unknown experiment name (no counts yet)", () => {
+    it("handles unknown experiment name (no counts yet) by picking randomly", () => {
       const state = { counts: {} };
+      // Both variants are tied with zero counts; verify the random index is respected.
+      // Math.floor(0   * 2) = 0 → tied[0] = "X"
+      // Math.floor(0.5 * 2) = 1 → tied[1] = "Y"
+      vi.spyOn(Math, "random").mockReturnValueOnce(0);
       expect(pickVariant("new", ["X", "Y"], state)).toBe("X");
+
+      vi.spyOn(Math, "random").mockReturnValueOnce(0.5);
+      expect(pickVariant("new", ["X", "Y"], state)).toBe("Y");
+
+      vi.restoreAllMocks();
     });
   });
 
@@ -147,13 +173,18 @@ describe("pick_experiment", () => {
       process.env.GH_AW_EXPERIMENT_STATE_FILE = stateFile;
       process.env.GH_AW_EXPERIMENT_STATE_DIR = tmpDir;
 
+      // Force Math.random → 0 so the first tied variant ("A") is selected.
+      vi.spyOn(Math, "random").mockReturnValue(0);
+
       await main();
 
       // Individual output per experiment
       expect(mockCore.setOutput).toHaveBeenCalledWith("feature1", "A");
       // Combined JSON output
       expect(mockCore.setOutput).toHaveBeenCalledWith("experiments", JSON.stringify({ feature1: "A" }));
       expect(mockCore.setFailed).not.toHaveBeenCalled();
+
+      vi.restoreAllMocks();
     });
 
     it("persists state between calls to simulate multi-run balance", async () => {
@@ -164,14 +195,18 @@ describe("pick_experiment", () => {
       process.env.GH_AW_EXPERIMENT_STATE_FILE = stateFile;
       process.env.GH_AW_EXPERIMENT_STATE_DIR = tmpDir;
 
+      // Force Math.random → 0 so the first tied variant ("X") is selected on the first run.
+      vi.spyOn(Math, "random").mockReturnValue(0);
+
       // First run → X
       await main();
       const firstCall = mockCore.setOutput.mock.calls.find(c => c[0] === "feat");
       expect(firstCall?.[1]).toBe("X");
 
+      vi.restoreAllMocks();
       vi.clearAllMocks();
 
-      // Second run → Y (state persisted from first call)
+      // Second run → Y (state persisted from first call; Y has the lower count)
       await main();
       const secondCall = mockCore.setOutput.mock.calls.find(c => c[0] === "feat");
       expect(secondCall?.[1]).toBe("Y");
@@ -198,12 +233,17 @@ describe("pick_experiment", () => {
       process.env.GH_AW_EXPERIMENT_STATE_FILE = stateFile;
       process.env.GH_AW_EXPERIMENT_STATE_DIR = tmpDir;
 
+      // Force Math.random → 0 so the first tied variant is chosen for each experiment.
+      vi.spyOn(Math, "random").mockReturnValue(0);
+
       await main();
 
       const assignmentsFile = path.join(tmpDir, "assignments.json");
       expect(fs.existsSync(assignmentsFile)).toBe(true);
       const assignments = JSON.parse(fs.readFileSync(assignmentsFile, "utf8"));
       expect(assignments).toEqual({ feature1: "A", style: "concise" });
+
+      vi.restoreAllMocks();
     });
 
     it("overwrites assignments.json on successive runs reflecting the current variant", async () => {
@@ -212,14 +252,18 @@ describe("pick_experiment", () => {
       process.env.GH_AW_EXPERIMENT_STATE_FILE = stateFile;
       process.env.GH_AW_EXPERIMENT_STATE_DIR = tmpDir;
 
+      // Force Math.random → 0 so the first tied variant ("X") is chosen on the first run.
+      vi.spyOn(Math, "random").mockReturnValue(0);
+
       // First run → X
       await main();
       const assignmentsFile = path.join(tmpDir, "assignments.json");
       expect(JSON.parse(fs.readFileSync(assignmentsFile, "utf8"))).toEqual({ feat: "X" });
 
+      vi.restoreAllMocks();
       vi.clearAllMocks();
 
-      // Second run → Y
+      // Second run → Y (Y has the lower count after first run recorded X)
       await main();
       expect(JSON.parse(fs.readFileSync(assignmentsFile, "utf8"))).toEqual({ feat: "Y" });
     });
@@ -265,10 +309,15 @@ describe("pick_experiment", () => {
       process.env.GH_AW_EXPERIMENT_STATE_FILE = stateFile;
       process.env.GH_AW_EXPERIMENT_STATE_DIR = tmpDir;
 
+      // Force Math.random → 0 so the first tied variant ("concise") is chosen.
+      vi.spyOn(Math, "random").mockReturnValue(0);
+
       await main();
 
       expect(mockCore.setOutput).toHaveBeenCalledWith("style", "concise");
       expect(mockCore.setFailed).not.toHaveBeenCalled();
+
+      vi.restoreAllMocks();
     });
 
     it("uses control variant when today is before start_date", async () => {

diff --git a/docs/adr/29534-frontmatter-ab-experiments-variant-selection.md b/docs/adr/29534-frontmatter-ab-experiments-variant-selection.md
@@ -24,7 +24,7 @@ An external feature-flag service would provide a mature A/B testing API with das
 
 #### Alternative 2: Random Per-Run Variant Selection Without State Persistence
 
-Selecting a variant randomly on each run (e.g., `Math.random()`) requires no cache and no persistent state. This was rejected because it does not guarantee balance: over N runs, some variants may appear much more (or less) often than `N/K`, making statistically meaningful comparisons impossible without a large number of runs. The least-used counter approach achieves approximate balance in far fewer runs.
+Selecting a variant randomly on each run (e.g., `Math.random()`) requires no cache and no persistent state. This was rejected as the sole selection strategy because it does not guarantee balance: over N runs, some variants may appear much more (or less) often than `N/K`, making statistically meaningful comparisons impossible without a large number of runs. The least-used counter approach achieves approximate balance in far fewer runs. However, random selection is retained as the tie-breaking strategy within the least-used algorithm — when multiple variants share the minimum count (including the initial empty-cache state), one is chosen at random to avoid systematically favouring the first declared variant.
 
 #### Alternative 3: CI/CD Environment Variables Set Externally
 
@@ -65,7 +65,7 @@ Teams could manually pass variant values as repository variables or dispatch inp
 ### Variant Selection
 
 5. Implementations **MUST** select the variant with the lowest cumulative invocation count across all previous runs (least-used selection).
-6. When two or more variants share the lowest count (including the initial state where all counts are zero), implementations **MUST** break ties by selecting the variant appearing earliest in the declared list.
+6. When two or more variants share the lowest count (including the initial state where all counts are zero), implementations **MUST** break ties by selecting uniformly at random from the tied variants, so no variant is systematically favoured on the first run or whenever counts are equal.
 7. Variant counts **MUST** be persisted between workflow runs using the GitHub Actions cache, keyed by a combination of the sanitized workflow ID and the current run ID, with a restore-key prefix that matches any prior run for that workflow ID.
 8. Implementations **MUST** expose each selected variant as a named step output (`steps.pick-experiment.outputs.<experiment-name>`) and **MUST** also set a combined JSON output (`steps.pick-experiment.outputs.experiments`) containing all variant assignments.
 9. Implementations **MUST** upload the experiment state directory as an artifact named `experiment` (using `if: always()`) so that assignments are available for post-run analysis even when subsequent steps fail.

diff --git a/docs/src/content/docs/guides/experiments.md b/docs/src/content/docs/guides/experiments.md
@@ -84,7 +84,7 @@ Address the issue described above.
 
 ## Statistical balancing
 
-The activation job maintains a per-variant invocation counter in an `actions/cache` entry keyed by workflow ID. The variant with the lowest cumulative count is selected on each run; ties are broken by variant order. Over N runs every variant is used approximately N/K times (K = variant count), providing basic A/B balance with no configuration.
+The activation job maintains a per-variant invocation counter in an `actions/cache` entry keyed by workflow ID. The variant with the lowest cumulative count is selected on each run; when multiple variants share the lowest count (including the very first run when the cache is empty), one is chosen at random so no variant is systematically favoured. Over N runs every variant is used approximately N/K times (K = variant count), providing basic A/B balance with no configuration.
 
 The counter persists across workflow runs via the GitHub Actions cache. A fresh repository starts from zero counts.
 

diff --git a/pkg/parser/schemas/main_workflow_schema.json b/pkg/parser/schemas/main_workflow_schema.json
@@ -2749,7 +2749,7 @@
       ]
     },
     "experiments": {
-      "description": "A/B testing experiments. Each key is an experiment name; the value is either an array of two or more variant strings (bare-array form) or an object with a 'variants' field plus optional metadata fields (description, metric, weight, issue, start_date, end_date, hypothesis, secondary_metrics, guardrail_metrics, min_samples, owner). At runtime the activation job picks a variant using actions/cache to maintain consistent assignment across runs. Use ${{ experiments.<name> }} in the workflow prompt to reference the selected variant. When multiple experiments are declared, assignments are statistically balanced using a counter that round-robins across variants (or weighted when 'weight' is provided).",
+      "description": "A/B testing experiments. Each key is an experiment name; the value is either an array of two or more variant strings (bare-array form) or an object with a 'variants' field plus optional metadata fields (description, metric, weight, issue, start_date, end_date, hypothesis, secondary_metrics, guardrail_metrics, min_samples, owner). At runtime the activation job picks a variant using actions/cache to maintain consistent assignment across runs. Use ${{ experiments.<name> }} in the workflow prompt to reference the selected variant. When multiple experiments are declared, assignments are statistically balanced using a least-used counter that round-robins across variants (or weighted when 'weight' is provided); ties are broken randomly so no variant is systematically favoured on the first run.",
       "type": "object",
       "propertyNames": {
         "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*$",