githubnext · mrjf · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/.autoloop/programs/perf-comparison/program.md b/.autoloop/programs/perf-comparison/program.md
@@ -22,9 +22,9 @@ This is an open-ended program — it runs continuously, always adding the next b
    - Creates the same dataset as the TypeScript version
    - Runs the same operation with the same loop structure
    - Outputs the same JSON format
-5. **Run both benchmarks** via `benchmarks/run_benchmarks.sh` and capture results.
-6. **Update `benchmarks/results.json`** with the new timing data.
-7. **Update `playground/benchmarks.html`** to display the new function's comparison metrics.
+5. **Update `playground/benchmarks.html`** if needed to display the new function's comparison metrics.
+
+The evaluation step (below) runs `benchmarks/run_benchmarks.sh` to execute **every** TS/Python benchmark pair and regenerates `benchmarks/results.json` with the real timing data. That regenerated file is what gets committed on a successful iteration, so when the autoloop branch is merged to `main`, the pages workflow (`.github/workflows/pages.yml`) picks up the real results and `playground/benchmarks.html` renders real comparison data instead of "No benchmark data available yet."
 
 ### Key constraints
 
@@ -50,24 +50,59 @@ Do NOT modify:
 
 ## Evaluation
 
+The evaluation runs `benchmarks/run_benchmarks.sh`, which executes every TS/Python
+benchmark pair and writes real timing data to `benchmarks/results.json`. The metric
+is the number of benchmarks that appear in that regenerated file — i.e. the number
+of function pairs whose benchmarks actually ran to completion and produced valid
+JSON output. This means a benchmark pair is only "counted" if it truly runs, and
+the committed `benchmarks/results.json` always reflects real data that the
+`pages.yml` workflow will copy to the playground on merge to `main`.
+
 ```bash
-# Set up Python environment if needed
+set -euo pipefail
+
+# Ensure Python and pandas are available
 if ! command -v python3 &>/dev/null; then
-  echo "Python3 not found, skipping"
+  echo "ERROR: python3 is required but not found" >&2
+  exit 1
 fi
-pip3 install pandas --quiet 2>/dev/null || true
-
-# Count the number of benchmark pairs (functions with both TS and Python benchmarks)
-ts_benchmarks=$(ls benchmarks/tsb/bench_*.ts 2>/dev/null | wc -l | tr -d ' ')
-py_benchmarks=$(ls benchmarks/pandas/bench_*.py 2>/dev/null | wc -l | tr -d ' ')
-
-# The metric is the minimum of the two (both must exist for a complete benchmark)
-if [ "$ts_benchmarks" -lt "$py_benchmarks" ]; then
-  count=$ts_benchmarks
-else
-  count=$py_benchmarks
+python3 -c "import pandas" 2>/dev/null || pip3 install pandas --quiet
+
+# Ensure Bun is available (install if missing — autoloop runners may not have it).
+# Failure to install Bun is logged but does not abort the script, because we must
+# still emit the final metric line for autoloop to parse.
+if ! command -v bun &>/dev/null; then
+  curl -fsSL https://bun.sh/install | bash || echo "WARN: bun install script failed" >&2
+  export PATH="$HOME/.bun/bin:$PATH"
+fi
+if ! command -v bun &>/dev/null; then
+  echo "ERROR: bun is not available after install attempt; benchmarks will fail" >&2
 fi
 
+# Install JS/TS dependencies so benchmark scripts can import from src/.
+# `|| true` keeps the script alive so the final metric is still emitted; any
+# errors are visible in the autoloop logs for debugging.
+bun install --silent || echo "WARN: bun install failed; benchmarks may fail to import src/" >&2
+
+# Run every benchmark pair and regenerate benchmarks/results.json with real data.
+# This is the file .github/workflows/pages.yml copies into the playground, so
+# committing it here is what makes real benchmark data appear on the pages site
+# once the autoloop branch is merged to main. Output is left visible so
+# per-benchmark failures can be diagnosed from autoloop logs; `|| true` ensures
+# we still reach the metric emission below if the script exits nonzero.
+bash benchmarks/run_benchmarks.sh || echo "WARN: run_benchmarks.sh exited nonzero" >&2
+
+# Metric: number of benchmark entries in the regenerated results.json.
+count=$(python3 -c "
+import json
+try:
+    with open('benchmarks/results.json') as f:
+        data = json.load(f)
+    print(len(data.get('benchmarks', [])))
+except Exception:
+    print(0)
+")
+
 echo "{\"benchmarked_functions\": ${count:-0}}"
 ```
 

diff --git a/src/reshape/explode.ts b/src/reshape/explode.ts
@@ -36,7 +36,7 @@ export interface ExplodeOptions {
    * When `false` (default), the original row labels are propagated
    * (duplicated once for each element of each list value).
    */
-  readonly ignore_index?: boolean;
+  readonly ignoreIndex?: boolean;
 }
 
 // ─── helpers ──────────────────────────────────────────────────────────────────
@@ -89,7 +89,7 @@ function expandCell(value: Scalar): Scalar[] {
  * ```
  */
 export function explodeSeries(series: Series<Scalar>, options?: ExplodeOptions): Series<Scalar> {
-  const ignoreIndex = options?.ignore_index ?? false;
+  const ignoreIndex = options?.ignoreIndex ?? false;
   const outValues: Scalar[] = [];
   const outLabels: Label[] = [];
 
@@ -122,7 +122,7 @@ export function explodeSeries(series: Series<Scalar>, options?: ExplodeOptions):
  * Explode one or more list-valued columns of a DataFrame into multiple rows.
  *
  * All other columns have their values repeated to match the expanded rows.
- * Row labels are propagated (duplicated) unless `ignore_index` is `true`.
+ * Row labels are propagated (duplicated) unless `ignoreIndex` is `true`.
  *
  * When multiple columns are specified they must have the same list lengths per
  * row — pandas raises a `ValueError` for mismatched lengths; here each column
@@ -150,9 +150,8 @@ export function explodeDataFrame(
   column: string | readonly string[],
   options?: ExplodeOptions,
 ): DataFrame {
-  const ignoreIndex = options?.ignore_index ?? false;
+  const ignoreIndex = options?.ignoreIndex ?? false;
   const explodeCols: readonly string[] = typeof column === "string" ? [column] : column;
-
   // Validate column names
   for (const col of explodeCols) {
     if (!df.columns.values.includes(col)) {