From 2d6c438c09b58afbd7e59d1b075119fc52bd4114 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 17 May 2026 07:50:56 +0000 Subject: [PATCH 1/4] [Autoloop: perf-comparison] Iteration 319: Add pdArray/PandasArray benchmark pair Run: https://github.com/githubnext/tsb/actions/runs/25984977778 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- benchmarks/pandas/bench_pd_array.py | 50 +++++++++++++++++++++++++++++ benchmarks/tsb/bench_pd_array.ts | 42 ++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 benchmarks/pandas/bench_pd_array.py create mode 100644 benchmarks/tsb/bench_pd_array.ts diff --git a/benchmarks/pandas/bench_pd_array.py b/benchmarks/pandas/bench_pd_array.py new file mode 100644 index 00000000..7dc56a63 --- /dev/null +++ b/benchmarks/pandas/bench_pd_array.py @@ -0,0 +1,50 @@ +""" +Benchmark: pandas.array() — create and iterate typed arrays. +Outputs JSON: {"function": "pd_array", "mean_ms": ..., "iterations": ..., "total_ms": ...} +""" +import json +import time +import pandas as pd +import numpy as np + +SIZE = 10_000 +WARMUP = 5 +ITERATIONS = 100 + +int_data = list(range(SIZE)) +float_data = [i * 0.5 for i in range(SIZE)] +string_data = [f"item_{i % 100}" for i in range(SIZE)] +mixed_data = [None if i % 3 == 0 else i for i in range(SIZE)] + + +def run(): + a = pd.array(int_data, dtype="Int64") + b = pd.array(float_data, dtype="Float64") + c = pd.array(string_data, dtype="string") + d = pd.array(mixed_data, dtype="Int64") + + # Access elements + _ = a[-1] + _ = b[0] + _ = len(c) + _ = d[0] + + +for _ in range(WARMUP): + run() + +start = time.perf_counter() +for _ in range(ITERATIONS): + run() +total_ms = (time.perf_counter() - start) * 1000 + +print( + json.dumps( + { + "function": "pd_array", + "mean_ms": total_ms / ITERATIONS, + "iterations": ITERATIONS, + "total_ms": total_ms, + } + ) +) diff --git a/benchmarks/tsb/bench_pd_array.ts b/benchmarks/tsb/bench_pd_array.ts new file mode 100644 index 00000000..466df5fc --- /dev/null +++ b/benchmarks/tsb/bench_pd_array.ts @@ -0,0 +1,42 @@ +/** + * Benchmark: pdArray / PandasArray — create and iterate typed arrays. + * Outputs JSON: {"function": "pd_array", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { pdArray } from "../../src/index.js"; + +const SIZE = 10_000; +const WARMUP = 5; +const ITERATIONS = 100; + +const intData = Array.from({ length: SIZE }, (_, i) => i); +const floatData = Array.from({ length: SIZE }, (_, i) => i * 0.5); +const stringData = Array.from({ length: SIZE }, (_, i) => `item_${i % 100}`); +const mixedData = Array.from({ length: SIZE }, (_, i) => (i % 3 === 0 ? null : i)); + +function run(): void { + const a = pdArray(intData, "int64"); + const b = pdArray(floatData, "float64"); + const c = pdArray(stringData, "string"); + const d = pdArray(mixedData); + + // Access elements and iterate + void a.at(SIZE - 1); + void b.toArray(); + void c.at(0); + void d.length; +} + +for (let i = 0; i < WARMUP; i++) run(); + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) run(); +const total = performance.now() - start; + +console.log( + JSON.stringify({ + function: "pd_array", + mean_ms: total / ITERATIONS, + iterations: ITERATIONS, + total_ms: total, + }), +); From c04e78cdd376565defd6533a6975443b9f65cfd5 Mon Sep 17 00:00:00 2001 From: Russell Horton Date: Sun, 17 May 2026 01:01:21 -0700 Subject: [PATCH 2/4] chore: trigger CI [evergreen] From d70da5f30628a74f560b4b34aed7a704a54d5664 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 18 May 2026 01:39:40 +0000 Subject: [PATCH 3/4] [Autoloop: perf-comparison] Iteration 320: Add to_from_dict benchmark pair Run: https://github.com/githubnext/tsb/actions/runs/26008813008 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- benchmarks/pandas/bench_to_from_dict.py | 47 +++++++++++++++++++++++++ benchmarks/tsb/bench_to_from_dict.ts | 47 +++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 benchmarks/pandas/bench_to_from_dict.py create mode 100644 benchmarks/tsb/bench_to_from_dict.ts diff --git a/benchmarks/pandas/bench_to_from_dict.py b/benchmarks/pandas/bench_to_from_dict.py new file mode 100644 index 00000000..e90467e1 --- /dev/null +++ b/benchmarks/pandas/bench_to_from_dict.py @@ -0,0 +1,47 @@ +""" +Benchmark: DataFrame.to_dict / DataFrame.from_dict — dict orient conversions. +Tests: list, records, split, index orient round-trips on a 10k-row DataFrame. +Outputs JSON: {"function": "to_from_dict", "mean_ms": ..., "iterations": ..., "total_ms": ...} +""" +import time +import json +import pandas as pd + +SIZE = 10_000 +WARMUP = 5 +ITERATIONS = 50 + +df = pd.DataFrame({ + "a": list(range(SIZE)), + "b": [i * 1.5 for i in range(SIZE)], + "c": [f"str_{i % 100}" for i in range(SIZE)], +}) + +small_list = {"a": [1, 2, 3], "b": [4, 5, 6]} +small_df = pd.DataFrame(small_list) +small_index = {0: {"a": 1, "b": 4}, 1: {"a": 2, "b": 5}} + +for _ in range(WARMUP): + df.to_dict(orient="list") + df.to_dict(orient="records") + df.to_dict(orient="split") + df.to_dict(orient="index") + pd.DataFrame.from_dict(small_list) + pd.DataFrame.from_dict(small_index, orient="index") + +start = time.perf_counter() +for _ in range(ITERATIONS): + df.to_dict(orient="list") + df.to_dict(orient="records") + df.to_dict(orient="split") + df.to_dict(orient="index") + pd.DataFrame.from_dict(small_list) + pd.DataFrame.from_dict(small_index, orient="index") +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ + "function": "to_from_dict", + "mean_ms": round(total / ITERATIONS, 3), + "iterations": ITERATIONS, + "total_ms": round(total, 3), +})) diff --git a/benchmarks/tsb/bench_to_from_dict.ts b/benchmarks/tsb/bench_to_from_dict.ts new file mode 100644 index 00000000..260b91b6 --- /dev/null +++ b/benchmarks/tsb/bench_to_from_dict.ts @@ -0,0 +1,47 @@ +/** + * Benchmark: toDictOriented / fromDictOriented — DataFrame ↔ dict conversions. + * Tests all orient variants: "list", "records", "split", "index", "tight". + * Outputs JSON: {"function": "to_from_dict", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { DataFrame, toDictOriented, fromDictOriented } from "../../src/index.js"; + +const SIZE = 10_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const df = new DataFrame({ + a: Array.from({ length: SIZE }, (_, i) => i), + b: Array.from({ length: SIZE }, (_, i) => i * 1.5), + c: Array.from({ length: SIZE }, (_, i) => `str_${i % 100}`), +}); + +for (let i = 0; i < WARMUP; i++) { + toDictOriented(df, "list"); + toDictOriented(df, "records"); + toDictOriented(df, "split"); + toDictOriented(df, "index"); + toDictOriented(df, "tight"); + fromDictOriented({ a: [1, 2, 3], b: [4, 5, 6] }); + fromDictOriented({ 0: { a: 1, b: 4 }, 1: { a: 2, b: 5 } }, "index"); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + toDictOriented(df, "list"); + toDictOriented(df, "records"); + toDictOriented(df, "split"); + toDictOriented(df, "index"); + toDictOriented(df, "tight"); + fromDictOriented({ a: [1, 2, 3], b: [4, 5, 6] }); + fromDictOriented({ 0: { a: 1, b: 4 }, 1: { a: 2, b: 5 } }, "index"); +} +const total = performance.now() - start; + +console.log( + JSON.stringify({ + function: "to_from_dict", + mean_ms: Math.round((total / ITERATIONS) * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(total * 1000) / 1000, + }), +); From 67a896a992c101b6328048f72fe5ebce8f20a986 Mon Sep 17 00:00:00 2001 From: Russell Horton Date: Sun, 17 May 2026 19:09:23 -0700 Subject: [PATCH 4/4] chore: trigger CI [evergreen]