Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions benchmarks/pandas/bench_pd_array.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
Benchmark: pandas.array() — create and iterate typed arrays.
Outputs JSON: {"function": "pd_array", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import json
import time
import pandas as pd
import numpy as np

# Benchmark parameters: elements per array, untimed warm-up passes, timed passes.
SIZE = 10_000
WARMUP = 5
ITERATIONS = 100

# Input lists are built once, outside the timed loop.
int_data = [*range(SIZE)]
float_data = [0.5 * n for n in range(SIZE)]
string_data = [f"item_{n % 100}" for n in range(SIZE)]
# Indices 0, 3, 6, ... hold None; every other slot holds its own index.
mixed_data = [n if n % 3 else None for n in range(SIZE)]


def run():
    """Run one benchmark pass: build four typed arrays, then touch each one.

    Constructs ``Int64``, ``Float64`` and ``string`` arrays from the
    module-level input lists (``mixed_data`` contains ``None`` entries), then
    performs the same accesses as the TypeScript counterpart in
    ``benchmarks/tsb/bench_pd_array.ts`` so both sides time comparable work.
    All results are discarded; nothing is returned.
    """
    a = pd.array(int_data, dtype="Int64")
    b = pd.array(float_data, dtype="Float64")
    c = pd.array(string_data, dtype="string")
    d = pd.array(mixed_data, dtype="Int64")

    # Access elements and iterate, mirroring the TS benchmark one-for-one:
    # a.at(SIZE - 1), b.toArray(), c.at(0), d.length.
    _ = a[-1]
    # Full materialisation: a single b[0] lookup is O(1) and would not be
    # comparable to the O(n) toArray() call timed on the TS side.
    _ = b.to_numpy()
    _ = c[0]
    _ = len(d)


# Untimed warm-up passes.
for _ in range(WARMUP):
    run()

# Timed passes; perf_counter() reports seconds, so scale to milliseconds.
t0 = time.perf_counter()
for _ in range(ITERATIONS):
    run()
total_ms = 1000 * (time.perf_counter() - t0)

# Emit a single JSON object on stdout.
report = {
    "function": "pd_array",
    "mean_ms": total_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total_ms,
}
print(json.dumps(report))
47 changes: 47 additions & 0 deletions benchmarks/pandas/bench_to_from_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Benchmark: DataFrame.to_dict / DataFrame.from_dict — dict orient conversions.
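Tests: to_dict orients (including list, records, split, index) on a 10k-row DataFrame, plus from_dict on small column- and index-keyed dicts; the from_dict inputs are separate, so these are not round-trips.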
Outputs JSON: {"function": "to_from_dict", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import time
import json
import pandas as pd

# Benchmark parameters: rows in the DataFrame, untimed warm-up passes, timed passes.
SIZE = 10_000
WARMUP = 5
ITERATIONS = 50

# Orients passed to DataFrame.to_dict on every pass. "tight" is included so the
# workload matches benchmarks/tsb/bench_to_from_dict.ts, which also times it;
# pandas supports orient="tight" from version 1.4.0.
TO_DICT_ORIENTS = ("list", "records", "split", "index", "tight")

df = pd.DataFrame({
    "a": list(range(SIZE)),
    "b": [i * 1.5 for i in range(SIZE)],
    "c": [f"str_{i % 100}" for i in range(SIZE)],
})

# Small inputs for from_dict: column-keyed (default orient) and index-keyed.
small_list = {"a": [1, 2, 3], "b": [4, 5, 6]}
small_index = {0: {"a": 1, "b": 4}, 1: {"a": 2, "b": 5}}


def run():
    """Run one benchmark pass: each to_dict orient, then both from_dict forms."""
    for orient in TO_DICT_ORIENTS:
        df.to_dict(orient=orient)
    pd.DataFrame.from_dict(small_list)
    pd.DataFrame.from_dict(small_index, orient="index")


# Untimed warm-up passes.
for _ in range(WARMUP):
    run()

# Timed passes; perf_counter() reports seconds, so scale to milliseconds.
start = time.perf_counter()
for _ in range(ITERATIONS):
    run()
total = (time.perf_counter() - start) * 1000

print(json.dumps({
    "function": "to_from_dict",
    "mean_ms": round(total / ITERATIONS, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total, 3),
}))
42 changes: 42 additions & 0 deletions benchmarks/tsb/bench_pd_array.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/**
* Benchmark: pdArray / PandasArray — create and iterate typed arrays.
* Outputs JSON: {"function": "pd_array", "mean_ms": ..., "iterations": ..., "total_ms": ...}
*/
import { pdArray } from "../../src/index.js";

// Benchmark parameters: elements per array, untimed warm-up passes, timed passes.
const SIZE = 10_000;
const WARMUP = 5;
const ITERATIONS = 100;

/** Builds a SIZE-element array whose i-th entry is produce(i). */
function generate<T>(produce: (index: number) => T): T[] {
  return Array.from({ length: SIZE }, (_slot, index) => produce(index));
}

// Input arrays are built once, outside the timed loop.
const intData = generate((index) => index);
const floatData = generate((index) => index * 0.5);
const stringData = generate((index) => `item_${index % 100}`);
// Indices 0, 3, 6, ... hold null; every other slot holds its own index.
const mixedData = generate((index) => (index % 3 === 0 ? null : index));

/** Runs one benchmark pass: builds four pdArray instances, then touches each one. */
function run(): void {
  const ints = pdArray(intData, "int64");
  const floats = pdArray(floatData, "float64");
  const strings = pdArray(stringData, "string");
  // No dtype argument is passed for the array that contains nulls.
  const withNulls = pdArray(mixedData);

  // Results are discarded; `void` marks each expression as intentionally unused.
  void ints.at(SIZE - 1);
  void floats.toArray();
  void strings.at(0);
  void withNulls.length;
}

// Untimed warm-up passes.
for (let pass = 0; pass < WARMUP; pass += 1) {
  run();
}

// Timed passes; performance.now() already reports milliseconds.
const startedAt = performance.now();
for (let pass = 0; pass < ITERATIONS; pass += 1) {
  run();
}
const elapsedMs = performance.now() - startedAt;

// Emit a single JSON object on stdout.
const report = {
  function: "pd_array",
  mean_ms: elapsedMs / ITERATIONS,
  iterations: ITERATIONS,
  total_ms: elapsedMs,
};
console.log(JSON.stringify(report));
47 changes: 47 additions & 0 deletions benchmarks/tsb/bench_to_from_dict.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/**
* Benchmark: toDictOriented / fromDictOriented — DataFrame ↔ dict conversions.
* Tests all orient variants: "list", "records", "split", "index", "tight".
* Outputs JSON: {"function": "to_from_dict", "mean_ms": ..., "iterations": ..., "total_ms": ...}
*/
import { DataFrame, toDictOriented, fromDictOriented } from "../../src/index.js";

// Benchmark parameters: rows in the DataFrame, untimed warm-up passes, timed passes.
const SIZE = 10_000;
const WARMUP = 5;
const ITERATIONS = 50;

// Column values are built up front: a = i, b = i * 1.5, c = "str_<i mod 100>".
const columnA = Array.from({ length: SIZE }, (_slot, row) => row);
const columnB = Array.from({ length: SIZE }, (_slot, row) => row * 1.5);
const columnC = Array.from({ length: SIZE }, (_slot, row) => `str_${row % 100}`);

const df = new DataFrame({ a: columnA, b: columnB, c: columnC });

/** Runs one benchmark pass: every toDictOriented orient, then both fromDictOriented forms. */
function runOnce(): void {
  toDictOriented(df, "list");
  toDictOriented(df, "records");
  toDictOriented(df, "split");
  toDictOriented(df, "index");
  toDictOriented(df, "tight");
  // The small input literals are rebuilt on every pass.
  fromDictOriented({ a: [1, 2, 3], b: [4, 5, 6] });
  fromDictOriented({ 0: { a: 1, b: 4 }, 1: { a: 2, b: 5 } }, "index");
}

/** Rounds a millisecond value to three decimal places. */
function round3(ms: number): number {
  return Math.round(ms * 1000) / 1000;
}

// Untimed warm-up passes.
for (let pass = 0; pass < WARMUP; pass += 1) {
  runOnce();
}

// Timed passes; performance.now() already reports milliseconds.
const startedAt = performance.now();
for (let pass = 0; pass < ITERATIONS; pass += 1) {
  runOnce();
}
const elapsedMs = performance.now() - startedAt;

// Emit a single JSON object on stdout.
const report = {
  function: "to_from_dict",
  mean_ms: round3(elapsedMs / ITERATIONS),
  iterations: ITERATIONS,
  total_ms: round3(elapsedMs),
};
console.log(JSON.stringify(report));
Loading