diff --git a/benchmarks/pandas/bench_str_findall.py b/benchmarks/pandas/bench_str_findall.py new file mode 100644 index 00000000..d17a33d8 --- /dev/null +++ b/benchmarks/pandas/bench_str_findall.py @@ -0,0 +1,33 @@ +""" +Benchmark: str.findall, str.extract (first match), str.count on 10k-element string Series +""" +import json +import time +import pandas as pd + +ROWS = 10_000 +WARMUP = 3 +ITERATIONS = 10 + +data = [f"item{i} code{i * 3} ref{i + 1}" for i in range(ROWS)] +s = pd.Series(data) +pat = r"\d+" + +for _ in range(WARMUP): + s.str.findall(pat) + s.str.extract(r"(\d+)", expand=False) + s.str.count(pat) + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.str.findall(pat) + s.str.extract(r"(\d+)", expand=False) + s.str.count(pat) +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ + "function": "str_findall", + "mean_ms": total / ITERATIONS, + "iterations": ITERATIONS, + "total_ms": total, +})) diff --git a/benchmarks/pandas/bench_to_markdown.py b/benchmarks/pandas/bench_to_markdown.py new file mode 100644 index 00000000..cb586a98 --- /dev/null +++ b/benchmarks/pandas/bench_to_markdown.py @@ -0,0 +1,36 @@ +"""Benchmark: to_markdown and to_latex on a 1000-row DataFrame""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 1_000 +WARMUP = 3 +ITERATIONS = 10 + +a = np.arange(ROWS) * 1.5 +b = [f"item_{i % 50}" for i in range(ROWS)] +c = np.arange(ROWS) % 100 +df = pd.DataFrame({"a": a, "b": b, "c": c}) + +for _ in range(WARMUP): + df.to_markdown() + df.to_latex() + +start_md = time.perf_counter() +for _ in range(ITERATIONS): + df.to_markdown() +total_md = (time.perf_counter() - start_md) * 1000 + +start_ltx = time.perf_counter() +for _ in range(ITERATIONS): + df.to_latex() +total_ltx = (time.perf_counter() - start_ltx) * 1000 + +total = total_md + total_ltx + +print(json.dumps({ + "function": "to_markdown_latex", + "mean_ms": total / (ITERATIONS * 2), + "iterations": ITERATIONS * 2, + "total_ms": total, +})) diff --git a/benchmarks/tsb/bench_str_findall.ts b/benchmarks/tsb/bench_str_findall.ts new file mode 100644 index 00000000..1c9f8894 --- /dev/null +++ b/benchmarks/tsb/bench_str_findall.ts @@ -0,0 +1,34 @@ +/** + * Benchmark: strFindall, strFindFirst, strFindallCount on 10k-element string Series + */ +import { Series, strFindall, strFindFirst, strFindallCount } from "../../src/index.js"; + +const ROWS = 10_000; +const WARMUP = 3; +const ITERATIONS = 10; +const data = Array.from({ length: ROWS }, (_, i) => `item${i} code${i * 3} ref${i + 1}`); +const s = new Series({ data }); +const pat = /\d+/g; + +for (let i = 0; i < WARMUP; i++) { + strFindall(s, pat); + strFindFirst(s, pat); + strFindallCount(s, pat); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + strFindall(s, pat); + strFindFirst(s, pat); + strFindallCount(s, pat); +} +const total = performance.now() - start; + +console.log( + JSON.stringify({ + function: "str_findall", + mean_ms: total / ITERATIONS, + iterations: ITERATIONS, + total_ms: total, + }), +); diff --git a/benchmarks/tsb/bench_to_markdown.ts b/benchmarks/tsb/bench_to_markdown.ts new file mode 100644 index 00000000..a7a21c91 --- /dev/null +++ b/benchmarks/tsb/bench_to_markdown.ts @@ -0,0 +1,41 @@ +/** + * Benchmark: toMarkdown and toLaTeX on a 1000-row DataFrame + */ +import { DataFrame, toMarkdown, toLaTeX } from "../../src/index.js"; + +const ROWS = 1_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const a = Float64Array.from({ length: ROWS }, (_, i) => i * 1.5); +const b = Array.from({ length: ROWS }, (_, i) => `item_${i % 50}`); +const c = Int32Array.from({ length: ROWS }, (_, i) => i % 100); +const df = DataFrame.fromColumns({ a, b, c }); + +for (let i = 0; i < WARMUP; i++) { + toMarkdown(df); + toLaTeX(df); +} + +const startMd = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + toMarkdown(df); +} +const totalMd = performance.now() - startMd; + +const startLtx = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + toLaTeX(df); +} +const totalLtx = performance.now() - startLtx; + +const total = totalMd + totalLtx; + +console.log( + JSON.stringify({ + function: "to_markdown_latex", + mean_ms: total / (ITERATIONS * 2), + iterations: ITERATIONS * 2, + total_ms: total, + }), +);