From 5efbc5028ed17be916f1349214b9d2b3c65f1c40 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 12 May 2026 07:29:14 +0000 Subject: [PATCH 01/16] [Autoloop: build-tsb-pandas-typescript-migration] Iteration 310: Add pd.api.extensions (ExtensionDtype, ExtensionArray, accessor registration) Run: https://github.com/githubnext/tsessebe/actions/runs/25719465265 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- playground/extensions.html | 261 +++++++++++++++++++++++++ src/core/extensions.ts | 356 ++++++++++++++++++++++++++++++++++ src/core/index.ts | 17 ++ src/core/pd_api.ts | 3 + src/index.ts | 17 ++ tests/core/extensions.test.ts | 294 ++++++++++++++++++++++++++++ 6 files changed, 948 insertions(+) create mode 100644 playground/extensions.html create mode 100644 src/core/extensions.ts create mode 100644 tests/core/extensions.test.ts diff --git a/playground/extensions.html b/playground/extensions.html new file mode 100644 index 00000000..7ad5cec4 --- /dev/null +++ b/playground/extensions.html @@ -0,0 +1,261 @@ + + +
+ + +
+ The api.extensions namespace lets you build custom array types and dtypes
+ that integrate with tsb DataFrames and Series — mirroring pandas.api.extensions.
+
| Symbol | Mirrors | Description |
|---|---|---|
ExtensionDtype | pandas.api.extensions.ExtensionDtype | Abstract base class for custom dtypes |
ExtensionArray | pandas.api.extensions.ExtensionArray | Abstract base class for custom 1-D arrays |
registerExtensionDtype(cls) | register_extension_dtype | Register a dtype so it can be resolved from a string |
constructExtensionDtypeFromString(s) | internal pandas helper | Resolve a string to a registered extension dtype |
registerSeriesAccessor(name, cls) | register_series_accessor | Register a custom accessor on Series |
registerDataFrameAccessor(name, cls) | register_dataframe_accessor | Register a custom accessor on DataFrame |
registerIndexAccessor(name, cls) | register_index_accessor | Register a custom accessor on Index |
getRegisteredAccessors(target) | — | Return all registered accessors for a target |
+ Subclass ExtensionDtype to define a new dtype.
+ Implement name, type, kind, and
+ optionally construct_from_string so the dtype can be resolved
+ from a plain string.
+
import { ExtensionDtype } from "tsb"; + +class IPDtype extends ExtensionDtype { + get name() { return "ip"; } + get type() { return String; } + get kind() { return "O"; } + + static override construct_from_string(s: string): IPDtype | null { + return s === "ip" ? new IPDtype() : null; + } +} + +const d = new IPDtype(); +console.log(d.name); // "ip" +console.log(d.kind); // "O" +console.log(d.isNumeric); // false +console.log(String(d)); // "ip"+
+ Subclass ExtensionArray to hold a column of your custom elements.
+ At a minimum, implement dtype, length, getItem,
+ and slice. The default isna and toArray
+ implementations call getItem repeatedly — override them for performance.
+
import { ExtensionArray } from "tsb"; + +class IPArray extends ExtensionArray { + readonly _data: (string | null)[]; + + constructor(data: (string | null)[]) { + super(); + this._data = data; + } + + get dtype() { return new IPDtype(); } + get length() { return this._data.length; } + + getItem(i: number): string | null { + const idx = i < 0 ? this._data.length + i : i; + return this._data[idx] ?? null; + } + + slice(start: number, stop: number): IPArray { + return new IPArray(this._data.slice(start, stop)); + } +} + +const arr = new IPArray(["1.1.1.1", null, "8.8.8.8"]); +console.log(arr.length); // 3 +console.log(arr.getItem(0)); // "1.1.1.1" +console.log(arr.getItem(-1)); // "8.8.8.8" +console.log(arr.isna()); // [false, true, false] +console.log(arr.toArray()); // ["1.1.1.1", null, "8.8.8.8"]+
+ Call registerExtensionDtype to make a dtype resolvable by name.
+ Then use constructExtensionDtypeFromString to look it up — this
+ is what tsb uses internally when you pass a dtype string.
+
import { + registerExtensionDtype, + constructExtensionDtypeFromString, +} from "tsb"; + +registerExtensionDtype(IPDtype); + +const dtype = constructExtensionDtypeFromString("ip"); +console.log(dtype?.name); // "ip" +console.log(dtype instanceof IPDtype); // true + +constructExtensionDtypeFromString("unknown"); // null+
+ Use registerSeriesAccessor, registerDataFrameAccessor,
+ or registerIndexAccessor to attach a custom accessor class to tsb objects.
+ Call getRegisteredAccessors("series") to retrieve all registered
+ accessors for a given target.
+
import { + registerSeriesAccessor, + getRegisteredAccessors, +} from "tsb"; + +class GeoAccessor { + constructor(private readonly _series: unknown) {} + centroid() { return [0, 0]; } +} + +registerSeriesAccessor("geo", GeoAccessor); + +const accessors = getRegisteredAccessors("series"); +const Cls = accessors.get("geo")!; +const acc = new Cls(mySeries); +// acc.centroid() → [0, 0]+
api.extensions
+ All the above is also available through the unified api namespace:
+
import { api } from "tsb"; + +api.extensions.registerExtensionDtype(IPDtype); +api.extensions.constructExtensionDtypeFromString("ip"); // IPDtype instance +api.extensions.registerSeriesAccessor("geo", GeoAccessor); +api.extensions.getRegisteredAccessors("series").get("geo"); // GeoAccessor+ +
| Method / Class | Signature | Description |
|---|---|---|
ExtensionDtype | abstract class | Base for custom dtypes. Implement name, type, kind. |
ExtensionArray | abstract class | Base for custom arrays. Implement dtype, length, getItem, slice. |
registerExtensionDtype(cls) | (cls: typeof ExtensionDtype) → void | Register a dtype subclass by name. |
constructExtensionDtypeFromString(s) | (s: string) → ExtensionDtype \| null | Resolve a string to a registered dtype. |
registerSeriesAccessor(name, cls) | (name: string, cls: new(obj) → unknown) → void | Register accessor on Series. |
registerDataFrameAccessor(name, cls) | (name: string, cls: new(obj) → unknown) → void | Register accessor on DataFrame. |
registerIndexAccessor(name, cls) | (name: string, cls: new(obj) → unknown) → void | Register accessor on Index. |
getRegisteredAccessors(target) | ("series" \| "dataframe" \| "index") → ReadonlyMap | Get all registered accessors for a target. |
pdArray(data, dtype?) — create typed arrays from any iterable. Dtype inference for int64/float64/bool/string/datetime. Mirrors pandas.array().
+Loading tsb runtime…
+
+ pdArray(data, dtype?) — create a typed array, mirroring
+ pandas.array().
+
+ When no dtype is passed, pdArray infers the best
+ dtype from the data: integers → "int64", floats →
+ "float64", booleans → "bool", strings →
+ "string", Dates → "datetime".
+
Pass a dtype string to override inference.
null and undefined are treated as NA and preserved in the array.
PandasArray implements the iterator protocol — use for...of or spread.
+ Port of pandas.DataFrame.to_markdown() and
+ pandas.DataFrame.to_latex(). Render any DataFrame or Series
+ as a Markdown or LaTeX table string.
+
Renders a DataFrame as a GitHub Flavored Markdown (GFM) table.
+ +import { DataFrame, toMarkdown } from "tsb";
+
+const df = DataFrame.fromColumns({
+ name: ["Alice", "Bob", "Carol"],
+ score: [92, 85, 78],
+ grade: ["A", "B", "C"],
+});
+
+console.log(toMarkdown(df));
+// | | name | score | grade |
+// |---|-------|-------|-------|
+// | 0 | Alice | 92 | A |
+// | 1 | Bob | 85 | B |
+// | 2 | Carol | 78 | C |
+
+// Right-align numeric columns
+console.log(toMarkdown(df, { colAlign: "right" }));
+
+// Without index
+console.log(toMarkdown(df, { index: false }));
+
+// Round floats to 2 decimal places
+const df2 = DataFrame.fromColumns({ x: [1.2345, 6.789], y: [0.1, 0.2] });
+console.log(toMarkdown(df2, { floatFormat: 2 }));
+
+ Renders a DataFrame as a LaTeX tabular (or longtable) environment.
import { DataFrame, toLaTeX } from "tsb";
+
+const df = DataFrame.fromColumns({
+ method: ["Newton", "Euler", "RK4"],
+ error: [0.0001, 0.01, 0.000001],
+});
+
+// Default: booktabs rules, with index
+console.log(toLaTeX(df));
+// \begin{tabular}{lll}
+// \toprule
+// & method & error \\
+// \midrule
+// 0 & Newton & 0.0001 \\
+// 1 & Euler & 0.01 \\
+// 2 & RK4 & 0.000001 \\
+// \bottomrule
+// \end{tabular}
+
+// With caption and table environment
+console.log(toLaTeX(df, {
+ tableEnv: true,
+ caption: "Numerical methods comparison",
+ label: "tab:methods",
+ floatFormat: 6,
+}));
+
+
+ Equivalent functions — seriesToMarkdown and seriesToLaTeX — work directly on a Series.
import { Series, seriesToMarkdown, seriesToLaTeX } from "tsb";
+
+const s = new Series({ data: [10, 20, 30], name: "price" });
+
+console.log(seriesToMarkdown(s));
+// | | price |
+// |---|-------|
+// | 0 | 10 |
+// | 1 | 20 |
+// | 2 | 30 |
+
+console.log(seriesToLaTeX(s));
+// \begin{tabular}{ll}
+// \toprule
+// & price \\
+// \midrule
+// 0 & 10 \\
+// ...
+// \end{tabular}
+
+
+ | Function | Description |
|---|---|
toMarkdown(df, opts?) | Render DataFrame as Markdown table string |
seriesToMarkdown(s, opts?) | Render Series as Markdown table string |
toLaTeX(df, opts?) | Render DataFrame as LaTeX tabular/longtable string |
seriesToLaTeX(s, opts?) | Render Series as LaTeX table string |
| Option | Type | Default | Description |
|---|---|---|---|
index | boolean | true | Include row index column |
colAlign | "left" \| "center" \| "right" \| "none" | "none" | Column alignment in separator row |
floatFormat | number | undefined | Decimal places for numeric values |
| Option | Type | Default | Description |
|---|---|---|---|
index | boolean | true | Include row index column |
colFormat | string | auto "l…l" | LaTeX column format spec |
booktabs | boolean | true | Use \toprule/\midrule/\bottomrule |
longtable | boolean | false | Use longtable instead of tabular |
tableEnv | boolean | false | Wrap in table environment |
caption | string | — | \caption{} text (requires tableEnv) |
label | string | — | \label{} text (requires tableEnv) |
floatFormat | number | undefined | Decimal places for numeric values |
toMarkdown() and toLaTeX() — render DataFrames and Series as Markdown tables or LaTeX tabular environments. Mirrors pandas.DataFrame.to_markdown() and to_latex().
+Side-by-side performance comparison of tsb (TypeScript/Bun) vs pandas (Python). Timing metrics for each function.
+ +
Port of pandas.DataFrame.to_markdown() and
pandas.DataFrame.to_latex(). Render any DataFrame or Series
as a Markdown or LaTeX table string.
Renders a DataFrame as a GitHub-Flavoured Markdown table.
- -import { DataFrame, toMarkdown } from "tsb";
+
+ toMarkdown — render a DataFrame as a Markdown table
+ Mirrors pandas.DataFrame.to_markdown(). Supports alignment, index toggle, and float formatting.
+
+
+ TypeScript
+
+
+
+
+
+
-