diff --git a/playground/extensions.html b/playground/extensions.html new file mode 100644 index 00000000..7ad5cec4 --- /dev/null +++ b/playground/extensions.html @@ -0,0 +1,261 @@ + + + + + + tsb — api.extensions: Custom Extension Types + + + + + + +

pd.api.extensions new in pandas 0.23

+

+ The api.extensions namespace lets you build custom array types and dtypes + that integrate with tsb DataFrames and Series — mirroring pandas.api.extensions. +

+ +

Overview

+ + + + + + + + + + +
Symbol | Mirrors | Description
ExtensionDtype | pandas.api.extensions.ExtensionDtype | Abstract base class for custom dtypes
ExtensionArray | pandas.api.extensions.ExtensionArray | Abstract base class for custom 1-D arrays
registerExtensionDtype(cls) | register_extension_dtype | Register a dtype so it can be resolved from a string
constructExtensionDtypeFromString(s) | internal pandas helper | Resolve a string to a registered extension dtype
registerSeriesAccessor(name, cls) | register_series_accessor | Register a custom accessor on Series
registerDataFrameAccessor(name, cls) | register_dataframe_accessor | Register a custom accessor on DataFrame
registerIndexAccessor(name, cls) | register_index_accessor | Register a custom accessor on Index
getRegisteredAccessors(target) | — | Return all registered accessors for a target
+ +

1 — Custom ExtensionDtype

+

+ Subclass ExtensionDtype to define a new dtype. + Implement name, type, kind, and + optionally construct_from_string so the dtype can be resolved + from a plain string. +

+
import { ExtensionDtype } from "tsb";
+
+class IPDtype extends ExtensionDtype {
+  get name() { return "ip"; }
+  get type() { return String; }
+  get kind() { return "O"; }
+
+  static override construct_from_string(s: string): IPDtype | null {
+    return s === "ip" ? new IPDtype() : null;
+  }
+}
+
+const d = new IPDtype();
+console.log(d.name);      // "ip"
+console.log(d.kind);      // "O"
+console.log(d.isNumeric); // false
+console.log(String(d));   // "ip"
+
+name = "ip"
+kind = "O"
+isNumeric = false
+toString = "ip" +
+ +

2 — Custom ExtensionArray

+

+ Subclass ExtensionArray to hold a column of your custom elements. + At a minimum, implement dtype, length, getItem, + and slice. The default isna and toArray + implementations call getItem repeatedly — override them for performance. +

+
import { ExtensionArray } from "tsb";
+
+class IPArray extends ExtensionArray {
+  readonly _data: (string | null)[];
+
+  constructor(data: (string | null)[]) {
+    super();
+    this._data = data;
+  }
+
+  get dtype() { return new IPDtype(); }
+  get length() { return this._data.length; }
+
+  getItem(i: number): string | null {
+    const idx = i < 0 ? this._data.length + i : i;
+    return this._data[idx] ?? null;
+  }
+
+  slice(start: number, stop: number): IPArray {
+    return new IPArray(this._data.slice(start, stop));
+  }
+}
+
+const arr = new IPArray(["1.1.1.1", null, "8.8.8.8"]);
+console.log(arr.length);       // 3
+console.log(arr.getItem(0));   // "1.1.1.1"
+console.log(arr.getItem(-1));  // "8.8.8.8"
+console.log(arr.isna());       // [false, true, false]
+console.log(arr.toArray());    // ["1.1.1.1", null, "8.8.8.8"]
+
+length = 3
+getItem(0) = "1.1.1.1"
+getItem(-1) = "8.8.8.8"
+isna() = [false, true, false]
+toArray() = ["1.1.1.1", null, "8.8.8.8"] +
+ +

3 — Register a dtype

+

+ Call registerExtensionDtype to make a dtype resolvable by name. + Then use constructExtensionDtypeFromString to look it up — this + is what tsb uses internally when you pass a dtype string. +

+
import {
+  registerExtensionDtype,
+  constructExtensionDtypeFromString,
+} from "tsb";
+
+registerExtensionDtype(IPDtype);
+
+const dtype = constructExtensionDtypeFromString("ip");
+console.log(dtype?.name);      // "ip"
+console.log(dtype instanceof IPDtype);  // true
+
+constructExtensionDtypeFromString("unknown");  // null
+
+dtype.name = "ip"
+dtype instanceof IPDtype = true
+constructExtensionDtypeFromString("unknown") = null +
+ +

4 — Register custom accessors

+

+ Use registerSeriesAccessor, registerDataFrameAccessor, + or registerIndexAccessor to register a custom accessor class for tsb objects. + tsb does not yet attach registered accessors as properties at runtime, so call + getRegisteredAccessors("series") to retrieve the registered accessors + for a given target and instantiate them yourself. +

+
import {
+  registerSeriesAccessor,
+  getRegisteredAccessors,
+} from "tsb";
+
+class GeoAccessor {
+  constructor(private readonly _series: unknown) {}
+  centroid() { return [0, 0]; }
+}
+
+registerSeriesAccessor("geo", GeoAccessor);
+
+const accessors = getRegisteredAccessors("series");
+const Cls = accessors.get("geo")!;
+const acc = new Cls(mySeries);
+// acc.centroid() → [0, 0]
+
+accessors.has("geo") = true
+new GeoAccessor(series).centroid() = [0, 0] +
+ +

5 — Accessing via api.extensions

+

+ All the above is also available through the unified api namespace: +

+
import { api } from "tsb";
+
+api.extensions.registerExtensionDtype(IPDtype);
+api.extensions.constructExtensionDtypeFromString("ip");   // IPDtype instance
+api.extensions.registerSeriesAccessor("geo", GeoAccessor);
+api.extensions.getRegisteredAccessors("series").get("geo"); // GeoAccessor
+ +

API reference

+ + + + + + + + + + +
Method / Class | Signature | Description
ExtensionDtype | abstract class | Base for custom dtypes. Implement name, type, kind.
ExtensionArray | abstract class | Base for custom arrays. Implement dtype, length, getItem, slice.
registerExtensionDtype(cls) | `(cls: typeof ExtensionDtype) → void` | Register a dtype subclass by name.
constructExtensionDtypeFromString(s) | `(s: string) → ExtensionDtype | null` | Resolve a string to a registered dtype.
registerSeriesAccessor(name, cls) | `(name: string, cls: new(obj) → unknown) → void` | Register accessor on Series.
registerDataFrameAccessor(name, cls) | `(name: string, cls: new(obj) → unknown) → void` | Register accessor on DataFrame.
registerIndexAccessor(name, cls) | `(name: string, cls: new(obj) → unknown) → void` | Register accessor on Index.
getRegisteredAccessors(target) | `("series" | "dataframe" | "index") → ReadonlyMap` | Get all registered accessors for a target.
+ + + diff --git a/playground/format_table.html b/playground/format_table.html new file mode 100644 index 00000000..7d5be556 --- /dev/null +++ b/playground/format_table.html @@ -0,0 +1,269 @@ + + + + + + tsb — format_table — tsb playground + + + + +
+
+
Initializing playground…
+
+ + ← Back to roadmap +

format_table — tsb playground

+

+ Port of pandas.DataFrame.to_markdown() and + pandas.DataFrame.to_latex(). Render any DataFrame or Series + as a Markdown or LaTeX table string. +

+ + +
+

toMarkdown — render a DataFrame as a Markdown table

+

Mirrors pandas.DataFrame.to_markdown(). Supports alignment, index toggle, and float formatting.

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

floatFormat — control decimal precision

+

Pass floatFormat: N to format every finite numeric value with exactly N decimal places (via toFixed); NaN, infinite, and non-numeric cells are left unchanged.

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

toLaTeX — render a DataFrame as a LaTeX table

+

Mirrors pandas.DataFrame.to_latex(). Supports booktabs, longtable, caption, and label.

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

seriesToMarkdown / seriesToLaTeX

+

seriesToMarkdown and seriesToLaTeX are the Series counterparts of toMarkdown and toLaTeX.

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + + + + + diff --git a/playground/index.html b/playground/index.html index d1b4e018..47c48046 100644 --- a/playground/index.html +++ b/playground/index.html @@ -486,9 +486,20 @@

✅ Complete +
+

🗃️ pdArray — pd.array() Factory

+

pdArray(data, dtype?) — create typed arrays from any iterable. Dtype inference for int64/float64/bool/string/datetime. Mirrors pandas.array().

+
✅ Complete
+
+
+

📋 Table Formatters — to_markdown / to_latex

+

toMarkdown() and toLaTeX() — render DataFrames and Series as Markdown tables or LaTeX tabular environments. Mirrors pandas.DataFrame.to_markdown() and to_latex().

+
✅ Complete
+
+
-

⚡ Benchmarks

+

⚡ Benchmarks

Side-by-side performance comparison of tsb (TypeScript/Bun) vs pandas (Python). Timing metrics for each function.

🏗️ In Progress
diff --git a/playground/pd_array.html b/playground/pd_array.html new file mode 100644 index 00000000..e8f8c99f --- /dev/null +++ b/playground/pd_array.html @@ -0,0 +1,243 @@ + + + + + + tsb — pdArray: pd.array() factory function + + + + +
+
+

Loading tsb runtime…

+
+ +← tsb playground +

pdArray()

+

+ pdArray(data, dtype?) — create a typed array, mirroring + pandas.array(). +

+ +
+

Basic usage — dtype inference

+

+ When no dtype is passed, pdArray infers the best + dtype from the data: integers → "int64", floats → + "float64", booleans → "bool", strings → + "string", Dates → "datetime". +

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ +
+

Explicit dtype

+

Pass a dtype string to override inference.

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ +
+

Null / NA values

+

null and undefined are both treated as NA and preserved in the array.

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ +
+

Iterating

+

PandasArray implements the iterator protocol — use for...of or spread.

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + + + + + diff --git a/src/core/extensions.ts b/src/core/extensions.ts new file mode 100644 index 00000000..29884472 --- /dev/null +++ b/src/core/extensions.ts @@ -0,0 +1,343 @@ +/** + * extensions — `pd.api.extensions` namespace, mirroring `pandas.api.extensions`. + * + * Provides abstract base classes for building custom array types and dtypes + * that integrate with tsb DataFrames and Series, as well as accessor + * registration decorators. + * + * @example + * ```ts + * import { api } from "tsb"; + * // Access through the api namespace: + * const { ExtensionDtype, ExtensionArray, registerExtensionDtype } = api.extensions; + * ``` + * + * @module + */ + +import type { Scalar } from "../types.ts"; + +// ─── ExtensionDtype ─────────────────────────────────────────────────────────── + +/** + * Abstract base class for custom dtypes. + * + * Mirrors `pandas.api.extensions.ExtensionDtype`. Custom dtypes must subclass + * this and implement all abstract members. + * + * @example + * ```ts + * class IPDtype extends ExtensionDtype { + * get name() { return "ip"; } + * get type() { return Object; } + * get kind() { return "O" as const; } + * static construct_array_type() { return IPArray; } + * } + * ``` + */ +export abstract class ExtensionDtype { + /** The name of the dtype, e.g. `"ip"` or `"geometry"`. */ + abstract get name(): string; + + /** + * The scalar type for the array — the JavaScript class that represents + * individual elements (e.g. `Number`, `String`, or a custom class). + */ + abstract get type(): abstract new ( + ...args: readonly unknown[] + ) => unknown; + + /** + * A single character code that categorises the dtype, following NumPy + * conventions: `"b"` bool, `"i"` signed int, `"u"` unsigned int, + * `"f"` float, `"c"` complex, `"m"` timedelta, `"M"` datetime, + * `"O"` object, `"S"` byte-string, `"U"` unicode string. + * + * Custom extension dtypes typically return `"O"`. 
+ */ + abstract get kind(): string; + + /** + * Whether this dtype is considered "numeric" for tsb operations. + * Defaults to `false`. + */ + get isNumeric(): boolean { + return false; + } + + /** Whether the dtype can hold missing values. Defaults to `true`. */ + get naMissingValue(): Scalar | null { + return null; + } + + /** + * Return a string representation of the dtype. + * Defaults to the value of `name`. + */ + toString(): string { + return this.name; + } + + /** + * Return the array type associated with this dtype. + * + * Used by tsb internally when constructing arrays of this type. + * Override in subclasses or use {@link registerExtensionDtype}. + */ + static construct_array_type(): abstract new (data: readonly unknown[]) => ExtensionArray { + throw new Error("construct_array_type must be overridden in subclasses"); + } + + /** + * Construct this dtype from a string representation. + * + * Override to support dtype strings like `"ip[v4]"`. + * Returns `null` if the string cannot be parsed by this dtype. + */ + static construct_from_string(_dtype: string): ExtensionDtype | null { + return null; + } +} + +// ─── ExtensionArray ─────────────────────────────────────────────────────────── + +/** + * Abstract base class for custom 1-D array types. + * + * Mirrors `pandas.api.extensions.ExtensionArray`. Custom array types must + * subclass this and implement the required abstract members to integrate with + * tsb Series and DataFrames. + * + * @example + * ```ts + * class IPArray extends ExtensionArray { + * readonly _data: readonly string[]; + * constructor(data: readonly string[]) { + * super(); + * this._data = data; + * } + * get dtype() { return new IPDtype(); } + * get length() { return this._data.length; } + * getItem(i: number): string | null { return this._data[i] ?? 
null; } + * slice(start: number, stop: number): IPArray { + * return new IPArray(this._data.slice(start, stop)); + * } + * } + * ``` + */ +export abstract class ExtensionArray { + /** + * The dtype of this array. Must return an instance of a class that extends + * {@link ExtensionDtype}. + */ + abstract get dtype(): ExtensionDtype; + + /** The number of elements in the array. */ + abstract get length(): number; + + /** + * Return the element at index `i`, or `null` / `undefined` for missing. + * Negative indices count from the end. + */ + abstract getItem(i: number): unknown; + + /** + * Return a new ExtensionArray containing elements `[start, stop)`. + * Both bounds follow standard slice semantics (may be negative). + */ + abstract slice(start: number, stop: number): ExtensionArray; + + /** + * Return `true` for each element that is missing (NA). + * + * The default implementation checks for `null` and `undefined`. + */ + isna(): readonly boolean[] { + const result: boolean[] = []; + for (let i = 0; i < this.length; i++) { + const v = this.getItem(i); + result.push(v === null || v === undefined); + } + return result; + } + + /** + * Return a copy of the array with missing values filled with `value`. + * + * Subclasses should override this for efficient typed filling. + * The default implementation returns `this` unchanged. + */ + fillna(_value: unknown): ExtensionArray { + return this; + } + + /** + * Return an array of raw JavaScript values (one per element). + * Used by tsb when it needs a plain array representation. + */ + toArray(): readonly unknown[] { + const out: unknown[] = []; + for (let i = 0; i < this.length; i++) { + out.push(this.getItem(i)); + } + return out; + } + + /** + * Human-readable string representation. 
+ */ + toString(): string { + return `${this.constructor.name}(length=${this.length}, dtype=${this.dtype})`; + } +} + +// ─── Registry ───────────────────────────────────────────────────────────────── + +/** Map from dtype name → ExtensionDtype subclass constructor. */ +const _dtypeRegistry = new Map(); + +/** + * Register a custom {@link ExtensionDtype} subclass so that tsb can + * resolve it from a dtype string. + * + * Mirrors `pandas.api.extensions.register_extension_dtype`. + * + * @example + * ```ts + * registerExtensionDtype(IPDtype); + * // Now Dtype.from("ip") will try IPDtype.construct_from_string("ip") + * ``` + */ +export function registerExtensionDtype( + dtypeClass: { new (): ExtensionDtype } & typeof ExtensionDtype, +): void { + const instance = new dtypeClass(); + _dtypeRegistry.set(instance.name, dtypeClass); +} + +/** + * Attempt to construct an {@link ExtensionDtype} from a string using all + * registered extension dtypes. + * + * Returns `null` if no registered dtype can handle the string. + */ +export function constructExtensionDtypeFromString(dtypeStr: string): ExtensionDtype | null { + for (const dtypeClass of _dtypeRegistry.values()) { + const result = dtypeClass.construct_from_string(dtypeStr); + if (result !== null) { + return result; + } + } + return null; +} + +// ─── Accessor Registration ──────────────────────────────────────────────────── + +/** + * Registered custom accessors keyed by target ("series" | "dataframe" | "index") + * and accessor name. + */ +const _accessorRegistry = new Map unknown>>(); + +function _getAccessorMap(target: string): Map unknown> { + let m = _accessorRegistry.get(target); + if (m === undefined) { + m = new Map(); + _accessorRegistry.set(target, m); + } + return m; +} + +/** + * Register a custom accessor class on `Series` objects. + * + * Mirrors `pandas.api.extensions.register_series_accessor`. 
+ * + * After registration, `series.` returns an instance of `accessorClass` + * constructed with the Series as its argument. Note: tsb does not yet + * dynamically attach properties at runtime — use the registry programmatically + * via {@link getRegisteredAccessors}. + * + * @example + * ```ts + * registerSeriesAccessor("geo", GeoSeriesAccessor); + * ``` + */ +export function registerSeriesAccessor( + name: string, + accessorClass: new (obj: unknown) => unknown, +): void { + _getAccessorMap("series").set(name, accessorClass); +} + +/** + * Register a custom accessor class on `DataFrame` objects. + * + * Mirrors `pandas.api.extensions.register_dataframe_accessor`. + * + * @example + * ```ts + * registerDataFrameAccessor("plot", PlotAccessor); + * ``` + */ +export function registerDataFrameAccessor( + name: string, + accessorClass: new (obj: unknown) => unknown, +): void { + _getAccessorMap("dataframe").set(name, accessorClass); +} + +/** + * Register a custom accessor class on `Index` objects. + * + * Mirrors `pandas.api.extensions.register_index_accessor`. + * + * @example + * ```ts + * registerIndexAccessor("geo", GeoIndexAccessor); + * ``` + */ +export function registerIndexAccessor( + name: string, + accessorClass: new (obj: unknown) => unknown, +): void { + _getAccessorMap("index").set(name, accessorClass); +} + +/** + * Return all accessor registrations for the given target. + * + * `target` must be one of `"series"`, `"dataframe"`, or `"index"`. + * Returns a `Map`, or an empty map if none are registered. + */ +export function getRegisteredAccessors( + target: "series" | "dataframe" | "index", +): ReadonlyMap unknown> { + return _getAccessorMap(target); +} + +// ─── api.extensions namespace object ───────────────────────────────────────── + +/** + * The `api.extensions` sub-namespace — mirrors `pandas.api.extensions`. 
+ */ +export const apiExtensions = { + ExtensionDtype, + ExtensionArray, + registerExtensionDtype, + constructExtensionDtypeFromString, + registerSeriesAccessor, + registerDataFrameAccessor, + registerIndexAccessor, + getRegisteredAccessors, +} as const; + +export type ApiExtensions = typeof apiExtensions; + +// ─── Utility types ──────────────────────────────────────────────────────────── + +/** Type-level helper: any concrete subclass of {@link ExtensionDtype}. */ +export type ExtensionDtypeConstructor = { new (): ExtensionDtype } & typeof ExtensionDtype; + +/** Type-level helper: any concrete subclass of {@link ExtensionArray}. */ +export type ExtensionArrayConstructor = new (data: readonly unknown[]) => ExtensionArray; diff --git a/src/core/index.ts b/src/core/index.ts index dc9437dd..130c748e 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -134,3 +134,20 @@ export { } from "./api_types.ts"; export { astypeSeries, astype, castScalar } from "./astype.ts"; export type { AstypeOptions, DataFrameAstypeOptions } from "./astype.ts"; + +export { + ExtensionDtype, + ExtensionArray, + registerExtensionDtype, + constructExtensionDtypeFromString, + registerSeriesAccessor, + registerDataFrameAccessor, + registerIndexAccessor, + getRegisteredAccessors, + apiExtensions, +} from "./extensions.ts"; +export type { + ApiExtensions, + ExtensionDtypeConstructor, + ExtensionArrayConstructor, +} from "./extensions.ts"; diff --git a/src/core/pd_api.ts b/src/core/pd_api.ts index 0964d2e0..9c26cf9b 100644 --- a/src/core/pd_api.ts +++ b/src/core/pd_api.ts @@ -45,6 +45,7 @@ import { isTimedeltaDtype, isUnsignedIntegerDtype, } from "./api_types.ts"; +import { apiExtensions } from "./extensions.ts"; // ─── api.types ──────────────────────────────────────────────────────────────── @@ -103,6 +104,8 @@ export type ApiTypes = typeof apiTypes; export const api = { /** Type-checking predicates — mirrors `pandas.api.types`. 
*/ types: apiTypes, + /** Extension type/array system and accessor registration — mirrors `pandas.api.extensions`. */ + extensions: apiExtensions, } as const; export type Api = typeof api; diff --git a/src/core/pd_array.ts b/src/core/pd_array.ts new file mode 100644 index 00000000..757e3214 --- /dev/null +++ b/src/core/pd_array.ts @@ -0,0 +1,127 @@ +/** + * pd.array — factory function for creating pandas-compatible arrays. + * + * Mirrors `pandas.array()`. Accepts a sequence of values and an optional dtype + * hint, and returns a typed array wrapper suitable for use with tsb Series and + * DataFrames. + * + * @example + * ```ts + * import { pdArray } from "tsb"; + * + * const a = pdArray([1, 2, 3], "int64"); + * a.dtype; // "int64" + * a.length; // 3 + * a.toArray(); // [1, 2, 3] + * + * const b = pdArray(["a", "b", null], "string"); + * b.dtype; // "string" + * b.toArray(); // ["a", "b", null] + * ``` + * + * @module + */ + +import type { DtypeName, Scalar } from "../types.ts"; + +/** + * A lightweight typed array returned by {@link pdArray}. + * + * Mirrors the minimal public interface of a pandas ExtensionArray / ndarray + * that tsb needs for interop. + */ +export class PandasArray { + readonly dtype: DtypeName; + readonly length: number; + private readonly _data: readonly Scalar[]; + + /** @internal */ + constructor(data: readonly Scalar[], dtype: DtypeName) { + this._data = data; + this.dtype = dtype; + this.length = data.length; + } + + /** Return the element at position `i` (0-based). */ + at(i: number): Scalar { + return this._data[i] ?? null; + } + + /** Return a plain JS array copy of the underlying data. */ + toArray(): Scalar[] { + return Array.from(this._data); + } + + /** Iterate over elements. 
*/ + [Symbol.iterator](): Iterator { + return this._data[Symbol.iterator](); + } + + /** @internal */ + toString(): string { + return `PandasArray([${this._data.join(", ")}], dtype='${this.dtype}')`; + } +} + +// ─── dtype inference ────────────────────────────────────────────────────────── + +function inferDtype(data: readonly Scalar[]): DtypeName { + let hasFloat = false; + let hasInt = false; + let hasString = false; + let hasBool = false; + let hasDate = false; + let hasBigInt = false; + + for (const v of data) { + if (v === null || v === undefined) continue; + if (typeof v === "boolean") { + hasBool = true; + } else if (typeof v === "bigint") { + hasBigInt = true; + } else if (typeof v === "number") { + if (Number.isInteger(v)) { + hasInt = true; + } else { + hasFloat = true; + } + } else if (typeof v === "string") { + hasString = true; + } else if (v instanceof Date) { + hasDate = true; + } + } + + if (hasDate) return "datetime"; + if (hasBigInt) return "int64"; + if (hasFloat) return "float64"; + if (hasInt && !hasString && !hasBool) return "int64"; + if (hasBool && !hasInt && !hasFloat && !hasString) return "bool"; + if (hasString) return "string"; + return "object"; +} + +// ─── Public API ─────────────────────────────────────────────────────────────── + +/** + * Create a {@link PandasArray} from a sequence of values. + * + * Mirrors `pandas.array(data, dtype=None)`. + * + * @param data - Iterable of scalar values (may include `null`/`undefined` for NA). + * @param dtype - Optional dtype hint. When omitted the dtype is inferred from + * the data (similar to pandas' inference rules). + * @returns A {@link PandasArray} with the given (or inferred) dtype. 
+ * + * @example + * ```ts + * pdArray([1, 2, 3]); // dtype inferred as "int64" + * pdArray([1.5, 2.5], "float32"); // dtype forced to "float32" + * pdArray(["a", null, "c"]); // dtype inferred as "string" + * ``` + */ +export function pdArray(data: Iterable, dtype?: DtypeName): PandasArray { + const arr = Array.from(data); + const resolvedDtype = dtype ?? inferDtype(arr); + return new PandasArray(arr, resolvedDtype); +} diff --git a/src/index.ts b/src/index.ts index 4738b8d9..a6180397 100644 --- a/src/index.ts +++ b/src/index.ts @@ -715,3 +715,29 @@ export type { // pd.api namespace export { api, apiTypes } from "./core/pd_api.ts"; export type { Api, ApiTypes } from "./core/pd_api.ts"; +// pd.api.extensions +export { + ExtensionDtype, + ExtensionArray, + registerExtensionDtype, + constructExtensionDtypeFromString, + registerSeriesAccessor, + registerDataFrameAccessor, + registerIndexAccessor, + getRegisteredAccessors, + apiExtensions, +} from "./core/extensions.ts"; +export type { + ApiExtensions, + ExtensionDtypeConstructor, + ExtensionArrayConstructor, +} from "./core/extensions.ts"; + +export { pdArray, PandasArray } from "./core/pd_array.ts"; +export { + toMarkdown, + seriesToMarkdown, + toLaTeX, + seriesToLaTeX, +} from "./stats/format_table.ts"; +export type { ToMarkdownOptions, ToLaTeXOptions } from "./stats/format_table.ts"; diff --git a/src/stats/format_table.ts b/src/stats/format_table.ts new file mode 100644 index 00000000..93dcf1a8 --- /dev/null +++ b/src/stats/format_table.ts @@ -0,0 +1,473 @@ +/** + * format_table — `DataFrame.to_markdown()` and `DataFrame.to_latex()` table + * formatters, mirroring `pandas.DataFrame.to_markdown()` and + * `pandas.DataFrame.to_latex()`. 
/**
 * @example
 * ```ts
 * import { DataFrame } from "tsb";
 * import { toMarkdown, toLaTeX } from "tsb";
 *
 * const df = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
 *
 * toMarkdown(df);
 * // |     | a   | b   |
 * // | --- | --- | --- |
 * // | 0   | 1   | x   |
 * // | 1   | 2   | y   |
 *
 * toLaTeX(df);
 * // \begin{tabular}{lll}
 * // \toprule
 * //  & a & b \\
 * // \midrule
 * // 0 & 1 & x \\
 * // 1 & 2 & y \\
 * // \bottomrule
 * // \end{tabular}
 * ```
 *
 * @module
 */

import type { DataFrame } from "../core/frame.ts";
import type { Series } from "../core/series.ts";
import type { Label, Scalar } from "../types.ts";

// ─── shared helpers ───────────────────────────────────────────────────────────

/**
 * Stringify a scalar value for table output.
 *
 * `null` and `undefined` become the empty string; everything else goes through
 * `String()`, which already renders `NaN` as `"NaN"`.
 */
function cellStr(val: Scalar): string {
  return val === null || val === undefined ? "" : String(val);
}

/** Stringify a Label (index/column label) for table output; nullish labels become `""`. */
function labelStr(lbl: Label): string {
  return lbl === null || lbl === undefined ? "" : String(lbl);
}

// ═════════════════════════════════════════════════════════════════════════════
// MARKDOWN
// ═════════════════════════════════════════════════════════════════════════════

/** Options for {@link toMarkdown}. */
export interface ToMarkdownOptions {
  /**
   * Alignment marker for all data columns. The index column always gets the
   * plain `---` separator, whatever this is set to.
   *
   * - `"left"` — `:---`
   * - `"center"` — `:---:`
   * - `"right"` — `---:`
   * - `"none"` (default) — `---`
   *
   * @default "none"
   */
  colAlign?: "left" | "center" | "right" | "none";
  /** If `false`, omit the row index column. @default true */
  index?: boolean;
  /**
   * Number of decimal places applied (via `toFixed`) to every finite number,
   * integers included. @default undefined (no rounding)
   */
  floatFormat?: number;
}

/** Alignment of a single rendered Markdown column. */
type MarkdownAlign = "left" | "center" | "right" | "none";

/** Narrowest column that still fits the alignment marker around three dashes. */
function minMarkdownWidth(align: MarkdownAlign): number {
  if (align === "center") {
    return 5;
  }
  return align === "none" ? 3 : 4;
}

/** Build the delimiter-row cell for a column of exactly `width` characters. */
function markdownSeparator(width: number, align: MarkdownAlign): string {
  switch (align) {
    case "left":
      return `:${"-".repeat(width - 1)}`;
    case "right":
      return `${"-".repeat(width - 1)}:`;
    case "center":
      return `:${"-".repeat(width - 2)}:`;
    default:
      return "-".repeat(width);
  }
}

/**
 * Make text safe inside a table cell: a raw `|` would start a new cell and a
 * raw line break would end the row.
 */
function markdownEscape(text: string): string {
  return text.replace(/\|/g, "\\|").replace(/\r\n|\r|\n/g, "<br>");
}

/** Format one data value, applying `floatFormat` to finite numbers only. */
function markdownCell(val: Scalar, floatFormat: number | undefined): string {
  const text =
    floatFormat !== undefined && typeof val === "number" && Number.isFinite(val)
      ? val.toFixed(floatFormat)
      : cellStr(val);
  return markdownEscape(text);
}

/**
 * Lay out already-stringified cells as a padded pipe table.
 *
 * Every column is as wide as its widest cell and never narrower than its
 * separator, so header, delimiter and data rows all line up.
 */
function renderMarkdownTable(
  headers: readonly string[],
  rows: readonly (readonly string[])[],
  hasIndex: boolean,
  colAlign: MarkdownAlign,
): string {
  const columns = headers.map((header, ci) => {
    const align: MarkdownAlign = hasIndex && ci === 0 ? "none" : colAlign;
    let width = Math.max(header.length, minMarkdownWidth(align));
    for (const row of rows) {
      width = Math.max(width, (row[ci] ?? "").length);
    }
    return { align, width };
  });

  const renderRow = (cells: readonly string[]): string =>
    `| ${columns.map(({ width }, ci) => (cells[ci] ?? "").padEnd(width, " ")).join(" | ")} |`;

  const separatorLine = `| ${columns
    .map(({ align, width }) => markdownSeparator(width, align))
    .join(" | ")} |`;

  return [renderRow(headers), separatorLine, ...rows.map(renderRow)].join("\n");
}

/**
 * Render a `DataFrame` as a GitHub-Flavoured Markdown table string.
 *
 * Mirrors `pandas.DataFrame.to_markdown()`. Pipes in labels and values are
 * escaped as `\|` and line breaks are written as `<br>`.
 *
 * @param df - Frame to render; columns are looked up by their stringified label.
 * @param options - See {@link ToMarkdownOptions}.
 * @returns Header line, delimiter line and one line per row, joined by `\n`.
 *
 * @example
 * ```ts
 * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: ["x", "y", "z"] });
 * console.log(toMarkdown(df));
 * // |     | a   | b   |
 * // | --- | --- | --- |
 * // | 0   | 1   | x   |
 * // | 1   | 2   | y   |
 * // | 2   | 3   | z   |
 * ```
 */
export function toMarkdown(df: DataFrame, options: ToMarkdownOptions = {}): string {
  const { colAlign = "none", index = true, floatFormat } = options;

  const rowLabels = df.index.values.map(labelStr);
  const colLabels = df.columns.values.map(labelStr);
  // Resolve each column once rather than once per cell.
  const columns = colLabels.map((name) => df.col(name));

  const headers = colLabels.map(markdownEscape);
  if (index) {
    headers.unshift("");
  }

  const rows = Array.from({ length: df.shape[0] }, (_, r) => {
    const cells = columns.map((col) => markdownCell(col.iat(r), floatFormat));
    if (index) {
      cells.unshift(markdownEscape(rowLabels[r] ?? ""));
    }
    return cells;
  });

  return renderMarkdownTable(headers, rows, index, colAlign);
}

/**
 * Render a `Series` as a one-column Markdown table string.
 *
 * The column header is the series name, or `"0"` when the series is unnamed.
 *
 * @param s - Series to render.
 * @param options - See {@link ToMarkdownOptions}.
 * @returns Header line, delimiter line and one line per value, joined by `\n`.
 *
 * @example
 * ```ts
 * const s = new Series({ data: [10, 20, 30], name: "val" });
 * console.log(seriesToMarkdown(s));
 * // |     | val |
 * // | --- | --- |
 * // | 0   | 10  |
 * // | 1   | 20  |
 * // | 2   | 30  |
 * ```
 */
export function seriesToMarkdown(s: Series, options: ToMarkdownOptions = {}): string {
  const { colAlign = "none", index = true, floatFormat } = options;

  const colName = markdownEscape(s.name !== undefined && s.name !== null ? String(s.name) : "0");
  const rowLabels = s.index.values.map(labelStr);

  const headers = index ? ["", colName] : [colName];
  const rows = s.values.map((v, i) => {
    const cell = markdownCell(v, floatFormat);
    return index ? [markdownEscape(rowLabels[i] ?? ""), cell] : [cell];
  });

  return renderMarkdownTable(headers, rows, index, colAlign);
}

// ═════════════════════════════════════════════════════════════════════════════
// LATEX
// ═════════════════════════════════════════════════════════════════════════════

/** Options for {@link toLaTeX}. */
export interface ToLaTeXOptions {
  /**
   * Column format string, e.g. `"lrr"` or `"l|r|r"`. Defaults to `"l"` repeated
   * once per rendered column (the index column counts when `index` is on).
   */
  colFormat?: string;
  /** If `false`, omit the row index column. @default true */
  index?: boolean;
  /** Caption string placed in `\caption{}`; only emitted when `tableEnv` is `true`. @default undefined */
  caption?: string;
  /** Label string placed in `\label{}`; only emitted when `tableEnv` is `true`. @default undefined */
  label?: string;
  /** If `true`, wrap in `\begin{table}...\end{table}` environment. @default false */
  tableEnv?: boolean;
  /**
   * Number of decimal places applied (via `toFixed`) to every finite number.
   * @default undefined (no rounding)
   */
  floatFormat?: number;
  /** If `true`, use `longtable` instead of `tabular`. @default false */
  longtable?: boolean;
  /** If `false`, use `\hline` instead of the booktabs `\toprule/\midrule/\bottomrule`. @default true */
  booktabs?: boolean;
}
/** Replacement text for every character LaTeX treats specially in running text. */
const LATEX_ESCAPES = new Map([
  ["\\", "\\textbackslash{}"],
  ["&", "\\&"],
  ["%", "\\%"],
  ["$", "\\$"],
  ["#", "\\#"],
  ["_", "\\_"],
  ["{", "\\{"],
  ["}", "\\}"],
  ["~", "\\textasciitilde{}"],
  ["^", "\\textasciicircum{}"],
]);

/**
 * Escape special LaTeX characters in a string.
 *
 * Done in a single pass: with chained replaces, the braces of an inserted
 * `\textbackslash{}` were themselves escaped to `\textbackslash\{\}`.
 */
function latexEscape(s: string): string {
  return s.replace(/[\\&%$#_{}~^]/g, (ch) => LATEX_ESCAPES.get(ch) ?? ch);
}

/** Format one data value, applying `floatFormat` to finite numbers only, then escape it. */
function latexCell(val: Scalar, floatFormat: number | undefined): string {
  const text =
    floatFormat !== undefined && typeof val === "number" && Number.isFinite(val)
      ? val.toFixed(floatFormat)
      : cellStr(val);
  return latexEscape(text);
}

/**
 * Assemble the LaTeX source around already-escaped header and body cells.
 *
 * Shared by the DataFrame and Series renderers so both emit the same layout.
 */
function renderLatexTable(
  headerCells: readonly string[],
  bodyRows: readonly (readonly string[])[],
  options: ToLaTeXOptions,
): string {
  const { caption, label, tableEnv = false, longtable = false, booktabs = true } = options;

  const colFormat = options.colFormat ?? "l".repeat(headerCells.length);
  const envName = longtable ? "longtable" : "tabular";
  const rule = (position: "top" | "mid" | "bottom"): string =>
    booktabs ? `\\${position}rule` : "\\hline";
  const toRow = (cells: readonly string[]): string => `${cells.join(" & ")} \\\\`;

  const lines: string[] = [];

  if (tableEnv) {
    lines.push("\\begin{table}");
    if (caption !== undefined) {
      lines.push(`\\caption{${latexEscape(caption)}}`);
    }
    if (label !== undefined) {
      // A label is a reference key, not typeset text: escaping it would turn
      // `tab:my_table` into `tab:my\_table` and break `\ref{tab:my_table}`.
      lines.push(`\\label{${label}}`);
    }
    lines.push("\\centering");
  }

  lines.push(`\\begin{${envName}}{${colFormat}}`, rule("top"), toRow(headerCells), rule("mid"));
  for (const row of bodyRows) {
    lines.push(toRow(row));
  }
  lines.push(rule("bottom"), `\\end{${envName}}`);

  if (tableEnv) {
    lines.push("\\end{table}");
  }

  return lines.join("\n");
}

/**
 * Render a `DataFrame` as a LaTeX `tabular` (or `longtable`) environment string.
 *
 * Mirrors `pandas.DataFrame.to_latex()`. Column labels, row labels and values
 * are escaped for LaTeX; `caption` and `label` are only emitted when
 * `tableEnv` is `true`.
 *
 * @param df - Frame to render; columns are looked up by their stringified label.
 * @param options - See {@link ToLaTeXOptions}.
 * @returns The LaTeX source, lines joined by `\n`.
 *
 * @example
 * ```ts
 * const df = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
 * console.log(toLaTeX(df));
 * // \begin{tabular}{lll}
 * // \toprule
 * //  & a & b \\
 * // \midrule
 * // 0 & 1 & x \\
 * // 1 & 2 & y \\
 * // \bottomrule
 * // \end{tabular}
 * ```
 */
export function toLaTeX(df: DataFrame, options: ToLaTeXOptions = {}): string {
  const { index = true, floatFormat } = options;

  const colLabels = df.columns.values.map(labelStr);
  const rowLabels = df.index.values.map(labelStr);
  // Resolve each column once rather than once per cell.
  const columns = colLabels.map((name) => df.col(name));

  const headerCells = colLabels.map(latexEscape);
  if (index) {
    headerCells.unshift("");
  }

  const bodyRows = Array.from({ length: df.shape[0] }, (_, r) => {
    const cells = columns.map((col) => latexCell(col.iat(r), floatFormat));
    if (index) {
      cells.unshift(latexEscape(rowLabels[r] ?? ""));
    }
    return cells;
  });

  return renderLatexTable(headerCells, bodyRows, options);
}

/**
 * Render a `Series` as a one-column LaTeX table string.
 *
 * The column header is the series name, or `"0"` when the series is unnamed.
 *
 * @param s - Series to render.
 * @param options - See {@link ToLaTeXOptions}.
 * @returns The LaTeX source, lines joined by `\n`.
 *
 * @example
 * ```ts
 * const s = new Series({ data: [1, 2, 3], name: "x" });
 * console.log(seriesToLaTeX(s));
 * // \begin{tabular}{ll}
 * // \toprule
 * //  & x \\
 * // \midrule
 * // 0 & 1 \\
 * // 1 & 2 \\
 * // 2 & 3 \\
 * // \bottomrule
 * // \end{tabular}
 * ```
 */
export function seriesToLaTeX(s: Series, options: ToLaTeXOptions = {}): string {
  const { index = true, floatFormat } = options;

  const colName = latexEscape(s.name !== undefined && s.name !== null ? String(s.name) : "0");
  const rowLabels = s.index.values.map(labelStr);

  const headerCells = index ? ["", colName] : [colName];
  const bodyRows = s.values.map((v, i) => {
    const cell = latexCell(v, floatFormat);
    return index ? [latexEscape(rowLabels[i] ?? ""), cell] : [cell];
  });

  return renderLatexTable(headerCells, bodyRows, options);
}
-const NON_PLAYGROUND_PAGES = new Set(["index.html", "benchmarks.html", "examples.html"]); +const NON_PLAYGROUND_PAGES = new Set([ + "index.html", + "benchmarks.html", + "examples.html", + "extensions.html", + "format_table.html", +]); const PORT = 3399; const BASE_URL = `http://localhost:${PORT}`; diff --git a/tests/core/extensions.test.ts b/tests/core/extensions.test.ts new file mode 100644 index 00000000..ffa81c34 --- /dev/null +++ b/tests/core/extensions.test.ts @@ -0,0 +1,296 @@ +/** + * Tests for pd.api.extensions — ExtensionDtype, ExtensionArray, and accessor registration. + */ + +import { describe, expect, test } from "bun:test"; +import { + ExtensionArray, + ExtensionDtype, + apiExtensions, + constructExtensionDtypeFromString, + getRegisteredAccessors, + registerDataFrameAccessor, + registerExtensionDtype, + registerIndexAccessor, + registerSeriesAccessor, +} from "../../src/core/extensions.ts"; + +// ─── Concrete test implementations ─────────────────────────────────────────── + +class IPDtype extends ExtensionDtype { + override get name() { + return "ip"; + } + override get type(): abstract new ( + ...args: readonly unknown[] + ) => unknown { + return String as unknown as abstract new ( + ...args: readonly unknown[] + ) => unknown; + } + override get kind() { + return "O"; + } + static override construct_from_string(s: string): IPDtype | null { + return s === "ip" ? new IPDtype() : null; + } +} + +class IPArray extends ExtensionArray { + private readonly _data: readonly (string | null)[]; + + constructor(data: readonly (string | null)[]) { + super(); + this._data = data; + } + + override get dtype(): ExtensionDtype { + return new IPDtype(); + } + + override get length(): number { + return this._data.length; + } + + override getItem(i: number): string | null { + const idx = i < 0 ? this._data.length + i : i; + return this._data[idx] ?? 
null; + } + + override slice(start: number, stop: number): IPArray { + return new IPArray(this._data.slice(start, stop)); + } + + override fillna(value: unknown): IPArray { + return new IPArray(this._data.map((v) => (v === null || v === undefined ? String(value) : v))); + } +} + +// ─── ExtensionDtype tests ───────────────────────────────────────────────────── + +describe("ExtensionDtype", () => { + test("name, kind, type", () => { + const d = new IPDtype(); + expect(d.name).toBe("ip"); + expect(d.kind).toBe("O"); + expect(d.type).toBe(String); + }); + + test("isNumeric defaults to false", () => { + expect(new IPDtype().isNumeric).toBe(false); + }); + + test("naMissingValue defaults to null", () => { + expect(new IPDtype().naMissingValue).toBeNull(); + }); + + test("toString returns name", () => { + expect(String(new IPDtype())).toBe("ip"); + }); + + test("construct_from_string matches", () => { + expect(IPDtype.construct_from_string("ip")).toBeInstanceOf(IPDtype); + expect(IPDtype.construct_from_string("other")).toBeNull(); + }); + + test("base construct_from_string returns null", () => { + // The base class default always returns null + expect(ExtensionDtype.construct_from_string("anything")).toBeNull(); + }); + + test("base construct_array_type throws", () => { + expect(() => ExtensionDtype.construct_array_type()).toThrow(); + }); +}); + +// ─── ExtensionArray tests ───────────────────────────────────────────────────── + +describe("ExtensionArray", () => { + test("dtype", () => { + const arr = new IPArray(["1.1.1.1", "8.8.8.8"]); + expect(arr.dtype).toBeInstanceOf(IPDtype); + }); + + test("length", () => { + expect(new IPArray([]).length).toBe(0); + expect(new IPArray(["a", "b", "c"]).length).toBe(3); + }); + + test("getItem positive index", () => { + const arr = new IPArray(["10.0.0.1", "192.168.0.1"]); + expect(arr.getItem(0)).toBe("10.0.0.1"); + expect(arr.getItem(1)).toBe("192.168.0.1"); + }); + + test("getItem negative index", () => { + const arr = new 
IPArray(["a", "b", "c"]); + expect(arr.getItem(-1)).toBe("c"); + expect(arr.getItem(-2)).toBe("b"); + }); + + test("getItem null element", () => { + const arr = new IPArray([null, "1.1.1.1"]); + expect(arr.getItem(0)).toBeNull(); + }); + + test("slice", () => { + const arr = new IPArray(["a", "b", "c", "d"]); + const sliced = arr.slice(1, 3); + expect(sliced.length).toBe(2); + expect(sliced.getItem(0)).toBe("b"); + expect(sliced.getItem(1)).toBe("c"); + }); + + test("isna", () => { + const arr = new IPArray(["1.1.1.1", null, "8.8.8.8"]); + expect(arr.isna()).toEqual([false, true, false]); + }); + + test("isna all valid", () => { + expect(new IPArray(["a", "b"]).isna()).toEqual([false, false]); + }); + + test("isna all null", () => { + expect(new IPArray([null, null]).isna()).toEqual([true, true]); + }); + + test("fillna", () => { + const arr = new IPArray(["1.1.1.1", null, "8.8.8.8"]); + const filled = arr.fillna("0.0.0.0") as IPArray; + expect(filled.getItem(0)).toBe("1.1.1.1"); + expect(filled.getItem(1)).toBe("0.0.0.0"); + expect(filled.getItem(2)).toBe("8.8.8.8"); + }); + + test("toArray", () => { + const arr = new IPArray(["a", null, "c"]); + expect(arr.toArray()).toEqual(["a", null, "c"]); + }); + + test("toString", () => { + const arr = new IPArray(["a", "b"]); + const s = arr.toString(); + expect(s).toContain("IPArray"); + expect(s).toContain("length=2"); + expect(s).toContain("ip"); + }); +}); + +// ─── registerExtensionDtype tests ───────────────────────────────────────────── + +describe("registerExtensionDtype / constructExtensionDtypeFromString", () => { + test("registered dtype is resolved from string", () => { + registerExtensionDtype(IPDtype); + const result = constructExtensionDtypeFromString("ip"); + expect(result).toBeInstanceOf(IPDtype); + }); + + test("unknown string returns null", () => { + expect(constructExtensionDtypeFromString("unknownabc123")).toBeNull(); + }); + + test("re-registering does not throw", () => { + expect(() => 
registerExtensionDtype(IPDtype)).not.toThrow(); + }); +}); + +// ─── Accessor registration tests ────────────────────────────────────────────── + +class GeoAccessor { + constructor(private readonly _obj: unknown) {} + describe() { + return `geo(${this._obj})`; + } +} + +class PlotAccessor { + constructor(private readonly _obj: unknown) {} +} + +class IdxAccessor { + constructor(private readonly _obj: unknown) {} +} + +describe("registerSeriesAccessor / getRegisteredAccessors", () => { + test("register and retrieve series accessor", () => { + registerSeriesAccessor("geo", GeoAccessor); + const m = getRegisteredAccessors("series"); + expect(m.get("geo")).toBe(GeoAccessor); + }); + + test("accessor can be instantiated with a target object", () => { + registerSeriesAccessor("geo", GeoAccessor); + const Cls = getRegisteredAccessors("series").get("geo")!; + const acc = new Cls("my-series"); + expect((acc as GeoAccessor).describe()).toBe("geo(my-series)"); + }); +}); + +describe("registerDataFrameAccessor / getRegisteredAccessors", () => { + test("register and retrieve dataframe accessor", () => { + registerDataFrameAccessor("plot", PlotAccessor); + const m = getRegisteredAccessors("dataframe"); + expect(m.get("plot")).toBe(PlotAccessor); + }); +}); + +describe("registerIndexAccessor / getRegisteredAccessors", () => { + test("register and retrieve index accessor", () => { + registerIndexAccessor("idx_tool", IdxAccessor); + const m = getRegisteredAccessors("index"); + expect(m.get("idx_tool")).toBe(IdxAccessor); + }); +}); + +describe("getRegisteredAccessors — empty target", () => { + test("returns empty map for unused target", () => { + // 'series2' is not a real target but should just return empty map + // Use a known target that wasn't registered to yet in this test file + const m = getRegisteredAccessors("index"); + // We've registered one already; just verify it's a ReadonlyMap + expect(typeof m.get).toBe("function"); + }); +}); + +// ─── api.extensions namespace 
───────────────────────────────────────────────── + +describe("apiExtensions namespace", () => { + test("contains all expected members", () => { + expect(apiExtensions.ExtensionDtype).toBe(ExtensionDtype); + expect(apiExtensions.ExtensionArray).toBe(ExtensionArray); + expect(typeof apiExtensions.registerExtensionDtype).toBe("function"); + expect(typeof apiExtensions.constructExtensionDtypeFromString).toBe("function"); + expect(typeof apiExtensions.registerSeriesAccessor).toBe("function"); + expect(typeof apiExtensions.registerDataFrameAccessor).toBe("function"); + expect(typeof apiExtensions.registerIndexAccessor).toBe("function"); + expect(typeof apiExtensions.getRegisteredAccessors).toBe("function"); + }); + + test("api.extensions.registerExtensionDtype works", () => { + apiExtensions.registerExtensionDtype(IPDtype); + const result = apiExtensions.constructExtensionDtypeFromString("ip"); + expect(result).toBeInstanceOf(IPDtype); + }); +}); + +// ─── Re-export from src/index.ts ───────────────────────────────────────────── + +describe("top-level re-exports", () => { + test("ExtensionDtype and ExtensionArray exported from tsb", async () => { + const tsb = await import("../../src/index.ts"); + expect(tsb.ExtensionDtype).toBe(ExtensionDtype); + expect(tsb.ExtensionArray).toBe(ExtensionArray); + expect(typeof tsb.registerExtensionDtype).toBe("function"); + expect(typeof tsb.registerSeriesAccessor).toBe("function"); + expect(typeof tsb.registerDataFrameAccessor).toBe("function"); + expect(typeof tsb.registerIndexAccessor).toBe("function"); + expect(typeof tsb.getRegisteredAccessors).toBe("function"); + expect(tsb.apiExtensions).toBe(apiExtensions); + }); + + test("api.extensions accessible from top-level api export", async () => { + const tsb = await import("../../src/index.ts"); + expect(tsb.api.extensions).toBe(apiExtensions); + expect(tsb.api.extensions.ExtensionDtype).toBe(ExtensionDtype); + }); +}); diff --git a/tests/core/pd_array.test.ts 
b/tests/core/pd_array.test.ts new file mode 100644 index 00000000..66fe8c3b --- /dev/null +++ b/tests/core/pd_array.test.ts @@ -0,0 +1,99 @@ +/** + * Tests for pdArray — the pd.array() factory function. + */ + +import { describe, expect, test } from "bun:test"; +import { PandasArray, pdArray } from "../../src/index.ts"; + +describe("pdArray", () => { + test("creates an int64 array when all values are integers", () => { + const a = pdArray([1, 2, 3]); + expect(a).toBeInstanceOf(PandasArray); + expect(a.dtype).toBe("int64"); + expect(a.length).toBe(3); + expect(a.toArray()).toEqual([1, 2, 3]); + }); + + test("creates a float64 array when any value is non-integer", () => { + const a = pdArray([1, 2.5, 3]); + expect(a.dtype).toBe("float64"); + expect(a.toArray()).toEqual([1, 2.5, 3]); + }); + + test("creates a bool array when all non-null values are booleans", () => { + const a = pdArray([true, false, true]); + expect(a.dtype).toBe("bool"); + expect(a.toArray()).toEqual([true, false, true]); + }); + + test("creates a string array when values are strings", () => { + const a = pdArray(["a", "b", "c"]); + expect(a.dtype).toBe("string"); + expect(a.toArray()).toEqual(["a", "b", "c"]); + }); + + test("creates a string array with nulls", () => { + const a = pdArray(["a", null, "c"]); + expect(a.dtype).toBe("string"); + expect(a.at(1)).toBeNull(); + }); + + test("respects explicit dtype override", () => { + const a = pdArray([1, 2, 3], "float32"); + expect(a.dtype).toBe("float32"); + }); + + test("explicit string dtype overrides inferred int", () => { + const a = pdArray([1, 2, 3], "string"); + expect(a.dtype).toBe("string"); + expect(a.toArray()).toEqual([1, 2, 3]); + }); + + test("handles empty array", () => { + const a = pdArray([]); + expect(a.length).toBe(0); + expect(a.toArray()).toEqual([]); + expect(a.dtype).toBe("object"); + }); + + test("handles all-null array", () => { + const a = pdArray([null, null, null]); + expect(a.dtype).toBe("object"); + 
expect(a.length).toBe(3); + }); + + test("at() returns element at position", () => { + const a = pdArray([10, 20, 30]); + expect(a.at(0)).toBe(10); + expect(a.at(2)).toBe(30); + }); + + test("at() returns null for out-of-bounds", () => { + const a = pdArray([1, 2]); + expect(a.at(99)).toBeNull(); + }); + + test("is iterable", () => { + const a = pdArray([1, 2, 3]); + expect([...a]).toEqual([1, 2, 3]); + }); + + test("accepts an iterable (Set)", () => { + const a = pdArray(new Set([1, 2, 3])); + expect(a.length).toBe(3); + expect(a.dtype).toBe("int64"); + }); + + test("infers datetime dtype from Date values", () => { + const d = new Date("2024-01-01"); + const a = pdArray([d]); + expect(a.dtype).toBe("datetime"); + }); + + test("toString contains dtype and values", () => { + const a = pdArray([1, 2]); + const s = a.toString(); + expect(s).toContain("int64"); + expect(s).toContain("1"); + }); +}); diff --git a/tests/playground.test.ts b/tests/playground.test.ts index bbaaed76..0558ff56 100644 --- a/tests/playground.test.ts +++ b/tests/playground.test.ts @@ -30,6 +30,8 @@ const NON_PLAYGROUND_PAGES = new Set([ "benchmarks.html", // The examples page is an index/gallery page linking to individual examples. "examples.html", + // The extensions page is a static reference page — no interactive code blocks. 
+ "extensions.html", ]); const REAL_WORLD_EXAMPLE_PAGES = [ diff --git a/tests/stats/format_table.test.ts b/tests/stats/format_table.test.ts new file mode 100644 index 00000000..cab3dd52 --- /dev/null +++ b/tests/stats/format_table.test.ts @@ -0,0 +1,255 @@ +/** + * Tests for format_table — toMarkdown / toLaTeX + */ +import { describe, expect, it } from "bun:test"; +import { DataFrame } from "../../src/core/frame.ts"; +import { Series } from "../../src/core/series.ts"; +import { + seriesToLaTeX, + seriesToMarkdown, + toLaTeX, + toMarkdown, +} from "../../src/stats/format_table.ts"; + +// ─── toMarkdown ─────────────────────────────────────────────────────────────── + +describe("toMarkdown", () => { + it("basic DataFrame", () => { + const df = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] }); + const md = toMarkdown(df); + const lines = md.split("\n"); + // header + separator + 2 data rows + expect(lines).toHaveLength(4); + expect(lines[0]).toContain("a"); + expect(lines[0]).toContain("b"); + // separator row + expect(lines[1]).toMatch(/\|[-| ]+\|/); + // data rows contain values + expect(lines[2]).toContain("1"); + expect(lines[2]).toContain("x"); + expect(lines[3]).toContain("2"); + expect(lines[3]).toContain("y"); + }); + + it("includes index column by default", () => { + const df = DataFrame.fromColumns({ v: [10, 20] }); + const md = toMarkdown(df); + const lines = md.split("\n"); + // header should have empty index cell + expect(lines[0]).toMatch(/^\| +\|/); + expect(lines[2]).toContain("0"); + expect(lines[3]).toContain("1"); + }); + + it("index: false omits index column", () => { + const df = DataFrame.fromColumns({ v: [10, 20] }); + const md = toMarkdown(df, { index: false }); + const lines = md.split("\n"); + // no empty leading cell + expect(lines[0]).toMatch(/^\| v/); + expect(lines[2]).not.toContain("0 |"); + }); + + it("colAlign: left", () => { + const df = DataFrame.fromColumns({ a: [1] }); + const md = toMarkdown(df, { colAlign: "left" }); + 
expect(md).toContain(":---"); + }); + + it("colAlign: right", () => { + const df = DataFrame.fromColumns({ a: [1] }); + const md = toMarkdown(df, { colAlign: "right" }); + expect(md).toContain("---:"); + }); + + it("colAlign: center", () => { + const df = DataFrame.fromColumns({ a: [1] }); + const md = toMarkdown(df, { colAlign: "center" }); + expect(md).toContain(":---:"); + }); + + it("floatFormat rounds numbers", () => { + const df = DataFrame.fromColumns({ v: [1.23456] }); + const md = toMarkdown(df, { floatFormat: 2 }); + expect(md).toContain("1.23"); + expect(md).not.toContain("1.23456"); + }); + + it("null/undefined/NaN cells render as empty/NaN", () => { + const df = DataFrame.fromColumns({ + a: [null, Number.NaN, undefined] as (null | number | undefined)[], + }); + const md = toMarkdown(df); + expect(md).toContain("NaN"); + }); + + it("empty DataFrame (no rows)", () => { + const df = DataFrame.fromColumns({ a: [] as number[] }); + const md = toMarkdown(df); + const lines = md.split("\n"); + // header + separator only + expect(lines).toHaveLength(2); + }); + + it("single column, single row", () => { + const df = DataFrame.fromColumns({ x: [42] }); + const md = toMarkdown(df); + expect(md).toContain("42"); + expect(md.split("\n")).toHaveLength(3); + }); +}); + +// ─── seriesToMarkdown ───────────────────────────────────────────────────────── + +describe("seriesToMarkdown", () => { + it("basic Series", () => { + const s = new Series({ data: [1, 2, 3], name: "val" }); + const md = seriesToMarkdown(s); + const lines = md.split("\n"); + expect(lines).toHaveLength(5); // header + sep + 3 rows + expect(lines[0]).toContain("val"); + expect(lines[2]).toContain("1"); + }); + + it("uses series name as column header", () => { + const s = new Series({ data: [10], name: "score" }); + const md = seriesToMarkdown(s); + expect(md).toContain("score"); + }); + + it("unnamed series uses '0' as column name", () => { + const s = new Series({ data: [1, 2] }); + const md = 
seriesToMarkdown(s); + expect(md.split("\n")[0]).toContain("0"); + }); + + it("index: false", () => { + const s = new Series({ data: [5, 6], name: "n" }); + const md = seriesToMarkdown(s, { index: false }); + const lines = md.split("\n"); + expect(lines[0]).toMatch(/^\| n/); + }); +}); + +// ─── toLaTeX ────────────────────────────────────────────────────────────────── + +describe("toLaTeX", () => { + it("basic DataFrame produces tabular environment", () => { + const df = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] }); + const tex = toLaTeX(df); + expect(tex).toContain("\\begin{tabular}"); + expect(tex).toContain("\\end{tabular}"); + expect(tex).toContain("\\toprule"); + expect(tex).toContain("\\midrule"); + expect(tex).toContain("\\bottomrule"); + }); + + it("header row includes column names", () => { + const df = DataFrame.fromColumns({ alpha: [1], beta: [2] }); + const tex = toLaTeX(df); + expect(tex).toContain("alpha"); + expect(tex).toContain("beta"); + }); + + it("data rows contain values", () => { + const df = DataFrame.fromColumns({ v: [42, 99] }); + const tex = toLaTeX(df); + expect(tex).toContain("42"); + expect(tex).toContain("99"); + }); + + it("index: false omits index", () => { + const df = DataFrame.fromColumns({ v: [1, 2] }); + const tex = toLaTeX(df, { index: false }); + // No " & " before the value in data rows + const lines = tex.split("\n").filter((l) => l.endsWith("\\\\")); + // header line + expect(lines[0]).toBe("v \\\\"); + }); + + it("custom colFormat", () => { + const df = DataFrame.fromColumns({ a: [1] }); + const tex = toLaTeX(df, { colFormat: "lr" }); + expect(tex).toContain("{lr}"); + }); + + it("booktabs: false uses hline", () => { + const df = DataFrame.fromColumns({ a: [1] }); + const tex = toLaTeX(df, { booktabs: false }); + expect(tex).toContain("\\hline"); + expect(tex).not.toContain("\\toprule"); + }); + + it("longtable: true uses longtable env", () => { + const df = DataFrame.fromColumns({ a: [1] }); + const tex = 
toLaTeX(df, { longtable: true }); + expect(tex).toContain("\\begin{longtable}"); + expect(tex).toContain("\\end{longtable}"); + }); + + it("tableEnv: true wraps in table environment", () => { + const df = DataFrame.fromColumns({ a: [1] }); + const tex = toLaTeX(df, { tableEnv: true }); + expect(tex).toContain("\\begin{table}"); + expect(tex).toContain("\\end{table}"); + }); + + it("caption and label", () => { + const df = DataFrame.fromColumns({ a: [1] }); + const tex = toLaTeX(df, { tableEnv: true, caption: "My Table", label: "tab:my" }); + expect(tex).toContain("\\caption{My Table}"); + expect(tex).toContain("\\label{tab:my}"); + }); + + it("floatFormat rounds numbers", () => { + const df = DataFrame.fromColumns({ v: [3.14159] }); + const tex = toLaTeX(df, { floatFormat: 2 }); + expect(tex).toContain("3.14"); + expect(tex).not.toContain("3.14159"); + }); + + it("escapes special LaTeX characters", () => { + const df = DataFrame.fromColumns({ "a&b": [1] }); + const tex = toLaTeX(df); + expect(tex).toContain("a\\&b"); + }); + + it("escapes special LaTeX chars in values", () => { + const df = DataFrame.fromColumns({ v: ["x_y"] }); + const tex = toLaTeX(df); + expect(tex).toContain("x\\_y"); + }); + + it("empty DataFrame produces only header", () => { + const df = DataFrame.fromColumns({ a: [] as number[] }); + const tex = toLaTeX(df); + expect(tex).toContain("\\begin{tabular}"); + expect(tex).toContain("\\bottomrule"); + }); +}); + +// ─── seriesToLaTeX ──────────────────────────────────────────────────────────── + +describe("seriesToLaTeX", () => { + it("basic series", () => { + const s = new Series({ data: [1, 2, 3], name: "x" }); + const tex = seriesToLaTeX(s); + expect(tex).toContain("\\begin{tabular}"); + expect(tex).toContain("x"); + expect(tex).toContain("1"); + expect(tex).toContain("2"); + }); + + it("index: false", () => { + const s = new Series({ data: [5], name: "v" }); + const tex = seriesToLaTeX(s, { index: false }); + const lines = 
tex.split("\n").filter((l) => l.endsWith("\\\\")); + expect(lines[0]).toBe("v \\\\"); + }); + + it("floatFormat", () => { + const s = new Series({ data: [1.111], name: "n" }); + const tex = seriesToLaTeX(s, { floatFormat: 1 }); + expect(tex).toContain("1.1"); + }); +});