// ── src/core/reindex.ts ── nearest-fill helpers ──────────────────────────────

/**
 * Scan left-to-right and record, for every position, the closest present
 * position at or before it.
 *
 * @param values  - Values to read from.
 * @param present - `present[i]` is truthy when position `i` holds a usable value.
 * @returns `dist[i]` is the distance back to that position (`0` when `i` itself
 *   is present, `-1` when nothing is present at or before `i`); `val[i]` is the
 *   value stored there (`null` when there is none).
 */
function buildLeftNearest(
  values: Scalar[],
  present: readonly boolean[],
): { dist: number[]; val: Scalar[] } {
  const n = values.length;
  const dist = new Array<number>(n).fill(-1);
  const val = new Array<Scalar>(n).fill(null);
  let lastIdx = -1;
  for (let i = 0; i < n; i++) {
    if (present[i]) lastIdx = i;
    if (lastIdx >= 0) {
      dist[i] = i - lastIdx;
      val[i] = values[lastIdx];
    }
  }
  return { dist, val };
}

/**
 * Mirror image of {@link buildLeftNearest}: scan right-to-left and record, for
 * every position, the closest present position at or after it.
 *
 * @param values  - Values to read from.
 * @param present - `present[i]` is truthy when position `i` holds a usable value.
 * @returns `dist[i]` is the distance forward to that position (`0` when `i`
 *   itself is present, `-1` when nothing is present at or after `i`); `val[i]`
 *   is the value stored there (`null` when there is none).
 */
function buildRightNearest(
  values: Scalar[],
  present: readonly boolean[],
): { dist: number[]; val: Scalar[] } {
  const n = values.length;
  const dist = new Array<number>(n).fill(-1);
  const val = new Array<Scalar>(n).fill(null);
  let nextIdx = -1;
  for (let i = n - 1; i >= 0; i--) {
    if (present[i]) nextIdx = i;
    if (nextIdx >= 0) {
      dist[i] = nextIdx - i;
      val[i] = values[nextIdx];
    }
  }
  return { dist, val };
}

/**
 * Choose between the left and right candidate for one position.
 *
 * @param ld       - Distance to the left candidate, or `-1` when there is none.
 * @param rd       - Distance to the right candidate, or `-1` when there is none.
 * @param leftVal  - Value of the left candidate.
 * @param rightVal - Value of the right candidate.
 * @returns `null` when neither side has a candidate; the only available
 *   candidate when one side is missing; otherwise the closer one, with the
 *   right candidate winning a tie.
 */
function pickNearest(ld: number, rd: number, leftVal: Scalar, rightVal: Scalar): Scalar {
  if (ld === -1 && rd === -1) return null;
  if (ld === -1) return rightVal;
  if (rd === -1) return leftVal;
  return rd <= ld ? rightVal : leftVal; // prefer right on tie
}

/**
 * Fill every non-present position with the value of the nearest present
 * position.
 *
 * On a tie (equidistant left and right), prefer the right (forward) value —
 * matching pandas' `method="nearest"` behaviour. When no position is present
 * at all, every slot becomes `null`.
 *
 * @param values  - Input values; the array is copied, never mutated.
 * @param present - `present[i]` is truthy when `values[i]` is a usable value.
 * @returns A new array of the same length; present positions keep their value.
 */
function applyNearest(values: Scalar[], present: readonly boolean[]): Scalar[] {
  const out = values.slice();
  const left = buildLeftNearest(values, present);
  const right = buildRightNearest(values, present);
  for (let i = 0; i < values.length; i++) {
    if (present[i]) continue;
    // Only the distances get a fallback (presumably for index-access
    // strictness). The neighbour values are passed through untouched: wrapping
    // them in `?? null` would rewrite a present `undefined` to `null`, which the
    // pre-refactor implementation did not do.
    out[i] = pickNearest(left.dist[i] ?? -1, right.dist[i] ?? -1, left.val[i], right.val[i]);
  }
  return out;
}

// ── tests/stats/xs.test.ts ───────────────────────────────────────────────────

// ─── property tests ───────────────────────────────────────────────────────────

// ─── pandas-parity: named & negative level lookup ────────────────────────────
//
// pandas.DataFrame.xs / Series.xs accept `level=` as either an integer
// (positional, supports negatives) or a string (level name). The
// implementation in src/stats/xs.ts supports both via resolveLevel(); the
// existing tests only covered positive integer levels.
describe("xsDataFrame (MultiIndex) — pandas-parity level lookup", () => {
  // Fixture: a single "val" column over a two-level index named (letter, num).
  const index = MultiIndex.fromTuples(
    [
      ["A", 1],
      ["A", 2],
      ["B", 1],
      ["B", 2],
    ],
    { names: ["letter", "num"] },
  );
  const valSeries = new Series({ data: [10, 20, 30, 40], index: index as unknown as Index });
  const frame = new DataFrame(new Map([["val", valSeries]]), index as unknown as Index, ["val"]);

  /** Values of the "val" column of a cross-section result. */
  const valOf = (result: DataFrame) => result.get("val")?.values;

  test("named level lookup matches numeric level lookup (outer)", () => {
    const viaName = valOf(xsDataFrame(frame, "A", { level: "letter" }) as DataFrame);
    const viaPosition = valOf(xsDataFrame(frame, "A", { level: 0 }) as DataFrame);
    expect(viaName).toEqual(viaPosition);
    expect(viaName).toEqual([10, 20]);
  });

  test("named level lookup matches numeric level lookup (inner)", () => {
    const viaName = valOf(xsDataFrame(frame, 1, { level: "num" }) as DataFrame);
    const viaPosition = valOf(xsDataFrame(frame, 1, { level: 1 }) as DataFrame);
    expect(viaName).toEqual(viaPosition);
    expect(viaName).toEqual([10, 30]);
  });

  test("negative level index resolves from the end (-1 → last level)", () => {
    const section = xsDataFrame(frame, 1, { level: -1 }) as DataFrame;
    expect(valOf(section)).toEqual([10, 30]);
  });

  test("throws on out-of-range positive level number", () => {
    const lookup = () => xsDataFrame(frame, "A", { level: 5 });
    expect(lookup).toThrow(/out of range/);
  });

  test("throws on out-of-range negative level number", () => {
    const lookup = () => xsDataFrame(frame, "A", { level: -5 });
    expect(lookup).toThrow(/out of range/);
  });

  test("throws on unknown level name", () => {
    const lookup = () => xsDataFrame(frame, "A", { level: "missing" });
    expect(lookup).toThrow(/Level name/);
  });
});

// ─── pandas-parity: dropLevel=false ──────────────────────────────────────────
//
// pandas xs has `drop_level=True` as its default. When `drop_level=False`
// the matched level must remain in the result index (so the caller can see
// what was matched).
// This was implemented but had no test coverage.

describe("xsDataFrame (MultiIndex) — dropLevel option", () => {
  // Fixture: a single "val" column over a two-level index named (letter, num).
  const index = MultiIndex.fromTuples(
    [
      ["A", 1],
      ["A", 2],
      ["B", 1],
      ["B", 2],
    ],
    { names: ["letter", "num"] },
  );
  const valSeries = new Series({ data: [10, 20, 30, 40], index: index as unknown as Index });
  const frame = new DataFrame(new Map([["val", valSeries]]), index as unknown as Index, ["val"]);

  /** Assert the result is still MultiIndex-indexed and hand that index back. */
  const multiIndexOf = (result: DataFrame): MultiIndex => {
    expect(result.index).toBeInstanceOf(MultiIndex);
    return result.index as unknown as MultiIndex;
  };

  test("dropLevel=true (default) reduces a 2-level MultiIndex to a flat Index", () => {
    const section = xsDataFrame(frame, "A") as DataFrame;
    expect(section.index).not.toBeInstanceOf(MultiIndex);
    expect(section.index.values).toEqual([1, 2]);
  });

  test("dropLevel=false preserves the matched level (still a MultiIndex with both names)", () => {
    const section = xsDataFrame(frame, "A", { dropLevel: false }) as DataFrame;
    const kept = multiIndexOf(section);
    expect(kept.nlevels).toBe(2);
    expect(kept.names).toEqual(["letter", "num"]);
    expect(kept.at(0)).toEqual(["A", 1]);
    expect(kept.at(1)).toEqual(["A", 2]);
    expect(section.get("val")?.values).toEqual([10, 20]);
  });

  test("dropLevel=false on inner-level xs preserves both levels", () => {
    const section = xsDataFrame(frame, 1, { level: "num", dropLevel: false }) as DataFrame;
    const kept = multiIndexOf(section);
    expect(kept.names).toEqual(["letter", "num"]);
    expect(kept.at(0)).toEqual(["A", 1]);
    expect(kept.at(1)).toEqual(["B", 1]);
    expect(section.get("val")?.values).toEqual([10, 30]);
  });
});

// ─── pandas-parity: 3-level MultiIndex ───────────────────────────────────────
//
// pandas tests xs against MultiIndexes deeper than 2 levels. The reduction
// logic (droplevel of the matched level) needs to leave the remaining levels
// (and their names) in the right order — only 2-level cases were covered.
describe("xsDataFrame — 3-level MultiIndex", () => {
  // Fixture: a single "v" column over a three-level index named (L0, L1, L2).
  const index3 = MultiIndex.fromTuples(
    [
      ["A", "x", 1],
      ["A", "x", 2],
      ["A", "y", 1],
      ["B", "x", 1],
    ],
    { names: ["L0", "L1", "L2"] },
  );
  const vSeries = new Series({ data: [1, 2, 3, 4], index: index3 as unknown as Index });
  const frame3 = new DataFrame(new Map([["v", vSeries]]), index3 as unknown as Index, ["v"]);

  /** Assert the result is still MultiIndex-indexed and hand that index back. */
  const multiIndexOf = (result: DataFrame): MultiIndex => {
    expect(result.index).toBeInstanceOf(MultiIndex);
    return result.index as unknown as MultiIndex;
  };

  test("xs at outer level drops L0, leaves a 2-level MultiIndex named [L1, L2]", () => {
    const section = xsDataFrame(frame3, "A") as DataFrame;
    const remaining = multiIndexOf(section);
    expect(remaining.nlevels).toBe(2);
    expect(remaining.names).toEqual(["L1", "L2"]);
    expect(section.get("v")?.values).toEqual([1, 2, 3]);
  });

  test("xs at middle level drops L1, leaves a 2-level MultiIndex named [L0, L2]", () => {
    const section = xsDataFrame(frame3, "x", { level: "L1" }) as DataFrame;
    const remaining = multiIndexOf(section);
    expect(remaining.nlevels).toBe(2);
    expect(remaining.names).toEqual(["L0", "L2"]);
    expect(section.get("v")?.values).toEqual([1, 2, 4]);
  });

  test("xs at innermost level drops L2, leaves a 2-level MultiIndex named [L0, L1]", () => {
    const section = xsDataFrame(frame3, 1, { level: "L2" }) as DataFrame;
    const remaining = multiIndexOf(section);
    expect(remaining.names).toEqual(["L0", "L1"]);
    expect(section.get("v")?.values).toEqual([1, 3, 4]);
  });

  test("xs at outer level with dropLevel=false keeps all three levels", () => {
    const section = xsDataFrame(frame3, "A", { dropLevel: false }) as DataFrame;
    const remaining = multiIndexOf(section);
    expect(remaining.nlevels).toBe(3);
    expect(remaining.names).toEqual(["L0", "L1", "L2"]);
  });
});

// ─── pandas-parity: xsSeries level / dropLevel options ───────────────────────

describe("xsSeries (MultiIndex) — pandas-parity level & dropLevel", () => {
  // Fixture: three values over a two-level index named (g, n).
  const groupIndex = MultiIndex.fromTuples(
    [
      ["X", 1],
      ["X", 2],
      ["Y", 1],
    ],
    { names: ["g", "n"] },
  );
  const series = new Series({
    data: [100, 200, 300],
    index: groupIndex as unknown as Index,
  });

  test("named level lookup matches numeric level lookup", () => {
    const viaName = xsSeries(series, 1, { level: "n" }) as Series;
    const viaPosition = xsSeries(series, 1, { level: 1 }) as Series;
    expect(viaName.values).toEqual(viaPosition.values);
    expect(viaName.values).toEqual([100, 300]);
  });

  test("negative level index resolves from the end", () => {
    const section = xsSeries(series, 1, { level: -1 }) as Series;
    expect(section.values).toEqual([100, 300]);
  });

  test("dropLevel=false keeps result as a MultiIndex preserving both levels", () => {
    const section = xsSeries(series, "X", { dropLevel: false }) as Series;
    expect(section.index).toBeInstanceOf(MultiIndex);
    const kept = section.index as unknown as MultiIndex;
    expect(kept.nlevels).toBe(2);
    expect(kept.names).toEqual(["g", "n"]);
    expect(section.values).toEqual([100, 200]);
  });

  test("throws on unknown level name", () => {
    const lookup = () => xsSeries(series, "X", { level: "missing" });
    expect(lookup).toThrow(/Level name/);
  });

  test("throws on out-of-range level number", () => {
    const lookup = () => xsSeries(series, "X", { level: 9 });
    expect(lookup).toThrow(/out of range/);
  });

  test("Series name is preserved when xs returns a Series (multi-match)", () => {
    // Flat (non-MultiIndex) labels with a repeated key, so xs matches two rows.
    const withDuplicates = new Series({
      data: [1, 2, 3, 4],
      index: ["a", "b", "a", "c"],
      name: "mycol",
    });
    const section = xsSeries(withDuplicates, "a") as Series;
    expect(section.name).toBe("mycol");
    expect(section.values).toEqual([1, 3]);
  });
});

describe("xsDataFrame property tests", () => {
  test("axis=1 returns the exact column Series", () => {
    fc.assert(