Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 37 additions & 37 deletions src/core/reindex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,58 +156,58 @@ function applyBfill(
* On a tie (equidistant left and right), prefer the right (forward) value —
* matching pandas' `method="nearest"` behaviour.
*/
function applyNearest(values: Scalar[], present: readonly boolean[]): Scalar[] {
function buildLeftNearest(
values: Scalar[],
present: readonly boolean[],
): { dist: number[]; val: Scalar[] } {
const n = values.length;
const out = values.slice();

// left[i] = { dist, val } of the nearest valid position to the left (or null)
const leftDist: number[] = new Array(n).fill(-1);
const leftVal: Scalar[] = new Array(n).fill(null);
const dist: number[] = new Array(n).fill(-1);
const val: Scalar[] = new Array(n).fill(null);
let lastIdx = -1;
for (let i = 0; i < n; i++) {
if (present[i]) {
lastIdx = i;
}
if (present[i]) lastIdx = i;
if (lastIdx >= 0) {
leftDist[i] = i - lastIdx;
leftVal[i] = values[lastIdx];
dist[i] = i - lastIdx;
val[i] = values[lastIdx];
}
}
return { dist, val };
}

// right[i] = { dist, val } of the nearest valid position to the right (or null)
const rightDist: number[] = new Array(n).fill(-1);
const rightVal: Scalar[] = new Array(n).fill(null);
function buildRightNearest(
values: Scalar[],
present: readonly boolean[],
): { dist: number[]; val: Scalar[] } {
const n = values.length;
const dist: number[] = new Array(n).fill(-1);
const val: Scalar[] = new Array(n).fill(null);
let nextIdx = -1;
for (let i = n - 1; i >= 0; i--) {
if (present[i]) {
nextIdx = i;
}
if (present[i]) nextIdx = i;
if (nextIdx >= 0) {
rightDist[i] = nextIdx - i;
rightVal[i] = values[nextIdx];
dist[i] = nextIdx - i;
val[i] = values[nextIdx];
}
}
return { dist, val };
}

for (let i = 0; i < n; i++) {
if (present[i]) {
continue;
}
const ld = leftDist[i];
const rd = rightDist[i];
if (ld === -1 && rd === -1) {
out[i] = null;
} else if (ld === -1) {
out[i] = rightVal[i];
} else if (rd === -1) {
out[i] = leftVal[i];
} else if (rd !== undefined && ld !== undefined && rd <= ld) {
// prefer right on tie
out[i] = rightVal[i];
} else {
out[i] = leftVal[i];
/**
 * Select the fill value for a missing slot from its nearest neighbours.
 *
 * A distance of `-1` is the sentinel for "no present neighbour on that side".
 *
 * @param ld - distance to the nearest present position on the left, or `-1`.
 * @param rd - distance to the nearest present position on the right, or `-1`.
 * @param leftVal - value found at the left neighbour.
 * @param rightVal - value found at the right neighbour.
 * @returns `null` when neither side has a neighbour; otherwise the value of
 *   the closer neighbour, with the right-hand value winning an exact tie.
 */
function pickNearest(ld: number, rd: number, leftVal: Scalar, rightVal: Scalar): Scalar {
  const leftMissing = ld === -1;
  const rightMissing = rd === -1;

  if (leftMissing) {
    // Only the right side (if any) can supply a value.
    return rightMissing ? null : rightVal;
  }
  if (rightMissing) {
    return leftVal;
  }

  // Both sides available: the right value is preferred on a tie.
  if (rd <= ld) {
    return rightVal;
  }
  return leftVal;
}

/**
 * Return a copy of `values` in which every slot not flagged in `present` is
 * replaced by the value of its nearest present neighbour.
 *
 * Neighbour lookups come from `buildLeftNearest` / `buildRightNearest`, and the
 * choice between the two sides is delegated to `pickNearest` (which prefers
 * the right-hand value on a tie). The input array is not modified.
 *
 * @param values - the values to fill.
 * @param present - per-position flags; truthy entries are left untouched.
 * @returns a new array of the same length as `values`.
 */
function applyNearest(values: Scalar[], present: readonly boolean[]): Scalar[] {
  const filled = values.slice();
  const fromLeft = buildLeftNearest(values, present);
  const fromRight = buildRightNearest(values, present);

  for (let pos = 0; pos < values.length; pos += 1) {
    if (present[pos]) {
      continue;
    }
    // Missing lookup entries fall back to the "no neighbour" sentinels.
    const leftDistance = fromLeft.dist[pos] ?? -1;
    const rightDistance = fromRight.dist[pos] ?? -1;
    const leftValue = fromLeft.val[pos] ?? null;
    const rightValue = fromRight.val[pos] ?? null;
    filled[pos] = pickNearest(leftDistance, rightDistance, leftValue, rightValue);
  }

  return filled;
}

Expand Down
224 changes: 224 additions & 0 deletions tests/stats/xs.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,230 @@ describe("xsSeries (MultiIndex)", () => {

// ─── property tests ───────────────────────────────────────────────────────────

// ─── pandas-parity: named & negative level lookup ────────────────────────────
//
// pandas.DataFrame.xs / Series.xs accept `level=` as either an integer
// (positional, supports negatives) or a string (level name). The
// implementation in src/stats/xs.ts supports both via resolveLevel(); the
// existing tests only covered positive integer levels.

describe("xsDataFrame (MultiIndex) — pandas-parity level lookup", () => {
  // Fixture: one "val" column whose rows are keyed by (letter, num) pairs.
  const letterNumIndex = MultiIndex.fromTuples(
    [
      ["A", 1],
      ["A", 2],
      ["B", 1],
      ["B", 2],
    ],
    { names: ["letter", "num"] },
  );
  const rowIndex = letterNumIndex as unknown as Index<string>;
  const valSeries = new Series({ data: [10, 20, 30, 40], index: rowIndex });
  const frame = new DataFrame(new Map([["val", valSeries]]), rowIndex, ["val"]);

  /** Values of the "val" column of an xs result, treating the result as a DataFrame. */
  const valColumn = (result: ReturnType<typeof xsDataFrame>) =>
    (result as DataFrame).get("val")?.values;

  test("named level lookup matches numeric level lookup (outer)", () => {
    const viaName = xsDataFrame(frame, "A", { level: "letter" });
    const viaNumber = xsDataFrame(frame, "A", { level: 0 });
    expect(valColumn(viaName)).toEqual(valColumn(viaNumber));
    expect(valColumn(viaName)).toEqual([10, 20]);
  });

  test("named level lookup matches numeric level lookup (inner)", () => {
    const viaName = xsDataFrame(frame, 1, { level: "num" });
    const viaNumber = xsDataFrame(frame, 1, { level: 1 });
    expect(valColumn(viaName)).toEqual(valColumn(viaNumber));
    expect(valColumn(viaName)).toEqual([10, 30]);
  });

  test("negative level index resolves from the end (-1 → last level)", () => {
    const lastLevel = xsDataFrame(frame, 1, { level: -1 });
    expect(valColumn(lastLevel)).toEqual([10, 30]);
  });

  test("throws on out-of-range positive level number", () => {
    const lookup = () => xsDataFrame(frame, "A", { level: 5 });
    expect(lookup).toThrow(/out of range/);
  });

  test("throws on out-of-range negative level number", () => {
    const lookup = () => xsDataFrame(frame, "A", { level: -5 });
    expect(lookup).toThrow(/out of range/);
  });

  test("throws on unknown level name", () => {
    const lookup = () => xsDataFrame(frame, "A", { level: "missing" });
    expect(lookup).toThrow(/Level name/);
  });
});

// ─── pandas-parity: dropLevel=false ──────────────────────────────────────────
//
// pandas xs has `drop_level=True` as its default. When `drop_level=False`
// the matched level must remain in the result index (so the caller can see
// what was matched). This was implemented but had no test coverage.

describe("xsDataFrame (MultiIndex) — dropLevel option", () => {
  // Fixture: one "val" column whose rows are keyed by (letter, num) pairs.
  const letterNumIndex = MultiIndex.fromTuples(
    [
      ["A", 1],
      ["A", 2],
      ["B", 1],
      ["B", 2],
    ],
    { names: ["letter", "num"] },
  );
  const rowIndex = letterNumIndex as unknown as Index<string>;
  const valSeries = new Series({ data: [10, 20, 30, 40], index: rowIndex });
  const frame = new DataFrame(new Map([["val", valSeries]]), rowIndex, ["val"]);

  /** Assert the result is still MultiIndex-keyed and hand that index back. */
  const multiIndexOf = (result: DataFrame): MultiIndex => {
    expect(result.index).toBeInstanceOf(MultiIndex);
    return result.index as unknown as MultiIndex;
  };

  test("dropLevel=true (default) reduces a 2-level MultiIndex to a flat Index", () => {
    const section = xsDataFrame(frame, "A") as DataFrame;
    expect(section.index).not.toBeInstanceOf(MultiIndex);
    expect(section.index.values).toEqual([1, 2]);
  });

  test("dropLevel=false preserves the matched level (still a MultiIndex with both names)", () => {
    const section = xsDataFrame(frame, "A", { dropLevel: false }) as DataFrame;
    const keptIndex = multiIndexOf(section);
    expect(keptIndex.nlevels).toBe(2);
    expect(keptIndex.names).toEqual(["letter", "num"]);
    expect(keptIndex.at(0)).toEqual(["A", 1]);
    expect(keptIndex.at(1)).toEqual(["A", 2]);
    expect(section.get("val")?.values).toEqual([10, 20]);
  });

  test("dropLevel=false on inner-level xs preserves both levels", () => {
    const section = xsDataFrame(frame, 1, { level: "num", dropLevel: false }) as DataFrame;
    const keptIndex = multiIndexOf(section);
    expect(keptIndex.names).toEqual(["letter", "num"]);
    expect(keptIndex.at(0)).toEqual(["A", 1]);
    expect(keptIndex.at(1)).toEqual(["B", 1]);
    expect(section.get("val")?.values).toEqual([10, 30]);
  });
});

// ─── pandas-parity: 3-level MultiIndex ───────────────────────────────────────
//
// pandas tests xs against MultiIndexes deeper than 2 levels. The reduction
// logic (droplevel of the matched level) needs to leave the remaining levels
// (and their names) in the right order — only 2-level cases were covered.

describe("xsDataFrame — 3-level MultiIndex", () => {
  // Fixture: one "v" column whose rows are keyed by (L0, L1, L2) triples.
  const threeLevelIndex = MultiIndex.fromTuples(
    [
      ["A", "x", 1],
      ["A", "x", 2],
      ["A", "y", 1],
      ["B", "x", 1],
    ],
    { names: ["L0", "L1", "L2"] },
  );
  const rowIndex = threeLevelIndex as unknown as Index<string>;
  const vSeries = new Series({ data: [1, 2, 3, 4], index: rowIndex });
  const frame = new DataFrame(new Map([["v", vSeries]]), rowIndex, ["v"]);

  /** Assert the result is still MultiIndex-keyed and hand that index back. */
  const multiIndexOf = (result: DataFrame): MultiIndex => {
    expect(result.index).toBeInstanceOf(MultiIndex);
    return result.index as unknown as MultiIndex;
  };

  test("xs at outer level drops L0, leaves a 2-level MultiIndex named [L1, L2]", () => {
    const section = xsDataFrame(frame, "A") as DataFrame;
    const remaining = multiIndexOf(section);
    expect(remaining.nlevels).toBe(2);
    expect(remaining.names).toEqual(["L1", "L2"]);
    expect(section.get("v")?.values).toEqual([1, 2, 3]);
  });

  test("xs at middle level drops L1, leaves a 2-level MultiIndex named [L0, L2]", () => {
    const section = xsDataFrame(frame, "x", { level: "L1" }) as DataFrame;
    const remaining = multiIndexOf(section);
    expect(remaining.nlevels).toBe(2);
    expect(remaining.names).toEqual(["L0", "L2"]);
    expect(section.get("v")?.values).toEqual([1, 2, 4]);
  });

  test("xs at innermost level drops L2, leaves a 2-level MultiIndex named [L0, L1]", () => {
    const section = xsDataFrame(frame, 1, { level: "L2" }) as DataFrame;
    const remaining = multiIndexOf(section);
    expect(remaining.names).toEqual(["L0", "L1"]);
    expect(section.get("v")?.values).toEqual([1, 3, 4]);
  });

  test("xs at outer level with dropLevel=false keeps all three levels", () => {
    const section = xsDataFrame(frame, "A", { dropLevel: false }) as DataFrame;
    const remaining = multiIndexOf(section);
    expect(remaining.nlevels).toBe(3);
    expect(remaining.names).toEqual(["L0", "L1", "L2"]);
  });
});

// ─── pandas-parity: xsSeries level / dropLevel options ───────────────────────

describe("xsSeries (MultiIndex) — pandas-parity level & dropLevel", () => {
  // Fixture: three values keyed by (g, n) pairs.
  const groupIndex = MultiIndex.fromTuples(
    [
      ["X", 1],
      ["X", 2],
      ["Y", 1],
    ],
    { names: ["g", "n"] },
  );
  const grouped = new Series({
    data: [100, 200, 300],
    index: groupIndex as unknown as Index<string>,
  });

  test("named level lookup matches numeric level lookup", () => {
    const viaName = xsSeries(grouped, 1, { level: "n" }) as Series<Scalar>;
    const viaNumber = xsSeries(grouped, 1, { level: 1 }) as Series<Scalar>;
    expect(viaName.values).toEqual(viaNumber.values);
    expect(viaName.values).toEqual([100, 300]);
  });

  test("negative level index resolves from the end", () => {
    const lastLevel = xsSeries(grouped, 1, { level: -1 }) as Series<Scalar>;
    expect(lastLevel.values).toEqual([100, 300]);
  });

  test("dropLevel=false keeps result as a MultiIndex preserving both levels", () => {
    const section = xsSeries(grouped, "X", { dropLevel: false }) as Series<Scalar>;
    expect(section.index).toBeInstanceOf(MultiIndex);
    const keptIndex = section.index as unknown as MultiIndex;
    expect(keptIndex.nlevels).toBe(2);
    expect(keptIndex.names).toEqual(["g", "n"]);
    expect(section.values).toEqual([100, 200]);
  });

  test("throws on unknown level name", () => {
    const lookup = () => xsSeries(grouped, "X", { level: "missing" });
    expect(lookup).toThrow(/Level name/);
  });

  test("throws on out-of-range level number", () => {
    const lookup = () => xsSeries(grouped, "X", { level: 9 });
    expect(lookup).toThrow(/out of range/);
  });

  test("Series name is preserved when xs returns a Series (multi-match)", () => {
    // Flat index with a repeated label, so the lookup matches more than one row.
    const withDuplicates = new Series({
      data: [1, 2, 3, 4],
      index: ["a", "b", "a", "c"],
      name: "mycol",
    });
    const matches = xsSeries(withDuplicates, "a") as Series<Scalar>;
    expect(matches.name).toBe("mycol");
    expect(matches.values).toEqual([1, 3]);
  });
});

describe("xsDataFrame property tests", () => {
test("axis=1 returns the exact column Series", () => {
fc.assert(
Expand Down
Loading