memory: HybridRetriever split-on-ambiguous rerank refinement (monotonic, additive)

jddunn · jddunn · commit ca4409757247 · 2026-04-20T18:43:11.000-07:00
diff --git a/src/memory/core/types.ts b/src/memory/core/types.ts
@@ -274,6 +274,17 @@ export interface CognitiveRetrievalResult {
       llmLatencyMs: number;
       notes?: string[];
     };
+    /**
+     * Step-6: when `HybridRetriever` runs with `splitAmbiguousThreshold`
+     * set, the bottom fraction of traces by first-pass rerank score
+     * are split at sentence boundaries and rescored. Replacements are
+     * recorded here for post-hoc analysis.
+     */
+    splitOnAmbiguous?: {
+      threshold: number;
+      candidateCount: number;
+      replacedIds: string[];
+    };
   };
 }
 
diff --git a/src/memory/retrieval/hybrid/HybridRetriever.ts b/src/memory/retrieval/hybrid/HybridRetriever.ts
@@ -77,6 +77,16 @@ export interface HybridRetrieverOptions {
    * to the raw query without aborting retrieval.
    */
   hydeRetriever?: HydeRetriever;
+  /**
+   * Step-6: enable split-on-ambiguous rerank refinement. When set to a
+   * value in (0, 1], the bottom fraction of traces by first-pass
+   * rerank score are split at sentence boundaries, rescored with a
+   * second rerank call (same query), and replaced by their better
+   * half ONLY IF the better half outscores the original. Monotonic.
+   *
+   * Default: undefined (no split, Step 3 behavior preserved).
+   */
+  splitAmbiguousThreshold?: number;
   /** Default dense weight in RRF. @default 0.7 */
   defaultDenseWeight?: number;
   /** Default sparse weight in RRF. @default 0.3 */
@@ -121,6 +131,7 @@ export class HybridRetriever {
   private readonly memoryStore: MemoryStore;
   private readonly rerankerService?: RerankerService;
   private readonly hydeRetriever?: HydeRetriever;
+  private readonly splitAmbiguousThreshold?: number;
   private readonly defaultDenseWeight: number;
   private readonly defaultSparseWeight: number;
   private readonly defaultRrfK: number;
@@ -130,6 +141,7 @@ export class HybridRetriever {
     this.bm25 = new BM25Index(opts.bm25Config);
     this.rerankerService = opts.rerankerService;
     this.hydeRetriever = opts.hydeRetriever;
+    this.splitAmbiguousThreshold = opts.splitAmbiguousThreshold;
     this.defaultDenseWeight = opts.defaultDenseWeight ?? 0.7;
     this.defaultSparseWeight = opts.defaultSparseWeight ?? 0.3;
     this.defaultRrfK = opts.defaultRrfK ?? 60;
@@ -214,6 +226,7 @@ export class HybridRetriever {
     }
 
     // Optional rerank: same 0.7 cognitive + 0.3 neural blend as baseline.
+    let splitDiagnostic: { threshold: number; candidateCount: number; replacedIds: string[] } | undefined;
     if (this.rerankerService && hydrated.length > 0) {
       try {
         const rerankerOutput = await this.rerankerService.rerank(
@@ -236,6 +249,21 @@ export class HybridRetriever {
             trace.retrievalScore = 0.7 * trace.retrievalScore + 0.3 * neural;
           }
         }
+
+        // Step-6: split-on-ambiguous refinement.
+        if (
+          this.splitAmbiguousThreshold !== undefined &&
+          this.splitAmbiguousThreshold > 0 &&
+          hydrated.length > 0
+        ) {
+          splitDiagnostic = await this.refineAmbiguous(
+            hydrated,
+            neuralScores,
+            query,
+            this.splitAmbiguousThreshold,
+          );
+        }
+
         hydrated.sort((a, b) => b.retrievalScore - a.retrievalScore);
       } catch {
         // Reranker errors are non-critical: keep RRF ordering.
@@ -250,6 +278,7 @@ export class HybridRetriever {
       scoringMs: denseTimings.scoringMs,
       totalMs: Date.now() - startTime,
       hypothesis: hypothesisDiagnostic,
+      splitOnAmbiguous: splitDiagnostic,
     });
   }
 
@@ -263,6 +292,7 @@ export class HybridRetriever {
       scoringMs: number;
       totalMs: number;
       hypothesis?: string;
+      splitOnAmbiguous?: { threshold: number; candidateCount: number; replacedIds: string[] };
     },
   ): CognitiveRetrievalResult {
     return {
@@ -275,7 +305,111 @@ export class HybridRetriever {
         totalTimeMs: d.totalMs,
         ...(d.escalations ? { escalations: d.escalations } : {}),
         ...(d.hypothesis ? { hyde: { hypothesis: d.hypothesis } } : {}),
+        ...(d.splitOnAmbiguous ? { splitOnAmbiguous: d.splitOnAmbiguous } : {}),
       },
     };
   }
+
+  /**
+   * Step-6: split the lowest-scoring fraction of traces (by first-pass neural
+   * score) in two, rerank the halves against the same query, and replace a
+   * trace's content with its better half only when that half strictly
+   * outscores the whole trace. Scores are never lowered (monotonic).
+   *
+   * Mutates `hydrated` in place: `content` and `retrievalScore` change for
+   * replaced traces only. Best-effort: a failed second rerank replaces nothing.
+   *
+   * @param hydrated First-pass traces; a trace without a neural score counts as 0.
+   * @param neuralScores First-pass reranker relevance score per trace id.
+   * @param query Query text sent to the second rerank call.
+   * @param threshold Fraction of traces to consider; the count is clamped to [0, hydrated.length].
+   * @returns The threshold, the candidate count, and the ids of replaced traces.
+   */
+  private async refineAmbiguous(
+    hydrated: ScoredMemoryTrace[],
+    neuralScores: Map<string, number>,
+    query: string,
+    threshold: number,
+  ): Promise<{ threshold: number; candidateCount: number; replacedIds: string[] }> {
+    const replacedIds: string[] = [];
+    // Clamp so an out-of-range or NaN threshold cannot report an impossible count.
+    const rawCount = Math.ceil(hydrated.length * threshold) || 0;
+    const candidateCount = Math.min(hydrated.length, Math.max(0, rawCount));
+    const summary = { threshold, candidateCount, replacedIds };
+    const reranker = this.rerankerService;
+    if (!reranker) return summary;
+
+    // Ascending neural score: the weakest first-pass matches come first.
+    const candidates = hydrated
+      .map((trace) => ({ trace, neural: neuralScores.get(trace.id) ?? 0 }))
+      .sort((a, b) => a.neural - b.neural)
+      .slice(0, candidateCount);
+
+    const splits: Array<{ trace: ScoredMemoryTrace; halves: [string, string]; originalNeural: number }> = [];
+    for (const { trace, neural } of candidates) {
+      const halves = this.splitAtMidpointSentence(trace.content);
+      // An empty half carries no signal and would only waste a rerank slot.
+      if (!halves || halves[0] === '' || halves[1] === '') continue;
+      splits.push({ trace, halves, originalNeural: neural });
+    }
+    if (splits.length === 0) return summary;
+
+    // Synthetic ids `<traceId>::a` / `<traceId>::b` key the halves in the rerank response.
+    const halfDocs = splits.flatMap(({ trace, halves }) => [
+      { id: `${trace.id}::a`, content: halves[0] },
+      { id: `${trace.id}::b`, content: halves[1] },
+    ]);
+    let halfScores: Map<string, number>;
+    try {
+      const halfOut = await reranker.rerank({ query, documents: halfDocs }, { topN: halfDocs.length });
+      halfScores = new Map(halfOut.results.map((r) => [r.id, r.relevanceScore]));
+    } catch {
+      // Refinement is optional: keep the first-pass content and scores.
+      return summary;
+    }
+
+    for (const { trace, halves, originalNeural } of splits) {
+      const a = halfScores.get(`${trace.id}::a`) ?? -Infinity;
+      const b = halfScores.get(`${trace.id}::b`) ?? -Infinity;
+      const winningScore = Math.max(a, b);
+      // Negated `>` so a NaN score is rejected instead of poisoning retrievalScore.
+      if (!(winningScore > originalNeural)) continue;
+      trace.content = a >= b ? halves[0] : halves[1];
+      // Shift by the score gain, weighted 0.3 like the neural term of the first-pass blend.
+      trace.retrievalScore += 0.3 * (winningScore - originalNeural);
+      replacedIds.push(trace.id);
+    }
+
+    return summary;
+  }
+
+  /**
+   * Split `text` at the sentence end (`.`, `!` or `?` followed by whitespace)
+   * nearest its midpoint, looking at most 40% of the length to either side;
+   * if there is none, fall back to the first space at or after the midpoint.
+   *
+   * @returns The two trimmed halves, or null when `text` is shorter than 50
+   *   characters, has no split point, or a half would be empty after trimming.
+   */
+  private splitAtMidpointSentence(text: string): [string, string] | null {
+    if (text.length < 50) return null;
+    const mid = Math.floor(text.length / 2);
+    const reach = Math.floor(text.length * 0.4);
+    // charAt yields '' out of range, so no explicit bounds checks are needed.
+    const endsSentence = (i: number): boolean =>
+      /[.!?]/.test(text.charAt(i)) && /\s/.test(text.charAt(i + 1));
+    // Walk outward from the midpoint; at equal distance the left side wins.
+    let cut = -1;
+    for (let offset = 0; offset <= reach && cut === -1; offset++) {
+      if (endsSentence(mid - offset)) cut = mid - offset + 1;
+      else if (endsSentence(mid + offset)) cut = mid + offset + 1;
+    }
+    // No sentence end in range: fall back to the first space from the midpoint on.
+    if (cut === -1) cut = text.indexOf(' ', mid);
+    if (cut === -1) return null;
+    const first = text.slice(0, cut).trim();
+    const second = text.slice(cut).trim();
+    // An empty half would give the reranker nothing to score.
+    return first !== '' && second !== '' ? [first, second] : null;
+  }
 }
diff --git a/src/memory/retrieval/hybrid/__tests__/HybridRetriever.spec.ts b/src/memory/retrieval/hybrid/__tests__/HybridRetriever.spec.ts
@@ -211,3 +211,96 @@ describe('HybridRetriever + HyDE', () => {
     expect(reranker.lastQuery).toBe('what is the user up to?');
   });
 });
+
+/**
+ * Reranker test double: scores each document by id lookup in a fixed table (0 when absent) and records every call.
+ */
+class TableDrivenReranker {
+  public lastQuery: string | undefined;
+  public callCount = 0;
+  public calls: Array<{ query: string; docIds: string[] }> = [];
+  constructor(private readonly scoreTable: Record<string, number>) {}
+  async rerank(input: { query: string; documents: Array<{ id: string; content: string; originalScore?: number }> }) {
+    const { query, documents } = input;
+    this.lastQuery = query;
+    this.callCount += 1;
+    this.calls.push({ query, docIds: documents.map(({ id }) => id) });
+    const results = documents.map(({ id, originalScore }) => {
+      const relevanceScore = this.scoreTable[id] ?? 0;
+      return { id, relevanceScore, originalScore };
+    });
+    return { results, model: 'table-rerank', usage: { searchUnits: 1 } };
+  }
+}
+
+// Split-on-ambiguous refinement: candidate selection, monotonic replacement, and the disabled path.
+describe('HybridRetriever + split-on-ambiguous', () => {
+  it('identifies bottom-N% by rerank score at threshold=0.3', async () => {
+    const traces = Array.from({ length: 10 }, (_, i) =>
+      mkTrace(`t${i}`, 0.9 - i * 0.01, `probe sentence one. probe sentence two. probe sentence three ${'x'.repeat(40)}.`),
+    );
+    const neural: Record<string, number> = {};
+    traces.forEach((t, i) => {
+      neural[t.id] = 1 - i * 0.1;
+      // Every half scores below every original, so nothing may be replaced.
+      neural[`${t.id}::a`] = -1;
+      neural[`${t.id}::b`] = -1;
+    });
+    const reranker = new TableDrivenReranker(neural);
+    const r = new HybridRetriever({
+      memoryStore: new FakeMemoryStore(traces) as unknown as MemoryStore,
+      rerankerService: reranker as unknown as RerankerService,
+      splitAmbiguousThreshold: 0.3,
+    });
+    for (const t of traces) r.bm25.addDocument(t.id, t.content);
+    const result = await r.retrieve('probe', neutralMood, scope, { recallTopK: 10 });
+    // 10 traces x 0.3 = 3 candidates, each contributing two half-documents to the second call.
+    expect(reranker.callCount).toBe(2);
+    expect(reranker.calls[1].docIds).toHaveLength(6);
+    expect(result.diagnostics.splitOnAmbiguous?.candidateCount).toBe(3);
+    expect(result.diagnostics.splitOnAmbiguous?.replacedIds).toEqual([]);
+  });
+
+  it('replaces content only when winning half outscores original', async () => {
+    const longContent = 'probe first half sentence. probe second half sentence with lots of extra padding so splitting works right here.';
+    const traces = [mkTrace('t0', 0.9, longContent), mkTrace('t1', 0.8, longContent), mkTrace('t2', 0.7, longContent)];
+    // Threshold 0.34 of 3 traces rounds up to 2 candidates: t2 and t1, the two lowest neural scores.
+    const neural: Record<string, number> = {
+      t0: 0.9, t1: 0.5, t2: 0.2,
+      't1::a': 0.0, 't1::b': 0.0, // neither half beats 0.5: t1 keeps its content
+      't2::a': 0.1, 't2::b': 0.7, // second half beats 0.2: t2 is replaced by it
+    };
+    const reranker = new TableDrivenReranker(neural);
+    const r = new HybridRetriever({
+      memoryStore: new FakeMemoryStore(traces) as unknown as MemoryStore,
+      rerankerService: reranker as unknown as RerankerService,
+      splitAmbiguousThreshold: 0.34,
+    });
+    for (const t of traces) r.bm25.addDocument(t.id, t.content);
+    const result = await r.retrieve('probe', neutralMood, scope, { recallTopK: 10 });
+    const replaced = result.retrieved.find((t) => t.id === 't2');
+    expect(replaced).toBeDefined();
+    expect(replaced!.content).toBe('probe second half sentence with lots of extra padding so splitting works right here.');
+    const unchanged = result.retrieved.find((t) => t.id === 't1');
+    expect(unchanged).toBeDefined();
+    expect(unchanged!.content).toBe(longContent);
+    expect(result.diagnostics.splitOnAmbiguous?.replacedIds).toEqual(['t2']);
+  });
+
+  it('split disabled (threshold=0 or undefined) → no second rerank call', async () => {
+    // Exercise both spellings of "disabled" that the test name promises.
+    for (const splitAmbiguousThreshold of [undefined, 0]) {
+      const traces = [mkTrace('t0', 0.9, 'probe some content'), mkTrace('t1', 0.8, 'probe other content')];
+      const reranker = new TableDrivenReranker({ t0: 0.9, t1: 0.8 });
+      const r = new HybridRetriever({
+        memoryStore: new FakeMemoryStore(traces) as unknown as MemoryStore,
+        rerankerService: reranker as unknown as RerankerService,
+        splitAmbiguousThreshold,
+      });
+      for (const t of traces) r.bm25.addDocument(t.id, t.content);
+      const result = await r.retrieve('probe', neutralMood, scope, { recallTopK: 10 });
+      expect(reranker.callCount).toBe(1);
+      expect(result.diagnostics.splitOnAmbiguous).toBeUndefined();
+    }
+  });
+});