@@ -38,6 +38,21 @@ export interface HydeConfig {
3838 hypothesisSystemPrompt ?: string ;
3939 /** Use full-answer granularity (recommended by research). Default: true. */
4040 fullAnswerGranularity ?: boolean ;
41+ /**
42+ * Number of diverse hypothetical documents to generate per query.
43+ *
44+ * Multi-hypothesis HyDE generates N hypotheses from different perspectives
45+ * (technical, practical/example, overview) and searches with each embedding.
46+ * Results are deduplicated by chunk ID, keeping the highest score.
47+ *
48+ * Higher values improve recall at the cost of additional LLM calls.
49+ * - 1: Original single-hypothesis HyDE (fastest)
50+ * - 3: Recommended default (good diversity/cost tradeoff)
51+ * - 5: Maximum diversity (highest recall, most expensive)
52+ *
53+ * Default: 3.
54+ */
55+ hypothesisCount ?: number ;
4156}
4257
4358export const DEFAULT_HYDE_CONFIG : Required < HydeConfig > = {
@@ -51,6 +66,7 @@ export const DEFAULT_HYDE_CONFIG: Required<HydeConfig> = {
5166 'You are a knowledgeable assistant. Generate a concise, factual answer to the following question. ' +
5267 'This answer will be used for semantic search, so be specific and include relevant technical terms.' ,
5368 fullAnswerGranularity : true ,
69+ hypothesisCount : 3 ,
5470} ;
5571
5672function clampUnitInterval ( value : unknown , fallback : number ) : number {
@@ -78,12 +94,18 @@ export function resolveHydeConfig(partial?: Partial<HydeConfig>): Required<HydeC
7894 ? Math . floor ( merged . maxHypothesisTokens )
7995 : DEFAULT_HYDE_CONFIG . maxHypothesisTokens ;
8096
97+ const hypothesisCount =
98+ typeof merged . hypothesisCount === 'number' && Number . isFinite ( merged . hypothesisCount ) && merged . hypothesisCount >= 1
99+ ? Math . floor ( merged . hypothesisCount )
100+ : DEFAULT_HYDE_CONFIG . hypothesisCount ;
101+
81102 return {
82103 ...merged ,
83104 initialThreshold,
84105 minThreshold,
85106 thresholdStep,
86107 maxHypothesisTokens,
108+ hypothesisCount,
87109 } ;
88110}
89111
@@ -114,6 +136,27 @@ export interface HydeRetrievalResult {
114136 retrievalLatencyMs : number ;
115137}
116138
/**
 * Outcome of a multi-hypothesis HyDE retrieval pass.
 *
 * Bundles every hypothesis that was generated for the query together with the
 * merged result set obtained by searching once per hypothesis embedding and
 * keeping a single entry per document.
 *
 * @interface HydeMultiRetrievalResult
 */
export interface HydeMultiRetrievalResult {
  /** Every hypothesis text generated for the query. */
  hypotheses: string[];

  /** Merged query result: union of the per-hypothesis searches, one entry per document (highest score wins). */
  queryResult: QueryResult;

  /** How many hypotheses were generated. */
  hypothesisCount: number;

  /** Time spent generating all hypotheses, in milliseconds. */
  hypothesisLatencyMs: number;

  /** Time spent on all embedding and retrieval passes, in milliseconds. */
  retrievalLatencyMs: number;
}
159+
117160// ── Core Retriever ─────────────────────────────────────────────────────────
118161
119162/**
@@ -168,6 +211,225 @@ export class HydeRetriever {
168211 } ;
169212 }
170213
/**
 * Generate several hypothetical documents for one query, each written from a
 * different perspective.
 *
 * A single LLM call is asked for N labelled hypotheses. The labels cycle
 * through five perspectives (technical, practical, overview, troubleshooting,
 * comparative), repeating from the start when more than five are requested.
 * The response is split on its "Hypothesis N (...):" markers.
 *
 * Handling of imperfect responses:
 * - Text before the first marker (preamble or step-by-step reasoning) is
 *   discarded so it is never used as a search hypothesis.
 * - If no marker is present, the whole non-empty response counts as one
 *   hypothesis.
 * - If fewer than N hypotheses were recovered, the remainder is produced one
 *   at a time through {@link generateHypothesis}.
 *
 * @param {string} query - The user query to generate hypotheses for.
 * @param {number} [count] - Number of hypotheses to generate. Fractional values
 *   are floored; non-finite values (NaN, Infinity) are ignored in favour of
 *   config.hypothesisCount. Values of 1 or less use the single-hypothesis path.
 * @returns {Promise<{ hypotheses: string[]; latencyMs: number }>} Exactly N
 *   hypotheses (one on the single-hypothesis path) and the elapsed time in ms.
 * @throws {Error} Propagates any error raised by the LLM caller.
 *
 * @example
 * ```typescript
 * const { hypotheses, latencyMs } = await retriever.generateMultipleHypotheses(
 *   'How does BM25 scoring work?',
 *   3,
 * );
 * // hypotheses[0]: Technical explanation with formulas
 * // hypotheses[1]: Practical example with code
 * // hypotheses[2]: High-level conceptual overview
 * ```
 */
async generateMultipleHypotheses(
  query: string,
  count?: number,
): Promise<{ hypotheses: string[]; latencyMs: number }> {
  // Normalise the requested count. An unchecked NaN would skip every loop below
  // and return nothing, and Infinity would keep the fallback loop running forever.
  const usableCount = [count, this.config.hypothesisCount].find(
    (candidate): candidate is number =>
      typeof candidate === 'number' && Number.isFinite(candidate),
  );
  const n = usableCount === undefined ? 1 : Math.floor(usableCount);

  // For n<=1, fall back to the single-hypothesis path
  if (n <= 1) {
    const result = await this.generateHypothesis(query);
    return { hypotheses: [result.hypothesis], latencyMs: result.latencyMs };
  }

  const start = Date.now();

  const systemPrompt = [
    this.config.hypothesisSystemPrompt,
    this.config.fullAnswerGranularity
      ? 'Write complete hypothetical answers in natural language prose.'
      : 'Write concise hypothetical answers suitable for semantic retrieval.',
    `Keep each answer under ${this.config.maxHypothesisTokens} tokens.`,
  ].join(' ');

  // Perspective labels, reused cyclically when n exceeds the list length.
  const perspectives = [
    'technical/formal perspective with precise terminology',
    'practical/example perspective with concrete use cases',
    'overview/summary perspective with broad context',
    'troubleshooting/diagnostic perspective',
    'comparative perspective contrasting with alternatives',
  ];

  const userPrompt = [
    'Think step by step:',
    '1. What is this question really asking?',
    '2. What kind of document would contain the answer?',
    '3. What vocabulary and terminology would that document use?',
    '4. Write a brief version of that hypothetical document.',
    '',
    `Generate ${n} diverse hypothetical documents that would answer: "${query}"`,
    '',
    'Each hypothesis MUST take a DIFFERENT perspective or focus on a',
    'DIFFERENT aspect of the question. Be diverse in vocabulary and approach.',
    '',
    ...Array.from({ length: n }, (_, i) => {
      const perspectiveLabel = perspectives[i % perspectives.length];
      return `Hypothesis ${i + 1} (${perspectiveLabel}):`;
    }),
  ].join('\n');

  const rawResponse = await this.llmCaller(systemPrompt, userPrompt);

  // Parse the response: split on "Hypothesis N:" markers (optionally "Hypothesis N (label):").
  const hypothesisRegex = /Hypothesis\s+\d+\s*(?:\([^)]*\))?:\s*/gi;
  const segments = rawResponse.split(hypothesisRegex);

  // split() always returns the text preceding the first marker as element 0.
  // When at least one marker matched, that leading text is preamble or
  // chain-of-thought reasoning rather than a hypothesis, so drop it. When no
  // marker matched, the single segment is the whole response and is kept.
  const markersFound = segments.length > 1;
  const hypotheses = (markersFound ? segments.slice(1) : segments)
    .map((segment) => segment.trim())
    .filter((segment) => segment.length > 0);

  // If we got fewer hypotheses than requested (including none at all, e.g. an
  // empty response), generate the remaining ones individually.
  while (hypotheses.length < n) {
    const fallbackResult = await this.generateHypothesis(query);
    hypotheses.push(fallbackResult.hypothesis);
  }

  // Trim to exactly n hypotheses
  return {
    hypotheses: hypotheses.slice(0, n),
    latencyMs: Date.now() - start,
  };
}
322+
/**
 * Multi-hypothesis retrieval: generates N diverse hypotheses, searches with each,
 * and merges the results, keeping the highest-scoring entry per document ID.
 *
 * Pipeline:
 * 1. Generate N hypotheses via {@link generateMultipleHypotheses}
 * 2. Embed all hypotheses in one embedding request
 * 3. Query the vector store once per non-empty embedding, in parallel
 * 4. Union all results, deduplicate by document ID, keep the highest score
 * 5. Sort by similarity score (descending) and keep the first topK documents
 *
 * Notes:
 * - `queryOptions.minSimilarityScore` is not forwarded to the vector store.
 * - `topK` defaults to 5 and is used both for each per-hypothesis search and
 *   for the size of the merged result.
 * - If the embedding response contains no embeddings, an empty result is
 *   returned without querying the vector store.
 *
 * @param {object} opts - Retrieval options.
 * @param {string} opts.query - The user query.
 * @param {IVectorStore} opts.vectorStore - Vector store to search.
 * @param {string} opts.collectionName - Collection to search in.
 * @param {Partial<QueryOptions>} [opts.queryOptions] - Additional query options.
 * @param {number} [opts.hypothesisCount] - Override hypothesis count for this call.
 * @returns {Promise<HydeMultiRetrievalResult>} Deduplicated results from all hypotheses.
 *
 * @example
 * ```typescript
 * const result = await retriever.retrieveMulti({
 *   query: 'How does BM25 work?',
 *   vectorStore: myStore,
 *   collectionName: 'knowledge-base',
 *   hypothesisCount: 3,
 * });
 * console.log(`Found ${result.queryResult.documents.length} unique docs from ${result.hypothesisCount} hypotheses`);
 * ```
 */
async retrieveMulti(opts: {
  query: string;
  vectorStore: IVectorStore;
  collectionName: string;
  queryOptions?: Partial<QueryOptions>;
  hypothesisCount?: number;
}): Promise<HydeMultiRetrievalResult> {
  const count = opts.hypothesisCount ?? this.config.hypothesisCount;

  // Step 1: Generate multiple hypotheses
  const { hypotheses, latencyMs: hypothesisLatencyMs } =
    await this.generateMultipleHypotheses(opts.query, count);

  // Step 2: Embed all hypotheses
  const retrievalStart = Date.now();
  const embeddingResponse = await this.embeddingManager.generateEmbeddings({
    texts: hypotheses,
  });

  if (!embeddingResponse.embeddings || embeddingResponse.embeddings.length === 0) {
    return {
      hypotheses,
      queryResult: { documents: [] },
      hypothesisCount: hypotheses.length,
      hypothesisLatencyMs,
      retrievalLatencyMs: Date.now() - retrievalStart,
    };
  }

  // Step 3: Search with each embedding in parallel.
  // minSimilarityScore is split off so it is not forwarded to the store.
  const {
    minSimilarityScore: _ignoredMinSimilarityScore,
    ...extraQueryOptions
  } = opts.queryOptions ?? {};

  // Resolve topK once so the per-hypothesis searches and the final cut agree.
  const topK = extraQueryOptions.topK ?? 5;

  const searchPromises = embeddingResponse.embeddings
    .filter((emb) => emb && emb.length > 0)
    .map((embedding) =>
      opts.vectorStore.query(opts.collectionName, embedding, {
        includeTextContent: true,
        includeMetadata: true,
        ...extraQueryOptions,
        // Must come after the spread: a caller-supplied `topK: undefined`
        // would otherwise overwrite the resolved default.
        topK,
      }),
    );

  const searchResults = await Promise.all(searchPromises);

  // Step 4: Merge and deduplicate — keep highest score per document ID
  const docMap = new Map<string, (typeof searchResults)[0]['documents'][0]>();

  for (const result of searchResults) {
    for (const doc of result.documents) {
      const existing = docMap.get(doc.id);
      if (!existing || doc.similarityScore > existing.similarityScore) {
        docMap.set(doc.id, doc);
      }
    }
  }

  // Sort by similarity score descending
  const mergedDocs = Array.from(docMap.values()).sort(
    (a, b) => b.similarityScore - a.similarityScore,
  );

  // Apply topK limit to the merged set
  return {
    hypotheses,
    queryResult: { documents: mergedDocs.slice(0, topK) },
    hypothesisCount: hypotheses.length,
    hypothesisLatencyMs,
    retrievalLatencyMs: Date.now() - retrievalStart,
  };
}
432+
171433 /**
172434 * Embed the hypothesis and search the vector store.
173435 * Uses adaptive thresholding: starts at initialThreshold, steps down
0 commit comments