@@ -112,6 +112,28 @@ export interface SqlVectorStoreConfig extends VectorStoreProviderConfig {
112112 * @default 'agentos_rag_'
113113 */
114114 tablePrefix ?: string ;
115+
116+ /**
117+ * Optional text processing pipeline for hybrid search tokenization.
118+ * Replaces the built-in regex tokenizer with configurable stemming,
119+ * lemmatization, and stop word handling.
120+ * @see createRagPipeline from core/text-processing
121+ */
122+ pipeline ?: import ( '../../../core/text-processing/TextProcessingPipeline' ) . TextProcessingPipeline ;
123+
124+ /**
125+ * Document count threshold before HNSW sidecar activates.
126+ * Below this count, brute-force cosine similarity is used.
127+ * Set to 0 to activate HNSW from the first upsert. Set to Infinity to disable HNSW and always use brute-force.
128+ * @default 1000
129+ */
130+ hnswThreshold ?: number ;
131+
132+ /**
133+ * Embedding dimensions for the HNSW sidecar index.
134+ * @default `defaultEmbeddingDimension` when configured, otherwise 1536
135+ */
136+ hnswDimensions ?: number ;
115137}
116138
117139// ============================================================================
@@ -196,6 +218,12 @@ export class SqlVectorStore implements IVectorStore {
196218 private readonly providerId : string ;
197219 private tablePrefix : string = 'agentos_rag_' ;
198220
221+ /** Optional HNSW sidecar for O(log n) vector search when available. */
222+ private sidecar : import ( '../../../core/vector-search/HnswIndexSidecar' ) . HnswIndexSidecar | null = null ;
223+
224+ /** Optional text processing pipeline for hybrid search tokenization. */
225+ private pipeline ?: import ( '../../../core/text-processing/TextProcessingPipeline' ) . TextProcessingPipeline ;
226+
199227 /**
200228 * Constructs a SqlVectorStore instance.
201229 * The store is not operational until `initialize()` is called.
@@ -246,8 +274,33 @@ export class SqlVectorStore implements IVectorStore {
246274 // Create schema
247275 await this . createSchema ( ) ;
248276
277+ // Store pipeline reference
278+ this . pipeline = this . config . pipeline ;
279+
280+ // Initialize HNSW sidecar for accelerated vector search
281+ if ( this . config . hnswThreshold !== Infinity ) {
282+ try {
283+ const { HnswIndexSidecar } = await import ( '../../../core/vector-search/HnswIndexSidecar' ) ;
284+ this . sidecar = new HnswIndexSidecar ( ) ;
285+
286+ // Derive sidecar index path from adapter config
287+ const storagePath = ( this . config . storage as any ) ?. filePath ?? ( this . config . storage as any ) ?. database ;
288+ const indexPath = storagePath ? `${ storagePath } .hnsw` : `/tmp/agentos-rag-${ this . providerId } .hnsw` ;
289+
290+ await this . sidecar . initialize ( {
291+ indexPath,
292+ dimensions : this . config . hnswDimensions ?? this . config . defaultEmbeddingDimension ?? 1536 ,
293+ metric : this . config . similarityMetric ?? 'cosine' ,
294+ activationThreshold : this . config . hnswThreshold ?? 1000 ,
295+ } ) ;
296+ } catch {
297+ /* HNSW sidecar unavailable (e.g. hnswlib-node not installed, or sidecar initialization failed) — brute-force fallback */
298+ this . sidecar = null ;
299+ }
300+ }
301+
249302 this . isInitialized = true ;
250- console . log ( `SqlVectorStore (ID: ${ this . providerId } , Config ID: ${ this . config . id } ) initialized successfully.` ) ;
303+ console . log ( `SqlVectorStore (ID: ${ this . providerId } , Config ID: ${ this . config . id } ) initialized successfully${ this . sidecar ?. isAvailable ( ) ? ' (HNSW sidecar ready)' : '' } .` ) ;
251304 }
252305
253306 /**
@@ -526,6 +579,37 @@ export class SqlVectorStore implements IVectorStore {
526579 [ countResult ?. count ?? 0 , now , collectionName ]
527580 ) ;
528581
582+ // ── HNSW sidecar: add upserted vectors + check threshold ──────────
583+ if ( this . sidecar ?. isAvailable ( ) && upsertedIds . length > 0 ) {
584+ const docsWithEmbeddings = documents
585+ . filter ( d => upsertedIds . includes ( d . id ) && d . embedding ?. length > 0 )
586+ . map ( d => ( { id : d . id , embedding : d . embedding } ) ) ;
587+
588+ if ( this . sidecar . isActive ( ) ) {
589+ await this . sidecar . addBatch ( docsWithEmbeddings ) ;
590+ } else {
591+ // Check if we just crossed the activation threshold
592+ const docCount = countResult ?. count ?? 0 ;
593+ const threshold = this . config . hnswThreshold ?? 1000 ;
594+ if ( docCount >= threshold ) {
595+ // Load all embeddings stored for this collection and rebuild the HNSW index
596+ const allRows = await this . adapter . all < DocumentRow > (
597+ `SELECT id, embedding_blob FROM ${ this . tablePrefix } documents WHERE collection_name = ?` ,
598+ [ collectionName ] ,
599+ ) ;
600+ const allItems = allRows
601+ . map ( row => ( {
602+ id : row . id ,
603+ embedding : isLegacyJsonBlob ( row . embedding_blob )
604+ ? JSON . parse ( row . embedding_blob as string ) as number [ ]
605+ : blobToEmbedding ( row . embedding_blob as Buffer ) ,
606+ } ) )
607+ . filter ( item => item . embedding . length > 0 ) ;
608+ await this . sidecar . rebuildFromData ( allItems ) ;
609+ }
610+ }
611+ }
612+
529613 return {
530614 upsertedCount : upsertedIds . length ,
531615 upsertedIds,
@@ -561,6 +645,51 @@ export class SqlVectorStore implements IVectorStore {
561645 ) ;
562646 }
563647
648+ // ── HNSW fast path ─────────────────────────────────────────────────
649+ // When the sidecar is active, use O(log n) ANN search to get top
650+ // candidates by ID, then fetch full documents from SQLite. Falls through
651+ // to brute-force when the sidecar is inactive, unavailable, or returns no candidates.
652+ if ( this . sidecar ?. isActive ( ) ) {
653+ const hnswCandidates = await this . sidecar . search ( queryEmbedding , topK * 3 ) ;
654+ if ( hnswCandidates . length > 0 ) {
655+ const candidateIds = hnswCandidates . map ( c => c . id ) ;
656+ const placeholders = candidateIds . map ( ( ) => '?' ) . join ( ',' ) ;
657+ let hnswQuery = `SELECT * FROM ${ this . tablePrefix } documents WHERE collection_name = ? AND id IN (${ placeholders } )` ;
658+ const hnswParams : unknown [ ] = [ collectionName , ...candidateIds ] ;
659+ if ( options ?. filter ) {
660+ const filterSQL = this . buildMetadataFilterSQL ( options . filter ) ;
661+ hnswQuery += filterSQL . clause ;
662+ hnswParams . push ( ...filterSQL . params ) ;
663+ }
664+ const rows = await this . adapter . all < DocumentRow > ( hnswQuery , hnswParams ) ;
665+
666+ const scoreMap = new Map ( hnswCandidates . map ( c => [ c . id , c . score ] ) ) ;
667+ const candidates : RetrievedVectorDocument [ ] = rows . map ( row => {
668+ const metadata = row . metadata_json ? JSON . parse ( row . metadata_json ) : undefined ;
669+ const embedding = options ?. includeEmbedding
670+ ? ( isLegacyJsonBlob ( row . embedding_blob ) ? JSON . parse ( row . embedding_blob as string ) : blobToEmbedding ( row . embedding_blob as Buffer ) )
671+ : [ ] ;
672+ const doc : RetrievedVectorDocument = {
673+ id : row . id ,
674+ embedding,
675+ similarityScore : scoreMap . get ( row . id ) ?? 0 ,
676+ } ;
677+ if ( options ?. includeMetadata !== false && metadata ) doc . metadata = metadata ;
678+ if ( options ?. includeTextContent && row . text_content ) doc . textContent = row . text_content ;
679+ return doc ;
680+ } ) . filter ( d => options ?. minSimilarityScore === undefined || d . similarityScore >= options . minSimilarityScore ) ;
681+
682+ candidates . sort ( ( a , b ) => b . similarityScore - a . similarityScore ) ;
683+ const results = candidates . slice ( 0 , topK ) ;
684+ return {
685+ documents : results ,
686+ queryId : `sql-hnsw-query-${ uuidv4 ( ) } ` ,
687+ stats : { totalCandidates : hnswCandidates . length , filteredCandidates : candidates . length , returnedCount : results . length } ,
688+ } ;
689+ }
690+ }
691+
692+ // ── Brute-force fallback ──────────────────────────────────────────
564693 // Build query with SQL-level metadata filtering via json_extract()
565694 let query = `SELECT * FROM ${ this . tablePrefix } documents WHERE collection_name = ?` ;
566695 const params : unknown [ ] = [ collectionName ] ;
@@ -692,11 +821,12 @@ export class SqlVectorStore implements IVectorStore {
692821 }
693822 const rows = await this . adapter . all < DocumentRow > ( hybridQuery , hybridParams ) ;
694823
695- const tokenize = ( text : string ) : string [ ] =>
696- text
697- . toLowerCase ( )
698- . split ( / [ ^ a - z 0 - 9 _ ] + / g)
699- . filter ( ( t ) => t . length > 2 ) ;
824+ const tokenize = ( text : string ) : string [ ] => {
825+ /* Use pluggable pipeline when configured */
826+ if ( this . pipeline ) return this . pipeline . processToStrings ( text ) ;
827+ /* Fallback: built-in regex tokenizer */
828+ return text . toLowerCase ( ) . split ( / [ ^ a - z 0 - 9 _ ] + / g) . filter ( ( t ) => t . length > 2 ) ;
829+ } ;
700830
701831 const queryTerms = tokenize ( queryText ) ;
702832 const queryTermSet = new Set ( queryTerms ) ;
@@ -1026,6 +1156,11 @@ export class SqlVectorStore implements IVectorStore {
10261156 return ;
10271157 }
10281158
1159+ if ( this . sidecar ) {
1160+ await this . sidecar . shutdown ( ) ;
1161+ this . sidecar = null ;
1162+ }
1163+
10291164 if ( this . ownsAdapter && this . adapter ) {
10301165 await this . adapter . close ( ) ;
10311166 }
0 commit comments