@@ -51,6 +51,7 @@ import type {
5151 ISpeechToTextProvider ,
5252 MultimodalIndexerConfig ,
5353} from './types.js' ;
54+ import type { VisionPipeline } from '../../core/vision/VisionPipeline.js' ;
5455
5556// ---------------------------------------------------------------------------
5657// Constants
@@ -157,26 +158,39 @@ export class MultimodalIndexer {
157158 * @param deps.embeddingManager - Manager for generating text embeddings.
158159 * @param deps.vectorStore - Vector store for document storage and search.
159160 * @param deps.visionProvider - Optional vision LLM for image description.
161+ * @param deps.visionPipeline - Optional full vision pipeline with OCR, handwriting,
162+ * document understanding, CLIP embeddings, and cloud fallback. When provided,
163+ * it is wrapped as an {@link IVisionProvider} via {@link PipelineVisionProvider},
164+ * overriding any `visionProvider` passed alongside it.
160165 * @param deps.sttProvider - Optional STT provider for audio transcription.
161166 * @param deps.config - Optional configuration overrides.
162167 *
163168 * @throws {Error } If embeddingManager or vectorStore is missing.
164169 *
165170 * @example
166171 * ```typescript
172+ * // With a simple vision LLM provider
167173 * const indexer = new MultimodalIndexer({
168174 * embeddingManager,
169175 * vectorStore,
170176 * visionProvider: myVisionLLM,
171177 * sttProvider: myWhisperService,
172178 * config: { defaultCollection: 'knowledge' },
173179 * });
180+ *
181+ * // With the full vision pipeline (recommended)
182+ * const indexer = new MultimodalIndexer({
183+ * embeddingManager,
184+ * vectorStore,
185+ * visionPipeline: myVisionPipeline,
186+ * });
174187 * ```
175188 */
176189 constructor ( deps : {
177190 embeddingManager : IEmbeddingManager ;
178191 vectorStore : IVectorStore ;
179192 visionProvider ?: IVisionProvider ;
193+ visionPipeline ?: VisionPipeline ;
180194 sttProvider ?: ISpeechToTextProvider ;
181195 config ?: MultimodalIndexerConfig ;
182196 } ) {
@@ -189,9 +203,21 @@ export class MultimodalIndexer {
189203
190204 this . _embeddingManager = deps . embeddingManager ;
191205 this . _vectorStore = deps . vectorStore ;
192- this . _visionProvider = deps . visionProvider ;
193206 this . _sttProvider = deps . sttProvider ;
194207
208+ // If a full VisionPipeline is provided, wrap it as an IVisionProvider.
209+ // This gives the indexer access to the progressive OCR + vision pipeline
210+ // for image description, while maintaining backward compatibility with
211+ // the simpler IVisionProvider interface.
212+ if ( deps . visionPipeline ) {
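213+ // Deferred require() (instead of a top-level import) to avoid a circular dependency at module load time.
214+ // PipelineVisionProvider is a thin adapter — safe to require synchronously. NOTE(review): `require` is not defined in native ES modules; confirm this file is emitted as CommonJS or obtain it via `createRequire`.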
215+ const { PipelineVisionProvider } = require ( '../../core/vision/providers/PipelineVisionProvider.js' ) ;
216+ this . _visionProvider = new PipelineVisionProvider ( deps . visionPipeline ) ;
217+ } else {
218+ this . _visionProvider = deps . visionProvider ;
219+ }
220+
195221 this . _config = {
196222 defaultCollection : deps . config ?. defaultCollection ?? DEFAULT_COLLECTION ,
197223 imageDescriptionPrompt : deps . config ?. imageDescriptionPrompt ?? DEFAULT_IMAGE_DESCRIPTION_PROMPT ,
0 commit comments