cap-js · David-Kunz · Sep 25, 2025 · Sep 25, 2025 · Sep 25, 2025 · Sep 25, 2025
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1 @@
-models/
-embeddings/
 node_modules/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 This project adheres to [Semantic Versioning](http://semver.org/).
 The format is based on [Keep a Changelog](http://keepachangelog.com/).
 
+## Version 0.0.4 - 2025-09-25
+
+### Changed
+
+- Vendor the embeddings and the model so that internet access is no longer required
+
 ## Version 0.0.3 - 2025-09-22
 
 ### Changed

diff --git a/README.md b/README.md
@@ -165,5 +165,6 @@ Copyright 2025 SAP SE or an SAP affiliate company and @cap-js/cds-mcp contributo
 ## Acknowledgments
 
 - **onnxruntime-web** is used for creating embeddings locally.
+- **Xenova/all-MiniLM-L6-v2** is used as the embeddings model.
 - **@huggingface/transformers.js** is used to compare the output of the WordPiece tokenizer.
 - **@modelcontextprotocol/sdk** provides the SDK for MCP.
diff --git a/embeddings/code-chunks.bin b/embeddings/code-chunks.bin
diff --git a/embeddings/code-chunks.etag b/embeddings/code-chunks.etag
@@ -0,0 +1 @@
+W/"13f5a6-19962049eb0"
diff --git a/embeddings/code-chunks.json b/embeddings/code-chunks.json
diff --git a/lib/calculateEmbeddings.js b/lib/calculateEmbeddings.js
@@ -1,120 +1,38 @@
 import fs from 'fs/promises'
-import { constants } from 'fs'
 import path from 'path'
 import { fileURLToPath } from 'url'
 import * as ort from 'onnxruntime-web'
 
 const __dirname = path.dirname(fileURLToPath(import.meta.url))
 
-const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2'
 const MODEL_DIR = path.resolve(__dirname, '..', 'models')
 
-const FILES = ['onnx/model.onnx', 'tokenizer.json', 'tokenizer_config.json']
-
-async function saveFile(buffer, outputPath) {
-  await fs.writeFile(outputPath, Buffer.from(buffer))
-}
-
-async function fileExists(filePath) {
-  try {
-    await fs.access(filePath, constants.F_OK)
-    return true
-  } catch {
-    return false
-  }
-}
-
-async function downloadFile(url, outputPath) {
-  const res = await fetch(url)
-  if (!res.ok) throw new Error(`Failed to download ${url}, status ${res.status}`)
-
-  if (url.endsWith('.onnx')) {
-    const arrayBuffer = await res.arrayBuffer()
-    await saveFile(arrayBuffer, outputPath)
-  } else if (url.endsWith('.json')) {
-    const json = await res.json()
-    await saveFile(JSON.stringify(json, null, 2), outputPath)
-  } else {
-    const text = await res.text()
-    await saveFile(text, outputPath)
-  }
-}
-
-async function downloadModelIfNeeded() {
-  try {
-    await fs.access(MODEL_DIR)
-  } catch {
-    await fs.mkdir(MODEL_DIR, { recursive: true })
-  }
-
-  for (const file of FILES) {
-    const filePath = path.join(MODEL_DIR, path.basename(file))
-    if (!(await fileExists(filePath))) {
-      const url = `https://huggingface.co/${MODEL_NAME}/resolve/main/${file}`
-      await downloadFile(url, filePath)
-    }
-  }
-}
-
-async function forceRedownloadModel() {
-  // Reset session and vocab to force reinitialization
-  session = null
-  vocab = null
-
-  // Delete all model files to force re-download
-  for (const file of FILES) {
-    const filePath = path.join(MODEL_DIR, path.basename(file))
-    if (await fileExists(filePath)) {
-      await fs.unlink(filePath).catch(() => {})
-    }
-  }
-
-  // Force re-download
-  await downloadModelIfNeeded()
-}
-
 async function initializeModelAndVocab() {
   const modelPath = path.join(MODEL_DIR, 'model.onnx')
   const vocabPath = path.join(MODEL_DIR, 'tokenizer.json')
 
-  const loadModelAndVocab = async () => {
-    // Load model as buffer for onnxruntime-web
-    const modelBuffer = await fs.readFile(modelPath)
-    session = await ort.InferenceSession.create(modelBuffer)
+  // Load model as buffer for onnxruntime-web
+  const modelBuffer = await fs.readFile(modelPath)
+  session = await ort.InferenceSession.create(modelBuffer)
 
-    // Try to parse tokenizer JSON
-    const tokenizerJson = JSON.parse(await fs.readFile(vocabPath, 'utf-8'))
+  // Parse tokenizer JSON (a read or parse failure propagates to the caller)
+  const tokenizerJson = JSON.parse(await fs.readFile(vocabPath, 'utf-8'))
 
-    // Validate tokenizer structure
-    if (!tokenizerJson.model || !tokenizerJson.model.vocab) {
-      throw new Error('Invalid tokenizer structure: missing model.vocab')
-    }
-
-    vocab = tokenizerJson.model.vocab
-
-    // Convert to clean Map to avoid prototype pollution
-    const cleanVocab = new Map()
-    for (const [token, id] of Object.entries(vocab)) {
-      if (typeof id === 'number') {
-        cleanVocab.set(token, id)
-      }
-    }
-    vocab = cleanVocab
+  // Validate tokenizer structure
+  if (!tokenizerJson.model || !tokenizerJson.model.vocab) {
+    throw new Error('Invalid tokenizer structure: missing model.vocab')
   }
 
-  try {
-    await loadModelAndVocab()
-  } catch {
-    // Model or tokenizer is corrupted, force re-download
-    await forceRedownloadModel()
-
-    // Retry initialization after re-download
-    try {
-      await loadModelAndVocab()
-    } catch {
-      throw new Error('Failed to restore valid tokenizer after re-download')
+  vocab = tokenizerJson.model.vocab
+
+  // Convert to clean Map to avoid prototype pollution
+  const cleanVocab = new Map()
+  for (const [token, id] of Object.entries(vocab)) {
+    if (typeof id === 'number') {
+      cleanVocab.set(token, id)
     }
   }
+  vocab = cleanVocab
 }
 
 /**
@@ -402,21 +320,15 @@ async function processChunkedEmbeddings(chunks, session) {
 let session = null
 let vocab = null
 
-// Start downloading and initializing model when module loads
+// Initialize model when module loads
 const modelInitPromise = (async () => {
   try {
-    await downloadModelIfNeeded()
     await initializeModelAndVocab()
   } catch {
-    // Don't throw here - let the main function handle initialization
+    // Swallow initialization errors so that modelInitPromise never rejects
   }
 })()
 
-export function resetSession() {
-  session = null
-  vocab = null
-}
-
 export default async function calculateEmbeddings(text) {
   // Wait for the model to be preloaded, then ensure it's initialized
   await modelInitPromise
@@ -441,17 +353,6 @@ export default async function calculateEmbeddings(text) {
     return normalized
   }
 
-  try {
-    const pooledEmbedding = await processChunkedEmbeddings(chunks, session)
-    return normalizeEmbedding(pooledEmbedding)
-  } catch {
-    // If inference fails, it might be due to model corruption
-    // Try to recover by re-downloading and reinitializing
-
-    await forceRedownloadModel()
-    await initializeModelAndVocab()
-
-    const retryPooledEmbedding = await processChunkedEmbeddings(chunks, session)
-    return normalizeEmbedding(retryPooledEmbedding)
-  }
+  const pooledEmbedding = await processChunkedEmbeddings(chunks, session)
+  return normalizeEmbedding(pooledEmbedding)
 }