Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1 @@
models/
embeddings/
node_modules/
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).
The format is based on [Keep a Changelog](http://keepachangelog.com/).

## Version 0.0.4 - 2025-09-25

### Changed

- Vendor the embeddings and the model so that no internet access is required

## Version 0.0.3 - 2025-09-22

### Changed
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,5 +165,6 @@ Copyright 2025 SAP SE or an SAP affiliate company and @cap-js/cds-mcp contributo
## Acknowledgments

- **onnxruntime-web** is used for creating embeddings locally.
- **Xenova/all-MiniLM-L6-v2** is used as the embeddings model.
- **@huggingface/transformers.js** is used to compare the output of the WordPiece tokenizer.
- **@modelcontextprotocol/sdk** provides the SDK for MCP.
Binary file added embeddings/code-chunks.bin
Binary file not shown.
1 change: 1 addition & 0 deletions embeddings/code-chunks.etag
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
W/"13f5a6-19962049eb0"
2,689 changes: 2,689 additions & 0 deletions embeddings/code-chunks.json

Large diffs are not rendered by default.

139 changes: 20 additions & 119 deletions lib/calculateEmbeddings.js
Original file line number Diff line number Diff line change
@@ -1,120 +1,38 @@
import fs from 'fs/promises'
import { constants } from 'fs'
import path from 'path'
import { fileURLToPath } from 'url'
import * as ort from 'onnxruntime-web'

const __dirname = path.dirname(fileURLToPath(import.meta.url))

const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2'
const MODEL_DIR = path.resolve(__dirname, '..', 'models')

const FILES = ['onnx/model.onnx', 'tokenizer.json', 'tokenizer_config.json']

/**
 * Write the given data to disk.
 *
 * @param {ArrayBuffer|string} buffer - Content to write; it is passed through `Buffer.from` first.
 * @param {string} outputPath - Destination file path.
 * @returns {Promise<void>}
 */
async function saveFile(buffer, outputPath) {
  const payload = Buffer.from(buffer)
  await fs.writeFile(outputPath, payload)
}

/**
 * Report whether a path is visible on disk.
 *
 * @param {string} filePath - Path to probe.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds with `F_OK`, `false` when it rejects.
 */
async function fileExists(filePath) {
  return fs.access(filePath, constants.F_OK).then(
    () => true,
    () => false,
  )
}

/**
 * Fetch a URL and store its body at `outputPath`.
 *
 * The body is read according to the URL suffix: `.onnx` as an ArrayBuffer,
 * `.json` as parsed JSON that is re-serialized with 2-space indentation,
 * anything else as text.
 *
 * @param {string} url - Resource to download.
 * @param {string} outputPath - Destination file path.
 * @returns {Promise<void>}
 * @throws {Error} When the response status is not OK.
 */
async function downloadFile(url, outputPath) {
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`Failed to download ${url}, status ${response.status}`)
  }

  if (url.endsWith('.onnx')) {
    const binary = await response.arrayBuffer()
    await saveFile(binary, outputPath)
    return
  }

  if (url.endsWith('.json')) {
    const parsed = await response.json()
    await saveFile(JSON.stringify(parsed, null, 2), outputPath)
    return
  }

  const body = await response.text()
  await saveFile(body, outputPath)
}

/**
 * Make sure every entry of FILES is present in MODEL_DIR, downloading the
 * missing ones one after another.
 *
 * @returns {Promise<void>}
 */
async function downloadModelIfNeeded() {
  // Create the model directory only when it cannot be accessed.
  const hasModelDir = await fs.access(MODEL_DIR).then(
    () => true,
    () => false,
  )
  if (!hasModelDir) {
    await fs.mkdir(MODEL_DIR, { recursive: true })
  }

  for (const remotePath of FILES) {
    // Files are stored flat: only the base name of the remote path is kept.
    const localPath = path.join(MODEL_DIR, path.basename(remotePath))
    if (await fileExists(localPath)) continue

    const sourceUrl = `https://huggingface.co/${MODEL_NAME}/resolve/main/${remotePath}`
    await downloadFile(sourceUrl, localPath)
  }
}

/**
 * Discard the cached session and vocabulary, remove the local model files
 * and download them again.
 *
 * @returns {Promise<void>}
 */
async function forceRedownloadModel() {
  // Clearing the cached state forces a fresh initialization later on.
  session = null
  vocab = null

  // Remove whatever model files are currently on disk.
  for (const remotePath of FILES) {
    const localPath = path.join(MODEL_DIR, path.basename(remotePath))
    if (!(await fileExists(localPath))) continue

    try {
      await fs.unlink(localPath)
    } catch {
      // Best effort: a failed delete is ignored.
    }
  }

  // Fetch the files again.
  await downloadModelIfNeeded()
}

/**
 * Load the ONNX model and the tokenizer vocabulary from MODEL_DIR.
 *
 * Reads `model.onnx` into a buffer and creates the inference session from it,
 * then parses `tokenizer.json`, checks that `model.vocab` is present and
 * stores the vocabulary in the module-level `vocab` as a Map that keeps only
 * entries whose id is a number.
 *
 * NOTE(review): this span shows the removed and the added side of a diff
 * interleaved, so several statements appear twice and the braces do not
 * balance. Read it as two versions of the same function, not as runnable code.
 *
 * @throws {Error} 'Invalid tokenizer structure: missing model.vocab' when the
 *   parsed tokenizer has no `model.vocab`.
 */
async function initializeModelAndVocab() {
const modelPath = path.join(MODEL_DIR, 'model.onnx')
const vocabPath = path.join(MODEL_DIR, 'tokenizer.json')

const loadModelAndVocab = async () => {
// Load model as buffer for onnxruntime-web
const modelBuffer = await fs.readFile(modelPath)
session = await ort.InferenceSession.create(modelBuffer)
// Load model as buffer for onnxruntime-web
const modelBuffer = await fs.readFile(modelPath)
session = await ort.InferenceSession.create(modelBuffer)

// Try to parse tokenizer JSON
const tokenizerJson = JSON.parse(await fs.readFile(vocabPath, 'utf-8'))
// Try to parse tokenizer JSON
const tokenizerJson = JSON.parse(await fs.readFile(vocabPath, 'utf-8'))

// Validate tokenizer structure
if (!tokenizerJson.model || !tokenizerJson.model.vocab) {
throw new Error('Invalid tokenizer structure: missing model.vocab')
}

vocab = tokenizerJson.model.vocab

// Convert to clean Map to avoid prototype pollution
const cleanVocab = new Map()
for (const [token, id] of Object.entries(vocab)) {
if (typeof id === 'number') {
cleanVocab.set(token, id)
}
}
vocab = cleanVocab
// Validate tokenizer structure
if (!tokenizerJson.model || !tokenizerJson.model.vocab) {
throw new Error('Invalid tokenizer structure: missing model.vocab')
}

try {
await loadModelAndVocab()
} catch {
// Model or tokenizer is corrupted, force re-download
await forceRedownloadModel()

// Retry initialization after re-download
try {
await loadModelAndVocab()
} catch {
throw new Error('Failed to restore valid tokenizer after re-download')
vocab = tokenizerJson.model.vocab

// Convert to clean Map to avoid prototype pollution
const cleanVocab = new Map()
for (const [token, id] of Object.entries(vocab)) {
if (typeof id === 'number') {
cleanVocab.set(token, id)
}
}
vocab = cleanVocab
}

/**
Expand Down Expand Up @@ -402,21 +320,15 @@ async function processChunkedEmbeddings(chunks, session) {
// Module-level cache: the ONNX inference session and the tokenizer vocabulary.
// Both start out as null and are assigned by initializeModelAndVocab.
let session = null
let vocab = null

// Kick off model initialization as soon as the module is loaded.
// calculateEmbeddings awaits this promise before doing any work.
const modelInitPromise = (async () => {
try {
await downloadModelIfNeeded()
await initializeModelAndVocab()
} catch {
// Deliberately swallowed so this promise never rejects at module load;
// handling a failed initialization is left to the main function.
}
})()

/**
 * Clear the cached inference session and tokenizer vocabulary.
 *
 * @returns {void}
 */
export function resetSession() {
  // Both module-level caches are reset to null.
  session = vocab = null
}

export default async function calculateEmbeddings(text) {
// Wait for the model to be preloaded, then ensure it's initialized
await modelInitPromise
Expand All @@ -441,17 +353,6 @@ export default async function calculateEmbeddings(text) {
return normalized
}

try {
const pooledEmbedding = await processChunkedEmbeddings(chunks, session)
return normalizeEmbedding(pooledEmbedding)
} catch {
// If inference fails, it might be due to model corruption
// Try to recover by re-downloading and reinitializing

await forceRedownloadModel()
await initializeModelAndVocab()

const retryPooledEmbedding = await processChunkedEmbeddings(chunks, session)
return normalizeEmbedding(retryPooledEmbedding)
}
const pooledEmbedding = await processChunkedEmbeddings(chunks, session)
return normalizeEmbedding(pooledEmbedding)
}
Loading