Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 5f57033

Browse files
authored
feat: ship ONNX runtime on Windows (#716)
1 parent d72d08a commit 5f57033

File tree

10 files changed

+256
-45
lines changed

10 files changed

+256
-45
lines changed

cortex-js/src/infrastructure/commanders/models/model-pull.command.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ import { ModelNotFoundException } from '@/infrastructure/exception/model-not-fou
1010
aliases: ['download'],
1111
arguments: '<model_id>',
1212
argsDescription: { model_id: 'Model repo to pull' },
13-
description: 'Download a model. Working with HuggingFace model id.',
13+
description:
14+
'Download a model from a registry. Working with HuggingFace repositories. For available models, please visit https://huggingface.co/cortexhub',
1415
})
1516
@SetCommandContext()
1617
export class ModelPullCommand extends CommandRunner {

cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts

Lines changed: 117 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
1-
import { createWriteStream, existsSync, rmSync } from 'fs';
1+
import {
2+
cpSync,
3+
createWriteStream,
4+
existsSync,
5+
readdir,
6+
readdirSync,
7+
rmSync,
8+
} from 'fs';
29
import { delimiter, join } from 'path';
310
import { HttpService } from '@nestjs/axios';
411
import { Presets, SingleBar } from 'cli-progress';
@@ -12,6 +19,7 @@ import { rm } from 'fs/promises';
1219
import { exec } from 'child_process';
1320
import { appPath } from '@/utils/app-path';
1421
import {
22+
CORTEX_ONNX_ENGINE_RELEASES_URL,
1523
CORTEX_RELEASES_URL,
1624
CUDA_DOWNLOAD_URL,
1725
} from '@/infrastructure/constants/cortex';
@@ -59,7 +67,7 @@ export class InitCliUsecases {
5967
exit(1);
6068
}
6169

62-
console.log(`Downloading engine file ${engineFileName}`);
70+
console.log(`Downloading Llama.cpp engine file ${engineFileName}`);
6371
const dataFolderPath = await this.fileManagerService.getDataFolderPath();
6472
const engineDir = join(dataFolderPath, 'cortex-cpp');
6573
if (existsSync(engineDir)) rmSync(engineDir, { recursive: true });
@@ -109,6 +117,9 @@ export class InitCliUsecases {
109117
exit(1);
110118
}
111119
await rm(destination, { force: true });
120+
121+
// Ship ONNX Runtime on Windows by default
122+
if (process.platform === 'win32') await this.installONNXEngine();
112123
};
113124

114125
parseEngineFileName = (options?: InitOptions) => {
@@ -187,6 +198,7 @@ export class InitCliUsecases {
187198
).replace('<platform>', platform);
188199
const destination = join(dataFolderPath, 'cuda-toolkit.tar.gz');
189200

201+
console.log('Downloading CUDA Toolkit dependency...');
190202
const download = await firstValueFrom(
191203
this.httpService.get(url, {
192204
responseType: 'stream',
@@ -283,6 +295,109 @@ export class InitCliUsecases {
283295
});
284296
};
285297

298+
/**
299+
* Download and install ONNX engine
300+
* @param version
301+
* @param engineFileName
302+
*/
303+
async installONNXEngine(
304+
version: string = 'latest',
305+
engineFileName: string = 'windows-amd64',
306+
) {
307+
const res = await firstValueFrom(
308+
this.httpService.get(
309+
CORTEX_ONNX_ENGINE_RELEASES_URL +
310+
`${version === 'latest' ? '/latest' : ''}`,
311+
{
312+
headers: {
313+
'X-GitHub-Api-Version': '2022-11-28',
314+
Accept: 'application/vnd.github+json',
315+
},
316+
},
317+
),
318+
);
319+
320+
if (!res?.data) {
321+
console.log('Failed to fetch releases');
322+
exit(1);
323+
}
324+
325+
let release = res?.data;
326+
if (Array.isArray(res?.data)) {
327+
release = Array(res?.data)[0].find(
328+
(e) => e.name === version.replace('v', ''),
329+
);
330+
}
331+
const toDownloadAsset = release.assets.find((s: any) =>
332+
s.name.includes(engineFileName),
333+
);
334+
335+
if (!toDownloadAsset) {
336+
console.log(`Could not find engine file ${engineFileName}`);
337+
exit(1);
338+
}
339+
340+
console.log(`Downloading ONNX engine file ${engineFileName}`);
341+
const dataFolderPath = await this.fileManagerService.getDataFolderPath();
342+
const engineDir = join(dataFolderPath, 'cortex-cpp');
343+
344+
const download = await firstValueFrom(
345+
this.httpService.get(toDownloadAsset.browser_download_url, {
346+
responseType: 'stream',
347+
}),
348+
);
349+
if (!download) {
350+
console.log('Failed to download model');
351+
process.exit(1);
352+
}
353+
354+
const destination = join(dataFolderPath, toDownloadAsset.name);
355+
356+
await new Promise((resolve, reject) => {
357+
const writer = createWriteStream(destination);
358+
let receivedBytes = 0;
359+
const totalBytes = download.headers['content-length'];
360+
361+
writer.on('finish', () => {
362+
bar.stop();
363+
resolve(true);
364+
});
365+
366+
writer.on('error', (error) => {
367+
bar.stop();
368+
reject(error);
369+
});
370+
371+
const bar = new SingleBar({}, Presets.shades_classic);
372+
bar.start(100, 0);
373+
374+
download.data.on('data', (chunk: any) => {
375+
receivedBytes += chunk.length;
376+
bar.update(Math.floor((receivedBytes / totalBytes) * 100));
377+
});
378+
379+
download.data.pipe(writer);
380+
});
381+
382+
try {
383+
await decompress(destination, join(engineDir, 'engines'));
384+
} catch (e) {
385+
console.error('Error decompressing file', e);
386+
exit(1);
387+
}
388+
await rm(destination, { force: true });
389+
390+
// Copy the additional files to the cortex-cpp directory
391+
for (const file of readdirSync(join(engineDir, 'engines', 'cortex.onnx'))) {
392+
if (file !== 'engine.dll') {
393+
await cpSync(
394+
join(engineDir, 'engines', 'cortex.onnx', file),
395+
join(engineDir, file),
396+
);
397+
}
398+
}
399+
}
400+
286401
private checkFileExistenceInPaths = (
287402
file: string,
288403
paths: string[],

cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts

Lines changed: 106 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ import { load } from 'js-yaml';
1717
import { existsSync, readdirSync, readFileSync } from 'fs';
1818
import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id';
1919
import { getHFModelMetadata } from '@/utils/huggingface';
20+
import { createWriteStream, mkdirSync, promises } from 'node:fs';
21+
import { firstValueFrom } from 'rxjs';
2022

2123
@Injectable()
2224
export class ModelsCliUsecases {
@@ -118,40 +120,116 @@ export class ModelsCliUsecases {
118120
process.exit(1);
119121
}
120122

121-
await this.pullHuggingFaceModel(modelId);
122-
const bar = new SingleBar({}, Presets.shades_classic);
123-
bar.start(100, 0);
124-
const callback = (progress: number) => {
125-
bar.update(progress);
126-
};
123+
if (modelId.includes('onnx')) {
124+
await this.pullOnnxModel(modelId);
125+
} else {
126+
await this.pullGGUFModel(modelId);
127+
const bar = new SingleBar({}, Presets.shades_classic);
128+
bar.start(100, 0);
129+
const callback = (progress: number) => {
130+
bar.update(progress);
131+
};
132+
133+
try {
134+
await this.modelsUsecases.downloadModel(modelId, callback);
135+
136+
const model = await this.modelsUsecases.findOne(modelId);
137+
const fileUrl = join(
138+
await this.fileService.getModelsPath(),
139+
normalizeModelId(modelId),
140+
basename((model?.files as string[])[0]),
141+
);
142+
await this.modelsUsecases.update(modelId, {
143+
files: [fileUrl],
144+
name: modelId.replace(':default', ''),
145+
});
146+
} catch (err) {
147+
bar.stop();
148+
throw err;
149+
}
150+
}
151+
}
152+
153+
/**
154+
* It's to pull ONNX model from HuggingFace repository
155+
* @param modelId
156+
*/
157+
private async pullOnnxModel(modelId: string) {
158+
const modelsContainerDir = await this.fileService.getModelsPath();
159+
160+
if (!existsSync(modelsContainerDir)) {
161+
mkdirSync(modelsContainerDir, { recursive: true });
162+
}
163+
164+
const modelFolder = join(modelsContainerDir, normalizeModelId(modelId));
165+
await promises.mkdir(modelFolder, { recursive: true }).catch(() => {});
127166

128-
try {
129-
await this.modelsUsecases.downloadModel(modelId, callback);
167+
const files = [
168+
'genai_config.json',
169+
'model.onnx',
170+
'model.onnx.data',
171+
'model.yml',
172+
'special_tokens_map.json',
173+
'tokenizer.json',
174+
'tokenizer_config.json',
175+
];
176+
const repo = modelId.split(':')[0];
177+
const branch = modelId.split(':')[1] || 'default';
178+
for (const file of files) {
179+
console.log(`Downloading ${file}`);
180+
const bar = new SingleBar({}, Presets.shades_classic);
181+
bar.start(100, 0);
130182

131-
const model = await this.modelsUsecases.findOne(modelId);
132-
const fileUrl = join(
133-
await this.fileService.getModelsPath(),
134-
normalizeModelId(modelId),
135-
basename((model?.files as string[])[0]),
183+
const response = await firstValueFrom(
184+
this.httpService.get(
185+
`https://huggingface.co/cortexhub/${repo}/resolve/${branch}/${file}?download=true`,
186+
{
187+
responseType: 'stream',
188+
},
189+
),
136190
);
137-
await this.modelsUsecases.update(modelId, {
138-
files: [fileUrl],
139-
name: modelId.replace(':default', ''),
191+
if (!response) {
192+
throw new Error('Failed to download model');
193+
}
194+
195+
await new Promise((resolve, reject) => {
196+
const writer = createWriteStream(join(modelFolder, file));
197+
let receivedBytes = 0;
198+
const totalBytes = response.headers['content-length'];
199+
200+
writer.on('finish', () => {
201+
resolve(true);
202+
});
203+
204+
writer.on('error', (error) => {
205+
reject(error);
206+
});
207+
208+
response.data.on('data', (chunk: any) => {
209+
receivedBytes += chunk.length;
210+
bar.update(Math.floor((receivedBytes / totalBytes) * 100));
211+
});
212+
213+
response.data.pipe(writer);
140214
});
141-
} catch (err) {
142215
bar.stop();
143-
throw err;
144216
}
145-
}
146217

147-
//// PRIVATE METHODS ////
218+
const model: CreateModelDto = load(
219+
readFileSync(join(modelFolder, 'model.yml'), 'utf-8'),
220+
) as CreateModelDto;
221+
model.files = [join(modelFolder)];
222+
model.model = modelId
148223

224+
if (!(await this.modelsUsecases.findOne(modelId)))
225+
await this.modelsUsecases.create(model);
226+
}
149227
/**
150228
* Pull a model from a HuggingFace repository
151229
* It could be a model from Jan's repo or other authors
152230
* @param modelId HuggingFace model id, e.g. "janhq/llama-3" or "llama3:7b"
153231
*/
154-
private async pullHuggingFaceModel(modelId: string) {
232+
private async pullGGUFModel(modelId: string) {
155233
const data: HuggingFaceRepoData =
156234
await this.modelsUsecases.fetchModelMetadata(modelId);
157235

@@ -179,6 +257,7 @@ export class ModelsCliUsecases {
179257
} else {
180258
modelVersion = data.siblings.find((e) => e.rfilename.includes('.gguf'));
181259
}
260+
182261
if (!modelVersion) throw 'No expected quantization found';
183262
const metadata = await getHFModelMetadata(modelVersion.downloadUrl!);
184263

@@ -203,12 +282,17 @@ export class ModelsCliUsecases {
203282
// Default Model Settings
204283
ctx_len: 4096,
205284
ngl: 100,
206-
engine: 'cortex.llamacpp',
285+
engine: modelId.includes('onnx') ? 'cortex.onnx' : 'cortex.llamacpp',
207286
};
208287
if (!(await this.modelsUsecases.findOne(modelId)))
209288
await this.modelsUsecases.create(model);
210289
}
211290

291+
/**
292+
* Parse preset file
293+
* @param preset
294+
* @returns
295+
*/
212296
private async parsePreset(preset?: string): Promise<object> {
213297
const presetsFolder = await this.fileService.getPresetsPath();
214298

cortex-js/src/infrastructure/constants/cortex.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ export const CORTEX_JS_STOP_API_SERVER_URL = (
4242
export const CORTEX_RELEASES_URL =
4343
'https://api.github.com/repos/janhq/cortex/releases';
4444

45+
export const CORTEX_ONNX_ENGINE_RELEASES_URL =
46+
'https://api.github.com/repos/janhq/cortex.onnx/releases';
47+
4548
export const CUDA_DOWNLOAD_URL =
4649
'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz';
4750

cortex-js/src/infrastructure/constants/huggingface.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ export const HUGGING_FACE_TREE_REF_URL = (
22
repo: string,
33
tree: string,
44
path: string,
5-
) => `https://huggingface.co/janhq/${repo}/resolve/${tree}/${path}`;
5+
) => `https://huggingface.co/cortexhub/${repo}/resolve/${tree}/${path}`;
66

77
export const HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL = (
88
author: string,

cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import { FileManagerService } from '@/infrastructure/services/file-manager/file-
1515

1616
@Injectable()
1717
export default class CortexProvider extends OAIEngineExtension {
18-
provider: string = 'cortex.llamacpp';
18+
provider: string = 'cortex';
1919
apiUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/chat_completion`;
2020

2121
private loadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/loadmodel`;

cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@ import { existsSync } from 'fs';
1111
@Injectable()
1212
export class ExtensionRepositoryImpl implements ExtensionRepository {
1313
// Initialize the Extensions Map with the key-value pairs of the core providers.
14-
extensions = new Map<string, Extension>([['cortex', this.cortexProvider]]);
14+
extensions = new Map<string, Extension>([
15+
['cortex.llamacpp', this.cortexProvider],
16+
['cortex.onnx', this.cortexProvider],
17+
]);
1518

1619
constructor(
1720
@Inject('CORTEX_PROVIDER')

0 commit comments

Comments
 (0)