Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 2012869

Browse files
committed
feature: support local GGUF model pull
1 parent 84216df commit 2012869

File tree

9 files changed

+90
-44
lines changed

9 files changed

+90
-44
lines changed

cortex-js/package.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@
3939
},
4040
"dependencies": {
4141
"@cortexso/cortex.js": "^0.1.5",
42-
"@huggingface/gguf": "^0.1.5",
43-
"@huggingface/hub": "^0.15.1",
4442
"@nestjs/axios": "^3.0.2",
4543
"@nestjs/common": "^10.0.0",
4644
"@nestjs/config": "^3.2.2",
@@ -59,6 +57,7 @@
5957
"cortex-cpp": "0.4.34",
6058
"cpu-instructions": "^0.0.11",
6159
"decompress": "^4.2.1",
60+
"hyllama": "^0.2.2",
6261
"js-yaml": "^4.1.0",
6362
"nest-commander": "^3.13.0",
6463
"ora": "5.4.1",
@@ -94,10 +93,10 @@
9493
"@yao-pkg/pkg": "^5.12.0",
9594
"cpx": "^1.5.0",
9695
"env-cmd": "10.1.0",
96+
"eslint": "8.57.0",
9797
"eslint-config-prettier": "9.1.0",
9898
"eslint-plugin-import": "2.29.1",
9999
"eslint-plugin-prettier": "5.2.1",
100-
"eslint": "8.57.0",
101100
"hanbi": "^1.0.3",
102101
"is-primitive": "^3.0.1",
103102
"jest": "^29.5.0",

cortex-js/src/infrastructure/commanders/models/model-pull.command.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import { downloadProgress } from '@/utils/download-progress';
1919
import { CortexClient } from '../services/cortex.client';
2020
import { DownloadType } from '@/domain/models/download.interface';
2121
import ora from 'ora';
22+
import { isLocalFile } from '@/utils/urls';
2223

2324
@SubCommand({
2425
name: 'pull',
@@ -61,9 +62,8 @@ export class ModelPullCommand extends BaseCommand {
6162
exit(1);
6263
});
6364

64-
ora().succeed('Model downloaded');
65-
6665
await downloadProgress(this.cortex, modelId);
66+
ora().succeed('Model downloaded');
6767

6868
const existingModel = await this.cortex.models.retrieve(modelId);
6969
const engine = existingModel?.engine || Engines.llamaCPP;

cortex-js/src/infrastructure/commanders/run.command.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import { ChatClient } from './services/chat-client';
1313
import { downloadProgress } from '@/utils/download-progress';
1414
import { CortexClient } from './services/cortex.client';
1515
import { DownloadType } from '@/domain/models/download.interface';
16+
import { isLocalFile } from '@/utils/urls';
17+
import { parse } from 'node:path';
1618

1719
type RunOptions = {
1820
threadId?: string;
@@ -71,6 +73,12 @@ export class RunCommand extends BaseCommand {
7173
await downloadProgress(this.cortex, modelId);
7274
checkingSpinner.succeed('Model downloaded');
7375

76+
// Update to persisted modelId
77+
// TODO: Should be retrieved from the request
78+
if (isLocalFile(modelId)) {
79+
modelId = parse(modelId).name;
80+
}
81+
7482
// Second check if model is available
7583
existingModel = await this.cortex.models.retrieve(modelId);
7684
if (!existingModel) {
@@ -93,6 +101,7 @@ export class RunCommand extends BaseCommand {
93101
}
94102

95103
const startingSpinner = ora('Loading model...').start();
104+
96105
return this.cortex.models
97106
.start(modelId, await this.fileService.getPreset(options.preset))
98107
.then(() => {

cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ export interface ModelMetadata {
22
stopWord?: string;
33
promptTemplate: string;
44
version: number;
5+
name?: string
56
}

cortex-js/src/usecases/models/models.usecases.ts

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto';
44
import { BadRequestException, Injectable } from '@nestjs/common';
55
import { Model, ModelSettingParams } from '@/domain/models/model.interface';
66
import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception';
7-
import { basename, join } from 'path';
7+
import { basename, join, parse } from 'path';
88
import { promises, existsSync, mkdirSync, readFileSync, rmSync } from 'fs';
99
import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto';
1010
import { ExtensionRepository } from '@/domain/repositories/extension.interface';
@@ -17,7 +17,6 @@ import { TelemetrySource } from '@/domain/telemetry/telemetry.interface';
1717
import { ModelRepository } from '@/domain/repositories/model.interface';
1818
import { ModelParameterParser } from '@/utils/model-parameter.parser';
1919
import {
20-
HuggingFaceModelVersion,
2120
HuggingFaceRepoData,
2221
HuggingFaceRepoSibling,
2322
} from '@/domain/models/huggingface.interface';
@@ -26,7 +25,10 @@ import {
2625
fetchJanRepoData,
2726
getHFModelMetadata,
2827
} from '@/utils/huggingface';
29-
import { DownloadType } from '@/domain/models/download.interface';
28+
import {
29+
DownloadStatus,
30+
DownloadType,
31+
} from '@/domain/models/download.interface';
3032
import { EventEmitter2 } from '@nestjs/event-emitter';
3133
import { ModelEvent, ModelId, ModelStatus } from '@/domain/models/model.event';
3234
import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service';
@@ -35,6 +37,7 @@ import { Engines } from '@/infrastructure/commanders/types/engine.interface';
3537
import { load } from 'js-yaml';
3638
import { llamaModelFile } from '@/utils/app-path';
3739
import { CortexUsecases } from '../cortex/cortex.usecases';
40+
import { isLocalFile } from '@/utils/urls';
3841

3942
@Injectable()
4043
export class ModelsUsecases {
@@ -127,7 +130,9 @@ export class ModelsUsecases {
127130
)) as EngineExtension | undefined;
128131

129132
if (engine) {
130-
await engine.unloadModel(id, model.engine || Engines.llamaCPP).catch(() => {}); // Silent fail
133+
await engine
134+
.unloadModel(id, model.engine || Engines.llamaCPP)
135+
.catch(() => {}); // Silent fail
131136
}
132137
return this.modelRepository
133138
.remove(id)
@@ -174,7 +179,7 @@ export class ModelsUsecases {
174179
}
175180

176181
// Attempt to start cortex
177-
await this.cortexUsecases.startCortex()
182+
await this.cortexUsecases.startCortex();
178183

179184
const loadingModelSpinner = ora('Loading model...').start();
180185
// update states and emitting event
@@ -341,10 +346,26 @@ export class ModelsUsecases {
341346
) {
342347
const modelId = persistedModelId ?? originModelId;
343348
const existingModel = await this.findOne(modelId);
349+
344350
if (isLocalModel(existingModel?.files)) {
345351
throw new BadRequestException('Model already exists');
346352
}
347353

354+
// Pull a local model file
355+
if (isLocalFile(originModelId)) {
356+
await this.populateHuggingFaceModel(originModelId, persistedModelId);
357+
this.eventEmitter.emit('download.event', [
358+
{
359+
id: modelId,
360+
type: DownloadType.Model,
361+
status: DownloadStatus.Downloaded,
362+
progress: 100,
363+
children: [],
364+
},
365+
]);
366+
return;
367+
}
368+
348369
const modelsContainerDir = await this.fileManagerService.getModelsPath();
349370

350371
if (!existsSync(modelsContainerDir)) {
@@ -422,22 +443,18 @@ export class ModelsUsecases {
422443
model.model = modelId;
423444
if (!(await this.findOne(modelId))) await this.create(model);
424445
} else {
425-
await this.populateHuggingFaceModel(modelId, files[0]);
426-
const model = await this.findOne(modelId);
427-
if (model) {
428-
const fileUrl = join(
429-
await this.fileManagerService.getModelsPath(),
430-
normalizeModelId(modelId),
431-
basename(
432-
files.find((e) => e.rfilename.endsWith('.gguf'))?.rfilename ??
433-
files[0].rfilename,
434-
),
435-
);
436-
await this.update(modelId, {
437-
files: [fileUrl],
438-
name: modelId.replace(':main', ''),
439-
});
440-
}
446+
const fileUrl = join(
447+
await this.fileManagerService.getModelsPath(),
448+
normalizeModelId(modelId),
449+
basename(
450+
files.find((e) => e.rfilename.endsWith('.gguf'))?.rfilename ??
451+
files[0].rfilename,
452+
),
453+
);
454+
await this.populateHuggingFaceModel(
455+
fileUrl,
456+
modelId.replace(':main', ''),
457+
);
441458
}
442459
uploadModelMetadataSpiner.succeed('Model metadata updated');
443460
const modelEvent: ModelEvent = {
@@ -458,21 +475,18 @@ export class ModelsUsecases {
458475
* It could be a model from Jan's repo or other authors
459476
   * @param ggufUrl GGUF download URL or local file path, e.g. "janhq/llama-3" or "llama3:7b"
460477
*/
461-
async populateHuggingFaceModel(
462-
modelId: string,
463-
modelVersion: HuggingFaceModelVersion,
464-
) {
465-
if (!modelVersion) throw 'No expected quantization found';
466-
467-
const tokenizer = await getHFModelMetadata(modelVersion.downloadUrl!);
478+
async populateHuggingFaceModel(ggufUrl: string, overridenId?: string) {
479+
const metadata = await getHFModelMetadata(ggufUrl);
468480

469-
const stopWords: string[] = tokenizer?.stopWord ? [tokenizer.stopWord] : [];
481+
const stopWords: string[] = metadata?.stopWord ? [metadata.stopWord] : [];
470482

483+
const modelId =
484+
overridenId ?? (isLocalFile(ggufUrl) ? parse(ggufUrl).name : ggufUrl);
471485
const model: CreateModelDto = {
472-
files: [modelVersion.downloadUrl ?? ''],
486+
files: [ggufUrl],
473487
model: modelId,
474-
name: modelId,
475-
prompt_template: tokenizer?.promptTemplate,
488+
name: metadata?.name ?? modelId,
489+
prompt_template: metadata?.promptTemplate,
476490
stop: stopWords,
477491

478492
// Default Inference Params

cortex-js/src/utils/download-progress.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ import { exit, stdin, stdout } from 'node:process';
44
import { DownloadState, DownloadType } from "@/domain/models/download.interface";
55

66
export const downloadProgress = async (cortex: Cortex, downloadId?: string, downloadType?: DownloadType) => {
7+
// Do not update on local file symlink
8+
if (downloadId && isLocalFile(downloadId)) return;
9+
710
const response = await cortex.events.downloadEvent();
811

912
const rl = require('readline').createInterface({

cortex-js/src/utils/huggingface.ts

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ import {
2121
ZEPHYR,
2222
ZEPHYR_JINJA,
2323
} from '@/infrastructure/constants/prompt-constants';
24-
import { gguf } from '@huggingface/gguf';
2524
import axios from 'axios';
2625
import { parseModelHubEngineBranch } from './normalize-model-id';
26+
import { closeSync, openSync, readSync } from 'fs';
2727

2828
// TODO: move this to somewhere else, should be reused by API as well. Maybe in a separate service / provider?
2929
export function guessPromptTemplateFromHuggingFace(jinjaCode?: string): string {
@@ -209,20 +209,29 @@ export async function getHFModelMetadata(
209209
ggufUrl: string,
210210
): Promise<ModelMetadata | undefined> {
211211
try {
212-
const { metadata } = await gguf(ggufUrl);
213-
// @ts-expect-error "tokenizer.ggml.eos_token_id"
212+
let metadata: any;
213+
const { ggufMetadata } = await import('hyllama');
214+
      // Read the first 10 MB of the GGUF file (enough to cover the metadata header)
215+
const fd = openSync(ggufUrl, 'r');
216+
const buffer = new Uint8Array(10_000_000);
217+
readSync(fd, buffer, 0, 10_000_000, 0);
218+
closeSync(fd);
219+
220+
// Parse metadata and tensor info
221+
({ metadata } = ggufMetadata(buffer.buffer));
222+
214223
const index = metadata['tokenizer.ggml.eos_token_id'];
215-
// @ts-expect-error "tokenizer.ggml.eos_token_id"
216224
const hfChatTemplate = metadata['tokenizer.chat_template'];
217225
const promptTemplate = guessPromptTemplateFromHuggingFace(hfChatTemplate);
218-
// @ts-expect-error "tokenizer.ggml.tokens"
219226
const stopWord: string = metadata['tokenizer.ggml.tokens'][index] ?? '';
227+
const name = metadata['general.name'];
220228

221229
const version: number = metadata['version'];
222230
return {
223231
stopWord,
224232
promptTemplate,
225233
version,
234+
name,
226235
};
227236
} catch (err) {
228237
console.log('Failed to get model metadata:', err.message);

cortex-js/src/utils/urls.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { isAbsolute } from 'path';
2+
13
/**
24
* Check if a string is a valid URL.
35
* @param input - The string to check.
@@ -12,3 +14,12 @@ export function isValidUrl(input: string | undefined): boolean {
1214
return false;
1315
}
1416
}
17+
18+
/**
19+
 * Check if the URL is a local file path
20+
 * @param path - The path or URL to check.
21+
* @returns
22+
*/
23+
export const isLocalFile = (path: string): boolean => {
24+
return !/^(http|https):\/\/[^/]+\/.*/.test(path) && isAbsolute(path);
25+
};

cortex-js/tsconfig.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"compilerOptions": {
3-
"module": "commonjs",
4-
"moduleResolution": "node",
3+
"module": "node16",
4+
"moduleResolution": "node16",
55
"declaration": true,
66
"removeComments": true,
77
"emitDecoratorMetadata": true,

0 commit comments

Comments
 (0)