Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cortex-js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
},
"dependencies": {
"@huggingface/gguf": "^0.1.5",
"@huggingface/hub": "^0.15.1",
"@nestjs/axios": "^3.0.2",
"@nestjs/common": "^10.0.0",
"@nestjs/config": "^3.2.2",
Expand All @@ -47,7 +48,8 @@
"sqlite": "^5.1.1",
"sqlite3": "^5.1.7",
"typeorm": "^0.3.20",
"ulid": "^2.3.0"
"ulid": "^2.3.0",
"yaml": "^2.4.2"
},
"devDependencies": {
"@nestjs/cli": "^10.0.0",
Expand Down
9 changes: 5 additions & 4 deletions cortex-js/src/domain/abstracts/oai.abstract.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { HttpService } from '@nestjs/axios';
import { EngineExtension } from './engine.abstract';
import stream from 'stream';
import { firstValueFrom } from 'rxjs';

export abstract class OAIEngineExtension extends EngineExtension {
abstract apiUrl: string;
Expand All @@ -14,15 +15,15 @@ export abstract class OAIEngineExtension extends EngineExtension {
headers: Record<string, string>,
): Promise<stream.Readable | any> {
const { stream } = createChatDto;
const response = await this.httpService
.post(this.apiUrl, createChatDto, {
const response = await firstValueFrom(
this.httpService.post(this.apiUrl, createChatDto, {
headers: {
'Content-Type': headers['content-type'] ?? 'application/json',
Authorization: headers['authorization'],
},
responseType: stream ? 'stream' : 'json',
})
.toPromise();
}),
);
if (!response) {
throw new Error('No response');
}
Expand Down
107 changes: 103 additions & 4 deletions cortex-js/src/infrastructure/commanders/models/model-pull.command.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,124 @@
import { CommandRunner, SubCommand } from 'nest-commander';
import { CommandRunner, InquirerService, SubCommand } from 'nest-commander';
import { exit } from 'node:process';
import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
import { RepoDesignation, listFiles } from '@huggingface/hub';
import { basename } from 'node:path';

@SubCommand({
name: 'pull',
aliases: ['download'],
description: 'Download a model. Working with HuggingFace model id.',
})
export class ModelPullCommand extends CommandRunner {
constructor(private readonly modelsCliUsecases: ModelsCliUsecases) {
private janHqModelPrefix = 'janhq';

constructor(
private readonly inquirerService: InquirerService,
private readonly modelsCliUsecases: ModelsCliUsecases,
) {
super();
}

async run(input: string[]) {
if (input.length < 1) {
console.error('Model ID is required');
console.error('Model Id is required');
exit(1);
}

await this.modelsCliUsecases.pullModel(input[0]);
const branches = await this.tryToGetBranches(input[0]);

if (!branches) {
await this.modelsCliUsecases.pullModel(input[0]);
} else {
// branches could be resolved for the janhq/ repo, so we assume it's a JanHQ model
await this.handleJanHqModel(input[0], branches);
}

console.log('\nDownload complete!');
exit(0);
}

/**
 * Probes HuggingFace for a repository matching `input` under the JanHQ
 * prefix and returns the names of its branches.
 *
 * The repo name is the trimmed `input` when it already starts with
 * `janHqModelPrefix`, otherwise `<janHqModelPrefix>/<trimmed input>`.
 *
 * @param input Model id as typed by the user.
 * @returns The branch names reported by the `/refs` endpoint, or `undefined`
 *   when the repo cannot be listed, the request fails, or the response does
 *   not carry a `branches` array.
 */
private async tryToGetBranches(input: string): Promise<string[] | undefined> {
  try {
    // Trim once and use the trimmed value everywhere, so stray whitespace can
    // never end up inside the repo name or the request URL.
    const trimmedInput = input.trim();
    const sanitizedInput = trimmedInput.startsWith(this.janHqModelPrefix)
      ? trimmedInput
      : `${this.janHqModelPrefix}/${trimmedInput}`;

    const repo: RepoDesignation = {
      type: 'model',
      name: sanitizedInput,
    };

    // Pull at most one entry; the loop exists only as an accessibility probe.
    // NOTE(review): presumably listFiles rejects for a missing/private repo,
    // which lands in the catch below — confirm against @huggingface/hub.
    for await (const _fileInfo of listFiles({ repo })) {
      break;
    }

    const response = await fetch(
      `https://huggingface.co/api/models/${sanitizedInput}/refs`,
    );
    if (!response.ok) {
      return undefined;
    }

    const data: unknown = await response.json();
    const branches = (data as { branches?: unknown } | null)?.branches;
    if (!Array.isArray(branches)) {
      return undefined;
    }

    return branches.map((branch: { name: string }) => branch.name);
  } catch {
    // Best effort: any failure means "not resolvable as a JanHQ repo".
    return undefined;
  }
}

/**
 * Shows an interactive list prompt titled "Select version" and resolves with
 * the entry the user picked.
 *
 * @param tags Entries offered as choices.
 * @returns The `tag` answer produced by the prompt.
 */
private async versionInquiry(tags: string[]): Promise<string> {
  const answers = await this.inquirerService.inquirer.prompt({
    type: 'list',
    name: 'tag',
    message: 'Select version',
    choices: tags,
  });
  return answers.tag;
}

/**
 * Downloads the first `.gguf` file found in a JanHQ repository at a chosen
 * revision.
 *
 * When more than one branch is available the user is asked to pick one;
 * otherwise the single branch is used. The process exits with code 1 when no
 * revision or no `.gguf` file can be found.
 *
 * @param repoName Model id as typed by the user; prefixed with
 *   `janHqModelPrefix` when it does not already start with it.
 * @param branches Branch names available for the repository.
 */
private async handleJanHqModel(repoName: string, branches: string[]) {
  // Use the trimmed name for both the prefix check and the final repo name so
  // surrounding whitespace never leaks into the repo id or download URL.
  const trimmedRepoName = repoName.trim();
  const sanitizedRepoName = trimmedRepoName.startsWith(this.janHqModelPrefix)
    ? trimmedRepoName
    : `${this.janHqModelPrefix}/${trimmedRepoName}`;

  // Only prompt when there is an actual choice to make.
  const revision =
    branches.length > 1 ? await this.versionInquiry(branches) : branches[0];
  if (!revision) {
    console.error("Can't find model revision.");
    exit(1);
  }

  const repo: RepoDesignation = { type: 'model', name: sanitizedRepoName };
  let ggufUrl: string | undefined = undefined;
  let fileSize = 0;
  // Take the first .gguf file listed for the selected revision.
  for await (const fileInfo of listFiles({ repo, revision })) {
    if (fileInfo.path.endsWith('.gguf')) {
      ggufUrl = `https://huggingface.co/${sanitizedRepoName}/resolve/${revision}/${fileInfo.path}`;
      fileSize = fileInfo.size;
      break;
    }
  }

  if (!ggufUrl) {
    console.error("Can't find model file.");
    exit(1);
  }

  console.log('Downloading', basename(ggufUrl));
  // The model id passed on is "<repo>/<revision>".
  await this.modelsCliUsecases.pullModelWithExactUrl(
    `${sanitizedRepoName}/${revision}`,
    ggufUrl,
    fileSize,
  );
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/**
 * Tokenizer-related settings carried alongside a model.
 */
export interface ModelTokenizer {
  /** Prompt template to use for the model. Always present. */
  promptTemplate: string;
  /** Optional stop word for the model; may be omitted. */
  stopWord?: string;
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import decompress from 'decompress';
import { exit } from 'node:process';
import { InitOptions } from '../types/init-options.interface';
import { Injectable } from '@nestjs/common';
import { firstValueFrom } from 'rxjs';

@Injectable()
export class InitCliUsecases {
Expand All @@ -19,17 +20,17 @@ export class InitCliUsecases {
engineFileName: string,
version: string = 'latest',
): Promise<any> => {
const res = await this.httpService
.get(
const res = await firstValueFrom(
this.httpService.get(
this.CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`,
{
headers: {
'X-GitHub-Api-Version': '2022-11-28',
Accept: 'application/vnd.github+json',
},
},
)
.toPromise();
),
);

if (!res?.data) {
console.log('Failed to fetch releases');
Expand All @@ -55,11 +56,11 @@ export class InitCliUsecases {
const engineDir = resolve(this.rootDir(), 'cortex-cpp');
if (existsSync(engineDir)) rmSync(engineDir, { recursive: true });

const download = await this.httpService
.get(toDownloadAsset.browser_download_url, {
const download = await firstValueFrom(
this.httpService.get(toDownloadAsset.browser_download_url, {
responseType: 'stream',
})
.toPromise();
}),
);
if (!download) {
console.log('Failed to download model');
process.exit(1);
Expand Down Expand Up @@ -183,11 +184,11 @@ export class InitCliUsecases {
).replace('<platform>', platform);
const destination = resolve(this.rootDir(), 'cuda-toolkit.tar.gz');

const download = await this.httpService
.get(url, {
const download = await firstValueFrom(
this.httpService.get(url, {
responseType: 'stream',
})
.toPromise();
}),
);

if (!download) {
console.log('Failed to download dependency');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import {
ZEPHYR,
ZEPHYR_JINJA,
} from '../prompt-constants';
import { ModelTokenizer } from '../types/model-tokenizer.interface';
import { HttpService } from '@nestjs/axios';
import { firstValueFrom } from 'rxjs';

const AllQuantizations = [
'Q3_K_S',
Expand Down Expand Up @@ -51,6 +54,7 @@ export class ModelsCliUsecases {
private readonly modelsUsecases: ModelsUsecases,
@Inject(InquirerService)
private readonly inquirerService: InquirerService,
private readonly httpService: HttpService,
) {}

/**
Expand Down Expand Up @@ -139,6 +143,47 @@ export class ModelsCliUsecases {
return this.modelsUsecases.remove(modelId);
}

/**
 * Registers (if not already present) and downloads a GGUF model from an
 * exact file URL, showing a progress bar while downloading.
 *
 * The prompt template and stop word are read from the GGUF metadata at `url`;
 * when they cannot be read, the template falls back to `LLAMA_2` and no stop
 * word is set.
 *
 * @param modelId Id (also used as the name) under which the model is stored.
 * @param url Direct URL of the GGUF file.
 * @param fileSize Size stored in the model's metadata.
 */
async pullModelWithExactUrl(modelId: string, url: string, fileSize: number) {
  const tokenizer = await this.getHFModelTokenizer(url);
  const promptTemplate = tokenizer?.promptTemplate ?? LLAMA_2;
  // Only keep a real stop word: an empty string is not a meaningful stop
  // sequence and must not be stored in the model parameters.
  const stopWords: string[] = tokenizer?.stopWord ? [tokenizer.stopWord] : [];

  const model: CreateModelDto = {
    sources: [
      {
        url: url,
      },
    ],
    id: modelId,
    name: modelId,
    version: '',
    format: ModelFormat.GGUF,
    description: '',
    settings: {
      prompt_template: promptTemplate,
    },
    parameters: {
      stop: stopWords,
    },
    metadata: {
      author: 'janhq',
      size: fileSize,
      tags: [],
    },
    engine: 'cortex',
  };
  // Create the model record only when it is not registered yet.
  if (!(await this.modelsUsecases.findOne(modelId))) {
    await this.modelsUsecases.create(model);
  }

  // The bar runs from 0 to 100 and is fed by the download progress callback.
  const bar = new SingleBar({}, Presets.shades_classic);
  bar.start(100, 0);
  const callback = (progress: number) => {
    bar.update(progress);
  };
  await this.modelsUsecases.downloadModel(modelId, callback);
}

/**
* Pull model from Model repository (HF, Jan...)
* @param modelId
Expand All @@ -155,6 +200,30 @@ export class ModelsCliUsecases {
await this.modelsUsecases.downloadModel(modelId, callback);
}

/**
 * Reads tokenizer information from the metadata of a GGUF file.
 *
 * The stop word is the entry of `tokenizer.ggml.tokens` at index
 * `tokenizer.ggml.eos_token_id`; the prompt template is guessed from
 * `tokenizer.chat_template`.
 *
 * @param ggufUrl URL of the GGUF file whose metadata is read.
 * @returns The stop word (empty string when it cannot be resolved) and the
 *   guessed prompt template, or `undefined` when the metadata cannot be read.
 */
private async getHFModelTokenizer(
  ggufUrl: string,
): Promise<ModelTokenizer | undefined> {
  try {
    const { metadata } = await gguf(ggufUrl);
    // @ts-expect-error "tokenizer.ggml.eos_token_id"
    const index = metadata['tokenizer.ggml.eos_token_id'];
    // @ts-expect-error "tokenizer.chat_template"
    const hfChatTemplate = metadata['tokenizer.chat_template'];
    const promptTemplate =
      this.guessPromptTemplateFromHuggingFace(hfChatTemplate);
    // A missing token list must not discard the prompt template guessed
    // above, so fall back to an empty stop word instead of throwing.
    // @ts-expect-error "tokenizer.ggml.tokens"
    const stopWord: string = metadata['tokenizer.ggml.tokens']?.[index] ?? '';

    return {
      stopWord,
      promptTemplate,
    };
  } catch (err) {
    console.log('Failed to get model metadata:', err);
    return undefined;
  }
}

//// PRIVATE METHODS ////

/**
Expand Down Expand Up @@ -193,26 +262,10 @@ export class ModelsCliUsecases {
sibling = data.siblings.find((e) => e.rfilename.includes('.gguf'));
}
if (!sibling) throw 'No expected quantization found';
const tokenizer = await this.getHFModelTokenizer(sibling.downloadUrl!);

let stopWord = '';
let promptTemplate = LLAMA_2;

try {
const { metadata } = await gguf(sibling.downloadUrl!);
// @ts-expect-error "tokenizer.ggml.eos_token_id"
const index = metadata['tokenizer.ggml.eos_token_id'];
// @ts-expect-error "tokenizer.ggml.eos_token_id"
const hfChatTemplate = metadata['tokenizer.chat_template'];
promptTemplate = this.guessPromptTemplateFromHuggingFace(hfChatTemplate);

// @ts-expect-error "tokenizer.ggml.tokens"
stopWord = metadata['tokenizer.ggml.tokens'][index] ?? '';
} catch (err) {}

const stopWords: string[] = [];
if (stopWord.length > 0) {
stopWords.push(stopWord);
}
const promptTemplate = tokenizer?.promptTemplate ?? LLAMA_2;
const stopWords: string[] = [tokenizer?.stopWord ?? ''];

const model: CreateModelDto = {
sources: [
Expand Down Expand Up @@ -343,8 +396,8 @@ export class ModelsCliUsecases {
private async fetchHuggingFaceRepoData(repoId: string) {
const sanitizedUrl = this.getRepoModelsUrl(repoId);

const res = await fetch(sanitizedUrl);
const response = await res.json();
const res = await firstValueFrom(this.httpService.get(sanitizedUrl));
const response = res.data;
if (response['error'] != null) {
throw new Error(response['error']);
}
Expand Down
Loading