diff --git a/cortex-js/package.json b/cortex-js/package.json
index a66b5b6da..9a33fe224 100644
--- a/cortex-js/package.json
+++ b/cortex-js/package.json
@@ -26,6 +26,7 @@
   },
   "dependencies": {
     "@huggingface/gguf": "^0.1.5",
+    "@huggingface/hub": "^0.15.1",
     "@nestjs/axios": "^3.0.2",
     "@nestjs/common": "^10.0.0",
     "@nestjs/config": "^3.2.2",
@@ -47,7 +48,8 @@
     "sqlite": "^5.1.1",
     "sqlite3": "^5.1.7",
     "typeorm": "^0.3.20",
-    "ulid": "^2.3.0"
+    "ulid": "^2.3.0",
+    "yaml": "^2.4.2"
   },
   "devDependencies": {
     "@nestjs/cli": "^10.0.0",
diff --git a/cortex-js/src/domain/abstracts/oai.abstract.ts b/cortex-js/src/domain/abstracts/oai.abstract.ts
index 5f145af64..6f5165d53 100644
--- a/cortex-js/src/domain/abstracts/oai.abstract.ts
+++ b/cortex-js/src/domain/abstracts/oai.abstract.ts
@@ -1,6 +1,7 @@
 import { HttpService } from '@nestjs/axios';
 import { EngineExtension } from './engine.abstract';
 import stream from 'stream';
+import { firstValueFrom } from 'rxjs';
 
 export abstract class OAIEngineExtension extends EngineExtension {
   abstract apiUrl: string;
@@ -14,15 +15,15 @@ export abstract class OAIEngineExtension extends EngineExtension {
     headers: Record<string, string>,
   ): Promise<stream.Readable | any> {
     const { stream } = createChatDto;
-    const response = await this.httpService
-      .post(this.apiUrl, createChatDto, {
+    const response = await firstValueFrom(
+      this.httpService.post(this.apiUrl, createChatDto, {
         headers: {
          'Content-Type': headers['content-type'] ?? 'application/json',
          Authorization: headers['authorization'],
        },
        responseType: stream ? 'stream' : 'json',
-      })
-      .toPromise();
+      }),
+    );
     if (!response) {
       throw new Error('No response');
     }
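Throughout this diff the deprecated `Observable.toPromise()` is replaced with rxjs's `firstValueFrom`. A minimal standalone sketch of the behavioral difference (plain rxjs, values illustrative):

```ts
import { firstValueFrom, of, EMPTY } from 'rxjs';

async function demo() {
  const response$ = of({ data: 'hello' });

  // firstValueFrom resolves with the first emission.
  console.log(await firstValueFrom(response$)); // { data: 'hello' }

  // Unlike the deprecated toPromise(), which resolved with undefined,
  // it rejects when the observable completes without emitting.
  await firstValueFrom(EMPTY).catch((e) => console.log(e.name)); // EmptyError
}

demo();
```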
diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts
index c1a1af7ac..58b4a5d4a 100644
--- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts
+++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts
@@ -1,6 +1,8 @@
-import { CommandRunner, SubCommand } from 'nest-commander';
+import { CommandRunner, InquirerService, SubCommand } from 'nest-commander';
 import { exit } from 'node:process';
 import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
+import { RepoDesignation, listFiles } from '@huggingface/hub';
+import { basename } from 'node:path';
 
 @SubCommand({
   name: 'pull',
@@ -8,18 +10,115 @@ import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
   description: 'Download a model. Working with HuggingFace model id.',
 })
 export class ModelPullCommand extends CommandRunner {
-  constructor(private readonly modelsCliUsecases: ModelsCliUsecases) {
+  private janHqModelPrefix = 'janhq';
+
+  constructor(
+    private readonly inquirerService: InquirerService,
+    private readonly modelsCliUsecases: ModelsCliUsecases,
+  ) {
     super();
   }
 
   async run(input: string[]) {
     if (input.length < 1) {
-      console.error('Model ID is required');
+      console.error('Model Id is required');
       exit(1);
     }
 
-    await this.modelsCliUsecases.pullModel(input[0]);
+    const branches = await this.tryToGetBranches(input[0]);
+
+    if (!branches) {
+      await this.modelsCliUsecases.pullModel(input[0]);
+    } else {
+      // if we can list the repo's branches, we assume it's a JanHQ model
+      await this.handleJanHqModel(input[0], branches);
+    }
+
     console.log('\nDownload complete!');
     exit(0);
   }
+
+  private async tryToGetBranches(input: string): Promise<string[] | undefined> {
+    try {
+      // prepend janhq/ if the input is not already prefixed
+      const sanitizedInput = input.trim().startsWith(this.janHqModelPrefix)
+        ? input
+        : `${this.janHqModelPrefix}/${input}`;
+
+      const repo: RepoDesignation = {
+        type: 'model',
+        name: sanitizedInput,
+      };
+
+      // a single iteration is enough to verify that the repo exists
+      for await (const _fileInfo of listFiles({ repo })) {
+        break;
+      }
+
+      const response = await fetch(
+        `https://huggingface.co/api/models/${sanitizedInput}/refs`,
+      );
+      const data = await response.json();
+      const branches: string[] = data.branches.map((branch: any) => {
+        return branch.name;
+      });
+
+      return branches;
+    } catch (err) {
+      return undefined;
+    }
+  }
+
+  private async versionInquiry(tags: string[]): Promise<string> {
+    const { tag } = await this.inquirerService.inquirer.prompt({
+      type: 'list',
+      name: 'tag',
+      message: 'Select version',
+      choices: tags,
+    });
+
+    return tag;
+  }
+
+  private async handleJanHqModel(repoName: string, branches: string[]) {
+    const sanitizedRepoName = repoName.trim().startsWith(this.janHqModelPrefix)
+      ? repoName
+      : `${this.janHqModelPrefix}/${repoName}`;
+
+    let selectedTag = branches[0];
+
+    if (branches.length > 1) {
+      selectedTag = await this.versionInquiry(branches);
+    }
+
+    const revision = selectedTag;
+    if (!revision) {
+      console.error("Can't find model revision.");
+      exit(1);
+    }
+
+    const repo: RepoDesignation = { type: 'model', name: sanitizedRepoName };
+    let ggufUrl: string | undefined = undefined;
+    let fileSize = 0;
+    // pick the first .gguf file in the repo at the selected revision
+    for await (const fileInfo of listFiles({
+      repo: repo,
+      revision: revision,
+    })) {
+      if (fileInfo.path.endsWith('.gguf')) {
+        ggufUrl = `https://huggingface.co/${sanitizedRepoName}/resolve/${revision}/${fileInfo.path}`;
+        fileSize = fileInfo.size;
+        break;
+      }
+    }
+
+    if (!ggufUrl) {
+      console.error("Can't find model file.");
+      exit(1);
+    }
+    console.log('Downloading', basename(ggufUrl));
+    await this.modelsCliUsecases.pullModelWithExactUrl(
+      `${sanitizedRepoName}/${revision}`,
+      ggufUrl,
+      fileSize,
+    );
+  }
 }
diff --git a/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts b/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts
new file mode 100644
index 000000000..b7e038b53
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts
@@ -0,0 +1,4 @@
+export interface ModelTokenizer {
+  stopWord?: string;
+  promptTemplate: string;
+}
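For reviewers: `tryToGetBranches` combines two HuggingFace surfaces, `listFiles` from `@huggingface/hub` (whose async iterator rejects for a nonexistent repo, so one iteration doubles as an existence probe) and the public `/api/models/<id>/refs` endpoint for branch names. A standalone sketch of the same idea, with an illustrative repo name:

```ts
import { listFiles, type RepoDesignation } from '@huggingface/hub';

async function listBranches(name: string): Promise<string[] | undefined> {
  try {
    const repo: RepoDesignation = { type: 'model', name };
    // One iteration is enough: a missing repo rejects the iterator.
    for await (const _file of listFiles({ repo })) break;

    const res = await fetch(`https://huggingface.co/api/models/${name}/refs`);
    const data = await res.json();
    return data.branches.map((b: { name: string }) => b.name);
  } catch {
    return undefined;
  }
}

// listBranches('janhq/tinyllama').then(console.log); // e.g. ['main']
```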
diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
index 47456e4f9..aed90f235 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
@@ -6,6 +6,7 @@ import decompress from 'decompress';
 import { exit } from 'node:process';
 import { InitOptions } from '../types/init-options.interface';
 import { Injectable } from '@nestjs/common';
+import { firstValueFrom } from 'rxjs';
 
 @Injectable()
 export class InitCliUsecases {
@@ -19,8 +20,8 @@
     engineFileName: string,
     version: string = 'latest',
   ): Promise<any> => {
-    const res = await this.httpService
-      .get(
+    const res = await firstValueFrom(
+      this.httpService.get(
         this.CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`,
         {
           headers: {
@@ -28,8 +29,8 @@
             Accept: 'application/vnd.github+json',
           },
         },
-      )
-      .toPromise();
+      ),
+    );
 
     if (!res?.data) {
       console.log('Failed to fetch releases');
@@ -55,11 +56,11 @@ export class InitCliUsecases {
     const engineDir = resolve(this.rootDir(), 'cortex-cpp');
     if (existsSync(engineDir)) rmSync(engineDir, { recursive: true });
 
-    const download = await this.httpService
-      .get(toDownloadAsset.browser_download_url, {
+    const download = await firstValueFrom(
+      this.httpService.get(toDownloadAsset.browser_download_url, {
         responseType: 'stream',
-      })
-      .toPromise();
+      }),
+    );
     if (!download) {
       console.log('Failed to download model');
       process.exit(1);
     }
@@ -183,11 +184,11 @@
     ).replace('<platform>', platform);
     const destination = resolve(this.rootDir(), 'cuda-toolkit.tar.gz');
 
-    const download = await this.httpService
-      .get(url, {
+    const download = await firstValueFrom(
+      this.httpService.get(url, {
        responseType: 'stream',
-      })
-      .toPromise();
+      }),
+    );
 
     if (!download) {
       console.log('Failed to download dependency');
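The three hunks above change only how the Axios response promise is obtained; downstream the code still consumes `download.data` as a Node stream. For reference, a hedged sketch of the full pattern of persisting a streamed response (helper name and paths are illustrative, not part of this diff):

```ts
import { HttpService } from '@nestjs/axios';
import { createWriteStream } from 'node:fs';
import { firstValueFrom } from 'rxjs';

// Illustrative helper: fetch a URL as a stream and write it to disk.
async function saveToFile(http: HttpService, url: string, dest: string) {
  const res = await firstValueFrom(http.get(url, { responseType: 'stream' }));
  await new Promise<void>((resolve, reject) => {
    const out = createWriteStream(dest);
    res.data.pipe(out);
    out.on('finish', resolve);
    out.on('error', reject);
  });
}
```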
diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
index a34dee800..5dcaa55e5 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
@@ -21,6 +21,9 @@
   ZEPHYR,
   ZEPHYR_JINJA,
 } from '../prompt-constants';
+import { ModelTokenizer } from '../types/model-tokenizer.interface';
+import { HttpService } from '@nestjs/axios';
+import { firstValueFrom } from 'rxjs';
 
 const AllQuantizations = [
   'Q3_K_S',
@@ -51,6 +54,7 @@
     private readonly modelsUsecases: ModelsUsecases,
     @Inject(InquirerService)
     private readonly inquirerService: InquirerService,
+    private readonly httpService: HttpService,
   ) {}
 
   /**
@@ -139,6 +143,47 @@
     return this.modelsUsecases.remove(modelId);
   }
 
+  async pullModelWithExactUrl(modelId: string, url: string, fileSize: number) {
+    const tokenizer = await this.getHFModelTokenizer(url);
+    const promptTemplate = tokenizer?.promptTemplate ?? LLAMA_2;
+    const stopWords: string[] = tokenizer?.stopWord ? [tokenizer.stopWord] : [];
+
+    const model: CreateModelDto = {
+      sources: [
+        {
+          url: url,
+        },
+      ],
+      id: modelId,
+      name: modelId,
+      version: '',
+      format: ModelFormat.GGUF,
+      description: '',
+      settings: {
+        prompt_template: promptTemplate,
+      },
+      parameters: {
+        stop: stopWords,
+      },
+      metadata: {
+        author: 'janhq',
+        size: fileSize,
+        tags: [],
+      },
+      engine: 'cortex',
+    };
+    if (!(await this.modelsUsecases.findOne(modelId))) {
+      await this.modelsUsecases.create(model);
+    }
+
+    const bar = new SingleBar({}, Presets.shades_classic);
+    bar.start(100, 0);
+    const callback = (progress: number) => {
+      bar.update(progress);
+    };
+    await this.modelsUsecases.downloadModel(modelId, callback);
+  }
+
   /**
    * Pull model from Model repository (HF, Jan...)
    * @param modelId
    */
@@ -155,6 +200,30 @@
     await this.modelsUsecases.downloadModel(modelId, callback);
   }
 
+  private async getHFModelTokenizer(
+    ggufUrl: string,
+  ): Promise<ModelTokenizer | undefined> {
+    try {
+      const { metadata } = await gguf(ggufUrl);
+      // @ts-expect-error "tokenizer.ggml.eos_token_id"
+      const index = metadata['tokenizer.ggml.eos_token_id'];
+      // @ts-expect-error "tokenizer.chat_template"
+      const hfChatTemplate = metadata['tokenizer.chat_template'];
+      const promptTemplate =
+        this.guessPromptTemplateFromHuggingFace(hfChatTemplate);
+      // @ts-expect-error "tokenizer.ggml.tokens"
+      const stopWord: string = metadata['tokenizer.ggml.tokens'][index] ?? '';
+
+      return {
+        stopWord,
+        promptTemplate,
+      };
+    } catch (err) {
+      console.log('Failed to get model metadata:', err);
+      return undefined;
+    }
+  }
+
   //// PRIVATE METHODS ////
 
   /**
@@ -193,26 +262,10 @@
       sibling = data.siblings.find((e) => e.rfilename.includes('.gguf'));
     }
     if (!sibling) throw 'No expected quantization found';
+    const tokenizer = await this.getHFModelTokenizer(sibling.downloadUrl!);
 
-    let stopWord = '';
-    let promptTemplate = LLAMA_2;
-
-    try {
-      const { metadata } = await gguf(sibling.downloadUrl!);
-      // @ts-expect-error "tokenizer.ggml.eos_token_id"
-      const index = metadata['tokenizer.ggml.eos_token_id'];
-      // @ts-expect-error "tokenizer.ggml.eos_token_id"
-      const hfChatTemplate = metadata['tokenizer.chat_template'];
-      promptTemplate = this.guessPromptTemplateFromHuggingFace(hfChatTemplate);
-
-      // @ts-expect-error "tokenizer.ggml.tokens"
-      stopWord = metadata['tokenizer.ggml.tokens'][index] ?? '';
-    } catch (err) {}
-
-    const stopWords: string[] = [];
-    if (stopWord.length > 0) {
-      stopWords.push(stopWord);
-    }
+    const promptTemplate = tokenizer?.promptTemplate ?? LLAMA_2;
+    const stopWords: string[] = tokenizer?.stopWord ? [tokenizer.stopWord] : [];
 
     const model: CreateModelDto = {
       sources: [
@@ -343,8 +396,8 @@
   private async fetchHuggingFaceRepoData(repoId: string) {
     const sanitizedUrl = this.getRepoModelsUrl(repoId);
-    const res = await fetch(sanitizedUrl);
-    const response = await res.json();
+    const res = await firstValueFrom(this.httpService.get(sanitizedUrl));
+    const response = res.data;
     if (response['error'] != null) {
       throw new Error(response['error']);
     }
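`getHFModelTokenizer` hinges on three GGUF metadata keys: `tokenizer.ggml.eos_token_id` indexes into `tokenizer.ggml.tokens` to yield the stop word, and `tokenizer.chat_template` carries the Jinja template that `guessPromptTemplateFromHuggingFace` maps onto a known prompt constant. A standalone sketch of those lookups; `gguf()` reads just the metadata from the remote file, and the casts are assumptions since the keys are model-dependent:

```ts
import { gguf } from '@huggingface/gguf';

// Illustrative: pull tokenizer hints out of a remote GGUF header.
async function tokenizerHints(ggufUrl: string) {
  const { metadata } = await gguf(ggufUrl);
  const meta = metadata as Record<string, any>; // keys vary per model
  const eosId: number = meta['tokenizer.ggml.eos_token_id'];
  const tokens: string[] = meta['tokenizer.ggml.tokens'];
  return {
    stopWord: tokens?.[eosId],
    chatTemplate: meta['tokenizer.chat_template'] as string | undefined,
  };
}
```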
@@ -72,13 +73,15 @@
       modelSettings.ai_prompt = prompt.ai_prompt;
     }
 
-    await this.httpService.post(this.loadModelUrl, modelSettings).toPromise();
+    await firstValueFrom(
+      this.httpService.post(this.loadModelUrl, modelSettings),
+    );
   }
 
   override async unloadModel(modelId: string): Promise<void> {
-    await this.httpService
-      .post(this.unloadModelUrl, { model: modelId })
-      .toPromise();
+    await firstValueFrom(
+      this.httpService.post(this.unloadModelUrl, { model: modelId }),
+    );
   }
 
   private readonly promptTemplateConverter = (
diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts
index f5ef2a87f..496d8d4a3 100644
--- a/cortex-js/src/usecases/cortex/cortex.usecases.ts
+++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts
@@ -5,6 +5,7 @@ import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto';
 import { HttpService } from '@nestjs/axios';
 import { defaultCortexCppHost, defaultCortexCppPort } from 'constant';
 import { existsSync } from 'node:fs';
+import { firstValueFrom } from 'rxjs';
 
 @Injectable()
 export class CortexUsecases {
@@ -75,9 +76,11 @@
     port?: number,
   ): Promise<CortexOperationSuccessfullyDto> {
     try {
-      await this.httpService
-        .delete(`http://${host}:${port}/processmanager/destroy`)
-        .toPromise();
+      await firstValueFrom(
+        this.httpService.delete(
+          `http://${host}:${port}/processmanager/destroy`,
+        ),
+      );
     } catch (err) {
       console.error(err.response.data);
     } finally {
diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts
index 2ec0ffbba..24ea70c7c 100644
--- a/cortex-js/src/usecases/models/models.usecases.ts
+++ b/cortex-js/src/usecases/models/models.usecases.ts
@@ -24,6 +24,7 @@ import { EngineExtension } from '@/domain/abstracts/engine.abstract';
 import { HttpService } from '@nestjs/axios';
 import { ModelSettingParamsDto } from '@/infrastructure/dtos/models/model-setting-params.dto';
 import { normalizeModelId } from '@/infrastructure/commanders/utils/normalize-model-id';
+import { firstValueFrom } from 'rxjs';
 
 @Injectable()
 export class ModelsUsecases {
@@ -210,11 +211,11 @@
     await promises.mkdir(modelFolder, { recursive: true });
     const destination = join(modelFolder, fileName);
 
-    const response = await this.httpService
-      .get(downloadUrl, {
+    const response = await firstValueFrom(
+      this.httpService.get(downloadUrl, {
         responseType: 'stream',
-      })
-      .toPromise();
+      }),
+    );
     if (!response) {
       throw new Error('Failed to download model');
     }
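One side benefit of the migration, worth noting in review: code that awaits `firstValueFrom(this.httpService...)` can be unit-tested by stubbing `HttpService` with plain rxjs observables. A hedged sketch (the stub and the host/port are hypothetical, not a test from this PR):

```ts
import { firstValueFrom, of } from 'rxjs';

// Hypothetical HttpService stand-in: of() emits once and completes,
// which is exactly the shape firstValueFrom expects.
const httpStub = {
  delete: (_url: string) => of({ data: { destroyed: true }, status: 200 }),
};

async function stopCortexLikeCall() {
  const res = await firstValueFrom(
    httpStub.delete('http://127.0.0.1:3928/processmanager/destroy'),
  );
  console.log(res.data.destroyed); // true
}
```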