diff --git a/cortex-js/src/domain/models/huggingface.interface.ts b/cortex-js/src/domain/models/huggingface.interface.ts index 6e0e69d55..2ab46dc79 100644 --- a/cortex-js/src/domain/models/huggingface.interface.ts +++ b/cortex-js/src/domain/models/huggingface.interface.ts @@ -4,6 +4,13 @@ export interface HuggingFaceModelVersion { fileSize?: number; quantization?: Quantization; } + +export interface HuggingFaceRepoSibling { + rfilename: string; + downloadUrl?: string; + fileSize?: number; + quantization?: Quantization; +} export interface HuggingFaceRepoData { id: string; modelId: string; @@ -18,12 +25,7 @@ export interface HuggingFaceRepoData { pipeline_tag: 'text-generation'; tags: Array<'transformers' | 'pytorch' | 'safetensors' | string>; cardData: Record; - siblings: { - rfilename: string; - downloadUrl?: string; - fileSize?: number; - quantization?: Quantization; - }[]; + siblings: HuggingFaceRepoSibling[]; createdAt: string; } diff --git a/cortex-js/src/domain/models/model.event.ts b/cortex-js/src/domain/models/model.event.ts index d2544b306..7d2177911 100644 --- a/cortex-js/src/domain/models/model.event.ts +++ b/cortex-js/src/domain/models/model.event.ts @@ -7,6 +7,8 @@ const ModelLoadingEvents = [ 'stopped', 'starting-failed', 'stopping-failed', + 'model-downloaded', + 'model-deleted', ] as const; export type ModelLoadingEvent = (typeof ModelLoadingEvents)[number]; diff --git a/cortex-js/src/domain/models/model.interface.ts b/cortex-js/src/domain/models/model.interface.ts index 37119a28f..91153354a 100644 --- a/cortex-js/src/domain/models/model.interface.ts +++ b/cortex-js/src/domain/models/model.interface.ts @@ -168,4 +168,5 @@ export interface ModelRuntimeParams { export interface ModelArtifact { mmproj?: string; llama_model_path?: string; + model_path?: string; } diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts index 7abc1346a..08dbeee6f 100644 --- 
a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -8,6 +8,7 @@ import { existsSync } from 'fs'; import { join } from 'node:path'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; import { InitCliUsecases } from '../usecases/init.cli.usecases'; +import { checkModelCompatibility } from '@/utils/model-check'; @SubCommand({ name: 'pull', @@ -35,6 +36,8 @@ export class ModelPullCommand extends CommandRunner { } const modelId = passedParams[0]; + checkModelCompatibility(modelId); + await this.modelsCliUsecases.pullModel(modelId).catch((e: Error) => { if (e instanceof ModelNotFoundException) console.error('Model does not exist.'); diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts index fe50cfc1d..8a935a9d7 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts @@ -14,6 +14,7 @@ import { existsSync } from 'node:fs'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; import { join } from 'node:path'; import { Engines } from '../types/engine.interface'; +import { checkModelCompatibility } from '@/utils/model-check'; type ModelStartOptions = { attach: boolean; @@ -58,9 +59,14 @@ export class ModelStartCommand extends CommandRunner { !Array.isArray(existingModel.files) || /^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0]) ) { - console.error('Model is not available. Please pull the model first.'); + console.error( + `${modelId} not found on filesystem. 
Please try 'cortex pull ${modelId}' first.`, + ); process.exit(1); } + + checkModelCompatibility(modelId); + const engine = existingModel.engine || 'cortex.llamacpp'; // Pull engine if not exist if ( @@ -72,10 +78,7 @@ export class ModelStartCommand extends CommandRunner { engine, ); } - if (engine === Engines.onnx && process.platform !== 'win32') { - console.error('The ONNX engine does not support this OS yet.'); - process.exit(1); - } + await this.cortexUsecases .startCortex(options.attach) .then(() => this.modelsCliUsecases.startModel(modelId, options.preset)) diff --git a/cortex-js/src/infrastructure/commanders/serve.command.ts b/cortex-js/src/infrastructure/commanders/serve.command.ts index ba6b4138e..4eaa7bc45 100644 --- a/cortex-js/src/infrastructure/commanders/serve.command.ts +++ b/cortex-js/src/infrastructure/commanders/serve.command.ts @@ -42,8 +42,8 @@ export class ServeCommand extends CommandRunner { console.log( chalk.blue(`API Playground available at http://${host}:${port}/api`), ); - } catch (err) { - console.error(err.message ?? err); + } catch { + console.error(`Failed to start server. 
Is port ${port} in use?`); } } diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts index 32266d3e0..571ef0b18 100644 --- a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -14,6 +14,7 @@ import { join } from 'path'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; import { InitCliUsecases } from '../usecases/init.cli.usecases'; import { Engines } from '../types/engine.interface'; +import { checkModelCompatibility } from '@/utils/model-check'; type RunOptions = { threadId?: string; @@ -55,7 +56,9 @@ export class RunCommand extends CommandRunner { // If not exist // Try Pull if (!(await this.modelsCliUsecases.getModel(modelId))) { - console.log(`Model ${modelId} not found. Try pulling model...`); + console.log( + `${modelId} not found on filesystem. Downloading from remote: https://huggingface.co/cortexhub if possible.`, + ); await this.modelsCliUsecases.pullModel(modelId).catch((e: Error) => { if (e instanceof ModelNotFoundException) console.error('Model does not exist.'); @@ -71,10 +74,12 @@ export class RunCommand extends CommandRunner { !Array.isArray(existingModel.files) || /^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0]) ) { - console.error('Model is not available. 
Please pull the model first.'); + console.error('Model is not available.'); process.exit(1); } + checkModelCompatibility(modelId); + const engine = existingModel.engine || 'cortex.llamacpp'; // Pull engine if not exist if ( @@ -86,10 +91,6 @@ export class RunCommand extends CommandRunner { engine, ); } - if (engine === Engines.onnx && process.platform !== 'win32') { - console.error('The ONNX engine does not support this OS yet.'); - process.exit(1); - } return this.cortexUsecases .startCortex(false) diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts index fb7750afd..101776d12 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts @@ -18,6 +18,7 @@ import { } from '@/infrastructure/constants/cortex'; import { checkNvidiaGPUExist, cudaVersion } from '@/utils/cuda'; import { Engines } from '../types/engine.interface'; +import { checkModelCompatibility } from '@/utils/model-check'; @Injectable() export class InitCliUsecases { @@ -71,11 +72,6 @@ export class InitCliUsecases { ) await this.installLlamaCppEngine(options, version); - if (engine === Engines.onnx && process.platform !== 'win32') { - console.error('The ONNX engine does not support this OS yet.'); - process.exit(1); - } - if (engine !== 'cortex.llamacpp') await this.installAcceleratedEngine('latest', engine); diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts index 16617e05f..cb140559e 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts @@ -1,25 +1,19 @@ import { exit } from 'node:process'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { Model } from 
'@/domain/models/model.interface'; -import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; -import { HuggingFaceRepoData } from '@/domain/models/huggingface.interface'; import { InquirerService } from 'nest-commander'; import { Inject, Injectable } from '@nestjs/common'; import { Presets, SingleBar } from 'cli-progress'; -import { LLAMA_2 } from '@/infrastructure/constants/prompt-constants'; import { HttpService } from '@nestjs/axios'; import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; -import { join, basename } from 'path'; +import { join } from 'path'; import { load } from 'js-yaml'; import { existsSync, readdirSync, readFileSync } from 'fs'; -import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id'; -import { fetchJanRepoData, getHFModelMetadata } from '@/utils/huggingface'; -import { createWriteStream, mkdirSync, promises } from 'node:fs'; -import { firstValueFrom } from 'rxjs'; -import { Engines } from '../types/engine.interface'; +import { isLocalModel } from '@/utils/normalize-model-id'; +import { HuggingFaceRepoSibling } from '@/domain/models/huggingface.interface'; @Injectable() export class ModelsCliUsecases { @@ -120,170 +114,34 @@ export class ModelsCliUsecases { console.error('Model already exists'); process.exit(1); } - - if (modelId.includes('onnx') || modelId.includes('tensorrt')) { - await this.pullEngineModelFiles(modelId); - } else { - await this.pullGGUFModel(modelId); - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - const callback = (progress: number) => { - bar.update(progress); - }; - - try { - await this.modelsUsecases.downloadModel(modelId, callback); - - const model = await this.modelsUsecases.findOne(modelId); - const fileUrl = join( - await 
this.fileService.getModelsPath(), - normalizeModelId(modelId), - basename((model?.files as string[])[0]), - ); - await this.modelsUsecases.update(modelId, { - files: [fileUrl], - name: modelId.replace(':default', ''), - }); - } catch (err) { - bar.stop(); - throw err; - } - } - } - - /** - * It's to pull engine model files from HuggingFace repository - * @param modelId - */ - private async pullEngineModelFiles(modelId: string) { - const modelsContainerDir = await this.fileService.getModelsPath(); - - if (!existsSync(modelsContainerDir)) { - mkdirSync(modelsContainerDir, { recursive: true }); - } - - const modelFolder = join(modelsContainerDir, normalizeModelId(modelId)); - await promises.mkdir(modelFolder, { recursive: true }).catch(() => {}); - - const files = (await fetchJanRepoData(modelId)).siblings; - for (const file of files) { - console.log(`Downloading ${file.rfilename}`); - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - const response = await firstValueFrom( - this.httpService.get(file.downloadUrl ?? 
'', { - responseType: 'stream', - }), - ); - if (!response) { - throw new Error('Failed to download model'); - } - - await new Promise((resolve, reject) => { - const writer = createWriteStream(join(modelFolder, file.rfilename)); - let receivedBytes = 0; - const totalBytes = response.headers['content-length']; - - writer.on('finish', () => { - resolve(true); - }); - - writer.on('error', (error) => { - reject(error); - }); - - response.data.on('data', (chunk: any) => { - receivedBytes += chunk.length; - bar.update(Math.floor((receivedBytes / totalBytes) * 100)); - }); - - response.data.pipe(writer); + await this.modelsUsecases.pullModel(modelId, true, (files) => { + return new Promise(async (resolve) => { + const listChoices = files + .filter((e) => e.quantization != null) + .map((e) => { + return { + name: e.quantization, + value: e.quantization, + }; + }); + + if (listChoices.length > 1) { + const { quantization } = await this.inquirerService.inquirer.prompt({ + type: 'list', + name: 'quantization', + message: 'Select quantization', + choices: listChoices, + }); + resolve( + files + .filter((e) => !!e.quantization) + .find((e: any) => e.quantization === quantization) ?? files[0], + ); + } else { + resolve(files.find((e) => e.rfilename.includes('.gguf')) ?? files[0]); + } }); - bar.stop(); - } - - const model: CreateModelDto = load( - readFileSync(join(modelFolder, 'model.yml'), 'utf-8'), - ) as CreateModelDto; - model.files = [join(modelFolder)]; - model.model = modelId; - - if (!(await this.modelsUsecases.findOne(modelId))) - await this.modelsUsecases.create(model); - - if (model.engine === Engines.tensorrtLLM) { - if (process.platform === 'win32') - console.log( - 'Please ensure that you install MPI and its SDK to use the TensorRT engine, as it also requires the Cuda Toolkit 12.3 to work. 
Refs:\n- https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisetup.exe\n- https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisdk.msi', - ); - else if (process.platform === 'linux') - console.log( - 'Please ensure that you install OpenMPI and its SDK to use the TensorRT engine, as it also requires the Cuda Toolkit 12.3 to work.\nYou can install OpenMPI by running "sudo apt update && sudo apt install openmpi-bin libopenmpi-dev"', - ); - } - } - /** - * It's to pull model from HuggingFace repository - * It could be a model from Jan's repo or other authors - * @param modelId HuggingFace model id. e.g. "janhq/llama-3 or llama3:7b" - */ - private async pullGGUFModel(modelId: string) { - const data: HuggingFaceRepoData = - await this.modelsUsecases.fetchModelMetadata(modelId); - - let modelVersion; - - const listChoices = data.siblings - .filter((e) => e.quantization != null) - .map((e) => { - return { - name: e.quantization, - value: e.quantization, - }; - }); - - if (listChoices.length > 1) { - const { quantization } = await this.inquirerService.inquirer.prompt({ - type: 'list', - name: 'quantization', - message: 'Select quantization', - choices: listChoices, - }); - modelVersion = data.siblings - .filter((e) => !!e.quantization) - .find((e: any) => e.quantization === quantization); - } else { - modelVersion = data.siblings.find((e) => e.rfilename.includes('.gguf')); - } - - if (!modelVersion) throw 'No expected quantization found'; - const metadata = await getHFModelMetadata(modelVersion.downloadUrl!); - - const promptTemplate = metadata?.promptTemplate ?? LLAMA_2; - const stopWords: string[] = [metadata?.stopWord ?? '']; - - const model: CreateModelDto = { - files: [modelVersion.downloadUrl ?? 
''], - model: modelId, - name: modelId, - prompt_template: promptTemplate, - stop: stopWords, - - // Default Inference Params - stream: true, - max_tokens: 4098, - frequency_penalty: 0.7, - presence_penalty: 0.7, - temperature: 0.7, - top_p: 0.7, - - // Default Model Settings - ctx_len: 4096, - ngl: 100, - engine: Engines.llamaCPP, - }; - if (!(await this.modelsUsecases.findOne(modelId))) - await this.modelsUsecases.create(model); + }); } /** diff --git a/cortex-js/src/infrastructure/constants/benchmark.ts b/cortex-js/src/infrastructure/constants/benchmark.ts index 083b46584..477c4eba3 100644 --- a/cortex-js/src/infrastructure/constants/benchmark.ts +++ b/cortex-js/src/infrastructure/constants/benchmark.ts @@ -2,7 +2,7 @@ import { BenchmarkConfig } from '@commanders/types/benchmark-config.interface'; export const defaultBenchmarkConfiguration: BenchmarkConfig = { api: { - base_url: 'http://localhost:1337/', + base_url: 'http://localhost:1337/v1', api_key: '', parameters: { messages: [ diff --git a/cortex-js/src/infrastructure/constants/huggingface.ts b/cortex-js/src/infrastructure/constants/huggingface.ts index 1e1c89a78..893a26b61 100644 --- a/cortex-js/src/infrastructure/constants/huggingface.ts +++ b/cortex-js/src/infrastructure/constants/huggingface.ts @@ -5,10 +5,9 @@ export const HUGGING_FACE_TREE_REF_URL = ( ) => `https://huggingface.co/cortexhub/${repo}/resolve/${tree}/${path}`; export const HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL = ( - author: string, - repo: string, + modelId: string, fileName: string, -) => `https://huggingface.co/${author}/${repo}/resolve/main/${fileName}`; +) => `https://huggingface.co/${modelId}/resolve/main/${fileName}`; export const HUGGING_FACE_REPO_URL = (author: string, repo: string) => `https://huggingface.co/${author}/${repo}`; diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts index 7f2077525..fa7d50633 100644 --- 
a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ b/cortex-js/src/infrastructure/controllers/models.controller.ts @@ -21,6 +21,7 @@ import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-s import { TransformInterceptor } from '../interceptors/transform.interceptor'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; import { ModelSettingsDto } from '../dtos/models/model-settings.dto'; +import { CommonResponseDto } from '../dtos/common/common-response.dto'; @ApiTags('Models') @Controller('models') @@ -111,7 +112,11 @@ export class ModelsController { }) @Get('download/:modelId(*)') downloadModel(@Param('modelId') modelId: string) { - return this.modelsUsecases.downloadModel(modelId); + this.modelsUsecases.pullModel(modelId, false); + + return { + message: 'Download model started successfully.', + }; } @ApiOperation({ @@ -135,7 +140,7 @@ export class ModelsController { @ApiResponse({ status: 200, description: 'Ok', - type: DownloadModelResponseDto, + type: CommonResponseDto, }) @ApiOperation({ summary: 'Download a remote model', @@ -149,7 +154,11 @@ export class ModelsController { }) @Get('pull/:modelId(*)') pullModel(@Param('modelId') modelId: string) { - return this.modelsUsecases.pullModel(modelId); + this.modelsUsecases.pullModel(modelId); + + return { + message: 'Download model started successfully.', + }; } @HttpCode(200) diff --git a/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts index 36b72b92e..b450966ba 100644 --- a/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts @@ -6,7 +6,12 @@ export class ModelArtifactDto implements ModelArtifact { @ApiProperty({ description: 'The mmproj bin file url.' }) @IsString() mmproj?: string; - @ApiProperty({ description: 'The llama model bin file url.' 
}) + + @ApiProperty({ description: 'The llama model bin file url (legacy).' }) @IsString() llama_model_path?: string; + + @ApiProperty({ description: 'The gguf model bin file url.' }) + @IsString() + model_path?: string; } diff --git a/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts b/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts index 1471ac236..3c018ab77 100644 --- a/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts +++ b/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts @@ -7,7 +7,9 @@ import { import { HttpService } from '@nestjs/axios'; import { Injectable } from '@nestjs/common'; import { EventEmitter2 } from '@nestjs/event-emitter'; +import { Presets, SingleBar } from 'cli-progress'; import { createWriteStream } from 'node:fs'; +import { basename } from 'node:path'; import { firstValueFrom } from 'rxjs'; @Injectable() @@ -40,6 +42,8 @@ export class DownloadManagerService { title: string, downloadType: DownloadType, urlToDestination: Record, + finishedCallback?: () => Promise, + inSequence: boolean = true, ) { if ( this.allDownloadStates.find( @@ -80,10 +84,39 @@ export class DownloadManagerService { this.allDownloadStates.push(downloadState); this.abortControllers[downloadId] = {}; - Object.keys(urlToDestination).forEach((url) => { - const destination = urlToDestination[url]; - this.downloadFile(downloadId, url, destination); - }); + const callBack = async () => { + // Await post processing callback + await finishedCallback?.(); + + // Finished - update the current downloading states + delete this.abortControllers[downloadId]; + const currentDownloadState = this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + if (currentDownloadState) { + currentDownloadState.status = DownloadStatus.Downloaded; + // remove download state if all children is downloaded + this.allDownloadStates = 
this.allDownloadStates.filter( + (downloadState) => downloadState.id !== downloadId, + ); + } + this.eventEmitter.emit('download.event', this.allDownloadStates); + }; + if (!inSequence) { + return Promise.all( + Object.keys(urlToDestination).map((url) => { + const destination = urlToDestination[url]; + return this.downloadFile(downloadId, url, destination); + }), + ).then(callBack); + } else { + // Download model file in sequence + for (const url of Object.keys(urlToDestination)) { + const destination = urlToDestination[url]; + await this.downloadFile(downloadId, url, destination); + } + return callBack(); + } } private async downloadFile( @@ -94,116 +127,117 @@ export class DownloadManagerService { const controller = new AbortController(); // adding to abort controllers this.abortControllers[downloadId][destination] = controller; - - const response = await firstValueFrom( - this.httpService.get(url, { - responseType: 'stream', - signal: controller.signal, - }), - ); - - // check if response is success - if (!response) { - throw new Error('Failed to download model'); - } - - const writer = createWriteStream(destination); - const totalBytes = response.headers['content-length']; - - // update download state - const currentDownloadState = this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, - ); - if (!currentDownloadState) { - return; - } - const downloadItem = currentDownloadState?.children.find( - (downloadItem) => downloadItem.id === destination, - ); - if (downloadItem) { - downloadItem.size.total = totalBytes; - } - - let transferredBytes = 0; - - writer.on('finish', () => { - // delete the abort controller - delete this.abortControllers[downloadId][destination]; - const currentDownloadState = this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, + return new Promise(async (resolve, reject) => { + const response = await firstValueFrom( + this.httpService.get(url, { + responseType: 'stream', + signal: 
controller.signal, + }), ); - if (!currentDownloadState) { - return; - } - // update current child status to downloaded, find by destination as id - const downloadItem = currentDownloadState?.children.find( - (downloadItem) => downloadItem.id === destination, - ); - if (downloadItem) { - downloadItem.status = DownloadStatus.Downloaded; + // check if response is success + if (!response) { + throw new Error('Failed to download model'); } - const allChildrenDownloaded = currentDownloadState?.children.every( - (downloadItem) => downloadItem.status === DownloadStatus.Downloaded, - ); + const writer = createWriteStream(destination); + const totalBytes = Number(response.headers['content-length']); - if (allChildrenDownloaded) { - delete this.abortControllers[downloadId]; - currentDownloadState.status = DownloadStatus.Downloaded; - // remove download state if all children is downloaded - this.allDownloadStates = this.allDownloadStates.filter( - (downloadState) => downloadState.id !== downloadId, - ); - } - this.eventEmitter.emit('download.event', this.allDownloadStates); - }); - - writer.on('error', (error) => { - delete this.abortControllers[downloadId][destination]; + // update download state const currentDownloadState = this.allDownloadStates.find( (downloadState) => downloadState.id === downloadId, ); if (!currentDownloadState) { + resolve(); return; } - const downloadItem = currentDownloadState?.children.find( (downloadItem) => downloadItem.id === destination, ); if (downloadItem) { - downloadItem.status = DownloadStatus.Error; - downloadItem.error = error.message; + downloadItem.size.total = totalBytes; } - currentDownloadState.status = DownloadStatus.Error; - currentDownloadState.error = error.message; - - // remove download state if all children is downloaded - this.allDownloadStates = this.allDownloadStates.filter( - (downloadState) => downloadState.id !== downloadId, - ); - this.eventEmitter.emit('download.event', this.allDownloadStates); - }); - - 
response.data.on('data', (chunk: any) => { - transferredBytes += chunk.length; - - const currentDownloadState = this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, - ); - if (!currentDownloadState) return; + console.log('Downloading', basename(destination)); + + let transferredBytes = 0; + const bar = new SingleBar({}, Presets.shades_classic); + bar.start(100, 0); + + writer.on('finish', () => { + try { + // delete the abort controller + delete this.abortControllers[downloadId][destination]; + const currentDownloadState = this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + if (!currentDownloadState) return; + + // update current child status to downloaded, find by destination as id + const downloadItem = currentDownloadState?.children.find( + (downloadItem) => downloadItem.id === destination, + ); + if (downloadItem) { + downloadItem.status = DownloadStatus.Downloaded; + } + + this.eventEmitter.emit('download.event', this.allDownloadStates); + } finally { + bar.stop(); + resolve(); + } + }); + + writer.on('error', (error) => { + try { + delete this.abortControllers[downloadId][destination]; + const currentDownloadState = this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + if (!currentDownloadState) return; + + const downloadItem = currentDownloadState?.children.find( + (downloadItem) => downloadItem.id === destination, + ); + if (downloadItem) { + downloadItem.status = DownloadStatus.Error; + downloadItem.error = error.message; + } + + currentDownloadState.status = DownloadStatus.Error; + currentDownloadState.error = error.message; + + // remove download state if all children is downloaded + this.allDownloadStates = this.allDownloadStates.filter( + (downloadState) => downloadState.id !== downloadId, + ); + this.eventEmitter.emit('download.event', this.allDownloadStates); + } finally { + bar.stop(); + resolve(); + } + }); + + response.data.on('data', (chunk: 
any) => { + transferredBytes += chunk.length; + + const currentDownloadState = this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + if (!currentDownloadState) return; - const downloadItem = currentDownloadState?.children.find( - (downloadItem) => downloadItem.id === destination, - ); - if (downloadItem) { - downloadItem.size.transferred = transferredBytes; - } - this.eventEmitter.emit('download.event', this.allDownloadStates); + const downloadItem = currentDownloadState?.children.find( + (downloadItem) => downloadItem.id === destination, + ); + if (downloadItem) { + downloadItem.size.transferred = transferredBytes; + bar.update(Math.floor((transferredBytes / totalBytes) * 100)); + } + this.eventEmitter.emit('download.event', this.allDownloadStates); + }); + + response.data.pipe(writer); }); - - response.data.pipe(writer); } getDownloadStates() { diff --git a/cortex-js/src/main.ts b/cortex-js/src/main.ts index 16890cc5b..1a43ca318 100644 --- a/cortex-js/src/main.ts +++ b/cortex-js/src/main.ts @@ -17,8 +17,8 @@ async function bootstrap() { console.log( chalk.blue(`API Playground available at http://${host}:${port}/api`), ); - } catch (err) { - console.error(err.message ?? err); + } catch { + console.error(`Failed to start server. 
Is port ${port} in use?`); } } diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index db0d6f886..014f18410 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -3,20 +3,12 @@ import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; import { BadRequestException, Injectable } from '@nestjs/common'; import { Model, ModelSettingParams } from '@/domain/models/model.interface'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; -import { join, basename } from 'path'; -import { - promises, - existsSync, - mkdirSync, - rmdirSync, - createWriteStream, -} from 'fs'; +import { basename, join } from 'path'; +import { promises, existsSync, mkdirSync, rmdirSync, readFileSync } from 'fs'; import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; import { ExtensionRepository } from '@/domain/repositories/extension.interface'; import { EngineExtension } from '@/domain/abstracts/engine.abstract'; -import { HttpService } from '@nestjs/axios'; import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id'; -import { firstValueFrom } from 'rxjs'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; import { AxiosError } from 'axios'; import { TelemetryUsecases } from '../telemetry/telemetry.usecases'; @@ -26,10 +18,10 @@ import { ModelParameterParser } from '@/utils/model-parameter.parser'; import { HuggingFaceModelVersion, HuggingFaceRepoData, + HuggingFaceRepoSibling, } from '@/domain/models/huggingface.interface'; import { LLAMA_2 } from '@/infrastructure/constants/prompt-constants'; -import { isValidUrl } from '@/utils/urls'; import { fetchHuggingFaceRepoData, fetchJanRepoData, @@ -41,6 +33,8 @@ import { ModelEvent, ModelId, ModelStatus } from '@/domain/models/model.event'; import { 
DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; import { ContextService } from '@/infrastructure/services/context/context.service'; import { Engines } from '@/infrastructure/commanders/types/engine.interface'; +import { load } from 'js-yaml'; +import { llamaModelFile } from '@/utils/app-path'; @Injectable() export class ModelsUsecases { @@ -49,7 +43,6 @@ export class ModelsUsecases { private readonly extensionRepository: ExtensionRepository, private readonly fileManagerService: FileManagerService, private readonly downloadManagerService: DownloadManagerService, - private readonly httpService: HttpService, private readonly telemetryUseCases: TelemetryUsecases, private readonly contextService: ContextService, private readonly eventEmitter: EventEmitter2, @@ -135,6 +128,14 @@ export class ModelsUsecases { existsSync(modelFolder) && rmdirSync(modelFolder, { recursive: true }), ) + .then(() => { + const modelEvent: ModelEvent = { + model: id, + event: 'model-deleted', + metadata: {}, + }; + this.eventEmitter.emit(modelEvent.event, modelEvent); + }) .then(() => { return { message: 'Model removed successfully', @@ -304,92 +305,6 @@ export class ModelsUsecases { }); } - /** - * Download a remote model from HuggingFace or Jan's repo - * @param modelId Model ID - * @param callback Callback function to track download progress - * @returns - */ - async downloadModel(modelId: string, callback?: (progress: number) => void) { - const model = await this.getModelOrThrow(modelId); - - // TODO: We will support splited gguf files in the future - // Leave it as is for now (first element of the array) - const downloadUrl = Array.isArray(model.files) - ? 
model.files[0] - : model.files.llama_model_path; - - if (!downloadUrl) { - throw new BadRequestException('No model URL provided'); - } - if (!isValidUrl(downloadUrl)) { - throw new BadRequestException(`Invalid download URL: ${downloadUrl}`); - } - - const fileName = basename(downloadUrl); - const modelsContainerDir = await this.fileManagerService.getModelsPath(); - - if (!existsSync(modelsContainerDir)) { - mkdirSync(modelsContainerDir, { recursive: true }); - } - - const modelFolder = join(modelsContainerDir, normalizeModelId(model.model)); - await promises.mkdir(modelFolder, { recursive: true }); - const destination = join(modelFolder, fileName); - - if (callback != null) { - const response = await firstValueFrom( - this.httpService.get(downloadUrl, { - responseType: 'stream', - }), - ); - if (!response) { - throw new Error('Failed to download model'); - } - - return new Promise((resolve, reject) => { - const writer = createWriteStream(destination); - let receivedBytes = 0; - const totalBytes = response.headers['content-length']; - - writer.on('finish', () => { - resolve(true); - }); - - writer.on('error', (error) => { - reject(error); - }); - - response.data.on('data', (chunk: any) => { - receivedBytes += chunk.length; - callback?.(Math.floor((receivedBytes / totalBytes) * 100)); - }); - - response.data.pipe(writer); - }); - } else { - // modelId should be unique - const downloadId = modelId; - - // inorder to download multiple files, just need to pass more urls and destination to this object - const urlToDestination: Record = { - [downloadUrl]: destination, - }; - - this.downloadManagerService.submitDownloadRequest( - downloadId, - model.name ?? modelId, - DownloadType.Model, - urlToDestination, - ); - - return { - downloadId, - message: 'Download started', - }; - } - } - /** * Abort a download * @param downloadId Download ID @@ -402,44 +317,95 @@ export class ModelsUsecases { * Populate model metadata from a Model repository (HF, Jan...) 
/**
 * Populate model metadata from a model repository (HF, Jan...) and download it.
 *
 * The download itself is delegated to the download manager; model metadata is
 * created or updated in a post-processing callback handed to it.
 *
 * @param modelId Model ID. An ID containing '/' is treated as a HuggingFace repo.
 * @param inSequence Forwarded unchanged to the download manager.
 * @param selection Optional picker used for HuggingFace repos to choose the
 *   single file to download among the repo siblings.
 * @returns Whatever `submitDownloadRequest` returns.
 * @throws BadRequestException when the model already exists locally.
 */
async pullModel(
  modelId: string,
  inSequence: boolean = true,
  selection?: (
    siblings: HuggingFaceRepoSibling[],
  ) => Promise<HuggingFaceRepoSibling>,
) {
  const existingModel = await this.findOne(modelId);
  if (isLocalModel(existingModel?.files)) {
    throw new BadRequestException('Model already exists');
  }

  const modelsContainerDir = await this.fileManagerService.getModelsPath();
  if (!existsSync(modelsContainerDir)) {
    mkdirSync(modelsContainerDir, { recursive: true });
  }

  const modelFolder = join(modelsContainerDir, normalizeModelId(modelId));
  // Best-effort: a failure here surfaces later when the download is written.
  await promises.mkdir(modelFolder, { recursive: true }).catch(() => {});

  let files = (await fetchJanRepoData(modelId)).siblings;

  // HuggingFace GGUF Repo - Only one file is downloaded
  if (modelId.includes('/') && selection && files.length) {
    files = [await selection(files)];
  }

  // Map every downloadable file URL to its destination inside the model folder.
  const toDownloads: Record<string, string> = files
    .filter((e) => this.validFileDownload(e))
    .reduce((acc: Record<string, string>, file) => {
      acc[file.downloadUrl] = join(modelFolder, file.rfilename);
      return acc;
    }, {});

  const modelYmlPath = join(modelFolder, 'model.yml');
  const displayName = modelId.replace(':default', '');

  return this.downloadManagerService.submitDownloadRequest(
    modelId,
    modelId,
    DownloadType.Model,
    toDownloads,
    // Post processing after download
    async () => {
      console.log('Update model metadata...');
      if (existsSync(modelYmlPath)) {
        const model: CreateModelDto = load(
          readFileSync(modelYmlPath, 'utf-8'),
        ) as CreateModelDto;
        if (model.engine === 'cortex.llamacpp') {
          // Point the model at the downloaded file inside the model folder.
          model.files = [join(modelFolder, llamaModelFile(model.files))];
          model.name = displayName;
        } else {
          model.files = [modelFolder];
        }
        model.model = modelId;
        if (!(await this.findOne(modelId))) await this.create(model);
      } else {
        // Fallback if model.yml is not found & is a GGUF file
        const [primaryFile] = files;
        if (!primaryFile) {
          throw new Error(`No downloadable file found for model ${modelId}`);
        }
        // NOTE(review): the result was never used by the original code; the
        // call is kept in case it validates the repository - confirm.
        await this.fetchModelMetadata(modelId);
        await this.populateHuggingFaceModel(modelId, primaryFile);
        const model = await this.findOne(modelId);
        if (model) {
          await this.update(modelId, {
            files: [join(modelFolder, basename(primaryFile.rfilename))],
            name: displayName,
          });
        }
      }
      const modelEvent: ModelEvent = {
        model: modelId,
        event: 'model-downloaded',
        metadata: {},
      };
      this.eventEmitter.emit(modelEvent.event, modelEvent);
    },
    inSequence,
  );
}
/**
 * Get the model file name (base name only) from a model's `files` entry.
 *
 * Accepted shapes:
 * - `string[]` (legacy model.yml): the first entry is used as the path.
 * - `ModelArtifact`: `llama_model_path` is preferred, then `model_path`.
 * - `ModelArtifact[]`: the first artifact is resolved as above.
 *
 * @param files The `files` value of a model definition.
 * @returns The base name of the resolved path, or `''` when nothing can be
 *   resolved (empty array, artifact without a path).
 */
export const llamaModelFile = (
  files: string[] | ModelArtifact | ModelArtifact[],
): string => {
  // Only the first entry of an array is considered.
  const artifact: unknown = Array.isArray(files) ? files[0] : files;

  // Legacy model.yml: a plain path string. This must be handled before any
  // property lookup because the `in` operator throws on primitives.
  if (typeof artifact === 'string') {
    return basename(artifact);
  }

  // Empty array or otherwise unusable entry.
  if (typeof artifact !== 'object' || artifact === null) {
    return '';
  }

  const { llama_model_path: llamaPath, model_path: modelPath } =
    artifact as ModelArtifact;
  return basename(llamaPath ?? modelPath ?? '');
};
/**
 * Terminate the process when the requested model targets an engine that is
 * not supported on the given platform.
 *
 * Engines are detected by substring match on the model ID: IDs containing
 * 'onnx' are only accepted on 'win32', and IDs containing 'tensorrt-llm' are
 * rejected on 'darwin'. On a mismatch an error is printed and the process
 * exits with code 1.
 *
 * @param modelId Model ID as passed on the command line.
 * @param platform Platform identifier to check against; defaults to
 *   `process.platform`, which preserves the original single-argument behavior.
 */
export const checkModelCompatibility = (
  modelId: string,
  platform: string = process.platform,
): void => {
  if (modelId.includes('onnx') && platform !== 'win32') {
    console.error('The ONNX engine does not support this OS yet.');
    process.exit(1);
  }

  if (modelId.includes('tensorrt-llm') && platform === 'darwin') {
    console.error('Tensorrt-LLM models are not supported on this OS');
    process.exit(1);
  }
};