diff --git a/cortex-js/src/infrastructure/commanders/benchmark.command.ts b/cortex-js/src/infrastructure/commanders/benchmark.command.ts index 899055043..ae23f13fc 100644 --- a/cortex-js/src/infrastructure/commanders/benchmark.command.ts +++ b/cortex-js/src/infrastructure/commanders/benchmark.command.ts @@ -1,6 +1,9 @@ import { CommandRunner, SubCommand, Option } from 'nest-commander'; import { BenchmarkCliUsecases } from './usecases/benchmark.cli.usecases'; -import { BenchmarkConfig } from './types/benchmark-config.interface'; +import { + BenchmarkConfig, + ParametersConfig, +} from './types/benchmark-config.interface'; @SubCommand({ name: 'benchmark', @@ -20,10 +23,12 @@ export class BenchmarkCommand extends CommandRunner { passedParams: string[], options?: Partial, ): Promise { - return this.benchmarkUsecases.benchmark({ - ...options, - ...(passedParams[0] ? { modelId: passedParams[0] } : {}), - }); + return this.benchmarkUsecases.benchmark( + options ?? {}, + passedParams[0] + ? ({ model: passedParams[0] } as ParametersConfig) + : undefined, + ); } @Option({ diff --git a/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts index aa49c313e..d3117a744 100644 --- a/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts +++ b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts @@ -1,21 +1,25 @@ import { ChatCompletionMessageParam } from 'openai/resources'; +export interface ApiConfig { + base_url: string; + api_key: string; + parameters: ParametersConfig; +} + +export interface ParametersConfig { + messages: ChatCompletionMessageParam[]; + model: string; + stream?: boolean; + max_tokens?: number; + stop?: string[]; + frequency_penalty?: number; + presence_penalty?: number; + temperature?: number; + top_p?: number; +} + export interface BenchmarkConfig { - api: { - base_url: string; - api_key: string; - parameters: { - messages: ChatCompletionMessageParam[]; - model: string; - stream?: boolean; - max_tokens?: number; - stop?: string[]; - frequency_penalty?: number; - presence_penalty?: number; - temperature?: number; - top_p?: number; - }; - }; + api: ApiConfig; prompts?: { min: number; max: number; diff --git a/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts index 19cf6c951..ea7cba78e 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts @@ -8,7 +8,10 @@ import { FileManagerService } from '@/infrastructure/services/file-manager/file- import { join } from 'path'; import { ModelsCliUsecases } from './models.cli.usecases'; import { spawn } from 'child_process'; -import { BenchmarkConfig } from '@commanders/types/benchmark-config.interface'; +import { + BenchmarkConfig, + ParametersConfig, +} from '@commanders/types/benchmark-config.interface'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; import { inspect } from 'util'; import { defaultBenchmarkConfiguration } from '@/infrastructure/constants/benchmark'; @@ -28,13 +31,17 @@ export class BenchmarkCliUsecases { /** * Benchmark and analyze the performance of a specific AI model using a variety of system resources */ - async benchmark(options: Partial) { + async benchmark( + options: Partial, + params?: ParametersConfig, + ) { return this.getBenchmarkConfig().then((config) => { this.config = { ...config, ...options, }; + const model = params?.model ?? this.config.api.parameters.model; // TODO: Using OpenAI client or Cortex client to benchmark? this.openai = new OpenAI({ apiKey: this.config.api.api_key, @@ -49,15 +56,13 @@ export class BenchmarkCliUsecases { return this.cortexUsecases .startCortex() - .then(() => - this.modelsCliUsecases.startModel(this.config.api.parameters.model), - ) + .then(() => this.modelsCliUsecases.startModel(model)) .then(() => this.psUsecases .getModels() .then((models) => models.find( - (e) => e.modelId === this.config.api.parameters.model, + (e) => e.modelId === model, ), ), ) diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts index e05ce4d78..fb7750afd 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts @@ -221,7 +221,7 @@ export class InitCliUsecases { const dataFolderPath = await this.fileManagerService.getDataFolderPath(); const url = CUDA_DOWNLOAD_URL.replace( '', - cudaVersion === '11' ? '11.7' : '12.0', + cudaVersion === '11' ? '11.7' : '12.3', ).replace('', platform); const destination = join(dataFolderPath, 'cuda-toolkit.tar.gz'); diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts index 7d6d133cb..16617e05f 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts @@ -210,6 +210,17 @@ export class ModelsCliUsecases { if (!(await this.modelsUsecases.findOne(modelId))) await this.modelsUsecases.create(model); + + if (model.engine === Engines.tensorrtLLM) { + if (process.platform === 'win32') + console.log( + 'Please ensure that you install MPI and its SDK to use the TensorRT engine, as it also requires the Cuda Toolkit 12.3 to work. Refs:\n- https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisetup.exe\n- https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisdk.msi', + ); + else if (process.platform === 'linux') + console.log( + 'Please ensure that you install OpenMPI and its SDK to use the TensorRT engine, as it also requires the Cuda Toolkit 12.3 to work.\nYou can install OpenMPI by running "sudo apt update && sudo apt install openmpi-bin libopenmpi-dev"', + ); + } } /** * It's to pull model from HuggingFace repository