From 9dff09692ba82264a5ed9f05f3dd909d72f68963 Mon Sep 17 00:00:00 2001
From: Louis
Date: Tue, 25 Jun 2024 22:41:39 +0700
Subject: [PATCH 1/2] chore: add dependency instructions

---
 .../commanders/usecases/init.cli.usecases.ts    |  2 +-
 .../commanders/usecases/models.cli.usecases.ts  | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
index e05ce4d78..fb7750afd 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
@@ -221,7 +221,7 @@ export class InitCliUsecases {
     const dataFolderPath = await this.fileManagerService.getDataFolderPath();
     const url = CUDA_DOWNLOAD_URL.replace(
       '<version>',
-      cudaVersion === '11' ? '11.7' : '12.0',
+      cudaVersion === '11' ? '11.7' : '12.3',
     ).replace('<platform>', platform);
     const destination = join(dataFolderPath, 'cuda-toolkit.tar.gz');
diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
index 7d6d133cb..16617e05f 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
@@ -210,6 +210,17 @@ export class ModelsCliUsecases {
     if (!(await this.modelsUsecases.findOne(modelId)))
       await this.modelsUsecases.create(model);
+
+    if (model.engine === Engines.tensorrtLLM) {
+      if (process.platform === 'win32')
+        console.log(
+          'Please ensure that you install MPI and its SDK to use the TensorRT engine, as it also requires the CUDA Toolkit 12.3 to work. Refs:\n- https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisetup.exe\n- https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisdk.msi',
+        );
+      else if (process.platform === 'linux')
+        console.log(
+          'Please ensure that you install OpenMPI and its SDK to use the TensorRT engine, as it also requires the CUDA Toolkit 12.3 to work.\nYou can install OpenMPI by running "sudo apt update && sudo apt install openmpi-bin libopenmpi-dev"',
+        );
+    }
   }

   /**
    * It's to pull model from HuggingFace repository

From 219532c3313ed505b669528fc16c388ef63c5eea Mon Sep 17 00:00:00 2001
From: Louis
Date: Tue, 25 Jun 2024 23:47:34 +0700
Subject: [PATCH 2/2] fix: benchmark with custom model param

---
 .../commanders/benchmark.command.ts             | 15 +++++---
 .../types/benchmark-config.interface.ts         | 34 +++++++++++--------
 .../usecases/benchmark.cli.usecases.ts          | 17 ++++++----
 3 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/cortex-js/src/infrastructure/commanders/benchmark.command.ts b/cortex-js/src/infrastructure/commanders/benchmark.command.ts
index 899055043..ae23f13fc 100644
--- a/cortex-js/src/infrastructure/commanders/benchmark.command.ts
+++ b/cortex-js/src/infrastructure/commanders/benchmark.command.ts
@@ -1,6 +1,9 @@
 import { CommandRunner, SubCommand, Option } from 'nest-commander';
 import { BenchmarkCliUsecases } from './usecases/benchmark.cli.usecases';
-import { BenchmarkConfig } from './types/benchmark-config.interface';
+import {
+  BenchmarkConfig,
+  ParametersConfig,
+} from './types/benchmark-config.interface';

 @SubCommand({
   name: 'benchmark',
@@ -20,10 +23,12 @@ export class BenchmarkCommand extends CommandRunner {
     passedParams: string[],
     options?: Partial<BenchmarkConfig>,
   ): Promise<void> {
-    return this.benchmarkUsecases.benchmark({
-      ...options,
-      ...(passedParams[0] ? { modelId: passedParams[0] } : {}),
-    });
+    return this.benchmarkUsecases.benchmark(
+      options ?? {},
+      passedParams[0]
+        ? ({ model: passedParams[0] } as ParametersConfig)
+        : undefined,
+    );
   }

   @Option({
diff --git a/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
index aa49c313e..d3117a744 100644
--- a/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
+++ b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
@@ -1,21 +1,25 @@
 import { ChatCompletionMessageParam } from 'openai/resources';

+export interface ApiConfig {
+  base_url: string;
+  api_key: string;
+  parameters: ParametersConfig;
+}
+
+export interface ParametersConfig {
+  messages: ChatCompletionMessageParam[];
+  model: string;
+  stream?: boolean;
+  max_tokens?: number;
+  stop?: string[];
+  frequency_penalty?: number;
+  presence_penalty?: number;
+  temperature?: number;
+  top_p?: number;
+}
+
 export interface BenchmarkConfig {
-  api: {
-    base_url: string;
-    api_key: string;
-    parameters: {
-      messages: ChatCompletionMessageParam[];
-      model: string;
-      stream?: boolean;
-      max_tokens?: number;
-      stop?: string[];
-      frequency_penalty?: number;
-      presence_penalty?: number;
-      temperature?: number;
-      top_p?: number;
-    };
-  };
+  api: ApiConfig;
   prompts?: {
     min: number;
     max: number;
diff --git a/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
index 19cf6c951..ea7cba78e 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
@@ -8,7 +8,10 @@ import { FileManagerService } from '@/infrastructure/services/file-manager/file-
 import { join } from 'path';
 import { ModelsCliUsecases } from './models.cli.usecases';
 import { spawn } from 'child_process';
-import { BenchmarkConfig } from '@commanders/types/benchmark-config.interface';
+import {
+  BenchmarkConfig,
+  ParametersConfig,
+} from '@commanders/types/benchmark-config.interface';
 import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
 import { inspect } from 'util';
 import { defaultBenchmarkConfiguration } from '@/infrastructure/constants/benchmark';
@@ -28,13 +31,17 @@ export class BenchmarkCliUsecases {
   /**
    * Benchmark and analyze the performance of a specific AI model using a variety of system resources
    */
-  async benchmark(options: Partial<BenchmarkConfig>) {
+  async benchmark(
+    options: Partial<BenchmarkConfig>,
+    params?: ParametersConfig,
+  ) {
     return this.getBenchmarkConfig().then((config) => {
       this.config = {
         ...config,
         ...options,
       };

+      const model = params?.model ?? this.config.api.parameters.model;
       // TODO: Using OpenAI client or Cortex client to benchmark?
       this.openai = new OpenAI({
         apiKey: this.config.api.api_key,
@@ -49,15 +56,13 @@

       return this.cortexUsecases
         .startCortex()
-        .then(() =>
-          this.modelsCliUsecases.startModel(this.config.api.parameters.model),
-        )
+        .then(() => this.modelsCliUsecases.startModel(model))
         .then(() =>
           this.psUsecases
             .getModels()
             .then((models) =>
               models.find(
-                (e) => e.modelId === this.config.api.parameters.model,
+                (e) => e.modelId === model,
               ),
             ),
         )
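
Note on the call shape introduced by the second commit: a model id passed
positionally on the CLI is now carried as a ParametersConfig value instead of
being spread into the config object, and `params?.model` takes precedence over
the model named in the stored benchmark config. The sketch below is a minimal,
self-contained TypeScript illustration of that precedence rule only;
`resolveModel`, the trimmed-down interfaces, and the config values are
hypothetical and not part of the codebase (the real ParametersConfig also
requires `messages`).

// Simplified stand-ins for the interfaces defined in
// benchmark-config.interface.ts above (messages and sampling fields omitted).
interface ParametersConfig {
  model: string;
}

interface BenchmarkConfig {
  api: { base_url: string; api_key: string; parameters: ParametersConfig };
}

// Hypothetical helper mirroring the patched line:
//   const model = params?.model ?? this.config.api.parameters.model;
function resolveModel(
  config: BenchmarkConfig,
  params?: ParametersConfig,
): string {
  // The positional CLI override wins; otherwise fall back to the config.
  return params?.model ?? config.api.parameters.model;
}

const config: BenchmarkConfig = {
  api: {
    base_url: 'http://127.0.0.1:1337/v1', // placeholder endpoint
    api_key: '',
    parameters: { model: 'configured-model' },
  },
};

console.log(resolveModel(config));                          // 'configured-model'
console.log(resolveModel(config, { model: 'cli-model' }));  // 'cli-model'

Passing the override as a separate second argument keeps Partial<BenchmarkConfig>
reserved for config overrides, rather than smuggling an ad-hoc modelId key into
it as the removed code did.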