janhq · louis-jan · Jun 26, 2024 · Jun 25, 2024 · Jun 25, 2024
diff --git a/cortex-js/src/infrastructure/commanders/benchmark.command.ts b/cortex-js/src/infrastructure/commanders/benchmark.command.ts
@@ -1,6 +1,9 @@
 import { CommandRunner, SubCommand, Option } from 'nest-commander';
 import { BenchmarkCliUsecases } from './usecases/benchmark.cli.usecases';
-import { BenchmarkConfig } from './types/benchmark-config.interface';
+import {
+  BenchmarkConfig,
+  ParametersConfig,
+} from './types/benchmark-config.interface';
 
 @SubCommand({
   name: 'benchmark',
@@ -20,10 +23,12 @@ export class BenchmarkCommand extends CommandRunner {
     passedParams: string[],
     options?: Partial<BenchmarkConfig>,
   ): Promise<void> {
-    return this.benchmarkUsecases.benchmark({
-      ...options,
-      ...(passedParams[0] ? { modelId: passedParams[0] } : {}),
-    });
+    return this.benchmarkUsecases.benchmark(
+      options ?? {},
+      passedParams[0]
+        ? ({ model: passedParams[0] } as ParametersConfig)
+        : undefined,
+    );
   }
 
   @Option({

diff --git a/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
@@ -1,21 +1,25 @@
 import { ChatCompletionMessageParam } from 'openai/resources';
 
+export interface ApiConfig { // Shape of the `api` section of BenchmarkConfig.
+  base_url: string; // NOTE(review): presumably the root URL of the OpenAI-compatible endpoint — confirm against the client setup.
+  api_key: string; // Passed as `apiKey` to the OpenAI client constructed in BenchmarkCliUsecases.benchmark.
+  parameters: ParametersConfig; // Request parameters; `parameters.model` is the fallback model when the CLI supplies none.
+}
+
+export interface ParametersConfig { // Shape of `ApiConfig.parameters`; also the type of the optional `params` argument of BenchmarkCliUsecases.benchmark.
+  messages: ChatCompletionMessageParam[]; // Chat messages, typed with the OpenAI SDK's message param type.
+  model: string; // Model id: handed to startModel and compared against running models' `modelId`.
+  stream?: boolean; // NOTE(review): this and the optional fields below look like OpenAI chat-completion request options — confirm how they are forwarded.
+  max_tokens?: number;
+  stop?: string[];
+  frequency_penalty?: number;
+  presence_penalty?: number;
+  temperature?: number;
+  top_p?: number;
+}
+
 export interface BenchmarkConfig {
-  api: {
-    base_url: string;
-    api_key: string;
-    parameters: {
-      messages: ChatCompletionMessageParam[];
-      model: string;
-      stream?: boolean;
-      max_tokens?: number;
-      stop?: string[];
-      frequency_penalty?: number;
-      presence_penalty?: number;
-      temperature?: number;
-      top_p?: number;
-    };
-  };
+  api: ApiConfig;
   prompts?: {
     min: number;
     max: number;

diff --git a/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
@@ -8,7 +8,10 @@ import { FileManagerService } from '@/infrastructure/services/file-manager/file-
 import { join } from 'path';
 import { ModelsCliUsecases } from './models.cli.usecases';
 import { spawn } from 'child_process';
-import { BenchmarkConfig } from '@commanders/types/benchmark-config.interface';
+import {
+  BenchmarkConfig,
+  ParametersConfig,
+} from '@commanders/types/benchmark-config.interface';
 import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
 import { inspect } from 'util';
 import { defaultBenchmarkConfiguration } from '@/infrastructure/constants/benchmark';
@@ -28,13 +31,17 @@ export class BenchmarkCliUsecases {
   /**
    * Benchmark and analyze the performance of a specific AI model using a variety of system resources
    */
-  async benchmark(options: Partial<BenchmarkConfig>) {
+  async benchmark(
+    options: Partial<BenchmarkConfig>,
+    params?: ParametersConfig,
+  ) {
     return this.getBenchmarkConfig().then((config) => {
       this.config = {
         ...config,
         ...options,
       };
 
+      const model = params?.model ?? this.config.api.parameters.model;
       // TODO: Using OpenAI client or Cortex client to benchmark?
       this.openai = new OpenAI({
         apiKey: this.config.api.api_key,
@@ -49,15 +56,13 @@ export class BenchmarkCliUsecases {
 
       return this.cortexUsecases
         .startCortex()
-        .then(() =>
-          this.modelsCliUsecases.startModel(this.config.api.parameters.model),
-        )
+        .then(() => this.modelsCliUsecases.startModel(model))
         .then(() =>
           this.psUsecases
             .getModels()
             .then((models) =>
               models.find(
-                (e) => e.modelId === this.config.api.parameters.model,
+                (e) => e.modelId === model,
               ),
             ),
         )

diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
@@ -221,7 +221,7 @@ export class InitCliUsecases {
     const dataFolderPath = await this.fileManagerService.getDataFolderPath();
     const url = CUDA_DOWNLOAD_URL.replace(
       '<version>',
-      cudaVersion === '11' ? '11.7' : '12.0',
+      cudaVersion === '11' ? '11.7' : '12.3',
     ).replace('<platform>', platform);
     const destination = join(dataFolderPath, 'cuda-toolkit.tar.gz');
 

diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
@@ -210,6 +210,17 @@ export class ModelsCliUsecases {
 
     if (!(await this.modelsUsecases.findOne(modelId)))
       await this.modelsUsecases.create(model);
+
+    if (model.engine === Engines.tensorrtLLM) {
+      if (process.platform === 'win32')
+        console.log(
+          'Please ensure that you install MPI and its SDK to use the TensorRT engine, as it also requires the Cuda Toolkit 12.3 to work. Refs:\n- https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisetup.exe\n- https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisdk.msi',
+        );
+      else if (process.platform === 'linux')
+        console.log(
+          'Please ensure that you install OpenMPI and its SDK to use the TensorRT engine, as it also requires the Cuda Toolkit 12.3 to work.\nYou can install OpenMPI by running "sudo apt update && sudo apt install openmpi-bin libopenmpi-dev"',
+        );
+    }
   }
   /**
    * It's to pull model from HuggingFace repository