chore: update model start params DTO

louis-jan · louis-jan · commit 1debf768f175 · 2024-06-10T12:58:07.000+07:00
diff --git a/cortex-js/src/domain/models/model.interface.ts b/cortex-js/src/domain/models/model.interface.ts
@@ -81,7 +81,7 @@ export interface Model {
   ngl?: number;
 
   /**
-   * The number of parallel operations. Only set when enable continuous batching.
+   * Number of parallel sequences to decode
    */
   n_parallel?: number;
 
@@ -96,13 +96,6 @@ export interface Model {
   engine?: string;
 }
 
-export interface ModelMetadata {
-  author: string;
-  tags: string[];
-  size: number;
-  cover?: string;
-}
-
 /**
  * The available model settings.
  */
@@ -140,10 +133,3 @@ export interface ModelRuntimeParams {
   presence_penalty?: number;
   engine?: string;
 }
-
-/**
- * Represents the model initialization error.
- */
-export type ModelInitFailed = Model & {
-  error: Error;
-};
diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts
@@ -20,6 +20,7 @@ import { ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger';
 import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto';
 import { TransformInterceptor } from '../interceptors/transform.interceptor';
 import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
+import { ModelSettingsDto } from '../dtos/models/model-settings.dto';
 
 @ApiTags('Models')
 @Controller('models')
@@ -61,10 +62,13 @@ export class ModelsController {
     description: 'The unique identifier of the model.',
   })
   @Post(':modelId(*)/start')
-  startModel(@Param('modelId') modelId: string, @Body() model: ModelDto) {
+  startModel(
+    @Param('modelId') modelId: string,
+    @Body() params: ModelSettingsDto,
+  ) {
     return this.cortexUsecases
       .startCortex()
-      .then(() => this.modelsUsecases.startModel(modelId, model));
+      .then(() => this.modelsUsecases.startModel(modelId, params));
   }
 
   @HttpCode(200)
diff --git a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts
@@ -4,6 +4,7 @@ import {
   IsNumber,
   IsOptional,
   IsString,
+  Min,
 } from 'class-validator';
 import { Model } from '@/domain/models/model.interface';
 import { ModelArtifactDto } from './model-artifact.dto';
@@ -50,6 +51,7 @@ export class CreateModelDto implements Partial<Model> {
   @ApiProperty({
     description:
       'Sets the upper limit on the number of tokens the model can generate in a single output.',
+    example: 4096,
   })
   @IsOptional()
   @IsNumber()
@@ -97,30 +99,40 @@ export class CreateModelDto implements Partial<Model> {
   @ApiProperty({
     description:
       'Sets the maximum input the model can use to generate a response, it varies with the model used.',
+    example: 4096,
   })
   @IsOptional()
   @IsNumber()
   ctx_len?: number;
 
-  @ApiProperty({ description: 'Determines GPU layer usage.' })
+  @ApiProperty({ description: 'Determines GPU layer usage.', example: 32 })
   @IsOptional()
   @IsNumber()
   ngl?: number;
 
-  @ApiProperty({ description: 'Number of parallel processing units to use.' })
+  @ApiProperty({
+    description: 'Number of parallel processing units to use.',
+    example: 1,
+  })
   @IsOptional()
   @IsNumber()
+  @Min(1)
   n_parallel?: number;
 
   @ApiProperty({
     description:
       'Determines CPU inference threads, limited by hardware and OS. ',
+    example: 10,
   })
   @IsOptional()
   @IsNumber()
+  @Min(1)
   cpu_threads?: number;
 
-  @ApiProperty({ description: 'The engine used to run the model.' })
+  @ApiProperty({
+    description: 'The engine used to run the model.',
+    example: 'cortex.llamacpp',
+  })
   @IsOptional()
   @IsString()
   engine?: string;
diff --git a/cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts
@@ -0,0 +1,64 @@
+import { ModelSettingParams } from '@/domain/models/model.interface';
+import { ApiProperty } from '@nestjs/swagger';
+import { IsArray, IsNumber, IsOptional, IsString, Min } from 'class-validator';
+
+/**
+ * Request body for starting a model. Every field is optional; fields that
+ * are present carry class-validator constraints matching their declared types.
+ */
+export class ModelSettingsDto implements ModelSettingParams {
+  // Prompt Settings
+  @ApiProperty({
+    example: 'system\n{system_message}\nuser\n{prompt}\nassistant',
+    description:
+      "A predefined text or framework that guides the AI model's response generation.",
+  })
+  @IsOptional()
+  @IsString()
+  prompt_template?: string;
+
+  @ApiProperty({
+    type: [String],
+    example: [],
+    description:
+      'Defines specific tokens or phrases that signal the model to stop producing further output.',
+  })
+  @IsOptional()
+  @IsArray()
+  @IsString({ each: true })
+  stop?: string[];
+
+  // Engine Settings
+  // Example kept in line with CreateModelDto.ngl (a layer count, not a context size).
+  @ApiProperty({ description: 'Determines GPU layer usage.', example: 32 })
+  @IsOptional()
+  @IsNumber()
+  ngl?: number;
+
+  @ApiProperty({
+    description:
+      'The context length for model operations varies; the maximum depends on the specific model used.',
+    example: 4096,
+  })
+  @IsOptional()
+  @IsNumber()
+  ctx_len?: number;
+
+  @ApiProperty({
+    description:
+      'Determines CPU inference threads, limited by hardware and OS. ',
+    example: 10,
+  })
+  @IsOptional()
+  @IsNumber()
+  @Min(1)
+  cpu_threads?: number;
+
+  @ApiProperty({
+    example: 'cortex.llamacpp',
+    description: 'The engine to use.',
+  })
+  @IsOptional()
+  @IsString()
+  engine?: string;
+}
diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts
@@ -3,7 +3,10 @@ import { ChildProcess, spawn } from 'child_process';
 import { join } from 'path';
 import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto';
 import { HttpService } from '@nestjs/axios';
-import { defaultCortexCppHost, defaultCortexCppPort } from '@/infrastructure/constants/cortex';
+import {
+  defaultCortexCppHost,
+  defaultCortexCppPort,
+} from '@/infrastructure/constants/cortex';
 import { existsSync } from 'node:fs';
 import { firstValueFrom } from 'rxjs';
 import { FileManagerService } from '@/file-manager/file-manager.service';
diff --git a/cortex-js/src/usecases/messages/messages.module.ts b/cortex-js/src/usecases/messages/messages.module.ts
@@ -1,11 +1,10 @@
 import { Module } from '@nestjs/common';
 import { MessagesUsecases } from './messages.usecases';
-import { MessagesController } from '@/infrastructure/controllers/messages.controller';
 import { DatabaseModule } from '@/infrastructure/database/database.module';
 
 @Module({
   imports: [DatabaseModule],
-  controllers: [MessagesController],
+  controllers: [],
   providers: [MessagesUsecases],
   exports: [MessagesUsecases],
 })
diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts
@@ -100,7 +100,9 @@ export class ModelsUsecases {
       // Default settings
       ctx_len: 4096,
       ngl: 100,
-      ...(Array.isArray(model?.files) &&
+      //TODO: Utils for model file retrieval
+      ...(model?.files &&
+        Array.isArray(model.files) &&
         !('llama_model_path' in model) && {
           llama_model_path: (model.files as string[])[0],
         }),