update model

namchuai · namchuai · commit d19f451c20c2 · 2024-06-17T22:39:35.000+07:00
Signed-off-by: James <namnh0122@gmail.com>
diff --git a/cortex-js/src/domain/models/model.interface.ts b/cortex-js/src/domain/models/model.interface.ts
@@ -1,8 +1,9 @@
-/**
- * Model type defines the shape of a model object.
- * @stored
- */
-export interface Model {
+import { Model as OpenAiModel } from 'openai/resources/models';
+
+export interface Model
+  extends OpenAiModel,
+    ModelSettingParams,
+    ModelRuntimeParams {
   /**
    * Model identifier.
    */
@@ -22,58 +23,22 @@ export interface Model {
    * The model download source. It can be an external url or a local filepath.
    */
   files: string[] | ModelArtifact;
+}
 
-  /**
-   * GGUF metadata: tokenizer.chat_template
-   */
-  prompt_template?: string;
-
-  /**
-   * Defines specific tokens or phrases at which the model will stop generating further output.
-   */
-  stop?: string[];
-
-  /// Inferencing
-  /**
-   * Set probability threshold for more relevant outputs.
-   */
-  top_p?: number;
-
-  /**
-   * Controls the randomness of the model’s output.
-   */
-  temperature?: number;
-
-  /**
-   * Adjusts the likelihood of the model repeating words or phrases in its output.
-   */
-  frequency_penalty?: number;
-
-  /**
-   * Influences the generation of new and varied concepts in the model’s output.
-   */
-  presence_penalty?: number;
-
-  /// Engines
+/**
+ * The available model settings.
+ */
+export interface ModelSettingParams {
   /**
    * The context length for model operations varies; the maximum depends on the specific model used.
    */
   ctx_len?: number;
 
-  /**
-   * Enable real-time data processing for faster predictions.
-   */
-  stream?: boolean;
-
-  /*
-   * The maximum number of tokens the model will generate in a single response.
-   */
-  max_tokens?: number;
-
   /**
    * The number of layers to load onto the GPU for acceleration.
    */
   ngl?: number;
+  embedding?: boolean;
 
   /**
    * Number of parallel sequences to decode
@@ -85,6 +50,22 @@ export interface Model {
    */
   cpu_threads?: number;
 
+  /**
+   * GGUF metadata: tokenizer.chat_template
+   */
+  prompt_template?: string;
+  system_prompt?: string;
+  ai_prompt?: string;
+  user_prompt?: string;
+  llama_model_path?: string;
+  mmproj?: string;
+  cont_batching?: boolean;
+
+  /**
+   * The model engine.
+   */
+  engine?: string;
+
   /**
    * The prompt to use for internal configuration
    */
@@ -134,59 +115,48 @@ export interface Model {
    * To enable mmap, default is true
    */
   use_mmap?: boolean;
-
-  /**
-   * The model engine.
-   */
-  engine?: string;
-}
-
-/**
- * The available model settings.
- */
-export interface ModelSettingParams {
-  ctx_len?: number;
-  ngl?: number;
-  embedding?: boolean;
-  n_parallel?: number;
-  cpu_threads?: number;
-  prompt_template?: string;
-  system_prompt?: string;
-  ai_prompt?: string;
-  user_prompt?: string;
-  llama_model_path?: string;
-  mmproj?: string;
-  cont_batching?: boolean;
-  engine?: string;
-  stop?: string[];
-  pre_prompt?: string;
-  n_batch?: number;
-  caching_enabled?: boolean;
-  grp_attn_n?: number;
-  grp_attn_w?: number;
-  mlock?: boolean;
-  grammar_file?: string;
-  model_type?: string;
-  model_alias?: string;
-  flash_attn?: boolean;
-  cache_type?: string;
-  use_mmap?: boolean;
 }
 
 /**
  * The available model runtime parameters.
  */
 export interface ModelRuntimeParams {
+  /**
+   * Controls the randomness of the model’s output.
+   */
   temperature?: number;
   token_limit?: number;
   top_k?: number;
+
+  /**
+   * Set probability threshold for more relevant outputs.
+   */
   top_p?: number;
+
+  /**
+   * Enable real-time data processing for faster predictions.
+   */
   stream?: boolean;
+
+  /**
+   * The maximum number of tokens the model will generate in a single response.
+   */
   max_tokens?: number;
+
+  /**
+   * Defines specific tokens or phrases at which the model will stop generating further output.
+   */
   stop?: string[];
+
+  /**
+   * Adjusts the likelihood of the model repeating words or phrases in its output.
+   */
   frequency_penalty?: number;
+
+  /**
+   * Influences the generation of new and varied concepts in the model’s output.
+   */
   presence_penalty?: number;
-  engine?: string;
 }
 
 /**
diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts
@@ -163,9 +163,7 @@ export class ModelsController {
   })
   @Get()
   findAll() {
-    return this.modelsUsecases
-      .findAll()
-      .then((data) => data.map((e) => ({ id: e.model, ...e })));
+    return this.modelsUsecases.findAll();
   }
 
   @HttpCode(200)
diff --git a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts
@@ -136,4 +136,11 @@ export class CreateModelDto implements Partial<Model> {
   @IsOptional()
   @IsString()
   engine?: string;
+
+  @ApiProperty({
+    description: 'The owner of the model.',
+    example: '',
+    default: '',
+  })
+  owned_by?: string;
 }
diff --git a/cortex-js/src/usecases/assistants/assistants.module.ts b/cortex-js/src/usecases/assistants/assistants.module.ts
@@ -1,9 +1,10 @@
 import { Module } from '@nestjs/common';
 import { AssistantsUsecases } from './assistants.usecases';
 import { DatabaseModule } from '@/infrastructure/database/database.module';
+import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module';
 
 @Module({
-  imports: [DatabaseModule],
+  imports: [DatabaseModule, ModelRepositoryModule],
   controllers: [],
   providers: [AssistantsUsecases],
   exports: [AssistantsUsecases],
diff --git a/cortex-js/src/usecases/assistants/assistants.usecases.ts b/cortex-js/src/usecases/assistants/assistants.usecases.ts
@@ -4,16 +4,26 @@ import { Repository } from 'typeorm';
 import { CreateAssistantDto } from '@/infrastructure/dtos/assistants/create-assistant.dto';
 import { Assistant } from '@/domain/models/assistant.interface';
 import { PageDto } from '@/infrastructure/dtos/page.dto';
+import { ModelRepository } from '@/domain/repositories/model.interface';
+import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception';
 
 @Injectable()
 export class AssistantsUsecases {
   constructor(
     @Inject('ASSISTANT_REPOSITORY')
-    private assistantRepository: Repository<AssistantEntity>,
+    private readonly assistantRepository: Repository<AssistantEntity>,
+    private readonly modelRepository: ModelRepository,
   ) {}
 
   async create(createAssistantDto: CreateAssistantDto) {
-    const { top_p, temperature } = createAssistantDto;
+    const { top_p, temperature, model } = createAssistantDto;
+    if (model !== '*') {
+      const modelEntity = await this.modelRepository.findOne(model);
+      if (!modelEntity) {
+        throw new ModelNotFoundException(model);
+      }
+    }
+
     const assistant: AssistantEntity = {
       ...createAssistantDto,
       object: 'assistant',
diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts
@@ -60,8 +60,13 @@ export class ModelsUsecases {
    * @param createModelDto Model data
    */
   async create(createModelDto: CreateModelDto) {
+    const { model: modelId, owned_by } = createModelDto;
     const model: Model = {
       ...createModelDto,
+      id: modelId,
+      created: Date.now(),
+      object: 'model',
+      owned_by: owned_by ?? '',
     };
 
     await this.modelRepository.create(model);