diff --git a/cortex-js/src/domain/models/model.interface.ts b/cortex-js/src/domain/models/model.interface.ts index 3a3905935..e42ce2cee 100644 --- a/cortex-js/src/domain/models/model.interface.ts +++ b/cortex-js/src/domain/models/model.interface.ts @@ -81,7 +81,7 @@ export interface Model { ngl?: number; /** - * The number of parallel operations. Only set when enable continuous batching. + * Number of parallel sequences to decode */ n_parallel?: number; @@ -96,13 +96,6 @@ export interface Model { engine?: string; } -export interface ModelMetadata { - author: string; - tags: string[]; - size: number; - cover?: string; -} - /** * The available model settings. */ @@ -140,10 +133,3 @@ export interface ModelRuntimeParams { presence_penalty?: number; engine?: string; } - -/** - * Represents the model initialization error. - */ -export type ModelInitFailed = Model & { - error: Error; -}; diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts index c4bd31278..d49977df6 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts @@ -7,7 +7,6 @@ import { import { exit } from 'node:process'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { isLocalModel } from '../utils/normalize-model-id'; type ModelStartOptions = { attach: boolean; @@ -52,9 +51,7 @@ export class ModelStartCommand extends CommandRunner { } modelInquiry = async () => { - const models = (await this.modelsCliUsecases.listAllModels()).filter( - (model) => isLocalModel(model.files), - ); + const models = await this.modelsCliUsecases.listAllModels(); if (!models.length) throw 'No models found'; const { model } = await this.inquirerService.inquirer.prompt({ type: 'list', diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts index 118212590..e214e50b5 100644 --- a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -7,9 +7,11 @@ import { } from 'nest-commander'; import { exit } from 'node:process'; import { ChatCliUsecases } from '../usecases/chat.cli.usecases'; -import { defaultCortexCppHost, defaultCortexCppPort } from '@/infrastructure/constants/cortex'; +import { + defaultCortexCppHost, + defaultCortexCppPort, +} from '@/infrastructure/constants/cortex'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; -import { isLocalModel } from '../utils/normalize-model-id'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; type RunOptions = { @@ -77,9 +79,7 @@ export class RunCommand extends CommandRunner { } modelInquiry = async () => { - const models = (await this.modelsCliUsecases.listAllModels()).filter( - (model) => isLocalModel(model.files), - ); + const models = await this.modelsCliUsecases.listAllModels(); if (!models.length) throw 'No models found'; const { model } = await this.inquirerService.inquirer.prompt({ type: 'list', diff --git a/cortex-js/src/infrastructure/controllers/messages.controller.spec.ts b/cortex-js/src/infrastructure/controllers/messages.controller.spec.ts deleted file mode 100644 index 351155b6e..000000000 --- a/cortex-js/src/infrastructure/controllers/messages.controller.spec.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { MessagesController } from './messages.controller'; -import { MessagesUsecases } from '@/usecases/messages/messages.usecases'; -import { DatabaseModule } from '../database/database.module'; - -describe('MessagesController', () => { - let controller: MessagesController; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [DatabaseModule], - controllers: [MessagesController], - providers: [MessagesUsecases], - exports: [MessagesUsecases], - }).compile(); - - controller = module.get(MessagesController); - }); - - it('should be defined', () => { - expect(controller).toBeDefined(); - }); -}); diff --git a/cortex-js/src/infrastructure/controllers/messages.controller.ts b/cortex-js/src/infrastructure/controllers/messages.controller.ts deleted file mode 100644 index d668ffec6..000000000 --- a/cortex-js/src/infrastructure/controllers/messages.controller.ts +++ /dev/null @@ -1,116 +0,0 @@ -import { - Controller, - Get, - Post, - Body, - Patch, - Param, - HttpCode, - Delete, - UseInterceptors, -} from '@nestjs/common'; -import { MessagesUsecases } from '@/usecases/messages/messages.usecases'; -import { CreateMessageDto } from '@/infrastructure/dtos/messages/create-message.dto'; -import { UpdateMessageDto } from '@/infrastructure/dtos/messages/update-message.dto'; -import { ListMessagesResponseDto } from '@/infrastructure/dtos/messages/list-message.dto'; -import { GetMessageResponseDto } from '@/infrastructure/dtos/messages/get-message.dto'; -import { DeleteMessageResponseDto } from '@/infrastructure/dtos/messages/delete-message.dto'; -import { ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger'; -import { TransformInterceptor } from '../interceptors/transform.interceptor'; - -@ApiTags('Messages') -@Controller('messages') -@UseInterceptors(TransformInterceptor) -export class MessagesController { - constructor(private readonly messagesUsecases: MessagesUsecases) {} - - @HttpCode(201) - @ApiResponse({ - status: 201, - description: 'The message has been successfully created.', - type: CreateMessageDto, - }) - @ApiOperation({ - summary: 'Create message', - description: 'Creates a message in a thread.', - }) - @Post() - create(@Body() createMessageDto: CreateMessageDto) { - return this.messagesUsecases.create(createMessageDto); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - type: ListMessagesResponseDto, - }) - @ApiOperation({ - summary: 'List messages', - description: 'Retrieves all the messages in a thread.', - }) - @Get() - findAll() { - return this.messagesUsecases.findAll(); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - type: GetMessageResponseDto, - }) - @ApiOperation({ - summary: 'Retrieve message', - description: "Retrieves a specific message defined by a message's `id`.", - }) - @ApiParam({ - name: 'id', - required: true, - description: 'The unique identifier of the message.', - }) - @Get(':id') - findOne(@Param('id') id: string) { - return this.messagesUsecases.findOne(id); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'The message has been successfully updated.', - type: UpdateMessageDto, - }) - @ApiOperation({ - summary: 'Update message', - description: "Updates a specific message defined by a message's `id`.", - }) - @ApiParam({ - name: 'id', - required: true, - description: 'The unique identifier of the message.', - }) - @Patch(':id') - update(@Param('id') id: string, @Body() updateMessageDto: UpdateMessageDto) { - return this.messagesUsecases.update(id, updateMessageDto); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Successfully deleted the message.', - type: DeleteMessageResponseDto, - }) - @ApiOperation({ - summary: 'Delete message', - description: "Deletes a specific message defined by a message's `id`.", - }) - @ApiParam({ - name: 'id', - required: true, - description: 'The unique identifier of the message.', - }) - @Delete(':id') - remove(@Param('id') id: string) { - return this.messagesUsecases.remove(id); - } -} diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts index b3f5b4ac5..bcd4ab272 100644 --- a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ b/cortex-js/src/infrastructure/controllers/models.controller.ts @@ -20,6 +20,7 @@ import { ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger'; import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; import { TransformInterceptor } from '../interceptors/transform.interceptor'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { ModelSettingsDto } from '../dtos/models/model-settings.dto'; @ApiTags('Models') @Controller('models') @@ -61,10 +62,13 @@ export class ModelsController { description: 'The unique identifier of the model.', }) @Post(':modelId(*)/start') - startModel(@Param('modelId') modelId: string, @Body() model: ModelDto) { + startModel( + @Param('modelId') modelId: string, + @Body() params: ModelSettingsDto, + ) { return this.cortexUsecases .startCortex() - .then(() => this.modelsUsecases.startModel(modelId, model)); + .then(() => this.modelsUsecases.startModel(modelId, params)); } @HttpCode(200) diff --git a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts index 09beb940b..4ff037171 100644 --- a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts @@ -4,6 +4,7 @@ import { IsNumber, IsOptional, IsString, + Min, } from 'class-validator'; import { Model } from '@/domain/models/model.interface'; import { ModelArtifactDto } from './model-artifact.dto'; @@ -50,6 +51,7 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: 'Sets the upper limit on the number of tokens the model can generate in a single output.', + example: 4096, }) @IsOptional() @IsNumber() @@ -97,30 +99,40 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: 'Sets the maximum input the model can use to generate a response, it varies with the model used.', + example: 4096, }) @IsOptional() @IsNumber() ctx_len?: number; - @ApiProperty({ description: 'Determines GPU layer usage.' }) + @ApiProperty({ description: 'Determines GPU layer usage.', example: 32 }) @IsOptional() @IsNumber() ngl?: number; - @ApiProperty({ description: 'Number of parallel processing units to use.' }) + @ApiProperty({ + description: 'Number of parallel processing units to use.', + example: 1, + }) @IsOptional() @IsNumber() + @Min(1) n_parallel?: number; @ApiProperty({ description: 'Determines CPU inference threads, limited by hardware and OS. ', + example: 10, }) @IsOptional() @IsNumber() + @Min(1) cpu_threads?: number; - @ApiProperty({ description: 'The engine used to run the model.' }) + @ApiProperty({ + description: 'The engine used to run the model.', + example: 'cortex.llamacpp', + }) @IsOptional() @IsString() engine?: string; diff --git a/cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts new file mode 100644 index 000000000..32dffe469 --- /dev/null +++ b/cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts @@ -0,0 +1,56 @@ +import { ModelSettingParams } from '@/domain/models/model.interface'; +import { ApiProperty } from '@nestjs/swagger'; +import { IsArray, IsNumber, IsOptional, Min } from 'class-validator'; + +export class ModelSettingsDto implements ModelSettingParams { + // Prompt Settings + @ApiProperty({ + example: 'system\n{system_message}\nuser\n{prompt}\nassistant', + description: + "A predefined text or framework that guides the AI model's response generation.", + }) + @IsOptional() + prompt_template?: string; + + @ApiProperty({ + type: [String], + example: [], + description: + 'Defines specific tokens or phrases that signal the model to stop producing further output.', + }) + @IsArray() + @IsOptional() + stop?: string[]; + + // Engine Settings + @ApiProperty({ description: 'Determines GPU layer usage.', example: 4096 }) + @IsOptional() + @IsNumber() + ngl?: number; + + @ApiProperty({ + description: + 'The context length for model operations varies; the maximum depends on the specific model used.', + example: 4096, + }) + @IsOptional() + @IsNumber() + ctx_len?: number; + + @ApiProperty({ + description: + 'Determines CPU inference threads, limited by hardware and OS. ', + example: 10, + }) + @IsOptional() + @IsNumber() + @Min(1) + cpu_threads?: number; + + @ApiProperty({ + example: 'cortex.llamacpp', + description: 'The engine to use.', + }) + @IsOptional() + engine?: string; +} diff --git a/cortex-js/src/infrastructure/repositories/model/model.repository.ts b/cortex-js/src/infrastructure/repositories/model/model.repository.ts index 6401c1ec7..78bb3c13d 100644 --- a/cortex-js/src/infrastructure/repositories/model/model.repository.ts +++ b/cortex-js/src/infrastructure/repositories/model/model.repository.ts @@ -12,7 +12,10 @@ import { writeFileSync, } from 'fs'; import { load, dump } from 'js-yaml'; -import { normalizeModelId } from '@/infrastructure/commanders/utils/normalize-model-id'; +import { + isLocalModel, + normalizeModelId, +} from '@/infrastructure/commanders/utils/normalize-model-id'; @Injectable() export class ModelRepositoryImpl implements ModelRepository { @@ -58,7 +61,9 @@ export class ModelRepositoryImpl implements ModelRepository { * @returns the created model */ findAll(): Promise { - return this.loadModels(); + return this.loadModels().then((res) => + res.filter((model) => isLocalModel(model.files)), + ); } /** * Find one model by id diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index f867667f8..f7fbd5a6c 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -3,7 +3,10 @@ import { ChildProcess, spawn } from 'child_process'; import { join } from 'path'; import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto'; import { HttpService } from '@nestjs/axios'; -import { defaultCortexCppHost, defaultCortexCppPort } from '@/infrastructure/constants/cortex'; +import { + defaultCortexCppHost, + defaultCortexCppPort, +} from '@/infrastructure/constants/cortex'; import { existsSync } from 'node:fs'; import { firstValueFrom } from 'rxjs'; import { FileManagerService } from '@/file-manager/file-manager.service'; diff --git a/cortex-js/src/usecases/messages/messages.module.ts b/cortex-js/src/usecases/messages/messages.module.ts index cab6b863f..ab759dc81 100644 --- a/cortex-js/src/usecases/messages/messages.module.ts +++ b/cortex-js/src/usecases/messages/messages.module.ts @@ -1,11 +1,10 @@ import { Module } from '@nestjs/common'; import { MessagesUsecases } from './messages.usecases'; -import { MessagesController } from '@/infrastructure/controllers/messages.controller'; import { DatabaseModule } from '@/infrastructure/database/database.module'; @Module({ imports: [DatabaseModule], - controllers: [MessagesController], + controllers: [], providers: [MessagesUsecases], exports: [MessagesUsecases], }) diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index b7793d9d5..79af49f9f 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -69,7 +69,11 @@ export class ModelsUsecases { return this.modelRepository .remove(id) - .then(() => rmdirSync(modelFolder, { recursive: true })) + .then( + () => + existsSync(modelFolder) && + rmdirSync(modelFolder, { recursive: true }), + ) .then(() => { return { message: 'Model removed successfully', @@ -100,7 +104,9 @@ export class ModelsUsecases { // Default settings ctx_len: 4096, ngl: 100, - ...(Array.isArray(model?.files) && + //TODO: Utils for model file retrieval + ...(model?.files && + Array.isArray(model.files) && !('llama_model_path' in model) && { llama_model_path: (model.files as string[])[0], }),