diff --git a/cortex-js/src/app.module.ts b/cortex-js/src/app.module.ts index 189d8e487..f1e3de965 100644 --- a/cortex-js/src/app.module.ts +++ b/cortex-js/src/app.module.ts @@ -10,7 +10,6 @@ import { ExtensionModule } from './infrastructure/repositories/extensions/extens import { CortexModule } from './usecases/cortex/cortex.module'; import { ConfigModule } from '@nestjs/config'; import { env } from 'node:process'; -import { SeedService } from './usecases/seed/seed.service'; import { FileManagerModule } from './infrastructure/services/file-manager/file-manager.module'; import { AppLoggerMiddleware } from './infrastructure/middlewares/app.logger.middleware'; import { TelemetryModule } from './usecases/telemetry/telemetry.module'; @@ -38,6 +37,7 @@ import { DownloadManagerModule } from './infrastructure/services/download-manage envFilePath: env.NODE_ENV !== 'production' ? '.env.development' : '.env', }), EventEmitterModule.forRoot(), + DownloadManagerModule, DatabaseModule, MessagesModule, ThreadsModule, @@ -62,7 +62,6 @@ import { DownloadManagerModule } from './infrastructure/services/download-manage EventsController, ], providers: [ - SeedService, { provide: APP_FILTER, useClass: GlobalExceptionFilter, diff --git a/cortex-js/src/domain/models/assistant.interface.ts b/cortex-js/src/domain/models/assistant.interface.ts index 7668f6df2..73b1f454b 100644 --- a/cortex-js/src/domain/models/assistant.interface.ts +++ b/cortex-js/src/domain/models/assistant.interface.ts @@ -1,39 +1,10 @@ -/** - * Assistant type defines the shape of an assistant object. - * @stored - */ +import { Assistant as OpenAiAssistant } from 'openai/resources/beta/assistants'; +import { AssistantResponseFormatOption as OpenAIAssistantResponseFormatOption } from 'openai/resources/beta/threads/threads'; -export interface AssistantTool { - type: string; - enabled: boolean; - settings: any; +export interface Assistant extends OpenAiAssistant { + avatar?: string; } -export interface Assistant { - /** Represents the unique identifier of the object. */ - id: string; - /** Represents the avatar of the user. */ - avatar: string; - /** Represents the location of the thread. */ - thread_location?: string; - /** Represents the object. */ - object: string; - /** Represents the creation timestamp of the object. */ - created_at: number; - /** Represents the name of the object. */ - name: string; - /** Represents the description of the object. */ - description?: string; - /** Represents the model of the object. */ - model: string; - /** Represents the instructions for the object. */ - instructions?: string; - /** Represents the tools associated with the object. */ - tools?: AssistantTool[]; - /** Represents the file identifiers associated with the object. */ - file_ids: string[]; - /** Represents the metadata of the object. */ - metadata?: AssistantMetadata; -} +export type AssistantResponseFormatOption = OpenAIAssistantResponseFormatOption; -export interface AssistantMetadata {} +export interface AssistantToolResources extends OpenAiAssistant.ToolResources {} diff --git a/cortex-js/src/domain/models/message.interface.ts b/cortex-js/src/domain/models/message.interface.ts index e66aa9e9b..86bf82717 100644 --- a/cortex-js/src/domain/models/message.interface.ts +++ b/cortex-js/src/domain/models/message.interface.ts @@ -1,81 +1,16 @@ -export enum ChatCompletionRole { - System = 'system', - Assistant = 'assistant', - User = 'user', -} +import { + Message as OpenAiMessage, + MessageContent as OpenAiMessageContent, + TextContentBlock as OpenAiTextContentBlock, +} from 'openai/resources/beta/threads/messages'; -export enum ContentType { - Text = 'text', - Image = 'image', - Pdf = 'pdf', -} +export interface Message extends OpenAiMessage {} -export interface ContentValue { - value: string; - annotations: string[]; - name?: string; - size?: number; -} +export type MessageContent = OpenAiMessageContent; -export interface ThreadContent { - type: ContentType; - text: ContentValue; -} +export type TextContentBlock = OpenAiTextContentBlock; -/** - * The status of the message. - * @data_transfer_object - */ -export enum MessageStatus { - /** Message is fully loaded. **/ - Ready = 'ready', - /** Message is not fully loaded. **/ - Pending = 'pending', - /** Message loaded with error. **/ - Error = 'error', - /** Message is cancelled streaming */ - Stopped = 'stopped', -} +export interface MessageIncompleteDetails + extends OpenAiMessage.IncompleteDetails {} -/** - * The error code which explain what error type. Used in conjunction with MessageStatus.Error - */ -export enum ErrorCode { - InvalidApiKey = 'invalid_api_key', - - InsufficientQuota = 'insufficient_quota', - - InvalidRequestError = 'invalid_request_error', - - Unknown = 'unknown', -} - -export interface Message { - /** Unique identifier for the message, generated by default using the ULID method. **/ - id: string; - /** Object name **/ - object: string; - /** Thread id, default is a ulid. **/ - thread_id: string; - /** The assistant id of this thread. **/ - assistant_id?: string; - /** The role of the author of this message. **/ - role: ChatCompletionRole; - /** The content of this message. **/ - content: ThreadContent[]; - /** The status of this message. **/ - status: MessageStatus; - /** The timestamp indicating when this message was created. Represented in Unix time. **/ - created: number; - /** The timestamp indicating when this message was updated. Represented in Unix time. **/ - updated?: number; - /** The additional metadata of this message. **/ - metadata?: MessageMetadata; - /** The error code which explain what error type. Used in conjunction with MessageStatus.Error */ - error_code?: ErrorCode; -} - -/** - * The additional metadata of this message. - */ -export interface MessageMetadata {} +export interface MessageAttachment extends OpenAiMessage.Attachment {} diff --git a/cortex-js/src/domain/models/model.interface.ts b/cortex-js/src/domain/models/model.interface.ts index 9af4bd618..fdf1699da 100644 --- a/cortex-js/src/domain/models/model.interface.ts +++ b/cortex-js/src/domain/models/model.interface.ts @@ -1,8 +1,9 @@ -/** - * Model type defines the shape of a model object. - * @stored - */ -export interface Model { +import { Model as OpenAiModel } from 'openai/resources/models'; + +export interface Model + extends OpenAiModel, + ModelSettingParams, + ModelRuntimeParams { /** * Model identifier. */ @@ -23,57 +24,23 @@ export interface Model { */ files: string[] | ModelArtifact; - /** - * GGUF metadata: tokenizer.chat_template - */ - prompt_template?: string; - - /** - * Defines specific tokens or phrases at which the model will stop generating further output. - */ - stop?: string[]; - - /// Inferencing - /** - * Set probability threshold for more relevant outputs. - */ - top_p?: number; - - /** - * Controls the randomness of the model’s output. - */ - temperature?: number; - - /** - * Adjusts the likelihood of the model repeating words or phrases in its output. - */ - frequency_penalty?: number; - - /** - * Influences the generation of new and varied concepts in the model’s output. - */ - presence_penalty?: number; + metadata?: Record; +} - /// Engines +/** + * The available model settings. + */ +export interface ModelSettingParams { /** * The context length for model operations varies; the maximum depends on the specific model used. */ ctx_len?: number; - /** - * Enable real-time data processing for faster predictions. - */ - stream?: boolean; - - /* - * The maximum number of tokens the model will generate in a single response. - */ - max_tokens?: number; - /** * The number of layers to load onto the GPU for acceleration. */ ngl?: number; + embedding?: boolean; /** * Number of parallel sequences to decode @@ -85,6 +52,22 @@ export interface Model { */ cpu_threads?: number; + /** + * GGUF metadata: tokenizer.chat_template + */ + prompt_template?: string; + system_prompt?: string; + ai_prompt?: string; + user_prompt?: string; + llama_model_path?: string; + mmproj?: string; + cont_batching?: boolean; + + /** + * The model engine. + */ + engine?: string; + /** * The prompt to use for internal configuration */ @@ -134,59 +117,48 @@ export interface Model { * To enable mmap, default is true */ use_mmap?: boolean; - - /** - * The model engine. - */ - engine?: string; -} - -/** - * The available model settings. - */ -export interface ModelSettingParams { - ctx_len?: number; - ngl?: number; - embedding?: boolean; - n_parallel?: number; - cpu_threads?: number; - prompt_template?: string; - system_prompt?: string; - ai_prompt?: string; - user_prompt?: string; - llama_model_path?: string; - mmproj?: string; - cont_batching?: boolean; - engine?: string; - stop?: string[]; - pre_prompt?: string; - n_batch?: number; - caching_enabled?: boolean; - grp_attn_n?: number; - grp_attn_w?: number; - mlock?: boolean; - grammar_file?: string; - model_type?: string; - model_alias?: string; - flash_attn?: boolean; - cache_type?: string; - use_mmap?: boolean; } /** * The available model runtime parameters. */ export interface ModelRuntimeParams { + /** + * Controls the randomness of the model’s output. + */ temperature?: number; token_limit?: number; top_k?: number; + + /** + * Set probability threshold for more relevant outputs. + */ top_p?: number; + + /** + * Enable real-time data processing for faster predictions. + */ stream?: boolean; + + /* + * The maximum number of tokens the model will generate in a single response. + */ max_tokens?: number; + + /** + * Defines specific tokens or phrases at which the model will stop generating further output. + */ stop?: string[]; + + /** + * Adjusts the likelihood of the model repeating words or phrases in its output. + */ frequency_penalty?: number; + + /** + * Influences the generation of new and varied concepts in the model’s output. + */ presence_penalty?: number; - engine?: string; } /** diff --git a/cortex-js/src/domain/models/thread.interface.ts b/cortex-js/src/domain/models/thread.interface.ts index bdfb67b32..20dc7d3be 100644 --- a/cortex-js/src/domain/models/thread.interface.ts +++ b/cortex-js/src/domain/models/thread.interface.ts @@ -1,46 +1,12 @@ -import { AssistantTool } from './assistant.interface'; -import { Model } from './model.interface'; +import { Thread as OpenAiThread } from 'openai/resources/beta/threads/threads'; +import { Assistant } from './assistant.interface'; -export interface Thread { - /** Unique identifier for the thread, generated by default using the ULID method. **/ - id: string; +export interface ThreadToolResources extends OpenAiThread.ToolResources {} - /** Object name **/ - object: string; - - /** The title of this thread. **/ +export interface Thread extends OpenAiThread { title: string; - /** Assistants in this thread. **/ - assistants: ThreadAssistantInfo[]; - - /** The timestamp indicating when this thread was created, represented in ISO 8601 format. **/ - createdAt: number; - - /** The timestamp indicating when this thread was updated, represented in ISO 8601 format. **/ - updatedAt?: number; - - /** - * The additional metadata of this thread. - **/ - metadata?: ThreadMetadata; -} - -/** - * The additional metadata of this thread. - */ -export interface ThreadMetadata { - lastMessage?: string; -} + assistants: Assistant[]; -/** - * Represents the information about an assistant in a thread. - * @stored - */ -export interface ThreadAssistantInfo { - assistant_id: string; - assistant_name: string; - model: Partial; - instructions?: string; - tools?: AssistantTool[]; + tool_resources: ThreadToolResources | null; } diff --git a/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts index 424805347..f51b00f3c 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts @@ -1,8 +1,3 @@ -import { - ChatCompletionRole, - ContentType, - MessageStatus, -} from '@/domain/models/message.interface'; import { exit, stdin, stdout } from 'node:process'; import * as readline from 'node:readline/promises'; import { ChatCompletionMessage } from '@/infrastructure/dtos/chat/chat-completion-message.dto'; @@ -19,6 +14,7 @@ import { CreateMessageDto } from '@/infrastructure/dtos/messages/create-message. import { MessagesUsecases } from '@/usecases/messages/messages.usecases'; import { ModelParameterParser } from '@/utils/model-parameter.parser'; import { ChatUsecases } from '@/usecases/chat/chat.usecases'; +import { TextContentBlock } from '@/domain/models/message.interface'; @Injectable() export class ChatCliUsecases { @@ -46,7 +42,7 @@ export class ChatCliUsecases { const messages: ChatCompletionMessage[] = ( await this.messagesUsecases.getLastMessagesByThread(thread.id, 10) ).map((message) => ({ - content: message.content[0].text.value, + content: (message.content[0] as TextContentBlock).text.value, role: message.role, })); @@ -76,22 +72,22 @@ export class ChatCliUsecases { messages.push({ content: userInput, - role: ChatCompletionRole.User, + role: 'user', }); const createMessageDto: CreateMessageDto = { thread_id: thread.id, - role: ChatCompletionRole.User, + role: 'user', content: [ { - type: ContentType.Text, + type: 'text', text: { value: userInput, annotations: [], }, }, ], - status: MessageStatus.Ready, + status: 'completed', }; this.messagesUsecases.create(createMessageDto); @@ -127,22 +123,22 @@ export class ChatCliUsecases { stdout.write(assistantResponse); messages.push({ content: assistantResponse, - role: ChatCompletionRole.Assistant, + role: 'assistant', }); const createMessageDto: CreateMessageDto = { thread_id: thread.id, - role: ChatCompletionRole.Assistant, + role: 'assistant', content: [ { - type: ContentType.Text, + type: 'text', text: { value: assistantResponse, annotations: [], }, }, ], - status: MessageStatus.Ready, + status: 'completed', }; this.messagesUsecases.create(createMessageDto).then(() => { @@ -164,21 +160,21 @@ export class ChatCliUsecases { response.on('end', () => { messages.push({ content: assistantResponse, - role: ChatCompletionRole.Assistant, + role: 'assistant', }); const createMessageDto: CreateMessageDto = { thread_id: thread.id, - role: ChatCompletionRole.Assistant, + role: 'assistant', content: [ { - type: ContentType.Text, + type: 'text', text: { value: assistantResponse, annotations: [], }, }, ], - status: MessageStatus.Ready, + status: 'completed', }; this.messagesUsecases.create(createMessageDto).then(() => { @@ -265,9 +261,16 @@ export class ChatCliUsecases { if (!assistant) throw new Error('No assistant available'); const assistantDto: CreateThreadAssistantDto = { - assistant_id: assistant.id, - assistant_name: assistant.name, - model: model, + avatar: '', + id: 'jan', + object: 'assistant', + created_at: Date.now(), + name: 'Jan', + description: 'A default assistant that can use all downloaded models', + model: modelId, + instructions: '', + tools: [], + metadata: {}, }; const createThreadDto: CreateThreadDto = { diff --git a/cortex-js/src/infrastructure/controllers/assistants.controller.spec.ts b/cortex-js/src/infrastructure/controllers/assistants.controller.spec.ts index 6f634aa5d..9d642fcea 100644 --- a/cortex-js/src/infrastructure/controllers/assistants.controller.spec.ts +++ b/cortex-js/src/infrastructure/controllers/assistants.controller.spec.ts @@ -2,13 +2,21 @@ import { Test, TestingModule } from '@nestjs/testing'; import { AssistantsController } from './assistants.controller'; import { AssistantsUsecases } from '@/usecases/assistants/assistants.usecases'; import { DatabaseModule } from '@/infrastructure/database/database.module'; +import { ModelRepositoryModule } from '../repositories/models/model.module'; +import { DownloadManagerModule } from '../services/download-manager/download-manager.module'; +import { EventEmitterModule } from '@nestjs/event-emitter'; describe('AssistantsController', () => { let controller: AssistantsController; beforeEach(async () => { const module: TestingModule = await Test.createTestingModule({ - imports: [DatabaseModule], + imports: [ + EventEmitterModule.forRoot(), + DatabaseModule, + ModelRepositoryModule, + DownloadManagerModule, + ], controllers: [AssistantsController], providers: [AssistantsUsecases], exports: [AssistantsUsecases], diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts index 8df1d7391..6d42e4733 100644 --- a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ b/cortex-js/src/infrastructure/controllers/models.controller.ts @@ -163,9 +163,7 @@ export class ModelsController { }) @Get() findAll() { - return this.modelsUsecases - .findAll() - .then((data) => data.map((e) => ({ id: e.model, ...e }))); + return this.modelsUsecases.findAll(); } @HttpCode(200) @@ -198,15 +196,21 @@ export class ModelsController { @ApiOperation({ summary: 'Update model', description: "Updates a model instance defined by a model's `id`.", + parameters: [ + { + in: 'path', + name: 'model', + required: true, + description: 'The unique identifier of the model.', + }, + ], }) - @ApiParam({ - name: 'id', - required: true, - description: 'The unique identifier of the model.', - }) - @Patch(':id(*)') - update(@Param('id') id: string, @Body() updateModelDto: UpdateModelDto) { - return this.modelsUsecases.update(id, updateModelDto); + @Post(':model(*)/config') + async update( + @Param('model') model: string, + @Body() updateModelDto: UpdateModelDto, + ) { + return this.modelsUsecases.update(model, updateModelDto); } @ApiResponse({ diff --git a/cortex-js/src/infrastructure/controllers/threads.controller.ts b/cortex-js/src/infrastructure/controllers/threads.controller.ts index 8aa108e38..78a013864 100644 --- a/cortex-js/src/infrastructure/controllers/threads.controller.ts +++ b/cortex-js/src/infrastructure/controllers/threads.controller.ts @@ -215,6 +215,24 @@ export class ThreadsController { ); } + @ApiOperation({ + summary: 'Clean thread', + description: 'Deletes all messages in a thread.', + tags: ['Threads'], + parameters: [ + { + in: 'path', + name: 'thread_id', + required: true, + description: 'The ID of the thread to clean.', + }, + ], + }) + @Post(':thread_id/clean') + async cleanThread(@Param('thread_id') threadId: string) { + return this.threadsUsecases.clean(threadId); + } + @ApiOperation({ summary: 'Delete message', description: 'Deletes a message.', diff --git a/cortex-js/src/infrastructure/dtos/assistants/assistant-tool.dto.ts b/cortex-js/src/infrastructure/dtos/assistants/assistant-tool.dto.ts deleted file mode 100644 index 4deac2aab..000000000 --- a/cortex-js/src/infrastructure/dtos/assistants/assistant-tool.dto.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { IsBoolean, IsString } from 'class-validator'; -import { AssistantTool } from '@/domain/models/assistant.interface'; - -export class AssistantToolDto implements AssistantTool { - @ApiProperty({ - description: "The type of the assistant's tool.", - }) - @IsString() - type: string; - - @ApiProperty({ - description: "Enable or disable the assistant's tool.", - }) - @IsBoolean() - enabled: boolean; - - @ApiProperty({ - description: "The setting of the assistant's tool.", - }) - @ApiProperty() - settings: any; -} diff --git a/cortex-js/src/infrastructure/dtos/assistants/create-assistant.dto.ts b/cortex-js/src/infrastructure/dtos/assistants/create-assistant.dto.ts index 61d7dfb44..65e387a26 100644 --- a/cortex-js/src/infrastructure/dtos/assistants/create-assistant.dto.ts +++ b/cortex-js/src/infrastructure/dtos/assistants/create-assistant.dto.ts @@ -1,33 +1,37 @@ -import { Type } from 'class-transformer'; -import { IsArray, IsOptional, IsString, ValidateNested } from 'class-validator'; -import { - Assistant, - AssistantMetadata, -} from '@/domain/models/assistant.interface'; -import { AssistantToolDto } from './assistant-tool.dto'; +import { IsArray, IsNumber, IsOptional, IsString } from 'class-validator'; +import { Assistant } from '@/domain/models/assistant.interface'; import { ApiProperty } from '@nestjs/swagger'; export class CreateAssistantDto implements Partial { @ApiProperty({ description: 'The unique identifier of the assistant.', + example: 'jan', + default: 'jan', }) @IsString() id: string; @ApiProperty({ description: 'The avatar of the assistant.', + example: '', + default: '', }) + @IsOptional() @IsString() - avatar: string; + avatar?: string; @ApiProperty({ description: 'The name of the assistant.', + example: 'Jan', + default: 'Jan', }) @IsString() name: string; @ApiProperty({ description: 'The description of the assistant.', + example: 'A default assistant that can use all downloaded models', + default: 'A default assistant that can use all downloaded models', }) @IsString() description: string; @@ -40,29 +44,41 @@ export class CreateAssistantDto implements Partial { @ApiProperty({ description: 'The instructions for the assistant.', + example: '', + default: '', }) @IsString() instructions: string; @ApiProperty({ description: 'The tools associated with the assistant.', + example: [], + default: [], }) @IsArray() - @ValidateNested({ each: true }) - @Type(() => AssistantToolDto) - tools: AssistantToolDto[]; + tools: any[]; @ApiProperty({ - description: - 'The identifiers of the files that have been uploaded to the thread.', + description: 'The metadata of the assistant.', }) - @IsArray() @IsOptional() - file_ids: string[]; + metadata: unknown | null; @ApiProperty({ - description: 'The metadata of the assistant.', + description: 'Top p.', + example: '0.7', + default: '0.7', + }) + @IsOptional() + @IsNumber() + top_p?: number; + + @ApiProperty({ + description: 'Temperature.', + example: '0.7', + default: '0.7', }) @IsOptional() - metadata?: AssistantMetadata; + @IsNumber() + temperature?: number; } diff --git a/cortex-js/src/infrastructure/dtos/assistants/model-setting.dto.ts b/cortex-js/src/infrastructure/dtos/assistants/model-setting.dto.ts new file mode 100644 index 000000000..53c06d497 --- /dev/null +++ b/cortex-js/src/infrastructure/dtos/assistants/model-setting.dto.ts @@ -0,0 +1,208 @@ +import { ApiProperty } from '@nestjs/swagger'; +import { IsArray, IsOptional } from 'class-validator'; + +export class ModelSettingDto { + @ApiProperty({ + type: 'number', + minimum: 0, + maximum: 1, + required: false, + default: 1, + description: `What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.`, + }) + temperature: number; + + @ApiProperty({ + type: 'number', + minimum: 0, + maximum: 1, + required: false, + default: 1, + description: `An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or temperature but not both.`, + }) + top_p: number; + + @ApiProperty({ + required: false, + example: '', + description: 'GGUF metadata: tokenizer.chat_template', + }) + prompt_template?: string; + + @ApiProperty({ + required: false, + example: [], + description: + 'Defines specific tokens or phrases at which the model will stop generating further output.', + default: [], + }) + @IsArray() + @IsOptional() + stop?: string[]; + + @ApiProperty({ + required: false, + type: 'number', + example: 0, + description: + 'Adjusts the likelihood of the model repeating words or phrases in its output.', + }) + frequency_penalty?: number; + + @ApiProperty({ + required: false, + type: 'number', + example: 0, + description: + 'Influences the generation of new and varied concepts in the model’s output.', + }) + presence_penalty?: number; + + @ApiProperty({ + required: false, + type: 'number', + example: 4096, + default: 4096, + description: + 'The context length for model operations varies; the maximum depends on the specific model used.', + }) + ctx_len?: number; + + @ApiProperty({ + required: false, + type: 'boolean', + example: true, + default: true, + description: 'Enable real-time data processing for faster predictions.', + }) + stream?: boolean; + + @ApiProperty({ + required: false, + type: 'number', + example: 2048, + default: 2048, + description: + 'The maximum number of tokens the model will generate in a single response.', + }) + max_tokens?: number; + + @ApiProperty({ + required: false, + type: 'number', + example: 1, + default: 1, + description: 'The number of layers to load onto the GPU for acceleration.', + }) + ngl?: number; + + @ApiProperty({ + required: false, + type: 'number', + example: 1, + default: 1, + description: 'Number of parallel sequences to decode', + }) + n_parallel?: number; + + @ApiProperty({ + required: false, + type: 'number', + example: 1, + default: 1, + description: + 'Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)', + }) + cpu_threads?: number; + + @ApiProperty({ + required: false, + type: 'string', + example: '', + default: '', + description: 'The prompt to use for internal configuration', + }) + pre_prompt?: string; + + @ApiProperty({ + required: false, + type: 'number', + example: 0, + default: 0, + description: 'The batch size for prompt eval step', + }) + n_batch?: number; + + @ApiProperty({ + required: false, + type: 'boolean', + example: true, + default: true, + description: 'To enable prompt caching or not', + }) + caching_enabled?: boolean; + + @ApiProperty({ + required: false, + type: 'number', + example: 0, + default: 0, + description: 'Group attention factor in self-extend', + }) + grp_attn_n?: number; + + @ApiProperty({ + required: false, + type: 'number', + example: 0, + default: 0, + description: 'Group attention width in self-extend', + }) + grp_attn_w?: number; + + @ApiProperty({ + required: false, + type: 'boolean', + example: false, + default: false, + description: 'Prevent system swapping of the model to disk in macOS', + }) + mlock?: boolean; + + @ApiProperty({ + required: false, + type: 'string', + example: '', + default: '', + description: + 'You can constrain the sampling using GBNF grammars by providing path to a grammar file', + }) + grammar_file?: string; + + @ApiProperty({ + required: false, + type: 'boolean', + example: true, + default: true, + description: 'To enable Flash Attention, default is true', + }) + flash_attn?: boolean; + + @ApiProperty({ + required: false, + type: 'string', + example: '', + default: '', + description: 'KV cache type: f16, q8_0, q4_0, default is f16', + }) + cache_type?: string; + + @ApiProperty({ + required: false, + type: 'boolean', + example: true, + default: true, + description: 'To enable mmap, default is true', + }) + use_mmap?: boolean; +} diff --git a/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts b/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts index ea2b17f11..4845e1a2f 100644 --- a/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts +++ b/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts @@ -1,5 +1,4 @@ -import { IsEnum, IsString } from 'class-validator'; -import { ChatCompletionRole } from '@/domain/models/message.interface'; +import { IsString } from 'class-validator'; import { ApiProperty } from '@nestjs/swagger'; export class ChatCompletionMessage { @@ -10,6 +9,5 @@ export class ChatCompletionMessage { @ApiProperty({ description: 'The role of the entity in the chat completion.', }) - @IsEnum(ChatCompletionRole) - role: ChatCompletionRole; + role: 'user' | 'assistant'; } diff --git a/cortex-js/src/infrastructure/dtos/messages/content-value.dto.ts b/cortex-js/src/infrastructure/dtos/messages/content-value.dto.ts deleted file mode 100644 index fe377d316..000000000 --- a/cortex-js/src/infrastructure/dtos/messages/content-value.dto.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { IsArray, IsNumber, IsOptional, IsString } from 'class-validator'; -import { ContentValue } from '@/domain/models/message.interface'; -import { ApiProperty } from '@nestjs/swagger'; - -export class ContentValueDto implements ContentValue { - @ApiProperty({ description: "The text's value." }) - @IsString() - value: string; - - @ApiProperty({ - description: "The text's annotation that categorize the text.", - }) - @IsArray() - annotations: string[]; - - @ApiProperty({ description: 'The name or title of the text.' }) - @IsOptional() - @IsString() - name?: string; - - @ApiProperty({ description: "The text's size in bytes." }) - @IsOptional() - @IsNumber() - size?: number; -} diff --git a/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts b/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts index 22499b9e6..79fb65acb 100644 --- a/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts +++ b/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts @@ -1,13 +1,6 @@ import { ApiProperty } from '@nestjs/swagger'; -import { IsArray, IsEnum, IsString, ValidateNested } from 'class-validator'; -import { - ChatCompletionRole, - ErrorCode, - Message, - MessageStatus, -} from '@/domain/models/message.interface'; -import { ThreadContentDto } from './thread-content.dto'; -import { Type } from 'class-transformer'; +import { IsArray, IsString } from 'class-validator'; +import { Message, MessageContent } from '@/domain/models/message.interface'; export class CreateMessageDto implements Partial { @ApiProperty({ @@ -21,18 +14,14 @@ export class CreateMessageDto implements Partial { assistant_id?: string; @ApiProperty({ description: 'The sources of the messages.' }) - @IsEnum(ChatCompletionRole) - role: ChatCompletionRole; + role: 'user' | 'assistant'; @ApiProperty({ description: 'The content of the messages.' }) @IsArray() - @ValidateNested({ each: true }) - @Type(() => ThreadContentDto) - content: ThreadContentDto[]; + content: MessageContent[]; @ApiProperty({ description: 'Current status of the message.' }) - @IsEnum(MessageStatus) - status: MessageStatus; + status: 'in_progress' | 'incomplete' | 'completed'; @ApiProperty({ description: @@ -43,8 +32,4 @@ export class CreateMessageDto implements Partial { @ApiProperty({ description: 'Type of the message.' }) @IsString() type?: string; - - @ApiProperty({ description: 'Specifies the cause of any error.' }) - @IsEnum(ErrorCode) - error_code?: ErrorCode; } diff --git a/cortex-js/src/infrastructure/dtos/messages/thread-content.dto.ts b/cortex-js/src/infrastructure/dtos/messages/thread-content.dto.ts deleted file mode 100644 index ae54dd422..000000000 --- a/cortex-js/src/infrastructure/dtos/messages/thread-content.dto.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { IsEnum, ValidateNested } from 'class-validator'; -import { ContentType, ThreadContent } from '@/domain/models/message.interface'; -import { ContentValueDto } from './content-value.dto'; -import { ApiProperty } from '@nestjs/swagger'; - -export class ThreadContentDto implements ThreadContent { - @ApiProperty({ description: 'The type of content.' }) - @IsEnum(ContentType) - type: ContentType; - - @ApiProperty({ description: 'The content details.' }) - @ValidateNested() - text: ContentValueDto; -} diff --git a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts index 4ff037171..04d71e130 100644 --- a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts @@ -136,4 +136,11 @@ export class CreateModelDto implements Partial { @IsOptional() @IsString() engine?: string; + + @ApiProperty({ + description: 'The owner of the model.', + example: '', + default: '', + }) + owned_by?: string; } diff --git a/cortex-js/src/infrastructure/dtos/threads/create-message.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-message.dto.ts index f9ce9fed9..6b3bf0bd3 100644 --- a/cortex-js/src/infrastructure/dtos/threads/create-message.dto.ts +++ b/cortex-js/src/infrastructure/dtos/threads/create-message.dto.ts @@ -1,6 +1,4 @@ -import { ChatCompletionRole } from '@/domain/models/message.interface'; import { ApiProperty } from '@nestjs/swagger'; -import { IsEnum } from 'class-validator'; export class CreateMessageDto { @ApiProperty({ @@ -8,8 +6,7 @@ export class CreateMessageDto { - user: Indicates the message is sent by an actual user and should be used in most cases to represent user-generated messages. - assistant: Indicates the message is generated by the assistant. Use this value to insert messages from the assistant into the conversation.`, }) - @IsEnum(ChatCompletionRole) - role: ChatCompletionRole; + role: 'user' | 'assistant'; @ApiProperty({ description: 'The text contents of the message.', diff --git a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts index 2a6e8fbaf..40395f793 100644 --- a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts +++ b/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts @@ -1,34 +1,73 @@ -import { IsArray, IsOptional, IsString, ValidateNested } from 'class-validator'; -import { ThreadAssistantInfo } from '@/domain/models/thread.interface'; -import { AssistantToolDto } from '@/infrastructure/dtos/assistants/assistant-tool.dto'; -import { Type } from 'class-transformer'; +import { IsArray, IsNumber, IsOptional, IsString } from 'class-validator'; import { ApiProperty } from '@nestjs/swagger'; -import { CreateModelDto } from '../models/create-model.dto'; +import { + Assistant, + AssistantResponseFormatOption, + AssistantToolResources, +} from '@/domain/models/assistant.interface'; -export class CreateThreadAssistantDto implements ThreadAssistantInfo { - @ApiProperty({ description: 'The unique identifier of the assistant.' }) +export class CreateThreadAssistantDto implements Assistant { + @ApiProperty({ + description: 'The unique identifier of the assistant.', + type: 'string', + }) + @IsString() + id: string; + + @ApiProperty() + @IsOptional() @IsString() - assistant_id: string; + avatar?: string; @ApiProperty({ description: 'The name of the assistant.' }) @IsString() - assistant_name: string; + name: string; @ApiProperty({ description: "The model's unique identifier and settings." }) - @ValidateNested() - model: CreateModelDto; + @IsString() + model: string; @ApiProperty({ description: "The assistant's specific instructions." }) - @IsOptional() @IsString() - instructions?: string; + instructions: string; @ApiProperty({ description: "The thread's tool(Knowledge Retrieval) configurations.", }) @IsOptional() @IsArray() - @ValidateNested({ each: true }) - @Type(() => AssistantToolDto) - tools?: AssistantToolDto[]; + tools: any; + + @ApiProperty() + @IsString() + @IsOptional() + description: string | null; + + @ApiProperty() + @IsOptional() + metadata: Record | null; + + @ApiProperty() + object: 'assistant'; + + @ApiProperty() + @IsNumber() + @IsOptional() + temperature?: number | null; + + @ApiProperty() + @IsNumber() + @IsOptional() + top_p?: number | null; + + @ApiProperty() + created_at: number; + + @ApiProperty() + @IsOptional() + response_format?: AssistantResponseFormatOption; + + @ApiProperty() + @IsOptional() + tool_resources?: AssistantToolResources; } diff --git a/cortex-js/src/infrastructure/dtos/threads/create-thread.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-thread.dto.ts index 6752f7aa7..7ae7f3694 100644 --- a/cortex-js/src/infrastructure/dtos/threads/create-thread.dto.ts +++ b/cortex-js/src/infrastructure/dtos/threads/create-thread.dto.ts @@ -1,13 +1,10 @@ -import { IsArray, ValidateNested } from 'class-validator'; +import { IsArray } from 'class-validator'; import { Thread } from '@/domain/models/thread.interface'; import { CreateThreadAssistantDto } from './create-thread-assistant.dto'; -import { Type } from 'class-transformer'; import { ApiProperty } from '@nestjs/swagger'; export class CreateThreadDto implements Partial { @ApiProperty({ description: "The details of the thread's settings." }) @IsArray() - @ValidateNested({ each: true }) - @Type(() => CreateThreadAssistantDto) assistants: CreateThreadAssistantDto[]; } diff --git a/cortex-js/src/infrastructure/entities/assistant.entity.ts b/cortex-js/src/infrastructure/entities/assistant.entity.ts index 20dd124d9..d4726e4e2 100644 --- a/cortex-js/src/infrastructure/entities/assistant.entity.ts +++ b/cortex-js/src/infrastructure/entities/assistant.entity.ts @@ -1,45 +1,51 @@ import { Assistant, - AssistantMetadata, - AssistantTool, + AssistantResponseFormatOption, + AssistantToolResources, } from '@/domain/models/assistant.interface'; import { Column, Entity, PrimaryColumn } from 'typeorm'; @Entity('assistants') export class AssistantEntity implements Assistant { - @PrimaryColumn() + @PrimaryColumn({ type: String }) id: string; - @Column() - avatar: string; + @Column({ type: String, nullable: true }) + avatar?: string; - @Column({ nullable: true }) - thread_location?: string; + @Column({ type: String }) + object: 'assistant'; - @Column() - object: string; - - @Column() + @Column({ type: Number }) created_at: number; - @Column() - name: string; + @Column({ type: String, nullable: true }) + name: string | null; - @Column({ nullable: true }) - description?: string; + @Column({ type: String, nullable: true }) + description: string | null; - @Column() + @Column({ type: String }) model: string; - @Column({ nullable: true }) - instructions?: string; + @Column({ type: String, nullable: true }) + instructions: string | null; + + @Column({ type: 'simple-json' }) + tools: any; @Column({ type: 'simple-json', nullable: true }) - tools?: AssistantTool[]; + metadata: any | null; + + @Column({ type: Number, nullable: true }) + top_p: number | null; - @Column({ type: 'simple-array' }) - file_ids: string[]; + @Column({ type: Number, nullable: true }) + temperature: number | null; + + @Column({ type: 'simple-json', nullable: true }) + response_format: AssistantResponseFormatOption | null; @Column({ type: 'simple-json', nullable: true }) - metadata?: AssistantMetadata; + tool_resources: AssistantToolResources | null; } diff --git a/cortex-js/src/infrastructure/entities/message.entity.ts b/cortex-js/src/infrastructure/entities/message.entity.ts index 70bd8907f..906ed7a03 100644 --- a/cortex-js/src/infrastructure/entities/message.entity.ts +++ b/cortex-js/src/infrastructure/entities/message.entity.ts @@ -1,51 +1,52 @@ import { - ChatCompletionRole, - ErrorCode, Message, - MessageMetadata, - MessageStatus, - ThreadContent, + MessageContent, + MessageIncompleteDetails, + MessageAttachment, } from '@/domain/models/message.interface'; import { Column, Entity, PrimaryColumn } from 'typeorm'; @Entity('messages') export class MessageEntity implements Message { - @PrimaryColumn() + @PrimaryColumn({ type: String }) id: string; - @Column() - object: string; + @Column({ type: String }) + object: 'thread.message'; - @Column() + @Column({ type: String }) thread_id: string; - @Column({ nullable: true }) - assistant_id?: string; + @Column({ type: String, nullable: true }) + assistant_id: string | null; - @Column() - role: ChatCompletionRole; + @Column({ type: String }) + role: 'user' | 'assistant'; - @Column({ type: 'simple-json' }) - content: ThreadContent[]; + @Column({ type: String }) + status: 'in_progress' | 'incomplete' | 'completed'; + + @Column({ type: 'simple-json', nullable: true }) + metadata: any | null; - @Column() - status: MessageStatus; + @Column({ type: String, nullable: true }) + run_id: string | null; - @Column() - created: number; + @Column({ type: Number, nullable: true }) + completed_at: number | null; - @Column({ nullable: true }) - updated?: number; + @Column({ type: 'simple-json' }) + content: MessageContent[]; @Column({ type: 'simple-json', nullable: true }) - metadata?: MessageMetadata; + incomplete_details: MessageIncompleteDetails | null; - @Column({ nullable: true }) - type?: string; + @Column({ type: Number }) + created_at: number; - @Column({ nullable: true }) - error_code?: ErrorCode; + @Column({ type: 'simple-json' }) + attachments: MessageAttachment[]; - @Column({ type: 'simple-json', nullable: true }) - attachments?: any[]; + @Column({ type: Number, nullable: true }) + incomplete_at: number | null; } diff --git a/cortex-js/src/infrastructure/entities/thread.entity.ts b/cortex-js/src/infrastructure/entities/thread.entity.ts index 813e44f28..6670e5bfb 100644 --- a/cortex-js/src/infrastructure/entities/thread.entity.ts +++ b/cortex-js/src/infrastructure/entities/thread.entity.ts @@ -1,30 +1,27 @@ -import { - Thread, - ThreadAssistantInfo, - ThreadMetadata, -} from '@/domain/models/thread.interface'; +import { Thread, ThreadToolResources } from '@/domain/models/thread.interface'; import { Entity, PrimaryColumn, Column } from 'typeorm'; +import { AssistantEntity } from './assistant.entity'; @Entity('threads') export class ThreadEntity implements Thread { - @PrimaryColumn() + @PrimaryColumn({ type: String }) id: string; - @Column() - object: string; + @Column({ type: String }) + object: 'thread'; - @Column({ name: 'title' }) + @Column({ type: String, name: 'title' }) title: string; @Column({ type: 'simple-json' }) - assistants: ThreadAssistantInfo[]; + assistants: AssistantEntity[]; - @Column() - createdAt: number; + @Column({ type: Number }) + created_at: number; - @Column({ nullable: true }) - updatedAt?: number; + @Column({ type: 'simple-json', nullable: true }) + tool_resources: ThreadToolResources | null; - @Column({ nullable: true, type: 'simple-json' }) - metadata?: ThreadMetadata; + @Column({ type: 'simple-json', nullable: true }) + metadata: any | null; } diff --git a/cortex-js/src/infrastructure/exception/duplicate-assistant.exception.ts b/cortex-js/src/infrastructure/exception/duplicate-assistant.exception.ts new file mode 100644 index 000000000..8b328b9c4 --- /dev/null +++ b/cortex-js/src/infrastructure/exception/duplicate-assistant.exception.ts @@ -0,0 +1,10 @@ +import { HttpException, HttpStatus } from '@nestjs/common'; + +export class DuplicateAssistantException extends HttpException { + constructor(assistantId: string) { + super( + `Assistant with the id ${assistantId} is already exists.`, + HttpStatus.CONFLICT, + ); + } +} diff --git a/cortex-js/src/infrastructure/repositories/models/model.repository.ts b/cortex-js/src/infrastructure/repositories/models/model.repository.ts index e27c33a03..ecbe9fb56 100644 --- a/cortex-js/src/infrastructure/repositories/models/model.repository.ts +++ b/cortex-js/src/infrastructure/repositories/models/model.repository.ts @@ -12,10 +12,7 @@ import { writeFileSync, } from 'fs'; import { load, dump } from 'js-yaml'; -import { - isLocalModel, - normalizeModelId, -} from '@/utils/normalize-model-id'; +import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id'; @Injectable() export class ModelRepositoryImpl implements ModelRepository { diff --git a/cortex-js/src/main.ts b/cortex-js/src/main.ts index 31deded13..c745de9b8 100644 --- a/cortex-js/src/main.ts +++ b/cortex-js/src/main.ts @@ -1,12 +1,11 @@ import { NestFactory } from '@nestjs/core'; import { AppModule } from './app.module'; import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger'; -import { INestApplication, ValidationPipe } from '@nestjs/common'; +import { ValidationPipe } from '@nestjs/common'; import { defaultCortexJsHost, defaultCortexJsPort, } from '@/infrastructure/constants/cortex'; -import { SeedService } from './usecases/seed/seed.service'; import { FileManagerService } from './infrastructure/services/file-manager/file-manager.service'; async function bootstrap() { @@ -15,9 +14,6 @@ async function bootstrap() { cors: true, }); - const seedService = app.get(SeedService); - await seedService.seed(); - const fileService = app.get(FileManagerService); await fileService.getConfig(); @@ -76,7 +72,6 @@ async function bootstrap() { const document = SwaggerModule.createDocument(app, config); SwaggerModule.setup('api', app, document); - buildSwagger(app); // getting port from env const host = process.env.CORTEX_JS_HOST || defaultCortexJsHost; @@ -86,15 +81,4 @@ async function bootstrap() { console.log(`Started server at http://${host}:${port}`); } -const buildSwagger = (app: INestApplication) => { - const config = new DocumentBuilder() - .setTitle('Cortex API') - .setDescription('The Cortex API description') - .setVersion('1.0') - .build(); - const document = SwaggerModule.createDocument(app, config); - - SwaggerModule.setup('api', app, document); -}; - bootstrap(); diff --git a/cortex-js/src/usecases/assistants/assistants.module.ts b/cortex-js/src/usecases/assistants/assistants.module.ts index 1fec26c19..95849a3e7 100644 --- a/cortex-js/src/usecases/assistants/assistants.module.ts +++ b/cortex-js/src/usecases/assistants/assistants.module.ts @@ -1,9 +1,10 @@ import { Module } from '@nestjs/common'; import { AssistantsUsecases } from './assistants.usecases'; import { DatabaseModule } from '@/infrastructure/database/database.module'; +import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; @Module({ - imports: [DatabaseModule], + imports: [DatabaseModule, ModelRepositoryModule], controllers: [], providers: [AssistantsUsecases], exports: [AssistantsUsecases], diff --git a/cortex-js/src/usecases/assistants/assistants.usecases.spec.ts b/cortex-js/src/usecases/assistants/assistants.usecases.spec.ts index 8af94e723..2dc8bb89f 100644 --- a/cortex-js/src/usecases/assistants/assistants.usecases.spec.ts +++ b/cortex-js/src/usecases/assistants/assistants.usecases.spec.ts @@ -1,13 +1,21 @@ import { Test, TestingModule } from '@nestjs/testing'; import { AssistantsUsecases } from './assistants.usecases'; import { DatabaseModule } from '@/infrastructure/database/database.module'; +import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; +import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; +import { EventEmitterModule } from '@nestjs/event-emitter'; describe('AssistantsService', () => { let service: AssistantsUsecases; beforeEach(async () => { const module: TestingModule = await Test.createTestingModule({ - imports: [DatabaseModule], + imports: [ + EventEmitterModule.forRoot(), + DatabaseModule, + ModelRepositoryModule, + DownloadManagerModule, + ], exports: [AssistantsUsecases], providers: [AssistantsUsecases], }).compile(); diff --git a/cortex-js/src/usecases/assistants/assistants.usecases.ts b/cortex-js/src/usecases/assistants/assistants.usecases.ts index 09adc0591..2c9c1bb68 100644 --- a/cortex-js/src/usecases/assistants/assistants.usecases.ts +++ b/cortex-js/src/usecases/assistants/assistants.usecases.ts @@ -1,24 +1,52 @@ -import { BadRequestException, Inject, Injectable } from '@nestjs/common'; +import { Inject, Injectable } from '@nestjs/common'; import { AssistantEntity } from '@/infrastructure/entities/assistant.entity'; -import { Repository } from 'typeorm'; +import { QueryFailedError, Repository } from 'typeorm'; import { CreateAssistantDto } from '@/infrastructure/dtos/assistants/create-assistant.dto'; import { Assistant } from '@/domain/models/assistant.interface'; import { PageDto } from '@/infrastructure/dtos/page.dto'; +import { ModelRepository } from '@/domain/repositories/model.interface'; +import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; +import { DuplicateAssistantException } from '@/infrastructure/exception/duplicate-assistant.exception'; @Injectable() export class AssistantsUsecases { constructor( @Inject('ASSISTANT_REPOSITORY') - private assistantRepository: Repository, + private readonly assistantRepository: Repository, + private readonly modelRepository: ModelRepository, ) {} - create(createAssistantDto: CreateAssistantDto) { - const assistant: Assistant = { + async create(createAssistantDto: CreateAssistantDto) { + const { top_p, temperature, model, id } = createAssistantDto; + if (model !== '*') { + const modelEntity = await this.modelRepository.findOne(model); + if (!modelEntity) { + throw new ModelNotFoundException(model); + } + } + + const assistant: AssistantEntity = { ...createAssistantDto, object: 'assistant', created_at: Date.now(), + response_format: null, + tool_resources: null, + top_p: top_p ?? null, + temperature: temperature ?? null, }; - this.assistantRepository.insert(assistant); + + try { + await this.assistantRepository.insert(assistant); + } catch (err) { + if (err instanceof QueryFailedError) { + if (err.driverError.code === 'SQLITE_CONSTRAINT') + throw new DuplicateAssistantException(id); + } + + throw err; + } + + return this.findOne(assistant.id); } async listAssistants( @@ -42,10 +70,6 @@ export class AssistantsUsecases { const { entities: assistants } = await queryBuilder.getRawAndEntities(); - if (assistants.length === 0) { - assistants.push(this.janAssistant); - } - let hasMore = false; if (assistants.length > limit) { hasMore = true; @@ -63,51 +87,12 @@ export class AssistantsUsecases { } async findOne(id: string) { - if (id === this.janAssistant.id) { - return this.janAssistant; - } - return this.assistantRepository.findOne({ where: { id }, }); } async remove(id: string) { - if (id === this.janAssistant.id) { - throw new BadRequestException('Cannot delete Jan assistant!'); - } return this.assistantRepository.delete(id); } - - janAssistant: Assistant = { - avatar: '', - id: 'jan', - object: 'assistant', - created_at: Date.now(), - name: 'Jan', - description: 'A default assistant that can use all downloaded models', - model: '*', - instructions: '', - tools: [ - { - type: 'retrieval', - enabled: false, - settings: { - top_k: 2, - chunk_size: 1024, - chunk_overlap: 64, - retrieval_template: - "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\nCONTEXT: {CONTEXT}\n----------------\nQUESTION: {QUESTION}\n----------------\nHelpful Answer:", - }, - }, - ], - file_ids: [], - }; - - async seed() { - if ((await this.findOne(this.janAssistant.id)) != null) { - return; - } - await this.assistantRepository.insert(this.janAssistant); - } } diff --git a/cortex-js/src/usecases/messages/messages.usecases.ts b/cortex-js/src/usecases/messages/messages.usecases.ts index 7fcfb52a0..9ae758e88 100644 --- a/cortex-js/src/usecases/messages/messages.usecases.ts +++ b/cortex-js/src/usecases/messages/messages.usecases.ts @@ -13,11 +13,19 @@ export class MessagesUsecases { ) {} async create(createMessageDto: CreateMessageDto) { + const { assistant_id } = createMessageDto; const message: MessageEntity = { ...createMessageDto, id: ulid(), - object: 'message', - created: Date.now(), + created_at: Date.now(), + object: 'thread.message', + run_id: null, + completed_at: null, + incomplete_details: null, + attachments: [], + incomplete_at: null, + metadata: undefined, + assistant_id: assistant_id ?? null, }; this.messageRepository.insert(message); } @@ -35,7 +43,10 @@ export class MessagesUsecases { } update(id: string, updateMessageDto: UpdateMessageDto) { - return this.messageRepository.update(id, updateMessageDto); + const updateEntity: Partial = { + ...updateMessageDto, + }; + return this.messageRepository.update(id, updateEntity); } remove(id: string) { @@ -48,7 +59,7 @@ export class MessagesUsecases { thread_id: threadId, }, order: { - created: 'DESC', + created_at: 'DESC', }, take: limit, }); diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index c759cea6a..2e39a758a 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -38,7 +38,7 @@ import { } from '@/utils/huggingface'; import { DownloadType } from '@/domain/models/download.interface'; import { EventEmitter2 } from '@nestjs/event-emitter'; -import { ModelId, ModelStatus } from '@/domain/models/model.event'; +import { ModelEvent, ModelId, ModelStatus } from '@/domain/models/model.event'; import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; @Injectable() @@ -61,8 +61,13 @@ export class ModelsUsecases { * @param createModelDto Model data */ async create(createModelDto: CreateModelDto) { + const { model: modelId, owned_by } = createModelDto; const model: Model = { ...createModelDto, + id: modelId, + created: Date.now(), + object: 'model', + owned_by: owned_by ?? '', }; await this.modelRepository.create(model); @@ -165,10 +170,12 @@ export class ModelsUsecases { status: 'starting', metadata: {}, }; - this.eventEmitter.emit('model.event', { - id: modelId, - action: 'starting', - }); + const modelEvent: ModelEvent = { + model: modelId, + event: 'starting', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); const parser = new ModelParameterParser(); const loadModelSettings: ModelSettingParams = { @@ -196,11 +203,12 @@ export class ModelsUsecases { status: 'started', metadata: {}, }; - - this.eventEmitter.emit('model.event', { - id: modelId, - action: 'started', - }); + const modelEvent: ModelEvent = { + model: modelId, + event: 'started', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); }) .then(() => ({ message: 'Model loaded successfully', @@ -209,11 +217,12 @@ export class ModelsUsecases { .catch(async (e) => { // remove the model from this.activeModelStatus. delete this.activeModelStatuses[modelId]; - - this.eventEmitter.emit('model.event', { - id: modelId, - action: 'starting-failed', - }); + const modelEvent: ModelEvent = { + model: modelId, + event: 'starting-failed', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); console.error('Starting model failed', e.code, e.message, e.stack); if (e.code === AxiosError.ERR_BAD_REQUEST) { return { @@ -250,30 +259,35 @@ export class ModelsUsecases { status: 'stopping', metadata: {}, }; - this.eventEmitter.emit('model.event', { - id: modelId, - action: 'stopping', - }); + const modelEvent: ModelEvent = { + model: modelId, + event: 'stopping', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); return engine .unloadModel(modelId) .then(() => { delete this.activeModelStatuses[modelId]; - - this.eventEmitter.emit('model.event', { - id: modelId, - action: 'stopped', - }); + const modelEvent: ModelEvent = { + model: modelId, + event: 'stopped', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); }) .then(() => ({ message: 'Model is stopped', modelId, })) .catch(async (e) => { - this.eventEmitter.emit('model.event', { - id: modelId, - action: 'stopping-failed', - }); + const modelEvent: ModelEvent = { + model: modelId, + event: 'stopping-failed', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); await this.telemetryUseCases.createCrashReport( e, TelemetrySource.CORTEX_CPP, diff --git a/cortex-js/src/usecases/seed/seed.service.ts b/cortex-js/src/usecases/seed/seed.service.ts deleted file mode 100644 index c00ede562..000000000 --- a/cortex-js/src/usecases/seed/seed.service.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { Injectable } from '@nestjs/common'; -import { AssistantsUsecases } from '../assistants/assistants.usecases'; - -@Injectable() -export class SeedService { - public constructor(private readonly assistantsUsecases: AssistantsUsecases) {} - - public async seed() { - await this.assistantsUsecases.seed(); - } -} diff --git a/cortex-js/src/usecases/threads/threads.usecases.ts b/cortex-js/src/usecases/threads/threads.usecases.ts index 6cf5e0c46..0a06e971b 100644 --- a/cortex-js/src/usecases/threads/threads.usecases.ts +++ b/cortex-js/src/usecases/threads/threads.usecases.ts @@ -8,14 +8,11 @@ import { MessageEntity } from '@/infrastructure/entities/message.entity'; import { PageDto } from '@/infrastructure/dtos/page.dto'; import { CreateMessageDto } from '@/infrastructure/dtos/threads/create-message.dto'; import { ulid } from 'ulid'; -import { - ContentType, - Message, - MessageStatus, -} from '@/domain/models/message.interface'; +import { Message, MessageContent } from '@/domain/models/message.interface'; import { UpdateMessageDto } from '@/infrastructure/dtos/threads/update-message.dto'; import { Thread } from '@/domain/models/thread.interface'; import DeleteMessageDto from '@/infrastructure/dtos/threads/delete-message.dto'; +import { AssistantEntity } from '@/infrastructure/entities/assistant.entity'; @Injectable() export class ThreadsUsecases { @@ -29,13 +26,25 @@ export class ThreadsUsecases { async create(createThreadDto: CreateThreadDto): Promise { const id = uuidv4(); const { assistants } = createThreadDto; + const assistantEntity: AssistantEntity[] = assistants.map((assistant) => { + const entity: AssistantEntity = { + ...assistant, + response_format: null, + tool_resources: null, + top_p: assistant.top_p ?? null, + temperature: assistant.temperature ?? null, + }; + return entity; + }); const thread: ThreadEntity = { id, - assistants, + assistants: assistantEntity, object: 'thread', - createdAt: Date.now(), + created_at: Date.now(), title: 'New Thread', + tool_resources: null, + metadata: null, }; await this.threadRepository.insert(thread); return thread; @@ -44,7 +53,7 @@ export class ThreadsUsecases { async findAll(): Promise { return this.threadRepository.find({ order: { - createdAt: 'DESC', + created_at: 'DESC', }, }); } @@ -64,7 +73,7 @@ export class ThreadsUsecases { queryBuilder .where('thread_id = :id', { id: threadId }) - .orderBy('created', normalizedOrder) + .orderBy('created_at', normalizedOrder) .take(limit + 1); // Fetch one more record than the limit if (after) { @@ -94,25 +103,31 @@ export class ThreadsUsecases { createMessageDto: CreateMessageDto, ) { const thread = await this.getThreadOrThrow(threadId); - const assistantId: string | undefined = thread.assistants[0].assistant_id; + const assistantId: string = thread.assistants[0].id; + + const messageContent: MessageContent = { + type: 'text', + text: { + annotations: [], + value: createMessageDto.content, + }, + }; const message: MessageEntity = { + id: ulid(), object: 'thread.message', thread_id: threadId, assistant_id: assistantId, - id: ulid(), - created: Date.now(), - status: MessageStatus.Ready, + created_at: Date.now(), + status: 'completed', role: createMessageDto.role, - content: [ - { - type: ContentType.Text, - text: { - value: createMessageDto.content, - annotations: [], - }, - }, - ], + content: [messageContent], + metadata: null, + run_id: null, + completed_at: null, + incomplete_details: null, + attachments: [], + incomplete_at: null, }; await this.messageRepository.insert(message); return message; @@ -156,8 +171,25 @@ export class ThreadsUsecases { return this.threadRepository.findOne({ where: { id } }); } - update(id: string, updateThreadDto: UpdateThreadDto) { - return this.threadRepository.update(id, updateThreadDto); + async update(id: string, updateThreadDto: UpdateThreadDto) { + const assistantEntities: AssistantEntity[] = + updateThreadDto.assistants?.map((assistant) => { + const entity: AssistantEntity = { + ...assistant, + name: assistant.name, + response_format: null, + tool_resources: null, + top_p: assistant.top_p ?? null, + temperature: assistant.temperature ?? null, + }; + return entity; + }) ?? []; + + const entity: Partial = { + ...updateThreadDto, + assistants: assistantEntities, + }; + return this.threadRepository.update(id, entity); } remove(id: string) { @@ -183,4 +215,9 @@ export class ThreadsUsecases { // we still allow user to delete message even if the thread is not there return this.getMessageOrThrow(messageId); } + + async clean(threadId: string) { + await this.getThreadOrThrow(threadId); + await this.messageRepository.delete({ thread_id: threadId }); + } }