Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 1debf76

Browse files
committed
chore: update model start params DTO
1 parent c97a46e commit 1debf76

File tree

7 files changed: 86 additions and 24 deletions.

cortex-js/src/domain/models/model.interface.ts

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ export interface Model {
8181
ngl?: number;
8282

8383
/**
84-
* The number of parallel operations. Only set when enable continuous batching.
84+
* Number of parallel sequences to decode
8585
*/
8686
n_parallel?: number;
8787

@@ -96,13 +96,6 @@ export interface Model {
9696
engine?: string;
9797
}
9898

99-
export interface ModelMetadata {
100-
author: string;
101-
tags: string[];
102-
size: number;
103-
cover?: string;
104-
}
105-
10699
/**
107100
* The available model settings.
108101
*/
@@ -140,10 +133,3 @@ export interface ModelRuntimeParams {
140133
presence_penalty?: number;
141134
engine?: string;
142135
}
143-
144-
/**
145-
* Represents the model initialization error.
146-
*/
147-
export type ModelInitFailed = Model & {
148-
error: Error;
149-
};

cortex-js/src/infrastructure/controllers/models.controller.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import { ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger';
2020
import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto';
2121
import { TransformInterceptor } from '../interceptors/transform.interceptor';
2222
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
23+
import { ModelSettingsDto } from '../dtos/models/model-settings.dto';
2324

2425
@ApiTags('Models')
2526
@Controller('models')
@@ -61,10 +62,13 @@ export class ModelsController {
6162
description: 'The unique identifier of the model.',
6263
})
6364
@Post(':modelId(*)/start')
64-
startModel(@Param('modelId') modelId: string, @Body() model: ModelDto) {
65+
startModel(
66+
@Param('modelId') modelId: string,
67+
@Body() params: ModelSettingsDto,
68+
) {
6569
return this.cortexUsecases
6670
.startCortex()
67-
.then(() => this.modelsUsecases.startModel(modelId, model));
71+
.then(() => this.modelsUsecases.startModel(modelId, params));
6872
}
6973

7074
@HttpCode(200)

cortex-js/src/infrastructure/dtos/models/create-model.dto.ts

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
IsNumber,
55
IsOptional,
66
IsString,
7+
Min,
78
} from 'class-validator';
89
import { Model } from '@/domain/models/model.interface';
910
import { ModelArtifactDto } from './model-artifact.dto';
@@ -50,6 +51,7 @@ export class CreateModelDto implements Partial<Model> {
5051
@ApiProperty({
5152
description:
5253
'Sets the upper limit on the number of tokens the model can generate in a single output.',
54+
example: 4096,
5355
})
5456
@IsOptional()
5557
@IsNumber()
@@ -97,30 +99,40 @@ export class CreateModelDto implements Partial<Model> {
9799
@ApiProperty({
98100
description:
99101
'Sets the maximum input the model can use to generate a response, it varies with the model used.',
102+
example: 4096,
100103
})
101104
@IsOptional()
102105
@IsNumber()
103106
ctx_len?: number;
104107

105-
@ApiProperty({ description: 'Determines GPU layer usage.' })
108+
@ApiProperty({ description: 'Determines GPU layer usage.', example: 32 })
106109
@IsOptional()
107110
@IsNumber()
108111
ngl?: number;
109112

110-
@ApiProperty({ description: 'Number of parallel processing units to use.' })
113+
@ApiProperty({
114+
description: 'Number of parallel processing units to use.',
115+
example: 1,
116+
})
111117
@IsOptional()
112118
@IsNumber()
119+
@Min(1)
113120
n_parallel?: number;
114121

115122
@ApiProperty({
116123
description:
117124
'Determines CPU inference threads, limited by hardware and OS. ',
125+
example: 10,
118126
})
119127
@IsOptional()
120128
@IsNumber()
129+
@Min(1)
121130
cpu_threads?: number;
122131

123-
@ApiProperty({ description: 'The engine used to run the model.' })
132+
@ApiProperty({
133+
description: 'The engine used to run the model.',
134+
example: 'cortex.llamacpp',
135+
})
124136
@IsOptional()
125137
@IsString()
126138
engine?: string;

cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import { ModelSettingParams } from '@/domain/models/model.interface';
2+
import { ApiProperty } from '@nestjs/swagger';
3+
import { IsArray, IsNumber, IsOptional, Min } from 'class-validator';
4+
5+
export class ModelSettingsDto implements ModelSettingParams {
6+
// Prompt Settings
7+
@ApiProperty({
8+
example: 'system\n{system_message}\nuser\n{prompt}\nassistant',
9+
description:
10+
"A predefined text or framework that guides the AI model's response generation.",
11+
})
12+
@IsOptional()
13+
prompt_template?: string;
14+
15+
@ApiProperty({
16+
type: [String],
17+
example: [],
18+
description:
19+
'Defines specific tokens or phrases that signal the model to stop producing further output.',
20+
})
21+
@IsArray()
22+
@IsOptional()
23+
stop?: string[];
24+
25+
// Engine Settings
26+
@ApiProperty({ description: 'Determines GPU layer usage.', example: 4096 })
27+
@IsOptional()
28+
@IsNumber()
29+
ngl?: number;
30+
31+
@ApiProperty({
32+
description:
33+
'The context length for model operations varies; the maximum depends on the specific model used.',
34+
example: 4096,
35+
})
36+
@IsOptional()
37+
@IsNumber()
38+
ctx_len?: number;
39+
40+
@ApiProperty({
41+
description:
42+
'Determines CPU inference threads, limited by hardware and OS. ',
43+
example: 10,
44+
})
45+
@IsOptional()
46+
@IsNumber()
47+
@Min(1)
48+
cpu_threads?: number;
49+
50+
@ApiProperty({
51+
example: 'cortex.llamacpp',
52+
description: 'The engine to use.',
53+
})
54+
@IsOptional()
55+
engine?: string;
56+
}

cortex-js/src/usecases/cortex/cortex.usecases.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@ import { ChildProcess, spawn } from 'child_process';
33
import { join } from 'path';
44
import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto';
55
import { HttpService } from '@nestjs/axios';
6-
import { defaultCortexCppHost, defaultCortexCppPort } from '@/infrastructure/constants/cortex';
6+
import {
7+
defaultCortexCppHost,
8+
defaultCortexCppPort,
9+
} from '@/infrastructure/constants/cortex';
710
import { existsSync } from 'node:fs';
811
import { firstValueFrom } from 'rxjs';
912
import { FileManagerService } from '@/file-manager/file-manager.service';

cortex-js/src/usecases/messages/messages.module.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
import { Module } from '@nestjs/common';
22
import { MessagesUsecases } from './messages.usecases';
3-
import { MessagesController } from '@/infrastructure/controllers/messages.controller';
43
import { DatabaseModule } from '@/infrastructure/database/database.module';
54

65
@Module({
76
imports: [DatabaseModule],
8-
controllers: [MessagesController],
7+
controllers: [],
98
providers: [MessagesUsecases],
109
exports: [MessagesUsecases],
1110
})

cortex-js/src/usecases/models/models.usecases.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,9 @@ export class ModelsUsecases {
100100
// Default settings
101101
ctx_len: 4096,
102102
ngl: 100,
103-
...(Array.isArray(model?.files) &&
103+
//TODO: Utils for model file retrieval
104+
...(model?.files &&
105+
Array.isArray(model.files) &&
104106
!('llama_model_path' in model) && {
105107
llama_model_path: (model.files as string[])[0],
106108
}),

0 commit comments

Comments
 (0)