This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit b48efb0
feat: add model event
1 parent 40ada07 · commit b48efb0

File tree: 5 files changed, +360 −15 lines

cortex-js/src/app.module.ts

Lines changed: 1 addition & 2 deletions
@@ -17,7 +17,6 @@ import { AppLoggerMiddleware } from './infrastructure/middlewares/app.logger.mid
 import { EventEmitterModule } from '@nestjs/event-emitter';
 import { DownloadManagerModule } from './download-manager/download-manager.module';
 import { EventsController } from './infrastructure/controllers/events.controller';
-import { AppController } from './infrastructure/controllers/app.controller';
 import { AssistantsController } from './infrastructure/controllers/assistants.controller';
 import { ChatController } from './infrastructure/controllers/chat.controller';
 import { EmbeddingsController } from './infrastructure/controllers/embeddings.controller';
@@ -49,7 +48,7 @@ import { ProcessController } from './infrastructure/controllers/process.controll
     DownloadManagerModule,
   ],
   controllers: [
-    AppController,
+    EventsController,
     AssistantsController,
     ChatController,
     EmbeddingsController,
cortex-js/src/domain/models/model.event.ts

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+export type ModelId = string;
+
+const ModelLoadingEvents = [
+  'starting',
+  'stopping',
+  'started',
+  'stopped',
+  'starting-failed',
+  'stopping-failed',
+] as const;
+export type ModelLoadingEvent = (typeof ModelLoadingEvents)[number];
+
+const AllModelStates = ['starting', 'stopping', 'started'] as const;
+export type ModelState = (typeof AllModelStates)[number];
+
+export interface ModelStatus {
+  model: ModelId;
+  status: ModelState;
+  metadata: Record<string, unknown>;
+}
+
+export interface ModelEvent {
+  model: ModelId;
+  event: ModelLoadingEvent;
+  metadata: Record<string, unknown>;
+}
+
+export const EmptyModelEvent = {};
+
+export interface ModelStatusAndEvent {
+  data: {
+    status: Record<ModelId, ModelStatus>;
+    event: ModelEvent | typeof EmptyModelEvent;
+  };
+}
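
The 'model.event' channel described by these types is fed from the models use cases, which are changed elsewhere in this commit but not shown in this excerpt. As a rough, hypothetical sketch of the producer side, any service holding the shared EventEmitter2 instance could publish a ModelEvent like the following; the helper name and call site are illustrative only, not taken from the commit:

import { EventEmitter2 } from '@nestjs/event-emitter';
import { ModelEvent } from '@/domain/models/model.event';

// Illustrative helper (not part of the commit): publish a "started" event for a model.
// The EventsController below turns anything emitted on 'model.event' into SSE frames.
export function emitModelStarted(eventEmitter: EventEmitter2, modelId: string) {
  const event: ModelEvent = {
    model: modelId,
    event: 'started',
    metadata: {},
  };
  eventEmitter.emit('model.event', event);
}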

cortex-js/src/infrastructure/controllers/events.controller.ts

Lines changed: 37 additions & 2 deletions
@@ -2,21 +2,40 @@ import {
   DownloadState,
   DownloadStateEvent,
 } from '@/domain/models/download.interface';
+import {
+  EmptyModelEvent,
+  ModelEvent,
+  ModelId,
+  ModelStatus,
+  ModelStatusAndEvent,
+} from '@/domain/models/model.event';
 import { DownloadManagerService } from '@/download-manager/download-manager.service';
+import { ModelsUsecases } from '@/usecases/models/models.usecases';
 import { Controller, Sse } from '@nestjs/common';
 import { EventEmitter2 } from '@nestjs/event-emitter';
-import { Observable, fromEvent, map, merge, of, throttleTime } from 'rxjs';
+import { ApiTags } from '@nestjs/swagger';
+import {
+  Observable,
+  combineLatest,
+  fromEvent,
+  map,
+  merge,
+  of,
+  startWith,
+  throttleTime,
+} from 'rxjs';
 
+@ApiTags('Events')
 @Controller('events')
 export class EventsController {
   constructor(
     private readonly downloadManagerService: DownloadManagerService,
+    private readonly modelsUsecases: ModelsUsecases,
     private readonly eventEmitter: EventEmitter2,
   ) {}
 
   @Sse('download')
   downloadEvent(): Observable<DownloadStateEvent> {
-    // Welcome message Observable
     const latestDownloadState$: Observable<DownloadStateEvent> = of({
       data: this.downloadManagerService.getDownloadStates(),
     });
@@ -40,4 +59,20 @@ export class EventsController {
       downloadAbortEvent$,
     ).pipe();
   }
+
+  @Sse('model')
+  modelEvent(): Observable<ModelStatusAndEvent> {
+    const latestModelStatus$: Observable<Record<ModelId, ModelStatus>> = of(
+      this.modelsUsecases.getModelStatuses(),
+    );
+
+    const modelEvent$ = fromEvent<ModelEvent>(
+      this.eventEmitter,
+      'model.event',
+    ).pipe(startWith(EmptyModelEvent));
+
+    return combineLatest([latestModelStatus$, modelEvent$]).pipe(
+      map(([status, event]) => ({ data: { status, event } })),
+    );
+  }
 }
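
Since @Sse('model') sits on the 'events' controller, clients can subscribe to the combined status-plus-event stream over standard Server-Sent Events. A minimal browser-side sketch follows; the base URL, port, and absence of a global route prefix are assumptions, not confirmed by this diff:

// Hypothetical client for the new SSE endpoint; adjust host, port, and prefix as needed.
const source = new EventSource('http://localhost:1337/events/model');

source.onmessage = (message: MessageEvent<string>) => {
  // Each frame carries the payload built by modelEvent(): the current status map
  // plus the latest model event (EmptyModelEvent right after subscribing).
  const { status, event } = JSON.parse(message.data);
  console.log('model statuses:', status);
  console.log('latest model event:', event);
};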
Lines changed: 208 additions & 0 deletions
@@ -0,0 +1,208 @@
+import { ApiProperty } from '@nestjs/swagger';
+import { IsArray, IsOptional } from 'class-validator';
+
+export class ModelSettingDto {
+  @ApiProperty({
+    type: 'number',
+    minimum: 0,
+    maximum: 1,
+    required: false,
+    default: 1,
+    description: `What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.`,
+  })
+  temperature: number;
+
+  @ApiProperty({
+    type: 'number',
+    minimum: 0,
+    maximum: 1,
+    required: false,
+    default: 1,
+    description: `An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or temperature but not both.`,
+  })
+  top_p: number;
+
+  @ApiProperty({
+    required: false,
+    example: '',
+    description: 'GGUF metadata: tokenizer.chat_template',
+  })
+  prompt_template?: string;
+
+  @ApiProperty({
+    required: false,
+    example: [],
+    description:
+      'Defines specific tokens or phrases at which the model will stop generating further output.',
+    default: [],
+  })
+  @IsArray()
+  @IsOptional()
+  stop?: string[];
+
+  @ApiProperty({
+    required: false,
+    type: 'number',
+    example: 0,
+    description:
+      'Adjusts the likelihood of the model repeating words or phrases in its output.',
+  })
+  frequency_penalty?: number;
+
+  @ApiProperty({
+    required: false,
+    type: 'number',
+    example: 0,
+    description:
+      'Influences the generation of new and varied concepts in the model’s output.',
+  })
+  presence_penalty?: number;
+
+  @ApiProperty({
+    required: false,
+    type: 'number',
+    example: 4096,
+    default: 4096,
+    description:
+      'The context length for model operations varies; the maximum depends on the specific model used.',
+  })
+  ctx_len?: number;
+
+  @ApiProperty({
+    required: false,
+    type: 'boolean',
+    example: true,
+    default: true,
+    description: 'Enable real-time data processing for faster predictions.',
+  })
+  stream?: boolean;
+
+  @ApiProperty({
+    required: false,
+    type: 'number',
+    example: 2048,
+    default: 2048,
+    description:
+      'The maximum number of tokens the model will generate in a single response.',
+  })
+  max_tokens?: number;
+
+  @ApiProperty({
+    required: false,
+    type: 'number',
+    example: 1,
+    default: 1,
+    description: 'The number of layers to load onto the GPU for acceleration.',
+  })
+  ngl?: number;
+
+  @ApiProperty({
+    required: false,
+    type: 'number',
+    example: 1,
+    default: 1,
+    description: 'Number of parallel sequences to decode',
+  })
+  n_parallel?: number;
+
+  @ApiProperty({
+    required: false,
+    type: 'number',
+    example: 1,
+    default: 1,
+    description:
+      'Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)',
+  })
+  cpu_threads?: number;
+
+  @ApiProperty({
+    required: false,
+    type: 'string',
+    example: '',
+    default: '',
+    description: 'The prompt to use for internal configuration',
+  })
+  pre_prompt?: string;
+
+  @ApiProperty({
+    required: false,
+    type: 'number',
+    example: 0,
+    default: 0,
+    description: 'The batch size for prompt eval step',
+  })
+  n_batch?: number;
+
+  @ApiProperty({
+    required: false,
+    type: 'boolean',
+    example: true,
+    default: true,
+    description: 'To enable prompt caching or not',
+  })
+  caching_enabled?: boolean;
+
+  @ApiProperty({
+    required: false,
+    type: 'number',
+    example: 0,
+    default: 0,
+    description: 'Group attention factor in self-extend',
+  })
+  grp_attn_n?: number;
+
+  @ApiProperty({
+    required: false,
+    type: 'number',
+    example: 0,
+    default: 0,
+    description: 'Group attention width in self-extend',
+  })
+  grp_attn_w?: number;
+
+  @ApiProperty({
+    required: false,
+    type: 'boolean',
+    example: false,
+    default: false,
+    description: 'Prevent system swapping of the model to disk in macOS',
+  })
+  mlock?: boolean;
+
+  @ApiProperty({
+    required: false,
+    type: 'string',
+    example: '',
+    default: '',
+    description:
+      'You can constrain the sampling using GBNF grammars by providing path to a grammar file',
+  })
+  grammar_file?: string;
+
+  @ApiProperty({
+    required: false,
+    type: 'boolean',
+    example: true,
+    default: true,
+    description: 'To enable Flash Attention, default is true',
+  })
+  flash_attn?: boolean;
+
+  @ApiProperty({
+    required: false,
+    type: 'string',
+    example: '',
+    default: '',
+    description: 'KV cache type: f16, q8_0, q4_0, default is f16',
+  })
+  cache_type?: string;
+
+  @ApiProperty({
+    required: false,
+    type: 'boolean',
+    example: true,
+    default: true,
+    description: 'To enable mmap, default is true',
+  })
+  use_mmap?: boolean;
+}
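
For reference, a settings object that would satisfy this DTO might look like the following; the values are drawn from the examples, defaults, and descriptions declared above, and the object itself is purely illustrative rather than part of the commit:

// Illustrative ModelSettingDto-shaped payload using typical values from the field docs above.
const exampleModelSettings = {
  temperature: 1,
  top_p: 1,
  ctx_len: 4096,
  stream: true,
  max_tokens: 2048,
  ngl: 1,
  n_parallel: 1,
  cpu_threads: 1,
  caching_enabled: true,
  flash_attn: true,
  cache_type: 'f16',
  use_mmap: true,
};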

0 commit comments