Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 380de2a

Browse files
authored
Merge pull request #596 from janhq/chore/remove-inference-stream
chore: remove inference stream
2 parents dff7099 + 18c071f commit 380de2a

File tree

6 files changed

+12
-55
lines changed

6 files changed

+12
-55
lines changed

cortex-js/constant.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ export const databaseName = 'cortex';
33
export const databaseFile = `${databaseName}.db`;
44

55
export const defaultCortexJsHost = 'localhost';
6-
export const defaultCortexJsPort = 7331;
6+
export const defaultCortexJsPort = 1337;
77

88
export const defaultCortexCppHost = '127.0.0.1';
99
export const defaultCortexCppPort = 3928;

cortex-js/src/domain/abstracts/engine.abstract.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@ import { Extension } from './extension.abstract';
66
export abstract class EngineExtension extends Extension {
77
abstract provider: string;
88

9-
abstract inference(dto: any, headers: Record<string, string>): Promise<any>;
10-
11-
abstract inferenceStream(dto: any, headers: any): Promise<stream.Readable>;
9+
abstract inference(
10+
dto: any,
11+
headers: Record<string, string>,
12+
): Promise<stream.Readable | any>;
1213

1314
async loadModel(
1415
model: Model,

cortex-js/src/domain/abstracts/oai.abstract.ts

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,37 +9,18 @@ export abstract class OAIEngineExtension extends EngineExtension {
99
super();
1010
}
1111

12-
override async inferenceStream(
13-
createChatDto: any,
14-
headers: Record<string, string>,
15-
): Promise<stream.Readable> {
16-
const response = await this.httpService
17-
.post(this.apiUrl, createChatDto, {
18-
headers: {
19-
'Content-Type': headers['content-type'] ?? 'application/json',
20-
Authorization: headers['authorization'],
21-
},
22-
responseType: 'stream',
23-
})
24-
.toPromise();
25-
26-
if (!response) {
27-
throw new Error('No response');
28-
}
29-
30-
return response.data;
31-
}
32-
3312
override async inference(
3413
createChatDto: any,
3514
headers: Record<string, string>,
36-
): Promise<any> {
15+
): Promise<stream.Readable | any> {
16+
const { stream } = createChatDto;
3717
const response = await this.httpService
3818
.post(this.apiUrl, createChatDto, {
3919
headers: {
4020
'Content-Type': headers['content-type'] ?? 'application/json',
4121
Authorization: headers['authorization'],
4222
},
23+
responseType: stream ? 'stream' : 'json',
4324
})
4425
.toPromise();
4526
if (!response) {

cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ export class ChatCliUsecases {
6060
};
6161

6262
const decoder = new TextDecoder('utf-8');
63-
this.chatUsecases.inferenceStream(chatDto, {}).then((response) => {
64-
response.on('error', (error) => {
63+
this.chatUsecases.inference(chatDto, {}).then((response) => {
64+
response.on('error', (error: any) => {
6565
console.error(error);
6666
rl.prompt();
6767
});
@@ -71,7 +71,7 @@ export class ChatCliUsecases {
7171
rl.prompt();
7272
});
7373

74-
response.on('data', (chunk) => {
74+
response.on('data', (chunk: any) => {
7575
let content = '';
7676
const text = decoder.decode(chunk);
7777
const lines = text.trim().split('\n');

cortex-js/src/infrastructure/controllers/chat.controller.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ export class ChatController {
2626

2727
if (stream) {
2828
this.chatService
29-
.inferenceStream(createChatDto, headers)
29+
.inference(createChatDto, headers)
3030
.then((stream) => stream.pipe(res));
3131
} else {
3232
res.json(await this.chatService.inference(createChatDto, headers));

cortex-js/src/usecases/chat/chat.usecases.ts

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import { ExtensionRepository } from '@/domain/repositories/extension.interface';
44
import { Repository } from 'typeorm';
55
import { ModelEntity } from '@/infrastructure/entities/model.entity';
66
import { EngineExtension } from '@/domain/abstracts/engine.abstract';
7-
import stream from 'stream';
87
import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception';
98

109
@Injectable()
@@ -37,28 +36,4 @@ export class ChatUsecases {
3736
}
3837
return engine.inference(createChatDto, headers);
3938
}
40-
41-
async inferenceStream(
42-
createChatDto: CreateChatCompletionDto,
43-
headers: Record<string, string>,
44-
): Promise<stream.Readable> {
45-
const { model: modelId } = createChatDto;
46-
const extensions = (await this.extensionRepository.findAll()) ?? [];
47-
const model = await this.modelRepository.findOne({
48-
where: { id: modelId },
49-
});
50-
51-
if (!model) {
52-
throw new ModelNotFoundException(modelId);
53-
}
54-
55-
const engine = extensions.find((e: any) => e.provider === model.engine) as
56-
| EngineExtension
57-
| undefined;
58-
if (engine == null) {
59-
throw new Error(`No engine found with name: ${model.engine}`);
60-
}
61-
62-
return engine?.inferenceStream(createChatDto, headers);
63-
}
6439
}

0 commit comments

Comments (0)