
Commit 14cfcfe

Merge pull request #564 from janhq/feat/cli-chat
[WIP] feat: add CLI for chat

2 parents ca85eee + 177c0fb

File tree: 7 files changed, +215 −49 lines

cortex-js/src/command.module.ts
Lines changed: 4 additions & 0 deletions

@@ -9,6 +9,8 @@ import { PullCommand } from './infrastructure/commanders/pull.command';
 import { InferenceCommand } from './infrastructure/commanders/inference.command';
 import { ModelsCommand } from './infrastructure/commanders/models.command';
 import { StartCommand } from './infrastructure/commanders/start.command';
+import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module';
+import { ChatModule } from './usecases/chat/chat.module';
 
 @Module({
   imports: [
@@ -20,6 +22,8 @@ import { StartCommand } from './infrastructure/commanders/start.command';
     DatabaseModule,
     ModelsModule,
     CortexModule,
+    ChatModule,
+    ExtensionModule,
   ],
   providers: [
     BasicCommand,
cortex-js/src/domain/abstracts/engine.abstract.ts
Lines changed: 7 additions & 3 deletions

@@ -1,8 +1,12 @@
+import { Model } from '../models/model.interface';
 import { Extension } from './extension.abstract';
 
 export abstract class EngineExtension extends Extension {
   abstract provider: string;
-  abstract inference(completion: any, req: any, res: any): void;
-  abstract loadModel(loadModel: any): Promise<void>;
-  abstract unloadModel(modelId: string): Promise<void>;
+
+  abstract inference(completion: any, req: any, stream: any, res?: any): void;
+
+  async loadModel(model: Model): Promise<void> {}
+
+  async unloadModel(modelId: string): Promise<void> {}
 }
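
With loadModel and unloadModel now shipping empty async defaults, a concrete engine only has to supply provider and inference. The sketch below illustrates that shape; the EchoEngineExtension class, the 'echo' provider id, and the assumption that the Extension base class requires no further overrides are all hypothetical and not part of this commit.

// Hypothetical subclass for illustration only; it does not appear in the diff.
import { EngineExtension } from './engine.abstract';
import { ChatStreamEvent } from './oai.abstract';

export class EchoEngineExtension extends EngineExtension {
  provider = 'echo'; // hypothetical provider id

  // inference is the only member a subclass must still implement; it writes
  // ChatStreamEvent-shaped objects into the caller-supplied stream.
  inference(completion: any, req: any, stream: any, res?: any): void {
    const writer = (stream as WritableStream<ChatStreamEvent>).getWriter();
    writer.ready
      .then(() => writer.write({ type: 'data', data: completion }))
      .then(() => writer.write({ type: 'end' }));
  }

  // loadModel / unloadModel are intentionally omitted: the base class now
  // provides empty async defaults instead of abstract declarations.
}
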
cortex-js/src/domain/abstracts/oai.abstract.ts
Lines changed: 115 additions & 33 deletions

@@ -1,6 +1,12 @@
-/* eslint-disable @typescript-eslint/no-unused-vars */
 import { HttpService } from '@nestjs/axios';
 import { EngineExtension } from './engine.abstract';
+import { stdout } from 'process';
+
+export type ChatStreamEvent = {
+  type: 'data' | 'error' | 'end';
+  data?: any;
+  error?: any;
+};
 
 export abstract class OAIEngineExtension extends EngineExtension {
   abstract apiUrl: string;
@@ -9,44 +15,120 @@ export abstract class OAIEngineExtension extends EngineExtension {
     super();
   }
 
-  async inference(
+  inference(
     createChatDto: any,
     headers: Record<string, string>,
-    res: any,
+    writableStream: WritableStream<ChatStreamEvent>,
+    res?: any,
   ) {
     if (createChatDto.stream === true) {
-      const response = await this.httpService
-        .post(this.apiUrl, createChatDto, {
-          headers: {
-            'Content-Type': headers['content-type'] ?? 'application/json',
-            Authorization: headers['authorization'],
-          },
-          responseType: 'stream',
-        })
-        .toPromise();
-
-      res.writeHead(200, {
-        'Content-Type': 'text/event-stream',
-        'Cache-Control': 'no-cache',
-        Connection: 'keep-alive',
-        'Access-Control-Allow-Origin': '*',
-      });
+      if (res) {
+        res.writeHead(200, {
+          'Content-Type': 'text/event-stream',
+          'Cache-Control': 'no-cache',
+          Connection: 'keep-alive',
+          'Access-Control-Allow-Origin': '*',
+        });
+        this.httpService
+          .post(this.apiUrl, createChatDto, {
+            headers: {
+              'Content-Type': headers['content-type'] ?? 'application/json',
+              Authorization: headers['authorization'],
+            },
+            responseType: 'stream',
+          })
+          .toPromise()
+          .then((response) => {
+            response?.data.pipe(res);
+          });
+      } else {
+        const decoder = new TextDecoder('utf-8');
+        const defaultWriter = writableStream.getWriter();
+        defaultWriter.ready.then(() => {
+          this.httpService
+            .post(this.apiUrl, createChatDto, {
+              headers: {
+                'Content-Type': headers['content-type'] ?? 'application/json',
+                Authorization: headers['authorization'],
+              },
+              responseType: 'stream',
+            })
+            .subscribe({
+              next: (response) => {
+                response.data.on('data', (chunk: any) => {
+                  let content = '';
+                  const text = decoder.decode(chunk);
+                  const lines = text.trim().split('\n');
+                  let cachedLines = '';
+                  for (const line of lines) {
+                    try {
+                      const toParse = cachedLines + line;
+                      if (!line.includes('data: [DONE]')) {
+                        const data = JSON.parse(toParse.replace('data: ', ''));
+                        content += data.choices[0]?.delta?.content ?? '';
+
+                        if (content.startsWith('assistant: ')) {
+                          content = content.replace('assistant: ', '');
+                        }
+
+                        if (content !== '') {
+                          defaultWriter.write({
+                            type: 'data',
+                            data: content,
+                          });
+                        }
+                      }
+                    } catch {
+                      cachedLines = line;
+                    }
+                  }
+                });
 
-      response?.data.pipe(res);
+                response.data.on('error', (error: any) => {
+                  defaultWriter.write({
+                    type: 'error',
+                    error,
+                  });
+                });
+
+                response.data.on('end', () => {
+                  // stdout.write('Stream end');
+                  defaultWriter.write({
+                    type: 'end',
+                  });
+                });
+              },
+
+              error: (error) => {
+                stdout.write('Stream error: ' + error);
+              },
+            });
+        });
+      }
     } else {
-      const response = await this.httpService
-        .post(this.apiUrl, createChatDto, {
-          headers: {
-            'Content-Type': headers['content-type'] ?? 'application/json',
-            Authorization: headers['authorization'],
-          },
-        })
-        .toPromise();
-
-      res.json(response?.data);
+      const defaultWriter = writableStream.getWriter();
+      defaultWriter.ready.then(() => {
+        this.httpService
+          .post(this.apiUrl, createChatDto, {
+            headers: {
+              'Content-Type': headers['content-type'] ?? 'application/json',
+              Authorization: headers['authorization'],
+            },
+          })
+          .toPromise()
+          .then((response) => {
+            defaultWriter.write({
+              type: 'data',
+              data: response?.data,
            });
+          })
+          .catch((error: any) => {
+            defaultWriter.write({
+              type: 'error',
+              error,
+            });
+          });
+      });
     }
   }
-
-  async loadModel(_loadModel: any): Promise<void> {}
-  async unloadModel(_modelId: string): Promise<void> {}
 }
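
The reworked inference above no longer writes to an Express response when no res is passed; instead it pushes ChatStreamEvent objects into a caller-owned WritableStream. A minimal consumer sketch, assuming a Node 18+ runtime where WritableStream is a global; collectResponse is an illustrative helper, not something this commit adds.

import { ChatStreamEvent } from './oai.abstract';

// Accumulates streamed 'data' fragments and hands the full text to a callback
// once the 'end' event arrives.
function collectResponse(onDone: (full: string) => void): WritableStream<ChatStreamEvent> {
  let full = '';
  return new WritableStream<ChatStreamEvent>({
    write(chunk) {
      if (chunk.type === 'data') {
        full += chunk.data ?? '';
      } else if (chunk.type === 'error') {
        console.error('stream error:', chunk.error);
      } else {
        onDone(full); // 'end' marks a complete assistant turn
      }
    },
  });
}

// Usage, assuming an OAIEngineExtension instance `engine` and a chat DTO are in scope:
// engine.inference(chatDto, {}, collectResponse((text) => console.log(text)));
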
cortex-js/src/infrastructure/commanders/inference.command.ts
Lines changed: 65 additions & 9 deletions

@@ -1,25 +1,81 @@
+import { ChatUsecases } from '@/usecases/chat/chat.usecases';
 import { CommandRunner, SubCommand } from 'nest-commander';
+import { CreateChatCompletionDto } from '../dtos/chat/create-chat-completion.dto';
+import { ChatCompletionRole } from '@/domain/models/message.interface';
+import { stdout } from 'process';
+import * as readline from 'node:readline/promises';
+import { ChatStreamEvent } from '@/domain/abstracts/oai.abstract';
+import { ChatCompletionMessage } from '../dtos/chat/chat-completion-message.dto';
 
 @SubCommand({ name: 'chat' })
 export class InferenceCommand extends CommandRunner {
-  constructor() {
+  exitClause = 'exit()';
+  userIndicator = '>> ';
+  exitMessage = 'Bye!';
+
+  constructor(private readonly chatUsecases: ChatUsecases) {
     super();
   }
 
-  async run(_input: string[]): Promise<void> {
-    const lineByLine = require('readline');
-    const lbl = lineByLine.createInterface({
+  async run(): Promise<void> {
+    console.log(`Inorder to exit, type '${this.exitClause}'.`);
+    const messages: ChatCompletionMessage[] = [];
+
+    const rl = readline.createInterface({
       input: process.stdin,
       output: process.stdout,
+      prompt: this.userIndicator,
+    });
+    rl.prompt();
+
+    rl.on('close', () => {
+      console.log(this.exitMessage);
+      process.exit(0);
     });
-    lbl.on('line', (userInput: string) => {
-      if (userInput.trim() === 'exit()') {
-        lbl.close();
+
+    rl.on('line', (userInput: string) => {
+      if (userInput.trim() === this.exitClause) {
+        rl.close();
         return;
       }
 
-      console.log('Result:', userInput);
-      console.log('Enter another equation or type "exit()" to quit.');
+      messages.push({
+        content: userInput,
+        role: ChatCompletionRole.User,
+      });
+
+      const chatDto: CreateChatCompletionDto = {
+        messages,
+        model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF',
+        stream: true,
+        max_tokens: 2048,
+        stop: [],
+        frequency_penalty: 0.7,
+        presence_penalty: 0.7,
+        temperature: 0.7,
+        top_p: 0.7,
+      };
+
+      let llmFullResponse = '';
+      const writableStream = new WritableStream<ChatStreamEvent>({
+        write(chunk) {
+          if (chunk.type === 'data') {
+            stdout.write(chunk.data ?? '');
+            llmFullResponse += chunk.data ?? '';
+          } else if (chunk.type === 'error') {
+            console.log('Error!!');
+          } else {
+            messages.push({
+              content: llmFullResponse,
+              role: ChatCompletionRole.Assistant,
+            });
+            llmFullResponse = '';
+            console.log('\n');
+          }
+        },
+      });
+
+      this.chatUsecases.createChatCompletions(chatDto, {}, writableStream);
     });
   }
 }

cortex-js/src/infrastructure/controllers/chat.controller.ts
Lines changed: 19 additions & 1 deletion

@@ -3,6 +3,7 @@ import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-
 import { ChatUsecases } from '@/usecases/chat/chat.usecases';
 import { Response } from 'express';
 import { ApiTags } from '@nestjs/swagger';
+import { ChatStreamEvent } from '@/domain/abstracts/oai.abstract';
 
 @ApiTags('Inference')
 @Controller('chat')
@@ -15,6 +16,23 @@ export class ChatController {
     @Body() createChatDto: CreateChatCompletionDto,
     @Res() res: Response,
   ) {
-    this.chatService.createChatCompletions(createChatDto, headers, res);
+    const writableStream = new WritableStream<ChatStreamEvent>({
+      write(chunk) {
+        if (chunk.type === 'data') {
+          res.json(chunk.data ?? {});
+        } else if (chunk.type === 'error') {
+          res.json(chunk.error ?? {});
+        } else {
+          console.log('\n');
+        }
+      },
+    });
+
+    this.chatService.createChatCompletions(
+      createChatDto,
+      headers,
+      writableStream,
+      res,
+    );
   }
 }

cortex-js/src/usecases/chat/chat.module.ts
Lines changed: 1 addition & 0 deletions

@@ -8,5 +8,6 @@ import { ExtensionModule } from '@/infrastructure/repositories/extensions/extens
   imports: [DatabaseModule, ExtensionModule],
   controllers: [ChatController],
   providers: [ChatUsecases],
+  exports: [ChatUsecases],
 })
 export class ChatModule {}

cortex-js/src/usecases/chat/chat.usecases.ts
Lines changed: 4 additions & 3 deletions

@@ -1,10 +1,10 @@
 import { Inject, Injectable } from '@nestjs/common';
 import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-completion.dto';
-import { Response } from 'express';
 import { ExtensionRepository } from '@/domain/repositories/extension.interface';
 import { Repository } from 'typeorm';
 import { ModelEntity } from '@/infrastructure/entities/model.entity';
 import { EngineExtension } from '@/domain/abstracts/engine.abstract';
+import { ChatStreamEvent } from '@/domain/abstracts/oai.abstract';
 
 @Injectable()
 export class ChatUsecases {
@@ -17,7 +17,8 @@ export class ChatUsecases {
   async createChatCompletions(
     createChatDto: CreateChatCompletionDto,
     headers: Record<string, string>,
-    res: Response,
+    stream: WritableStream<ChatStreamEvent>,
+    res?: any,
   ) {
     const extensions = (await this.extensionRepository.findAll()) ?? [];
     const model = await this.modelRepository.findOne({
@@ -26,6 +27,6 @@ export class ChatUsecases {
     const engine = extensions.find((e: any) => e.provider === model?.engine) as
       | EngineExtension
       | undefined;
-    await engine?.inference(createChatDto, headers, res);
+    engine?.inference(createChatDto, headers, stream, res);
   }
 }
