Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 9c105af

Browse files
committed
feat: cortex CLI init
1 parent 14cfcfe commit 9c105af

File tree

9 files changed

+202
-28
lines changed

9 files changed

+202
-28
lines changed

cortex-js/package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,12 @@
3333
"@nestjs/mapped-types": "*",
3434
"@nestjs/platform-express": "^10.0.0",
3535
"@nestjs/swagger": "^7.3.1",
36+
"@terascope/fetch-github-release": "^0.8.8",
3637
"axios": "^1.6.8",
3738
"class-transformer": "^0.5.1",
3839
"class-validator": "^0.14.1",
3940
"cli-progress": "^3.12.0",
41+
"decompress": "^4.2.1",
4042
"nest-commander": "^3.13.0",
4143
"readline": "^1.3.0",
4244
"reflect-metadata": "^0.2.0",
@@ -52,6 +54,7 @@
5254
"@nestjs/testing": "^10.0.0",
5355
"@nestjs/typeorm": "^10.0.2",
5456
"@types/cli-progress": "^3.11.5",
57+
"@types/decompress": "^4.2.7",
5558
"@types/express": "^4.17.17",
5659
"@types/jest": "^29.5.2",
5760
"@types/node": "^20.12.9",

cortex-js/src/command.module.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ import { ModelsCommand } from './infrastructure/commanders/models.command';
1111
import { StartCommand } from './infrastructure/commanders/start.command';
1212
import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module';
1313
import { ChatModule } from './usecases/chat/chat.module';
14+
import { InitCommand } from './infrastructure/commanders/init.command';
15+
import { HttpModule } from '@nestjs/axios';
16+
import { CreateInitQuestions } from './infrastructure/commanders/inquirer/init.questions';
1417

1518
@Module({
1619
imports: [
@@ -24,6 +27,7 @@ import { ChatModule } from './usecases/chat/chat.module';
2427
CortexModule,
2528
ChatModule,
2629
ExtensionModule,
30+
HttpModule,
2731
],
2832
providers: [
2933
BasicCommand,
@@ -32,6 +36,8 @@ import { ChatModule } from './usecases/chat/chat.module';
3236
ServeCommand,
3337
InferenceCommand,
3438
StartCommand,
39+
InitCommand,
40+
CreateInitQuestions,
3541
],
3642
})
3743
export class CommandModule {}

cortex-js/src/infrastructure/commanders/basic-command.commander.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,16 @@ import { InferenceCommand } from './inference.command';
55
import { ModelsCommand } from './models.command';
66
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
77
import { defaultCortexJsHost, defaultCortexJsPort } from 'constant';
8+
import { InitCommand } from './init.command';
89

910
@RootCommand({
10-
subCommands: [ModelsCommand, PullCommand, ServeCommand, InferenceCommand],
11+
subCommands: [
12+
ModelsCommand,
13+
PullCommand,
14+
ServeCommand,
15+
InferenceCommand,
16+
InitCommand,
17+
],
1118
})
1219
export class BasicCommand extends CommandRunner {
1320
constructor(private readonly cortexUsecases: CortexUsecases) {
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import { createWriteStream, existsSync, rmSync } from 'fs';
2+
import { CommandRunner, SubCommand, InquirerService } from 'nest-commander';
3+
import { resolve } from 'path';
4+
import { HttpService } from '@nestjs/axios';
5+
import { Presets, SingleBar } from 'cli-progress';
6+
import decompress from 'decompress';
7+
8+
/**
 * `init` sub-command (alias `setup`).
 *
 * Asks the user about their hardware through the `create-init-questions`
 * question set, derives the matching engine archive name, downloads that
 * archive from the first release returned by the GitHub releases endpoint and
 * unpacks it into the package root.
 */
@SubCommand({ name: 'init', aliases: ['setup'] })
export class InitCommand extends CommandRunner {
  /** GitHub REST endpoint that lists the releases to pick the engine from. */
  CORTEX_RELEASES_URL = 'https://api.github.com/repos/janhq/cortex/releases';

  constructor(
    private readonly httpService: HttpService,
    private readonly inquirerService: InquirerService,
  ) {
    super();
  }

  /**
   * Entry point invoked by nest-commander.
   *
   * @param input Positional CLI arguments (unused).
   * @param options Pre-supplied answers; missing ones are asked interactively.
   */
  async run(input: string[], options?: any): Promise<void> {
    options = await this.inquirerService.ask('create-init-questions', options);

    await this.download(this.parseEngineFileName(options));
  }

  /**
   * Downloads the release asset whose name contains `engineFileName` and
   * extracts it into the package root.
   *
   * Returns early (after logging) when the release list cannot be fetched or
   * no asset matches. Otherwise the process is terminated when the work is
   * done: exit code 0 on success, 1 when extraction fails.
   *
   * @param engineFileName Archive name produced by `parseEngineFileName`.
   * @throws Error when the asset request yields no response.
   */
  download = async (engineFileName: string): Promise<any> => {
    const res = await this.httpService
      .get(this.CORTEX_RELEASES_URL)
      .toPromise();

    if (!res?.data) {
      console.log('Failed to fetch releases');
      return;
    }

    // The endpoint answers with an array of releases; only the first is used.
    const releases: any[] = Array.isArray(res.data) ? res.data : [];
    const assets: any[] = Array.isArray(releases[0]?.assets)
      ? releases[0].assets
      : [];
    const toDownloadAsset = assets.find(
      (asset: any) =>
        typeof asset?.name === 'string' && asset.name.includes(engineFileName),
    );
    if (!toDownloadAsset) {
      // Bail out before touching anything on disk.
      console.log(`No engine file matching ${engineFileName} was found`);
      return;
    }

    console.log(`Downloading engine file ${toDownloadAsset.name}`);

    const download = await this.httpService
      .get(toDownloadAsset.browser_download_url, {
        responseType: 'stream',
      })
      .toPromise();
    if (!download) {
      throw new Error('Failed to download engine file');
    }

    const rootDir = this.rootDir();
    const destination = resolve(rootDir, toDownloadAsset.name);

    await new Promise<void>((done, fail) => {
      const writer = createWriteStream(destination);
      const bar = new SingleBar({}, Presets.shades_classic);
      // The header arrives as a string and may be missing altogether.
      const totalBytes = Number(download.headers['content-length']);
      let receivedBytes = 0;

      const abort = (error: unknown) => {
        bar.stop();
        writer.destroy();
        fail(error);
      };

      writer.on('finish', () => {
        bar.stop();
        done();
      });
      writer.on('error', abort);
      // A broken response stream never ends the writer, so reject explicitly.
      download.data.on('error', abort);

      bar.start(100, 0);

      download.data.on('data', (chunk: any) => {
        receivedBytes += chunk.length;
        // Without a usable content-length no percentage can be computed.
        if (totalBytes > 0) {
          bar.update(Math.floor((receivedBytes / totalBytes) * 100));
        }
      });

      download.data.pipe(writer);
    });

    try {
      // Drop the previous engine only once the new archive is fully on disk.
      const engineDir = resolve(rootDir, 'cortex-cpp');
      if (existsSync(engineDir)) rmSync(engineDir, { recursive: true });

      await decompress(destination, rootDir);
    } catch (e) {
      console.log(e);
      process.exit(1);
    }
    process.exit(0);
  };

  /**
   * Builds the engine archive name for the current platform and the answers
   * given by the user, shaped as
   * `<platform>-<arch>[-<instructions>][-cuda-<version>|-vulkan].tar.gz`.
   *
   * @param options Answers collected from the init question set.
   * @returns The archive file name to look for among the release assets.
   */
  parseEngineFileName = (options: {
    runMode?: 'CPU' | 'GPU';
    gpuType?: 'Nvidia' | 'Others (Vulkan)';
    instructions?: 'AVX' | 'AVX2' | 'AVX-512' | undefined;
    cudaVersion?: '11' | '12';
  }) => {
    const platform =
      process.platform === 'win32'
        ? 'windows'
        : process.platform === 'darwin'
          ? 'mac'
          : process.platform;
    const arch = process.arch === 'arm64' ? process.arch : 'amd64';
    const cudaVersion =
      options.runMode === 'GPU'
        ? options.gpuType === 'Nvidia'
          ? '-cuda-' + (options.cudaVersion === '11' ? '11.7' : '12.2')
          : '-vulkan'
        : '';
    const instructions = options.instructions ? `-${options.instructions}` : '';
    const engineName = `${platform}-${arch}${instructions.toLowerCase()}${cudaVersion}`;
    return `${engineName}.tar.gz`;
  };

  /** Directory three levels above this file; archives are stored and unpacked here. */
  rootDir = () => resolve(__dirname, `../../../`);
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import { Question, QuestionSet } from 'nest-commander';
2+
3+
/** `when` predicate: the question is shown on every platform except macOS. */
const isNotDarwin = (): boolean => process.platform !== 'darwin';

/** `when` predicate: the question is shown only after GPU run mode was chosen. */
const pickedGpuMode = (answers: any): boolean => answers.runMode === 'GPU';

/**
 * Interactive prompts registered under the name `create-init-questions`.
 *
 * Each decorated method receives the raw answer for its question and returns
 * it unchanged.
 */
@QuestionSet({ name: 'create-init-questions' })
export class CreateInitQuestions {
  /** Run mode prompt (CPU or GPU); skipped on macOS. */
  @Question({
    name: 'runMode',
    type: 'list',
    message: 'Select run mode',
    choices: ['CPU', 'GPU'],
    default: 'CPU',
    when: isNotDarwin,
  })
  parseRunMode(val: string) {
    return val;
  }

  /** GPU vendor prompt; asked only when the run mode answer is GPU. */
  @Question({
    name: 'gpuType',
    type: 'list',
    message: 'Select GPU type',
    choices: ['Nvidia', 'Others (Vulkan)'],
    default: 'Nvidia',
    when: pickedGpuMode,
  })
  parseGPUType(val: string) {
    return val;
  }

  /** CPU instruction set prompt; skipped on macOS. */
  @Question({
    name: 'instructions',
    type: 'list',
    message: 'Select CPU instructions set',
    choices: ['AVX2', 'AVX', 'AVX-512'],
    when: isNotDarwin,
  })
  parseContent(val: string) {
    return val;
  }
}

cortex-js/src/infrastructure/commanders/start.command.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
22
import { ModelsUsecases } from '@/usecases/models/models.usecases';
33
import { CommandRunner, SubCommand } from 'nest-commander';
44
import { LoadModelDto } from '../dtos/models/load-model.dto';
5+
import { resolve } from 'path';
6+
import { existsSync } from 'fs';
57

68
@SubCommand({ name: 'start', aliases: ['run'] })
79
export class StartCommand extends CommandRunner {
@@ -26,6 +28,10 @@ export class StartCommand extends CommandRunner {
2628
}
2729

2830
private async startCortex() {
31+
if (!existsSync(resolve(this.rootDir(), 'cortex-cpp'))) {
32+
console.log('Please init the cortex by running cortex init command!');
33+
process.exit(0);
34+
}
2935
const host = '127.0.0.1';
3036
const port = '3928';
3137
return this.cortexUsecases.startCortex(host, port);
@@ -45,4 +51,6 @@ export class StartCommand extends CommandRunner {
4551
const loadModelDto: LoadModelDto = { modelId, settings };
4652
return this.modelsUsecases.startModel(loadModelDto);
4753
}
54+
55+
rootDir = () => resolve(__dirname, `../../../`);
4856
}

cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ export default class CortexProvider extends OAIEngineExtension {
4646
const cpuThreadCount = 1; // TODO: NamH Math.max(1, nitroResourceProbe.numCpuPhysicalCore);
4747
const modelSettings = {
4848
// This is critical and requires real CPU physical core count (or performance core)
49+
model: model.id,
4950
cpu_threads: cpuThreadCount,
5051
...model.settings,
5152
llama_model_path: modelBinaryLocalPath,

cortex-js/src/usecases/cortex/cortex.usecases.ts

Lines changed: 20 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,14 @@
1-
import { Injectable, InternalServerErrorException } from '@nestjs/common';
2-
import { ConfigService } from '@nestjs/config';
1+
import { Injectable } from '@nestjs/common';
32
import { ChildProcess, spawn } from 'child_process';
43
import { join } from 'path';
5-
import { existsSync } from 'fs';
64
import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto';
75
import { HttpService } from '@nestjs/axios';
86

97
@Injectable()
108
export class CortexUsecases {
119
private cortexProcess: ChildProcess | undefined;
1210

13-
constructor(
14-
private readonly configService: ConfigService,
15-
private readonly httpService: HttpService,
16-
) {}
11+
constructor(private readonly httpService: HttpService) {}
1712

1813
async startCortex(
1914
host: string,
@@ -26,29 +21,27 @@ export class CortexUsecases {
2621
};
2722
}
2823

29-
const binaryPath = this.configService.get<string>('CORTEX_BINARY_PATH');
30-
if (!binaryPath || !existsSync(binaryPath)) {
31-
throw new InternalServerErrorException('Cortex binary not found');
32-
}
33-
3424
const args: string[] = ['1', host, port];
3525
// go up one level to get the binary folder, have to also work on windows
36-
const binaryFolder = join(binaryPath, '..');
37-
38-
this.cortexProcess = spawn(binaryPath, args, {
39-
detached: false,
40-
cwd: binaryFolder,
41-
stdio: 'inherit',
42-
env: {
43-
...process.env,
44-
// TODO: NamH need to get below information
45-
// CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
46-
// // Vulkan - Support 1 device at a time for now
47-
// ...(executableOptions.vkVisibleDevices?.length > 0 && {
48-
// GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
49-
// }),
26+
// const binaryFolder = join(binaryPath, '..');
27+
this.cortexProcess = spawn(
28+
join(__dirname, '../../../cortex-cpp/cortex-cpp'),
29+
args,
30+
{
31+
detached: false,
32+
cwd: join(__dirname, '../../../cortex-cpp'),
33+
stdio: 'inherit',
34+
env: {
35+
...process.env,
36+
// TODO: NamH need to get below information
37+
// CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
38+
// // Vulkan - Support 1 device at a time for now
39+
// ...(executableOptions.vkVisibleDevices?.length > 0 && {
40+
// GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
41+
// }),
42+
},
5043
},
51-
});
44+
);
5245

5346
this.registerCortexEvents();
5447

cortex-js/tsconfig.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"strictBindCallApply": true,
1919
"forceConsistentCasingInFileNames": true,
2020
"noFallthroughCasesInSwitch": true,
21+
"esModuleInterop": true,
2122
"paths": {
2223
"@/*": ["src/*"]
2324
}

0 commit comments

Comments
 (0)