3 changes: 3 additions & 0 deletions cortex-js/package.json
@@ -33,10 +33,12 @@
"@nestjs/mapped-types": "*",
"@nestjs/platform-express": "^10.0.0",
"@nestjs/swagger": "^7.3.1",
"@terascope/fetch-github-release": "^0.8.8",
"axios": "^1.6.8",
"class-transformer": "^0.5.1",
"class-validator": "^0.14.1",
"cli-progress": "^3.12.0",
"decompress": "^4.2.1",
"nest-commander": "^3.13.0",
"readline": "^1.3.0",
"reflect-metadata": "^0.2.0",
@@ -52,6 +54,7 @@
"@nestjs/testing": "^10.0.0",
"@nestjs/typeorm": "^10.0.2",
"@types/cli-progress": "^3.11.5",
"@types/decompress": "^4.2.7",
"@types/express": "^4.17.17",
"@types/jest": "^29.5.2",
"@types/node": "^20.12.9",
6 changes: 6 additions & 0 deletions cortex-js/src/command.module.ts
@@ -11,6 +11,9 @@ import { ModelsCommand } from './infrastructure/commanders/models.command';
import { StartCommand } from './infrastructure/commanders/start.command';
import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module';
import { ChatModule } from './usecases/chat/chat.module';
import { InitCommand } from './infrastructure/commanders/init.command';
import { HttpModule } from '@nestjs/axios';
import { CreateInitQuestions } from './infrastructure/commanders/inquirer/init.questions';

@Module({
imports: [
@@ -24,6 +27,7 @@ import { ChatModule } from './usecases/chat/chat.module';
CortexModule,
ChatModule,
ExtensionModule,
HttpModule,
],
providers: [
BasicCommand,
@@ -32,6 +36,8 @@ import { ChatModule } from './usecases/chat/chat.module';
ServeCommand,
InferenceCommand,
StartCommand,
InitCommand,
CreateInitQuestions,
],
})
export class CommandModule {}
@@ -5,9 +5,16 @@ import { InferenceCommand } from './inference.command';
import { ModelsCommand } from './models.command';
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
import { defaultCortexJsHost, defaultCortexJsPort } from 'constant';
import { InitCommand } from './init.command';

@RootCommand({
subCommands: [ModelsCommand, PullCommand, ServeCommand, InferenceCommand],
subCommands: [
ModelsCommand,
PullCommand,
ServeCommand,
InferenceCommand,
InitCommand,
],
})
export class BasicCommand extends CommandRunner {
constructor(private readonly cortexUsecases: CortexUsecases) {
140 changes: 140 additions & 0 deletions cortex-js/src/infrastructure/commanders/init.command.ts
@@ -0,0 +1,140 @@
import { createWriteStream, existsSync, rmSync } from 'fs';
import { CommandRunner, SubCommand, InquirerService } from 'nest-commander';
import { resolve } from 'path';
import { HttpService } from '@nestjs/axios';
import { Presets, SingleBar } from 'cli-progress';
import decompress from 'decompress';

@SubCommand({ name: 'init', aliases: ['setup'] })
export class InitCommand extends CommandRunner {
CORTEX_RELEASES_URL = 'https://api.github.com/repos/janhq/cortex/releases';

constructor(
private readonly httpService: HttpService,
private readonly inquirerService: InquirerService,
) {
super();
}

async run(input: string[], options?: any): Promise<void> {
options = await this.inquirerService.ask('create-init-questions', options);
const version = input[0] ?? 'latest';

await this.download(this.parseEngineFileName(options), version);
}

download = async (
engineFileName: string,
version: string = 'latest',
): Promise<any> => {
const res = await this.httpService
.get(
this.CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`,
{
headers: {
'X-GitHub-Api-Version': '2022-11-28',
Accept: 'application/vnd.github+json',
},
},
)
.toPromise();

if (!res?.data) {
console.log('Failed to fetch releases');
process.exit(1);
}

let release = res?.data;
if (Array.isArray(res?.data)) {
release = Array(res?.data)[0].find(
(e) => e.name === version.replace('v', ''),
);
}
const toDownloadAsset = release.assets.find((s: any) =>
s.name.includes(engineFileName),
);

if (!toDownloadAsset) {
console.log(`Could not find engine file ${engineFileName}`);
process.exit(1);
}

console.log(`Downloading engine file ${engineFileName}`);
const engineDir = resolve(this.rootDir(), 'cortex-cpp');
if (existsSync(engineDir)) rmSync(engineDir, { recursive: true });

const download = await this.httpService
.get(toDownloadAsset.browser_download_url, {
responseType: 'stream',
})
.toPromise();
if (!download) {
throw new Error('Failed to download model');
}

const destination = resolve(this.rootDir(), toDownloadAsset.name);

await new Promise((resolve, reject) => {
const writer = createWriteStream(destination);
let receivedBytes = 0;
const totalBytes = download.headers['content-length'];

writer.on('finish', () => {
bar.stop();
resolve(true);
});

writer.on('error', (error) => {
bar.stop();
reject(error);
});

const bar = new SingleBar({}, Presets.shades_classic);
bar.start(100, 0);

download.data.on('data', (chunk: any) => {
receivedBytes += chunk.length;
bar.update(Math.floor((receivedBytes / totalBytes) * 100));
});

download.data.pipe(writer);
});

try {
await decompress(
resolve(this.rootDir(), destination),
resolve(this.rootDir()),
);
} catch (e) {
console.log(e);
process.exit(1);
}
process.exit(0);
};

parseEngineFileName = (options: {
runMode?: 'CPU' | 'GPU';
gpuType?: 'Nvidia' | 'Others (Vulkan)';
instructions?: 'AVX' | 'AVX2' | 'AVX-512' | undefined;
cudaVersion?: '11' | '12';
}) => {
const platform =
process.platform === 'win32'
? 'windows'
: process.platform === 'darwin'
? 'mac'
: process.platform;
const arch = process.arch === 'arm64' ? process.arch : 'amd64';
const cudaVersion =
options.runMode === 'GPU'
? options.gpuType === 'Nvidia'
? '-cuda-' + (options.cudaVersion === '11' ? '11.7' : '12.2')
: '-vulkan'
: '';
const instructions = options.instructions ? `-${options.instructions}` : '';
const engineName = `${platform}-${arch}${instructions.toLowerCase()}${cudaVersion}`;
return `${engineName}.tar.gz`;
};

rootDir = () => resolve(__dirname, `../../../`);
}
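A rough illustration of the asset names that the parseEngineFileName mapping above produces. This sketch is not part of the diff; the concrete names are assumptions derived only from the string-building logic shown, not from the actual janhq/cortex release assets.

// Sketch only: engine asset names for a few answer combinations, assuming an
// x64 host and the mapping implemented in parseEngineFileName above.
const exampleAssets: Array<[string, string]> = [
  ['linux, CPU, AVX2', 'linux-amd64-avx2.tar.gz'],
  ['windows, GPU (Nvidia, CUDA 12), AVX2', 'windows-amd64-avx2-cuda-12.2.tar.gz'],
  ['linux, GPU (Others/Vulkan), AVX', 'linux-amd64-avx-vulkan.tar.gz'],
  ['mac on arm64 (darwin skips the prompts)', 'mac-arm64.tar.gz'],
];
for (const [answers, asset] of exampleAssets) {
  console.log(`${answers} -> ${asset}`);
}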
39 changes: 39 additions & 0 deletions cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts
@@ -0,0 +1,39 @@
import { Question, QuestionSet } from 'nest-commander';

@QuestionSet({ name: 'create-init-questions' })
export class CreateInitQuestions {
@Question({
type: 'list',
message: 'Select run mode',
name: 'runMode',
default: 'CPU',
choices: ['CPU', 'GPU'],
when: () => process.platform !== 'darwin',
})
parseRunMode(val: string) {
return val;
}

@Question({
type: 'list',
message: 'Select GPU type',
name: 'gpuType',
default: 'Nvidia',
choices: ['Nvidia', 'Others (Vulkan)'],
when: (answers: any) => answers.runMode === 'GPU',
})
parseGPUType(val: string) {
return val;
}

@Question({
type: 'list',
message: 'Select CPU instructions set',
name: 'instructions',
choices: ['AVX2', 'AVX', 'AVX-512'],
when: () => process.platform !== 'darwin',
})
parseContent(val: string) {
return val;
}
}
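For context, InitCommand consumes this question set through this.inquirerService.ask('create-init-questions', options), and the answer fields line up with the options type that parseEngineFileName accepts. A minimal sketch of the expected answers shape follows (field names come from the name properties above; note the set never prompts for cudaVersion, so the Nvidia branch in parseEngineFileName falls through to 12.2).

// Sketch of the answers object on a non-darwin machine; illustrative only.
interface InitAnswers {
  runMode?: 'CPU' | 'GPU';
  gpuType?: 'Nvidia' | 'Others (Vulkan)';
  instructions?: 'AVX' | 'AVX2' | 'AVX-512';
  cudaVersion?: '11' | '12'; // not asked by this question set
}

const exampleAnswers: InitAnswers = {
  runMode: 'GPU',
  gpuType: 'Nvidia',
  instructions: 'AVX2',
};
console.log(exampleAnswers);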
8 changes: 8 additions & 0 deletions cortex-js/src/infrastructure/commanders/start.command.ts
@@ -2,6 +2,8 @@ import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
import { ModelsUsecases } from '@/usecases/models/models.usecases';
import { CommandRunner, SubCommand } from 'nest-commander';
import { LoadModelDto } from '../dtos/models/load-model.dto';
import { resolve } from 'path';
import { existsSync } from 'fs';

@SubCommand({ name: 'start', aliases: ['run'] })
export class StartCommand extends CommandRunner {
@@ -26,6 +28,10 @@
}

private async startCortex() {
if (!existsSync(resolve(this.rootDir(), 'cortex-cpp'))) {
console.log('Please init the cortex by running cortex init command!');
process.exit(0);
}
const host = '127.0.0.1';
const port = '3928';
return this.cortexUsecases.startCortex(host, port);
@@ -45,4 +51,6 @@ export class StartCommand extends CommandRunner {
const loadModelDto: LoadModelDto = { modelId, settings };
return this.modelsUsecases.startModel(loadModelDto);
}

rootDir = () => resolve(__dirname, `../../../`);
}
@@ -46,6 +46,7 @@ export default class CortexProvider extends OAIEngineExtension {
const cpuThreadCount = 1; // TODO: NamH Math.max(1, nitroResourceProbe.numCpuPhysicalCore);
const modelSettings = {
// This is critical and requires real CPU physical core count (or performance core)
model: model.id,
cpu_threads: cpuThreadCount,
...model.settings,
llama_model_path: modelBinaryLocalPath,
47 changes: 20 additions & 27 deletions cortex-js/src/usecases/cortex/cortex.usecases.ts
@@ -1,19 +1,14 @@
import { Injectable, InternalServerErrorException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { Injectable } from '@nestjs/common';
import { ChildProcess, spawn } from 'child_process';
import { join } from 'path';
import { existsSync } from 'fs';
import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto';
import { HttpService } from '@nestjs/axios';

@Injectable()
export class CortexUsecases {
private cortexProcess: ChildProcess | undefined;

constructor(
private readonly configService: ConfigService,
private readonly httpService: HttpService,
) {}
constructor(private readonly httpService: HttpService) {}

async startCortex(
host: string,
@@ -26,29 +21,27 @@ export class CortexUsecases {
};
}

const binaryPath = this.configService.get<string>('CORTEX_BINARY_PATH');
if (!binaryPath || !existsSync(binaryPath)) {
throw new InternalServerErrorException('Cortex binary not found');
}

const args: string[] = ['1', host, port];
// go up one level to get the binary folder, have to also work on windows
const binaryFolder = join(binaryPath, '..');

this.cortexProcess = spawn(binaryPath, args, {
detached: false,
cwd: binaryFolder,
stdio: 'inherit',
env: {
...process.env,
// TODO: NamH need to get below information
// CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
// // Vulkan - Support 1 device at a time for now
// ...(executableOptions.vkVisibleDevices?.length > 0 && {
// GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
// }),
// const binaryFolder = join(binaryPath, '..');
this.cortexProcess = spawn(
join(__dirname, '../../../cortex-cpp/cortex-cpp'),
args,
{
detached: false,
cwd: join(__dirname, '../../../cortex-cpp'),
stdio: 'inherit',
env: {
...process.env,
// TODO: NamH need to get below information
// CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
// // Vulkan - Support 1 device at a time for now
// ...(executableOptions.vkVisibleDevices?.length > 0 && {
// GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
// }),
},
},
});
);

this.registerCortexEvents();

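The behavioural change here: startCortex no longer reads CORTEX_BINARY_PATH via ConfigService and instead spawns the binary that cortex init extracted, resolving it relative to the compiled file. A rough sketch of how that path resolves, assuming the build output mirrors src/ so the compiled usecase sits three directories below the package root:

import { join } from 'path';

// Illustrative only; mirrors the join(__dirname, '../../../cortex-cpp/cortex-cpp')
// call above. The exact dist/ layout is an assumption.
const packageRoot = join(__dirname, '../../../');   // e.g. <cortex-js>/
const engineDir = join(packageRoot, 'cortex-cpp');  // folder extracted by `cortex init`
const engineBinary = join(engineDir, 'cortex-cpp'); // binary spawned by startCortex
console.log({ packageRoot, engineDir, engineBinary });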
1 change: 1 addition & 0 deletions cortex-js/tsconfig.json
@@ -18,6 +18,7 @@
"strictBindCallApply": true,
"forceConsistentCasingInFileNames": true,
"noFallthroughCasesInSwitch": true,
"esModuleInterop": true,
"paths": {
"@/*": ["src/*"]
}