diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts index 097590ce4..ad27d9b5f 100644 --- a/cortex-js/src/command.module.ts +++ b/cortex-js/src/command.module.ts @@ -44,6 +44,7 @@ import { ConfigsListCommand } from './infrastructure/commanders/configs/configs- import { ConfigsSetCommand } from './infrastructure/commanders/configs/configs-set.command'; import { EnginesListCommand } from './infrastructure/commanders/engines/engines-list.command'; import { EnginesGetCommand } from './infrastructure/commanders/engines/engines-get.command'; +import { EnginesInitCommand } from './infrastructure/commanders/engines/engines-init.command'; @Module({ imports: [ @@ -112,6 +113,7 @@ import { EnginesGetCommand } from './infrastructure/commanders/engines/engines-g // Engines EnginesListCommand, EnginesGetCommand, + EnginesInitCommand, ], }) export class CommandModule {} diff --git a/cortex-js/src/infrastructure/commanders/engines.command.ts b/cortex-js/src/infrastructure/commanders/engines.command.ts index 3fc597803..f88f2a421 100644 --- a/cortex-js/src/infrastructure/commanders/engines.command.ts +++ b/cortex-js/src/infrastructure/commanders/engines.command.ts @@ -1,13 +1,13 @@ import { CommandRunner, SubCommand } from 'nest-commander'; import { SetCommandContext } from './decorators/CommandContext'; import { ContextService } from '@/infrastructure/services/context/context.service'; -import { EnginesUsecases } from '@/usecases/engines/engines.usecase'; import { EnginesListCommand } from './engines/engines-list.command'; import { EnginesGetCommand } from './engines/engines-get.command'; +import { EnginesInitCommand } from './engines/engines-init.command'; @SubCommand({ name: 'engines', - subCommands: [EnginesListCommand, EnginesGetCommand], + subCommands: [EnginesListCommand, EnginesGetCommand, EnginesInitCommand], description: 'Get cortex engines', }) @SetCommandContext() diff --git 
a/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts b/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts new file mode 100644 index 000000000..8e7394eac --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts @@ -0,0 +1,39 @@ +import { CommandRunner, SubCommand } from 'nest-commander'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { InitCliUsecases } from '../usecases/init.cli.usecases'; +import { Engines } from '../types/engine.interface'; +
+@SubCommand({ + name: 'init', + description: 'Setup engine', + arguments: '<name>', + argsDescription: { + name: 'Engine name to setup', + }, +}) +@SetCommandContext() +export class EnginesInitCommand extends CommandRunner { + constructor( + private readonly initUsecases: InitCliUsecases, + readonly contextService: ContextService, + ) { + super(); + } + + async run(passedParams: string[]): Promise<void> { + const engine = passedParams[0]; + const options = passedParams.includes(Engines.llamaCPP) + ? await this.initUsecases.defaultInstallationOptions() + : {}; + return this.initUsecases + .installEngine( + options, + engine.includes('@') ? 
engine.split('@')[1] : 'latest', + engine, + true + ) + .then(() => console.log('Engine installed successfully!')) + .catch(() => console.error('Engine not found or installation failed!')); + } +} diff --git a/cortex-js/src/infrastructure/commanders/kill.command.ts b/cortex-js/src/infrastructure/commanders/kill.command.ts index 0bde73a22..e6f40742a 100644 --- a/cortex-js/src/infrastructure/commanders/kill.command.ts +++ b/cortex-js/src/infrastructure/commanders/kill.command.ts @@ -10,12 +10,15 @@ import { ContextService } from '../services/context/context.service'; @SetCommandContext() export class KillCommand extends CommandRunner { constructor( - private readonly usecases: CortexUsecases, + private readonly cortexUsecases: CortexUsecases, readonly contextService: ContextService, ) { super(); } async run(): Promise<void> { - return this.usecases.stopCortex().then(console.log); + return this.cortexUsecases + .stopCortex() + .then(this.cortexUsecases.stopServe) + .then(() => console.log('Cortex processes stopped successfully!')); } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts index e3397bbbd..c7a751b21 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -14,6 +14,7 @@ import { join } from 'node:path'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; import { InitCliUsecases } from '../usecases/init.cli.usecases'; import { checkModelCompatibility } from '@/utils/model-check'; +import { Engines } from '../types/engine.interface'; @SubCommand({ name: 'pull', @@ -52,7 +53,7 @@ export class ModelPullCommand extends CommandRunner { }); const existingModel = await this.modelsCliUsecases.getModel(modelId); - const engine = existingModel?.engine || 'cortex.llamacpp'; + const engine = existingModel?.engine || 
Engines.llamaCPP; // Pull engine if not exist if ( diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts index 8a935a9d7..7eee1f990 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts @@ -67,7 +67,7 @@ export class ModelStartCommand extends CommandRunner { checkModelCompatibility(modelId); - const engine = existingModel.engine || 'cortex.llamacpp'; + const engine = existingModel.engine || Engines.llamaCPP; // Pull engine if not exist if ( !existsSync(join(await this.fileService.getCortexCppEnginePath(), engine)) diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts index e908eef8c..beee3ec45 100644 --- a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -80,7 +80,7 @@ export class RunCommand extends CommandRunner { checkModelCompatibility(modelId); - const engine = existingModel.engine || 'cortex.llamacpp'; + const engine = existingModel.engine || Engines.llamaCPP; // Pull engine if not exist if ( !existsSync(join(await this.fileService.getCortexCppEnginePath(), engine)) diff --git a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts index c7a64f7dd..933d2f979 100644 --- a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts +++ b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts @@ -98,10 +98,7 @@ describe('Helper commands', () => { await CommandTestFactory.run(commandInstance, ['kill']); await CommandTestFactory.run(commandInstance, ['ps']); - expect(logMock.firstCall?.args[0]).toEqual({ - message: 'Cortex stopped successfully', - status: 'success', - }); + 
expect(logMock.firstCall?.args[0]).toEqual("Cortex processes stopped successfully!"); expect(tableMock.firstCall?.args[0]).toBeInstanceOf(Array); expect(tableMock.firstCall?.args[0].length).toEqual(0); }, diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts index 101776d12..c9e1e7015 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts @@ -66,14 +66,15 @@ export class InitCliUsecases { !existsSync( join( await this.fileManagerService.getCortexCppEnginePath(), - 'cortex.llamacpp', + Engines.llamaCPP, ), - ) + ) || + (engine === Engines.llamaCPP && force) ) await this.installLlamaCppEngine(options, version); - if (engine !== 'cortex.llamacpp') - await this.installAcceleratedEngine('latest', engine); + if (engine !== Engines.llamaCPP) + await this.installAcceleratedEngine(version, engine); configs.initialized = true; await this.fileManagerService.writeConfigFile(configs); diff --git a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts index 15525c2a8..6fad12b6b 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts @@ -9,6 +9,7 @@ import { HttpService } from '@nestjs/axios'; import { firstValueFrom } from 'rxjs'; import { ModelStat } from '@commanders/types/model-stat.interface'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { Engines } from '../types/engine.interface'; interface ModelStatResponse { object: string; @@ -47,7 +48,7 @@ export class PSCliUsecases { currentTime.getTime() - new Date(startTime).getTime(); return { modelId: e.id, - engine: e.engine ?? 'cortex.llamacpp', + engine: e.engine ?? 
Engines.llamaCPP, status: 'running', duration: this.formatDuration(duration), ram: e.ram ?? '-', diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts index b37ece430..8eae51cea 100644 --- a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts +++ b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts @@ -48,18 +48,18 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { this.httpService, this.fileManagerService, ); - llamaCPPEngine.name = 'cortex.llamacpp'; + llamaCPPEngine.name = Engines.llamaCPP; const onnxEngine = new CortexProvider( this.httpService, this.fileManagerService, ); - onnxEngine.name = 'cortex.onnx'; + onnxEngine.name = Engines.onnx; const tensorrtLLMEngine = new CortexProvider( this.httpService, this.fileManagerService, ); - tensorrtLLMEngine.name = 'cortex.tensorrt-llm'; + tensorrtLLMEngine.name = Engines.tensorrtLLM; await llamaCPPEngine.onLoad(); await onnxEngine.onLoad(); diff --git a/cortex-js/src/usecases/chat/chat.usecases.ts b/cortex-js/src/usecases/chat/chat.usecases.ts index c5c132f46..1ce580f42 100644 --- a/cortex-js/src/usecases/chat/chat.usecases.ts +++ b/cortex-js/src/usecases/chat/chat.usecases.ts @@ -11,6 +11,7 @@ import { HttpService } from '@nestjs/axios'; import { CORTEX_CPP_EMBEDDINGS_URL } from '@/infrastructure/constants/cortex'; import { CreateEmbeddingsDto } from '@/infrastructure/dtos/embeddings/embeddings-request.dto'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { Engines } from '@/infrastructure/commanders/types/engine.interface'; @Injectable() export class ChatUsecases { @@ -32,7 +33,7 @@ export class ChatUsecases { throw new ModelNotFoundException(modelId); } const engine = (await this.extensionRepository.findOne( - model!.engine ?? 'cortex.llamacpp', + model!.engine ?? 
Engines.llamaCPP, )) as EngineExtension | undefined; if (engine == null) { diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index 9ad6a2ece..2410b34bf 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -10,6 +10,7 @@ import { FileManagerService } from '@/infrastructure/services/file- import { CORTEX_CPP_HEALTH_Z_URL, CORTEX_CPP_PROCESS_DESTROY_URL, + CORTEX_JS_STOP_API_SERVER_URL, } from '@/infrastructure/constants/cortex'; @Injectable() @@ -22,6 +23,11 @@ export class CortexUsecases { private readonly fileManagerService: FileManagerService, ) {} + /** + * Start the Cortex CPP process + * @param attach + * @returns + */ async startCortex( attach: boolean = false, ): Promise { @@ -92,6 +98,9 @@ export class CortexUsecases { }); } + /** + * Stop the Cortex CPP process + */ async stopCortex(): Promise { const configs = await this.fileManagerService.getConfig(); try { @@ -114,6 +123,18 @@ export class CortexUsecases { } } + /** + * Stop the API server + * @returns + */ + async stopServe(): Promise<void> { + return fetch(CORTEX_JS_STOP_API_SERVER_URL(), { + method: 'DELETE', + }) + .then(() => {}) + .catch(() => {}); + } + private healthCheck(host: string, port: number): Promise<boolean> { return fetch(CORTEX_CPP_HEALTH_Z_URL(host, port)) .then((res) => { diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index 036a6d8ba..b0f86eef2 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -154,7 +154,7 @@ export class ModelsUsecases { ): Promise { const model = await this.getModelOrThrow(modelId); const engine = (await this.extensionRepository.findOne( - model!.engine ?? 'cortex.llamacpp', + model!.engine ?? 
Engines.llamaCPP, )) as EngineExtension | undefined; if (!engine) { @@ -189,7 +189,7 @@ llama_model_path: (model.files as string[])[0], model_path: (model.files as string[])[0], }), - engine: model.engine ?? 'cortex.llamacpp', + engine: model.engine ?? Engines.llamaCPP, // User / Model settings ...parser.parseModelEngineSettings(model), ...parser.parseModelEngineSettings(settings ?? {}), @@ -248,7 +248,7 @@ async stopModel(modelId: string): Promise { const model = await this.getModelOrThrow(modelId); const engine = (await this.extensionRepository.findOne( - model!.engine ?? 'cortex.llamacpp', + model!.engine ?? Engines.llamaCPP, )) as EngineExtension | undefined; if (!engine) { @@ -348,7 +348,8 @@ const toDownloads: Record<string, string> = files .filter((e) => this.validFileDownload(e)) .reduce((acc: Record<string, string>, file) => { - acc[file.downloadUrl] = join(modelFolder, file.rfilename); + if (file.downloadUrl) + acc[file.downloadUrl] = join(modelFolder, file.rfilename); return acc; }, {}); @@ -365,7 +366,7 @@ const model: CreateModelDto = load( readFileSync(join(modelFolder, 'model.yml'), 'utf-8'), ) as CreateModelDto; - if (model.engine === Engines.llamaCPP) { const fileUrl = join( await this.fileManagerService.getModelsPath(), normalizeModelId(modelId),