diff --git a/cortex-js/package.json b/cortex-js/package.json index 366c450dd..2d8080df5 100644 --- a/cortex-js/package.json +++ b/cortex-js/package.json @@ -10,11 +10,11 @@ }, "scripts": { "dev": "nest dev", - "build": "yarn build:extensions && nest build && cpx \"cpuinfo/bin/**\" dist/bin", - "build:binary": "run-script-os", - "build:binary:windows": "bun build --compile --target=bun-windows-x64 ./dist/src/command.js --outfile cortex.exe --external @nestjs/microservices --external @nestjs/websockets/socket-module --external class-transformer/storage", - "build:binary:linux": "bun build --compile --target=bun-linux-x64 ./dist/src/command.js --outfile cortex --external @nestjs/microservices --external @nestjs/websockets/socket-module --external class-transformer/storage", - "build:binary:macos": "bun build --compile --target=bun-darwin-arm64 ./dist/src/command.js --outfile cortex --external @nestjs/microservices --external @nestjs/websockets/socket-module --external class-transformer/storage", + "build": "nest build && cpx \"cpuinfo/bin/**\" dist/bin", + "build:binary": "run-script-os && cpx \"cpuinfo/bin/**\" dist/bin", + "build:binary:windows": "bun build --compile --target=bun-windows-x64 ./src/command.ts --outfile cortex.exe --external @nestjs/microservices --external @nestjs/websockets/socket-module --external class-transformer/storage", + "build:binary:linux": "bun build --compile --target=bun-linux-x64 ./src/command.ts --outfile cortex --external @nestjs/microservices --external @nestjs/websockets/socket-module --external class-transformer/storage", + "build:binary:macos": "bun build --compile --target=bun-darwin-arm64 ./src/command.ts --outfile cortex --external @nestjs/microservices --external @nestjs/websockets/socket-module --external class-transformer/storage", "format": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"", "build:extensions": "run-script-os", "build:extensions:windows": "powershell -command \"$jobs = Get-ChildItem -Path './src/extensions' -Directory | ForEach-Object { Start-Job -Name ($_.Name) -ScriptBlock { param($_dir); try { Set-Location $_dir; yarn; yarn build; Write-Output 'Build successful in ' + $_dir } catch { Write-Error 'Error in ' + $_dir; throw } } -ArgumentList $_.FullName }; $jobs | Wait-Job; $jobs | ForEach-Object { Receive-Job -Job $_ -Keep } | ForEach-Object { Write-Host $_ }; $failed = $jobs | Where-Object { $_.State -ne 'Completed' -or $_.ChildJobs[0].JobStateInfo.State -ne 'Completed' }; if ($failed) { Exit 1 }\"", diff --git a/cortex-js/src/app.module.ts b/cortex-js/src/app.module.ts index f1e3de965..6ff7528e3 100644 --- a/cortex-js/src/app.module.ts +++ b/cortex-js/src/app.module.ts @@ -15,7 +15,6 @@ import { AppLoggerMiddleware } from './infrastructure/middlewares/app.logger.mid import { TelemetryModule } from './usecases/telemetry/telemetry.module'; import { APP_FILTER } from '@nestjs/core'; import { GlobalExceptionFilter } from './infrastructure/exception/global.exception'; -import { UtilModule } from './util/util.module'; import { EventEmitterModule } from '@nestjs/event-emitter'; import { EventsController } from './infrastructure/controllers/events.controller'; import { AssistantsController } from './infrastructure/controllers/assistants.controller'; @@ -26,6 +25,7 @@ import { ThreadsController } from './infrastructure/controllers/threads.controll import { StatusController } from './infrastructure/controllers/status.controller'; import { ProcessController } from './infrastructure/controllers/process.controller'; import { 
DownloadManagerModule } from './infrastructure/services/download-manager/download-manager.module'; +import { ContextModule } from './infrastructure/services/context/context.module'; @Module({ imports: [ @@ -48,7 +48,7 @@ import { DownloadManagerModule } from './infrastructure/services/download-manage ExtensionModule, FileManagerModule, TelemetryModule, - UtilModule, + ContextModule, DownloadManagerModule, ], controllers: [ diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts index 3107eeac7..f1ff68a95 100644 --- a/cortex-js/src/command.module.ts +++ b/cortex-js/src/command.module.ts @@ -18,7 +18,6 @@ import { ModelStopCommand } from './infrastructure/commanders/models/model-stop. import { ModelGetCommand } from './infrastructure/commanders/models/model-get.command'; import { ModelRemoveCommand } from './infrastructure/commanders/models/model-remove.command'; import { RunCommand } from './infrastructure/commanders/shortcuts/run.command'; -import { InitCudaQuestions } from './infrastructure/commanders/questions/cuda.questions'; import { ModelUpdateCommand } from './infrastructure/commanders/models/model-update.command'; import { AssistantsModule } from './usecases/assistants/assistants.module'; import { CliUsecasesModule } from './infrastructure/commanders/usecases/cli.usecases.module'; @@ -29,12 +28,12 @@ import { KillCommand } from './infrastructure/commanders/kill.command'; import { PresetCommand } from './infrastructure/commanders/presets.command'; import { TelemetryModule } from './usecases/telemetry/telemetry.module'; import { TelemetryCommand } from './infrastructure/commanders/telemetry.command'; -import { UtilModule } from './util/util.module'; import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command'; import { BenchmarkCommand } from './infrastructure/commanders/benchmark.command'; import { EventEmitterModule } from '@nestjs/event-emitter'; import { DownloadManagerModule } from './infrastructure/services/download-manager/download-manager.module'; import { ServeStopCommand } from './infrastructure/commanders/sub-commands/serve-stop.command'; +import { ContextModule } from './infrastructure/services/context/context.module'; @Module({ imports: [ @@ -54,7 +53,7 @@ import { ServeStopCommand } from './infrastructure/commanders/sub-commands/serve MessagesModule, FileManagerModule, TelemetryModule, - UtilModule, + ContextModule, DownloadManagerModule, ], providers: [ @@ -71,7 +70,6 @@ import { ServeStopCommand } from './infrastructure/commanders/sub-commands/serve // Questions InitRunModeQuestions, - InitCudaQuestions, // Model commands ModelStartCommand, diff --git a/cortex-js/src/command.ts b/cortex-js/src/command.ts index 03096560c..d4978311b 100644 --- a/cortex-js/src/command.ts +++ b/cortex-js/src/command.ts @@ -4,7 +4,7 @@ import { CommandModule } from './command.module'; import { TelemetryUsecases } from './usecases/telemetry/telemetry.usecases'; import { TelemetrySource } from './domain/telemetry/telemetry.interface'; import { AsyncLocalStorage } from 'async_hooks'; -import { ContextService } from './util/context.service'; +import { ContextService } from '@/infrastructure/services/context/context.service'; export const asyncLocalStorage = new AsyncLocalStorage(); diff --git a/cortex-js/src/domain/config/config.interface.ts b/cortex-js/src/domain/config/config.interface.ts index 5b0351055..7f2875abb 100644 --- a/cortex-js/src/domain/config/config.interface.ts +++ b/cortex-js/src/domain/config/config.interface.ts @@ -1,3 +1,6 @@ 
export interface Config { dataFolderPath: string; + initialized: boolean; + cortexCppHost: string; + cortexCppPort: number; } diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts b/cortex-js/src/infrastructure/commanders/chat.command.ts index cbb4dadce..0124ab54c 100644 --- a/cortex-js/src/infrastructure/commanders/chat.command.ts +++ b/cortex-js/src/infrastructure/commanders/chat.command.ts @@ -9,8 +9,8 @@ import { exit } from 'node:process'; import { PSCliUsecases } from './usecases/ps.cli.usecases'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { SetCommandContext } from './decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; import { ModelStat } from './types/model-stat.interface'; +import { ContextService } from '../services/context/context.service'; type ChatOptions = { threadId?: string; diff --git a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts index b4e948a51..167e5a048 100644 --- a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts +++ b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts @@ -11,11 +11,11 @@ import pkg from '@/../package.json'; import { PresetCommand } from './presets.command'; import { TelemetryCommand } from './telemetry.command'; import { SetCommandContext } from './decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; import { EmbeddingCommand } from './embeddings.command'; import { BenchmarkCommand } from './benchmark.command'; import chalk from 'chalk'; import { printSlogan } from '@/utils/logo'; +import { ContextService } from '../services/context/context.service'; @RootCommand({ subCommands: [ diff --git a/cortex-js/src/infrastructure/commanders/init.command.ts b/cortex-js/src/infrastructure/commanders/init.command.ts index b843c78f5..583b26cf8 100644 --- a/cortex-js/src/infrastructure/commanders/init.command.ts +++ b/cortex-js/src/infrastructure/commanders/init.command.ts @@ -7,7 +7,7 @@ import { import { InitCliUsecases } from './usecases/init.cli.usecases'; import { InitOptions } from './types/init-options.interface'; import { SetCommandContext } from './decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; +import { ContextService } from '../services/context/context.service'; @SubCommand({ name: 'init', @@ -30,59 +30,20 @@ export class InitCommand extends CommandRunner { async run(passedParams: string[], options?: InitOptions): Promise { if (options?.silent) { - return this.initSilently(passedParams); + const installationOptions = + await this.initUsecases.defaultInstallationOptions(); + return this.initUsecases.installEngine(installationOptions); } else { - return this.initPrompts(passedParams, options); - } - } + options = await this.inquirerService.ask( + 'init-run-mode-questions', + options, + ); - private initSilently = async ( - passedParams: string[], - options: InitOptions = {}, - ) => { - const version = passedParams[0] ?? 'latest'; - if (process.platform === 'darwin') { - const engineFileName = this.initUsecases.parseEngineFileName(options); - return this.initUsecases.installEngine(engineFileName, version); - } - // If Nvidia Driver is installed -> GPU - options.runMode = (await this.initUsecases.checkNvidiaGPUExist()) - ? 
'GPU' - : 'CPU'; - // CPU Instructions detection - options.gpuType = 'Nvidia'; - options.instructions = await this.initUsecases.detectInstructions(); - const engineFileName = this.initUsecases.parseEngineFileName(options); - return this.initUsecases - .installEngine(engineFileName, version) - .then(() => this.initUsecases.installCudaToolkitDependency(options)); - }; + const version = passedParams[0] ?? 'latest'; - /** - * Manual initalization - * To setup cortex's dependencies - * @param input - * @param options GPU | CPU / Nvidia | Others (Vulkan) / AVX | AVX2 | AVX512 - */ - private initPrompts = async (input: string[], options?: InitOptions) => { - options = await this.inquirerService.ask( - 'init-run-mode-questions', - options, - ); - - if (options.runMode === 'GPU' && !(await this.initUsecases.cudaVersion())) { - options = await this.inquirerService.ask('init-cuda-questions', options); + await this.initUsecases.installEngine(options, version); } - - const version = input[0] ?? 'latest'; - - const engineFileName = this.initUsecases.parseEngineFileName(options); - await this.initUsecases.installEngine(engineFileName, version); - - if (options.installCuda === 'Yes') { - await this.initUsecases.installCudaToolkitDependency(options); - } - }; + } @Option({ flags: '-s, --silent', diff --git a/cortex-js/src/infrastructure/commanders/kill.command.ts b/cortex-js/src/infrastructure/commanders/kill.command.ts index 9679e605a..0bde73a22 100644 --- a/cortex-js/src/infrastructure/commanders/kill.command.ts +++ b/cortex-js/src/infrastructure/commanders/kill.command.ts @@ -1,7 +1,7 @@ import { CommandRunner, SubCommand } from 'nest-commander'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; import { SetCommandContext } from './decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; +import { ContextService } from '../services/context/context.service'; @SubCommand({ name: 'kill', @@ -16,6 +16,6 @@ export class KillCommand extends CommandRunner { super(); } async run(): Promise { - this.usecases.stopCortex().then(console.log); + return this.usecases.stopCortex().then(console.log); } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts index b402336c6..16c26ae66 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts @@ -1,8 +1,8 @@ import { CommandRunner, SubCommand } from 'nest-commander'; import { ModelsCliUsecases } from '@commanders/usecases/models.cli.usecases'; import { exit } from 'node:process'; -import { ContextService } from '@/util/context.service'; import { SetCommandContext } from '../decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; @SubCommand({ name: 'get', diff --git a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts index 36404fd67..e7e505106 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts @@ -1,7 +1,7 @@ import { CommandRunner, SubCommand, Option } from 'nest-commander'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; +import { 
ContextService } from '@/infrastructure/services/context/context.service'; interface ModelListOptions { format: 'table' | 'json'; diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts index c6b81d9cf..8629a850b 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -1,9 +1,13 @@ import { CommandRunner, SubCommand } from 'nest-commander'; import { exit } from 'node:process'; import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; import { ModelsCliUsecases } from '@commanders/usecases/models.cli.usecases'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { existsSync } from 'fs'; +import { join } from 'node:path'; +import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { InitCliUsecases } from '../usecases/init.cli.usecases'; @SubCommand({ name: 'pull', @@ -17,6 +21,8 @@ import { ModelNotFoundException } from '@/infrastructure/exception/model-not-fou export class ModelPullCommand extends CommandRunner { constructor( private readonly modelsCliUsecases: ModelsCliUsecases, + private readonly initUsecases: InitCliUsecases, + private readonly fileService: FileManagerService, readonly contextService: ContextService, ) { super(); @@ -27,15 +33,29 @@ export class ModelPullCommand extends CommandRunner { console.error('Model Id is required'); exit(1); } + const modelId = passedParams[0]; - await this.modelsCliUsecases - .pullModel(passedParams[0]) - .catch((e: Error) => { - if (e instanceof ModelNotFoundException) - console.error('Model does not exist.'); - else console.error(e); - exit(1); - }); + await this.modelsCliUsecases.pullModel(modelId).catch((e: Error) => { + if (e instanceof ModelNotFoundException) + console.error('Model does not exist.'); + else console.error(e); + exit(1); + }); + + const existingModel = await this.modelsCliUsecases.getModel(modelId); + const engine = existingModel?.engine || 'cortex.llamacpp'; + + // Pull engine if not exist + if ( + !existsSync(join(await this.fileService.getCortexCppEnginePath(), engine)) + ) { + console.log('\n'); + await this.initUsecases.installEngine( + await this.initUsecases.defaultInstallationOptions(), + 'latest', + engine, + ); + } console.log('\nDownload complete!'); exit(0); diff --git a/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts b/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts index fa73354c5..c04e36128 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts @@ -1,8 +1,8 @@ import { CommandRunner, SubCommand } from 'nest-commander'; import { ModelsCliUsecases } from '@commanders/usecases/models.cli.usecases'; import { exit } from 'node:process'; -import { ContextService } from '@/util/context.service'; import { SetCommandContext } from '../decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; @SubCommand({ name: 'remove', diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts 
index 1190b1a4e..8b99c2cc6 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts @@ -8,7 +8,11 @@ import { exit } from 'node:process'; import { ModelsCliUsecases } from '@commanders/usecases/models.cli.usecases'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { InitCliUsecases } from '../usecases/init.cli.usecases'; +import { existsSync } from 'node:fs'; +import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { join } from 'node:path'; type ModelStartOptions = { attach: boolean; @@ -29,6 +33,8 @@ export class ModelStartCommand extends CommandRunner { private readonly inquirerService: InquirerService, private readonly cortexUsecases: CortexUsecases, private readonly modelsCliUsecases: ModelsCliUsecases, + private readonly initUsecases: InitCliUsecases, + private readonly fileService: FileManagerService, readonly contextService: ContextService, ) { super(); @@ -54,7 +60,21 @@ export class ModelStartCommand extends CommandRunner { console.error('Model is not available. Please pull the model first.'); process.exit(1); } - + const engine = existingModel.engine || 'cortex.llamacpp'; + // Pull engine if not exist + if ( + !existsSync(join(await this.fileService.getCortexCppEnginePath(), engine)) + ) { + await this.initUsecases.installEngine( + await this.initUsecases.defaultInstallationOptions(), + 'latest', + engine, + ); + } + if (engine === 'cortex.onnx' && process.platform !== 'win32') { + console.error('The ONNX engine does not support this OS yet.'); + process.exit(1); + } await this.cortexUsecases .startCortex(options.attach) .then(() => this.modelsCliUsecases.startModel(modelId, options.preset)) diff --git a/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts b/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts index 6e45b4eae..af9b7d011 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts @@ -2,7 +2,7 @@ import { CommandRunner, SubCommand } from 'nest-commander'; import { exit } from 'node:process'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; +import { ContextService } from '@/infrastructure/services/context/context.service'; @SubCommand({ name: 'stop', diff --git a/cortex-js/src/infrastructure/commanders/models/model-update.command.ts b/cortex-js/src/infrastructure/commanders/models/model-update.command.ts index b328780eb..ebd06bfe8 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-update.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-update.command.ts @@ -2,8 +2,8 @@ import { CommandRunner, SubCommand, Option } from 'nest-commander'; import { ModelsCliUsecases } from '@commanders/usecases/models.cli.usecases'; import { exit } from 'node:process'; import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; +import { ContextService } from 
'@/infrastructure/services/context/context.service'; type UpdateOptions = { model?: string; diff --git a/cortex-js/src/infrastructure/commanders/presets.command.ts b/cortex-js/src/infrastructure/commanders/presets.command.ts index 9494c7756..2f60c3c58 100644 --- a/cortex-js/src/infrastructure/commanders/presets.command.ts +++ b/cortex-js/src/infrastructure/commanders/presets.command.ts @@ -3,7 +3,7 @@ import { readdirSync } from 'fs'; import { CommandRunner, SubCommand } from 'nest-commander'; import { join } from 'path'; import { SetCommandContext } from './decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; +import { ContextService } from '../services/context/context.service'; @SubCommand({ name: 'presets', diff --git a/cortex-js/src/infrastructure/commanders/ps.command.ts b/cortex-js/src/infrastructure/commanders/ps.command.ts index e0ceb3607..c00bbed2c 100644 --- a/cortex-js/src/infrastructure/commanders/ps.command.ts +++ b/cortex-js/src/infrastructure/commanders/ps.command.ts @@ -1,7 +1,7 @@ import { CommandRunner, SubCommand } from 'nest-commander'; import { PSCliUsecases } from './usecases/ps.cli.usecases'; import { SetCommandContext } from './decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; +import { ContextService } from '../services/context/context.service'; @SubCommand({ name: 'ps', diff --git a/cortex-js/src/infrastructure/commanders/questions/cuda.questions.ts b/cortex-js/src/infrastructure/commanders/questions/cuda.questions.ts deleted file mode 100644 index 2309c3d00..000000000 --- a/cortex-js/src/infrastructure/commanders/questions/cuda.questions.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { Question, QuestionSet } from 'nest-commander'; -import { platform } from 'node:process'; - -@QuestionSet({ name: 'init-cuda-questions' }) -export class InitCudaQuestions { - @Question({ - type: 'list', - message: 'Do you want to install additional dependencies for CUDA Toolkit?', - name: 'installCuda', - default: 'Yes', - choices: ['Yes', 'No, I want to use my own CUDA Toolkit'], - when: () => platform !== 'darwin', - }) - parseRunMode(val: string) { - return val; - } -} diff --git a/cortex-js/src/infrastructure/commanders/serve.command.ts b/cortex-js/src/infrastructure/commanders/serve.command.ts index 8d341c2ae..94a45e3f2 100644 --- a/cortex-js/src/infrastructure/commanders/serve.command.ts +++ b/cortex-js/src/infrastructure/commanders/serve.command.ts @@ -6,8 +6,8 @@ import { import { CommandRunner, SubCommand, Option } from 'nest-commander'; import { join } from 'path'; import { SetCommandContext } from './decorators/CommandContext'; -import { ContextService } from '@/util/context.service'; import { ServeStopCommand } from './sub-commands/serve-stop.command'; +import { ContextService } from '../services/context/context.service'; type ServeOptions = { address?: string; diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts index e7c4120e8..bddf5ee7c 100644 --- a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -7,10 +7,6 @@ import { } from 'nest-commander'; import { exit } from 'node:process'; import { ChatCliUsecases } from '@commanders/usecases/chat.cli.usecases'; -import { - defaultCortexCppHost, - defaultCortexCppPort, -} from '@/infrastructure/constants/cortex'; import { ModelsCliUsecases } from 
'@commanders/usecases/models.cli.usecases'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; @@ -62,7 +58,7 @@ export class RunCommand extends CommandRunner { } return this.cortexUsecases - .startCortex(false, defaultCortexCppHost, defaultCortexCppPort) + .startCortex(false) .then(() => this.modelsCliUsecases.startModel(modelId, options.preset)) .then(() => this.chatCliUsecases.chat(modelId, options.threadId)); } diff --git a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts index 2f83e3c18..c7a64f7dd 100644 --- a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts +++ b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts @@ -35,6 +35,9 @@ beforeAll( // Attempt to create test folder await fileService.writeConfigFile({ dataFolderPath: join(__dirname, 'test_data'), + initialized: false, + cortexCppHost: 'localhost', + cortexCppPort: 3929 }); res(); }), diff --git a/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts index 630282e6f..e5434eb11 100644 --- a/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts +++ b/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts @@ -24,6 +24,9 @@ beforeAll( // Attempt to create test folder await fileService.writeConfigFile({ dataFolderPath: join(__dirname, 'test_data'), + initialized: false, + cortexCppHost: 'localhost', + cortexCppPort: 3929, }); res(); diff --git a/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts b/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts index 05ff929eb..2631da0bd 100644 --- a/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts +++ b/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts @@ -3,6 +3,5 @@ export interface InitOptions { gpuType?: 'Nvidia' | 'Others (Vulkan)'; instructions?: 'AVX' | 'AVX2' | 'AVX512' | undefined; cudaVersion?: '11' | '12'; - installCuda?: 'Yes' | string; silent?: boolean; } diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts index 57ada02c1..6a5bb9f91 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts @@ -16,6 +16,7 @@ import { CORTEX_RELEASES_URL, CUDA_DOWNLOAD_URL, } from '@/infrastructure/constants/cortex'; +import { checkNvidiaGPUExist, cudaVersion } from '@/utils/cuda'; @Injectable() export class InitCliUsecases { @@ -24,10 +25,73 @@ export class InitCliUsecases { private readonly fileManagerService: FileManagerService, ) {} + /** + * Default installation options based on the system + * @returns + */ + defaultInstallationOptions = async (): Promise<InitOptions> => { + let options: InitOptions = {}; + + // Skip check if darwin + if (process.platform === 'darwin') { + return options; + } + // If Nvidia Driver is installed -> GPU + options.runMode = (await checkNvidiaGPUExist()) ?
'GPU' : 'CPU'; + options.gpuType = 'Nvidia'; + // CPU instructions detection + options.instructions = await this.detectInstructions(); + return options; + }; + + /** + * Install Engine and Dependencies with given options + * @param options + * @param version + */ installEngine = async ( - engineFileName: string, + options: InitOptions, version: string = 'latest', + engine: string = 'default', + force: boolean = true, ): Promise<void> => { + const configs = await this.fileManagerService.getConfig(); + + if (configs.initialized && !force) return; + + // Ship Llama.cpp engine by default + if ( + !existsSync( + join( + await this.fileManagerService.getCortexCppEnginePath(), + 'cortex.llamacpp', + ), + ) + ) + await this.installLlamaCppEngine(options, version); + + if (engine === 'cortex.onnx' && process.platform === 'win32') + await this.installONNXEngine(); + else if (engine === 'cortex.onnx' && process.platform !== 'win32') { + console.error('The ONNX engine does not support this OS yet.'); + process.exit(1); + } + + configs.initialized = true; + await this.fileManagerService.writeConfigFile(configs); + }; + + /** + * Install Llama.cpp engine + * @param options + * @param version + */ + private installLlamaCppEngine = async ( + options: InitOptions, + version: string = 'latest', + ) => { + const engineFileName = this.parseEngineFileName(options); + const res = await firstValueFrom( this.httpService.get( CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`, @@ -40,14 +104,14 @@ export class InitCliUsecases { ), ); - if (!res?.data) { + if (!res.data) { console.log('Failed to fetch releases'); exit(1); } - let release = res?.data; - if (Array.isArray(res?.data)) { - release = Array(res?.data)[0].find( + let release = res.data; + if (Array.isArray(res.data)) { + release = Array(res.data)[0].find( (e) => e.name === version.replace('v', ''), ); } @@ -109,13 +173,22 @@ export class InitCliUsecases { console.error('Error decompressing file', e); exit(1); } + await rm(destination, { force: true }); - // Ship ONNX Runtime on Windows by default - if (process.platform === 'win32') await this.installONNXEngine(); + // If the user selected GPU mode and has an Nvidia GPU, install CUDA Toolkit dependencies + if (options.runMode === 'GPU' && !(await cudaVersion())) { + await this.installCudaToolkitDependency(options.cudaVersion); + } }; - parseEngineFileName = (options?: InitOptions) => { + /** + * Parse the engine file name based on the options + * Please check cortex-cpp release artifacts for the available engine files + * @param options + * @returns + */ + private parseEngineFileName = (options?: InitOptions) => { const platform = process.platform === 'win32' ? 'windows' @@ -136,58 +209,17 @@ return `${engineName}.tar.gz`; }; - cudaVersion = async () => { - let filesCuda12: string[]; - let filesCuda11: string[]; - let paths: string[]; - - if (process.platform === 'win32') { - filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']; - filesCuda11 = [ - 'cublas64_11.dll', - 'cudart64_110.dll', - 'cublasLt64_11.dll', - ]; - paths = process.env.PATH ? process.env.PATH.split(delimiter) : []; - } else { - filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']; - filesCuda11 = [ - 'libcudart.so.11.0', - 'libcublas.so.11', - 'libcublasLt.so.11', - ]; - paths = process.env.LD_LIBRARY_PATH - ?
process.env.LD_LIBRARY_PATH.split(delimiter) - : []; - paths.push('/usr/lib/x86_64-linux-gnu/'); - } - - if ( - filesCuda12.every( - (file) => - existsSync(file) || this.checkFileExistenceInPaths(file, paths), - ) - ) - return '12'; - - if ( - filesCuda11.every( - (file) => - existsSync(file) || this.checkFileExistenceInPaths(file, paths), - ) - ) - return '11'; - - return undefined; // No CUDA Toolkit found - }; - - installCudaToolkitDependency = async (options: InitOptions) => { + /** + * Install CUDA Toolkit dependency (dll/so files) + * @param cudaVersion + */ + private installCudaToolkitDependency = async (cudaVersion?: string) => { const platform = process.platform === 'win32' ? 'windows' : 'linux'; const dataFolderPath = await this.fileManagerService.getDataFolderPath(); const url = CUDA_DOWNLOAD_URL.replace( '<version>', - options.cudaVersion === '11' ? '11.7' : '12.0', + cudaVersion === '11' ? '11.7' : '12.0', ).replace('<platform>', platform); const destination = join(dataFolderPath, 'cuda-toolkit.tar.gz'); @@ -238,25 +270,9 @@ await rm(destination, { force: true }); }; - // Function to check for NVIDIA GPU - checkNvidiaGPUExist = (): Promise<boolean> => { - return new Promise((resolve) => { - // Execute the nvidia-smi command - exec('nvidia-smi', (error) => { - if (error) { - // If there's an error, it means nvidia-smi is not installed or there's no NVIDIA GPU - console.log('NVIDIA GPU not detected or nvidia-smi not installed.'); - resolve(false); - } else { - // If the command executes successfully, NVIDIA GPU is present - console.log('NVIDIA GPU detected.'); - resolve(true); - } - }); - }); - }; - - detectInstructions = (): Promise<'AVX' | 'AVX2' | 'AVX512' | undefined> => { + private detectInstructions = (): Promise< + 'AVX' | 'AVX2' | 'AVX512' | undefined + > => { return new Promise<'AVX' | 'AVX2' | 'AVX512' | undefined>((res) => { // Execute the cpuinfo command @@ -293,7 +309,7 @@ * @param version * @param engineFileName */ - async installONNXEngine( + private async installONNXEngine( version: string = 'latest', engineFileName: string = 'windows-amd64', ) { @@ -390,11 +406,4 @@ } } } - - private checkFileExistenceInPaths = ( - file: string, - paths: string[], - ): boolean => { - return paths.some((p) => existsSync(join(p, file))); - }; } diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts index 55ce533bd..3db95fe04 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts @@ -152,7 +152,7 @@ export class ModelsCliUsecases { /** * It's to pull ONNX model from HuggingFace repository - * @param modelId + * @param modelId */ private async pullOnnxModel(modelId: string) { const modelsContainerDir = await this.fileService.getModelsPath(); @@ -179,7 +179,6 @@ console.log(`Downloading ${file}`); const bar = new SingleBar({}, Presets.shades_classic); bar.start(100, 0); - const response = await firstValueFrom( this.httpService.get( `https://huggingface.co/cortexhub/${repo}/resolve/${branch}/${file}?download=true`, @@ -219,7 +218,7 @@ readFileSync(join(modelFolder, 'model.yml'), 'utf-8'), ) as CreateModelDto; model.files = [join(modelFolder)]; - model.model = modelId + model.model = modelId; if (!(await this.modelsUsecases.findOne(modelId))) await
this.modelsUsecases.create(model); @@ -290,8 +289,8 @@ export class ModelsCliUsecases { /** * Parse preset file - * @param preset - * @returns + * @param preset + * @returns */ private async parsePreset(preset?: string): Promise { const presetsFolder = await this.fileService.getPresetsPath(); diff --git a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts index 0ad57b2b9..15525c2a8 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts @@ -2,14 +2,13 @@ import { HttpStatus, Injectable } from '@nestjs/common'; import { CORTEX_CPP_MODELS_URL, CORTEX_JS_HEALTH_URL, - defaultCortexCppHost, - defaultCortexCppPort, defaultCortexJsHost, defaultCortexJsPort, } from '@/infrastructure/constants/cortex'; import { HttpService } from '@nestjs/axios'; import { firstValueFrom } from 'rxjs'; import { ModelStat } from '@commanders/types/model-stat.interface'; +import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; interface ModelStatResponse { object: string; @@ -17,18 +16,21 @@ interface ModelStatResponse { } @Injectable() export class PSCliUsecases { - constructor(private readonly httpService: HttpService) {} + constructor( + private readonly httpService: HttpService, + private readonly fileService: FileManagerService, + ) {} /** * Get models running in the Cortex C++ server - * @param host Cortex host address - * @param port Cortex port address */ - async getModels( - host: string = defaultCortexCppHost, - port: number = defaultCortexCppPort, - ): Promise { + async getModels(): Promise { + const configs = await this.fileService.getConfig(); return new Promise((resolve, reject) => - firstValueFrom(this.httpService.get(CORTEX_CPP_MODELS_URL(host, port))) + firstValueFrom( + this.httpService.get( + CORTEX_CPP_MODELS_URL(configs.cortexCppHost, configs.cortexCppPort), + ), + ) .then((res) => { const data = res.data as ModelStatResponse; if ( diff --git a/cortex-js/src/infrastructure/constants/cortex.ts b/cortex-js/src/infrastructure/constants/cortex.ts index bc2d91064..ad0690e05 100644 --- a/cortex-js/src/infrastructure/constants/cortex.ts +++ b/cortex-js/src/infrastructure/constants/cortex.ts @@ -6,7 +6,7 @@ export const defaultCortexJsHost = 'localhost'; export const defaultCortexJsPort = 1337; export const defaultCortexCppHost = '127.0.0.1'; -export const defaultCortexCppPort = 3928; +export const defaultCortexCppPort = 3929; // CORTEX CPP export const CORTEX_CPP_EMBEDDINGS_URL = ( host: string = defaultCortexCppHost, diff --git a/cortex-js/src/infrastructure/controllers/chat.controller.spec.ts b/cortex-js/src/infrastructure/controllers/chat.controller.spec.ts index 8a453ff68..97a3333af 100644 --- a/cortex-js/src/infrastructure/controllers/chat.controller.spec.ts +++ b/cortex-js/src/infrastructure/controllers/chat.controller.spec.ts @@ -8,6 +8,7 @@ import { HttpModule } from '@nestjs/axios'; import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; import { EventEmitterModule } from '@nestjs/event-emitter'; import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; +import { FileManagerModule } from '../services/file-manager/file-manager.module'; describe('ChatController', () => { let controller: ChatController; @@ -23,6 +24,7 @@ describe('ChatController', () => { DownloadManagerModule, EventEmitterModule.forRoot(), 
TelemetryModule, + FileManagerModule, ], controllers: [ChatController], providers: [ChatUsecases], diff --git a/cortex-js/src/infrastructure/controllers/embeddings.controller.spec.ts b/cortex-js/src/infrastructure/controllers/embeddings.controller.spec.ts index dd089bc84..80112114e 100644 --- a/cortex-js/src/infrastructure/controllers/embeddings.controller.spec.ts +++ b/cortex-js/src/infrastructure/controllers/embeddings.controller.spec.ts @@ -8,6 +8,7 @@ import { HttpModule } from '@nestjs/axios'; import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; import { EventEmitterModule } from '@nestjs/event-emitter'; import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; +import { FileManagerModule } from '../services/file-manager/file-manager.module'; describe('EmbeddingsController', () => { let controller: EmbeddingsController; @@ -23,6 +24,7 @@ describe('EmbeddingsController', () => { DownloadManagerModule, EventEmitterModule.forRoot(), TelemetryModule, + FileManagerModule, ], controllers: [EmbeddingsController], providers: [ChatUsecases], diff --git a/cortex-js/src/infrastructure/controllers/models.controller.spec.ts b/cortex-js/src/infrastructure/controllers/models.controller.spec.ts index 4417536e9..5f9cc2741 100644 --- a/cortex-js/src/infrastructure/controllers/models.controller.spec.ts +++ b/cortex-js/src/infrastructure/controllers/models.controller.spec.ts @@ -10,7 +10,7 @@ import { ModelRepositoryModule } from '../repositories/models/model.module'; import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; import { EventEmitterModule } from '@nestjs/event-emitter'; import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; -import { UtilModule } from '@/util/util.module'; +import { ContextModule } from '../services/context/context.module'; describe('ModelsController', () => { let controller: ModelsController; @@ -28,7 +28,7 @@ describe('ModelsController', () => { DownloadManagerModule, EventEmitterModule.forRoot(), TelemetryModule, - UtilModule, + ContextModule, ], controllers: [ModelsController], providers: [ModelsUsecases, CortexUsecases], diff --git a/cortex-js/src/infrastructure/entities/assistant.entity.ts b/cortex-js/src/infrastructure/entities/assistant.entity.ts index d4726e4e2..9eb5fcb15 100644 --- a/cortex-js/src/infrastructure/entities/assistant.entity.ts +++ b/cortex-js/src/infrastructure/entities/assistant.entity.ts @@ -1,7 +1,7 @@ -import { - Assistant, - AssistantResponseFormatOption, +import { Assistant } from '@/domain/models/assistant.interface'; +import type { AssistantToolResources, + AssistantResponseFormatOption, } from '@/domain/models/assistant.interface'; import { Column, Entity, PrimaryColumn } from 'typeorm'; diff --git a/cortex-js/src/infrastructure/entities/message.entity.ts b/cortex-js/src/infrastructure/entities/message.entity.ts index 906ed7a03..8e009d9c6 100644 --- a/cortex-js/src/infrastructure/entities/message.entity.ts +++ b/cortex-js/src/infrastructure/entities/message.entity.ts @@ -1,4 +1,4 @@ -import { +import type { Message, MessageContent, MessageIncompleteDetails, diff --git a/cortex-js/src/infrastructure/entities/thread.entity.ts b/cortex-js/src/infrastructure/entities/thread.entity.ts index 6670e5bfb..c53f2559d 100644 --- a/cortex-js/src/infrastructure/entities/thread.entity.ts +++ b/cortex-js/src/infrastructure/entities/thread.entity.ts @@ -1,4 +1,4 @@ -import { Thread, ThreadToolResources } from 
'@/domain/models/thread.interface'; +import type { Thread, ThreadToolResources } from '@/domain/models/thread.interface'; import { Entity, PrimaryColumn, Column } from 'typeorm'; import { AssistantEntity } from './assistant.entity'; diff --git a/cortex-js/src/infrastructure/middlewares/app.logger.middleware.ts b/cortex-js/src/infrastructure/middlewares/app.logger.middleware.ts index bb8f9778e..1dbbd0a1e 100644 --- a/cortex-js/src/infrastructure/middlewares/app.logger.middleware.ts +++ b/cortex-js/src/infrastructure/middlewares/app.logger.middleware.ts @@ -1,8 +1,8 @@ import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; -import { ContextService } from '@/util/context.service'; import { Injectable, NestMiddleware, Logger } from '@nestjs/common'; import { Request, Response, NextFunction } from 'express'; +import { ContextService } from '../services/context/context.service'; @Injectable() export class AppLoggerMiddleware implements NestMiddleware { diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts index ef6ab4fde..5664f80c2 100644 --- a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts +++ b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts @@ -11,17 +11,16 @@ import { existsSync } from 'fs'; @Injectable() export class ExtensionRepositoryImpl implements ExtensionRepository { // Initialize the Extensions Map with the key-value pairs of the core providers. - extensions = new Map([ - ['cortex.llamacpp', this.cortexProvider], - ['cortex.onnx', this.cortexProvider], - ['cortex.tensorrt-llm', this.cortexProvider], - ]); + extensions = new Map(); constructor( @Inject('CORTEX_PROVIDER') private readonly cortexProvider: EngineExtension, private readonly fileService: FileManagerService, ) { + this.extensions.set('cortex.llamacpp', this.cortexProvider); + this.extensions.set('cortex.onnx', this.cortexProvider); + this.extensions.set('cortex.tensorrt-llm', this.cortexProvider); this.loadCoreExtensions(); this.loadExternalExtensions(); } diff --git a/cortex-js/src/util/util.module.ts b/cortex-js/src/infrastructure/services/context/context.module.ts similarity index 87% rename from cortex-js/src/util/util.module.ts rename to cortex-js/src/infrastructure/services/context/context.module.ts index ff2de09ad..a583baa25 100644 --- a/cortex-js/src/util/util.module.ts +++ b/cortex-js/src/infrastructure/services/context/context.module.ts @@ -6,4 +6,4 @@ import { ContextService } from './context.service'; providers: [ContextService], exports: [ContextService], }) -export class UtilModule {} +export class ContextModule {} diff --git a/cortex-js/src/util/context.service.ts b/cortex-js/src/infrastructure/services/context/context.service.ts similarity index 100% rename from cortex-js/src/util/context.service.ts rename to cortex-js/src/infrastructure/services/context/context.service.ts diff --git a/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts b/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts index a7243b0da..4ac66f089 100644 --- a/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts +++ b/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts @@ -14,6 +14,10 @@ import { promisify } from 'util'; import yaml from 'js-yaml'; import { write } from 'fs'; import { createInterface } from 'readline'; +import { + defaultCortexCppHost, + 
defaultCortexCppPort, +} from '@/infrastructure/constants/cortex'; const readFileAsync = promisify(read); const openAsync = promisify(open); @@ -48,7 +52,10 @@ export class FileManagerService { try { const content = await promises.readFile(configPath, 'utf8'); const config = yaml.load(content) as Config; - return config; + return { + ...this.defaultConfig(), + ...config, + }; } catch (error) { console.warn('Error reading config file. Using default config.'); console.warn(error); @@ -97,6 +104,9 @@ export class FileManagerService { return { dataFolderPath, + initialized: false, + cortexCppHost: defaultCortexCppHost, + cortexCppPort: defaultCortexCppPort, }; } @@ -224,13 +234,21 @@ export class FileManagerService { /** * Get the benchmark folder path * Usually it is located at the home directory > cortex > extensions - * @returns the path to the extensions folder + * @returns the path to the benchmark folder */ async getBenchmarkPath(): Promise { const dataFolderPath = await this.getDataFolderPath(); return join(dataFolderPath, this.benchmarkFoldername); } + /** + * Get Cortex CPP engines folder path + * @returns the path to the cortex engines folder + */ + async getCortexCppEnginePath(): Promise { + return join(await this.getDataFolderPath(), 'cortex-cpp', 'engines'); + } + async createFolderIfNotExistInDataFolder(folderName: string): Promise { const dataFolderPath = await this.getDataFolderPath(); const folderPath = join(dataFolderPath, folderName); diff --git a/cortex-js/src/usecases/chat/chat.module.ts b/cortex-js/src/usecases/chat/chat.module.ts index 6629c567d..c339983eb 100644 --- a/cortex-js/src/usecases/chat/chat.module.ts +++ b/cortex-js/src/usecases/chat/chat.module.ts @@ -5,6 +5,7 @@ import { ExtensionModule } from '@/infrastructure/repositories/extensions/extens import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; import { HttpModule } from '@nestjs/axios'; import { TelemetryModule } from '../telemetry/telemetry.module'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; @Module({ imports: [ @@ -13,6 +14,7 @@ import { TelemetryModule } from '../telemetry/telemetry.module'; ModelRepositoryModule, HttpModule, TelemetryModule, + FileManagerModule, ], controllers: [], providers: [ChatUsecases], diff --git a/cortex-js/src/usecases/chat/chat.usecases.spec.ts b/cortex-js/src/usecases/chat/chat.usecases.spec.ts index e8653ec9b..0d53ce61f 100644 --- a/cortex-js/src/usecases/chat/chat.usecases.spec.ts +++ b/cortex-js/src/usecases/chat/chat.usecases.spec.ts @@ -7,6 +7,7 @@ import { ModelRepositoryModule } from '@/infrastructure/repositories/models/mode import { HttpModule } from '@nestjs/axios'; import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; import { EventEmitterModule } from '@nestjs/event-emitter'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; describe('ChatService', () => { let service: ChatUsecases; @@ -22,6 +23,7 @@ describe('ChatService', () => { TelemetryModule, DownloadManagerModule, EventEmitterModule.forRoot(), + FileManagerModule, ], providers: [ChatUsecases], exports: [ChatUsecases], diff --git a/cortex-js/src/usecases/chat/chat.usecases.ts b/cortex-js/src/usecases/chat/chat.usecases.ts index 806ebb06a..a3072652d 100644 --- a/cortex-js/src/usecases/chat/chat.usecases.ts +++ b/cortex-js/src/usecases/chat/chat.usecases.ts @@ -10,6 +10,7 @@ import { firstValueFrom } from 'rxjs'; import { 
HttpService } from '@nestjs/axios'; import { CORTEX_CPP_EMBEDDINGS_URL } from '@/infrastructure/constants/cortex'; import { CreateEmbeddingsDto } from '@/infrastructure/dtos/embeddings/embeddings-request.dto'; +import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; @Injectable() export class ChatUsecases { @@ -18,6 +19,7 @@ export class ChatUsecases { private readonly extensionRepository: ExtensionRepository, private readonly telemetryUseCases: TelemetryUsecases, private readonly httpService: HttpService, + private readonly fileService: FileManagerService, ) {} async inference( @@ -57,14 +59,19 @@ export class ChatUsecases { * @param port Cortex CPP port. * @returns Embedding vector. */ - embeddings(dto: CreateEmbeddingsDto) { + async embeddings(dto: CreateEmbeddingsDto) { + const configs = await this.fileService.getConfig(); return firstValueFrom( - this.httpService.post(CORTEX_CPP_EMBEDDINGS_URL(), dto, { - headers: { - 'Content-Type': 'application/json', - 'Accept-Encoding': 'gzip', + this.httpService.post( + CORTEX_CPP_EMBEDDINGS_URL(configs.cortexCppHost, configs.cortexCppPort), + dto, + { + headers: { + 'Content-Type': 'application/json', + 'Accept-Encoding': 'gzip', + }, }, - }), + ), ).then((res) => res.data); } } diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index 302d5ed32..04dafd0b3 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -1,12 +1,9 @@ import { Injectable } from '@nestjs/common'; import { ChildProcess, spawn } from 'child_process'; -import { join, delimiter } from 'path'; +import { join } from 'path'; import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto'; import { HttpService } from '@nestjs/axios'; -import { - defaultCortexCppHost, - defaultCortexCppPort, -} from '@/infrastructure/constants/cortex'; + import { existsSync } from 'node:fs'; import { firstValueFrom } from 'rxjs'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; @@ -27,9 +24,10 @@ export class CortexUsecases { async startCortex( attach: boolean = false, - host: string = defaultCortexCppHost, - port: number = defaultCortexCppPort, ): Promise { + const configs = await this.fileManagerService.getConfig(); + const host = configs.cortexCppHost; + const port = configs.cortexCppPort; if (this.cortexProcess || (await this.healthCheck(host, port))) { return { message: 'Cortex is already running', @@ -45,7 +43,6 @@ export class CortexUsecases { if (!existsSync(cortexCppPath)) { throw new Error('The engine is not available, please run "cortex init".'); } - await this.addAdditionalDependencies(); // go up one level to get the binary folder, have to also work on windows this.cortexProcess = spawn(cortexCppPath, args, { @@ -75,13 +72,26 @@ export class CortexUsecases { }) .catch(reject); }, 1000); + }).then((res) => { + this.fileManagerService.writeConfigFile({ + ...configs, + cortexCppHost: host, + cortexCppPort: port, + }); + return res; }); } async stopCortex(): Promise { + const configs = await this.fileManagerService.getConfig(); try { await firstValueFrom( - this.httpService.delete(CORTEX_CPP_PROCESS_DESTROY_URL()), + this.httpService.delete( + CORTEX_CPP_PROCESS_DESTROY_URL( + configs.cortexCppHost, + configs.cortexCppPort, + ), + ), ); } catch (err) { console.error(err.response.data); @@ -104,26 +114,4 @@ export class 
CortexUsecases { }) .catch(() => false); } - - private async addAdditionalDependencies() { - const cortexCPPPath = join( - await this.fileManagerService.getDataFolderPath(), - 'cortex-cpp', - ); - const additionalLlamaCppPath = delimiter.concat( - join(cortexCPPPath, 'cortex.llamacpp'), - ); - const additionalTensortLLMCppPath = delimiter.concat( - join(cortexCPPPath, 'cortex.tensorrt-llm'), - ); - const additionalPaths = delimiter.concat( - additionalLlamaCppPath, - additionalTensortLLMCppPath, - ); - // Set the updated PATH - process.env.PATH = (process.env.PATH || '').concat(additionalPaths); - process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( - additionalPaths, - ); - } } diff --git a/cortex-js/src/usecases/models/models.module.ts b/cortex-js/src/usecases/models/models.module.ts index b4ea0e328..e5972941b 100644 --- a/cortex-js/src/usecases/models/models.module.ts +++ b/cortex-js/src/usecases/models/models.module.ts @@ -5,10 +5,10 @@ import { CortexModule } from '@/usecases/cortex/cortex.module'; import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module'; import { HttpModule } from '@nestjs/axios'; import { TelemetryModule } from '../telemetry/telemetry.module'; -import { UtilModule } from '@/util/util.module'; import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; +import { ContextModule } from '@/infrastructure/services/context/context.module'; @Module({ imports: [ @@ -18,7 +18,7 @@ import { DownloadManagerModule } from '@/infrastructure/services/download-manage HttpModule, FileManagerModule, TelemetryModule, - UtilModule, + ContextModule, ModelRepositoryModule, DownloadManagerModule, ], diff --git a/cortex-js/src/usecases/models/models.usecases.spec.ts b/cortex-js/src/usecases/models/models.usecases.spec.ts index 2f8053ab6..8bea2eaa3 100644 --- a/cortex-js/src/usecases/models/models.usecases.spec.ts +++ b/cortex-js/src/usecases/models/models.usecases.spec.ts @@ -9,7 +9,7 @@ import { ModelRepositoryModule } from '@/infrastructure/repositories/models/mode import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; import { EventEmitterModule } from '@nestjs/event-emitter'; import { TelemetryModule } from '../telemetry/telemetry.module'; -import { UtilModule } from '@/util/util.module'; +import { ContextModule } from '@/infrastructure/services/context/context.module'; describe('ModelsService', () => { let service: ModelsUsecases; @@ -29,7 +29,7 @@ describe('ModelsService', () => { EventEmitterModule.forRoot(), TelemetryModule, TelemetryModule, - UtilModule, + ContextModule, ], providers: [ModelsUsecases], exports: [ModelsUsecases], diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index 2e39a758a..1b7965d2b 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -21,7 +21,6 @@ import { FileManagerService } from '@/infrastructure/services/file-manager/file- import { AxiosError } from 'axios'; import { TelemetryUsecases } from '../telemetry/telemetry.usecases'; import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; -import { ContextService } from '@/util/context.service'; import { ModelRepository } from 
'@/domain/repositories/model.interface'; import { ModelParameterParser } from '@/utils/model-parameter.parser'; import { @@ -40,6 +39,7 @@ import { DownloadType } from '@/domain/models/download.interface'; import { EventEmitter2 } from '@nestjs/event-emitter'; import { ModelEvent, ModelId, ModelStatus } from '@/domain/models/model.event'; import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; +import { ContextService } from '@/infrastructure/services/context/context.service'; @Injectable() export class ModelsUsecases { @@ -146,7 +146,7 @@ export class ModelsUsecases { * Start a model by ID * @param modelId Model ID * @param settings Model settings - * @returns + * @returns Model start status */ async startModel( modelId: string, @@ -241,6 +241,11 @@ export class ModelsUsecases { }); } + /** + * Stop a running model + * @param modelId Model Identifier + * @returns Model stop status + */ async stopModel(modelId: string): Promise { const model = await this.getModelOrThrow(modelId); const engine = (await this.extensionRepository.findOne( @@ -301,8 +306,8 @@ export class ModelsUsecases { /** * Download a remote model from HuggingFace or Jan's repo - * @param modelId - * @param callback + * @param modelId Model ID + * @param callback Callback function to track download progress * @returns */ async downloadModel(modelId: string, callback?: (progress: number) => void) { @@ -385,6 +390,10 @@ export class ModelsUsecases { } } + /** + * Abort a download + * @param downloadId Download ID + */ async abortDownloadModel(downloadId: string) { this.downloadManagerService.abortDownload(downloadId); } @@ -465,13 +474,17 @@ export class ModelsUsecases { /** * Fetches the model data from HuggingFace * @param modelId Model repo id. e.g. 
llama3, llama3:8b, janhq/llama3 - * @returns + * @returns Model metadata */ fetchModelMetadata(modelId: string): Promise { if (modelId.includes('/')) return fetchHuggingFaceRepoData(modelId); else return fetchJanRepoData(modelId); } + /** + * Get the current status of the models + * @returns Model statuses + */ getModelStatuses(): Record { return this.activeModelStatuses; } diff --git a/cortex-js/src/usecases/telemetry/telemetry.module.ts b/cortex-js/src/usecases/telemetry/telemetry.module.ts index 8d17a5cb8..b058814fb 100644 --- a/cortex-js/src/usecases/telemetry/telemetry.module.ts +++ b/cortex-js/src/usecases/telemetry/telemetry.module.ts @@ -2,11 +2,11 @@ import { Module } from '@nestjs/common'; import { TelemetryUsecases } from './telemetry.usecases'; import { HttpModule } from '@nestjs/axios'; import { DatabaseModule } from '@/infrastructure/database/database.module'; -import { UtilModule } from '@/util/util.module'; import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import { ContextModule } from '@/infrastructure/services/context/context.module'; @Module({ - imports: [HttpModule, DatabaseModule, FileManagerModule, UtilModule], + imports: [HttpModule, DatabaseModule, FileManagerModule, ContextModule], providers: [TelemetryUsecases], exports: [TelemetryUsecases], }) diff --git a/cortex-js/src/usecases/telemetry/telemetry.usecases.ts b/cortex-js/src/usecases/telemetry/telemetry.usecases.ts index e31a766fe..89c00eaa8 100644 --- a/cortex-js/src/usecases/telemetry/telemetry.usecases.ts +++ b/cortex-js/src/usecases/telemetry/telemetry.usecases.ts @@ -4,7 +4,7 @@ import { Telemetry, TelemetrySource, } from '@/domain/telemetry/telemetry.interface'; -import { ContextService } from '@/util/context.service'; +import { ContextService } from '@/infrastructure/services/context/context.service'; import { HttpException, Inject, Injectable, Scope } from '@nestjs/common'; @Injectable({ scope: Scope.TRANSIENT }) diff --git a/cortex-js/src/utils/app-path.ts b/cortex-js/src/utils/app-path.ts index 49fc598a5..3d80cd8f4 100644 --- a/cortex-js/src/utils/app-path.ts +++ b/cortex-js/src/utils/app-path.ts @@ -1,6 +1,20 @@ +import { existsSync } from 'fs'; import { join } from 'path'; /** * Path to the root of the application. */ export const appPath = join(__dirname, '../../'); + +/** + * Check if a file exists in any of the given paths. + * @param file + * @param paths + * @returns + */ +export const checkFileExistenceInPaths = ( + file: string, + paths: string[], +): boolean => { + return paths.some((p) => existsSync(join(p, file))); +}; diff --git a/cortex-js/src/utils/cuda.ts b/cortex-js/src/utils/cuda.ts new file mode 100644 index 000000000..a20fa17f3 --- /dev/null +++ b/cortex-js/src/utils/cuda.ts @@ -0,0 +1,65 @@ +import { exec } from 'child_process'; +import { existsSync } from 'fs'; +import { delimiter } from 'path'; +import { checkFileExistenceInPaths } from './app-path'; + +/** + * Return the CUDA version installed on the system + * @returns CUDA Version 11 | 12 + */ +export const cudaVersion = async () => { + let filesCuda12: string[]; + let filesCuda11: string[]; + let paths: string[]; + + if (process.platform === 'win32') { + filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']; + filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll']; + paths = process.env.PATH ? 
process.env.PATH.split(delimiter) : []; + } else { + filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']; + filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']; + paths = process.env.LD_LIBRARY_PATH + ? process.env.LD_LIBRARY_PATH.split(delimiter) + : []; + paths.push('/usr/lib/x86_64-linux-gnu/'); + } + + if ( + filesCuda12.every( + (file) => existsSync(file) || checkFileExistenceInPaths(file, paths), + ) + ) + return '12'; + + if ( + filesCuda11.every( + (file) => existsSync(file) || checkFileExistenceInPaths(file, paths), + ) + ) + return '11'; + + return undefined; // No CUDA Toolkit found +}; + +/** + * Check if an NVIDIA GPU is present + * @returns Whether an NVIDIA GPU driver is present + * TODO: This detection should be improved + */ +export const checkNvidiaGPUExist = (): Promise<boolean> => { + return new Promise((resolve) => { + // Execute the nvidia-smi command + exec('nvidia-smi', (error) => { + if (error) { + // If there's an error, it means nvidia-smi is not installed or there's no NVIDIA GPU + console.log('NVIDIA GPU not detected or nvidia-smi not installed.'); + resolve(false); + } else { + // If the command executes successfully, NVIDIA GPU is present + console.log('NVIDIA GPU detected.'); + resolve(true); + } + }); + }); +};