diff --git a/.github/workflows/cortex-build.yml b/.github/workflows/cortex-build.yml index 8bd635197..b408735ef 100644 --- a/.github/workflows/cortex-build.yml +++ b/.github/workflows/cortex-build.yml @@ -45,25 +45,29 @@ jobs: runs-on: "ubuntu-20-04" cmake-flags: "" build-deps-cmake-flags: "" - ccache-dir: '' + arch: "x64" + platform: "linux" - os: "mac" name: "amd64" runs-on: "macos-13" cmake-flags: "" build-deps-cmake-flags: "" - ccache-dir: '' + arch: "x64" + platform: "darwin" - os: "mac" name: "arm64" runs-on: "macos-latest" cmake-flags: "-DMAC_ARM64=ON" build-deps-cmake-flags: "" - ccache-dir: '' + arch: "arm64" + platform: "darwin" - os: "windows" name: "amd64" runs-on: "windows-cuda-12-0" cmake-flags: "-DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja" - ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + arch: "x64" + platform: "win32" steps: - name: Clone @@ -77,6 +81,11 @@ jobs: with: dotnet-version: "8.0.x" + - uses: actions/setup-node@v3 + with: + node-version: "20.x" + registry-url: "https://registry.npmjs.org" + - name: Install choco on Windows if: runner.os == 'Windows' run: | @@ -116,7 +125,7 @@ jobs: run: | cd cortex-cpp make pre-package - + - name: Code Signing macOS if: runner.os == 'macOS' run: | @@ -163,6 +172,50 @@ jobs: AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" + ## cortex-cpp node binding + + # update version in package.json + - name: Install jq + uses: dcarbone/install-jq-action@v2.0.1 + + - name: "Update version by tag" + working-directory: cortex-cpp + run: | + echo "Version: ${{ needs.create-draft-release.outputs.version }}" + # Update the version in package.json + 
jq --arg version "${{ needs.create-draft-release.outputs.version }}" '.version = $version' package.json > package-tmp.json + rm package.json + mv package-tmp.json package.json + + # build prebuilds + - name: Build Prebuilds + working-directory: cortex-cpp + run: | + npm install -g yarn + yarn && yarn prebuild + + # upload prebuilds + - name: Upload Prebuilds Darwin + uses: actions/upload-release-asset@v1.0.1 + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.create-draft-release.outputs.upload_url }} + asset_path: ./cortex-cpp/prebuilds/cortex-cpp-v${{ needs.create-draft-release.outputs.version }}-napi-v8-${{matrix.platform}}-${{ matrix.arch }}.tar.gz + asset_name: cortex-cpp-v${{ needs.create-draft-release.outputs.version }}-napi-v8-${{matrix.platform}}-${{ matrix.arch }}.tar.gz + asset_content_type: application/gzip + + # Setup .npmrc file to publish to npm - upload only once + - run: npm publish --access public + continue-on-error: true + if: runner.os == 'linux' + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + working-directory: ./cortex-cpp + + ## cortex-cpp node binding + build-cortex-single-binary: runs-on: ${{ matrix.runs-on }} needs: [create-draft-release] @@ -229,7 +282,7 @@ jobs: with: python-version: "3.10" - - run: pip3 install --upgrade setuptools + - run: pip3 install --upgrade setuptools if: runner.os == 'macOS' - run: yarn install && yarn build:binary diff --git a/cortex-cpp/CMakeLists.txt b/cortex-cpp/CMakeLists.txt index e22d9f622..031428263 100644 --- a/cortex-cpp/CMakeLists.txt +++ b/cortex-cpp/CMakeLists.txt @@ -1,6 +1,12 @@ cmake_minimum_required(VERSION 3.5) + project(cortex-cpp C CXX) +# Build using CMAKE-JS +if(DEFINED CMAKE_JS_INC) + include_directories(${CMAKE_JS_INC}) +endif() + include(CheckIncludeFileCXX) check_include_file_cxx(any HAS_ANY) @@ -53,13 +59,29 @@ if(APPLE) endif() endif() +if(DEFINED CMAKE_JS_INC) + # define 
NAPI_VERSION + add_compile_definitions(NAPI_VERSION=8) +endif() + add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}") # add_subdirectory(test) -add_executable(${PROJECT_NAME} main.cc - ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc -) +# Build using CMAKE-JS +if(DEFINED CMAKE_JS_INC) + if(("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") OR("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") + endif() + + add_library(${PROJECT_NAME} SHARED addon.cc + ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc + ) +else() # Official build + add_executable(${PROJECT_NAME} main.cc + ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc + ) +endif() # ############################################################################## # If you include the drogon source code locally in your project, use this method @@ -69,9 +91,23 @@ add_executable(${PROJECT_NAME} main.cc # and comment out the following lines find_package(Drogon CONFIG REQUIRED) -target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon - ${CMAKE_THREAD_LIBS_INIT}) + +# Build using CMAKE-JS +if(DEFINED CMAKE_JS_INC) + set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node") + + target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon + ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_JS_LIB}) + + if(MSVC AND CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET) + # Generate node.lib + execute_process(COMMAND ${CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS}) + endif() +else() + target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon + ${CMAKE_THREAD_LIBS_INIT}) +endif() # ############################################################################## if(CMAKE_CXX_STANDARD LESS 17) @@ -103,4 +139,4 @@ target_sources(${PROJECT_NAME} PRIVATE ${CTL_SRC} ${COMMON_SRC}) # ${FILTER_SRC} ${PLUGIN_SRC} ${MODEL_SRC}) # ############################################################################## # uncomment 
the following line for dynamically loading views set_property(TARGET -# ${PROJECT_NAME} PROPERTY ENABLE_EXPORTS ON) \ No newline at end of file +# ${PROJECT_NAME} PROPERTY ENABLE_EXPORTS ON) diff --git a/cortex-cpp/addon.cc b/cortex-cpp/addon.cc new file mode 100644 index 000000000..42db5f153 --- /dev/null +++ b/cortex-cpp/addon.cc @@ -0,0 +1,88 @@ +#include + +#include +#include +#include +#include // for PATH_MAX +#include +#include "cortex-common/cortexpythoni.h" +#include "utils/cortex_utils.h" +#include "utils/dylib.h" + +#if defined(__APPLE__) && defined(__MACH__) +#include // for dirname() +#include +#elif defined(__linux__) +#include // for dirname() +#include // for readlink() +#elif defined(_WIN32) +#include +#undef max +#else +#error "Unsupported platform!" +#endif + +static Napi::Env* s_env = nullptr; + +void start() { + int thread_num = 1; + std::string host = "127.0.0.1"; + int port = 3929; + std::string uploads_folder_path; + int logical_cores = std::thread::hardware_concurrency(); + int drogon_thread_num = std::max(thread_num, logical_cores); +#ifdef CORTEX_CPP_VERSION + LOG_INFO << "cortex-cpp version: " << CORTEX_CPP_VERSION; +#else + LOG_INFO << "cortex-cpp version: undefined"; +#endif +#ifdef CORTEX_LLAMACPP_VERSION + LOG_INFO << "cortex.llamacpp version: " << CORTEX_LLAMACPP_VERSION; +#endif + + LOG_INFO << "Server started, listening at: " << host << ":" << port; + LOG_INFO << "Please load your model"; + drogon::app().addListener(host, port); + drogon::app().setThreadNum(drogon_thread_num); + if (!uploads_folder_path.empty()) { + LOG_INFO << "Drogon uploads folder is at: " << uploads_folder_path; + drogon::app().setUploadPath(uploads_folder_path); + } + LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); + + drogon::app().run(); +} + +void stop() { + drogon::app().quit(); +} + +void exitCallback() { + Napi::TypeError::New(*s_env, "Process Exited!").ThrowAsJavaScriptException(); +} + +Napi::Value Start(const Napi::CallbackInfo& 
info) { + Napi::Env env = info.Env(); + + s_env = &env; + + // Register exitCallback with atexit + std::atexit(exitCallback); + + start(); + return env.Undefined(); +} + +Napi::Value Stop(const Napi::CallbackInfo& info) { + Napi::Env env = info.Env(); + stop(); + return Napi::String::New(env, "Server stopped successfully"); +} + +Napi::Object Init(Napi::Env env, Napi::Object exports) { + exports.Set(Napi::String::New(env, "start"), Napi::Function::New(env, Start)); + exports.Set(Napi::String::New(env, "stop"), Napi::Function::New(env, Start)); + return exports; +} + +NODE_API_MODULE(cortex-cpp, Init) \ No newline at end of file diff --git a/cortex-cpp/binding/index.d.ts b/cortex-cpp/binding/index.d.ts new file mode 100644 index 000000000..9d600d31d --- /dev/null +++ b/cortex-cpp/binding/index.d.ts @@ -0,0 +1,7 @@ +// Type definitions for cortex-cpp node binding + +/// +declare module "cortex-cpp" { + export function start(); + export function stop(); +} diff --git a/cortex-cpp/binding/index.js b/cortex-cpp/binding/index.js new file mode 100644 index 000000000..fd80a6df8 --- /dev/null +++ b/cortex-cpp/binding/index.js @@ -0,0 +1,3 @@ +const addon = require("./../build/Release/cortex-cpp.node"); + +module.exports = addon; diff --git a/cortex-cpp/controllers/server.cc b/cortex-cpp/controllers/server.cc index 14507317d..3c3ac0e9f 100644 --- a/cortex-cpp/controllers/server.cc +++ b/cortex-cpp/controllers/server.cc @@ -206,7 +206,9 @@ void server::FineTuning( if (engines_.find(engine_type) == engines_.end()) { try { std::string abs_path = - cortex_utils::GetCurrentPath() + cortex_utils::kPythonRuntimeLibPath; + (getenv("ENGINE_PATH") ? 
getenv("ENGINE_PATH") + : cortex_utils::GetCurrentPath()) + + cortex_utils::kPythonRuntimeLibPath; engines_[engine_type].dl = std::make_unique(abs_path, "engine"); } catch (const cortex_cpp::dylib::load_error& e) { @@ -262,9 +264,9 @@ void server::LoadModel(const HttpRequestPtr& req, auto get_engine_path = [](std::string_view e) { if (e == kLlamaEngine) { return cortex_utils::kLlamaLibPath; - } else if(e == kOnnxEngine) { + } else if (e == kOnnxEngine) { return cortex_utils::kOnnxLibPath; - } else if(e == kTensorrtLlmEngine) { + } else if (e == kTensorrtLlmEngine) { return cortex_utils::kTensorrtLlmPath; } return cortex_utils::kLlamaLibPath; @@ -277,7 +279,9 @@ void server::LoadModel(const HttpRequestPtr& req, } std::string abs_path = - cortex_utils::GetCurrentPath() + get_engine_path(engine_type); + (getenv("ENGINE_PATH") ? getenv("ENGINE_PATH") + : cortex_utils::GetCurrentPath()) + + get_engine_path(engine_type); engines_[engine_type].dl = std::make_unique(abs_path, "engine"); diff --git a/cortex-cpp/cortex-cpp-deps/CMakeLists.txt b/cortex-cpp/cortex-cpp-deps/CMakeLists.txt index f74e90be0..b28c48d19 100644 --- a/cortex-cpp/cortex-cpp-deps/CMakeLists.txt +++ b/cortex-cpp/cortex-cpp-deps/CMakeLists.txt @@ -62,9 +62,11 @@ ExternalProject_Add( GIT_TAG cares-1_26_0 CMAKE_ARGS -DCARES_SHARED=OFF - -DCARES_STATIC=ON + -DCARES_STATIC=ON + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DCMAKE_INSTALL_PREFIX=${THIRD_PARTY_INSTALL_PATH} -DCMAKE_BUILD_TYPE=RELEASE + -DCARES_STATIC_PIC=ON ) ExternalProject_Add( diff --git a/cortex-cpp/package.json b/cortex-cpp/package.json new file mode 100644 index 000000000..4a121be75 --- /dev/null +++ b/cortex-cpp/package.json @@ -0,0 +1,41 @@ +{ + "name": "cortex-cpp", + "version": "0.0.11", + "description": "Cortex-cpp is a streamlined, stateless C++ server engineered to be fully compatible with OpenAI's API, particularly its stateless functionalities", + "main": "./binding/index.js", + "types": "./binding/index.d.ts", + "repository": { + 
"type": "git", + "url": "git+https://github.com/janhq/cortex.git" + }, + "scripts": { + "install": "prebuild-install --runtime napi --backend cmake-js --config Release || cmake-js rebuild --config Release", + "build": "cmake-js configure --config Release && cmake-js build --config Release", + "rebuild": "cmake-js rebuild --config Release", + "prebuild": "prebuild --runtime napi --backend cmake-js --all --strip --verbose --config Release", + "upload": "prebuild --runtime napi --backend cmake-js --upload ${GITHUB_TOKEN}" + }, + "author": "Jan ", + "license": "Apache-2.0", + "gypfile": true, + "dependencies": { + "bindings": "^1.5.0", + "cmake-js": "^7.3.0", + "node-addon-api": "^7.0.0", + "prebuild": "^13.0.1", + "prebuild-install": "^7.1.2" + }, + "devDependencies": { + "@types/node": "^20.14.9", + "typescript": "^5.5.3" + }, + "binary": { + "napi_versions": [ + 8 + ] + }, + "files": [ + "binding/*.js", + "binding/*.d.ts" + ] +} diff --git a/cortex-js/package.json b/cortex-js/package.json index 5a462cae8..f396f9a37 100644 --- a/cortex-js/package.json +++ b/cortex-js/package.json @@ -52,6 +52,7 @@ "class-transformer": "^0.5.1", "class-validator": "^0.14.1", "cli-progress": "^3.12.0", + "cortex-cpp": "^0.4.24", "cortexso-node": "^0.0.4", "cpu-instructions": "^0.0.11", "decompress": "^4.2.1", @@ -88,7 +89,6 @@ "@typescript-eslint/parser": "^6.0.0", "@vercel/ncc": "^0.38.0", "@yao-pkg/pkg": "^5.12.0", - "bun": "^1.1.15", "cpx": "^1.5.0", "eslint": "^8.42.0", "eslint-config-prettier": "^9.0.0", diff --git a/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts b/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts index 1763d8757..2bbfe0a9f 100644 --- a/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts +++ b/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts @@ -1,4 +1,4 @@ -import { CommandRunner, SubCommand } from 'nest-commander'; +import { CommandRunner, Option, SubCommand } from 
'nest-commander'; import { SetCommandContext } from '../decorators/CommandContext'; import { ContextService } from '@/infrastructure/services/context/context.service'; import { InitCliUsecases } from '../usecases/init.cli.usecases'; @@ -25,10 +25,16 @@ export class EnginesInitCommand extends CommandRunner { super(); } - async run(passedParams: string[]): Promise { + async run( + passedParams: string[], + options: { vulkan: boolean }, + ): Promise { const engine = passedParams[0]; - const options = passedParams.includes(Engines.llamaCPP) - ? await this.initUsecases.defaultInstallationOptions() + const params = passedParams.includes(Engines.llamaCPP) + ? { + ...(await this.initUsecases.defaultInstallationOptions()), + ...options, + } : {}; const configs = await this.fileManagerService.getConfig(); @@ -40,14 +46,23 @@ export class EnginesInitCommand extends CommandRunner { } return this.initUsecases .installEngine( - options, + params, engine.includes('@') ? engine.split('@')[1] : 'latest', engine, - true, + true ) .then(() => console.log('Engine installed successfully!')) .catch((e) => console.error('Install engine failed with reason: %s', e.message ?? 
e), ); } + + @Option({ + flags: '-vk, --vulkan', + description: 'Install Vulkan engine', + defaultValue: false, + }) + parseVulkan() { + return true; + } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts index fd4d0b140..a74787015 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts @@ -75,9 +75,7 @@ export class ModelStartCommand extends CommandRunner { if ( !existsSync(join(await this.fileService.getCortexCppEnginePath(), engine)) ) { - const engineSpinner = ora('Installing engine...').start(); await this.initUsecases.installEngine(undefined, 'latest', engine); - engineSpinner.succeed(); } // Attached - stdout logs diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts index 146785eef..84e3fd92a 100644 --- a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -87,9 +87,7 @@ export class RunCommand extends CommandRunner { if ( !existsSync(join(await this.fileService.getCortexCppEnginePath(), engine)) ) { - const engineSpinner = ora('Installing engine...').start(); await this.initUsecases.installEngine(undefined, 'latest', engine); - engineSpinner.succeed('Engine installed'); } return this.cortexUsecases diff --git a/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts b/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts index 2631da0bd..55444f04e 100644 --- a/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts +++ b/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts @@ -4,4 +4,5 @@ export interface InitOptions { instructions?: 'AVX' | 'AVX2' | 'AVX512' | undefined; cudaVersion?: '11' | '12'; silent?: 
boolean; + vulkan?: boolean; } diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts index d5d041f2c..9b8ab90ee 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts @@ -1,4 +1,11 @@ -import { cpSync, createWriteStream, existsSync, readdirSync, rmSync } from 'fs'; +import { + cpSync, + createWriteStream, + existsSync, + mkdirSync, + readdirSync, + rmSync, +} from 'fs'; import { join } from 'path'; import { HttpService } from '@nestjs/axios'; import { Presets, SingleBar } from 'cli-progress'; @@ -11,13 +18,13 @@ import { FileManagerService } from '@/infrastructure/services/file-manager/file- import { rm } from 'fs/promises'; import { CORTEX_ENGINE_RELEASES_URL, - CORTEX_RELEASES_URL, CUDA_DOWNLOAD_URL, } from '@/infrastructure/constants/cortex'; -import { checkNvidiaGPUExist, cudaVersion } from '@/utils/cuda'; +import { checkNvidiaGPUExist } from '@/utils/cuda'; import { Engines } from '../types/engine.interface'; import { cpuInfo } from 'cpu-instructions'; +import ora from 'ora'; @Injectable() export class InitCliUsecases { @@ -54,161 +61,65 @@ export class InitCliUsecases { options?: InitOptions, version: string = 'latest', engine: string = 'default', - force: boolean = true, + force: boolean = false, ): Promise => { // Use default option if not defined - if (!options) { + if (!options && engine === Engines.llamaCPP) { options = await this.defaultInstallationOptions(); } const configs = await this.fileManagerService.getConfig(); - if (configs.initialized && !force) return; - + const engineSpinner = ora('Installing engine...').start(); // Ship Llama.cpp engine by default if ( !existsSync( - join( - await this.fileManagerService.getCortexCppEnginePath(), - Engines.llamaCPP, - ), + join(await this.fileManagerService.getCortexCppEnginePath(), engine), ) || - (engine 
=== Engines.llamaCPP && force) - ) - await this.installLlamaCppEngine(options, version); + force + ) { + const isVulkan = + engine === Engines.llamaCPP && + (options?.vulkan || + (options?.runMode === 'GPU' && options?.gpuType !== 'Nvidia')); + await this.installAcceleratedEngine(version, engine, [ + process.platform === 'win32' + ? '-windows' + : process.platform === 'darwin' + ? '-mac' + : '-linux', + // CPU Instructions - CPU | GPU Non-Vulkan + options?.instructions && + (options?.runMode === 'CPU' || + (options?.runMode === 'GPU' && !isVulkan)) + ? `-${options?.instructions?.toLowerCase()}` + : '', + // Cuda + options?.runMode === 'GPU' && options?.gpuType === 'Nvidia' && !isVulkan + ? `cuda-${options.cudaVersion ?? '12'}` + : '', + // Vulkan + isVulkan ? '-vulkan' : '', + + // Arch + engine !== Engines.tensorrtLLM + ? process.arch === 'arm64' + ? '-arm64' + : '-amd64' + : '', + ]); + } - if (engine !== Engines.llamaCPP) - await this.installAcceleratedEngine(version, engine); + if ( + (engine === Engines.llamaCPP || engine === Engines.tensorrtLLM) && + options?.runMode === 'GPU' && + options?.gpuType === 'Nvidia' && + !options?.vulkan + ) + await this.installCudaToolkitDependency(options?.cudaVersion); configs.initialized = true; await this.fileManagerService.writeConfigFile(configs); - }; - - /** - * Install Llama.cpp engine - * @param options - * @param version - */ - private installLlamaCppEngine = async ( - options: InitOptions, - version: string = 'latest', - ) => { - const engineFileName = this.parseEngineFileName(options); - - const res = await firstValueFrom( - this.httpService.get( - CORTEX_RELEASES_URL + `${version === 'latest' ? 
'/latest' : ''}`, - { - headers: { - 'X-GitHub-Api-Version': '2022-11-28', - Accept: 'application/vnd.github+json', - }, - }, - ), - ); - - if (!res.data) { - console.log('Failed to fetch releases'); - exit(1); - } - - let release = res.data; - if (Array.isArray(res.data)) { - release = Array(res.data)[0].find( - (e) => e.name === version.replace('v', ''), - ); - } - const toDownloadAsset = release.assets.find((s: any) => - s.name.includes(engineFileName), - ); - - if (!toDownloadAsset) { - console.log(`Could not find engine ${engineFileName}`); - exit(1); - } - - console.log(`Downloading default engine ${engineFileName}`); - const dataFolderPath = await this.fileManagerService.getDataFolderPath(); - const engineDir = join(dataFolderPath, 'cortex-cpp'); - if (existsSync(engineDir)) rmSync(engineDir, { recursive: true }); - - const download = await firstValueFrom( - this.httpService.get(toDownloadAsset.browser_download_url, { - responseType: 'stream', - }), - ); - if (!download) { - console.log('Failed to download model'); - process.exit(1); - } - - const destination = join(dataFolderPath, toDownloadAsset.name); - - await new Promise((resolve, reject) => { - const writer = createWriteStream(destination); - let receivedBytes = 0; - const totalBytes = download.headers['content-length']; - - writer.on('finish', () => { - bar.stop(); - resolve(true); - }); - - writer.on('error', (error) => { - bar.stop(); - reject(error); - }); - - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - - download.data.on('data', (chunk: any) => { - receivedBytes += chunk.length; - bar.update(Math.floor((receivedBytes / totalBytes) * 100)); - }); - - download.data.pipe(writer); - }); - - try { - await decompress(destination, join(dataFolderPath)); - } catch (e) { - console.error('Error decompressing file', e); - exit(1); - } - - await rm(destination, { force: true }); - - // If the user selected GPU mode and Nvidia GPU, install CUDA Toolkit dependencies - if 
(options.runMode === 'GPU' && !(await cudaVersion())) { - await this.installCudaToolkitDependency(options.cudaVersion); - } - }; - - /** - * Parse the engine file name based on the options - * Please check cortex-cpp release artifacts for the available engine files - * @param options - * @returns - */ - private parseEngineFileName = (options?: InitOptions) => { - const platform = - process.platform === 'win32' - ? 'windows' - : process.platform === 'darwin' - ? 'mac' - : process.platform; - const arch = process.arch === 'arm64' ? process.arch : 'amd64'; - const cudaVersion = - options?.runMode === 'GPU' - ? options.gpuType === 'Nvidia' - ? '-cuda-' + (options.cudaVersion === '11' ? '11-7' : '12-0') - : '-vulkan' - : ''; - const instructions = options?.instructions - ? `-${options.instructions}` - : ''; - const engineName = `${platform}-${arch}${instructions.toLowerCase()}${cudaVersion}`; - return `${engineName}.tar.gz`; + engineSpinner.succeed('Engine installed'); }; /** @@ -264,7 +175,10 @@ export class InitCliUsecases { }); try { - await decompress(destination, join(dataFolderPath, 'cortex-cpp')); + await decompress( + destination, + await this.fileManagerService.getCortexCppEnginePath(), + ); } catch (e) { console.log(e); exit(1); @@ -287,8 +201,10 @@ export class InitCliUsecases { */ private async installAcceleratedEngine( version: string = 'latest', - engine: string = Engines.onnx, + engine: string = Engines.llamaCPP, + matchers: string[] = [], ) { + const checkingIndicator = ora('Fetching engine repo...').start(); const res = await firstValueFrom( this.httpService.get( CORTEX_ENGINE_RELEASES_URL(engine) + @@ -313,8 +229,10 @@ export class InitCliUsecases { (e) => e.name === version.replace('v', ''), ); } - const toDownloadAsset = release.assets.find((s: any) => - s.name.includes(process.platform === 'win32' ? 
'windows' : 'linux'), + + // Find the asset for the current platform + const toDownloadAsset = release.assets.find((asset: any) => + matchers.every((matcher) => asset.name.includes(matcher)), ); if (!toDownloadAsset) { @@ -324,9 +242,11 @@ export class InitCliUsecases { exit(1); } - console.log(`Downloading engine file ${toDownloadAsset.name}`); - const dataFolderPath = await this.fileManagerService.getDataFolderPath(); - const engineDir = join(dataFolderPath, 'cortex-cpp'); + checkingIndicator.succeed('Engine repo fetched'); + + const engineDir = await this.fileManagerService.getCortexCppEnginePath(); + + if (!existsSync(engineDir)) mkdirSync(engineDir, { recursive: true }); const download = await firstValueFrom( this.httpService.get(toDownloadAsset.browser_download_url, { @@ -338,7 +258,7 @@ export class InitCliUsecases { process.exit(1); } - const destination = join(dataFolderPath, toDownloadAsset.name); + const destination = join(engineDir, toDownloadAsset.name); await new Promise((resolve, reject) => { const writer = createWriteStream(destination); @@ -366,8 +286,9 @@ export class InitCliUsecases { download.data.pipe(writer); }); + const decompressIndicator = ora('Decompressing engine...').start(); try { - await decompress(destination, join(engineDir, 'engines')); + await decompress(destination, engineDir); } catch (e) { console.error('Error decompressing file', e); exit(1); @@ -375,13 +296,12 @@ export class InitCliUsecases { await rm(destination, { force: true }); // Copy the additional files to the cortex-cpp directory - for (const file of readdirSync(join(engineDir, 'engines', engine))) { + for (const file of readdirSync(join(engineDir, engine))) { if (file !== 'engine.dll') { - await cpSync( - join(engineDir, 'engines', engine, file), - join(engineDir, file), - ); + await cpSync(join(engineDir, engine, file), join(engineDir, file)); + await rmSync(join(engineDir, engine, file)); } } + decompressIndicator.succeed('Engine decompressed'); } } diff --git 
a/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts b/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts index 7bfbd1e5e..c4c3fd499 100644 --- a/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts +++ b/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts @@ -31,7 +31,7 @@ export class FileManagerService { private presetFolderName = 'presets'; private extensionFoldername = 'extensions'; private benchmarkFoldername = 'benchmark'; - private cortexCppFolderName = 'cortex-cpp'; + private cortexEnginesFolderName = 'engines'; private cortexTelemetryFolderName = 'telemetry'; /** @@ -81,7 +81,10 @@ export class FileManagerService { } const modelFolderPath = join(dataFolderPath, this.modelFolderName); - const cortexCppFolderPath = join(dataFolderPath, this.cortexCppFolderName); + const cortexCppFolderPath = join( + dataFolderPath, + this.cortexEnginesFolderName, + ); const cortexTelemetryFolderPath = join( dataFolderPath, this.cortexTelemetryFolderName, @@ -246,7 +249,7 @@ export class FileManagerService { * @returns the path to the cortex engines folder */ async getCortexCppEnginePath(): Promise { - return join(await this.getDataFolderPath(), 'cortex-cpp', 'engines'); + return join(await this.getDataFolderPath(), this.cortexEnginesFolderName); } /** diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index e4df5d098..8e5dc535c 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -1,10 +1,9 @@ import { Injectable } from '@nestjs/common'; -import { ChildProcess, spawn } from 'child_process'; +import { ChildProcess, fork } from 'child_process'; import { delimiter, join } from 'path'; import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto'; import { HttpService } from '@nestjs/axios'; -import { 
existsSync } from 'node:fs'; import { firstValueFrom } from 'rxjs'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; import { @@ -12,12 +11,11 @@ import { CORTEX_CPP_PROCESS_DESTROY_URL, CORTEX_JS_STOP_API_SERVER_URL, } from '@/infrastructure/constants/cortex'; -import { createWriteStream, openSync } from 'fs'; +import { openSync } from 'fs'; @Injectable() export class CortexUsecases { private cortexProcess: ChildProcess | undefined; - private cortexBinaryName: string = `cortex-cpp${process.platform === 'win32' ? '.exe' : ''}`; constructor( private readonly httpService: HttpService, @@ -40,34 +38,23 @@ export class CortexUsecases { }; } - const args: string[] = ['1', host, `${port}`]; - const dataFolderPath = await this.fileManagerService.getDataFolderPath(); - const cortexCppFolderPath = join(dataFolderPath, 'cortex-cpp'); - const cortexCppPath = join(cortexCppFolderPath, this.cortexBinaryName); - - if (!existsSync(cortexCppPath)) { - throw new Error('The engine is not available, please run "cortex init".'); - } - - const cortexCPPPath = join( - await this.fileManagerService.getDataFolderPath(), - 'cortex-cpp', - ); + const engineDir = await this.fileManagerService.getCortexCppEnginePath(); + const dataFolderPath = await this.fileManagerService.getDataFolderPath() const writer = openSync(await this.fileManagerService.getLogPath(), 'a+'); - // go up one level to get the binary folder, have to also work on windows - this.cortexProcess = spawn(cortexCppPath, args, { + this.cortexProcess = fork(join(__dirname, './../../utils/cortex-cpp'), [], { detached: true, - cwd: cortexCppFolderPath, - stdio: [0, writer, writer], + cwd: dataFolderPath, + stdio: [0, writer, writer, 'ipc'], env: { ...process.env, CUDA_VISIBLE_DEVICES: '0', - PATH: (process.env.PATH || '').concat(delimiter, cortexCPPPath), + ENGINE_PATH: dataFolderPath, + PATH: (process.env.PATH || '').concat(delimiter, engineDir), LD_LIBRARY_PATH: 
(process.env.LD_LIBRARY_PATH || '').concat( delimiter, - cortexCPPPath, + engineDir, ), // // Vulkan - Support 1 device at a time for now // ...(executableOptions.vkVisibleDevices?.length > 0 && { @@ -75,6 +62,7 @@ export class CortexUsecases { // }), }, }); + this.cortexProcess.unref(); // Await for the /healthz status ok return new Promise((resolve, reject) => { diff --git a/cortex-js/src/utils/cortex-cpp.ts b/cortex-js/src/utils/cortex-cpp.ts new file mode 100644 index 000000000..909304cfc --- /dev/null +++ b/cortex-js/src/utils/cortex-cpp.ts @@ -0,0 +1,3 @@ +import * as cortexCPP from 'cortex-cpp'; + +cortexCPP.start();