From 9d2b02d842faa9e141a4eee8a06d88296837363b Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Mon, 26 Aug 2024 11:03:24 +0700 Subject: [PATCH 01/16] refactor: rename cortex-cpp to engine --- .gitignore | 1 + .gitmodules | 3 ++ {cortex-cpp => engine}/.clang-format | 0 {cortex-cpp => engine}/.gitignore | 0 {cortex-cpp => engine}/CMakeLists.txt | 30 +++++++++++++++--- {cortex-cpp => engine}/CONTRIBUTING.md | 0 {cortex-cpp => engine}/LICENSE | 0 {cortex-cpp => engine}/Makefile | 0 {cortex-cpp => engine}/README.md | 0 {cortex-cpp => engine}/addon.cc | 0 {cortex-cpp => engine}/audio.md | 0 {cortex-cpp => engine}/binding/index.d.ts | 0 {cortex-cpp => engine}/binding/index.js | 0 {cortex-cpp => engine}/common/base.h | 0 {cortex-cpp => engine}/controllers/health.cc | 0 {cortex-cpp => engine}/controllers/health.h | 0 .../controllers/prelight.cc | 0 {cortex-cpp => engine}/controllers/prelight.h | 0 .../controllers/processManager.cc | 0 .../controllers/processManager.h | 0 {cortex-cpp => engine}/controllers/server.cc | 0 {cortex-cpp => engine}/controllers/server.h | 0 .../cortex-common/EngineI.h | 0 .../cortex-common/cortexpythoni.h | 0 .../cortex-cpp-deps/.gitignore | 0 .../cortex-cpp-deps/CMakeLists.txt | 0 .../cortex-cpp-deps/README.md | 0 .../examples/example-docker/Dockerfile | 0 .../examples/example-docker/alpine.Dockerfile | 0 .../examples/example-docker/cuda.Dockerfile | 0 .../examples/grammars/json.gbnf | 0 .../examples/interface/README.md | 0 .../examples/interface/app.py | 0 .../examples/interface/avatar.png | Bin {cortex-cpp => engine}/install.bat | 0 {cortex-cpp => engine}/install.sh | 0 {cortex-cpp => engine}/install_deps.sh | 0 {cortex-cpp => engine}/main.cc | 0 {cortex-cpp => engine}/package.json | 0 {cortex-cpp => engine}/test/CMakeLists.txt | 0 .../test/components/CMakeLists.txt | 0 .../test/components/main.cc | 0 .../test/components/test_cortex_utils.cc | 0 {cortex-cpp => engine}/utils/cortex_utils.h | 0 .../utils/cpuid/cpu_info.cc | 0 {cortex-cpp => engine}/utils/cpuid/cpu_info.h | 0 .../utils/cpuid/detail/cpu_info_impl.h | 0 .../utils/cpuid/detail/extract_x86_flags.h | 0 .../utils/cpuid/detail/init_gcc_x86.h | 0 .../utils/cpuid/detail/init_ios_clang_arm.h | 0 .../utils/cpuid/detail/init_linux_gcc_arm.h | 0 .../utils/cpuid/detail/init_msvc_arm.h | 0 .../utils/cpuid/detail/init_msvc_x86.h | 0 .../utils/cpuid/detail/init_unknown.h | 0 {cortex-cpp => engine}/utils/cpuid/platform.h | 0 {cortex-cpp => engine}/utils/dr_wav.h | 0 {cortex-cpp => engine}/utils/dylib.h | 0 {cortex-cpp => engine}/utils/json.hpp | 0 {cortex-cpp => engine}/utils/logging_utils.h | 0 engine/vcpkg | 1 + engine/vcpkg-configuration.json | 14 ++++++++ engine/vcpkg.json | 18 +++++++++++ 62 files changed, 63 insertions(+), 4 deletions(-) rename {cortex-cpp => engine}/.clang-format (100%) rename {cortex-cpp => engine}/.gitignore (100%) rename {cortex-cpp => engine}/CMakeLists.txt (80%) rename {cortex-cpp => engine}/CONTRIBUTING.md (100%) rename {cortex-cpp => engine}/LICENSE (100%) rename {cortex-cpp => engine}/Makefile (100%) rename {cortex-cpp => engine}/README.md (100%) rename {cortex-cpp => engine}/addon.cc (100%) rename {cortex-cpp => engine}/audio.md (100%) rename {cortex-cpp => engine}/binding/index.d.ts (100%) rename {cortex-cpp => engine}/binding/index.js (100%) rename {cortex-cpp => engine}/common/base.h (100%) rename {cortex-cpp => engine}/controllers/health.cc (100%) rename {cortex-cpp => engine}/controllers/health.h (100%) rename {cortex-cpp => engine}/controllers/prelight.cc (100%) rename {cortex-cpp => 
engine}/controllers/prelight.h (100%) rename {cortex-cpp => engine}/controllers/processManager.cc (100%) rename {cortex-cpp => engine}/controllers/processManager.h (100%) rename {cortex-cpp => engine}/controllers/server.cc (100%) rename {cortex-cpp => engine}/controllers/server.h (100%) rename {cortex-cpp => engine}/cortex-common/EngineI.h (100%) rename {cortex-cpp => engine}/cortex-common/cortexpythoni.h (100%) rename {cortex-cpp => engine}/cortex-cpp-deps/.gitignore (100%) rename {cortex-cpp => engine}/cortex-cpp-deps/CMakeLists.txt (100%) rename {cortex-cpp => engine}/cortex-cpp-deps/README.md (100%) rename {cortex-cpp => engine}/examples/example-docker/Dockerfile (100%) rename {cortex-cpp => engine}/examples/example-docker/alpine.Dockerfile (100%) rename {cortex-cpp => engine}/examples/example-docker/cuda.Dockerfile (100%) rename {cortex-cpp => engine}/examples/grammars/json.gbnf (100%) rename {cortex-cpp => engine}/examples/interface/README.md (100%) rename {cortex-cpp => engine}/examples/interface/app.py (100%) rename {cortex-cpp => engine}/examples/interface/avatar.png (100%) rename {cortex-cpp => engine}/install.bat (100%) rename {cortex-cpp => engine}/install.sh (100%) rename {cortex-cpp => engine}/install_deps.sh (100%) mode change 100755 => 100644 rename {cortex-cpp => engine}/main.cc (100%) rename {cortex-cpp => engine}/package.json (100%) rename {cortex-cpp => engine}/test/CMakeLists.txt (100%) rename {cortex-cpp => engine}/test/components/CMakeLists.txt (100%) rename {cortex-cpp => engine}/test/components/main.cc (100%) rename {cortex-cpp => engine}/test/components/test_cortex_utils.cc (100%) rename {cortex-cpp => engine}/utils/cortex_utils.h (100%) rename {cortex-cpp => engine}/utils/cpuid/cpu_info.cc (100%) rename {cortex-cpp => engine}/utils/cpuid/cpu_info.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/cpu_info_impl.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/extract_x86_flags.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_gcc_x86.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_ios_clang_arm.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_linux_gcc_arm.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_msvc_arm.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_msvc_x86.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_unknown.h (100%) rename {cortex-cpp => engine}/utils/cpuid/platform.h (100%) rename {cortex-cpp => engine}/utils/dr_wav.h (100%) rename {cortex-cpp => engine}/utils/dylib.h (100%) rename {cortex-cpp => engine}/utils/json.hpp (100%) rename {cortex-cpp => engine}/utils/logging_utils.h (100%) create mode 160000 engine/vcpkg create mode 100644 engine/vcpkg-configuration.json create mode 100644 engine/vcpkg.json diff --git a/.gitignore b/.gitignore index d3c4ef22b..237fb5b33 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ cortex-js/package-lock.json .vscode cortex-js/command cortex-js/src/infrastructure/commanders/test/test_data +**/vcpkg_installed \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index e69de29bb..da05bcdd8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "engine/vcpkg"] + path = engine/vcpkg + url = https://github.com/microsoft/vcpkg.git diff --git a/cortex-cpp/.clang-format b/engine/.clang-format similarity index 100% rename from cortex-cpp/.clang-format rename to engine/.clang-format diff --git a/cortex-cpp/.gitignore b/engine/.gitignore similarity index 100% rename from 
cortex-cpp/.gitignore
rename to engine/.gitignore
diff --git a/cortex-cpp/CMakeLists.txt b/engine/CMakeLists.txt
similarity index 80%
rename from cortex-cpp/CMakeLists.txt
rename to engine/CMakeLists.txt
index a53e9fa70..46ea2c633 100644
--- a/cortex-cpp/CMakeLists.txt
+++ b/engine/CMakeLists.txt
@@ -33,9 +33,17 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 set(OPENSSL_USE_STATIC_LIBS TRUE)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build-deps/_install)
+# set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build-deps/_install) # This is the critical line for installing another package
+if(MSVC)
+  add_compile_options(
+    $<$<CONFIG:>:/MT> #---------|
+    $<$<CONFIG:Debug>:/MTd> #---|-- Statically link the runtime libraries
+    $<$<CONFIG:Release>:/MT> #--|
+  )
+endif()
+
 if(LLAMA_CUDA)
   cmake_minimum_required(VERSION 3.17)
@@ -75,6 +83,16 @@ add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}")
 # add_subdirectory(test)
+find_package(jsoncpp CONFIG REQUIRED)
+find_package(Drogon CONFIG REQUIRED)
+find_package(yaml-cpp CONFIG REQUIRED)
+find_package(jinja2cpp CONFIG REQUIRED)
+find_package(httplib CONFIG REQUIRED)
+find_package(nlohmann_json CONFIG REQUIRED)
+find_package(CLI11 CONFIG REQUIRED)
+find_package(unofficial-minizip CONFIG REQUIRED)
+find_package(LibArchive REQUIRED)
+
 # Build using CMAKE-JS
 if(DEFINED CMAKE_JS_INC)
   if(("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") OR("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU"))
@@ -109,8 +127,12 @@ endif()
 #
 # and comment out the following lines
-find_package(Drogon CONFIG REQUIRED)
-
+target_link_libraries(${PROJECT_NAME} PRIVATE httplib::httplib)
+target_link_libraries(${PROJECT_NAME} PRIVATE nlohmann_json::nlohmann_json)
+target_link_libraries(${PROJECT_NAME} PRIVATE jinja2cpp)
+target_link_libraries(${PROJECT_NAME} PRIVATE CLI11::CLI11)
+target_link_libraries(${PROJECT_NAME} PRIVATE unofficial::minizip::minizip)
+target_link_libraries(${PROJECT_NAME} PRIVATE LibArchive::LibArchive)

 # Build using CMAKE-JS
 if(DEFINED CMAKE_JS_INC)
@@ -124,7 +146,7 @@ if(DEFINED CMAKE_JS_INC)
     execute_process(COMMAND ${CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS})
   endif()
 else()
-  target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon
+  target_link_libraries(${PROJECT_NAME} PRIVATE JsonCpp::JsonCpp Drogon::Drogon
                         OpenSSL::SSL OpenSSL::Crypto yaml-cpp::yaml-cpp
                         ${CMAKE_THREAD_LIBS_INIT})
 endif()
 # ##############################################################################
diff --git a/cortex-cpp/CONTRIBUTING.md b/engine/CONTRIBUTING.md
similarity index 100%
rename from cortex-cpp/CONTRIBUTING.md
rename to engine/CONTRIBUTING.md
diff --git a/cortex-cpp/LICENSE b/engine/LICENSE
similarity index 100%
rename from cortex-cpp/LICENSE
rename to engine/LICENSE
diff --git a/cortex-cpp/Makefile b/engine/Makefile
similarity index 100%
rename from cortex-cpp/Makefile
rename to engine/Makefile
diff --git a/cortex-cpp/README.md b/engine/README.md
similarity index 100%
rename from cortex-cpp/README.md
rename to engine/README.md
diff --git a/cortex-cpp/addon.cc b/engine/addon.cc
similarity index 100%
rename from cortex-cpp/addon.cc
rename to engine/addon.cc
diff --git a/cortex-cpp/audio.md b/engine/audio.md
similarity index 100%
rename from cortex-cpp/audio.md
rename to engine/audio.md
diff --git a/cortex-cpp/binding/index.d.ts b/engine/binding/index.d.ts
similarity index 100%
rename from cortex-cpp/binding/index.d.ts
rename to engine/binding/index.d.ts
diff --git
a/cortex-cpp/binding/index.js b/engine/binding/index.js similarity index 100% rename from cortex-cpp/binding/index.js rename to engine/binding/index.js diff --git a/cortex-cpp/common/base.h b/engine/common/base.h similarity index 100% rename from cortex-cpp/common/base.h rename to engine/common/base.h diff --git a/cortex-cpp/controllers/health.cc b/engine/controllers/health.cc similarity index 100% rename from cortex-cpp/controllers/health.cc rename to engine/controllers/health.cc diff --git a/cortex-cpp/controllers/health.h b/engine/controllers/health.h similarity index 100% rename from cortex-cpp/controllers/health.h rename to engine/controllers/health.h diff --git a/cortex-cpp/controllers/prelight.cc b/engine/controllers/prelight.cc similarity index 100% rename from cortex-cpp/controllers/prelight.cc rename to engine/controllers/prelight.cc diff --git a/cortex-cpp/controllers/prelight.h b/engine/controllers/prelight.h similarity index 100% rename from cortex-cpp/controllers/prelight.h rename to engine/controllers/prelight.h diff --git a/cortex-cpp/controllers/processManager.cc b/engine/controllers/processManager.cc similarity index 100% rename from cortex-cpp/controllers/processManager.cc rename to engine/controllers/processManager.cc diff --git a/cortex-cpp/controllers/processManager.h b/engine/controllers/processManager.h similarity index 100% rename from cortex-cpp/controllers/processManager.h rename to engine/controllers/processManager.h diff --git a/cortex-cpp/controllers/server.cc b/engine/controllers/server.cc similarity index 100% rename from cortex-cpp/controllers/server.cc rename to engine/controllers/server.cc diff --git a/cortex-cpp/controllers/server.h b/engine/controllers/server.h similarity index 100% rename from cortex-cpp/controllers/server.h rename to engine/controllers/server.h diff --git a/cortex-cpp/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h similarity index 100% rename from cortex-cpp/cortex-common/EngineI.h rename to engine/cortex-common/EngineI.h diff --git a/cortex-cpp/cortex-common/cortexpythoni.h b/engine/cortex-common/cortexpythoni.h similarity index 100% rename from cortex-cpp/cortex-common/cortexpythoni.h rename to engine/cortex-common/cortexpythoni.h diff --git a/cortex-cpp/cortex-cpp-deps/.gitignore b/engine/cortex-cpp-deps/.gitignore similarity index 100% rename from cortex-cpp/cortex-cpp-deps/.gitignore rename to engine/cortex-cpp-deps/.gitignore diff --git a/cortex-cpp/cortex-cpp-deps/CMakeLists.txt b/engine/cortex-cpp-deps/CMakeLists.txt similarity index 100% rename from cortex-cpp/cortex-cpp-deps/CMakeLists.txt rename to engine/cortex-cpp-deps/CMakeLists.txt diff --git a/cortex-cpp/cortex-cpp-deps/README.md b/engine/cortex-cpp-deps/README.md similarity index 100% rename from cortex-cpp/cortex-cpp-deps/README.md rename to engine/cortex-cpp-deps/README.md diff --git a/cortex-cpp/examples/example-docker/Dockerfile b/engine/examples/example-docker/Dockerfile similarity index 100% rename from cortex-cpp/examples/example-docker/Dockerfile rename to engine/examples/example-docker/Dockerfile diff --git a/cortex-cpp/examples/example-docker/alpine.Dockerfile b/engine/examples/example-docker/alpine.Dockerfile similarity index 100% rename from cortex-cpp/examples/example-docker/alpine.Dockerfile rename to engine/examples/example-docker/alpine.Dockerfile diff --git a/cortex-cpp/examples/example-docker/cuda.Dockerfile b/engine/examples/example-docker/cuda.Dockerfile similarity index 100% rename from cortex-cpp/examples/example-docker/cuda.Dockerfile 
rename to engine/examples/example-docker/cuda.Dockerfile diff --git a/cortex-cpp/examples/grammars/json.gbnf b/engine/examples/grammars/json.gbnf similarity index 100% rename from cortex-cpp/examples/grammars/json.gbnf rename to engine/examples/grammars/json.gbnf diff --git a/cortex-cpp/examples/interface/README.md b/engine/examples/interface/README.md similarity index 100% rename from cortex-cpp/examples/interface/README.md rename to engine/examples/interface/README.md diff --git a/cortex-cpp/examples/interface/app.py b/engine/examples/interface/app.py similarity index 100% rename from cortex-cpp/examples/interface/app.py rename to engine/examples/interface/app.py diff --git a/cortex-cpp/examples/interface/avatar.png b/engine/examples/interface/avatar.png similarity index 100% rename from cortex-cpp/examples/interface/avatar.png rename to engine/examples/interface/avatar.png diff --git a/cortex-cpp/install.bat b/engine/install.bat similarity index 100% rename from cortex-cpp/install.bat rename to engine/install.bat diff --git a/cortex-cpp/install.sh b/engine/install.sh similarity index 100% rename from cortex-cpp/install.sh rename to engine/install.sh diff --git a/cortex-cpp/install_deps.sh b/engine/install_deps.sh old mode 100755 new mode 100644 similarity index 100% rename from cortex-cpp/install_deps.sh rename to engine/install_deps.sh diff --git a/cortex-cpp/main.cc b/engine/main.cc similarity index 100% rename from cortex-cpp/main.cc rename to engine/main.cc diff --git a/cortex-cpp/package.json b/engine/package.json similarity index 100% rename from cortex-cpp/package.json rename to engine/package.json diff --git a/cortex-cpp/test/CMakeLists.txt b/engine/test/CMakeLists.txt similarity index 100% rename from cortex-cpp/test/CMakeLists.txt rename to engine/test/CMakeLists.txt diff --git a/cortex-cpp/test/components/CMakeLists.txt b/engine/test/components/CMakeLists.txt similarity index 100% rename from cortex-cpp/test/components/CMakeLists.txt rename to engine/test/components/CMakeLists.txt diff --git a/cortex-cpp/test/components/main.cc b/engine/test/components/main.cc similarity index 100% rename from cortex-cpp/test/components/main.cc rename to engine/test/components/main.cc diff --git a/cortex-cpp/test/components/test_cortex_utils.cc b/engine/test/components/test_cortex_utils.cc similarity index 100% rename from cortex-cpp/test/components/test_cortex_utils.cc rename to engine/test/components/test_cortex_utils.cc diff --git a/cortex-cpp/utils/cortex_utils.h b/engine/utils/cortex_utils.h similarity index 100% rename from cortex-cpp/utils/cortex_utils.h rename to engine/utils/cortex_utils.h diff --git a/cortex-cpp/utils/cpuid/cpu_info.cc b/engine/utils/cpuid/cpu_info.cc similarity index 100% rename from cortex-cpp/utils/cpuid/cpu_info.cc rename to engine/utils/cpuid/cpu_info.cc diff --git a/cortex-cpp/utils/cpuid/cpu_info.h b/engine/utils/cpuid/cpu_info.h similarity index 100% rename from cortex-cpp/utils/cpuid/cpu_info.h rename to engine/utils/cpuid/cpu_info.h diff --git a/cortex-cpp/utils/cpuid/detail/cpu_info_impl.h b/engine/utils/cpuid/detail/cpu_info_impl.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/cpu_info_impl.h rename to engine/utils/cpuid/detail/cpu_info_impl.h diff --git a/cortex-cpp/utils/cpuid/detail/extract_x86_flags.h b/engine/utils/cpuid/detail/extract_x86_flags.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/extract_x86_flags.h rename to engine/utils/cpuid/detail/extract_x86_flags.h diff --git 
a/cortex-cpp/utils/cpuid/detail/init_gcc_x86.h b/engine/utils/cpuid/detail/init_gcc_x86.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_gcc_x86.h rename to engine/utils/cpuid/detail/init_gcc_x86.h diff --git a/cortex-cpp/utils/cpuid/detail/init_ios_clang_arm.h b/engine/utils/cpuid/detail/init_ios_clang_arm.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_ios_clang_arm.h rename to engine/utils/cpuid/detail/init_ios_clang_arm.h diff --git a/cortex-cpp/utils/cpuid/detail/init_linux_gcc_arm.h b/engine/utils/cpuid/detail/init_linux_gcc_arm.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_linux_gcc_arm.h rename to engine/utils/cpuid/detail/init_linux_gcc_arm.h diff --git a/cortex-cpp/utils/cpuid/detail/init_msvc_arm.h b/engine/utils/cpuid/detail/init_msvc_arm.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_msvc_arm.h rename to engine/utils/cpuid/detail/init_msvc_arm.h diff --git a/cortex-cpp/utils/cpuid/detail/init_msvc_x86.h b/engine/utils/cpuid/detail/init_msvc_x86.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_msvc_x86.h rename to engine/utils/cpuid/detail/init_msvc_x86.h diff --git a/cortex-cpp/utils/cpuid/detail/init_unknown.h b/engine/utils/cpuid/detail/init_unknown.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_unknown.h rename to engine/utils/cpuid/detail/init_unknown.h diff --git a/cortex-cpp/utils/cpuid/platform.h b/engine/utils/cpuid/platform.h similarity index 100% rename from cortex-cpp/utils/cpuid/platform.h rename to engine/utils/cpuid/platform.h diff --git a/cortex-cpp/utils/dr_wav.h b/engine/utils/dr_wav.h similarity index 100% rename from cortex-cpp/utils/dr_wav.h rename to engine/utils/dr_wav.h diff --git a/cortex-cpp/utils/dylib.h b/engine/utils/dylib.h similarity index 100% rename from cortex-cpp/utils/dylib.h rename to engine/utils/dylib.h diff --git a/cortex-cpp/utils/json.hpp b/engine/utils/json.hpp similarity index 100% rename from cortex-cpp/utils/json.hpp rename to engine/utils/json.hpp diff --git a/cortex-cpp/utils/logging_utils.h b/engine/utils/logging_utils.h similarity index 100% rename from cortex-cpp/utils/logging_utils.h rename to engine/utils/logging_utils.h diff --git a/engine/vcpkg b/engine/vcpkg new file mode 160000 index 000000000..fb544875b --- /dev/null +++ b/engine/vcpkg @@ -0,0 +1 @@ +Subproject commit fb544875b93bffebe96c6f720000003234cfba08 diff --git a/engine/vcpkg-configuration.json b/engine/vcpkg-configuration.json new file mode 100644 index 000000000..c88ae390d --- /dev/null +++ b/engine/vcpkg-configuration.json @@ -0,0 +1,14 @@ +{ + "default-registry": { + "kind": "git", + "baseline": "a76e5d9e1c62a23b9e92353e5e25d8c34cda2b74", + "repository": "https://github.com/Cheaterdev/vcpkg" + }, + "registries": [ + { + "kind": "artifact", + "location": "https://github.com/microsoft/vcpkg-ce-catalog/archive/refs/heads/main.zip", + "name": "microsoft" + } + ] + } \ No newline at end of file diff --git a/engine/vcpkg.json b/engine/vcpkg.json new file mode 100644 index 000000000..82118eb17 --- /dev/null +++ b/engine/vcpkg.json @@ -0,0 +1,18 @@ +{ + "dependencies": [ + "cli11", + { + "name": "cpp-httplib", + "features": [ + "openssl" + ] + }, + "drogon", + "jinja2cpp", + "jsoncpp", + "minizip", + "nlohmann-json", + "yaml-cpp", + "libarchive" + ] + } \ No newline at end of file From 1f80ff75a500f6dfd576d3abaf76b23974dc4e23 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Mon, 26 Aug 2024 11:29:01 +0700 Subject: [PATCH 02/16] 
refactor: import cortex.cpp code --- engine/CMakeLists.txt | 18 +- engine/commands/model_pull_cmd.cc | 22 + engine/commands/model_pull_cmd.h | 15 + engine/commands/start_model_cmd.cc | 43 ++ engine/commands/start_model_cmd.h | 18 + engine/commands/stop_model_cmd.cc | 31 ++ engine/commands/stop_model_cmd.h | 18 + engine/commands/stop_server_cmd.cc | 20 + engine/commands/stop_server_cmd.h | 15 + engine/config/gguf_parser.cc | 582 ++++++++++++++++++++++ engine/config/gguf_parser.h | 71 +++ engine/config/model_config.h | 40 ++ engine/config/yaml_config.cc | 212 ++++++++ engine/config/yaml_config.h | 32 ++ engine/controllers/command_line_parser.cc | 93 ++++ engine/controllers/command_line_parser.h | 13 + engine/services/download_service.cc | 111 +++++ engine/services/download_service.h | 73 +++ engine/utils/archive_utils.h | 146 ++++++ engine/utils/cortexso_parser.h | 69 +++ engine/utils/http_util.h | 24 + 21 files changed, 1653 insertions(+), 13 deletions(-) create mode 100644 engine/commands/model_pull_cmd.cc create mode 100644 engine/commands/model_pull_cmd.h create mode 100644 engine/commands/start_model_cmd.cc create mode 100644 engine/commands/start_model_cmd.h create mode 100644 engine/commands/stop_model_cmd.cc create mode 100644 engine/commands/stop_model_cmd.h create mode 100644 engine/commands/stop_server_cmd.cc create mode 100644 engine/commands/stop_server_cmd.h create mode 100644 engine/config/gguf_parser.cc create mode 100644 engine/config/gguf_parser.h create mode 100644 engine/config/model_config.h create mode 100644 engine/config/yaml_config.cc create mode 100644 engine/config/yaml_config.h create mode 100644 engine/controllers/command_line_parser.cc create mode 100644 engine/controllers/command_line_parser.h create mode 100644 engine/services/download_service.cc create mode 100644 engine/services/download_service.h create mode 100644 engine/utils/archive_utils.h create mode 100644 engine/utils/cortexso_parser.h create mode 100644 engine/utils/http_util.h diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 46ea2c633..d18d28f2d 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -163,21 +163,13 @@ else() endif() aux_source_directory(controllers CTL_SRC) +aux_source_directory(services SERVICES_SRC) aux_source_directory(common COMMON_SRC) aux_source_directory(models MODEL_SRC) aux_source_directory(cortex-common CORTEX_COMMON) -# aux_source_directory(filters FILTER_SRC) aux_source_directory(plugins -# PLUGIN_SRC) - -# drogon_create_views(${PROJECT_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/views -# ${CMAKE_CURRENT_BINARY_DIR}) use the following line to create views with -# namespaces. 
drogon_create_views(${PROJECT_NAME}
-# ${CMAKE_CURRENT_SOURCE_DIR}/views ${CMAKE_CURRENT_BINARY_DIR} TRUE)
+aux_source_directory(config CONFIG_SRC)
+aux_source_directory(commands COMMANDS_SRC)
 target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} )
-# ${CMAKE_CURRENT_SOURCE_DIR}/models)
-target_sources(${PROJECT_NAME} PRIVATE ${CTL_SRC} ${COMMON_SRC})
-# ${FILTER_SRC} ${PLUGIN_SRC} ${MODEL_SRC})
-# ##############################################################################
-# uncomment the following line for dynamically loading views set_property(TARGET
-# ${PROJECT_NAME} PROPERTY ENABLE_EXPORTS ON)
+
+target_sources(${PROJECT_NAME} PRIVATE ${COMMANDS_SRC} ${CONFIG_SRC} ${CTL_SRC} ${COMMON_SRC} ${SERVICES_SRC})
diff --git a/engine/commands/model_pull_cmd.cc b/engine/commands/model_pull_cmd.cc
new file mode 100644
index 000000000..6135efb43
--- /dev/null
+++ b/engine/commands/model_pull_cmd.cc
@@ -0,0 +1,22 @@
+#include "model_pull_cmd.h"
+#include <iostream>
+#include "services/download_service.h"
+#include "trantor/utils/Logger.h"
+#include "utils/cortexso_parser.h"
+
+namespace commands {
+ModelPullCmd::ModelPullCmd(std::string modelHandle)
+    : modelHandle_(std::move(modelHandle)) {}
+
+void ModelPullCmd::Exec() {
+  auto downloadTask = cortexso_parser::getDownloadTask(modelHandle_);
+  if (downloadTask.has_value()) {
+    DownloadService downloadService;
+    downloadService.AddDownloadTask(downloadTask.value());
+    std::cout << "Download finished" << std::endl;
+  } else {
+    std::cout << "Model not found" << std::endl;
+  }
+}
+
+}; // namespace commands
\ No newline at end of file
diff --git a/engine/commands/model_pull_cmd.h b/engine/commands/model_pull_cmd.h
new file mode 100644
index 000000000..2c5f658f2
--- /dev/null
+++ b/engine/commands/model_pull_cmd.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <string>
+
+namespace commands {
+
+class ModelPullCmd {
+ public:
+  ModelPullCmd(std::string modelHandle);
+  void Exec();
+
+ private:
+  std::string modelHandle_;
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/start_model_cmd.cc b/engine/commands/start_model_cmd.cc
new file mode 100644
index 000000000..341ba2f9d
--- /dev/null
+++ b/engine/commands/start_model_cmd.cc
@@ -0,0 +1,43 @@
+#include "start_model_cmd.h"
+#include "httplib.h"
+#include "nlohmann/json.hpp"
+#include "trantor/utils/Logger.h"
+
+namespace commands {
+StartModelCmd::StartModelCmd(std::string host, int port,
+                             const config::ModelConfig& mc)
+    : host_(std::move(host)), port_(port), mc_(mc) {}
+
+void StartModelCmd::Exec() {
+  httplib::Client cli(host_ + ":" + std::to_string(port_));
+  nlohmann::json json_data;
+  if (mc_.files.size() > 0) {
+    // TODO(sang) support multiple files
+    json_data["model_path"] = mc_.files[0];
+  } else {
+    LOG_WARN << "model_path is empty";
+    return;
+  }
+  json_data["model"] = mc_.name;
+  json_data["system_prompt"] = mc_.system_template;
+  json_data["user_prompt"] = mc_.user_template;
+  json_data["ai_prompt"] = mc_.ai_template;
+  json_data["ctx_len"] = mc_.ctx_len;
+  json_data["stop"] = mc_.stop;
+  json_data["engine"] = mc_.engine;
+
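+  // The payload above maps the model.yml fields onto the request body of the
+  // engine's /inferences/server/loadmodel endpoint; only files[0] is sent
+  // because multi-file models are not handled yet (see the TODO above).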
+  auto data_str = json_data.dump();
+
+  auto res = cli.Post("/inferences/server/loadmodel", httplib::Headers(),
+                      data_str.data(), data_str.size(), "application/json");
+  if (res) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      LOG_INFO << res->body;
+    }
+  } else {
+    auto err = res.error();
+    LOG_WARN << "HTTP error: " << httplib::to_string(err);
+  }
+}
+
+}; // namespace commands
\ No newline at end of file
diff --git a/engine/commands/start_model_cmd.h b/engine/commands/start_model_cmd.h
new file mode 100644
index 000000000..27cfc59e6
--- /dev/null
+++ b/engine/commands/start_model_cmd.h
@@ -0,0 +1,18 @@
+#pragma once
+#include <string>
+#include <vector>
+#include "config/model_config.h"
+
+namespace commands {
+
+class StartModelCmd{
+ public:
+  StartModelCmd(std::string host, int port, const config::ModelConfig& mc);
+  void Exec();
+
+ private:
+  std::string host_;
+  int port_;
+  const config::ModelConfig& mc_;
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/stop_model_cmd.cc b/engine/commands/stop_model_cmd.cc
new file mode 100644
index 000000000..628007efe
--- /dev/null
+++ b/engine/commands/stop_model_cmd.cc
@@ -0,0 +1,31 @@
+#include "stop_model_cmd.h"
+#include "httplib.h"
+#include "nlohmann/json.hpp"
+#include "trantor/utils/Logger.h"
+
+namespace commands {
+StopModelCmd::StopModelCmd(std::string host, int port,
+                           const config::ModelConfig& mc)
+    : host_(std::move(host)), port_(port), mc_(mc) {}
+
+void StopModelCmd::Exec() {
+  httplib::Client cli(host_ + ":" + std::to_string(port_));
+  nlohmann::json json_data;
+  json_data["model"] = mc_.name;
+  json_data["engine"] = mc_.engine;
+
+  auto data_str = json_data.dump();
+
+  auto res = cli.Post("/inferences/server/unloadmodel", httplib::Headers(),
+                      data_str.data(), data_str.size(), "application/json");
+  if (res) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      LOG_INFO << res->body;
+    }
+  } else {
+    auto err = res.error();
+    LOG_WARN << "HTTP error: " << httplib::to_string(err);
+  }
+}
+
+}; // namespace commands
\ No newline at end of file
diff --git a/engine/commands/stop_model_cmd.h b/engine/commands/stop_model_cmd.h
new file mode 100644
index 000000000..9ead32370
--- /dev/null
+++ b/engine/commands/stop_model_cmd.h
@@ -0,0 +1,18 @@
+#pragma once
+#include <string>
+#include <vector>
+#include "config/model_config.h"
+
+namespace commands {
+
+class StopModelCmd{
+ public:
+  StopModelCmd(std::string host, int port, const config::ModelConfig& mc);
+  void Exec();
+
+ private:
+  std::string host_;
+  int port_;
+  const config::ModelConfig& mc_;
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/stop_server_cmd.cc b/engine/commands/stop_server_cmd.cc
new file mode 100644
index 000000000..cb312ef99
--- /dev/null
+++ b/engine/commands/stop_server_cmd.cc
@@ -0,0 +1,20 @@
+#include "stop_server_cmd.h"
+#include "httplib.h"
+#include "trantor/utils/Logger.h"
+
+namespace commands {
+StopServerCmd::StopServerCmd(std::string host, int port)
+    : host_(std::move(host)), port_(port) {}
+
+void StopServerCmd::Exec() {
+  httplib::Client cli(host_ + ":" + std::to_string(port_));
+  auto res = cli.Delete("/processManager/destroy");
+  if (res) {
+    LOG_INFO << res->body;
+  } else {
+    auto err = res.error();
+    LOG_WARN << "HTTP error: " << httplib::to_string(err);
+  }
+}
+
+}; // namespace commands
\ No newline at end of file
diff --git a/engine/commands/stop_server_cmd.h b/engine/commands/stop_server_cmd.h
new file mode 100644
index 000000000..03735d81c
--- /dev/null
+++ b/engine/commands/stop_server_cmd.h
@@ -0,0 +1,15 @@
+#pragma once
+#include <string>
+
+namespace commands {
+
+class StopServerCmd{
+ public:
+  StopServerCmd(std::string host, int port);
+  void Exec();
+
+ private:
+  std::string host_;
+  int port_;
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/config/gguf_parser.cc b/engine/config/gguf_parser.cc
new file mode 100644
index 000000000..00b461719
--- /dev/null
+++ b/engine/config/gguf_parser.cc
@@ -0,0 +1,582 @@
+#include <cstdint>
+#include <cstring>
+#include <ctime>
+#include <iostream>
+#include <regex>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#ifdef _WIN32
+#include <io.h>
+#include <windows.h>
+#else
+#include <sys/mman.h>  // For memory-mapped file
+#include <unistd.h>    // For file descriptors
+#endif
+
+#include <fcntl.h>  // For file descriptors
+
+#include <jinja2cpp/template.h>
+
+#include "gguf_parser.h"
+#include "trantor/utils/Logger.h"
+
+namespace config {
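+// The .gguf file is memory-mapped (MapViewOfFile on Windows, mmap elsewhere)
+// so the parser can walk the header and metadata with plain pointer reads.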
+void GGUFHandler::OpenFile(const std::string& file_path) {
+#ifdef _WIN32
+  HANDLE file_handle_ = INVALID_HANDLE_VALUE;
+  HANDLE file_mapping_ = nullptr;
+  file_handle_ =
+      CreateFileA(file_path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr,
+                  OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
+  if (file_handle_ == INVALID_HANDLE_VALUE) {
+    throw std::runtime_error("Failed to open file");
+  }
+
+  // Get the file size
+  LARGE_INTEGER file_size_struct;
+  if (!GetFileSizeEx(file_handle_, &file_size_struct)) {
+    CloseHandle(file_handle_);
+    throw std::runtime_error("Failed to get file size");
+  }
+  file_size_ = static_cast<size_t>(file_size_struct.QuadPart);
+
+  // Create a file mapping object
+  file_mapping_ =
+      CreateFileMappingA(file_handle_, nullptr, PAGE_READONLY, 0, 0, nullptr);
+  if (file_mapping_ == nullptr) {
+    CloseHandle(file_handle_);
+    throw std::runtime_error("Failed to create file mapping");
+  }
+
+  // Map the file into memory
+  data_ = static_cast<uint8_t*>(
+      MapViewOfFile(file_mapping_, FILE_MAP_READ, 0, 0, file_size_));
+  if (data_ == nullptr) {
+    CloseHandle(file_mapping_);
+    CloseHandle(file_handle_);
+    throw std::runtime_error("Failed to map file");
+  }
+
+  // Close the file handle, as it is no longer needed after mapping
+  CloseHandle(file_handle_);
+
+#else
+  FILE* fd = fopen(file_path.c_str(), "rb");
+  if (!fd) {
+    perror("Error opening file");
+    throw std::runtime_error("Failed to open file");
+  }
+
+  // Get file size
+  // file_size_ = lseek(fd, 0, SEEK_END);
+  fseek(fd, 0, SEEK_END);  // move file pointer to end of file
+  file_size_ = ftell(fd);  // get the file size, in bytes
+  fclose(fd);
+  if (file_size_ == -1) {
+    perror("Error getting file size");
+    // close(fd);
+    throw std::runtime_error("Failed to get file size");
+  }
+  int file_descriptor = open(file_path.c_str(), O_RDONLY);
+  ;
+  // Memory-map the file
+  data_ = static_cast<uint8_t*>(
+      mmap(nullptr, file_size_, PROT_READ, MAP_PRIVATE, file_descriptor, 0));
+  if (data_ == MAP_FAILED) {
+    perror("Error mapping file");
+    close(file_descriptor);
+    throw std::runtime_error("Failed to map file");
+  }
+
+  close(file_descriptor);
+
+#endif
+}
+
+void GGUFHandler::CloseFile() {
+#ifdef _WIN32
+  if (data_ != nullptr) {
+    UnmapViewOfFile(data_);
+    data_ = nullptr;
+  }
+#else
+  if (data_ != nullptr && data_ != MAP_FAILED) {
+    munmap(data_, file_size_);
+  }
+#endif
+}
+
+std::pair<std::size_t, std::string> GGUFHandler::ReadString(
+    std::size_t offset) const {
+  uint64_t length;
+  std::memcpy(&length, data_ + offset, sizeof(uint64_t));
+
+  std::string value(reinterpret_cast<const char*>(data_ + offset + 8), length);
+  return {8 + static_cast<std::size_t>(length), value};
+}
+
+size_t GGUFHandler::ReadMetadataValue(int type, std::size_t offset,
+                                      const std::string& key) {
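+  // GGUF metadata value types: 0 uint8, 1 int8, 2 uint16, 3 int16, 4 uint32,
+  // 5 int32, 6 float32, 7 bool, 8 string, 9 array, 10 uint64, 11 int64,
+  // 12 float64. The return value is the number of bytes consumed at `offset`.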
+  switch (type) {
+    case 0:  // UINT8
+      metadata_uint8_[key] = data_[offset];
+      return 1;
+    case 1:  // INT8
+      metadata_int8_[key] = static_cast<int8_t>(data_[offset]);
+      return 1;
+    case 2:  // UINT16
+      metadata_uint16_[key] =
+          *reinterpret_cast<const uint16_t*>(data_ + offset);
+      return 2;
+    case 3:  // INT16
+      metadata_int16_[key] = *reinterpret_cast<const int16_t*>(data_ + offset);
+      return 2;
+    case 4:  // UINT32
+      metadata_uint32_[key] =
+          *reinterpret_cast<const uint32_t*>(data_ + offset);
+      return 4;
+    case 5:  // INT32
+      metadata_int32_[key] = *reinterpret_cast<const int32_t*>(data_ + offset);
+      return 4;
+    case 6:  // FLOAT32
+      metadata_float_[key] = *reinterpret_cast<const float*>(data_ + offset);
+      return 4;
+    case 7:  // BOOL
+      metadata_bool_[key] = data_[offset] != 0;
+      return 1;
+    case 8:  // STRING
+    {
+      auto [byte_length, value] = ReadString(offset);
+      metadata_string_[key] = value;
+      return byte_length;
+    }
+    case 9:  // ARRAY
+
+      return ReadArray(offset, key);
+    case 10:  // UINT64
+      metadata_uint64_[key] =
+          *reinterpret_cast<const uint64_t*>(data_ + offset);
+      return 8;
+    case 11:  // INT64
+      metadata_int64_[key] = *reinterpret_cast<const int64_t*>(data_ + offset);
+      return 8;
+    case 12:  // FLOAT64
+      metadata_double_[key] = *reinterpret_cast<const double*>(data_ + offset);
+      return 8;
+    default:
+      throw std::runtime_error("Unsupported metadata type: " +
+                               std::to_string(type));
+  }
+}
+
+size_t GGUFHandler::ReadArray(std::size_t offset, const std::string& key) {
+  uint32_t array_type = *reinterpret_cast<const uint32_t*>(data_ + offset);
+  // std::memcpy(&array_type, data_ + offset, sizeof(uint32_t));
+
+  uint64_t array_length =
+      *reinterpret_cast<const uint64_t*>(data_ + offset + 4);
+  // std::memcpy(&array_length, data_ + offset + 4, sizeof(uint64_t));
+  LOG_INFO << "\n"
+           << "Parsing array type: " << array_type
+           << ", array length:" << array_length << "\n";
+  std::size_t array_offset = 12;
+  std::vector<std::string> array_values_string;
+  std::vector<float> array_values_float;
+  uint8_t uint8_value;
+  int8_t int8_value;
+  uint16_t uint16_value;
+  int16_t int16_value;
+  uint32_t uint32_value;
+  int32_t int32_value;
+  float float_value;
+  bool bool_value;
+  std::string string_value;
+  uint64_t uint64_value;
+  int64_t int64_value;
+  double double_value;
+  size_t length;
+
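+  // Array layout: a 4-byte element type and an 8-byte element count (hence
+  // the initial array_offset of 12), followed by tightly packed elements;
+  // string elements carry a uint64 length prefix like top-level strings.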
+  for (uint64_t i = 0; i < array_length; ++i) {
+    // auto [byteLength, value] = ReadMetadataValue(array_type, offset + array_offset);
+    // assume that array only has 2 types: string and int
+    switch (array_type) {
+      case 0:
+        uint8_value = data_[offset + array_offset];
+        length = 1;
+        array_values_float.push_back(static_cast<float>(uint8_value));
+        break;
+      case 1: {
+        int8_value = static_cast<int8_t>(data_[offset + array_offset]);
+        length = 1;
+        array_values_float.push_back(static_cast<float>(int8_value));
+      }
+
+      break;
+      case 2:
+        uint16_value =
+            *reinterpret_cast<const uint16_t*>(data_ + offset + array_offset);
+        length = 2;
+        array_values_float.push_back(static_cast<float>(uint16_value));
+        break;
+      case 3:
+        int16_value =
+            *reinterpret_cast<const int16_t*>(data_ + offset + array_offset);
+        length = 2;
+        array_values_float.push_back(static_cast<float>(int16_value));
+        break;
+      case 4:
+        uint32_value =
+            *reinterpret_cast<const uint32_t*>(data_ + offset + array_offset);
+        length = 4;
+        array_values_float.push_back(static_cast<float>(uint32_value));
+        break;
+      case 5:
+        int32_value =
+            *reinterpret_cast<const int32_t*>(data_ + offset + array_offset);
+        length = 4;
+        array_values_float.push_back(static_cast<float>(int32_value));
+        break;
+      case 6:
+        float_value =
+            *reinterpret_cast<const float*>(data_ + offset + array_offset);
+        length = 4;
+        array_values_float.push_back(static_cast<float>(float_value));
+        break;
+      case 7:
+        bool_value = data_[offset + array_offset] != 0;
+        length = 1;
+        array_values_float.push_back(static_cast<float>(bool_value));
+        break;
+      case 8: {
+        uint64_t length_ =
+            *reinterpret_cast<const uint64_t*>(data_ + offset + array_offset);
+        std::string value(
+            reinterpret_cast<const char*>(data_ + offset + array_offset + 8),
+            length_);
+        length = 8 + static_cast<std::size_t>(length_);
+        array_values_string.push_back(value);
+      } break;
+      case 10:
+        uint64_value =
+            *reinterpret_cast<const uint64_t*>(data_ + offset + array_offset);
+        length = 8;
+        array_values_float.push_back(static_cast<float>(uint64_value));
+        break;
+      case 11:
+        int64_value =
+            *reinterpret_cast<const int64_t*>(data_ + offset + array_offset);
+        length = 8;
+        array_values_float.push_back(static_cast<float>(int64_value));
+        break;
+      case 12:
+        double_value =
+            *reinterpret_cast<const double*>(data_ + offset + array_offset);
+        length = 8;
+        array_values_float.push_back(static_cast<float>(double_value));
+        break;
+      default:
+        throw std::runtime_error("Unsupported metadata type: " +
+                                 std::to_string(array_type));
+        break;
+    }
+
+    array_offset += length;
+  }
+  if (array_values_string.size() > 0)
+    metadata_array_string_[key] = array_values_string;
+  else
+    metadata_array_float_[key] = array_values_float;
+  return array_offset;
+}
+
+void GGUFHandler::Parse(const std::string& file_path) {
+  OpenFile(file_path);
+  LOG_INFO << "GGUF magic number: "
+           << *reinterpret_cast<const uint32_t*>(data_) << "\n";
+  if (*reinterpret_cast<const uint32_t*>(data_) != GGUF_MAGIC_NUMBER) {
+    throw std::runtime_error("Not a valid GGUF file");
+  }
+
+  version_ = *reinterpret_cast<const uint32_t*>(data_ + 4);
+  tensor_count_ = *reinterpret_cast<const uint64_t*>(data_ + 8);
+  uint64_t metadata_kv_count = *reinterpret_cast<const uint64_t*>(data_ + 16);
+  LOG_INFO << "version: " << version_ << "\ntensor count: " << tensor_count_
+           << "\nmetadata key-value pairs: " << metadata_kv_count << "\n";
+
+  std::size_t offset = 24;
+
+  for (uint64_t i = 0; i < metadata_kv_count; ++i) {
+    LOG_INFO << "Parsing key-value number " << i << "\n";
+    auto [key_byte_length, key] = ReadString(offset);
+    offset += key_byte_length;
+    LOG_INFO << "key: " << key << "\n";
+    uint32_t value_type = *reinterpret_cast<const uint32_t*>(data_ + offset);
+    offset += 4;
+    LOG_INFO << "value type number: " << value_type << "\n";
+    size_t value_byte_length = ReadMetadataValue(value_type, offset, key);
+    offset += value_byte_length;
+    LOG_INFO << "-------------------------------------------- " << "\n";
+  }
+  PrintMetadata();
+  ModelConfigFromMetadata();
+  CloseFile();
+}
+
+void GGUFHandler::PrintMetadata() {
+  LOG_INFO << "GGUF Metadata:" << "\n";
+  for (const auto& [key, value] : metadata_uint8_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_int8_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_uint16_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_int16_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_uint32_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_int32_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_float_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_bool_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_string_) {
+
+    if (key.compare("tokenizer.chat_template") == 0) {
+      LOG_INFO << key << ": " << "\n" << value << "\n";
+
+      jinja2::Template chat_template;
+      chat_template.Load(value);
+      jinja2::ValuesMap params{
+          {"add_generation_prompt", true},
+          {"bos_token", "<|begin_of_text|>"},
+          {"eos_token", "<|eot_id|>"},
+          {"messages",
+           jinja2::ValuesList{
+               jinja2::ValuesMap{{"role", "system"},
+                                 {"content", "{system_message}"}},
+               jinja2::ValuesMap{{"role", "user"}, {"content", "{prompt}"}}}}};
+      std::string result = chat_template.RenderAsString(params).value();
+
+      LOG_INFO << "result jinja render: " << result << "\n";
+    } else {
+      LOG_INFO << key << ": " << value << "\n";
+    }
+  }
+
+  for (const auto& [key, value] : metadata_uint64_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_int64_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_double_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_array_float_)
+    LOG_INFO << key << " num elements: " << value.size() << "\n";
+
+  for (const auto& [key, value] : metadata_array_string_)
+    LOG_INFO << key << " num elements: " << value.size() << "\n";
+}
+
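+// Derives a ModelConfig from the parsed metadata. Every integer-width map is
+// scanned for the same keys (general.quantization_version, bos/eos token ids,
+// context_length, block_count) because GGUF producers differ in which integer
+// type they emit; the chat template is then matched against known templates
+// or rendered with jinja2cpp as a fallback.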
+void GGUFHandler::ModelConfigFromMetadata() {
+  int eos_token, bos_token, max_tokens, version, ngl;
+  std::string chat_template, name, eos_string, bos_string;
+  std::vector<std::string> tokens, stop;
+  model_config_.top_p = 0.95;
+  model_config_.temperature = 0.7;
+  model_config_.frequency_penalty = 0;
+  model_config_.presence_penalty = 0;
+  model_config_.stream = true;
+  model_config_.engine = "cortex.llamacpp";
+  model_config_.created = std::time(nullptr);
+  model_config_.model = "model";
+  model_config_.owned_by = "";
+  model_config_.version;
+
+  // Get version, bos, eos id, context_len, ngl from metadata
+  for (const auto& [key, value] : metadata_uint8_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_int8_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_uint16_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_int16_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_uint32_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_int32_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+  for (const auto& [key, value] : metadata_uint64_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_int64_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
(key.find("block_count") != std::string::npos) + ngl = static_cast(value) + 1; + } + + for (const auto& [key, value] : metadata_int32_) { + if (key.compare("general.quantization_version") == 0) + version = static_cast(value); + else if (key.compare("tokenizer.ggml.bos_token_id") == 0) + bos_token = static_cast(value); + else if (key.compare("tokenizer.ggml.eos_token_id") == 0) + eos_token = static_cast(value); + else if (key.find("context_length") != std::string::npos) + max_tokens = static_cast(value); + else if (key.find("block_count") != std::string::npos) + ngl = static_cast(value) + 1; + } + for (const auto& [key, value] : metadata_uint64_) { + if (key.compare("general.quantization_version") == 0) + version = static_cast(value); + else if (key.compare("tokenizer.ggml.bos_token_id") == 0) + bos_token = static_cast(value); + else if (key.compare("tokenizer.ggml.eos_token_id") == 0) + eos_token = static_cast(value); + else if (key.find("context_length") != std::string::npos) + max_tokens = static_cast(value); + else if (key.find("block_count") != std::string::npos) + ngl = static_cast(value) + 1; + } + + for (const auto& [key, value] : metadata_int64_) { + if (key.compare("general.quantization_version") == 0) + version = static_cast(value); + else if (key.compare("tokenizer.ggml.bos_token_id") == 0) + bos_token = static_cast(value); + else if (key.compare("tokenizer.ggml.eos_token_id") == 0) + eos_token = static_cast(value); + else if (key.find("context_length") != std::string::npos) + max_tokens = static_cast(value); + else if (key.find("block_count") != std::string::npos) + ngl = static_cast(value) + 1; + } + for (const auto& [key, value] : metadata_array_string_) { + if (key.compare("tokenizer.ggml.tokens") == 0) { + tokens = std::move(value); + } + } + for (const auto& [key, value] : metadata_string_) { + if (key.compare("general.name") == 0) { + name = std::regex_replace(value, std::regex(" "), "-"); + } else if (key.compare("tokenizer.chat_template") == 0) { + if (value.compare(ZEPHYR_JINJA) == 0) { + chat_template = + "<|system|>\n{system_message}\n<|user|>\n{prompt}\n<|assistant|>\n"; + } else if (value.compare(OPEN_CHAT_3_5_JINJA) == 0) { + chat_template = + "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"; + } else if (value.compare(LLAMA_3_JINJA) == 0 || + value.compare(LLAMA_3_1_JINJA) == 0) { + chat_template = + "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{" + "system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>" + "\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|" + ">\n\n"; + } else { + try { + jinja2::Template jinja2_chat_template; + jinja2_chat_template.Load(value); + jinja2::ValuesMap params{ + {"add_generation_prompt", true}, + {"bos_token", tokens[bos_token]}, + {"eos_token", tokens[eos_token]}, + {"messages", + jinja2::ValuesList{ + jinja2::ValuesMap{{"role", "system"}, + {"content", "{system_message}"}}, + jinja2::ValuesMap{{"role", "user"}, + {"content", "{prompt}"}}}}}; + chat_template = jinja2_chat_template.RenderAsString(params).value(); + } catch (const std::exception& e) { + std::cerr << "Error render chat template: " << e.what() + << ". 
+#pragma once
+#include <unordered_map>
+#include "yaml_config.h"
+
+namespace config {
+constexpr char OPEN_CHAT_3_5_JINJA[] =
+    "{{ bos_token }}{\% for message in messages \%}{{ 'GPT4 Correct ' + "
+    "message['role'].title() + ': ' + message['content'] + "
+    "'<|end_of_turn|>'}}{\% endfor \%}{\% if add_generation_prompt \%}{{ 'GPT4 "
+    "Correct Assistant:' }}{\% endif \%}";
+constexpr char ZEPHYR_JINJA[] =
+    "{\% for message in messages \%}\n{\% if message['role'] == 'user' \%}\n{{ "
+    "'<|user|>\n' + message['content'] + eos_token }}\n{\% elif "
+    "message['role'] == 'system' \%}\n{{ '<|system|>\n' + message['content'] + "
+    "eos_token }}\n{\% elif message['role'] == 'assistant' \%}\n{{ "
+    "'<|assistant|>\n' + message['content'] + eos_token }}\n{\% endif "
+    "\%}\n{\% if loop.last and add_generation_prompt \%}\n{{ '<|assistant|>' "
+    "}}\n{\% endif \%}\n{\% endfor \%}";
+constexpr char LLAMA_3_1_JINJA[] =
+    "{\% set loop_messages = messages \%}{\% for message in loop_messages "
+    "\%}{\% set content = '<|start_header_id|>' + message['role'] + "
+    "'<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' \%}{\% "
+    "if loop.index0 == 0 \%}{\% set content = bos_token + content \%}{\% endif "
+    "\%}{{ content }}{\% endfor \%}{{ "
+    "'<|start_header_id|>assistant<|end_header_id|>\n\n' }}";
+constexpr char LLAMA_3_JINJA[] =
+    "{\% set loop_messages = messages \%}{\% for message in loop_messages "
+    "\%}{\% set content = '<|start_header_id|>' + message['role'] + "
+    "'<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' \%}{\% "
+    "if loop.index0 == 0 \%}{\% set content = bos_token + content \%}{\% endif "
+    "\%}{{ content }}{\% endfor \%}{\% if add_generation_prompt \%}{{ "
+    "'<|start_header_id|>assistant<|end_header_id|>\n\n' }}";
+constexpr uint32_t GGUF_MAGIC_NUMBER = 1179993927;
+
+class GGUFHandler {
+ public:
+  void CloseFile();
+  void Parse(const std::string& file_path);
+  const ModelConfig& GetModelConfig() const;
+  void PrintMetadata();
+
+ private:
+  std::pair<std::size_t, std::string> ReadString(std::size_t offset) const;
+  size_t ReadMetadataValue(int type, std::size_t offset,
+                           const std::string& key);
+  size_t ReadArray(std::size_t offset, const std::string& key);
+  void ModelConfigFromMetadata();
+  void OpenFile(const std::string& file_path);
+
+  uint8_t* data_;
+  size_t file_size_;
+  uint32_t version_;
+  uint64_t tensor_count_;
+  ModelConfig model_config_;
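+  // One key-value store per GGUF scalar type; arrays are reduced to either
+  // a float vector (all numeric/bool element types) or a string vector.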
+  std::unordered_map<std::string, uint8_t> metadata_uint8_;
+  std::unordered_map<std::string, int8_t> metadata_int8_;
+  std::unordered_map<std::string, uint16_t> metadata_uint16_;
+  std::unordered_map<std::string, int16_t> metadata_int16_;
+  std::unordered_map<std::string, uint32_t> metadata_uint32_;
+  std::unordered_map<std::string, int32_t> metadata_int32_;
+  std::unordered_map<std::string, float> metadata_float_;
+  std::unordered_map<std::string, bool> metadata_bool_;
+  std::unordered_map<std::string, std::string> metadata_string_;
+  std::unordered_map<std::string, uint64_t> metadata_uint64_;
+  std::unordered_map<std::string, int64_t> metadata_int64_;
+  std::unordered_map<std::string, double> metadata_double_;
+  std::unordered_map<std::string, std::vector<float>> metadata_array_float_;
+  std::unordered_map<std::string, std::vector<std::string>>
+      metadata_array_string_;
+};
+}
\ No newline at end of file
diff --git a/engine/config/model_config.h b/engine/config/model_config.h
new file mode 100644
index 000000000..b7cd15810
--- /dev/null
+++ b/engine/config/model_config.h
@@ -0,0 +1,40 @@
+#pragma once
+#include <cstddef>
+#include <limits>
+#include <string>
+#include <vector>
+
+namespace config {
+struct ModelConfig {
+  std::string name;
+  std::string model;
+  std::string version;
+  std::vector<std::string> stop = {};
+  float top_p = std::numeric_limits<float>::quiet_NaN();
+  float temperature = std::numeric_limits<float>::quiet_NaN();
+  float frequency_penalty = std::numeric_limits<float>::quiet_NaN();
+  float presence_penalty = std::numeric_limits<float>::quiet_NaN();
+  int max_tokens = std::numeric_limits<int>::quiet_NaN();
+  bool stream = std::numeric_limits<bool>::quiet_NaN();
+  int ngl = std::numeric_limits<int>::quiet_NaN();
+  int ctx_len = std::numeric_limits<int>::quiet_NaN();
+  std::string engine;
+  std::string prompt_template;
+  std::string system_template;
+  std::string user_template;
+  std::string ai_template;
+
+  std::string os;
+  std::string gpu_arch;
+  std::string quantization_method;
+  std::string precision;
+  int tp = std::numeric_limits<int>::quiet_NaN();
+  std::string trtllm_version;
+  bool text_model = std::numeric_limits<bool>::quiet_NaN();
+  std::string id;
+  std::vector<std::string> files;
+  std::size_t created;
+  std::string object;
+  std::string owned_by = "";
+};
+} // namespace config
\ No newline at end of file
diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc
new file mode 100644
index 000000000..fe3e57370
--- /dev/null
+++ b/engine/config/yaml_config.cc
@@ -0,0 +1,212 @@
+#include <algorithm>
+#include <cmath>
+#include <ctime>
+#include <fstream>
+#include <iostream>
+#include <string>
+using namespace std;
+
+#include "yaml-cpp/yaml.h"
+#include "yaml_config.h"
+
+namespace config {
+// Method to read YAML file
+void YamlHandler::Reset() {
+  model_config_ = ModelConfig();
+  yaml_node_.reset();
+};
+void YamlHandler::ReadYamlFile(const std::string& file_path) {
+  try {
+    yaml_node_ = YAML::LoadFile(file_path);
+    // in case of model.yml file, we don't have files yet, create them
+    if (!yaml_node_["files"]) {
+      auto s = file_path;
+      // normalize path
+      std::replace(s.begin(), s.end(), '\\', '/');
+      std::vector<std::string> v;
+      if (yaml_node_["engine"] &&
+          yaml_node_["engine"].as<std::string>() == "cortex.llamacpp") {
+        v.emplace_back(s.substr(0, s.find_last_of('/')) + "/model.gguf");
+      } else {
+        v.emplace_back(s.substr(0, s.find_last_of('/')));
+      }
+
+      // TODO(any) need to support multiple gguf files
+      yaml_node_["files"] = v;
+    }
+  } catch (const YAML::BadFile& e) {
+    std::cerr << "Failed to read file: " << e.what() << std::endl;
+    throw;
+  }
+  ModelConfigFromYaml();
+}
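+// Splits prompt_template into the three engine fields: system_template is
+// everything before the first '{', user_template sits between the first '}'
+// and the last '{', and ai_template is everything after the last '}'; the
+// {system_message} and {prompt} placeholders themselves are dropped.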
+void YamlHandler::SplitPromptTemplate(ModelConfig& mc) {
+  if (mc.prompt_template.size() > 0) {
+    auto& pt = mc.prompt_template;
+    mc.system_template = pt.substr(0, pt.find_first_of('{'));
+    // std::cout << "System template: " << mc.system_template << std::endl;
+    mc.user_template =
+        pt.substr(pt.find_first_of('}') + 1,
+                  pt.find_last_of('{') - pt.find_first_of('}') - 1);
+    // std::cout << "User template : " << mc.user_template << std::endl;
+    mc.ai_template = pt.substr(pt.find_last_of('}') + 1);
+    // std::cout << "Assistant template: " << mc.ai_template << std::endl;
+  }
+}
+const ModelConfig& YamlHandler::GetModelConfig() const {
+  return model_config_;
+}
+
+void YamlHandler::ModelConfigFromFile(const std::string& file_path) {
+  ReadYamlFile(file_path);
+  ModelConfigFromYaml();
+}
+
+void YamlHandler::ModelConfigFromYaml() {
+  ModelConfig tmp;
+  try {
+    if (yaml_node_["name"])
+      tmp.name = yaml_node_["name"].as<std::string>();
+    if (yaml_node_["model"])
+      tmp.model = yaml_node_["model"].as<std::string>();
+    if (yaml_node_["version"])
+      tmp.version = yaml_node_["version"].as<std::string>();
+    if (yaml_node_["engine"])
+      tmp.engine = yaml_node_["engine"].as<std::string>();
+    if (yaml_node_["prompt_template"]) {
+      tmp.prompt_template = yaml_node_["prompt_template"].as<std::string>();
+      SplitPromptTemplate(tmp);
+    }
+
+    if (yaml_node_["os"])
+      tmp.os = yaml_node_["os"].as<std::string>();
+    if (yaml_node_["gpu_arch"])
+      tmp.gpu_arch = yaml_node_["gpu_arch"].as<std::string>();
+    if (yaml_node_["quantization_method"])
+      tmp.quantization_method =
+          yaml_node_["quantization_method"].as<std::string>();
+    if (yaml_node_["precision"])
+      tmp.precision = yaml_node_["precision"].as<std::string>();
+    if (yaml_node_["trtllm_version"])
+      tmp.trtllm_version = yaml_node_["trtllm_version"].as<std::string>();
+    if (yaml_node_["id"])
+      tmp.id = yaml_node_["id"].as<std::string>();
+    if (yaml_node_["object"])
+      tmp.object = yaml_node_["object"].as<std::string>();
+    if (yaml_node_["owned_by"])
+      tmp.owned_by = yaml_node_["owned_by"].as<std::string>();
+    if (yaml_node_["top_p"])
+      tmp.top_p = yaml_node_["top_p"].as<float>();
+    if (yaml_node_["temperature"])
+      tmp.temperature = yaml_node_["temperature"].as<float>();
+    if (yaml_node_["frequency_penalty"])
+      tmp.frequency_penalty = yaml_node_["frequency_penalty"].as<float>();
+    if (yaml_node_["presence_penalty"])
+      tmp.presence_penalty = yaml_node_["presence_penalty"].as<float>();
+    if (yaml_node_["max_tokens"])
+      tmp.max_tokens = yaml_node_["max_tokens"].as<int>();
+    if (yaml_node_["ngl"])
+      tmp.ngl = yaml_node_["ngl"].as<int>();
+    if (yaml_node_["ctx_len"])
+      tmp.ctx_len = yaml_node_["ctx_len"].as<int>();
+    if (yaml_node_["tp"])
+      tmp.tp = yaml_node_["tp"].as<int>();
+    if (yaml_node_["stream"])
+      tmp.stream = yaml_node_["stream"].as<bool>();
+    if (yaml_node_["text_model"])
+      tmp.text_model = yaml_node_["text_model"].as<bool>();
+    if (yaml_node_["stop"])
+      tmp.stop = yaml_node_["stop"].as<std::vector<std::string>>();
+    if (yaml_node_["files"])
+      tmp.files = yaml_node_["files"].as<std::vector<std::string>>();
+    if (yaml_node_["created"])
+      tmp.created = yaml_node_["created"].as<std::size_t>();
+  } catch (const std::exception& e) {
+    std::cerr << "Error when load model config : " << e.what() << std::endl;
+    std::cerr << "Revert ..." << std::endl;
+    return;
+  }
+  model_config_ = std::move(tmp);
+}
+
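+// Rebuilds yaml_node_ from new_model_config; if serialization throws, the
+// previous model_config_ is restored so the handler stays consistent.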
+
+void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) {
+  ModelConfig tmp = std::move(model_config_);
+  try {
+    model_config_ = std::move(new_model_config);
+    yaml_node_.reset();
+    if (!model_config_.name.empty())
+      yaml_node_["name"] = model_config_.name;
+    if (!model_config_.model.empty())
+      yaml_node_["model"] = model_config_.model;
+    if (!model_config_.version.empty())
+      yaml_node_["version"] = model_config_.version;
+    if (!model_config_.engine.empty())
+      yaml_node_["engine"] = model_config_.engine;
+    if (!model_config_.prompt_template.empty()) {
+      yaml_node_["prompt_template"] = model_config_.prompt_template;
+      SplitPromptTemplate(model_config_);
+    }
+
+    if (!model_config_.os.empty())
+      yaml_node_["os"] = model_config_.os;
+    if (!model_config_.gpu_arch.empty())
+      yaml_node_["gpu_arch"] = model_config_.gpu_arch;
+    if (!model_config_.quantization_method.empty())
+      yaml_node_["quantization_method"] = model_config_.quantization_method;
+    if (!model_config_.precision.empty())
+      yaml_node_["precision"] = model_config_.precision;
+    if (!model_config_.trtllm_version.empty())
+      yaml_node_["trtllm_version"] = model_config_.trtllm_version;
+    if (!model_config_.id.empty())
+      yaml_node_["id"] = model_config_.id;
+    if (!model_config_.object.empty())
+      yaml_node_["object"] = model_config_.object;
+    if (!model_config_.owned_by.empty())
+      yaml_node_["owned_by"] = model_config_.owned_by;
+    if (!std::isnan(model_config_.top_p))
+      yaml_node_["top_p"] = model_config_.top_p;
+    if (!std::isnan(model_config_.temperature))
+      yaml_node_["temperature"] = model_config_.temperature;
+    if (!std::isnan(model_config_.frequency_penalty))
+      yaml_node_["frequency_penalty"] = model_config_.frequency_penalty;
+    if (!std::isnan(model_config_.presence_penalty))
+      yaml_node_["presence_penalty"] = model_config_.presence_penalty;
+    if (!std::isnan(static_cast<double>(model_config_.max_tokens)))
+      yaml_node_["max_tokens"] = model_config_.max_tokens;
+    if (!std::isnan(static_cast<double>(model_config_.ngl)))
+      yaml_node_["ngl"] = model_config_.ngl;
+    if (!std::isnan(static_cast<double>(model_config_.ctx_len)))
+      yaml_node_["ctx_len"] = model_config_.ctx_len;
+    if (!std::isnan(static_cast<double>(model_config_.tp)))
+      yaml_node_["tp"] = model_config_.tp;
+    if (!std::isnan(static_cast<double>(model_config_.stream)))
+      yaml_node_["stream"] = model_config_.stream;
+    if (!std::isnan(static_cast<double>(model_config_.text_model)))
+      yaml_node_["text_model"] = model_config_.text_model;
+    if (model_config_.stop.size() > 0)
+      yaml_node_["stop"] = model_config_.stop;
+    if (model_config_.files.size() > 0)
+      yaml_node_["files"] = model_config_.files;
+    yaml_node_["created"] = std::time(nullptr);
+  } catch (const std::exception& e) {
+    std::cerr << "Error when updating model config: " << e.what() << std::endl;
+    std::cerr << "Reverting..." << std::endl;
+    model_config_ = std::move(tmp);
+  }
+}
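One caveat about the NaN defaults that UpdateModelConfig guards against: std::numeric_limits<T>::quiet_NaN() only produces a real NaN for floating-point T; for integral and bool T it returns T(), i.e. 0/false. So the std::isnan checks above work for top_p/temperature/etc. but can never distinguish "unset" from a legitimate 0 for the int fields. A minimal sketch of the pitfall, using only the definitions from model_config.h:

   #include <cmath>
   #include <limits>

   int main() {
     int max_tokens = std::numeric_limits<int>::quiet_NaN();  // value is 0, not NaN
     float top_p = std::numeric_limits<float>::quiet_NaN();   // a real NaN
     bool int_unset = std::isnan(static_cast<double>(max_tokens));  // always false
     bool flt_unset = std::isnan(top_p);                            // true
   }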
+
+// Method to write all attributes to a YAML file
+void YamlHandler::WriteYamlFile(const std::string& file_path) const {
+  try {
+    std::ofstream outFile(file_path);
+    if (!outFile) {
+      throw std::runtime_error("Failed to open output file.");
+    }
+    outFile << yaml_node_;
+    outFile.close();
+  } catch (const std::exception& e) {
+    std::cerr << "Error writing to file: " << e.what() << std::endl;
+    throw;
+  }
+}
+} // namespace config
\ No newline at end of file
diff --git a/engine/config/yaml_config.h b/engine/config/yaml_config.h
new file mode 100644
index 000000000..3f8af5400
--- /dev/null
+++ b/engine/config/yaml_config.h
@@ -0,0 +1,32 @@
+#pragma once
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/yaml.h"
+#include "model_config.h"
+namespace config {
+class YamlHandler {
+ private:
+  YAML::Node yaml_node_;
+  ModelConfig model_config_;
+  void ReadYamlFile(const std::string& file_path);
+  void ModelConfigFromYaml();
+  void SplitPromptTemplate(ModelConfig& mc);
+
+ public:
+  // Method to read YAML file
+  void Reset();
+
+  const ModelConfig& GetModelConfig() const;
+
+  void ModelConfigFromFile(const std::string& file_path);
+
+  void UpdateModelConfig(ModelConfig new_model_config);
+  // Method to write all attributes to a YAML file
+  void WriteYamlFile(const std::string& file_path) const;
+};
+}
\ No newline at end of file
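Taken together, the handler is meant to be driven in a read-modify-write cycle. A minimal caller-side sketch (the model.yml path is hypothetical):

   config::YamlHandler handler;
   handler.ModelConfigFromFile("./models/tinyllama/model.yml");  // hypothetical path
   config::ModelConfig mc = handler.GetModelConfig();
   mc.ctx_len = 4096;              // tweak a field
   handler.UpdateModelConfig(mc);  // rebuilds yaml_node_ from the struct
   handler.WriteYamlFile("./models/tinyllama/model.yml");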
diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc
new file mode 100644
index 000000000..38c071546
--- /dev/null
+++ b/engine/controllers/command_line_parser.cc
@@ -0,0 +1,93 @@
+#include "command_line_parser.h"
+#include "commands/model_pull_cmd.h"
+#include "commands/start_model_cmd.h"
+#include "commands/stop_model_cmd.h"
+#include "commands/stop_server_cmd.h"
+#include "config/yaml_config.h"
+#include "utils/cortex_utils.h"
+
+CommandLineParser::CommandLineParser() : app_("Cortex.cpp CLI") {}
+
+bool CommandLineParser::SetupCommand(int argc, char** argv) {
+  // Models group commands
+  {
+    auto models_cmd =
+        app_.add_subcommand("models", "Subcommands for managing models");
+
+    auto start_cmd = models_cmd->add_subcommand("start", "Start a model by ID");
+    std::string model_id;
+    start_cmd->add_option("model_id", model_id, "");
+    start_cmd->callback([&model_id]() {
+      // TODO(sang) switch to .yaml when implement model manager
+      config::YamlHandler yaml_handler;
+      yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() +
+                                       "/models/" + model_id + "/model.yml");
+      commands::StartModelCmd smc("127.0.0.1", 3928,
+                                  yaml_handler.GetModelConfig());
+      smc.Exec();
+    });
+
+    auto stop_model_cmd =
+        models_cmd->add_subcommand("stop", "Stop a model by ID");
+    stop_model_cmd->add_option("model_id", model_id, "");
+    stop_model_cmd->callback([&model_id]() {
+      // TODO(sang) switch to .yaml when implement model manager
+      config::YamlHandler yaml_handler;
+      yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() +
+                                       "/models/" + model_id + "/model.yml");
+      commands::StopModelCmd smc("127.0.0.1", 3928,
+                                 yaml_handler.GetModelConfig());
+      smc.Exec();
+    });
+
+    auto list_models_cmd =
+        models_cmd->add_subcommand("list", "List all models locally");
+
+    //// Models group commands
+    auto model_pull_cmd =
+        app_.add_subcommand("pull",
+                            "Download a model from a registry. Working with "
+                            "HuggingFace repositories. For available models, "
+                            "please visit https://huggingface.co/cortexso");
+    model_pull_cmd->add_option("model_id", model_id, "");
+    model_pull_cmd->callback([&model_id]() {
+      commands::ModelPullCmd command(model_id);
+      command.Exec();
+    });
+
+    auto remove_cmd =
+        models_cmd->add_subcommand("remove", "Remove a model by ID locally");
+    auto update_cmd =
+        models_cmd->add_subcommand("update", "Update configuration of a model");
+  }
+  //// End of Models group commands
+
+  auto chat_cmd = app_.add_subcommand("chat", "Send a chat request to a model");
+
+  auto ps_cmd =
+      app_.add_subcommand("ps", "Show running models and their status");
+
+  auto embeddings_cmd = app_.add_subcommand(
+      "embeddings", "Creates an embedding vector representing the input text");
+
+  // engines group commands
+  auto engines_cmd = app_.add_subcommand("engines", "Get cortex engines");
+  auto list_engines_cmd =
+      engines_cmd->add_subcommand("list", "List all cortex engines");
+  auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine");
+  auto init_cmd = engines_cmd->add_subcommand("init", "Setup engine");
+
+  auto run_cmd =
+      app_.add_subcommand("run", "Shortcut to start a model and chat");
+
+  auto stop_cmd = app_.add_subcommand("stop", "Stop the API server");
+
+  stop_cmd->callback([] {
+    // TODO get info from config file
+    commands::StopServerCmd ssc("127.0.0.1", 3928);
+    ssc.Exec();
+  });
+
+  CLI11_PARSE(app_, argc, argv);
+  return true;
+}
\ No newline at end of file
diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h
new file mode 100644
index 000000000..3324d45e0
--- /dev/null
+++ b/engine/controllers/command_line_parser.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <string>
+#include "CLI/CLI.hpp"
+
+class CommandLineParser {
+ public:
+  CommandLineParser();
+  bool SetupCommand(int argc, char** argv);
+
+ private:
+  CLI::App app_;
+};
\ No newline at end of file
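Once main.cc routes argv into this parser (PATCH 08 later in this series), the CLI11 tree above is exercised as plain subcommands. A sketch, assuming the built binary is named cortex and a model id of tinyllama (both hypothetical):

   cortex pull tinyllama            # top-level "pull", downloads from cortexso
   cortex models start tinyllama    # reads ./models/tinyllama/model.yml
   cortex models stop tinyllama
   cortex models list               # gets its callback in PATCH 07 below
   cortex stop                      # stops the API server at 127.0.0.1:3928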
diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc
new file mode 100644
index 000000000..c5c970412
--- /dev/null
+++ b/engine/services/download_service.cc
@@ -0,0 +1,111 @@
+#include <filesystem>
+#include <fstream>
+#include <sstream>
+#include <thread>
+#include <trantor/utils/Logger.h>
+
+#include "download_service.h"
+
+void DownloadService::AddDownloadTask(const DownloadTask& task) {
+  tasks.push_back(task);
+
+  for (const auto& item : task.items) {
+    StartDownloadItem(task.id, item);
+  }
+}
+
+void DownloadService::AddAsyncDownloadTask(const DownloadTask& task) {
+  tasks.push_back(task);
+  for (const auto& item : task.items) {
+    // TODO: maybe apply std::async is better?
+    std::thread([this, task, item]() {
+      this->StartDownloadItem(task.id, item);
+    }).detach();
+  }
+}
+
+const std::string DownloadService::GetContainerFolderPath(DownloadType type) {
+  std::filesystem::path container_folder_path;
+
+  switch (type) {
+    case DownloadType::Model: {
+      container_folder_path = std::filesystem::current_path() / "models";
+      break;
+    }
+    case DownloadType::Engine: {
+      container_folder_path = std::filesystem::current_path() / "engines";
+      break;
+    }
+    default: {
+      container_folder_path = std::filesystem::current_path() / "misc";
+      break;
+    }
+  }
+
+  if (!std::filesystem::exists(container_folder_path)) {
+    LOG_INFO << "Creating folder: " << container_folder_path.string() << "\n";
+    std::filesystem::create_directory(container_folder_path);
+  }
+
+  return container_folder_path.string();
+}
+
+void DownloadService::StartDownloadItem(const std::string& downloadId,
+                                        const DownloadItem& item,
+                                        const DownloadItemCb& callback) {
+  LOG_INFO << "Downloading item: " << downloadId;
+  const std::string containerFolderPath = GetContainerFolderPath(item.type);
+  LOG_INFO << "Container folder path: " << containerFolderPath << "\n";
+  const std::filesystem::path itemFolderPath =
+      std::filesystem::path(containerFolderPath) /
+      std::filesystem::path(downloadId);
+  if (!std::filesystem::exists(itemFolderPath)) {
+    LOG_INFO << "Creating " << itemFolderPath.string();
+    std::filesystem::create_directory(itemFolderPath);
+  }
+
+  LOG_INFO << "itemFolderPath: " << itemFolderPath.string();
+  auto outputFilePath = itemFolderPath / std::filesystem::path(item.fileName);
+  LOG_INFO << "Absolute file output: " << outputFilePath.string();
+
+  uint64_t last = 0;
+  uint64_t tot = 0;
+  std::ofstream outputFile(outputFilePath, std::ios::binary);
+
+  std::ostringstream downloadUrl;
+  downloadUrl << item.host << "/" << item.path;
+  LOG_INFO << "Downloading url: " << downloadUrl.str();
+
+  httplib::Client client(item.host);
+
+  client.set_follow_location(true);
+  client.Get(
+      downloadUrl.str(),
+      [](const httplib::Response& res) {
+        if (res.status != httplib::StatusCode::OK_200) {
+          LOG_ERROR << "HTTP error: " << res.reason;
+          return false;
+        }
+        return true;
+      },
+      [&](const char* data, size_t data_length) {
+        tot += data_length;
+        outputFile.write(data, data_length);
+        return true;
+      },
+      [&last, this](uint64_t current, uint64_t total) {
+        if (current - last > kUpdateProgressThreshold) {
+          last = current;
+          LOG_INFO << "Downloading: " << current << " / " << total;
+        }
+        if (current == total) {
+          LOG_INFO << "Done download: "
+                   << static_cast<double>(total) / 1024 / 1024 << " MiB";
+          return false;
+        }
+        return true;
+      });
+  if (callback) {
+    callback(outputFilePath.string());
+  }
+}
\ No newline at end of file
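Caller-side, the service is fed a DownloadTask built from the structs declared in the header that follows. A minimal sketch (ids, host, and paths are made up for illustration):

   DownloadItem item;
   item.id = "model.gguf";                                   // hypothetical
   item.host = "https://huggingface.co";
   item.fileName = "model.gguf";
   item.type = DownloadType::Model;
   item.path = "cortexso/tinyllama/resolve/main/model.gguf"; // hypothetical
   item.status = DownloadStatus::Pending;

   DownloadTask task;
   task.id = "tinyllama";
   task.type = DownloadType::Model;
   task.items = {item};

   DownloadService service;
   service.AddDownloadTask(task);   // blocks; AddAsyncDownloadTask detaches threads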
diff --git a/engine/services/download_service.h b/engine/services/download_service.h
new file mode 100644
index 000000000..e7fc14b6d
--- /dev/null
+++ b/engine/services/download_service.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include <optional>
+#include <string>
+#include "httplib.h"
+
+enum class DownloadType { Model, Engine, Miscellaneous };
+
+enum class DownloadStatus {
+  Pending,
+  Downloading,
+  Error,
+  Downloaded,
+};
+
+struct DownloadItem {
+  std::string id;
+
+  std::string host;
+
+  std::string fileName;
+
+  DownloadType type;
+
+  std::string path;
+
+  uint64_t totalSize;
+
+  uint64_t transferredSize;
+
+  DownloadStatus status;
+
+  std::optional<std::string> checksum;
+};
+
+struct DownloadTask {
+  std::string id;
+  DownloadType type;
+  float percentage;
+  DownloadStatus status;
+  std::optional<std::string> error;
+  std::vector<DownloadItem> items;
+};
+
+class DownloadService {
+ public:
+  /**
+   * @brief Synchronously download.
+   *
+   * @param task
+   */
+  using DownloadItemCb = std::function<void(const std::string&)>;
+  void AddDownloadTask(const DownloadTask& task);
+
+  void AddAsyncDownloadTask(const DownloadTask& task);
+
+  // TODO: [NamH] implement the following methods
+  //  void removeTask(const std::string &id);
+  //  void registerCallback
+  //  setup folder path at runtime
+  //  register action after downloaded
+
+ private:
+  void StartDownloadItem(const std::string& downloadId,
+                         const DownloadItem& item,
+                         const DownloadItemCb& callback = nullptr);
+
+  const std::string GetContainerFolderPath(DownloadType type);
+
+  // store tasks so we can abort it later
+  std::vector<DownloadTask> tasks;
+  const int kUpdateProgressThreshold = 100000000;
+};
\ No newline at end of file
diff --git a/engine/utils/archive_utils.h b/engine/utils/archive_utils.h
new file mode 100644
index 000000000..6b2f5767d
--- /dev/null
+++ b/engine/utils/archive_utils.h
@@ -0,0 +1,146 @@
+#include <archive.h>
+#include <archive_entry.h>
+#include <minizip/unzip.h>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <trantor/utils/Logger.h>
+
+namespace archive_utils {
+inline bool UnzipFile(const std::string& input_zip_path,
+                      const std::string& destination_path);
+inline bool UntarFile(const std::string& input_tar_path,
+                      const std::string& destination_path);
+
+inline bool ExtractArchive(const std::string& input_path,
+                           const std::string& destination_path) {
+  if (input_path.find(".zip") != std::string::npos) {
+    return UnzipFile(input_path, destination_path);
+  } else if (input_path.find(".tar") != std::string::npos ||
+             input_path.find(".tar.gz") != std::string::npos) {
+    return UntarFile(input_path, destination_path);
+  } else {
+    LOG_ERROR << "Unsupported file type: " << input_path << "\n";
+    return false;
+  }
+}
+
+inline bool UnzipFile(const std::string& input_zip_path,
+                      const std::string& destination_path) {
+  unzFile zip_file = unzOpen(input_zip_path.c_str());
+  if (!zip_file) {
+    LOG_ERROR << "Failed to open zip file: " << input_zip_path << "\n";
+    return false;
+  }
+
+  std::filesystem::create_directories(destination_path);
+
+  if (unzGoToFirstFile(zip_file) != UNZ_OK) {
+    LOG_ERROR << "Error opening first file in zip" << "\n";
+    unzClose(zip_file);
+    return false;
+  }
+
+  do {
+    unz_file_info file_info;
+    char file_name[256];
+    if (unzGetCurrentFileInfo(zip_file, &file_info, file_name,
+                              sizeof(file_name), nullptr, 0, nullptr,
+                              0) != UNZ_OK) {
+      LOG_ERROR << "Failed to get file info" << "\n";
+      unzClose(zip_file);
+      return false;
+    }
+
+    std::string full_path = destination_path + "/" + file_name;
+
+    if (file_name[strlen(file_name) - 1] == '/') {
+      std::filesystem::create_directories(full_path);
+    } else {
+      std::filesystem::create_directories(
+          std::filesystem::path(full_path).parent_path());
+
+      if (unzOpenCurrentFile(zip_file) != UNZ_OK) {
+        LOG_ERROR << "Failed to open file in zip: " << file_name << "\n";
+        unzClose(zip_file);
+        return false;
+      }
+
+      std::ofstream outFile(full_path, std::ios::binary);
+      if (!outFile.is_open()) {
+        LOG_ERROR << "Failed to create file: " << full_path << "\n";
+        unzCloseCurrentFile(zip_file);
+        unzClose(zip_file);
+        return false;
+      }
+
+      char buffer[8192];
+      int bytes_read;
+      while ((bytes_read =
+                  unzReadCurrentFile(zip_file, buffer, sizeof(buffer))) > 0) {
+        outFile.write(buffer, bytes_read);
+      }
+
+      outFile.close();
+      unzCloseCurrentFile(zip_file);
+    }
+  } while (unzGoToNextFile(zip_file) == UNZ_OK);
+
+  unzClose(zip_file);
+  LOG_INFO << "Extracted successfully " << input_zip_path << " to "
+           << destination_path << "\n";
+  return true;
+}
+
+inline bool UntarFile(const std::string& input_tar_path,
+                      const std::string& destination_path) {
+  struct archive* tar_archive = archive_read_new();
+  archive_read_support_format_tar(tar_archive);
+  archive_read_support_compression_gzip(tar_archive);
+
+  if (archive_read_open_filename(tar_archive, input_tar_path.c_str(), 10240) !=
+      ARCHIVE_OK) {
+    LOG_ERROR << "Failed to open tar file: " << input_tar_path << "\n";
+    archive_read_free(tar_archive);
+    return false;
+  }
+
+  std::filesystem::create_directories(destination_path);
+  struct archive_entry* entry;
+  while (archive_read_next_header(tar_archive, &entry) == ARCHIVE_OK) {
+    const char* current_file = archive_entry_pathname(entry);
+    std::string full_path = destination_path + "/" + current_file;
+
+    if (archive_entry_filetype(entry) == AE_IFDIR) {
+      std::filesystem::create_directories(full_path);
+    } else {
+      std::filesystem::create_directories(
+          std::filesystem::path(full_path).parent_path());
+
+      std::ofstream out_file(full_path, std::ios::binary);
+      if (!out_file.is_open()) {
+        LOG_ERROR << "Failed to create file: " << full_path << "\n";
+        archive_read_free(tar_archive);
+        return false;
+      }
+
+      const void* buff;
+      size_t size;
+      la_int64_t offset;
+      while (archive_read_data_block(tar_archive, &buff, &size, &offset) ==
+             ARCHIVE_OK) {
+        out_file.write(static_cast<const char*>(buff), size);
+      }
+
+      out_file.close();
+    }
+
+    archive_entry_clear(entry);
+  }
+
+  archive_read_free(tar_archive);
+  LOG_INFO << "Extracted successfully " << input_tar_path << " to "
+           << destination_path << "\n";
+  return true;
+}
+} // namespace archive_utils
\ No newline at end of file
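The engine-init flow added later in this series calls straight into ExtractArchive after a release download finishes. Illustrative call (paths are made up):

   // hypothetical paths, for illustration only
   archive_utils::ExtractArchive("./engines/cortex.llamacpp/engine.tar.gz",
                                 "./engines");

Note that the dispatch uses find(".zip")/find(".tar"), which matches anywhere in the path, not just the suffix; a path containing a directory literally named "my.zip.files" would be routed to the zip branch, so a suffix comparison would be the stricter check.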
diff --git a/engine/utils/cortexso_parser.h b/engine/utils/cortexso_parser.h
new file mode 100644
index 000000000..6150e9f0a
--- /dev/null
+++ b/engine/utils/cortexso_parser.h
@@ -0,0 +1,69 @@
+#include <iostream>
+#include <optional>
+#include <sstream>
+#include <string>
+
+#include <nlohmann/json.hpp>
+#include <trantor/utils/Logger.h>
+#include "httplib.h"
+
+namespace cortexso_parser {
+constexpr static auto kHuggingFaceHost = "https://huggingface.co";
+
+inline std::optional<DownloadTask> getDownloadTask(
+    const std::string& modelId, const std::string& branch = "main") {
+  using namespace nlohmann;
+  std::ostringstream oss;
+  oss << "/api/models/cortexso/" << modelId << "/tree/" << branch;
+  const std::string url = oss.str();
+
+  std::ostringstream repoAndModelId;
+  repoAndModelId << "cortexso/" << modelId;
+  const std::string repoAndModelIdStr = repoAndModelId.str();
+
+  httplib::Client cli(kHuggingFaceHost);
+  if (auto res = cli.Get(url)) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      try {
+        auto jsonResponse = json::parse(res->body);
+
+        std::vector<DownloadItem> downloadItems{};
+        for (auto& [key, value] : jsonResponse.items()) {
+          std::ostringstream downloadUrlOutput;
+          auto path = value["path"].get<std::string>();
+          downloadUrlOutput << repoAndModelIdStr << "/resolve/" << branch << "/"
+                            << path;
+          const std::string downloadUrl = downloadUrlOutput.str();
+
+          DownloadItem downloadItem{};
+          downloadItem.id = path;
+          downloadItem.host = kHuggingFaceHost;
+          downloadItem.fileName = path;
+          downloadItem.type = DownloadType::Model;
+          downloadItem.path = downloadUrl;
+          downloadItem.totalSize = value["size"].get<uint64_t>();
+          downloadItem.transferredSize = 0;
+          downloadItem.status = DownloadStatus::Pending;
+          downloadItems.push_back(downloadItem);
+        }
+
+        DownloadTask downloadTask{};
+        downloadTask.id = modelId;
+        downloadTask.type = DownloadType::Model;
+        downloadTask.percentage = 0.0f;
+        downloadTask.status = DownloadStatus::Pending;
+        downloadTask.error = std::nullopt;
+        downloadTask.items = downloadItems;
+
+        return downloadTask;
+      } catch (const nlohmann::json::parse_error& e) {
+        std::cerr << "JSON parse error: " << e.what() << std::endl;
+      }
+    }
+  } else {
+    auto err = res.error();
+    LOG_ERROR << "HTTP error: " << httplib::to_string(err);
+  }
+  return std::nullopt;
+}
+} // namespace cortexso_parser
\ No newline at end of file
diff --git a/engine/utils/http_util.h b/engine/utils/http_util.h
new file mode 100644
index 000000000..73c53668f
--- /dev/null
+++ b/engine/utils/http_util.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <drogon/HttpController.h>
+
+using namespace drogon;
+
+namespace http_util {
+
+bool HasFieldInReq(const HttpRequestPtr& req,
+                   std::function<void(const HttpResponsePtr&)>& callback,
+                   const std::string& field) {
+  if (auto o = req->getJsonObject(); !o || (*o)[field].isNull()) {
+    Json::Value res;
+    res["message"] = "No " + field + " field in request body";
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
+    resp->setStatusCode(k409Conflict);
+    callback(resp);
+    LOG_WARN << "No " << field << " field in request body";
+    return false;
+  }
+  return true;
+}
+
+} // namespace http_util
\ No newline at end of file
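HasFieldInReq is a guard meant to be called first in a drogon handler, exactly as the Models controller added in PATCH 07 below does. A sketch of the call pattern:

   void Models::PullModel(const HttpRequestPtr& req,
                          std::function<void(const HttpResponsePtr&)>&& callback) const {
     if (!http_util::HasFieldInReq(req, callback, "modelId")) {
       return;  // a 409 with an explanatory message has already been sent
     }
     // safe to read (*req->getJsonObject())["modelId"] from here on
   }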
From 35e45f247cdb6879ec01c7103f4b2a5ef1ee706f Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Mon, 26 Aug 2024 11:40:00 +0700
Subject: [PATCH 03/16] fix: CI with vcpkg

---
 .github/workflows/cortex-cpp-quality-gate.yml | 22 +++++++++++--------
 engine/Makefile                               | 16 ++++++++++----
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index b30fb30de..b9515858f 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -5,7 +5,7 @@ on:
     types: [opened, synchronize, reopened]
     paths:
       [
-        "cortex-cpp/**",
+        "engine/**",
       ]
   workflow_dispatch:
 
@@ -24,25 +24,25 @@ jobs:
         - os: "linux"
           name: "amd64"
           runs-on: "ubuntu-20-04"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}}"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "mac"
          name: "amd64"
          runs-on: "macos-13"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}}"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "mac"
          name: "arm64"
          runs-on: "mac-silicon"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DMAC_ARM64=ON"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DMAC_ARM64=ON -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "windows"
          name: "amd64"
          runs-on: "windows-cuda-12-0"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DCMAKE_TOOLCHAIN_FILE=C:/w/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows-static -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
          build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
          ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
      steps:
@@ -69,26 +69,30 @@ jobs:
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
           AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
 
+      - name: Configure vcpkg
+        run: |
+          make configure-vcpkg
+
       - name: Build
         run: |
-          cd cortex-cpp
+          cd engine
           make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}"
 
       - name: Pre-package
         run: |
-          cd cortex-cpp
+          cd engine
           make pre-package
 
       - name: Package
         run: |
-          cd cortex-cpp
+          cd engine
           make package
 
       - name: Upload Artifact
         uses: actions/upload-artifact@v2
         with:
           name: cortex-cpp-${{ matrix.os }}-${{ matrix.name }}
-          path: ./cortex-cpp/cortex-cpp
+          path: ./engine/cortex-cpp
 
       - name: Upload ccache to s3
         continue-on-error: true
diff --git a/engine/Makefile b/engine/Makefile
index 83c3f61f1..a3fdaa0fd 100644
--- a/engine/Makefile
+++ b/engine/Makefile
@@ -18,19 +18,27 @@ DEVELOPER_ID ?= xxxx
 all:
 	@echo "Specify a target to run"
 
+configure-vcpkg:
+ifeq ($(OS),Windows_NT)
+	@cd engine/vcpkg && bootstrap-vcpkg.bat;
+	@cd engine/vcpkg && vcpkg install
+else ifeq ($(shell uname -s),Linux)
+	@cd engine/vcpkg && ./bootstrap-vcpkg.sh;
+	@cd engine/vcpkg && ./vcpkg install;
+else
+	@cd engine/vcpkg && ./bootstrap-vcpkg.sh;
+	@cd engine/vcpkg && ./vcpkg install;
+endif
+
 # Build the Cortex engine
 build:
 ifeq ($(OS),Windows_NT)
-	@powershell -Command "cmake -S ./cortex-cpp-deps -B ./build-deps/cortex-cpp-deps $(BUILD_DEPS_CMAKE_EXTRA_FLAGS);"
-	@powershell -Command "cmake --build ./build-deps/cortex-cpp-deps --config Release -j4;"
 	@powershell -Command "mkdir -p build; cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release -j4;"
 else ifeq ($(shell uname -s),Linux)
-	@./install_deps.sh;
 	@mkdir -p build && cd build; \
 	cmake .. $(CMAKE_EXTRA_FLAGS); \
 	make -j4;
 else
-	@./install_deps.sh;
 	@mkdir -p build && cd build; \
 	cmake .. $(CMAKE_EXTRA_FLAGS); \
 	make -j4;
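The same two-step flow works locally once PATCH 04 below fixes the directory prefixes (as committed here, the recipe says `cd engine/vcpkg`, which only resolves when make runs from the repository root rather than from engine/). A sketch, assuming the vcpkg submodule is checked out at engine/vcpkg:

   cd engine
   make configure-vcpkg    # bootstraps vcpkg and installs manifest dependencies
   make build CMAKE_EXTRA_FLAGS="-DCMAKE_TOOLCHAIN_FILE=$PWD/vcpkg/scripts/buildsystems/vcpkg.cmake"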
From b207d23e3c27ea0a7a2a9b14fc03a98065b1da24 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Mon, 26 Aug 2024 12:07:50 +0700
Subject: [PATCH 04/16] fix: Makefile

---
 .github/workflows/cortex-cpp-quality-gate.yml |  1 +
 engine/Makefile                               | 12 ++++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index b9515858f..97db3ea2d 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -71,6 +71,7 @@ jobs:
 
       - name: Configure vcpkg
         run: |
+          cd engine
           make configure-vcpkg
 
       - name: Build
diff --git a/engine/Makefile b/engine/Makefile
index a3fdaa0fd..ae4a49258 100644
--- a/engine/Makefile
+++ b/engine/Makefile
@@ -20,14 +20,14 @@ all:
 
 configure-vcpkg:
 ifeq ($(OS),Windows_NT)
-	@cd engine/vcpkg && bootstrap-vcpkg.bat;
-	@cd engine/vcpkg && vcpkg install
+	@cd vcpkg && bootstrap-vcpkg.bat;
+	@cd vcpkg && vcpkg install
 else ifeq ($(shell uname -s),Linux)
-	@cd engine/vcpkg && ./bootstrap-vcpkg.sh;
-	@cd engine/vcpkg && ./vcpkg install;
+	@cd vcpkg && ./bootstrap-vcpkg.sh;
+	@cd vcpkg && ./vcpkg install;
 else
-	@cd engine/vcpkg && ./bootstrap-vcpkg.sh;
-	@cd engine/vcpkg && ./vcpkg install;
+	@cd vcpkg && ./bootstrap-vcpkg.sh;
+	@cd vcpkg && ./vcpkg install;
 endif

From 8a04157b845e54bec45c1f88e11c08e4ebc6e9d1 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Mon, 26 Aug 2024 14:23:12 +0700
Subject: [PATCH 05/16] fix: quality gate

---
 .github/workflows/cortex-cpp-quality-gate.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 97db3ea2d..85b7f1550 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -23,7 +23,7 @@ jobs:
       include:
         - os: "linux"
           name: "amd64"
-          runs-on: "ubuntu-20-04"
+          runs-on: "ubuntu-20-04-cuda-12-0"
           cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
           build-deps-cmake-flags: ""
           ccache-dir: ''
@@ -35,7 +35,7 @@ jobs:
           ccache-dir: ''
         - os: "mac"
           name: "arm64"
-          runs-on: "mac-silicon"
+          runs-on: "macos-latest"
           cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DMAC_ARM64=ON -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
           build-deps-cmake-flags: ""
           ccache-dir: ''

From c604d3852f71a09311c6b7ef80f04971d54fa78f Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Mon, 26 Aug 2024 14:41:39 +0700
Subject: [PATCH 06/16] fix: CI

---
 .github/workflows/cortex-cpp-quality-gate.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 85b7f1550..092423821 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -24,25 +24,25 @@ jobs:
         - os: "linux"
           name: "amd64"
           runs-on: "ubuntu-20-04-cuda-12-0"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex/cortex/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "mac"
          name: "amd64"
          runs-on: "macos-13"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex/cortex/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "mac"
          name: "arm64"
          runs-on: "macos-latest"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DMAC_ARM64=ON -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DMAC_ARM64=ON -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex/cortex/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "windows"
          name: "amd64"
          runs-on: "windows-cuda-12-0"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DCMAKE_TOOLCHAIN_FILE=C:/w/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows-static -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DCMAKE_TOOLCHAIN_FILE=C:/w/cortex/cortex/engine/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows-static -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
          build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
          ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
      steps:

From 5e2fe08e3e3437e90f377113047274d2fa0c2a97 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Tue, 27 Aug 2024 10:44:53 +0700
Subject: [PATCH 07/16] feat: model list + post download process + download
 engine

---
 engine/commands/engine_init_cmd.cc        | 109 ++++++++++++++++++++++
 engine/commands/engine_init_cmd.h         |  21 +++++
 engine/commands/model_list_cmd.cc         |  30 ++++++
 engine/commands/model_list_cmd.h          |  11 +++
 engine/commands/model_pull_cmd.cc         |   4 +-
 engine/controllers/command_line_parser.cc |  34 +++++--
 engine/controllers/models.cc              | 104 +++++++++++++++++++++
 engine/controllers/models.h               |  23 +++++
 engine/controllers/server.cc              |   2 +-
 engine/controllers/server.h               |   4 +-
 engine/services/download_service.cc       |  77 ++++++---------
 engine/services/download_service.h        |  21 ++---
 engine/utils/cortex_utils.h               |  14 ++-
 engine/utils/cortexso_parser.h            |   7 +-
 engine/utils/file_manager_utils.h         |  43 +++++++++
 engine/utils/http_util.h                  |   6 +-
 engine/utils/model_callback_utils.h       |  51 ++++++++++
 engine/utils/system_info_utils.h          |  36 +++++++
 18 files changed, 512 insertions(+), 85 deletions(-)
 create mode 100644 engine/commands/engine_init_cmd.cc
 create mode 100644 engine/commands/engine_init_cmd.h
 create mode 100644 engine/commands/model_list_cmd.cc
 create mode 100644 engine/commands/model_list_cmd.h
 create mode 100644 engine/controllers/models.cc
 create mode 100644 engine/controllers/models.h
 create mode 100644 engine/utils/file_manager_utils.h
 create mode 100644 engine/utils/model_callback_utils.h
 create mode 100644 engine/utils/system_info_utils.h
diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc
new file mode 100644
index 000000000..430433e4d
--- /dev/null
+++ b/engine/commands/engine_init_cmd.cc
@@ -0,0 +1,109 @@
+#include "engine_init_cmd.h"
+#include <utility>
+#include "services/download_service.h"
+#include "trantor/utils/Logger.h"
+// clang-format off
+#include "utils/cortexso_parser.h"
+#include "utils/archive_utils.h"
+#include "utils/system_info_utils.h"
+// clang-format on
+
+namespace commands {
+
+EngineInitCmd::EngineInitCmd(std::string engineName, std::string version)
+    : engineName_(std::move(engineName)), version_(std::move(version)) {}
+
+void EngineInitCmd::Exec() const {
+  if (engineName_.empty()) {
+    LOG_ERROR << "Engine name is required";
+    return;
+  }
+
+  // Check if the architecture and OS are supported
+  auto system_info = system_info_utils::GetSystemInfo();
+  if (system_info.arch == system_info_utils::kUnsupported ||
+      system_info.os == system_info_utils::kUnsupported) {
+    LOG_ERROR << "Unsupported OS or architecture: " << system_info.os << ", "
+              << system_info.arch;
+    return;
+  }
+
+  // check if engine is supported
+  if (std::find(supportedEngines_.begin(), supportedEngines_.end(),
+                engineName_) == supportedEngines_.end()) {
+    LOG_ERROR << "Engine not supported";
+    return;
+  }
+
+  constexpr auto gitHubHost = "https://api.github.com";
+
+  std::ostringstream engineReleasePath;
+  engineReleasePath << "/repos/janhq/" << engineName_ << "/releases/"
+                    << version_;
+
+  using namespace nlohmann;
+
+  httplib::Client cli(gitHubHost);
+  if (auto res = cli.Get(engineReleasePath.str())) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      try {
+        auto jsonResponse = json::parse(res->body);
+        auto assets = jsonResponse["assets"];
+        auto os_arch{system_info.os + "-" + system_info.arch};
+
+        for (auto& asset : assets) {
+          auto assetName = asset["name"].get<std::string>();
+          if (assetName.find(os_arch) != std::string::npos) {
+            std::string host{"https://github.com"};
+
+            auto full_url = asset["browser_download_url"].get<std::string>();
+            std::string path = full_url.substr(host.length());
+
+            auto fileName = asset["name"].get<std::string>();
+            LOG_INFO << "URL: " << full_url;
+
+            auto downloadTask = DownloadTask{.id = engineName_,
+                                             .type = DownloadType::Engine,
+                                             .error = std::nullopt,
+                                             .items = {DownloadItem{
+                                                 .id = engineName_,
+                                                 .host = host,
+                                                 .fileName = fileName,
+                                                 .type = DownloadType::Engine,
+                                                 .path = path,
+                                             }}};
+
+            DownloadService().AddDownloadTask(
+                downloadTask,
+                [&downloadTask](const std::string& absolute_path) {
+                  // try to unzip the downloaded file
+                  std::filesystem::path downloadedEnginePath{absolute_path};
+                  LOG_INFO << "Downloaded engine path: "
+                           << downloadedEnginePath.string();
+
+                  archive_utils::ExtractArchive(
+                      downloadedEnginePath.string(),
+                      downloadedEnginePath.parent_path()
+                          .parent_path()
+                          .string());
+
+                  // remove the downloaded file
+                  std::filesystem::remove(absolute_path);
+                  LOG_INFO << "Finished!";
+                });
+
+            return;
+          }
+        }
+        LOG_ERROR << "No asset found for " << os_arch;
+      } catch (const json::parse_error& e) {
+        std::cerr << "JSON parse error: " << e.what() << std::endl;
+      }
+    }
+  } else {
+    auto err = res.error();
+    LOG_ERROR << "HTTP error: " << httplib::to_string(err);
+  }
+}
+
+}; // namespace commands
\ No newline at end of file
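On the CLI this is reached through the engines init subcommand added below. Sketch (binary name assumed):

   cortex engines init -n cortex.llamacpp      # version_ defaults to "latest"

Note that the request path /repos/janhq/<engine>/releases/<version> matches the GitHub REST API only for the literal version "latest"; fetching a specific tag requires the /releases/tags/<tag> form, so explicit -v values should be treated as untested here.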
000000000..09b908e37
--- /dev/null
+++ b/engine/commands/engine_init_cmd.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <array>
+#include <string>
+
+namespace commands {
+
+class EngineInitCmd {
+ public:
+  EngineInitCmd(std::string engineName, std::string version);
+
+  void Exec() const;
+
+ private:
+  std::string engineName_;
+  std::string version_;
+
+  static constexpr std::array<const char*, 1> supportedEngines_ = {
+      "cortex.llamacpp"};
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/model_list_cmd.cc b/engine/commands/model_list_cmd.cc
new file mode 100644
index 000000000..2aedd6d24
--- /dev/null
+++ b/engine/commands/model_list_cmd.cc
@@ -0,0 +1,30 @@
+#include "model_list_cmd.h"
+#include <filesystem>
+#include <iostream>
+#include <vector>
+#include "utils/cortex_utils.h"
+#include "config/yaml_config.h"
+#include "trantor/utils/Logger.h"
+namespace commands {
+
+void ModelListCmd::Exec() {
+  if (std::filesystem::exists(cortex_utils::models_folder) &&
+      std::filesystem::is_directory(cortex_utils::models_folder)) {
+    // Iterate through directory
+    for (const auto& entry :
+         std::filesystem::directory_iterator(cortex_utils::models_folder)) {
+      if (entry.is_regular_file() && entry.path().extension() == ".yaml") {
+        try {
+          config::YamlHandler handler;
+          handler.ModelConfigFromFile(entry.path().string());
+          std::cout << "Model ID: " << entry.path().stem().string()
+                    << ", Engine: " << handler.GetModelConfig().engine << std::endl;
+        } catch (const std::exception& e) {
+          LOG_ERROR << "Error reading yaml file '" << entry.path().string()
+                    << "': " << e.what();
+        }
+      }
+    }
+  }
+}
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/model_list_cmd.h b/engine/commands/model_list_cmd.h
new file mode 100644
+++ b/engine/commands/model_list_cmd.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <string>
+
+namespace commands {
+
+class ModelListCmd {
+ public:
+  void Exec();
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/model_pull_cmd.cc b/engine/commands/model_pull_cmd.cc
index 6135efb43..9dcd8c4ef 100644
--- a/engine/commands/model_pull_cmd.cc
+++ b/engine/commands/model_pull_cmd.cc
@@ -3,6 +3,7 @@
 #include "services/download_service.h"
 #include "trantor/utils/Logger.h"
 #include "utils/cortexso_parser.h"
+#include "utils/model_callback_utils.h"
 
 namespace commands {
 ModelPullCmd::ModelPullCmd(std::string modelHandle)
@@ -12,7 +13,8 @@ void ModelPullCmd::Exec() {
   auto downloadTask = cortexso_parser::getDownloadTask(modelHandle_);
   if (downloadTask.has_value()) {
     DownloadService downloadService;
-    downloadService.AddDownloadTask(downloadTask.value());
+    downloadService.AddDownloadTask(downloadTask.value(),
+                                    model_callback_utils::DownloadModelCb);
     std::cout << "Download finished" << std::endl;
   } else {
     std::cout << "Model not found" << std::endl;
Working with " @@ -60,7 +65,6 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto update_cmd = models_cmd->add_subcommand("update", "Update configuration of a model"); } - //// End of Models group commands auto chat_cmd = app_.add_subcommand("chat", "Send a chat request to a model"); @@ -71,11 +75,27 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { "embeddings", "Creates an embedding vector representing the input text"); // engines group commands - auto engines_cmd = app_.add_subcommand("engines", "Get cortex engines"); - auto list_engines_cmd = - engines_cmd->add_subcommand("list", "List all cortex engines"); - auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine"); - auto init_cmd = engines_cmd->add_subcommand("init", "Setup engine"); + { + auto engines_cmd = app_.add_subcommand("engines", "Get cortex engines"); + auto list_engines_cmd = + engines_cmd->add_subcommand("list", "List all cortex engines"); + auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine"); + + { // Engine init command + auto init_cmd = engines_cmd->add_subcommand("init", "Initialize engine"); + std::string engine_name; + std::string version = "latest"; + + init_cmd->add_option("-n,--name", engine_name, + "Engine name. E.g: cortex.llamacpp"); + init_cmd->add_option("-v,--version", version, + "Engine version. Default will be latest"); + init_cmd->callback([&engine_name, &version]() { + commands::EngineInitCmd eic(engine_name, version); + eic.Exec(); + }); + } + } auto run_cmd = app_.add_subcommand("run", "Shortcut to start a model and chat"); diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc new file mode 100644 index 000000000..e445ff90a --- /dev/null +++ b/engine/controllers/models.cc @@ -0,0 +1,104 @@ +#include "models.h" +#include "config/yaml_config.h" +#include "trantor/utils/Logger.h" +#include "utils/cortex_utils.h" +#include "utils/model_callback_utils.h" + +void Models::PullModel( + const HttpRequestPtr& req, + std::function&& callback) const { + if (!http_util::HasFieldInReq(req, callback, "modelId")) { + return; + } + auto modelHandle = (*(req->getJsonObject())).get("modelId", "").asString(); + LOG_DEBUG << "PullModel, Model handle: " << modelHandle; + if (modelHandle.empty()) { + Json::Value ret; + ret["result"] = "Bad Request"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + + auto downloadTask = cortexso_parser::getDownloadTask(modelHandle); + if (downloadTask.has_value()) { + DownloadService downloadService; + downloadService.AddAsyncDownloadTask(downloadTask.value(), + model_callback_utils::DownloadModelCb); + + Json::Value ret; + ret["result"] = "OK"; + ret["modelHandle"] = modelHandle; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); + } else { + Json::Value ret; + ret["result"] = "Not Found"; + ret["modelHandle"] = modelHandle; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k404NotFound); + callback(resp); + } +} + +void Models::ListModel( + const HttpRequestPtr& req, + std::function&& callback) const { + Json::Value ret; + ret["object"] = "list"; + Json::Value data(Json::arrayValue); + if (std::filesystem::exists(cortex_utils::models_folder) && + std::filesystem::is_directory(cortex_utils::models_folder)) { + // Iterate through directory + for (const auto& entry : + 
diff --git a/engine/controllers/models.h b/engine/controllers/models.h
new file mode 100644
index 000000000..d8f9415e4
--- /dev/null
+++ b/engine/controllers/models.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <drogon/HttpController.h>
+#include <trantor/utils/Logger.h>
+#include "services/download_service.h"
+#include "utils/cortex_utils.h"
+#include "utils/cortexso_parser.h"
+#include "utils/http_util.h"
+
+using namespace drogon;
+
+class Models : public drogon::HttpController<Models> {
+ public:
+  METHOD_LIST_BEGIN
+  METHOD_ADD(Models::PullModel, "/pull", Post);
+  METHOD_ADD(Models::ListModel, "/list", Get);
+  METHOD_LIST_END
+
+  void PullModel(const HttpRequestPtr& req,
+                 std::function<void(const HttpResponsePtr&)>&& callback) const;
+  void ListModel(const HttpRequestPtr& req,
+                 std::function<void(const HttpResponsePtr&)>&& callback) const;
+};
\ No newline at end of file
diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc
index 3de3bc2ba..0c5963d7a 100644
--- a/engine/controllers/server.cc
+++ b/engine/controllers/server.cc
@@ -419,4 +419,4 @@ bool server::HasFieldInReq(
   return true;
 }
 
-} // namespace inferences
+} // namespace inferences
\ No newline at end of file
diff --git a/engine/controllers/server.h b/engine/controllers/server.h
index 1b1360baf..6d811192d 100644
--- a/engine/controllers/server.h
+++ b/engine/controllers/server.h
@@ -20,6 +20,8 @@
 #include "cortex-common/EngineI.h"
 #include "cortex-common/cortexpythoni.h"
 #include "trantor/utils/SerialTaskQueue.h"
+#include "config/yaml_config.h"
+#include "config/gguf_parser.h"
 #include "utils/dylib.h"
 #include "utils/json.hpp"
 #ifndef SERVER_VERBOSE
@@ -155,4 +157,4 @@ class server : public drogon::HttpController<server>,
   std::unordered_map<std::string, EngineInfo> engines_;
   std::string cur_engine_type_;
 };
-}; // namespace inferences
+}; // namespace inferences
\ No newline at end of file
diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc
index c5c970412..4a60a42a8 100644
--- a/engine/services/download_service.cc
+++ b/engine/services/download_service.cc
@@ -1,3 +1,4 @@
+#include <optional>
 #include <filesystem>
 #include <fstream>
 #include <sstream>
@@ -5,82 +6,60 @@
 #include <trantor/utils/Logger.h>
 
 #include "download_service.h"
+#include "utils/file_manager_utils.h"
 
-void DownloadService::AddDownloadTask(const DownloadTask& task) {
+void DownloadService::AddDownloadTask(const DownloadTask& task,
+                                      std::optional<DownloadItemCb> callback) {
   tasks.push_back(task);
 
   for (const auto& item : task.items) {
-    StartDownloadItem(task.id, item);
+    StartDownloadItem(task.id, item, callback);
   }
 }
 
-void DownloadService::AddAsyncDownloadTask(const DownloadTask& task) {
+void DownloadService::AddAsyncDownloadTask(
+    const DownloadTask& task, std::optional<DownloadItemCb> callback) {
   tasks.push_back(task);
+
   for (const auto& item : task.items) {
     // TODO: maybe apply std::async is better?
-    std::thread([this, task, item]() {
-      this->StartDownloadItem(task.id, item);
+    std::thread([this, task, &callback, item]() {
+      this->StartDownloadItem(task.id, item, callback);
     }).detach();
   }
 }
 
-const std::string DownloadService::GetContainerFolderPath(DownloadType type) {
-  std::filesystem::path container_folder_path;
-
-  switch (type) {
-    case DownloadType::Model: {
-      container_folder_path = std::filesystem::current_path() / "models";
-      break;
-    }
-    case DownloadType::Engine: {
-      container_folder_path = std::filesystem::current_path() / "engines";
-      break;
-    }
-    default: {
-      container_folder_path = std::filesystem::current_path() / "misc";
-      break;
-    }
-  }
+void DownloadService::StartDownloadItem(
+    const std::string& downloadId, const DownloadItem& item,
+    std::optional<DownloadItemCb> callback) {
+  LOG_INFO << "Downloading item: " << downloadId;
 
-  if (!std::filesystem::exists(container_folder_path)) {
-    LOG_INFO << "Creating folder: " << container_folder_path.string() << "\n";
-    std::filesystem::create_directory(container_folder_path);
-  }
+  auto containerFolderPath{file_manager_utils::GetContainerFolderPath(
+      file_manager_utils::downloadTypeToString(item.type))};
+  LOG_INFO << "Container folder path: " << containerFolderPath.string() << "\n";
 
-  return container_folder_path.string();
-}
-
-void DownloadService::StartDownloadItem(const std::string& downloadId,
-                                        const DownloadItem& item,
-                                        const DownloadItemCb& callback) {
-  LOG_INFO << "Downloading item: " << downloadId;
-  const std::string containerFolderPath = GetContainerFolderPath(item.type);
-  LOG_INFO << "Container folder path: " << containerFolderPath << "\n";
-  const std::filesystem::path itemFolderPath =
-      std::filesystem::path(containerFolderPath) /
-      std::filesystem::path(downloadId);
+  auto itemFolderPath{containerFolderPath / std::filesystem::path(downloadId)};
+  LOG_INFO << "itemFolderPath: " << itemFolderPath.string();
   if (!std::filesystem::exists(itemFolderPath)) {
     LOG_INFO << "Creating " << itemFolderPath.string();
     std::filesystem::create_directory(itemFolderPath);
   }
 
-  LOG_INFO << "itemFolderPath: " << itemFolderPath.string();
-  auto outputFilePath = itemFolderPath / std::filesystem::path(item.fileName);
+  auto outputFilePath{itemFolderPath / std::filesystem::path(item.fileName)};
   LOG_INFO << "Absolute file output: " << outputFilePath.string();
 
   uint64_t last = 0;
   uint64_t tot = 0;
   std::ofstream outputFile(outputFilePath, std::ios::binary);
 
-  std::ostringstream downloadUrl;
-  downloadUrl << item.host << "/" << item.path;
-  LOG_INFO << "Downloading url: " << downloadUrl.str();
+  auto downloadUrl{item.host + "/" + item.path};
+  LOG_INFO << "Downloading url: " << downloadUrl;
 
   httplib::Client client(item.host);
 
   client.set_follow_location(true);
   client.Get(
-      downloadUrl.str(),
+      downloadUrl,
       [](const httplib::Response& res) {
         if (res.status != httplib::StatusCode::OK_200) {
           LOG_ERROR << "HTTP error: " << res.reason;
@@ -93,19 +72,21 @@ void DownloadService::StartDownloadItem(const std::string& downloadId,
         outputFile.write(data, data_length);
         return true;
       },
-      [&last, this](uint64_t current, uint64_t total) {
+      [&last, &outputFile, &callback, outputFilePath, this](uint64_t current,
+                                                            uint64_t total) {
         if (current - last > kUpdateProgressThreshold) {
           last = current;
           LOG_INFO << "Downloading: " << current << " / " << total;
         }
         if (current == total) {
+          outputFile.flush();
           LOG_INFO << "Done download: "
                    << static_cast<double>(total) / 1024 / 1024 << " MiB";
+          if (callback.has_value()) {
+            callback.value()(outputFilePath.string());
+          }
           return false;
         }
         return true;
       });
-  if (callback) {
-    callback(outputFilePath.string());
-  }
 }
\ No newline at end of file
diff --git a/engine/services/download_service.h b/engine/services/download_service.h
index e7fc14b6d..86aefeb52 100644
--- a/engine/services/download_service.h
+++ b/engine/services/download_service.h
@@ -1,8 +1,8 @@
 #pragma once
 
+#include <functional>
 #include <optional>
 #include <string>
-#include "httplib.h"
 
 enum class DownloadType { Model, Engine, Miscellaneous };
 
@@ -24,20 +24,12 @@ struct DownloadItem {
 
   std::string path;
 
-  uint64_t totalSize;
-
-  uint64_t transferredSize;
-
-  DownloadStatus status;
-
   std::optional<std::string> checksum;
 };
 
 struct DownloadTask {
   std::string id;
   DownloadType type;
-  float percentage;
-  DownloadStatus status;
   std::optional<std::string> error;
   std::vector<DownloadItem> items;
 };
@@ -50,9 +42,12 @@ class DownloadService {
    * @param task
    */
   using DownloadItemCb = std::function<void(const std::string&)>;
-  void AddDownloadTask(const DownloadTask& task);
+  void AddDownloadTask(const DownloadTask& task,
+                       std::optional<DownloadItemCb> callback = std::nullopt);
 
-  void AddAsyncDownloadTask(const DownloadTask& task);
+  void AddAsyncDownloadTask(
+      const DownloadTask& task,
+      std::optional<DownloadItemCb> callback = std::nullopt);
 
   // TODO: [NamH] implement the following methods
   //  void removeTask(const std::string &id);
@@ -63,9 +58,7 @@ class DownloadService {
  private:
   void StartDownloadItem(const std::string& downloadId,
                          const DownloadItem& item,
-                         const DownloadItemCb& callback = nullptr);
-
-  const std::string GetContainerFolderPath(DownloadType type);
+                         std::optional<DownloadItemCb> callback = std::nullopt);
 
   // store tasks so we can abort it later
   std::vector<DownloadTask> tasks;
diff --git a/engine/utils/cortex_utils.h b/engine/utils/cortex_utils.h
index 6f8a89658..777cd6d84 100644
--- a/engine/utils/cortex_utils.h
+++ b/engine/utils/cortex_utils.h
@@ -7,13 +7,16 @@
 #include <fstream>
 #include <iostream>
 #include <regex>
-#include "cstdio"
-#include "random"
-#include "string"
+#include <cstdio>
+#include <random>
+#include <string>
+
 // Include platform-specific headers
 #ifdef _WIN32
 #include <direct.h>
 #include <io.h>
+#include <windows.h>
+#define mkdir _mkdir
 #else
 #include <dirent.h>
 #include <unistd.h>
@@ -31,6 +34,9 @@ constexpr static auto kOnnxLibPath = "/engines/cortex.onnx";
 constexpr static auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm";
 
 inline std::string models_folder = "./models";
+inline std::string logs_folder = "./logs";
+inline std::string logs_base_name = "./logs/cortex";
+inline size_t log_file_size_limit = 20000000; // ~20 mb
 
 inline std::string extractBase64(const std::string& input) {
   std::regex pattern("base64,(.*)");
@@ -336,4 +342,4 @@ inline std::string GetCurrentPath() {
 }
 #endif
 
-} // namespace cortex_utils
+} // namespace cortex_utils
\ No newline at end of file
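One caveat in the async path above: the detached std::thread captures `callback` by reference while AddAsyncDownloadTask takes it by value, so the lambda can outlive the parameter it refers to once the function returns. Capturing the optional by value is the safer shape; a sketch of the idea, not a drop-in hunk:

   // copy the optional callback into the detached thread instead of referencing it
   std::thread([this, task, callback, item]() {
     this->StartDownloadItem(task.id, item, callback);
   }).detach();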
diff --git a/engine/utils/cortexso_parser.h b/engine/utils/cortexso_parser.h
index 6150e9f0a..04f6e7fa5 100644
--- a/engine/utils/cortexso_parser.h
+++ b/engine/utils/cortexso_parser.h
@@ -41,22 +41,17 @@ inline std::optional<DownloadTask> getDownloadTask(
           downloadItem.fileName = path;
           downloadItem.type = DownloadType::Model;
           downloadItem.path = downloadUrl;
-          downloadItem.totalSize = value["size"].get<uint64_t>();
-          downloadItem.transferredSize = 0;
-          downloadItem.status = DownloadStatus::Pending;
           downloadItems.push_back(downloadItem);
         }
 
         DownloadTask downloadTask{};
         downloadTask.id = modelId;
         downloadTask.type = DownloadType::Model;
-        downloadTask.percentage = 0.0f;
-        downloadTask.status = DownloadStatus::Pending;
         downloadTask.error = std::nullopt;
         downloadTask.items = downloadItems;
 
         return downloadTask;
-      } catch (const nlohmann::json::parse_error& e) {
+      } catch (const json::parse_error& e) {
         std::cerr << "JSON parse error: " << e.what() << std::endl;
       }
     }
diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h
new file mode 100644
index 000000000..77c6b74a6
--- /dev/null
+++ b/engine/utils/file_manager_utils.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <filesystem>
+#include <string_view>
+#include <trantor/utils/Logger.h>
+
+namespace file_manager_utils {
+
+inline std::filesystem::path GetContainerFolderPath(
+    const std::string_view type) {
+  const auto current_path{std::filesystem::current_path()};
+  auto container_folder_path = std::filesystem::path{};
+
+  if (type == "Model") {
+    container_folder_path = current_path / "models";
+  } else if (type == "Engine") {
+    container_folder_path = current_path / "engines";
+  } else {
+    container_folder_path = current_path / "misc";
+  }
+
+  if (!std::filesystem::exists(container_folder_path)) {
+    LOG_INFO << "Creating folder: " << container_folder_path.string() << "\n";
+    std::filesystem::create_directory(container_folder_path);
+  }
+
+  return container_folder_path;
+}
+
+inline std::string downloadTypeToString(DownloadType type) {
+  switch (type) {
+    case DownloadType::Model:
+      return "Model";
+    case DownloadType::Engine:
+      return "Engine";
+    case DownloadType::Miscellaneous:
+      return "Misc";
+    default:
+      return "UNKNOWN";
+  }
+}
+
+} // namespace file_manager_utils
\ No newline at end of file
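StartDownloadItem resolves its output directory through these two helpers: DownloadType is stringified, then mapped to a subfolder of the current working directory, created on first use. Sketch:

   auto folder = file_manager_utils::GetContainerFolderPath(
       file_manager_utils::downloadTypeToString(DownloadType::Engine));
   // -> <cwd>/engines (created if missing); Model -> <cwd>/models, anything else -> <cwd>/misc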
diff --git a/engine/utils/http_util.h b/engine/utils/http_util.h
index 73c53668f..471ef3b27 100644
--- a/engine/utils/http_util.h
+++ b/engine/utils/http_util.h
@@ -6,9 +6,9 @@ using namespace drogon;
 
 namespace http_util {
 
-bool HasFieldInReq(const HttpRequestPtr& req,
-                   std::function<void(const HttpResponsePtr&)>& callback,
-                   const std::string& field) {
+inline bool HasFieldInReq(const HttpRequestPtr& req,
+                          std::function<void(const HttpResponsePtr&)>& callback,
+                          const std::string& field) {
   if (auto o = req->getJsonObject(); !o || (*o)[field].isNull()) {
     Json::Value res;
     res["message"] = "No " + field + " field in request body";
diff --git a/engine/utils/model_callback_utils.h b/engine/utils/model_callback_utils.h
new file mode 100644
index 000000000..753fdb205
--- /dev/null
+++ b/engine/utils/model_callback_utils.h
@@ -0,0 +1,51 @@
+#pragma once
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <trantor/utils/Logger.h>
+
+#include "config/gguf_parser.h"
+#include "config/yaml_config.h"
+#include "utils/file_manager_utils.h"
+
+namespace model_callback_utils {
+inline void DownloadModelCb(const std::string& path) {
+
+  std::filesystem::path path_obj(path);
+  std::string filename(path_obj.filename().string());
+  // TODO: handle many cases of downloaded items from other sources except cortexso.
+  if (filename.compare("model.yml") == 0) {
+    config::YamlHandler handler;
+    handler.ModelConfigFromFile(path);
+    config::ModelConfig model_config = handler.GetModelConfig();
+    model_config.id = path_obj.parent_path().filename().string();
+
+    LOG_INFO << "Updating model config in " << path;
+    handler.UpdateModelConfig(model_config);
+    handler.WriteYamlFile(path_obj.parent_path().parent_path().string() + "/" +
+                          model_config.id + ".yaml");
+  }
+  // currently, only handle downloaded model with only 1 .gguf file
+  // TODO: handle multipart gguf file or different model in 1 repo.
+  else if (path_obj.extension().string().compare(".gguf") == 0) {
+
+    config::GGUFHandler gguf_handler;
+    config::YamlHandler yaml_handler;
+    gguf_handler.Parse(path);
+    config::ModelConfig model_config = gguf_handler.GetModelConfig();
+    model_config.id = path_obj.parent_path().filename().string();
+    model_config.files = {path};
+    yaml_handler.UpdateModelConfig(model_config);
+    std::string yml_path(path_obj.parent_path().parent_path().string() + "/" +
+                         model_config.id + ".yaml");
+    std::string yaml_path(path_obj.parent_path().string() + "/model.yml");
+    if (!std::filesystem::exists(yml_path)) {  // if <id>.yaml doesn't exist
+      yaml_handler.WriteYamlFile(yml_path);
+    }
+    if (!std::filesystem::exists(yaml_path)) {  // if model.yml doesn't exist
+      yaml_handler.WriteYamlFile(yaml_path);
+    }
+  }
+}
+} // namespace model_callback_utils
\ No newline at end of file
diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h
new file mode 100644
index 000000000..184428751
--- /dev/null
+++ b/engine/utils/system_info_utils.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <sstream>
+
+namespace system_info_utils {
+struct SystemInfo {
+  std::string os;
+  std::string arch;
+};
+
+constexpr static auto kUnsupported{"Unsupported"};
+
+inline SystemInfo GetSystemInfo() {
+  std::ostringstream arch;
+  std::ostringstream os;
+
+#if defined(__i386__) || defined(__x86_64__)
+  arch << "amd64";
+#elif defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
+  arch << "arm64";
+#else
+  arch << kUnsupported;
+#endif
+
+#if defined(__APPLE__) && defined(__MACH__)
+  os << "mac";
+#elif defined(__linux__)
+  os << "linux";
+#elif defined(_WIN32)
+  os << "windows";
+#else
+  os << kUnsupported;
+#endif
+  return SystemInfo{os.str(), arch.str()};
+}
+} // namespace system_info_utils
\ No newline at end of file
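EngineInitCmd earlier in this patch keys GitHub release assets off this pair, concatenated as os-arch. Sketch:

   auto si = system_info_utils::GetSystemInfo();
   auto os_arch = si.os + "-" + si.arch;  // e.g. "linux-amd64", "mac-arm64"
   // EngineInitCmd::Exec downloads the release asset whose name contains os_arch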
+ [&]() { asyncFileLogger.flush(); }); + asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); + // Check if this process is for python execution if (argc > 1) { if (strcmp(argv[1], "--run_python_file") == 0) { @@ -41,11 +55,17 @@ int main(int argc, char* argv[]) { } } + if (argc > 1) { + CommandLineParser clp; + clp.SetupCommand(argc, argv); + return 0; + } + int thread_num = 1; std::string host = "127.0.0.1"; int port = 3928; - // Number of cortex-cpp threads + // Number of cortex.cpp threads if (argc > 1) { thread_num = std::atoi(argv[1]); } @@ -64,9 +84,9 @@ int main(int argc, char* argv[]) { int drogon_thread_num = std::max(thread_num, logical_cores); // cortex_utils::nitro_logo(); #ifdef CORTEX_CPP_VERSION - LOG_INFO << "cortex-cpp version: " << CORTEX_CPP_VERSION; + LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; #else - LOG_INFO << "cortex-cpp version: undefined"; + LOG_INFO << "cortex.cpp version: undefined"; #endif LOG_INFO << "Server started, listening at: " << host << ":" << port; @@ -78,4 +98,4 @@ int main(int argc, char* argv[]) { drogon::app().run(); return 0; -} +} \ No newline at end of file From 95f7d67c85a897234be999fab45890beb5e5999d Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Wed, 28 Aug 2024 10:24:40 +0700 Subject: [PATCH 09/16] feat: chat command (#1032) --- engine/commands/chat_cmd.cc | 120 ++++++++++++++++++++++ engine/commands/chat_cmd.h | 19 ++++ engine/controllers/command_line_parser.cc | 20 +++- engine/main.cc | 25 ++--- 4 files changed, 171 insertions(+), 13 deletions(-) create mode 100644 engine/commands/chat_cmd.cc create mode 100644 engine/commands/chat_cmd.h diff --git a/engine/commands/chat_cmd.cc b/engine/commands/chat_cmd.cc new file mode 100644 index 000000000..185dd60fe --- /dev/null +++ b/engine/commands/chat_cmd.cc @@ -0,0 +1,120 @@ +#include "chat_cmd.h" +#include "httplib.h" + +#include "trantor/utils/Logger.h" + +namespace commands { +namespace { +constexpr const char* kExitChat = "exit()"; +constexpr const auto kMinDataChunkSize = 6u; +constexpr const char* kUser = "user"; +constexpr const char* kAssistant = "assistant"; + +} // namespace + +struct ChunkParser { + std::string content; + bool is_done = false; + + ChunkParser(const char* data, size_t data_length) { + if (data && data_length > kMinDataChunkSize) { + std::string s(data + kMinDataChunkSize, data_length - kMinDataChunkSize); + if (s.find("[DONE]") != std::string::npos) { + is_done = true; + } else { + content = nlohmann::json::parse(s)["choices"][0]["delta"]["content"]; + } + } + } +}; + +ChatCmd::ChatCmd(std::string host, int port, const config::ModelConfig& mc) + : host_(std::move(host)), port_(port), mc_(mc) {} + +void ChatCmd::Exec(std::string msg) { + auto address = host_ + ":" + std::to_string(port_); + // Check if model is loaded + { + httplib::Client cli(address); + nlohmann::json json_data; + json_data["model"] = mc_.name; + json_data["engine"] = mc_.engine; + + auto data_str = json_data.dump(); + + // TODO: move this to another message? 
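+ // Ask the server whether this model is already loaded: a non-200 reply means it is not ready, so log the server's message and return before starting the chat loop.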
+ auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(), + data_str.data(), data_str.size(), "application/json"); + if (res) { + if (res->status != httplib::StatusCode::OK_200) { + LOG_INFO << res->body; + return; + } + } else { + auto err = res.error(); + LOG_WARN << "HTTP error: " << httplib::to_string(err); + return; + } + } + // Print a short instruction for the user + std::cout << "In order to exit, type exit()" << std::endl; + // Model is loaded, start to chat + { + while (true) { + // The first turn consumes the message passed via -m (if any); after the move, msg is empty and later turns read from stdin. + std::string user_input = std::move(msg); + std::cout << "> "; + if (user_input.empty()) { + std::getline(std::cin, user_input); + } + if (user_input == kExitChat) { + break; + } + + if (!user_input.empty()) { + httplib::Client cli(address); + nlohmann::json json_data; + nlohmann::json new_data; + new_data["role"] = kUser; + new_data["content"] = user_input; + histories_.push_back(std::move(new_data)); + json_data["engine"] = mc_.engine; + json_data["messages"] = histories_; + json_data["model"] = mc_.name; + // TODO: support non-streaming responses + json_data["stream"] = true; + json_data["stop"] = mc_.stop; + auto data_str = json_data.dump(); + // std::cout << data_str << std::endl; + cli.set_read_timeout(std::chrono::seconds(60)); + // std::cout << "> "; + httplib::Request req; + req.headers = httplib::Headers(); + req.set_header("Content-Type", "application/json"); + req.method = "POST"; + req.path = "/v1/chat/completions"; + req.body = data_str; + std::string ai_chat; + req.content_receiver = [&](const char* data, size_t data_length, + uint64_t offset, uint64_t total_length) { + ChunkParser cp(data, data_length); + if (cp.is_done) { + std::cout << std::endl; + return false; + } + std::cout << cp.content; + ai_chat += cp.content; + return true; + }; + cli.send(req); + + nlohmann::json ai_res; + ai_res["role"] = kAssistant; + ai_res["content"] = ai_chat; + histories_.push_back(std::move(ai_res)); + } + // std::cout << "ok Done" << std::endl; + } + } +} + +}; // namespace commands \ No newline at end of file diff --git a/engine/commands/chat_cmd.h b/engine/commands/chat_cmd.h new file mode 100644 index 000000000..d5b48927c --- /dev/null +++ b/engine/commands/chat_cmd.h @@ -0,0 +1,19 @@ +#pragma once +#include <string> +#include <vector> +#include "config/model_config.h" +#include "nlohmann/json.hpp" + +namespace commands { +class ChatCmd { + public: + ChatCmd(std::string host, int port, const config::ModelConfig& mc); + void Exec(std::string msg); + + private: + std::string host_; + int port_; + const config::ModelConfig& mc_; + std::vector<nlohmann::json> histories_; +}; +} // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index d58760433..48c63611d 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -5,6 +5,7 @@ #include "commands/start_model_cmd.h" #include "commands/stop_model_cmd.h" #include "commands/stop_server_cmd.h" +#include "commands/chat_cmd.h" #include "config/yaml_config.h" #include "utils/cortex_utils.h" @@ -66,7 +67,24 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { models_cmd->add_subcommand("update", "Update configuration of a model"); } - auto chat_cmd = app_.add_subcommand("chat", "Send a chat request to a model"); + { + auto chat_cmd = + app_.add_subcommand("chat", "Send a chat request to a model"); + std::string model_id; + chat_cmd->add_option("model_id", model_id, ""); + std::string msg; + chat_cmd->add_option("-m,--message", msg, + "Message to chat 
with model"); + + chat_cmd->callback([&model_id, &msg] { + // TODO(sang) switch to .yaml when implement model manager + config::YamlHandler yaml_handler; + yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() + + "/models/" + model_id + "/model.yml"); + commands::ChatCmd cc("127.0.0.1", 3928, yaml_handler.GetModelConfig()); + cc.Exec(msg); + }); + } auto ps_cmd = app_.add_subcommand("ps", "Show running models and their status"); diff --git a/engine/main.cc b/engine/main.cc index 27591d48a..a92e114fb 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -22,18 +22,7 @@ #endif int main(int argc, char* argv[]) { - // Create logs/ folder and setup log to file - std::filesystem::create_directory(cortex_utils::logs_folder); - trantor::AsyncFileLogger asyncFileLogger; - asyncFileLogger.setFileName(cortex_utils::logs_base_name); - asyncFileLogger.startLogging(); - trantor::Logger::setOutputFunction( - [&](const char* msg, const uint64_t len) { - asyncFileLogger.output(msg, len); - }, - [&]() { asyncFileLogger.flush(); }); - asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); - + // Check if this process is for python execution if (argc > 1) { if (strcmp(argv[1], "--run_python_file") == 0) { @@ -61,6 +50,18 @@ int main(int argc, char* argv[]) { return 0; } + // Create logs/ folder and setup log to file + std::filesystem::create_directory(cortex_utils::logs_folder); + trantor::AsyncFileLogger asyncFileLogger; + asyncFileLogger.setFileName(cortex_utils::logs_base_name); + asyncFileLogger.startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + asyncFileLogger.output(msg, len); + }, + [&]() { asyncFileLogger.flush(); }); + asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); + int thread_num = 1; std::string host = "127.0.0.1"; int port = 3928; From 287c75065767fdc270db24c421196a07a5b0c425 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 27 Aug 2024 14:35:47 +0700 Subject: [PATCH 10/16] feat: add engine init cli --- engine/commands/engine_init_cmd.cc | 47 +++- engine/commands/engine_init_cmd.h | 4 +- engine/controllers/command_line_parser.cc | 41 ++-- engine/controllers/command_line_parser.h | 2 + engine/controllers/engines.cc | 111 +++++++++ engine/controllers/engines.h | 21 ++ engine/main.cc | 5 +- engine/utils/command_executor.h | 49 ++++ engine/utils/engine_matcher_utils.h | 180 ++++++++++++++ engine/utils/system_info_utils.h | 273 +++++++++++++++++++++- 10 files changed, 696 insertions(+), 37 deletions(-) create mode 100644 engine/controllers/engines.cc create mode 100644 engine/controllers/engines.h create mode 100644 engine/utils/command_executor.h create mode 100644 engine/utils/engine_matcher_utils.h diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc index 430433e4d..b4f8fe064 100644 --- a/engine/commands/engine_init_cmd.cc +++ b/engine/commands/engine_init_cmd.cc @@ -7,6 +7,7 @@ #include "utils/archive_utils.h" #include "utils/system_info_utils.h" // clang-format on +#include "utils/engine_matcher_utils.h" namespace commands { @@ -27,6 +28,7 @@ void EngineInitCmd::Exec() const { << system_info.arch; return; } + LOG_INFO << "OS: " << system_info.os << ", Arch: " << system_info.arch; // check if engine is supported if (std::find(supportedEngines_.begin(), supportedEngines_.end(), @@ -36,11 +38,11 @@ void EngineInitCmd::Exec() const { } constexpr auto gitHubHost = "https://api.github.com"; - + std::string version = version_.empty() ? 
"latest" : version_; std::ostringstream engineReleasePath; engineReleasePath << "/repos/janhq/" << engineName_ << "/releases/" - << version_; - + << version; + LOG_INFO << "Engine release path: " << gitHubHost << engineReleasePath.str(); using namespace nlohmann; httplib::Client cli(gitHubHost); @@ -51,9 +53,37 @@ void EngineInitCmd::Exec() const { auto assets = jsonResponse["assets"]; auto os_arch{system_info.os + "-" + system_info.arch}; + std::vector variants; + for (auto& asset : assets) { + auto asset_name = asset["name"].get(); + variants.push_back(asset_name); + } + + auto cuda_version = system_info_utils::GetCudaVersion(); + LOG_INFO << "engineName_: " << engineName_; + LOG_INFO << "CUDA version: " << cuda_version; + std::string matched_variant = ""; + if (engineName_ == "cortex.tensorrt-llm") { + matched_variant = engine_matcher_utils::ValidateTensorrtLlm( + variants, system_info.os, cuda_version); + } else if (engineName_ == "cortex.onnx") { + matched_variant = engine_matcher_utils::ValidateOnnx( + variants, system_info.os, system_info.arch); + } else if (engineName_ == "cortex.llamacpp") { + auto suitable_avx = engine_matcher_utils::GetSuitableAvxVariant(); + matched_variant = engine_matcher_utils::Validate( + variants, system_info.os, system_info.arch, suitable_avx, + cuda_version); + } + LOG_INFO << "Matched variant: " << matched_variant; + if (matched_variant.empty()) { + LOG_ERROR << "No variant found for " << os_arch; + return; + } + for (auto& asset : assets) { auto assetName = asset["name"].get(); - if (assetName.find(os_arch) != std::string::npos) { + if (assetName == matched_variant) { std::string host{"https://github.com"}; auto full_url = asset["browser_download_url"].get(); @@ -74,8 +104,7 @@ void EngineInitCmd::Exec() const { }}}; DownloadService().AddDownloadTask( - downloadTask, - [&downloadTask](const std::string& absolute_path) { + downloadTask, [](const std::string& absolute_path) { // try to unzip the downloaded file std::filesystem::path downloadedEnginePath{absolute_path}; LOG_INFO << "Downloaded engine path: " @@ -95,15 +124,15 @@ void EngineInitCmd::Exec() const { return; } } - LOG_ERROR << "No asset found for " << os_arch; } catch (const json::parse_error& e) { std::cerr << "JSON parse error: " << e.what() << std::endl; } + } else { + LOG_ERROR << "HTTP error: " << res->status; } } else { auto err = res.error(); LOG_ERROR << "HTTP error: " << httplib::to_string(err); } } - -}; // namespace commands \ No newline at end of file +}; // namespace commands diff --git a/engine/commands/engine_init_cmd.h b/engine/commands/engine_init_cmd.h index 09b908e37..dc75d5cf6 100644 --- a/engine/commands/engine_init_cmd.h +++ b/engine/commands/engine_init_cmd.h @@ -15,7 +15,7 @@ class EngineInitCmd { std::string engineName_; std::string version_; - static constexpr std::array supportedEngines_ = { - "cortex.llamacpp"}; + static constexpr std::array supportedEngines_ = { + "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"}; }; } // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index d58760433..e409ab90a 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -1,7 +1,7 @@ #include "command_line_parser.h" #include "commands/engine_init_cmd.h" -#include "commands/model_pull_cmd.h" #include "commands/model_list_cmd.h" +#include "commands/model_pull_cmd.h" #include "commands/start_model_cmd.h" #include 
"commands/stop_model_cmd.h" #include "commands/stop_server_cmd.h" @@ -44,7 +44,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto list_models_cmd = models_cmd->add_subcommand("list", "List all models locally"); - list_models_cmd->callback([](){ + list_models_cmd->callback([]() { commands::ModelListCmd command; command.Exec(); }); @@ -74,27 +74,15 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto embeddings_cmd = app_.add_subcommand( "embeddings", "Creates an embedding vector representing the input text"); - // engines group commands - { + { // engines group commands auto engines_cmd = app_.add_subcommand("engines", "Get cortex engines"); auto list_engines_cmd = engines_cmd->add_subcommand("list", "List all cortex engines"); auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine"); - { // Engine init command - auto init_cmd = engines_cmd->add_subcommand("init", "Initialize engine"); - std::string engine_name; - std::string version = "latest"; - - init_cmd->add_option("-n,--name", engine_name, - "Engine name. E.g: cortex.llamacpp"); - init_cmd->add_option("-v,--version", version, - "Engine version. Default will be latest"); - init_cmd->callback([&engine_name, &version]() { - commands::EngineInitCmd eic(engine_name, version); - eic.Exec(); - }); - } + EngineInstall(engines_cmd, "cortex.llamacpp"); + EngineInstall(engines_cmd, "cortex.onnx"); + EngineInstall(engines_cmd, "cortex.tensorrt-llm"); } auto run_cmd = @@ -110,4 +98,21 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { CLI11_PARSE(app_, argc, argv); return true; +} + +void CommandLineParser::EngineInstall(CLI::App* parent, + const std::string& engine_name) { + auto engine_cmd = + parent->add_subcommand(engine_name, "Manage " + engine_name + " engine"); + + // Default version is latest + std::string version{"latest"}; + auto install_cmd = engine_cmd->add_subcommand( + "install", "Install " + engine_name + " engine"); + install_cmd->add_option("-v, --version", version, + "Engine version. 
Default will be latest"); + install_cmd->callback([&engine_name, &version] { + commands::EngineInitCmd eic(engine_name, version); + eic.Exec(); + }); } \ No newline at end of file diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h index 3324d45e0..e48ed31b0 100644 --- a/engine/controllers/command_line_parser.h +++ b/engine/controllers/command_line_parser.h @@ -9,5 +9,7 @@ class CommandLineParser { bool SetupCommand(int argc, char** argv); private: + void EngineInstall(CLI::App* parent, const std::string& engine_name); + CLI::App app_; }; \ No newline at end of file diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc new file mode 100644 index 000000000..12bea809d --- /dev/null +++ b/engine/controllers/engines.cc @@ -0,0 +1,111 @@ +#include "engines.h" +#include "utils/archive_utils.h" +#include "utils/file_manager_utils.h" +#include "utils/system_info_utils.h" + +void Engines::InitEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) const { + LOG_DEBUG << "InitEngine, Engine: " << engine; + if (engine.empty()) { + Json::Value res; + res["message"] = "Engine name is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k409Conflict); + callback(resp); + LOG_WARN << "No engine field in path param"; + return; + } + + auto system_info = system_info_utils::GetSystemInfo(); + if (system_info.arch == system_info_utils::kUnsupported || + system_info.os == system_info_utils::kUnsupported) { + Json::Value res; + res["message"] = "Unsupported OS or architecture"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k409Conflict); + callback(resp); + LOG_ERROR << "Unsupported OS or architecture: " << system_info.os << ", " + << system_info.arch; + return; + } + + auto version{"latest"}; + constexpr auto gitHubHost = "https://api.github.com"; + + std::ostringstream engineReleasePath; + engineReleasePath << "/repos/janhq/" << engine << "/releases/" << version; + + httplib::Client cli(gitHubHost); + using namespace nlohmann; + if (auto res = cli.Get(engineReleasePath.str())) { + if (res->status == httplib::StatusCode::OK_200) { + try { + auto jsonResponse = json::parse(res->body); + auto assets = jsonResponse["assets"]; + + auto os_arch{system_info.os + "-" + system_info.arch}; + for (auto& asset : assets) { + auto assetName = asset["name"].get(); + if (assetName.find(os_arch) != std::string::npos) { + std::string host{"https://github.com"}; + + auto full_url = asset["browser_download_url"].get(); + std::string path = full_url.substr(host.length()); + + auto fileName = asset["name"].get(); + LOG_INFO << "URL: " << full_url; + + auto downloadTask = DownloadTask{.id = engine, + .type = DownloadType::Engine, + .error = std::nullopt, + .items = {DownloadItem{ + .id = engine, + .host = host, + .fileName = fileName, + .type = DownloadType::Engine, + .path = path, + }}}; + + DownloadService().AddAsyncDownloadTask( + downloadTask, [](const std::string& absolute_path) { + // try to unzip the downloaded file + std::filesystem::path downloadedEnginePath{absolute_path}; + LOG_INFO << "Downloaded engine path: " + << downloadedEnginePath.string(); + + archive_utils::ExtractArchive( + downloadedEnginePath.string(), + downloadedEnginePath.parent_path() + .parent_path() + .string()); + + // remove the downloaded file + std::filesystem::remove(absolute_path); + LOG_INFO << "Finished!"; + }); + + Json::Value res; + res["message"] = "Engine 
download started"; + res["result"] = "OK"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k200OK); + callback(resp); + return; + } + } + Json::Value res; + res["message"] = "Engine not found"; + res["result"] = "Error"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k404NotFound); + callback(resp); + } catch (const json::parse_error& e) { + std::cerr << "JSON parse error: " << e.what() << std::endl; + } + } + } else { + auto err = res.error(); + LOG_ERROR << "HTTP error: " << httplib::to_string(err); + } +} \ No newline at end of file diff --git a/engine/controllers/engines.h b/engine/controllers/engines.h new file mode 100644 index 000000000..282e79402 --- /dev/null +++ b/engine/controllers/engines.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include +#include "services/download_service.h" +#include "utils/cortex_utils.h" +#include "utils/cortexso_parser.h" +#include "utils/http_util.h" + +using namespace drogon; + +class Engines : public drogon::HttpController { + public: + METHOD_LIST_BEGIN + METHOD_ADD(Engines::InitEngine, "/{1}/init", Post); + METHOD_LIST_END + + void InitEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) const; +}; diff --git a/engine/main.cc b/engine/main.cc index 27591d48a..e8701dd7b 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -1,12 +1,11 @@ #include #include #include // for PATH_MAX -#include #include "controllers/command_line_parser.h" #include "cortex-common/cortexpythoni.h" +#include "utils/archive_utils.h" #include "utils/cortex_utils.h" #include "utils/dylib.h" -#include "utils/archive_utils.h" #if defined(__APPLE__) && defined(__MACH__) #include // for dirname() @@ -98,4 +97,4 @@ int main(int argc, char* argv[]) { drogon::app().run(); return 0; -} \ No newline at end of file +} diff --git a/engine/utils/command_executor.h b/engine/utils/command_executor.h new file mode 100644 index 000000000..9ba13025a --- /dev/null +++ b/engine/utils/command_executor.h @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#define POPEN _popen +#define PCLOSE _pclose +#else +#define POPEN popen +#define PCLOSE pclose +#endif + +class CommandExecutor { + public: + CommandExecutor(const std::string& command) { + FILE* pipe = POPEN(command.c_str(), "r"); + if (!pipe) { + throw std::runtime_error("popen() failed!"); + } + m_pipe = std::unique_ptr(pipe, PCLOSE); + } + + CommandExecutor(const CommandExecutor&) = delete; + CommandExecutor& operator=(const CommandExecutor&) = delete; + CommandExecutor(CommandExecutor&&) = default; + CommandExecutor& operator=(CommandExecutor&&) = default; + ~CommandExecutor() = default; + + std::string execute() { + if (!m_pipe) { + throw std::runtime_error("Command not initialized!"); + } + + std::array buffer; + std::string result; + + while (fgets(buffer.data(), buffer.size(), m_pipe.get()) != nullptr) { + result += buffer.data(); + } + + return result; + } + + private: + std::unique_ptr m_pipe{nullptr, PCLOSE}; +}; \ No newline at end of file diff --git a/engine/utils/engine_matcher_utils.h b/engine/utils/engine_matcher_utils.h new file mode 100644 index 000000000..23c93c1a6 --- /dev/null +++ b/engine/utils/engine_matcher_utils.h @@ -0,0 +1,180 @@ +#include +#include +#include +#include +#include +#include +#include "utils/cpuid/cpu_info.h" + +namespace engine_matcher_utils { +// for testing purpose +const std::vector cortex_llamacpp_variants{ + 
"cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-vulkan.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-mac-arm64.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-vulkan.tar.gz", +}; +const std::vector cortex_onnx_variants{ + "cortex.onnx-0.1.7-windows-amd64.tar.gz"}; + +const std::vector cortex_tensorrt_variants{ + "cortex.tensorrt-llm-0.0.9-linux-cuda-12-4.tar.gz", + "cortex.tensorrt-llm-0.0.9-windows-cuda-12-4.tar.gz"}; + +inline std::string GetSuitableAvxVariant() { + cortex::cpuid::CpuInfo cpu_info; + + LOG_INFO << "GetSuitableAvxVariant:" << "\n" << cpu_info.to_string(); + + if (cpu_info.has_avx512_f()) + return "avx512"; + if (cpu_info.has_avx2()) + return "avx2"; + if (cpu_info.has_avx()) + return "avx"; + return "noavx"; +} + +inline std::string GetSuitableCudaVariant( + const std::vector& variants, const std::string& cuda_version) { + std::regex cuda_reg("cuda-(\\d+)-(\\d+)"); + std::smatch match; + + int requestedMajor = 0; + int requestedMinor = 0; + + if (!cuda_version.empty()) { + // Split the provided CUDA version into major and minor parts + sscanf(cuda_version.c_str(), "%d.%d", &requestedMajor, &requestedMinor); + } + + std::string selectedVariant; + int bestMatchMajor = -1; + int bestMatchMinor = -1; + + for (const auto& variant : variants) { + if (std::regex_search(variant, match, cuda_reg)) { + // Found a CUDA version in the variant + int variantMajor = std::stoi(match[1]); + int variantMinor = std::stoi(match[2]); + + if (requestedMajor == variantMajor) { + // If the major versions match, prefer the closest minor version + if (requestedMinor >= variantMinor && + (variantMajor > bestMatchMajor || + (variantMajor == bestMatchMajor && + variantMinor > bestMatchMinor))) { + selectedVariant = variant; + bestMatchMajor = variantMajor; + bestMatchMinor = variantMinor; + } + } + } else if 
(cuda_version.empty() && selectedVariant.empty()) { + // If no CUDA version is provided, select the variant without any CUDA in the name + selectedVariant = variant; + } + } + + return selectedVariant; +} + +inline std::string ValidateTensorrtLlm(const std::vector& variants, + const std::string& os, + const std::string& cuda_version) { + std::vector os_compatible_list; + std::copy_if(variants.begin(), variants.end(), + std::back_inserter(os_compatible_list), + [&os](const std::string& variant) { + auto os_match = "-" + os; + return variant.find(os_match) != std::string::npos; + }); + auto cuda_compatible = + GetSuitableCudaVariant(os_compatible_list, cuda_version); + return cuda_compatible; +} + +inline std::string ValidateOnnx(const std::vector& variants, + const std::string& os, + const std::string& cpu_arch) { + + std::vector os_and_arch_compatible_list; + std::copy_if(variants.begin(), variants.end(), + std::back_inserter(os_and_arch_compatible_list), + [&os, &cpu_arch](const std::string& variant) { + auto os_match = "-" + os; + auto cpu_arch_match = "-" + cpu_arch; + + return variant.find(os_match) != std::string::npos && + variant.find(cpu_arch_match) != std::string::npos; + }); + if (!os_and_arch_compatible_list.empty()) + return os_and_arch_compatible_list[0]; + return ""; +} + +inline std::string Validate(const std::vector& variants, + const std::string& os, const std::string& cpu_arch, + const std::string& suitable_avx, + const std::string& cuda_version) { + + // Early return if the OS is unsupported + if (os != "mac" && os != "windows" && os != "linux") { + // TODO: throw is better + return ""; + } + + std::vector os_and_arch_compatible_list; + std::copy_if(variants.begin(), variants.end(), + std::back_inserter(os_and_arch_compatible_list), + [&os, &cpu_arch](const std::string& variant) { + auto os_match = "-" + os; + auto cpu_arch_match = "-" + cpu_arch; + + return variant.find(os_match) != std::string::npos && + variant.find(cpu_arch_match) != std::string::npos; + }); + + if (os == "mac" && !os_and_arch_compatible_list.empty()) + return os_and_arch_compatible_list[0]; + + std::vector avx_compatible_list; + + std::copy_if(os_and_arch_compatible_list.begin(), + os_and_arch_compatible_list.end(), + std::back_inserter(avx_compatible_list), + [&suitable_avx](const std::string& variant) { + auto suitable_avx_match = "-" + suitable_avx; + + return variant.find(suitable_avx_match) != std::string::npos; + }); + + auto cuda_compatible = + GetSuitableCudaVariant(avx_compatible_list, cuda_version); + + return cuda_compatible; +} +} // namespace engine_matcher_utils \ No newline at end of file diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index 184428751..d13935295 100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -1,22 +1,64 @@ #pragma once -#include +#include +#include +#include +#include "utils/command_executor.h" +#ifdef _WIN32 +#include +#endif namespace system_info_utils { + +constexpr static auto kUnsupported{"Unsupported"}; +constexpr static auto kCudaVersionRegex{R"(CUDA Version:\s*([\d\.]+))"}; +constexpr static auto kGpuQueryCommand{ + "nvidia-smi --query-gpu=index,memory.total,name,compute_cap " + "--format=csv,noheader,nounits"}; +constexpr static auto kGpuInfoRegex{ + R"((\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+))"}; + struct SystemInfo { std::string os; std::string arch; }; -constexpr static auto kUnsupported{"Unsupported"}; +/** + * @brief Get the Gpu Arch. Currently we only support Ampere and Ada. 
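+ * Detection is a plain substring match on the marketing name: names containing "30" map to ampere, names containing "40" map to ada, and everything else (including non-NVIDIA names) maps to unknown.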
+ * Might need to come up with better way to detect the GPU architecture. + * + * @param gpuName E.g. NVIDIA GeForce RTX 4090 + * @return corresponding GPU arch. E.g. ampere, ada. + */ +inline std::string GetGpuArch(const std::string& gpuName) { + std::string lowerGpuName = gpuName; + std::transform(lowerGpuName.begin(), lowerGpuName.end(), lowerGpuName.begin(), + ::tolower); + + if (lowerGpuName.find("nvidia") == std::string::npos) { + return "unknown"; + } + + if (gpuName.find("30") != std::string::npos) { + return "ampere"; + } else if (gpuName.find("40") != std::string::npos) { + return "ada"; + } else { + return "unknown"; + } +} inline SystemInfo GetSystemInfo() { std::ostringstream arch; std::ostringstream os; -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__) || \ + defined(__amd64) || defined(__x86_64) || defined(_M_AMD64) arch << "amd64"; -#elif defined(__arm__) || defined(__arm64__) || defined(__aarch64__) +#elif defined(__arm__) || defined(__arm) || defined(__arm64__) || \ + defined(__aarch64__) || defined(__thumb__) || \ + defined(__TARGET_ARCH_ARM) || defined(__TARGET_ARCH_THUMB) || \ + defined(_ARM) || defined(_M_ARM) || defined(_M_ARMT) arch << "arm64"; #else arch << kUnsupported; @@ -33,4 +75,225 @@ inline SystemInfo GetSystemInfo() { #endif return SystemInfo{os.str(), arch.str()}; } -} // namespace system_info_utils \ No newline at end of file + +constexpr auto vulkan_sample_output = R"( +========== +VULKANINFO +========== + +Vulkan Instance Version: 1.3.280 + + +Instance Extensions: count = 19 +------------------------------- +VK_EXT_debug_report : extension revision 10 +VK_EXT_debug_utils : extension revision 2 +VK_EXT_direct_mode_display : extension revision 1 +VK_EXT_surface_maintenance1 : extension revision 1 +VK_EXT_swapchain_colorspace : extension revision 4 +VK_KHR_device_group_creation : extension revision 1 +VK_KHR_display : extension revision 23 +VK_KHR_external_fence_capabilities : extension revision 1 +VK_KHR_external_memory_capabilities : extension revision 1 +VK_KHR_external_semaphore_capabilities : extension revision 1 +VK_KHR_get_display_properties2 : extension revision 1 +VK_KHR_get_physical_device_properties2 : extension revision 2 +VK_KHR_get_surface_capabilities2 : extension revision 1 +VK_KHR_portability_enumeration : extension revision 1 +VK_KHR_surface : extension revision 25 +VK_KHR_surface_protected_capabilities : extension revision 1 +VK_KHR_win32_surface : extension revision 6 +VK_LUNARG_direct_driver_loading : extension revision 1 +VK_NV_external_memory_capabilities : extension revision 1 + +Instance Layers: count = 1 +-------------------------- +VK_LAYER_NV_optimus NVIDIA Optimus layer 1.3.280 version 1 + +Devices: +======== +GPU0: + apiVersion = 1.3.280 + driverVersion = 560.70.0.0 + vendorID = 0x10de + deviceID = 0x2684 + deviceType = PHYSICAL_DEVICE_TYPE_DISCRETE_GPU + deviceName = NVIDIA GeForce RTX 4090 + driverID = DRIVER_ID_NVIDIA_PROPRIETARY + driverName = NVIDIA + driverInfo = 560.70 + conformanceVersion = 1.3.8.2 + deviceUUID = 11deafdf-9f15-e857-2a87-8acc153fc9f7 + driverUUID = 10f251d9-d3c0-5001-bf67-24bb06423040 +)"; + +constexpr auto gpu_query_list_sample_output = R"( +0, 46068, NVIDIA RTX A6000, 8.6 +1, 46068, NVIDIA RTX A6000, 8.6 +)"; + +constexpr auto nvidia_smi_sample_output = R"( +Sun Aug 25 22:29:25 2024 ++-----------------------------------------------------------------------------------------+ +| NVIDIA-SMI 560.70 Driver Version: 560.70 CUDA Version: 12.6 | 
+|-----------------------------------------+------------------------+----------------------+ +| GPU Name Driver-Model | Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | +| | | MIG M. | +|=========================================+========================+======================| +| 0 NVIDIA GeForce RTX 4090 WDDM | 00000000:01:00.0 Off | Off | +| 0% 24C P8 10W / 500W | 395MiB / 24564MiB | 19% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ + ++-----------------------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=========================================================================================| +| 0 N/A N/A 3984 C+G ...5n1h2txyewy\ShellExperienceHost.exe N/A | +| 0 N/A N/A 7904 C+G ...ekyb3d8bbwe\PhoneExperienceHost.exe N/A | +| 0 N/A N/A 8240 C+G ...__8wekyb3d8bbwe\WindowsTerminal.exe N/A | +| 0 N/A N/A 8904 C+G C:\Windows\explorer.exe N/A | +| 0 N/A N/A 9304 C+G ...siveControlPanel\SystemSettings.exe N/A | +| 0 N/A N/A 9944 C+G ...nt.CBS_cw5n1h2txyewy\SearchHost.exe N/A | +| 0 N/A N/A 11140 C+G ...2txyewy\StartMenuExperienceHost.exe N/A | ++-----------------------------------------------------------------------------------------+ +)"; + +inline bool IsNvidiaSmiAvailable() { +#ifdef _WIN32 + // Check if nvidia-smi.exe exists in the PATH on Windows + char buffer[MAX_PATH]; + if (SearchPath(NULL, "nvidia-smi.exe", NULL, MAX_PATH, buffer, NULL) != 0) { + return true; + } else { + return false; + } +#else + // Check if nvidia-smi is available on Unix-like systems + int result = std::system("which nvidia-smi > /dev/null 2>&1"); + return result == 0; +#endif +} + +inline std::string GetCudaVersion() { + if (!IsNvidiaSmiAvailable()) { + LOG_INFO << "nvidia-smi is not available!"; + return ""; + } + try { + CommandExecutor cmd("nvidia-smi"); + auto output = cmd.execute(); + + const std::regex cuda_version_reg(kCudaVersionRegex); + std::smatch match; + + if (std::regex_search(output, match, cuda_version_reg)) { + LOG_INFO << "CUDA Version: " << match[1].str(); + return match[1].str(); + } else { + LOG_ERROR << "CUDA Version not found!"; + return ""; + } + } catch (const std::exception& e) { + LOG_ERROR << "Error: " << e.what(); + return ""; + } +} + +struct GpuInfo { + std::string id; + std::string vram; + std::string name; + std::string arch; + std::optional<std::string> compute_cap; +}; + +inline std::vector<GpuInfo> GetGpuInfoListVulkan() { + std::vector<GpuInfo> gpuInfoList; + + try { + // NOTE: currently we don't have logic to download vulkaninfoSDK +#ifdef _WIN32 + CommandExecutor cmd("vulkaninfoSDK.exe --summary"); +#else + CommandExecutor cmd("vulkaninfoSDK --summary"); +#endif + auto output = cmd.execute(); + + // Regular expression patterns to match each field + std::regex gpu_block_reg(R"(GPU(\d+):)"); + std::regex field_pattern(R"(\s*(\w+)\s*=\s*(.*))"); + + std::sregex_iterator iter(output.begin(), output.end(), gpu_block_reg); + std::sregex_iterator end; + + while (iter != end) { + GpuInfo gpuInfo; + + // Extract GPU ID from the GPU block pattern (e.g., GPU0 -> id = "0") + gpuInfo.id = (*iter)[1].str(); + + auto gpu_start_pos = iter->position(0) + iter->length(0); + auto gpu_end_pos = std::next(iter) != end ? 
std::next(iter)->position(0) + : std::string::npos; + std::string gpu_block = + output.substr(gpu_start_pos, gpu_end_pos - gpu_start_pos); + + std::sregex_iterator field_iter(gpu_block.begin(), gpu_block.end(), + field_pattern); + + while (field_iter != end) { + std::string key = (*field_iter)[1].str(); + std::string value = (*field_iter)[2].str(); + + if (key == "deviceName") + gpuInfo.name = value; + else if (key == "apiVersion") + gpuInfo.compute_cap = value; + + gpuInfo.vram = ""; // not available + gpuInfo.arch = GetGpuArch(gpuInfo.name); + + ++field_iter; + } + + gpuInfoList.push_back(gpuInfo); + ++iter; + } + } catch (const std::exception& e) {} + + return gpuInfoList; +} + +inline std::vector GetGpuInfoList() { + std::vector gpuInfoList; + + try { + CommandExecutor cmd(kGpuQueryCommand); + auto output = cmd.execute(); + + const std::regex gpu_info_reg(kGpuInfoRegex); + std::smatch match; + std::string::const_iterator search_start(output.cbegin()); + + while ( + std::regex_search(search_start, output.cend(), match, gpu_info_reg)) { + GpuInfo gpuInfo = { + match[1].str(), // id + match[2].str(), // vram + match[3].str(), // name + GetGpuArch(match[3].str()), // arch + match[4].str() // compute_cap + }; + gpuInfoList.push_back(gpuInfo); + search_start = match.suffix().first; + } + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + } + + return gpuInfoList; +} +} // namespace system_info_utils From bbc3e3192f5f426cfa511bf18430964757df4f03 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Wed, 28 Aug 2024 19:39:41 +0700 Subject: [PATCH 11/16] fix: guarantee lifetime for captured variables (#1042) --- .github/workflows/cortex-cpp-quality-gate.yml | 2 +- engine/CMakeLists.txt | 2 +- engine/controllers/command_line_parser.cc | 21 ++++++++++--------- engine/controllers/command_line_parser.h | 3 ++- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml index 092423821..39526d9a4 100644 --- a/.github/workflows/cortex-cpp-quality-gate.yml +++ b/.github/workflows/cortex-cpp-quality-gate.yml @@ -16,7 +16,7 @@ env: jobs: build-and-test: runs-on: ${{ matrix.runs-on }} - timeout-minutes: 40 + timeout-minutes: 60 strategy: fail-fast: false matrix: diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index d18d28f2d..7ba095d6b 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -43,7 +43,7 @@ if(MSVC) $<$:/MT> #--| ) endif() - + if(LLAMA_CUDA) cmake_minimum_required(VERSION 3.17) diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index b4f460261..2c5f79c84 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -12,13 +12,14 @@ CommandLineParser::CommandLineParser() : app_("Cortex.cpp CLI") {} bool CommandLineParser::SetupCommand(int argc, char** argv) { + std::string model_id; + // Models group commands { auto models_cmd = app_.add_subcommand("models", "Subcommands for managing models"); auto start_cmd = models_cmd->add_subcommand("start", "Start a model by ID"); - std::string model_id; start_cmd->add_option("model_id", model_id, ""); start_cmd->callback([&model_id]() { // TODO(sang) switch to .yaml when implement model manager @@ -67,12 +68,12 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { models_cmd->add_subcommand("update", "Update configuration of a model"); } + std::string msg; { auto chat_cmd = 
app_.add_subcommand("chat", "Send a chat request to a model"); - std::string model_id; + chat_cmd->add_option("model_id", model_id, ""); - std::string msg; chat_cmd->add_option("-m,--message", msg, "Message to chat with model"); @@ -92,15 +93,17 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto embeddings_cmd = app_.add_subcommand( "embeddings", "Creates an embedding vector representing the input text"); + // Default version is latest + std::string version{"latest"}; { // engines group commands auto engines_cmd = app_.add_subcommand("engines", "Get cortex engines"); auto list_engines_cmd = engines_cmd->add_subcommand("list", "List all cortex engines"); auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine"); - EngineInstall(engines_cmd, "cortex.llamacpp"); - EngineInstall(engines_cmd, "cortex.onnx"); - EngineInstall(engines_cmd, "cortex.tensorrt-llm"); + EngineInstall(engines_cmd, "cortex.llamacpp", version); + EngineInstall(engines_cmd, "cortex.onnx", version); + EngineInstall(engines_cmd, "cortex.tensorrt-llm", version); } auto run_cmd = @@ -119,17 +122,15 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { } void CommandLineParser::EngineInstall(CLI::App* parent, - const std::string& engine_name) { + const std::string& engine_name, std::string& version) { auto engine_cmd = parent->add_subcommand(engine_name, "Manage " + engine_name + " engine"); - // Default version is latest - std::string version{"latest"}; auto install_cmd = engine_cmd->add_subcommand( "install", "Install " + engine_name + " engine"); install_cmd->add_option("-v, --version", version, "Engine version. Default will be latest"); - install_cmd->callback([&engine_name, &version] { + install_cmd->callback([engine_name, &version] { commands::EngineInitCmd eic(engine_name, version); eic.Exec(); }); diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h index e48ed31b0..b6695346e 100644 --- a/engine/controllers/command_line_parser.h +++ b/engine/controllers/command_line_parser.h @@ -9,7 +9,8 @@ class CommandLineParser { bool SetupCommand(int argc, char** argv); private: - void EngineInstall(CLI::App* parent, const std::string& engine_name); + void EngineInstall(CLI::App* parent, const std::string& engine_name, + std::string& version); CLI::App app_; }; \ No newline at end of file From ba6816f4188f43f7090e25041aa0993c88d70bab Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 <35255081+nguyenhoangthuan99@users.noreply.github.com> Date: Thu, 29 Aug 2024 07:44:34 +0700 Subject: [PATCH 12/16] feat: models get command (#1035) --- engine/.gitignore | 3 +- engine/commands/model_get_cmd.cc | 135 ++++++++++++++++++++++ engine/commands/model_get_cmd.h | 15 +++ engine/controllers/command_line_parser.cc | 10 ++ engine/controllers/models.cc | 66 +++++++++++ engine/controllers/models.h | 3 + 6 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 engine/commands/model_get_cmd.cc create mode 100644 engine/commands/model_get_cmd.h diff --git a/engine/.gitignore b/engine/.gitignore index 10d117410..93c893e48 100644 --- a/engine/.gitignore +++ b/engine/.gitignore @@ -563,4 +563,5 @@ build build-deps .DS_Store -uploads/** \ No newline at end of file +uploads/** +CMakePresets.json \ No newline at end of file diff --git a/engine/commands/model_get_cmd.cc b/engine/commands/model_get_cmd.cc new file mode 100644 index 000000000..b9f1c5d52 --- /dev/null +++ b/engine/commands/model_get_cmd.cc @@ -0,0 +1,135 @@ +#include "model_get_cmd.h" +#include 
+#include +#include +#include "config/yaml_config.h" +#include "trantor/utils/Logger.h" +#include "utils/cortex_utils.h" + +namespace commands { +ModelGetCmd::ModelGetCmd(std::string model_handle) + : model_handle_(std::move(model_handle)) {} + +void ModelGetCmd::Exec() { + if (std::filesystem::exists(cortex_utils::models_folder) && + std::filesystem::is_directory(cortex_utils::models_folder)) { + bool found_model = false; + // Iterate through directory + for (const auto& entry : + std::filesystem::directory_iterator(cortex_utils::models_folder)) { + if (entry.is_regular_file() && entry.path().stem() == model_handle_ && + entry.path().extension() == ".yaml") { + try { + config::YamlHandler handler; + handler.ModelConfigFromFile(entry.path().string()); + const auto& model_config = handler.GetModelConfig(); + std::cout << "ModelConfig Details:\n"; + std::cout << "-------------------\n"; + + // Print non-null strings + if (!model_config.id.empty()) + std::cout << "id: " << model_config.id << "\n"; + if (!model_config.name.empty()) + std::cout << "name: " << model_config.name << "\n"; + if (!model_config.model.empty()) + std::cout << "model: " << model_config.model << "\n"; + if (!model_config.version.empty()) + std::cout << "version: " << model_config.version << "\n"; + + // Print non-empty vectors + if (!model_config.stop.empty()) { + std::cout << "stop: ["; + for (size_t i = 0; i < model_config.stop.size(); ++i) { + std::cout << model_config.stop[i]; + if (i < model_config.stop.size() - 1) + std::cout << ", "; + } + std::cout << "]\n"; + } + // Print valid numbers + if (!std::isnan(static_cast(model_config.top_p))) + std::cout << "top_p: " << model_config.top_p << "\n"; + if (!std::isnan(static_cast(model_config.temperature))) + std::cout << "temperature: " << model_config.temperature << "\n"; + if (!std::isnan(static_cast(model_config.frequency_penalty))) + std::cout << "frequency_penalty: " << model_config.frequency_penalty + << "\n"; + if (!std::isnan(static_cast(model_config.presence_penalty))) + std::cout << "presence_penalty: " << model_config.presence_penalty + << "\n"; + if (!std::isnan(static_cast(model_config.max_tokens))) + std::cout << "max_tokens: " << model_config.max_tokens << "\n"; + if (!std::isnan(static_cast(model_config.stream))) + std::cout << "stream: " << std::boolalpha << model_config.stream + << "\n"; + if (!std::isnan(static_cast(model_config.ngl))) + std::cout << "ngl: " << model_config.ngl << "\n"; + if (!std::isnan(static_cast(model_config.ctx_len))) + std::cout << "ctx_len: " << model_config.ctx_len << "\n"; + + // Print non-null strings + if (!model_config.engine.empty()) + std::cout << "engine: " << model_config.engine << "\n"; + if (!model_config.prompt_template.empty()) + std::cout << "prompt_template: " << model_config.prompt_template + << "\n"; + if (!model_config.system_template.empty()) + std::cout << "system_template: " << model_config.system_template + << "\n"; + if (!model_config.user_template.empty()) + std::cout << "user_template: " << model_config.user_template + << "\n"; + if (!model_config.ai_template.empty()) + std::cout << "ai_template: " << model_config.ai_template << "\n"; + if (!model_config.os.empty()) + std::cout << "os: " << model_config.os << "\n"; + if (!model_config.gpu_arch.empty()) + std::cout << "gpu_arch: " << model_config.gpu_arch << "\n"; + if (!model_config.quantization_method.empty()) + std::cout << "quantization_method: " + << model_config.quantization_method << "\n"; + if (!model_config.precision.empty()) + std::cout << 
"precision: " << model_config.precision << "\n"; + + if (!std::isnan(static_cast(model_config.tp))) + std::cout << "tp: " << model_config.tp << "\n"; + + // Print non-null strings + if (!model_config.trtllm_version.empty()) + std::cout << "trtllm_version: " << model_config.trtllm_version + << "\n"; + if (!std::isnan(static_cast(model_config.text_model))) + std::cout << "text_model: " << std::boolalpha + << model_config.text_model << "\n"; + + // Print non-empty vectors + if (!model_config.files.empty()) { + std::cout << "files: ["; + for (size_t i = 0; i < model_config.files.size(); ++i) { + std::cout << model_config.files[i]; + if (i < model_config.files.size() - 1) + std::cout << ", "; + } + std::cout << "]\n"; + } + + // Print valid size_t number + if (model_config.created != 0) + std::cout << "created: " << model_config.created << "\n"; + + if (!model_config.object.empty()) + std::cout << "object: " << model_config.object << "\n"; + if (!model_config.owned_by.empty()) + std::cout << "owned_by: " << model_config.owned_by << "\n"; + + found_model = true; + break; + } catch (const std::exception& e) { + LOG_ERROR << "Error reading yaml file '" << entry.path().string() + << "': " << e.what(); + } + } + } + } +} +}; // namespace commands \ No newline at end of file diff --git a/engine/commands/model_get_cmd.h b/engine/commands/model_get_cmd.h new file mode 100644 index 000000000..8de48e23d --- /dev/null +++ b/engine/commands/model_get_cmd.h @@ -0,0 +1,15 @@ +#pragma once + +#include // For std::isnan +#include +namespace commands { + +class ModelGetCmd { + public: + ModelGetCmd(std::string model_handle); + void Exec(); + + private: + std::string model_handle_; +}; +} // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 2c5f79c84..42a5f8731 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -1,6 +1,8 @@ #include "command_line_parser.h" #include "commands/engine_init_cmd.h" #include "commands/model_list_cmd.h" +#include "commands/model_get_cmd.h" + #include "commands/model_pull_cmd.h" #include "commands/start_model_cmd.h" #include "commands/stop_model_cmd.h" @@ -51,6 +53,14 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { command.Exec(); }); + auto get_models_cmd = + models_cmd->add_subcommand("get", "Get info of {model_id} locally"); + get_models_cmd->add_option("model_id", model_id, ""); + get_models_cmd->callback([&model_id](){ + commands::ModelGetCmd command(model_id); + command.Exec(); + }); + auto model_pull_cmd = app_.add_subcommand("pull", "Download a model from a registry. 
Working with " diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index e445ff90a..52a8bff28 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -101,4 +101,70 @@ void Models::ListModel( auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); resp->setStatusCode(k200OK); callback(resp); +} + +void Models::GetModel( + const HttpRequestPtr& req, + std::function&& callback) const { + if (!http_util::HasFieldInReq(req, callback, "modelId")) { + return; + } + auto model_handle = (*(req->getJsonObject())).get("modelId", "").asString(); + LOG_DEBUG << "GetModel, Model handle: " << model_handle; + Json::Value ret; + ret["object"] = "list"; + Json::Value data(Json::arrayValue); + if (std::filesystem::exists(cortex_utils::models_folder) && + std::filesystem::is_directory(cortex_utils::models_folder)) { + // Iterate through directory + for (const auto& entry : + std::filesystem::directory_iterator(cortex_utils::models_folder)) { + if (entry.is_regular_file() && entry.path().extension() == ".yaml" && + entry.path().stem() == model_handle) { + try { + config::YamlHandler handler; + handler.ModelConfigFromFile(entry.path().string()); + auto const& model_config = handler.GetModelConfig(); + Json::Value obj; + obj["name"] = model_config.name; + obj["model"] = model_config.model; + obj["version"] = model_config.version; + Json::Value stop_array(Json::arrayValue); + for (const std::string& stop : model_config.stop) + stop_array.append(stop); + obj["stop"] = stop_array; + obj["top_p"] = model_config.top_p; + obj["temperature"] = model_config.temperature; + obj["presence_penalty"] = model_config.presence_penalty; + obj["max_tokens"] = model_config.max_tokens; + obj["stream"] = model_config.stream; + obj["ngl"] = model_config.ngl; + obj["ctx_len"] = model_config.ctx_len; + obj["engine"] = model_config.engine; + obj["prompt_template"] = model_config.prompt_template; + + Json::Value files_array(Json::arrayValue); + for (const std::string& file : model_config.files) + files_array.append(file); + obj["files"] = files_array; + obj["id"] = model_config.id; + obj["created"] = static_cast(model_config.created); + obj["object"] = model_config.object; + obj["owned_by"] = model_config.owned_by; + if (model_config.engine == "cortex.tensorrt-llm") { + obj["trtllm_version"] = model_config.trtllm_version; + } + data.append(std::move(obj)); + } catch (const std::exception& e) { + LOG_ERROR << "Error reading yaml file '" << entry.path().string() + << "': " << e.what(); + } + } + } + } + ret["data"] = data; + ret["result"] = "OK"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); } \ No newline at end of file diff --git a/engine/controllers/models.h b/engine/controllers/models.h index d8f9415e4..789ce1398 100644 --- a/engine/controllers/models.h +++ b/engine/controllers/models.h @@ -14,10 +14,13 @@ class Models : public drogon::HttpController { METHOD_LIST_BEGIN METHOD_ADD(Models::PullModel, "/pull", Post); METHOD_ADD(Models::ListModel, "/list", Get); + METHOD_ADD(Models::GetModel, "/get", Post); METHOD_LIST_END void PullModel(const HttpRequestPtr& req, std::function&& callback) const; void ListModel(const HttpRequestPtr& req, std::function&& callback) const; + void GetModel(const HttpRequestPtr& req, + std::function&& callback) const; }; \ No newline at end of file From 05b4b2c5b35085425cae2d53f6e02724c716f3cd Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 
<35255081+nguyenhoangthuan99@users.noreply.github.com> Date: Thu, 29 Aug 2024 13:54:44 +0700 Subject: [PATCH 13/16] Feat background process server (#1043) --- engine/CMakeLists.txt | 5 +- engine/commands/model_get_cmd.cc | 6 + engine/commands/model_get_cmd.h | 2 + engine/controllers/command_line_parser.cc | 2 +- engine/main.cc | 160 +++++++++++++++------- engine/test/components/CMakeLists.txt | 2 +- 6 files changed, 122 insertions(+), 55 deletions(-) diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 7ba095d6b..2ac1947d4 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -81,7 +81,10 @@ endif() add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}") -# add_subdirectory(test) +option(CMAKE_BUILD_TEST "Enable testing" OFF) +if(CMAKE_BUILD_TEST) + add_subdirectory(test) +endif() find_package(jsoncpp CONFIG REQUIRED) find_package(Drogon CONFIG REQUIRED) diff --git a/engine/commands/model_get_cmd.cc b/engine/commands/model_get_cmd.cc index b9f1c5d52..82691ea32 100644 --- a/engine/commands/model_get_cmd.cc +++ b/engine/commands/model_get_cmd.cc @@ -7,6 +7,7 @@ #include "utils/cortex_utils.h" namespace commands { + ModelGetCmd::ModelGetCmd(std::string model_handle) : model_handle_(std::move(model_handle)) {} @@ -17,6 +18,7 @@ void ModelGetCmd::Exec() { // Iterate through directory for (const auto& entry : std::filesystem::directory_iterator(cortex_utils::models_folder)) { + if (entry.is_regular_file() && entry.path().stem() == model_handle_ && entry.path().extension() == ".yaml") { try { @@ -60,6 +62,7 @@ void ModelGetCmd::Exec() { if (!std::isnan(static_cast(model_config.max_tokens))) std::cout << "max_tokens: " << model_config.max_tokens << "\n"; if (!std::isnan(static_cast(model_config.stream))) + std::cout << "stream: " << std::boolalpha << model_config.stream << "\n"; if (!std::isnan(static_cast(model_config.ngl))) @@ -71,6 +74,7 @@ void ModelGetCmd::Exec() { if (!model_config.engine.empty()) std::cout << "engine: " << model_config.engine << "\n"; if (!model_config.prompt_template.empty()) + std::cout << "prompt_template: " << model_config.prompt_template << "\n"; if (!model_config.system_template.empty()) @@ -86,6 +90,7 @@ void ModelGetCmd::Exec() { if (!model_config.gpu_arch.empty()) std::cout << "gpu_arch: " << model_config.gpu_arch << "\n"; if (!model_config.quantization_method.empty()) + std::cout << "quantization_method: " << model_config.quantization_method << "\n"; if (!model_config.precision.empty()) @@ -96,6 +101,7 @@ void ModelGetCmd::Exec() { // Print non-null strings if (!model_config.trtllm_version.empty()) + std::cout << "trtllm_version: " << model_config.trtllm_version << "\n"; if (!std::isnan(static_cast(model_config.text_model))) diff --git a/engine/commands/model_get_cmd.h b/engine/commands/model_get_cmd.h index 8de48e23d..9bd9d2213 100644 --- a/engine/commands/model_get_cmd.h +++ b/engine/commands/model_get_cmd.h @@ -1,11 +1,13 @@ #pragma once + #include // For std::isnan #include namespace commands { class ModelGetCmd { public: + ModelGetCmd(std::string model_handle); void Exec(); diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 42a5f8731..d4068acb9 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -2,7 +2,6 @@ #include "commands/engine_init_cmd.h" #include "commands/model_list_cmd.h" #include "commands/model_get_cmd.h" - #include "commands/model_pull_cmd.h" #include "commands/start_model_cmd.h" #include 
"commands/stop_model_cmd.h" @@ -140,6 +139,7 @@ void CommandLineParser::EngineInstall(CLI::App* parent, "install", "Install " + engine_name + " engine"); install_cmd->add_option("-v, --version", version, "Engine version. Default will be latest"); + install_cmd->callback([engine_name, &version] { commands::EngineInitCmd eic(engine_name, version); eic.Exec(); diff --git a/engine/main.cc b/engine/main.cc index 143cb94e6..75e0881f6 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -10,8 +10,10 @@ #if defined(__APPLE__) && defined(__MACH__) #include // for dirname() #include +#include #elif defined(__linux__) #include // for dirname() +#include #include // for readlink() #elif defined(_WIN32) #include @@ -20,8 +22,104 @@ #error "Unsupported platform!" #endif + +void RunServer(){ + // Create logs/ folder and setup log to file + std::filesystem::create_directory(cortex_utils::logs_folder); + trantor::AsyncFileLogger asyncFileLogger; + asyncFileLogger.setFileName(cortex_utils::logs_base_name); + asyncFileLogger.startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + asyncFileLogger.output(msg, len); + }, + [&]() { asyncFileLogger.flush(); }); + asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); + // Number of cortex.cpp threads + // if (argc > 1) { + // thread_num = std::atoi(argv[1]); + // } + + // // Check for host argument + // if (argc > 2) { + // host = argv[2]; + // } + + // // Check for port argument + // if (argc > 3) { + // port = std::atoi(argv[3]); // Convert string argument to int + // } + int thread_num = 1; + std::string host = "127.0.0.1"; + int port = 3928; + + int logical_cores = std::thread::hardware_concurrency(); + int drogon_thread_num = std::max(thread_num, logical_cores); + // cortex_utils::nitro_logo(); +#ifdef CORTEX_CPP_VERSION + LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; +#else + LOG_INFO << "cortex.cpp version: undefined"; +#endif + + LOG_INFO << "Server started, listening at: " << host << ":" << port; + LOG_INFO << "Please load your model"; + drogon::app().addListener(host, port); + drogon::app().setThreadNum(drogon_thread_num); + LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); + + drogon::app().run(); + // return 0; +} + +void ForkProcess() { +#if defined(_WIN32) || defined(_WIN64) + // Windows-specific code to create a new process + STARTUPINFO si; + PROCESS_INFORMATION pi; + + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + ZeroMemory(&pi, sizeof(pi)); + std::string cmds = cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server"; + // Create child process + if (!CreateProcess( + NULL, // No module name (use command line) + const_cast(cmds.c_str()), // Command line (replace with your actual executable) + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + FALSE, // Set handle inheritance to FALSE + 0, // No creation flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi)) // Pointer to PROCESS_INFORMATION structure + { + std::cout << "Could not start server: " << GetLastError() << std::endl; + } else { + std::cout << "Server started" << std::endl; + } + +#else + // Unix-like system-specific code to fork a child process + pid_t pid = fork(); + + if (pid < 0) { + // Fork failed + std::cerr << "Could not start server: " << std::endl; + return; + } else if (pid == 0) { + // Child process + RunServer(); + } else { + // Parent process + std::cout 
<< "Server started" << std::endl; + } +#endif +} + int main(int argc, char* argv[]) { - + // Check if this process is for python execution if (argc > 1) { if (strcmp(argv[1], "--run_python_file") == 0) { @@ -44,58 +142,16 @@ int main(int argc, char* argv[]) { } if (argc > 1) { - CommandLineParser clp; - clp.SetupCommand(argc, argv); - return 0; - } - - // Create logs/ folder and setup log to file - std::filesystem::create_directory(cortex_utils::logs_folder); - trantor::AsyncFileLogger asyncFileLogger; - asyncFileLogger.setFileName(cortex_utils::logs_base_name); - asyncFileLogger.startLogging(); - trantor::Logger::setOutputFunction( - [&](const char* msg, const uint64_t len) { - asyncFileLogger.output(msg, len); - }, - [&]() { asyncFileLogger.flush(); }); - asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); - - int thread_num = 1; - std::string host = "127.0.0.1"; - int port = 3928; - - // Number of cortex.cpp threads - if (argc > 1) { - thread_num = std::atoi(argv[1]); - } - - // Check for host argument - if (argc > 2) { - host = argv[2]; - } - - // Check for port argument - if (argc > 3) { - port = std::atoi(argv[3]); // Convert string argument to int + if (strcmp(argv[1], "--start-server") == 0) { + RunServer(); + return 0; + } else { + CommandLineParser clp; + clp.SetupCommand(argc, argv); + return 0; + } } - int logical_cores = std::thread::hardware_concurrency(); - int drogon_thread_num = std::max(thread_num, logical_cores); - // cortex_utils::nitro_logo(); -#ifdef CORTEX_CPP_VERSION - LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; -#else - LOG_INFO << "cortex.cpp version: undefined"; -#endif - - LOG_INFO << "Server started, listening at: " << host << ":" << port; - LOG_INFO << "Please load your model"; - drogon::app().addListener(host, port); - drogon::app().setThreadNum(drogon_thread_num); - LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); - - drogon::app().run(); - + ForkProcess(); return 0; } diff --git a/engine/test/components/CMakeLists.txt b/engine/test/components/CMakeLists.txt index 71a44012b..942c6a92a 100644 --- a/engine/test/components/CMakeLists.txt +++ b/engine/test/components/CMakeLists.txt @@ -8,7 +8,7 @@ add_executable(${PROJECT_NAME} ${SRCS}) find_package(Drogon CONFIG REQUIRED) find_package(GTest CONFIG REQUIRED) -target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gmock +target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gtest_main ${CMAKE_THREAD_LIBS_INIT}) target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../) From c693e55b232146737b094a73368b527229dc1703 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 29 Aug 2024 15:18:40 +0700 Subject: [PATCH 14/16] feat: run command (#1045) --- engine/commands/chat_cmd.cc | 2 +- engine/commands/cmd_info.cc | 54 +++++++++++ engine/commands/cmd_info.h | 14 +++ engine/commands/engine_init_cmd.cc | 57 ++++++----- engine/commands/engine_init_cmd.h | 2 +- engine/commands/model_pull_cmd.cc | 10 +- engine/commands/model_pull_cmd.h | 7 +- ...{start_model_cmd.cc => model_start_cmd.cc} | 12 ++- .../{start_model_cmd.h => model_start_cmd.h} | 6 +- engine/commands/run_cmd.cc | 97 +++++++++++++++++++ engine/commands/run_cmd.h | 22 +++++ engine/controllers/command_line_parser.cc | 41 +++++--- engine/controllers/engines.cc | 2 +- engine/main.cc | 2 - engine/services/download_service.cc | 8 +- engine/services/download_service.h | 2 +- engine/utils/cortexso_parser.h | 2 +- 
engine/utils/model_callback_utils.h | 9 +- 18 files changed, 282 insertions(+), 67 deletions(-) create mode 100644 engine/commands/cmd_info.cc create mode 100644 engine/commands/cmd_info.h rename engine/commands/{start_model_cmd.cc => model_start_cmd.cc} (84%) rename engine/commands/{start_model_cmd.h => model_start_cmd.h} (64%) create mode 100644 engine/commands/run_cmd.cc create mode 100644 engine/commands/run_cmd.h diff --git a/engine/commands/chat_cmd.cc b/engine/commands/chat_cmd.cc index 185dd60fe..2c00053c9 100644 --- a/engine/commands/chat_cmd.cc +++ b/engine/commands/chat_cmd.cc @@ -57,7 +57,7 @@ void ChatCmd::Exec(std::string msg) { } } // Some instruction for user here - std::cout << "Inorder to exit, type exit()" << std::endl; + std::cout << "Inorder to exit, type `exit()`" << std::endl; // Model is loaded, start to chat { while (true) { diff --git a/engine/commands/cmd_info.cc b/engine/commands/cmd_info.cc new file mode 100644 index 000000000..bf13c6bc7 --- /dev/null +++ b/engine/commands/cmd_info.cc @@ -0,0 +1,54 @@ +#include "cmd_info.h" +#include +#include "trantor/utils/Logger.h" + +namespace commands { +namespace { +constexpr const char* kDelimiter = ":"; + +std::vector split(std::string& s, const std::string& delimiter) { + std::vector tokens; + size_t pos = 0; + std::string token; + while ((pos = s.find(delimiter)) != std::string::npos) { + token = s.substr(0, pos); + tokens.push_back(token); + s.erase(0, pos + delimiter.length()); + } + tokens.push_back(s); + + return tokens; +} +} // namespace + +CmdInfo::CmdInfo(std::string model_id) { + Parse(std::move(model_id)); +} + +void CmdInfo::Parse(std::string model_id) { + if (model_id.find(kDelimiter) == std::string::npos) { + engine_name = "cortex.llamacpp"; + model_name = std::move(model_id); + branch = "main"; + } else { + auto res = split(model_id, kDelimiter); + if (res.size() != 2) { + LOG_ERROR << "model_id does not valid"; + return; + } else { + model_name = std::move(res[0]); + branch = std::move(res[1]); + if (branch.find("onnx") != std::string::npos) { + engine_name = "cortex.onnx"; + } else if (branch.find("tensorrt") != std::string::npos) { + engine_name = "cortex.tensorrt-llm"; + } else if (branch.find("gguf") != std::string::npos) { + engine_name = "cortex.llamacpp"; + } else { + LOG_ERROR << "Not a valid branch model_name " << branch; + } + } + } +} + +} // namespace commands \ No newline at end of file diff --git a/engine/commands/cmd_info.h b/engine/commands/cmd_info.h new file mode 100644 index 000000000..460990757 --- /dev/null +++ b/engine/commands/cmd_info.h @@ -0,0 +1,14 @@ +#pragma once +#include +namespace commands { +struct CmdInfo { + explicit CmdInfo(std::string model_id); + + std::string engine_name; + std::string model_name; + std::string branch; + + private: + void Parse(std::string model_id); +}; +} // namespace commands \ No newline at end of file diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc index b4f8fe064..5a1165e23 100644 --- a/engine/commands/engine_init_cmd.cc +++ b/engine/commands/engine_init_cmd.cc @@ -14,10 +14,10 @@ namespace commands { EngineInitCmd::EngineInitCmd(std::string engineName, std::string version) : engineName_(std::move(engineName)), version_(std::move(version)) {} -void EngineInitCmd::Exec() const { +bool EngineInitCmd::Exec() const { if (engineName_.empty()) { LOG_ERROR << "Engine name is required"; - return; + return false; } // Check if the architecture and OS are supported @@ -26,7 +26,7 @@ void EngineInitCmd::Exec() const { 
system_info.os == system_info_utils::kUnsupported) { LOG_ERROR << "Unsupported OS or architecture: " << system_info.os << ", " << system_info.arch; - return; + return false; } LOG_INFO << "OS: " << system_info.os << ", Arch: " << system_info.arch; @@ -34,7 +34,7 @@ void EngineInitCmd::Exec() const { if (std::find(supportedEngines_.begin(), supportedEngines_.end(), engineName_) == supportedEngines_.end()) { LOG_ERROR << "Engine not supported"; - return; + return false; } constexpr auto gitHubHost = "https://api.github.com"; @@ -78,7 +78,7 @@ void EngineInitCmd::Exec() const { LOG_INFO << "Matched variant: " << matched_variant; if (matched_variant.empty()) { LOG_ERROR << "No variant found for " << os_arch; - return; + return false; } for (auto& asset : assets) { @@ -103,36 +103,45 @@ void EngineInitCmd::Exec() const { .path = path, }}}; - DownloadService().AddDownloadTask( - downloadTask, [](const std::string& absolute_path) { - // try to unzip the downloaded file - std::filesystem::path downloadedEnginePath{absolute_path}; - LOG_INFO << "Downloaded engine path: " - << downloadedEnginePath.string(); - - archive_utils::ExtractArchive( - downloadedEnginePath.string(), - downloadedEnginePath.parent_path() - .parent_path() - .string()); - - // remove the downloaded file - std::filesystem::remove(absolute_path); - LOG_INFO << "Finished!"; - }); - - return; + DownloadService().AddDownloadTask(downloadTask, [](const std::string& + absolute_path, + bool unused) { + // try to unzip the downloaded file + std::filesystem::path downloadedEnginePath{absolute_path}; + LOG_INFO << "Downloaded engine path: " + << downloadedEnginePath.string(); + + archive_utils::ExtractArchive( + downloadedEnginePath.string(), + downloadedEnginePath.parent_path().parent_path().string()); + + // remove the downloaded file + // TODO(any) Could not delete file on Windows because it is currently hold by httplib(?) 
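
// Aside: one possible workaround for the deletion TODO above is to retry
// the remove with a short backoff until the downloader's handle is closed.
// A minimal sketch; RemoveWithRetry is hypothetical and not part of this
// patch:

#include <chrono>
#include <filesystem>
#include <system_error>
#include <thread>

inline bool RemoveWithRetry(const std::filesystem::path& p,
                            int attempts = 5) {
  for (int i = 0; i < attempts; ++i) {
    std::error_code ec;
    std::filesystem::remove(p, ec);  // non-throwing overload
    if (!ec) return true;            // removed, or file was already gone
    std::this_thread::sleep_for(std::chrono::milliseconds(200));
  }
  return false;
}
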
+ // Not sure about other platforms + try { + std::filesystem::remove(absolute_path); + } catch (const std::exception& e) { + LOG_ERROR << "Could not delete file: " << e.what(); + } + LOG_INFO << "Finished!"; + }); + + return true; } } } catch (const json::parse_error& e) { std::cerr << "JSON parse error: " << e.what() << std::endl; + return false; } } else { LOG_ERROR << "HTTP error: " << res->status; + return false; } } else { auto err = res.error(); LOG_ERROR << "HTTP error: " << httplib::to_string(err); + return false; } + return true; } }; // namespace commands diff --git a/engine/commands/engine_init_cmd.h b/engine/commands/engine_init_cmd.h index dc75d5cf6..8de74034e 100644 --- a/engine/commands/engine_init_cmd.h +++ b/engine/commands/engine_init_cmd.h @@ -9,7 +9,7 @@ class EngineInitCmd { public: EngineInitCmd(std::string engineName, std::string version); - void Exec() const; + bool Exec() const; private: std::string engineName_; diff --git a/engine/commands/model_pull_cmd.cc b/engine/commands/model_pull_cmd.cc index 9dcd8c4ef..f8e3a7947 100644 --- a/engine/commands/model_pull_cmd.cc +++ b/engine/commands/model_pull_cmd.cc @@ -6,18 +6,20 @@ #include "utils/model_callback_utils.h" namespace commands { -ModelPullCmd::ModelPullCmd(std::string modelHandle) - : modelHandle_(std::move(modelHandle)) {} +ModelPullCmd::ModelPullCmd(std::string model_handle, std::string branch) + : model_handle_(std::move(model_handle)), branch_(std::move(branch)) {} -void ModelPullCmd::Exec() { - auto downloadTask = cortexso_parser::getDownloadTask(modelHandle_); +bool ModelPullCmd::Exec() { + auto downloadTask = cortexso_parser::getDownloadTask(model_handle_, branch_); if (downloadTask.has_value()) { DownloadService downloadService; downloadService.AddDownloadTask(downloadTask.value(), model_callback_utils::DownloadModelCb); std::cout << "Download finished" << std::endl; + return true; } else { std::cout << "Model not found" << std::endl; + return false; } } diff --git a/engine/commands/model_pull_cmd.h b/engine/commands/model_pull_cmd.h index 2c5f658f2..da5713bdf 100644 --- a/engine/commands/model_pull_cmd.h +++ b/engine/commands/model_pull_cmd.h @@ -6,10 +6,11 @@ namespace commands { class ModelPullCmd { public: - ModelPullCmd(std::string modelHandle); - void Exec(); +explicit ModelPullCmd(std::string model_handle, std::string branch); + bool Exec(); private: - std::string modelHandle_; + std::string model_handle_; + std::string branch_; }; } // namespace commands \ No newline at end of file diff --git a/engine/commands/start_model_cmd.cc b/engine/commands/model_start_cmd.cc similarity index 84% rename from engine/commands/start_model_cmd.cc rename to engine/commands/model_start_cmd.cc index 341ba2f9d..0342c3d35 100644 --- a/engine/commands/start_model_cmd.cc +++ b/engine/commands/model_start_cmd.cc @@ -1,14 +1,14 @@ -#include "start_model_cmd.h" +#include "model_start_cmd.h" #include "httplib.h" #include "nlohmann/json.hpp" #include "trantor/utils/Logger.h" namespace commands { -StartModelCmd::StartModelCmd(std::string host, int port, +ModelStartCmd::ModelStartCmd(std::string host, int port, const config::ModelConfig& mc) : host_(std::move(host)), port_(port), mc_(mc) {} -void StartModelCmd::Exec() { +bool ModelStartCmd::Exec() { httplib::Client cli(host_ + ":" + std::to_string(port_)); nlohmann::json json_data; if (mc_.files.size() > 0) { @@ -16,7 +16,7 @@ void StartModelCmd::Exec() { json_data["model_path"] = mc_.files[0]; } else { LOG_WARN << "model_path is empty"; - return; + return false; } 
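
// Usage sketch for ModelStartCmd, mirroring how command_line_parser.cc
// wires it up later in this series; StartTinyllama is a hypothetical demo
// function, and the host, port, and yaml path are illustrative assumptions:

#include "commands/model_start_cmd.h"
#include "config/yaml_config.h"
#include "utils/cortex_utils.h"

inline bool StartTinyllama() {
  config::YamlHandler yaml_handler;
  yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() +
                                   "/models/tinyllama/model.yml");
  commands::ModelStartCmd msc("127.0.0.1", 3928,
                              yaml_handler.GetModelConfig());
  return msc.Exec();  // false when model_path is empty or the POST fails
}
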
json_data["model"] = mc_.name; json_data["system_prompt"] = mc_.system_template; @@ -27,7 +27,7 @@ void StartModelCmd::Exec() { json_data["engine"] = mc_.engine; auto data_str = json_data.dump(); - + cli.set_read_timeout(std::chrono::seconds(60)); auto res = cli.Post("/inferences/server/loadmodel", httplib::Headers(), data_str.data(), data_str.size(), "application/json"); if (res) { @@ -37,7 +37,9 @@ void StartModelCmd::Exec() { } else { auto err = res.error(); LOG_WARN << "HTTP error: " << httplib::to_string(err); + return false; } + return true; } }; // namespace commands \ No newline at end of file diff --git a/engine/commands/start_model_cmd.h b/engine/commands/model_start_cmd.h similarity index 64% rename from engine/commands/start_model_cmd.h rename to engine/commands/model_start_cmd.h index 27cfc59e6..809f71c83 100644 --- a/engine/commands/start_model_cmd.h +++ b/engine/commands/model_start_cmd.h @@ -5,10 +5,10 @@ namespace commands { -class StartModelCmd{ +class ModelStartCmd{ public: - StartModelCmd(std::string host, int port, const config::ModelConfig& mc); - void Exec(); + explicit ModelStartCmd(std::string host, int port, const config::ModelConfig& mc); + bool Exec(); private: std::string host_; diff --git a/engine/commands/run_cmd.cc b/engine/commands/run_cmd.cc new file mode 100644 index 000000000..1c7e5c7e6 --- /dev/null +++ b/engine/commands/run_cmd.cc @@ -0,0 +1,97 @@ +#include "run_cmd.h" +#include "chat_cmd.h" +#include "cmd_info.h" +#include "config/yaml_config.h" +#include "engine_init_cmd.h" +#include "httplib.h" +#include "model_pull_cmd.h" +#include "model_start_cmd.h" +#include "trantor/utils/Logger.h" +#include "utils/cortex_utils.h" + +namespace commands { + +RunCmd::RunCmd(std::string host, int port, std::string model_id) + : host_(std::move(host)), port_(port), model_id_(std::move(model_id)) {} + +void RunCmd::Exec() { + auto address = host_ + ":" + std::to_string(port_); + CmdInfo ci(model_id_); + std::string model_file = + ci.branch == "main" ? ci.model_name : ci.model_name + "-" + ci.branch; + // TODO should we clean all resource if something fails? + // Check if model existed. If not, download it + { + if (!IsModelExisted(model_file)) { + ModelPullCmd model_pull_cmd(ci.model_name, ci.branch); + if (!model_pull_cmd.Exec()) { + return; + } + } + } + + // Check if engine existed. 
If not, download it + { + if (!IsEngineExisted(ci.engine_name)) { + EngineInitCmd eic(ci.engine_name, ""); + if (!eic.Exec()) + return; + } + } + + // Start model + config::YamlHandler yaml_handler; + yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() + "/models/" + + model_file + ".yaml"); + { + ModelStartCmd msc(host_, port_, yaml_handler.GetModelConfig()); + if (!msc.Exec()) { + return; + } + } + + // Chat + { + ChatCmd cc(host_, port_, yaml_handler.GetModelConfig()); + cc.Exec(""); + } +} + +bool RunCmd::IsModelExisted(const std::string& model_id) { + if (std::filesystem::exists(cortex_utils::GetCurrentPath() + "/" + + cortex_utils::models_folder) && + std::filesystem::is_directory(cortex_utils::GetCurrentPath() + "/" + + cortex_utils::models_folder)) { + // Iterate through directory + for (const auto& entry : std::filesystem::directory_iterator( + cortex_utils::GetCurrentPath() + "/" + + cortex_utils::models_folder)) { + if (entry.is_regular_file() && entry.path().extension() == ".yaml") { + try { + config::YamlHandler handler; + handler.ModelConfigFromFile(entry.path().string()); + std::cout << entry.path().stem().string() << std::endl; + if (entry.path().stem().string() == model_id) { + return true; + } + } catch (const std::exception& e) { + LOG_ERROR << "Error reading yaml file '" << entry.path().string() + << "': " << e.what(); + } + } + } + } + return false; +} + +bool RunCmd::IsEngineExisted(const std::string& e) { + if (std::filesystem::exists(cortex_utils::GetCurrentPath() + "/" + + "engines") && + std::filesystem::exists(cortex_utils::GetCurrentPath() + "/" + + "engines/" + e)) { + return true; + } + return false; +} + +}; // namespace commands \ No newline at end of file diff --git a/engine/commands/run_cmd.h b/engine/commands/run_cmd.h new file mode 100644 index 000000000..ca44b9d24 --- /dev/null +++ b/engine/commands/run_cmd.h @@ -0,0 +1,22 @@ +#pragma once +#include +#include +#include "config/model_config.h" +#include "nlohmann/json.hpp" + +namespace commands { +class RunCmd { + public: + explicit RunCmd(std::string host, int port, std::string model_id); + void Exec(); + + private: + bool IsModelExisted(const std::string& model_id); + bool IsEngineExisted(const std::string& e); + + private: + std::string host_; + int port_; + std::string model_id_; +}; +} // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index d4068acb9..835445501 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -1,12 +1,14 @@ #include "command_line_parser.h" +#include "commands/chat_cmd.h" +#include "commands/cmd_info.h" #include "commands/engine_init_cmd.h" -#include "commands/model_list_cmd.h" #include "commands/model_get_cmd.h" +#include "commands/model_list_cmd.h" #include "commands/model_pull_cmd.h" -#include "commands/start_model_cmd.h" +#include "commands/model_start_cmd.h" +#include "commands/run_cmd.h" #include "commands/stop_model_cmd.h" #include "commands/stop_server_cmd.h" -#include "commands/chat_cmd.h" #include "config/yaml_config.h" #include "utils/cortex_utils.h" @@ -14,7 +16,7 @@ CommandLineParser::CommandLineParser() : app_("Cortex.cpp CLI") {} bool CommandLineParser::SetupCommand(int argc, char** argv) { std::string model_id; - + // Models group commands { auto models_cmd = @@ -27,9 +29,9 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { config::YamlHandler yaml_handler; 
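
// Worked examples for the CmdInfo parser added in cmd_info.cc above; the
// model ids are illustrative and the demo function is hypothetical:

#include "commands/cmd_info.h"

inline void CmdInfoExamples() {
  commands::CmdInfo a("tinyllama");       // name "tinyllama", branch "main", engine "cortex.llamacpp"
  commands::CmdInfo b("tinyllama:gguf");  // branch "gguf" -> engine "cortex.llamacpp"
  commands::CmdInfo c("phi3:onnx");       // branch "onnx" -> engine "cortex.onnx"
  commands::CmdInfo d("llama3:tensorrt-llm");  // branch contains "tensorrt" -> engine "cortex.tensorrt-llm"
}
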
yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() + "/models/" + model_id + "/model.yml"); - commands::StartModelCmd smc("127.0.0.1", 3928, + commands::ModelStartCmd msc("127.0.0.1", 3928, yaml_handler.GetModelConfig()); - smc.Exec(); + msc.Exec(); }); auto stop_model_cmd = @@ -55,7 +57,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto get_models_cmd = models_cmd->add_subcommand("get", "Get info of {model_id} locally"); get_models_cmd->add_option("model_id", model_id, ""); - get_models_cmd->callback([&model_id](){ + get_models_cmd->callback([&model_id]() { commands::ModelGetCmd command(model_id); command.Exec(); }); @@ -66,8 +68,10 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { "HuggingFace repositories. For available models, " "please visit https://huggingface.co/cortexso"); model_pull_cmd->add_option("model_id", model_id, ""); + model_pull_cmd->callback([&model_id]() { - commands::ModelPullCmd command(model_id); + commands::CmdInfo ci(model_id); + commands::ModelPullCmd command(ci.model_name, ci.branch); command.Exec(); }); @@ -81,10 +85,9 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { { auto chat_cmd = app_.add_subcommand("chat", "Send a chat request to a model"); - + chat_cmd->add_option("model_id", model_id, ""); - chat_cmd->add_option("-m,--message", msg, - "Message to chat with model"); + chat_cmd->add_option("-m,--message", msg, "Message to chat with model"); chat_cmd->callback([&model_id, &msg] { // TODO(sang) switch to .yaml when implement model manager @@ -115,8 +118,17 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { EngineInstall(engines_cmd, "cortex.tensorrt-llm", version); } - auto run_cmd = - app_.add_subcommand("run", "Shortcut to start a model and chat"); + { + // cortex run tinyllama:gguf + auto run_cmd = + app_.add_subcommand("run", "Shortcut to start a model and chat"); + std::string model_id; + run_cmd->add_option("model_id", model_id, ""); + run_cmd->callback([&model_id] { + commands::RunCmd rc("127.0.0.1", 3928, model_id); + rc.Exec(); + }); + } auto stop_cmd = app_.add_subcommand("stop", "Stop the API server"); @@ -131,7 +143,8 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { } void CommandLineParser::EngineInstall(CLI::App* parent, - const std::string& engine_name, std::string& version) { + const std::string& engine_name, + std::string& version) { auto engine_cmd = parent->add_subcommand(engine_name, "Manage " + engine_name + " engine"); diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 12bea809d..b10a6b758 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -68,7 +68,7 @@ void Engines::InitEngine(const HttpRequestPtr& req, }}}; DownloadService().AddAsyncDownloadTask( - downloadTask, [](const std::string& absolute_path) { + downloadTask, [](const std::string& absolute_path, bool unused) { // try to unzip the downloaded file std::filesystem::path downloadedEnginePath{absolute_path}; LOG_INFO << "Downloaded engine path: " diff --git a/engine/main.cc b/engine/main.cc index 75e0881f6..193f84c60 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -22,7 +22,6 @@ #error "Unsupported platform!" 
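
// End to end, the new `run` subcommand reduces to the following usage
// sketch (hypothetical demo function; default host and port assumed):

#include "commands/run_cmd.h"

inline void RunTinyllama() {
  commands::RunCmd rc("127.0.0.1", 3928, "tinyllama:gguf");
  rc.Exec();  // pulls the model if missing, installs the engine if missing,
              // loads the model, then drops into interactive chat
}
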
#endif - void RunServer(){ // Create logs/ folder and setup log to file std::filesystem::create_directory(cortex_utils::logs_folder); @@ -119,7 +118,6 @@ void ForkProcess() { } int main(int argc, char* argv[]) { - // Check if this process is for python execution if (argc > 1) { if (strcmp(argv[1], "--run_python_file") == 0) { diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc index 4a60a42a8..97c16d650 100644 --- a/engine/services/download_service.cc +++ b/engine/services/download_service.cc @@ -72,8 +72,8 @@ void DownloadService::StartDownloadItem( outputFile.write(data, data_length); return true; }, - [&last, &outputFile, &callback, outputFilePath, this](uint64_t current, - uint64_t total) { + [&item, &last, &outputFile, &callback, outputFilePath, this]( + uint64_t current, uint64_t total) { if (current - last > kUpdateProgressThreshold) { last = current; LOG_INFO << "Downloading: " << current << " / " << total; @@ -83,7 +83,9 @@ void DownloadService::StartDownloadItem( LOG_INFO << "Done download: " << static_cast(total) / 1024 / 1024 << " MiB"; if (callback.has_value()) { - callback.value()(outputFilePath.string()); + auto need_parse_gguf = + item.path.find("cortexso") == std::string::npos; + callback.value()(outputFilePath.string(), need_parse_gguf); } return false; } diff --git a/engine/services/download_service.h b/engine/services/download_service.h index 86aefeb52..4efe653bf 100644 --- a/engine/services/download_service.h +++ b/engine/services/download_service.h @@ -41,7 +41,7 @@ class DownloadService { * * @param task */ - using DownloadItemCb = std::function; + using DownloadItemCb = std::function; void AddDownloadTask(const DownloadTask& task, std::optional callback = std::nullopt); diff --git a/engine/utils/cortexso_parser.h b/engine/utils/cortexso_parser.h index 04f6e7fa5..91efa1fff 100644 --- a/engine/utils/cortexso_parser.h +++ b/engine/utils/cortexso_parser.h @@ -45,7 +45,7 @@ inline std::optional getDownloadTask( } DownloadTask downloadTask{}; - downloadTask.id = modelId; + downloadTask.id = branch == "main" ? modelId : modelId + "-" + branch; downloadTask.type = DownloadType::Model; downloadTask.error = std::nullopt; downloadTask.items = downloadItems; diff --git a/engine/utils/model_callback_utils.h b/engine/utils/model_callback_utils.h index 753fdb205..f5504cda3 100644 --- a/engine/utils/model_callback_utils.h +++ b/engine/utils/model_callback_utils.h @@ -10,7 +10,7 @@ #include "utils/file_manager_utils.h" namespace model_callback_utils { -inline void DownloadModelCb(const std::string& path) { +inline void DownloadModelCb(const std::string& path, bool need_parse_gguf) { std::filesystem::path path_obj(path); std::string filename(path_obj.filename().string()); @@ -29,7 +29,7 @@ inline void DownloadModelCb(const std::string& path) { // currently, only handle downloaded model with only 1 .gguf file // TODO: handle multipart gguf file or different model in 1 repo. 
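
// Spelled out with its template arguments, the callback type widened in
// download_service.h above is:
//
//   using DownloadItemCb = std::function<void(const std::string& absolute_path,
//                                             bool need_parse_gguf)>;
//
// Downloads whose path contains "cortexso" pass need_parse_gguf = false,
// so the .gguf parsing below is skipped for cortexso repos, which are
// expected to ship their own model.yml.
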
else if (path_obj.extension().string().compare(".gguf") == 0) { - + if(!need_parse_gguf) return; config::GGUFHandler gguf_handler; config::YamlHandler yaml_handler; gguf_handler.Parse(path); @@ -40,10 +40,11 @@ inline void DownloadModelCb(const std::string& path) { std::string yml_path(path_obj.parent_path().parent_path().string() + "/" + model_config.id + ".yaml"); std::string yaml_path(path_obj.parent_path().string() + "/model.yml"); - if (!std::filesystem::exists(yml_path)) { // if model.yml doesn't exsited + if (!std::filesystem::exists(yml_path)) { // if model.yml doesn't exist yaml_handler.WriteYamlFile(yml_path); } - if (!std::filesystem::exists(yaml_path)) {// if .yaml doesn't exsited + if (!std::filesystem::exists( + yaml_path)) { // if .yaml doesn't exist yaml_handler.WriteYamlFile(yaml_path); } } From 8fdff72c7d7cd190aeb4bbcdced93cb130ed6693 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 29 Aug 2024 15:59:59 +0700 Subject: [PATCH 15/16] fix: handle stop server (#1048) --- engine/commands/run_cmd.cc | 1 - engine/controllers/processManager.cc | 16 +++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/engine/commands/run_cmd.cc b/engine/commands/run_cmd.cc index 1c7e5c7e6..a84393652 100644 --- a/engine/commands/run_cmd.cc +++ b/engine/commands/run_cmd.cc @@ -70,7 +70,6 @@ bool RunCmd::IsModelExisted(const std::string& model_id) { try { config::YamlHandler handler; handler.ModelConfigFromFile(entry.path().string()); - std::cout << entry.path().stem().string() << std::endl; if (entry.path().stem().string() == model_id) { return true; } diff --git a/engine/controllers/processManager.cc b/engine/controllers/processManager.cc index 2874a3860..15c213453 100644 --- a/engine/controllers/processManager.cc +++ b/engine/controllers/processManager.cc @@ -1,11 +1,17 @@ #include "processManager.h" -#include +#include "utils/cortex_utils.h" + #include +#include void processManager::destroy( - const HttpRequestPtr &req, - std::function &&callback) { + const HttpRequestPtr& req, + std::function&& callback) { + app().quit(); + Json::Value ret; + ret["message"] = "Program is exitting, goodbye!"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); LOG_INFO << "Program is exitting, goodbye!"; - exit(0); - return; }; From 45103c0c03be8403ba53dce31d201162c97a13c8 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 29 Aug 2024 01:53:11 +0700 Subject: [PATCH 16/16] feat: download cuda toolkit Signed-off-by: James --- .gitignore | 3 +- engine/commands/engine_init_cmd.cc | 60 +++++++++++++++- engine/main.cc | 98 ++++++++++++++------------- engine/services/download_service.h | 2 +- engine/utils/cuda_toolkit_utils.h | 63 +++++++++++++++++ engine/utils/file_manager_utils.h | 50 +++++++++++++- engine/utils/semantic_version_utils.h | 34 ++++++++++ engine/utils/system_info_utils.h | 35 ++++++++++ 8 files changed, 291 insertions(+), 54 deletions(-) create mode 100644 engine/utils/cuda_toolkit_utils.h create mode 100644 engine/utils/semantic_version_utils.h diff --git a/.gitignore b/.gitignore index 237fb5b33..6b785abe9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # cortex-js +.DS_Store cortex-js/cortex.db dist *.lock @@ -18,4 +19,4 @@ cortex-js/package-lock.json .vscode cortex-js/command cortex-js/src/infrastructure/commanders/test/test_data -**/vcpkg_installed \ No newline at end of file +**/vcpkg_installed diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc index 
5a1165e23..0f22bd57f 100644 --- a/engine/commands/engine_init_cmd.cc +++ b/engine/commands/engine_init_cmd.cc @@ -7,6 +7,7 @@ #include "utils/archive_utils.h" #include "utils/system_info_utils.h" // clang-format on +#include "utils/cuda_toolkit_utils.h" #include "utils/engine_matcher_utils.h" namespace commands { @@ -103,9 +104,10 @@ bool EngineInitCmd::Exec() const { .path = path, }}}; - DownloadService().AddDownloadTask(downloadTask, [](const std::string& - absolute_path, - bool unused) { + DownloadService download_service; + download_service.AddDownloadTask(downloadTask, [](const std::string& + absolute_path, + bool unused) { // try to unzip the downloaded file std::filesystem::path downloadedEnginePath{absolute_path}; LOG_INFO << "Downloaded engine path: " @@ -125,6 +127,58 @@ bool EngineInitCmd::Exec() const { } LOG_INFO << "Finished!"; }); + if (system_info.os == "mac" || engineName_ == "cortex.onnx") { + return false; + } + // download cuda toolkit + const std::string jan_host = "https://catalog.jan.ai"; + const std::string cuda_toolkit_file_name = "cuda.tar.gz"; + const std::string download_id = "cuda"; + + auto gpu_driver_version = system_info_utils::GetDriverVersion(); + + auto cuda_runtime_version = + cuda_toolkit_utils::GetCompatibleCudaToolkitVersion( + gpu_driver_version, system_info.os, engineName_); + + std::ostringstream cuda_toolkit_path; + cuda_toolkit_path << "dist/cuda-dependencies/" << 11.7 << "/" + << system_info.os << "/" + << cuda_toolkit_file_name; + + LOG_DEBUG << "Cuda toolkit download url: " << jan_host + << cuda_toolkit_path.str(); + + auto downloadCudaToolkitTask = DownloadTask{ + .id = download_id, + .type = DownloadType::CudaToolkit, + .error = std::nullopt, + .items = {DownloadItem{ + .id = download_id, + .host = jan_host, + .fileName = cuda_toolkit_file_name, + .type = DownloadType::CudaToolkit, + .path = cuda_toolkit_path.str(), + }}, + }; + + download_service.AddDownloadTask( + downloadCudaToolkitTask, + [](const std::string& absolute_path, bool unused) { + LOG_DEBUG << "Downloaded cuda path: " << absolute_path; + // try to unzip the downloaded file + std::filesystem::path downloaded_path{absolute_path}; + + archive_utils::ExtractArchive( + absolute_path, + downloaded_path.parent_path().parent_path().string()); + + try { + std::filesystem::remove(absolute_path); + } catch (std::exception& e) { + LOG_ERROR << "Error removing downloaded file: " << e.what(); + } + }); return true; } diff --git a/engine/main.cc b/engine/main.cc index 193f84c60..4b966b3f6 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -22,53 +22,53 @@ #error "Unsupported platform!" 
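
// With the values above, the CUDA dependency archive URL resolves to
// roughly (host and path are joined by the download service):
//
//   https://catalog.jan.ai/dist/cuda-dependencies/11.7/<os>/cuda.tar.gz
//
// Note that cuda_toolkit_path streams the literal 11.7 even though a
// compatible cuda_runtime_version is computed just above it.
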
#endif -void RunServer(){ +void RunServer() { // Create logs/ folder and setup log to file - std::filesystem::create_directory(cortex_utils::logs_folder); - trantor::AsyncFileLogger asyncFileLogger; - asyncFileLogger.setFileName(cortex_utils::logs_base_name); - asyncFileLogger.startLogging(); - trantor::Logger::setOutputFunction( - [&](const char* msg, const uint64_t len) { - asyncFileLogger.output(msg, len); - }, - [&]() { asyncFileLogger.flush(); }); - asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); - // Number of cortex.cpp threads - // if (argc > 1) { - // thread_num = std::atoi(argv[1]); - // } + std::filesystem::create_directory(cortex_utils::logs_folder); + trantor::AsyncFileLogger asyncFileLogger; + asyncFileLogger.setFileName(cortex_utils::logs_base_name); + asyncFileLogger.startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + asyncFileLogger.output(msg, len); + }, + [&]() { asyncFileLogger.flush(); }); + asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); + // Number of cortex.cpp threads + // if (argc > 1) { + // thread_num = std::atoi(argv[1]); + // } - // // Check for host argument - // if (argc > 2) { - // host = argv[2]; - // } + // // Check for host argument + // if (argc > 2) { + // host = argv[2]; + // } - // // Check for port argument - // if (argc > 3) { - // port = std::atoi(argv[3]); // Convert string argument to int - // } - int thread_num = 1; - std::string host = "127.0.0.1"; - int port = 3928; + // // Check for port argument + // if (argc > 3) { + // port = std::atoi(argv[3]); // Convert string argument to int + // } + int thread_num = 1; + std::string host = "127.0.0.1"; + int port = 3928; - int logical_cores = std::thread::hardware_concurrency(); - int drogon_thread_num = std::max(thread_num, logical_cores); - // cortex_utils::nitro_logo(); + int logical_cores = std::thread::hardware_concurrency(); + int drogon_thread_num = std::max(thread_num, logical_cores); + // cortex_utils::nitro_logo(); #ifdef CORTEX_CPP_VERSION - LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; + LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; #else - LOG_INFO << "cortex.cpp version: undefined"; + LOG_INFO << "cortex.cpp version: undefined"; #endif - LOG_INFO << "Server started, listening at: " << host << ":" << port; - LOG_INFO << "Please load your model"; - drogon::app().addListener(host, port); - drogon::app().setThreadNum(drogon_thread_num); - LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); + LOG_INFO << "Server started, listening at: " << host << ":" << port; + LOG_INFO << "Please load your model"; + drogon::app().addListener(host, port); + drogon::app().setThreadNum(drogon_thread_num); + LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); - drogon::app().run(); - // return 0; + drogon::app().run(); + // return 0; } void ForkProcess() { @@ -80,19 +80,21 @@ void ForkProcess() { ZeroMemory(&si, sizeof(si)); si.cb = sizeof(si); ZeroMemory(&pi, sizeof(pi)); - std::string cmds = cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server"; + std::string cmds = + cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server"; // Create child process if (!CreateProcess( NULL, // No module name (use command line) - const_cast(cmds.c_str()), // Command line (replace with your actual executable) - NULL, // Process handle not inheritable - NULL, // Thread handle not inheritable - FALSE, // Set handle inheritance to FALSE - 0, // No creation flags - 
NULL, // Use parent's environment block - NULL, // Use parent's starting directory - &si, // Pointer to STARTUPINFO structure - &pi)) // Pointer to PROCESS_INFORMATION structure + const_cast( + cmds.c_str()), // Command line (replace with your actual executable) + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + FALSE, // Set handle inheritance to FALSE + 0, // No creation flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi)) // Pointer to PROCESS_INFORMATION structure { std::cout << "Could not start server: " << GetLastError() << std::endl; } else { diff --git a/engine/services/download_service.h b/engine/services/download_service.h index 4efe653bf..a8f7f109b 100644 --- a/engine/services/download_service.h +++ b/engine/services/download_service.h @@ -4,7 +4,7 @@ #include #include -enum class DownloadType { Model, Engine, Miscellaneous }; +enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit }; enum class DownloadStatus { Pending, diff --git a/engine/utils/cuda_toolkit_utils.h b/engine/utils/cuda_toolkit_utils.h new file mode 100644 index 000000000..748af1bd3 --- /dev/null +++ b/engine/utils/cuda_toolkit_utils.h @@ -0,0 +1,63 @@ +#include +#include "utils/semantic_version_utils.h" + +namespace cuda_toolkit_utils { +// those semantic versions are based on: https://docs.nvidia.com/deeplearning/cudnn/latest/reference/support-matrix.html#f1 +inline std::string GetCompatibleCudaToolkitVersion( + const std::string& driver_semantic_version, const std::string& os, + const std::string& engine) { + + if (engine == "cortex.tensorrt-llm") { + // if the engine is cortex.tensorrt-llm, the minimum required CUDA version is 12.4 + if (os == "windows") { + if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "527.41") >= 0) { + return "12.4"; + } else { + throw std::runtime_error( + "GPU driver version not supported. Minimum " + "required driver version is 527.41"); + } + } else if (os == "linux") { + if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "525.60.13") >= 0) { + return "12.4"; + } else { + throw std::runtime_error( + "GPU driver version not supported. Minimum required driver version " + "is 525.60.13"); + } + } else { + throw std::runtime_error("Unsupported OS"); + } + } + + if (os == "windows") { + if (semantic_version_utils::CompareSemanticVersion(driver_semantic_version, + "527.41") >= 0) { + return "12.4"; + } else if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "452.39") >= 0) { + return "11.7"; + } else { + throw std::runtime_error( + "GPU driver version not supported. Minimum " + "required driver version is 452.39"); + } + } else if (os == "linux") { + if (semantic_version_utils::CompareSemanticVersion(driver_semantic_version, + "525.60.13") >= 0) { + return "12.4"; + } else if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "450.80.02") >= 0) { + return "11.7"; + } else { + throw std::runtime_error( + "GPU driver version not supported. 
Minimum " + "required driver version is 450.80.02"); + } + } else { + throw std::runtime_error("Unsupported OS"); + } +} +} // namespace cuda_toolkit_utils \ No newline at end of file diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h index 77c6b74a6..334116fe7 100644 --- a/engine/utils/file_manager_utils.h +++ b/engine/utils/file_manager_utils.h @@ -4,17 +4,63 @@ #include #include +#if defined(__APPLE__) && defined(__MACH__) +#include +#elif defined(__linux__) +#include +#elif defined(_WIN32) +#include +#endif + namespace file_manager_utils { +inline std::filesystem::path GetExecutableFolderContainerPath() { +#if defined(__APPLE__) && defined(__MACH__) + char buffer[1024]; + uint32_t size = sizeof(buffer); + + if (_NSGetExecutablePath(buffer, &size) == 0) { + LOG_INFO << "Executable path: " << buffer; + return std::filesystem::path{buffer}.parent_path(); + } else { + LOG_ERROR << "Failed to get executable path"; + return std::filesystem::current_path(); + } +#elif defined(__linux__) + // TODO: haven't tested + char buffer[1024]; + ssize_t len = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1); + if (len != -1) { + buffer[len] = '\0'; + LOG_INFO << "Executable path: " << buffer; + return std::filesystem::path{buffer}.parent_path(); + } else { + LOG_ERROR << "Failed to get executable path"; + return std::filesystem::current_path(); + } +#elif defined(_WIN32) + // TODO: haven't tested + char buffer[MAX_PATH]; + GetModuleFileNameA(NULL, buffer, MAX_PATH); + LOG_INFO << "Executable path: " << buffer; + return std::filesystem::path{buffer}.parent_path(); +#else + LOG_ERROR << "Unsupported platform!"; + return std::filesystem::current_path(); +#endif +} + inline std::filesystem::path GetContainerFolderPath( const std::string_view type) { - const auto current_path{std::filesystem::current_path()}; + const auto current_path{GetExecutableFolderContainerPath()}; auto container_folder_path = std::filesystem::path{}; if (type == "Model") { container_folder_path = current_path / "models"; } else if (type == "Engine") { container_folder_path = current_path / "engines"; + } else if (type == "CudaToolkit") { + container_folder_path = current_path; } else { container_folder_path = current_path / "misc"; } @@ -35,6 +81,8 @@ inline std::string downloadTypeToString(DownloadType type) { return "Engine"; case DownloadType::Miscellaneous: return "Misc"; + case DownloadType::CudaToolkit: + return "CudaToolkit"; default: return "UNKNOWN"; } diff --git a/engine/utils/semantic_version_utils.h b/engine/utils/semantic_version_utils.h new file mode 100644 index 000000000..ea9244dc1 --- /dev/null +++ b/engine/utils/semantic_version_utils.h @@ -0,0 +1,34 @@ +#include +#include + +namespace semantic_version_utils { +inline std::vector SplitVersion(const std::string& version) { + std::vector parts; + std::stringstream ss(version); + std::string part; + + while (std::getline(ss, part, '.')) { + parts.push_back(std::stoi(part)); + } + + while (parts.size() < 3) { + parts.push_back(0); + } + + return parts; +} + +inline int CompareSemanticVersion(const std::string& version1, + const std::string& version2) { + std::vector v1 = SplitVersion(version1); + std::vector v2 = SplitVersion(version2); + + for (size_t i = 0; i < 3; ++i) { + if (v1[i] < v2[i]) + return -1; + if (v1[i] > v2[i]) + return 1; + } + return 0; +} +} // namespace semantic_version_utils \ No newline at end of file diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index d13935295..16a9570b7 
100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -12,6 +12,7 @@ namespace system_info_utils { constexpr static auto kUnsupported{"Unsupported"}; constexpr static auto kCudaVersionRegex{R"(CUDA Version:\s*([\d\.]+))"}; +constexpr static auto kDriverVersionRegex{R"(Driver Version:\s*(\d+\.\d+))"}; constexpr static auto kGpuQueryCommand{ "nvidia-smi --query-gpu=index,memory.total,name,compute_cap " "--format=csv,noheader,nounits"}; @@ -177,6 +178,31 @@ inline bool IsNvidiaSmiAvailable() { #endif } +inline std::string GetDriverVersion() { + if (!IsNvidiaSmiAvailable()) { + LOG_INFO << "nvidia-smi is not available!"; + return ""; + } + try { + CommandExecutor cmd("nvidia-smi"); + auto output = cmd.execute(); + + const std::regex driver_version_reg(kDriverVersionRegex); + std::smatch match; + + if (std::regex_search(output, match, driver_version_reg)) { + LOG_INFO << "Gpu Driver Version: " << match[1].str(); + return match[1].str(); + } else { + LOG_ERROR << "Gpu Driver not found!"; + return ""; + } + } catch (const std::exception& e) { + LOG_ERROR << "Error: " << e.what(); + return ""; + } +} + inline std::string GetCudaVersion() { if (!IsNvidiaSmiAvailable()) { LOG_INFO << "nvidia-smi is not available!"; @@ -207,6 +233,9 @@ struct GpuInfo { std::string vram; std::string name; std::string arch; + // nvidia driver version. Haven't checked for AMD GPU. + std::optional driver_version; + std::optional cuda_driver_version; std::optional compute_cap; }; @@ -271,6 +300,10 @@ inline std::vector GetGpuInfoList() { std::vector gpuInfoList; try { + // TODO: improve by parsing both in one command execution + auto driver_version = GetDriverVersion(); + auto cuda_version = GetCudaVersion(); + CommandExecutor cmd(kGpuQueryCommand); auto output = cmd.execute(); @@ -285,6 +318,8 @@ inline std::vector GetGpuInfoList() { match[2].str(), // vram match[3].str(), // name GetGpuArch(match[3].str()), // arch + driver_version, // driver_version + cuda_version, // cuda_driver_version match[4].str() // compute_cap }; gpuInfoList.push_back(gpuInfo);
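
// Tying the new utilities together: a minimal usage sketch, assuming an
// NVIDIA driver is installed and nvidia-smi is on the PATH; error handling
// elided:

#include <iostream>
#include "utils/cuda_toolkit_utils.h"
#include "utils/system_info_utils.h"

int main() {
  // e.g. "525.60.13" on Linux; empty string when nvidia-smi is unavailable
  auto driver = system_info_utils::GetDriverVersion();
  // newest toolkit the driver supports on Linux: "12.4" for >= 525.60.13,
  // "11.7" for >= 450.80.02; throws below the minimum
  auto cuda = cuda_toolkit_utils::GetCompatibleCudaToolkitVersion(
      driver, "linux", "cortex.llamacpp");
  std::cout << "compatible CUDA toolkit: " << cuda << "\n";  // "12.4"
  return 0;
}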