From 21cde00b5a50f29643a19f1a270258a380104e15 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 2 Dec 2024 00:31:35 +0700 Subject: [PATCH 01/34] chore: add document --- docs/docs/engines/engine-extension.mdx | 153 +++++++++++++++---------- 1 file changed, 95 insertions(+), 58 deletions(-) diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx index 8a62cd813..84000767b 100644 --- a/docs/docs/engines/engine-extension.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -1,89 +1,126 @@ --- -title: Building Engine Extensions +title: Adding a Third-Party Engine to Cortex description: Cortex supports Engine Extensions to integrate both :ocal inference engines, and Remote APIs. --- -:::info -🚧 Cortex is currently under development, and this page is a stub for future development. -::: - - +We welcome suggestions and contributions to improve this integration process. Please feel free to submit issues or pull requests through our repository. From bd1bf91738d87418be8654dc7e485b1cdf3fce73 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 2 Dec 2024 15:13:34 +0700 Subject: [PATCH 02/34] feat: update engine interface --- docs/docs/engines/engine-extension.mdx | 96 ++++++++- engine/cli/commands/server_start_cmd.cc | 22 +- engine/controllers/engines.cc | 5 +- engine/cortex-common/EngineI.h | 30 +++ engine/services/engine_service.cc | 263 ++++++++++-------------- engine/services/engine_service.h | 10 + engine/services/hardware_service.cc | 2 +- engine/utils/config_yaml_utils.cc | 1 + engine/utils/config_yaml_utils.h | 5 +- engine/utils/file_manager_utils.cc | 1 + 10 files changed, 257 insertions(+), 178 deletions(-) diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx index 84000767b..6bb966f60 100644 --- a/docs/docs/engines/engine-extension.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -22,12 +22,32 @@ First, create an engine that implements the `EngineI.h` interface. Here's the in ```cpp class EngineI { public: - struct EngineLoadOption{}; - struct EngineUnloadOption{}; + struct RegisterLibraryOption { + std::vector paths; + }; + + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + + struct EngineUnloadOption { + bool unload_dll; + }; virtual ~EngineI() {} + virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; + virtual void Load(EngineLoadOption opts) = 0; + virtual void Unload(EngineUnloadOption opts) = 0; // Cortex.llamacpp interface methods @@ -65,7 +85,71 @@ class EngineI { }; ``` -Note that Cortex will call `Load` before loading any models and `Unload` when stopping the engine. +#### Lifecycle Management + +##### RegisterLibraryPath + +```cpp +virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; +``` + +This method is called during engine initialization to set up dynamic library search paths. For example, in Linux, we still have to use `LD_LIBRARY_PATH` to add CUDA dependencies to the search path. 
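+
+As a rough illustration, a minimal engine-side implementation might look like
+the sketch below. This is only a sketch, not Cortex's own code: the
+`ExampleEngine` class is hypothetical, and the body simply mirrors the
+`LD_LIBRARY_PATH` / `AddDllDirectory` handling that Cortex's loader code uses
+elsewhere in this patch.
+
+```cpp
+#include <cstdlib>
+#include <filesystem>
+#include <string>
+#if defined(_WIN32)
+#include <windows.h>
+#endif
+
+// ExampleEngine is assumed to derive from EngineI.
+void ExampleEngine::RegisterLibraryPath(RegisterLibraryOption opts) {
+#if defined(__linux__)
+  // Prepend the registered paths to LD_LIBRARY_PATH so dependent shared
+  // objects (e.g. CUDA libraries) can be resolved when the engine loads.
+  std::string merged;
+  for (const auto& p : opts.paths) {
+    std::error_code ec;
+    if (std::filesystem::exists(p, ec))  // skip invalid paths, never throw
+      merged += p.string() + ":";
+  }
+  if (const char* current = std::getenv("LD_LIBRARY_PATH"))
+    merged += current;
+  setenv("LD_LIBRARY_PATH", merged.c_str(), /*overwrite=*/1);
+#elif defined(_WIN32)
+  for (const auto& p : opts.paths) {
+    // Failures would be logged and ignored in a real engine; no exception
+    // may escape this method.
+    AddDllDirectory(p.wstring().c_str());
+  }
+#endif
+}
+```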
+ +**Parameters:** + +- `opts.paths`: Vector of filesystem paths that the engine should register + +**Implementation Requirements:** + +- Register provided paths for dynamic library loading +- Handle invalid paths gracefully +- Thread-safe implementation +- No exceptions should escape the method + +##### Load + +```cpp +virtual void Load(EngineLoadOption opts) = 0; +``` + +Initializes the engine with the provided configuration options. + +**Parameters:** + +- `engine_path`: Base path for engine files +- `cuda_path`: Path to CUDA installation +- `custom_engine_path`: Flag for using custom engine location +- `log_path`: Location for log files +- `max_log_lines`: Maximum number of lines per log file +- `log_level`: Logging verbosity level + +**Implementation Requirements:** + +- Validate all paths before use +- Initialize engine components +- Set up logging configuration +- Handle missing dependencies gracefully +- Clean initialization state in case of failures + +##### Unload + +```cpp +virtual void Unload(EngineUnloadOption opts) = 0; +``` + +Performs cleanup and shutdown of the engine. + +**Parameters:** + +- `unload_dll`: Boolean flag indicating whether to unload dynamic libraries + +**Implementation Requirements:** + +- Clean up all allocated resources +- Close file handles and connections +- Release memory +- Ensure proper shutdown of running models +- Handle cleanup in a thread-safe manner ### 2. Create a Dynamic Library @@ -98,7 +182,7 @@ To test your engine locally: 1. Create a directory structure following this hierarchy: -``` +```bash engines/ └── cortex.llamacpp/ └── mac-arm64/ @@ -107,12 +191,12 @@ engines/ └── version.txt ``` -2. Configure your engine: +1. Configure your engine: - Edit the `~/.cortexrc` file to register your engine name - Add your model with the appropriate engine field in `model.yaml` -3. Testing: +2. 
Testing: - Start the engine - Load your model - Verify functionality diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc index ba4f7bd82..3d52f3d25 100644 --- a/engine/cli/commands/server_start_cmd.cc +++ b/engine/cli/commands/server_start_cmd.cc @@ -1,9 +1,12 @@ #include "server_start_cmd.h" #include "commands/cortex_upd_cmd.h" +#include "services/engine_service.h" #include "utils/cortex_utils.h" -#include "utils/engine_constants.h" #include "utils/file_manager_utils.h" + +#if defined(_WIN32) || defined(_WIN64) #include "utils/widechar_conv.h" +#endif namespace commands { @@ -108,22 +111,9 @@ bool ServerStartCmd::Exec(const std::string& host, int port, std::cerr << "Could not start server: " << std::endl; return false; } else if (pid == 0) { - // No need to configure LD_LIBRARY_PATH for macOS -#if !defined(__APPLE__) || !defined(__MACH__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); + // Some engines requires to add lib search path before process being created + EngineService().RegisterEngineLibPath(); - auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif std::string p = cortex_utils::GetCurrentPath() + "/" + exe; execl(p.c_str(), exe.c_str(), "--start-server", "--config_file_path", get_config_file_path().c_str(), "--data_folder_path", diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 9e110bd66..8a5e5010b 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -23,10 +23,9 @@ std::string NormalizeEngine(const std::string& engine) { void Engines::ListEngine( const HttpRequestPtr& req, std::function&& callback) const { - std::vector supported_engines{kLlamaEngine, kOnnxEngine, - kTrtLlmEngine}; Json::Value ret; - for (const auto& engine : supported_engines) { + auto engine_names = engine_service_->GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { auto installed_engines = engine_service_->GetInstalledEngineVariants(engine); if (installed_engines.has_error()) { diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 95ce605de..da2f5a5ab 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -7,8 +8,37 @@ #include "trantor/utils/Logger.h" class EngineI { public: + struct RegisterLibraryOption { + std::vector paths; + }; + + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + + struct EngineUnloadOption { + bool unload_dll; + }; + virtual ~EngineI() {} + /** + * Being called before starting process to register dependencies search paths. 
+ */ + virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; + + virtual void Load(EngineLoadOption opts) = 0; + + virtual void Unload(EngineUnloadOption opts) = 0; + // cortex.llamacpp interface virtual void HandleChatCompletion( std::shared_ptr json_body, diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index c52e32ef0..0df4a8ccb 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include "algorithm" #include "utils/archive_utils.h" #include "utils/engine_constants.h" @@ -179,6 +180,7 @@ cpp::result EngineService::UninstallEngineVariant( const std::string& engine, const std::optional version, const std::optional variant) { auto ne = NormalizeEngine(engine); + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -272,6 +274,7 @@ cpp::result EngineService::DownloadEngine( if (selected_variant == std::nullopt) { return cpp::fail("Failed to find a suitable variant for " + engine); } + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(engine)) { CTL_INF("Engine " << engine << " is already loaded, unloading it"); auto unload_res = UnloadEngine(engine); @@ -503,6 +506,7 @@ EngineService::SetDefaultEngineVariant(const std::string& engine, " is not installed yet!"); } + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -631,7 +635,6 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const { } bool EngineService::IsEngineLoaded(const std::string& engine) { - std::lock_guard lock(engines_mutex_); auto ne = NormalizeEngine(engine); return engines_.find(ne) != engines_.end(); } @@ -647,16 +650,43 @@ cpp::result EngineService::GetLoadedEngine( return engines_[ne].engine; } -cpp::result EngineService::LoadEngine( - const std::string& engine_name) { - auto ne = NormalizeEngine(engine_name); - - if (IsEngineLoaded(ne)) { - CTL_INF("Engine " << ne << " is already loaded"); - return {}; +void EngineService::RegisterEngineLibPath() { + auto engine_names = GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { + auto ne = NormalizeEngine(engine); + try { + auto engine_dir_path_res = GetEngineDirPath(engine); + if (engine_dir_path_res.has_error()) { + CTL_ERR( + "Could not get engine dir path: " << engine_dir_path_res.error()); + continue; + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + auto dylib = std::make_unique(engine_dir_path.string(), + "engine"); + + auto cuda_path = file_manager_utils::GetCudaToolkitPath(ne); + // init + auto func = dylib->get_function("get_engine"); + auto engine = func(); + std::vector paths{}; + auto register_opts = EngineI::RegisterLibraryOption{ + .paths = paths, + }; + engine->RegisterLibraryPath(register_opts); + delete engine; + CTL_DBG("Register lib path for: " << engine); + } catch (const std::exception& e) { + CTL_WRN("Failed to registering engine lib path: " << e.what()); + } } +} - CTL_INF("Loading engine: " << ne); +cpp::result, std::string> +EngineService::GetEngineDirPath(const std::string& engine_name) { + auto ne = NormalizeEngine(engine_name); auto selected_engine_variant = GetDefaultEngineVariant(ne); @@ -672,6 +702,7 @@ cpp::result EngineService::LoadEngine( auto 
user_defined_engine_path = getenv("ENGINE_PATH"); #endif + auto custom_engine_path = user_defined_engine_path != nullptr; CTL_DBG("user defined engine path: " << user_defined_engine_path); const std::filesystem::path engine_dir_path = [&] { if (user_defined_engine_path != nullptr) { @@ -685,175 +716,99 @@ cpp::result EngineService::LoadEngine( } }(); - CTL_DBG("Engine path: " << engine_dir_path.string()); - if (!std::filesystem::exists(engine_dir_path)) { CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!"); return cpp::fail("Directory " + engine_dir_path.string() + " is not exist!"); } - CTL_INF("Engine path: " << engine_dir_path.string()); + CTL_INF("Engine path: " << engine_dir_path.string() + << ", custom_engine_path: " << custom_engine_path); + return std::make_pair(engine_dir_path, custom_engine_path); +} - try { -#if defined(_WIN32) - // TODO(?) If we only allow to load an engine at a time, the logic is simpler. - // We would like to support running multiple engines at the same time. Therefore, - // the adding/removing dll directory logic is quite complicated: - // 1. If llamacpp is loaded and new requested engine is tensorrt-llm: - // Unload the llamacpp dll directory then load the tensorrt-llm - // 2. If tensorrt-llm is loaded and new requested engine is llamacpp: - // Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful) - // 3. Add dll directory if met other conditions - - auto add_dll = [this](const std::string& e_type, - const std::filesystem::path& p) { - if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) { - CTL_DBG("Added dll directory: " << p.string()); - engines_[e_type].cookie = cookie; - } else { - CTL_WRN("Could not add dll directory: " << p.string()); - } +cpp::result EngineService::LoadEngine( + const std::string& engine_name) { + auto ne = NormalizeEngine(engine_name); - auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type); - if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str()); - cuda_cookie != 0) { - CTL_DBG("Added cuda dll directory: " << p.string()); - engines_[e_type].cuda_cookie = cuda_cookie; - } else { - CTL_WRN("Could not add cuda dll directory: " << p.string()); - } - }; + std::lock_guard lock(engines_mutex_); + if (IsEngineLoaded(ne)) { + CTL_INF("Engine " << ne << " is already loaded"); + return {}; + } -#if defined(_WIN32) - if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH")); -#else - if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); -#endif - should_use_dll_search_path) { - if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && - should_use_dll_search_path) { - - { - std::lock_guard lock(engines_mutex_); - // Remove llamacpp dll directory - if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) { - CTL_WRN("Could not remove dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed dll directory: " << kLlamaRepo); - } - if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed cuda dll directory: " << kLlamaRepo); - } - } + CTL_INF("Loading engine: " << ne); - add_dll(ne, engine_dir_path); - } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) { - // Do nothing - } else { - add_dll(ne, engine_dir_path); - } - } -#endif - { - std::lock_guard lock(engines_mutex_); - engines_[ne].dl = std::make_unique( - engine_dir_path.string(), "engine"); - } -#if defined(__linux__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string 
v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - CTL_INF("llamacpp_path: " << llamacpp_path); - // tensorrt is not supported for now - // auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); - - auto new_v = llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif + auto engine_dir_path_res = GetEngineDirPath(ne); + if (engine_dir_path_res.has_error()) { + return cpp::fail(engine_dir_path_res.error()); + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + try { + auto dylib = + std::make_unique(engine_dir_path.string(), "engine"); + + auto config = file_manager_utils::GetCortexConfig(); + + auto log_path = + std::filesystem::path(config.logFolderPath) / + std::filesystem::path( + config.logLlamaCppPath); // for now seems like we use same log path + + // init + auto func = dylib->get_function("get_engine"); + auto engine_obj = func(); + auto load_opts = EngineI::EngineLoadOption{ + .engine_path = engine_dir_path, + .cuda_path = file_manager_utils::GetCudaToolkitPath(ne), + .custom_engine_path = custom_engine_path, + .log_path = log_path, + .max_log_lines = config.maxLogLines, + .log_level = logging_utils_helper::global_log_level, + }; + engine_obj->Load(load_opts); + + engines_[ne].engine = engine_obj; + engines_[ne].dl = std::move(dylib); + CTL_DBG("Engine loaded: " << ne); + return {}; } catch (const cortex_cpp::dylib::load_error& e) { CTL_ERR("Could not load engine: " << e.what()); - { - std::lock_guard lock(engines_mutex_); - engines_.erase(ne); - } + engines_.erase(ne); return cpp::fail("Could not load engine " + ne + ": " + e.what()); } - - { - std::lock_guard lock(engines_mutex_); - auto func = engines_[ne].dl->get_function("get_engine"); - engines_[ne].engine = func(); - - auto& en = std::get(engines_[ne].engine); - if (ne == kLlamaRepo) { //fix for llamacpp engine first - auto config = file_manager_utils::GetCortexConfig(); - if (en->IsSupported("SetFileLogger")) { - en->SetFileLogger(config.maxLogLines, - (std::filesystem::path(config.logFolderPath) / - std::filesystem::path(config.logLlamaCppPath)) - .string()); - } else { - CTL_WRN("Method SetFileLogger is not supported yet"); - } - if (en->IsSupported("SetLogLevel")) { - en->SetLogLevel(logging_utils_helper::global_log_level); - } else { - CTL_WRN("Method SetLogLevel is not supported yet"); - } - } - CTL_DBG("loaded engine: " << ne); - } - return {}; } cpp::result EngineService::UnloadEngine( const std::string& engine) { auto ne = NormalizeEngine(engine); - { - std::lock_guard lock(engines_mutex_); - if (!IsEngineLoaded(ne)) { - return cpp::fail("Engine " + ne + " is not loaded yet!"); - } - EngineI* e = std::get(engines_[ne].engine); - delete e; + LOG_INFO << "Unloading engine " << ne; -#if defined(_WIN32) - if (!RemoveDllDirectory(engines_[ne].cookie)) { - CTL_WRN("Could not remove dll directory: " << ne); - } else { - CTL_DBG("Removed dll directory: " << ne); - } - if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << ne); - } else { - CTL_DBG("Removed cuda dll directory: " << ne); - } -#endif - engines_.erase(ne); + std::lock_guard lock(engines_mutex_); + if (!IsEngineLoaded(ne)) { + return cpp::fail("Engine " + ne + " is not loaded yet!"); } - CTL_DBG("Unloaded engine " + ne); + auto* e = 
std::get(engines_[ne].engine); + auto unload_opts = EngineI::EngineUnloadOption{ + .unload_dll = true, + }; + e->Unload(unload_opts); + delete e; + engines_.erase(ne); + CTL_DBG("Engine unloaded: " + ne); return {}; } std::vector EngineService::GetLoadedEngines() { - { - std::lock_guard lock(engines_mutex_); - std::vector loaded_engines; - for (const auto& [key, value] : engines_) { - loaded_engines.push_back(value.engine); - } - return loaded_engines; + std::lock_guard lock(engines_mutex_); + std::vector loaded_engines; + for (const auto& [key, value] : engines_) { + loaded_engines.push_back(value.engine); } + return loaded_engines; } cpp::result @@ -899,6 +854,7 @@ cpp::result EngineService::UpdateEngine( CTL_INF("Default variant: " << default_variant->variant << ", version: " + default_variant->version); + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -955,3 +911,8 @@ cpp::result EngineService::UpdateEngine( .from = default_variant->version, .to = latest_version->tag_name}; } + +cpp::result, std::string> +EngineService::GetSupportedEngineNames() { + return file_manager_utils::GetCortexConfig().supportedEngines; +} diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 47d7c272f..5437cb496 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -65,6 +65,9 @@ class EngineService : public EngineServiceI { .cuda_driver_version = system_info_utils::GetDriverAndCudaVersion().second} {} + // just for initialize supported engines + EngineService() {}; + std::vector GetEngineInfoList() const; /** @@ -123,6 +126,10 @@ class EngineService : public EngineServiceI { cpp::result UpdateEngine( const std::string& engine); + cpp::result, std::string> GetSupportedEngineNames(); + + void RegisterEngineLibPath(); + private: cpp::result DownloadEngine( const std::string& engine, const std::string& version = "latest", @@ -134,6 +141,9 @@ class EngineService : public EngineServiceI { std::string GetMatchedVariant(const std::string& engine, const std::vector& variants); + cpp::result, std::string> + GetEngineDirPath(const std::string& engine_name); + cpp::result IsEngineVariantReady( const std::string& engine, const std::string& version, const std::string& variant); diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 681ca7578..a5890eab9 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -5,11 +5,11 @@ #if defined(_WIN32) || defined(_WIN64) #include #include +#include "utils/widechar_conv.h" #endif #include "cli/commands/cortex_upd_cmd.h" #include "database/hardware.h" #include "utils/cortex_utils.h" -#include "utils/widechar_conv.h" namespace services { diff --git a/engine/utils/config_yaml_utils.cc b/engine/utils/config_yaml_utils.cc index 4d6f47ebe..3c5e6b727 100644 --- a/engine/utils/config_yaml_utils.cc +++ b/engine/utils/config_yaml_utils.cc @@ -42,6 +42,7 @@ cpp::result CortexConfigMgr::DumpYamlConfig( node["noProxy"] = config.noProxy; node["verifyPeerSsl"] = config.verifyPeerSsl; node["verifyHostSsl"] = config.verifyHostSsl; + node["supportedEngines"] = config.supportedEngines; out_file << node; out_file.close(); diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index aa1b4027e..caaa4dacf 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -5,6 +5,7 @@ #include #include 
#include +#include "utils/engine_constants.h" #include "utils/logging_utils.h" #include "utils/result.hpp" #include "yaml-cpp/yaml.h" @@ -22,6 +23,8 @@ constexpr const auto kDefaultCorsEnabled = true; const std::vector kDefaultEnabledOrigins{ "http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"}; constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1"; +const std::vector kDefaultSupportedEngines{ + kLlamaEngine, kOnnxEngine, kTrtLlmEngine}; struct CortexConfig { std::string logFolderPath; @@ -59,6 +62,7 @@ struct CortexConfig { bool verifyPeerSsl; bool verifyHostSsl; + std::vector supportedEngines; }; class CortexConfigMgr { @@ -82,5 +86,4 @@ class CortexConfigMgr { CortexConfig FromYaml(const std::string& path, const CortexConfig& default_cfg); }; - } // namespace config_yaml_utils diff --git a/engine/utils/file_manager_utils.cc b/engine/utils/file_manager_utils.cc index 11128a275..4f2a68804 100644 --- a/engine/utils/file_manager_utils.cc +++ b/engine/utils/file_manager_utils.cc @@ -185,6 +185,7 @@ config_yaml_utils::CortexConfig GetDefaultConfig() { .noProxy = config_yaml_utils::kDefaultNoProxy, .verifyPeerSsl = true, .verifyHostSsl = true, + .supportedEngines = config_yaml_utils::kDefaultSupportedEngines, }; } From 08fbb8a80e081459cbbf4a539432ac8dc3c3bedb Mon Sep 17 00:00:00 2001 From: James Date: Mon, 2 Dec 2024 00:31:35 +0700 Subject: [PATCH 03/34] chore: add document --- docs/docs/engines/engine-extension.mdx | 153 +++++++++++++++---------- 1 file changed, 95 insertions(+), 58 deletions(-) diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx index 8a62cd813..84000767b 100644 --- a/docs/docs/engines/engine-extension.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -1,89 +1,126 @@ --- -title: Building Engine Extensions +title: Adding a Third-Party Engine to Cortex description: Cortex supports Engine Extensions to integrate both :ocal inference engines, and Remote APIs. --- -:::info -🚧 Cortex is currently under development, and this page is a stub for future development. -::: - - +We welcome suggestions and contributions to improve this integration process. Please feel free to submit issues or pull requests through our repository. From 7d9cf3b851766e5be41b75250e79a0706c5a61a3 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 2 Dec 2024 15:13:34 +0700 Subject: [PATCH 04/34] feat: update engine interface --- docs/docs/engines/engine-extension.mdx | 96 ++++++++- engine/cli/commands/server_start_cmd.cc | 22 +-- engine/controllers/engines.cc | 5 +- engine/cortex-common/EngineI.h | 30 +++ engine/services/engine_service.cc | 246 +++++++++++------------- engine/services/engine_service.h | 12 +- engine/services/hardware_service.cc | 2 +- engine/utils/config_yaml_utils.cc | 1 + engine/utils/config_yaml_utils.h | 5 +- engine/utils/file_manager_utils.cc | 1 + 10 files changed, 253 insertions(+), 167 deletions(-) diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx index 84000767b..6bb966f60 100644 --- a/docs/docs/engines/engine-extension.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -22,12 +22,32 @@ First, create an engine that implements the `EngineI.h` interface. 
Here's the in ```cpp class EngineI { public: - struct EngineLoadOption{}; - struct EngineUnloadOption{}; + struct RegisterLibraryOption { + std::vector paths; + }; + + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + + struct EngineUnloadOption { + bool unload_dll; + }; virtual ~EngineI() {} + virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; + virtual void Load(EngineLoadOption opts) = 0; + virtual void Unload(EngineUnloadOption opts) = 0; // Cortex.llamacpp interface methods @@ -65,7 +85,71 @@ class EngineI { }; ``` -Note that Cortex will call `Load` before loading any models and `Unload` when stopping the engine. +#### Lifecycle Management + +##### RegisterLibraryPath + +```cpp +virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; +``` + +This method is called during engine initialization to set up dynamic library search paths. For example, in Linux, we still have to use `LD_LIBRARY_PATH` to add CUDA dependencies to the search path. + +**Parameters:** + +- `opts.paths`: Vector of filesystem paths that the engine should register + +**Implementation Requirements:** + +- Register provided paths for dynamic library loading +- Handle invalid paths gracefully +- Thread-safe implementation +- No exceptions should escape the method + +##### Load + +```cpp +virtual void Load(EngineLoadOption opts) = 0; +``` + +Initializes the engine with the provided configuration options. + +**Parameters:** + +- `engine_path`: Base path for engine files +- `cuda_path`: Path to CUDA installation +- `custom_engine_path`: Flag for using custom engine location +- `log_path`: Location for log files +- `max_log_lines`: Maximum number of lines per log file +- `log_level`: Logging verbosity level + +**Implementation Requirements:** + +- Validate all paths before use +- Initialize engine components +- Set up logging configuration +- Handle missing dependencies gracefully +- Clean initialization state in case of failures + +##### Unload + +```cpp +virtual void Unload(EngineUnloadOption opts) = 0; +``` + +Performs cleanup and shutdown of the engine. + +**Parameters:** + +- `unload_dll`: Boolean flag indicating whether to unload dynamic libraries + +**Implementation Requirements:** + +- Clean up all allocated resources +- Close file handles and connections +- Release memory +- Ensure proper shutdown of running models +- Handle cleanup in a thread-safe manner ### 2. Create a Dynamic Library @@ -98,7 +182,7 @@ To test your engine locally: 1. Create a directory structure following this hierarchy: -``` +```bash engines/ └── cortex.llamacpp/ └── mac-arm64/ @@ -107,12 +191,12 @@ engines/ └── version.txt ``` -2. Configure your engine: +1. Configure your engine: - Edit the `~/.cortexrc` file to register your engine name - Add your model with the appropriate engine field in `model.yaml` -3. Testing: +2. 
Testing: - Start the engine - Load your model - Verify functionality diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc index ba4f7bd82..3d52f3d25 100644 --- a/engine/cli/commands/server_start_cmd.cc +++ b/engine/cli/commands/server_start_cmd.cc @@ -1,9 +1,12 @@ #include "server_start_cmd.h" #include "commands/cortex_upd_cmd.h" +#include "services/engine_service.h" #include "utils/cortex_utils.h" -#include "utils/engine_constants.h" #include "utils/file_manager_utils.h" + +#if defined(_WIN32) || defined(_WIN64) #include "utils/widechar_conv.h" +#endif namespace commands { @@ -108,22 +111,9 @@ bool ServerStartCmd::Exec(const std::string& host, int port, std::cerr << "Could not start server: " << std::endl; return false; } else if (pid == 0) { - // No need to configure LD_LIBRARY_PATH for macOS -#if !defined(__APPLE__) || !defined(__MACH__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); + // Some engines requires to add lib search path before process being created + EngineService().RegisterEngineLibPath(); - auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif std::string p = cortex_utils::GetCurrentPath() + "/" + exe; execl(p.c_str(), exe.c_str(), "--start-server", "--config_file_path", get_config_file_path().c_str(), "--data_folder_path", diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 3d3c0c037..1d0223d9a 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -23,10 +23,9 @@ std::string NormalizeEngine(const std::string& engine) { void Engines::ListEngine( const HttpRequestPtr& req, std::function&& callback) const { - std::vector supported_engines{kLlamaEngine, kOnnxEngine, - kTrtLlmEngine}; Json::Value ret; - for (const auto& engine : supported_engines) { + auto engine_names = engine_service_->GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { auto installed_engines = engine_service_->GetInstalledEngineVariants(engine); if (installed_engines.has_error()) { diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 51e19c124..11866a708 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -7,8 +8,37 @@ #include "trantor/utils/Logger.h" class EngineI { public: + struct RegisterLibraryOption { + std::vector paths; + }; + + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + + struct EngineUnloadOption { + bool unload_dll; + }; + virtual ~EngineI() {} + /** + * Being called before starting process to register dependencies search paths. 
+ */ + virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; + + virtual void Load(EngineLoadOption opts) = 0; + + virtual void Unload(EngineUnloadOption opts) = 0; + // cortex.llamacpp interface virtual void HandleChatCompletion( std::shared_ptr json_body, diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index fe5317c7d..4f2122f6b 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "algorithm" #include "database/engines.h" @@ -17,6 +18,7 @@ #include "utils/semantic_version_utils.h" #include "utils/system_info_utils.h" #include "utils/url_parser.h" + namespace { std::string GetSuitableCudaVersion(const std::string& engine, const std::string& cuda_driver_version) { @@ -701,6 +703,87 @@ cpp::result EngineService::LoadEngine( CTL_INF("Loading engine: " << ne); + auto engine_dir_path_res = GetEngineDirPath(ne); + if (engine_dir_path_res.has_error()) { + return cpp::fail(engine_dir_path_res.error()); + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + try { + auto dylib = + std::make_unique(engine_dir_path.string(), "engine"); + + auto config = file_manager_utils::GetCortexConfig(); + + auto log_path = + std::filesystem::path(config.logFolderPath) / + std::filesystem::path( + config.logLlamaCppPath); // for now seems like we use same log path + + // init + auto func = dylib->get_function("get_engine"); + auto engine_obj = func(); + auto load_opts = EngineI::EngineLoadOption{ + .engine_path = engine_dir_path, + .cuda_path = file_manager_utils::GetCudaToolkitPath(ne), + .custom_engine_path = custom_engine_path, + .log_path = log_path, + .max_log_lines = config.maxLogLines, + .log_level = logging_utils_helper::global_log_level, + }; + engine_obj->Load(load_opts); + + engines_[ne].engine = engine_obj; + engines_[ne].dl = std::move(dylib); + + CTL_DBG("Engine loaded: " << ne); + return {}; + } catch (const cortex_cpp::dylib::load_error& e) { + CTL_ERR("Could not load engine: " << e.what()); + engines_.erase(ne); + return cpp::fail("Could not load engine " + ne + ": " + e.what()); + } +} + +void EngineService::RegisterEngineLibPath() { + auto engine_names = GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { + auto ne = NormalizeEngine(engine); + try { + auto engine_dir_path_res = GetEngineDirPath(engine); + if (engine_dir_path_res.has_error()) { + CTL_ERR( + "Could not get engine dir path: " << engine_dir_path_res.error()); + continue; + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + auto dylib = std::make_unique(engine_dir_path.string(), + "engine"); + + auto cuda_path = file_manager_utils::GetCudaToolkitPath(ne); + // init + auto func = dylib->get_function("get_engine"); + auto engine = func(); + std::vector paths{}; + auto register_opts = EngineI::RegisterLibraryOption{ + .paths = paths, + }; + engine->RegisterLibraryPath(register_opts); + delete engine; + CTL_DBG("Register lib path for: " << engine); + } catch (const std::exception& e) { + CTL_WRN("Failed to registering engine lib path: " << e.what()); + } + } +} + +cpp::result, std::string> +EngineService::GetEngineDirPath(const std::string& engine_name) { + auto ne = NormalizeEngine(engine_name); + auto selected_engine_variant = GetDefaultEngineVariant(ne); if (selected_engine_variant.has_error()) { @@ 
-715,6 +798,7 @@ cpp::result EngineService::LoadEngine( auto user_defined_engine_path = getenv("ENGINE_PATH"); #endif + auto custom_engine_path = user_defined_engine_path != nullptr; CTL_DBG("user defined engine path: " << user_defined_engine_path); const std::filesystem::path engine_dir_path = [&] { if (user_defined_engine_path != nullptr) { @@ -728,157 +812,38 @@ cpp::result EngineService::LoadEngine( } }(); - CTL_DBG("Engine path: " << engine_dir_path.string()); - if (!std::filesystem::exists(engine_dir_path)) { CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!"); return cpp::fail("Directory " + engine_dir_path.string() + " is not exist!"); } - CTL_INF("Engine path: " << engine_dir_path.string()); - - try { -#if defined(_WIN32) - // TODO(?) If we only allow to load an engine at a time, the logic is simpler. - // We would like to support running multiple engines at the same time. Therefore, - // the adding/removing dll directory logic is quite complicated: - // 1. If llamacpp is loaded and new requested engine is tensorrt-llm: - // Unload the llamacpp dll directory then load the tensorrt-llm - // 2. If tensorrt-llm is loaded and new requested engine is llamacpp: - // Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful) - // 3. Add dll directory if met other conditions - - auto add_dll = [this](const std::string& e_type, - const std::filesystem::path& p) { - if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) { - CTL_DBG("Added dll directory: " << p.string()); - engines_[e_type].cookie = cookie; - } else { - CTL_WRN("Could not add dll directory: " << p.string()); - } - - auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type); - if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str()); - cuda_cookie != 0) { - CTL_DBG("Added cuda dll directory: " << p.string()); - engines_[e_type].cuda_cookie = cuda_cookie; - } else { - CTL_WRN("Could not add cuda dll directory: " << p.string()); - } - }; - -#if defined(_WIN32) - if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH")); -#else - if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); -#endif - should_use_dll_search_path) { - if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && - should_use_dll_search_path) { - - { - - // Remove llamacpp dll directory - if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) { - CTL_WRN("Could not remove dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed dll directory: " << kLlamaRepo); - } - if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed cuda dll directory: " << kLlamaRepo); - } - } - - add_dll(ne, engine_dir_path); - } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) { - // Do nothing - } else { - add_dll(ne, engine_dir_path); - } - } -#endif - engines_[ne].dl = - std::make_unique(engine_dir_path.string(), "engine"); -#if defined(__linux__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - CTL_INF("llamacpp_path: " << llamacpp_path); - // tensorrt is not supported for now - // auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); - - auto new_v = llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif - - } catch 
(const cortex_cpp::dylib::load_error& e) { - CTL_ERR("Could not load engine: " << e.what()); - engines_.erase(ne); - return cpp::fail("Could not load engine " + ne + ": " + e.what()); - } - - auto func = engines_[ne].dl->get_function("get_engine"); - engines_[ne].engine = func(); - - auto& en = std::get(engines_[ne].engine); - if (ne == kLlamaRepo) { //fix for llamacpp engine first - auto config = file_manager_utils::GetCortexConfig(); - if (en->IsSupported("SetFileLogger")) { - en->SetFileLogger(config.maxLogLines, - (std::filesystem::path(config.logFolderPath) / - std::filesystem::path(config.logLlamaCppPath)) - .string()); - } else { - CTL_WRN("Method SetFileLogger is not supported yet"); - } - if (en->IsSupported("SetLogLevel")) { - en->SetLogLevel(logging_utils_helper::global_log_level); - } else { - CTL_WRN("Method SetLogLevel is not supported yet"); - } - } - CTL_DBG("loaded engine: " << ne); - return {}; + CTL_INF("Engine path: " << engine_dir_path.string() + << ", custom_engine_path: " << custom_engine_path); + return std::make_pair(engine_dir_path, custom_engine_path); } cpp::result EngineService::UnloadEngine( const std::string& engine) { auto ne = NormalizeEngine(engine); std::lock_guard lock(engines_mutex_); - { - if (!IsEngineLoaded(ne)) { - return cpp::fail("Engine " + ne + " is not loaded yet!"); - } - if (std::holds_alternative(engines_[ne].engine)) { - delete std::get(engines_[ne].engine); - } else { - delete std::get(engines_[ne].engine); - } - -#if defined(_WIN32) - if (!RemoveDllDirectory(engines_[ne].cookie)) { - CTL_WRN("Could not remove dll directory: " << ne); - } else { - CTL_DBG("Removed dll directory: " << ne); - } - if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << ne); - } else { - CTL_DBG("Removed cuda dll directory: " << ne); - } -#endif + if (!IsEngineLoaded(ne)) { + return cpp::fail("Engine " + ne + " is not loaded yet!"); + } + if (std::holds_alternative(engines_[ne].engine)) { + LOG_INFO << "Unloading engine " << ne; + auto* e = std::get(engines_[ne].engine); + auto unload_opts = EngineI::EngineUnloadOption{ + .unload_dll = true, + }; + e->Unload(unload_opts); + delete e; engines_.erase(ne); + } else { + delete std::get(engines_[ne].engine); } - CTL_DBG("Unloaded engine " + ne); + + CTL_DBG("Engine unloaded: " + ne); return {}; } @@ -1097,4 +1062,9 @@ cpp::result EngineService::GetRemoteModels( } else { return res; } -} \ No newline at end of file +} + +cpp::result, std::string> +EngineService::GetSupportedEngineNames() { + return file_manager_utils::GetCortexConfig().supportedEngines; +} diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index ab274825d..8299655f2 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -13,7 +13,6 @@ #include "cortex-common/cortexpythoni.h" #include "cortex-common/remote_enginei.h" #include "database/engines.h" -#include "extensions/remote-engine/remote_engine.h" #include "services/download_service.h" #include "utils/cpuid/cpu_info.h" #include "utils/dylib.h" @@ -75,6 +74,9 @@ class EngineService : public EngineServiceI { .cuda_driver_version = system_info_utils::GetDriverAndCudaVersion().second} {} + // just for initialize supported engines + EngineService() {}; + std::vector GetEngineInfoList() const; /** @@ -148,6 +150,9 @@ class EngineService : public EngineServiceI { cpp::result GetRemoteModels( const std::string& engine_name); + cpp::result, std::string> GetSupportedEngineNames(); + + void 
RegisterEngineLibPath(); private: bool IsEngineLoaded(const std::string& engine); @@ -162,7 +167,10 @@ class EngineService : public EngineServiceI { std::string GetMatchedVariant(const std::string& engine, const std::vector& variants); + cpp::result, std::string> + GetEngineDirPath(const std::string& engine_name); + cpp::result IsEngineVariantReady( const std::string& engine, const std::string& version, const std::string& variant); -}; \ No newline at end of file +}; diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 681ca7578..a5890eab9 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -5,11 +5,11 @@ #if defined(_WIN32) || defined(_WIN64) #include #include +#include "utils/widechar_conv.h" #endif #include "cli/commands/cortex_upd_cmd.h" #include "database/hardware.h" #include "utils/cortex_utils.h" -#include "utils/widechar_conv.h" namespace services { diff --git a/engine/utils/config_yaml_utils.cc b/engine/utils/config_yaml_utils.cc index ed6437256..c7a696df4 100644 --- a/engine/utils/config_yaml_utils.cc +++ b/engine/utils/config_yaml_utils.cc @@ -49,6 +49,7 @@ cpp::result CortexConfigMgr::DumpYamlConfig( node["verifyHostSsl"] = config.verifyHostSsl; node["sslCertPath"] = config.sslCertPath; node["sslKeyPath"] = config.sslKeyPath; + node["supportedEngines"] = config.supportedEngines; out_file << node; out_file.close(); diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index d36cc48e0..f9925ea86 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -3,6 +3,7 @@ #include #include #include +#include "utils/engine_constants.h" #include "utils/result.hpp" namespace config_yaml_utils { @@ -18,6 +19,8 @@ constexpr const auto kDefaultCorsEnabled = true; const std::vector kDefaultEnabledOrigins{ "http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"}; constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1"; +const std::vector kDefaultSupportedEngines{ + kLlamaEngine, kOnnxEngine, kTrtLlmEngine}; struct CortexConfig { std::string logFolderPath; @@ -57,6 +60,7 @@ struct CortexConfig { bool verifyHostSsl; std::string sslCertPath; std::string sslKeyPath; + std::vector supportedEngines; }; class CortexConfigMgr { @@ -80,5 +84,4 @@ class CortexConfigMgr { CortexConfig FromYaml(const std::string& path, const CortexConfig& default_cfg); }; - } // namespace config_yaml_utils diff --git a/engine/utils/file_manager_utils.cc b/engine/utils/file_manager_utils.cc index ca3d0c07b..338abadac 100644 --- a/engine/utils/file_manager_utils.cc +++ b/engine/utils/file_manager_utils.cc @@ -187,6 +187,7 @@ config_yaml_utils::CortexConfig GetDefaultConfig() { .verifyHostSsl = true, .sslCertPath = "", .sslKeyPath = "", + .supportedEngines = config_yaml_utils::kDefaultSupportedEngines, }; } From 3548342ebab9cdcc7d4becbb499a95d20298f976 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Tue, 10 Dec 2024 08:51:16 +0700 Subject: [PATCH 05/34] Feat: init python engine --- engine/CMakeLists.txt | 1 + engine/config/model_config.h | 316 ++++++++++++++++++ .../extensions/python-engine/python_engine.cc | 0 .../extensions/python-engine/python_engine.h | 0 engine/extensions/template_renderer.cc | 136 ++++++++ engine/extensions/template_renderer.h | 40 +++ engine/utils/config_yaml_utils.h | 2 +- engine/utils/engine_constants.h | 1 + engine/utils/environment_constants.h | 3 + engine/vcpkg.json | 3 +- 10 files changed, 500 insertions(+), 2 
deletions(-) create mode 100644 engine/extensions/python-engine/python_engine.cc create mode 100644 engine/extensions/python-engine/python_engine.h create mode 100644 engine/extensions/template_renderer.cc create mode 100644 engine/extensions/template_renderer.h create mode 100644 engine/utils/environment_constants.h diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index eae09d439..0206d3858 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -142,6 +142,7 @@ file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cortex_openapi.h" add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc + ${CMAKE_CURRENT_SOURCE_DIR}/extensions/template_renderer.cc ) target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 7d4076ee5..a3be375c5 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -1,6 +1,9 @@ #pragma once #include +#include +#include +#include #include #include #include @@ -343,4 +346,317 @@ struct ModelConfig { } }; +struct Endpoint { + std::string method; + std::string path; + std::string transform_request; + std::string transform_response; +}; + +struct PythonModelConfig { + // General Metadata + std::string id; + std::string model; + std::string name; + int version; + + // Inference Parameters + Endpoint load_model; + Endpoint destroy; + Endpoint inference; + Endpoint heath_check; + std::vector extra_endpoints; + + // Model Load Parameters + int port; + std::string log_path; + std::string log_level; + std::string environments; + std::vector command; // New command field + std::string engine; + Json::Value extra_params; // Accept dynamic extra parameters + + // Method to convert C++ struct to YAML + std::string ToYaml() const { + YAML::Emitter out; + out << YAML::BeginMap; + + out << YAML::Key << "id" << YAML::Value << id; + out << YAML::Key << "model" << YAML::Value << model; + out << YAML::Key << "name" << YAML::Value << name; + out << YAML::Key << "version" << YAML::Value << version; + + // Inference Parameters + out << YAML::Key << "load_model" << YAML::Value << YAML::BeginMap; + out << YAML::Key << "method" << YAML::Value << load_model.method; + out << YAML::Key << "path" << YAML::Value << load_model.path; + out << YAML::Key << "transform_request" << YAML::Value + << load_model.transform_request; + out << YAML::Key << "transform_response" << YAML::Value + << load_model.transform_response; + out << YAML::EndMap; + + out << YAML::Key << "destroy" << YAML::Value << YAML::BeginMap; + out << YAML::Key << "method" << YAML::Value << destroy.method; + out << YAML::Key << "path" << YAML::Value << destroy.path; + out << YAML::EndMap; + + out << YAML::Key << "inference" << YAML::Value << YAML::BeginMap; + out << YAML::Key << "method" << YAML::Value << inference.method; + out << YAML::Key << "path" << YAML::Value << inference.path; + out << YAML::EndMap; + + out << YAML::Key << "extra_endpoints" << YAML::Value << YAML::BeginSeq; + for (const auto& endpoint : extra_endpoints) { + out << YAML::BeginMap; + out << YAML::Key << "method" << YAML::Value << endpoint.method; + out << YAML::Key << "path" << YAML::Value << endpoint.path; + out << YAML::EndMap; + } + out << YAML::EndSeq; + + // Model Load Parameters + out << YAML::Key << "port" << YAML::Value << port; + out << YAML::Key << "log_path" << YAML::Value << log_path; + out << YAML::Key << "log_level" << YAML::Value + << log_level; 
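+  // Runtime settings follow: the environment name, the launch command
+  // (serialized as a YAML list), the engine name, and the dynamic
+  // extra_params map.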
+ out << YAML::Key << "environments" << YAML::Value + << environments; + + // Serialize command as YAML list + out << YAML::Key << "command" << YAML::Value << YAML::BeginSeq; + for (const auto& cmd : command) { + out << cmd; + } + out << YAML::EndSeq; + + out << YAML::Key << "engine" << YAML::Value << engine; + + // Serialize extra_params as YAML + out << YAML::Key << "extra_params" << YAML::Value << YAML::BeginMap; + for (Json::ValueConstIterator iter = extra_params.begin(); + iter != extra_params.end(); ++iter) { + out << YAML::Key << iter.key().asString() << YAML::Value + << iter->asString(); + } + out << YAML::EndMap; + return out.c_str(); + } + + // Method to populate struct from YAML file + void ReadFromYaml(const std::string& filePath) { + YAML::Node config = YAML::LoadFile(filePath); + + if (config["id"]) + id = config["id"].as(); + if (config["model"]) + model = config["model"].as(); + if (config["name"]) + name = config["name"].as(); + if (config["version"]) + version = config["version"].as(); + + // Inference Parameters + + auto ip = config; + if (ip["load_model"]) { + load_model.method = + ip["load_model"]["method"].as(); + load_model.path = + ip["load_model"]["path"].as(); + load_model.transform_request = + ip["load_model"]["transform_request"].as(); + load_model.transform_response = + ip["load_model"]["transform_response"].as(); + } + if (ip["destroy"]) { + destroy.method = + ip["destroy"]["method"].as(); + destroy.path = + ip["destroy"]["path"].as(); + } + if (ip["inference"]) { + inference.method = + ip["inference"]["method"].as(); + inference.path = + ip["inference"]["path"].as(); + } + if (ip["extra_endpoints"] && ip["extra_endpoints"].IsSequence()) { + for (const auto& endpoint : ip["extra_endpoints"]) { + Endpoint e; + e.method = endpoint["method"].as(); + e.path = endpoint["path"].as(); + extra_endpoints.push_back(e); + } + } + + + // Model Load Parameters + + auto mlp = config; + if (mlp["port"]) + port = mlp["port"].as(); + if (mlp["log_path"]) + log_path = mlp["log_path"].as(); + if (mlp["log_level"]) + log_level = mlp["log_level"].as(); + if (mlp["environments"]) + environments = mlp["environments"].as(); + if (mlp["engine"]) + engine = mlp["engine"].as(); + + if (mlp["command"] && mlp["command"].IsSequence()) { + for (const auto& cmd : mlp["command"]) { + command.push_back(cmd.as()); + } + } + + if (mlp["extra_params"]) { + for (YAML::const_iterator it = mlp["extra_params"].begin(); + it != mlp["extra_params"].end(); ++it) { + extra_params[it->first.as()] = + it->second.as(); + } + } + + } + + // Method to convert the struct to JSON + std::string ToJson() const { + Json::Value root; + + root["id"] = id; + root["model"] = model; + root["name"] = name; + root["version"] = version; + + // Inference Parameters + root["inference_parameters"]["load_model"]["method"] = + load_model.method; + root["inference_parameters"]["load_model"]["path"] = + load_model.path; + root["inference_parameters"]["load_model"]["transform_request"] = + load_model.transform_request; + root["inference_parameters"]["load_model"]["transform_response"] = + load_model.transform_response; + + root["inference_parameters"]["destroy"]["method"] = + destroy.method; + root["inference_parameters"]["destroy"]["path"] = + destroy.path; + + root["inference_parameters"]["inference"]["method"] = + inference.method; + root["inference_parameters"]["inference"]["path"] = + inference.path; + + for (const auto& endpoint : extra_endpoints) { + Json::Value e; + e["method"] = endpoint.method; + e["path"] = 
endpoint.path; + root["inference_parameters"]["extra_endpoints"].append(e); + } + + // Model Load Parameters + root["model_load_params"]["port"] = port; + root["model_load_params"]["log_path"] = log_path; + root["model_load_params"]["log_level"] = log_level; + root["model_load_params"]["environments"] = environments; + + // Serialize command as JSON array + for (const auto& cmd : command) { + root["model_load_params"]["command"].append(cmd); + } + + root["model_load_params"]["engine"] = engine; + root["model_load_params"]["extra_params"] = + extra_params; // Serialize the JSON value directly + + Json::StreamWriterBuilder writer; + return Json::writeString(writer, root); + } + + // Method to populate struct from JSON + void FromJson(const std::string& jsonString) { + Json::CharReaderBuilder reader; + Json::Value root; + std::string errs; + std::istringstream s(jsonString); + + if (!Json::parseFromStream(reader, s, &root, &errs)) { + std::cerr << "Error parsing JSON: " << errs << std::endl; + return; + } + + if (root.isMember("id")) + id = root["id"].asString(); + if (root.isMember("model")) + model = root["model"].asString(); + if (root.isMember("name")) + name = root["name"].asString(); + if (root.isMember("version")) + version = root["version"].asInt(); + + // Inference Parameters + if (root.isMember("inference_parameters")) { + const Json::Value& ip = root["inference_parameters"]; + if (ip.isMember("load_model")) { + load_model.method = + ip["load_model"]["method"].asString(); + load_model.path = + ip["load_model"]["path"].asString(); + load_model.transform_request = + ip["load_model"]["transform_request"].asString(); + load_model.transform_response = + ip["load_model"]["transform_response"].asString(); + } + if (ip.isMember("destroy")) { + destroy.method = + ip["destroy"]["method"].asString(); + destroy.path = ip["destroy"]["path"].asString(); + } + if (ip.isMember("inference")) { + inference.method = + ip["inference"]["method"].asString(); + inference.path = + ip["inference"]["path"].asString(); + } + if (ip.isMember("extra_endpoints")) { + for (const auto& endpoint : ip["extra_endpoints"]) { + Endpoint e; + e.method = endpoint["method"].asString(); + e.path = endpoint["path"].asString(); + extra_endpoints.push_back(e); + } + } + } + + // Model Load Parameters + if (root.isMember("model_load_params")) { + const Json::Value& mlp = root["model_load_params"]; + if (mlp.isMember("port")) + port = mlp["port"].asInt(); + if (mlp.isMember("log_path")) + log_path = mlp["log_path"].asString(); + if (mlp.isMember("log_level")) + log_level = mlp["log_level"].asString(); + if (mlp.isMember("environments")) + environments = mlp["environments"].asString(); + if (mlp.isMember("engine")) + engine = mlp["engine"].asString(); + + if (mlp.isMember("command")) { + for (const auto& cmd : mlp["command"]) { + command.push_back(cmd.asString()); + } + } + + if (mlp.isMember("extra_params")) { + extra_params = + mlp["extra_params"]; // Directly assign the JSON value + } + } + } +}; + } // namespace config diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc new file mode 100644 index 000000000..e69de29bb diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h new file mode 100644 index 000000000..e69de29bb diff --git a/engine/extensions/template_renderer.cc b/engine/extensions/template_renderer.cc new file mode 100644 index 000000000..15514d17c --- /dev/null +++ 
b/engine/extensions/template_renderer.cc @@ -0,0 +1,136 @@ +#if defined(_WIN32) || defined(_WIN64) +#define NOMINMAX +#undef min +#undef max +#endif +#include "template_renderer.h" +#include +#include +#include "utils/logging_utils.h" +namespace remote_engine { +TemplateRenderer::TemplateRenderer() { + // Configure Inja environment + env_.set_trim_blocks(true); + env_.set_lstrip_blocks(true); + + // Add tojson function for all value types + env_.add_callback("tojson", 1, [](inja::Arguments& args) { + if (args.empty()) { + return nlohmann::json(nullptr); + } + const auto& value = *args[0]; + + if (value.is_string()) { + return nlohmann::json(std::string("\"") + value.get() + + "\""); + } + return value; + }); +} + +std::string TemplateRenderer::Render(const std::string& tmpl, + const Json::Value& data) { + try { + // Convert Json::Value to nlohmann::json + auto json_data = ConvertJsonValue(data); + + // Create the input data structure expected by the template + nlohmann::json template_data; + template_data["input_request"] = json_data; + + // Debug output + LOG_DEBUG << "Template: " << tmpl; + LOG_DEBUG << "Data: " << template_data.dump(2); + + // Render template + std::string result = env_.render(tmpl, template_data); + + // Clean up any potential double quotes in JSON strings + result = std::regex_replace(result, std::regex("\\\"\\\""), "\""); + + LOG_DEBUG << "Result: " << result; + + // Validate JSON + auto parsed = nlohmann::json::parse(result); + + return result; + } catch (const std::exception& e) { + LOG_ERROR << "Template rendering failed: " << e.what(); + LOG_ERROR << "Template: " << tmpl; + throw std::runtime_error(std::string("Template rendering failed: ") + + e.what()); + } +} + +nlohmann::json TemplateRenderer::ConvertJsonValue(const Json::Value& input) { + if (input.isNull()) { + return nullptr; + } else if (input.isBool()) { + return input.asBool(); + } else if (input.isInt()) { + return input.asInt(); + } else if (input.isUInt()) { + return input.asUInt(); + } else if (input.isDouble()) { + return input.asDouble(); + } else if (input.isString()) { + return input.asString(); + } else if (input.isArray()) { + nlohmann::json arr = nlohmann::json::array(); + for (const auto& element : input) { + arr.push_back(ConvertJsonValue(element)); + } + return arr; + } else if (input.isObject()) { + nlohmann::json obj = nlohmann::json::object(); + for (const auto& key : input.getMemberNames()) { + obj[key] = ConvertJsonValue(input[key]); + } + return obj; + } + return nullptr; +} + +Json::Value TemplateRenderer::ConvertNlohmannJson(const nlohmann::json& input) { + if (input.is_null()) { + return Json::Value(); + } else if (input.is_boolean()) { + return Json::Value(input.get()); + } else if (input.is_number_integer()) { + return Json::Value(input.get()); + } else if (input.is_number_unsigned()) { + return Json::Value(input.get()); + } else if (input.is_number_float()) { + return Json::Value(input.get()); + } else if (input.is_string()) { + return Json::Value(input.get()); + } else if (input.is_array()) { + Json::Value arr(Json::arrayValue); + for (const auto& element : input) { + arr.append(ConvertNlohmannJson(element)); + } + return arr; + } else if (input.is_object()) { + Json::Value obj(Json::objectValue); + for (auto it = input.begin(); it != input.end(); ++it) { + obj[it.key()] = ConvertNlohmannJson(it.value()); + } + return obj; + } + return Json::Value(); +} + +std::string TemplateRenderer::RenderFile(const std::string& template_path, + const Json::Value& data) { + try { + // 
Convert Json::Value to nlohmann::json + auto json_data = ConvertJsonValue(data); + + // Load and render template + return env_.render_file(template_path, json_data); + } catch (const std::exception& e) { + throw std::runtime_error(std::string("Template file rendering failed: ") + + e.what()); + } +} +} // namespace remote_engine \ No newline at end of file diff --git a/engine/extensions/template_renderer.h b/engine/extensions/template_renderer.h new file mode 100644 index 000000000..f59e7cc93 --- /dev/null +++ b/engine/extensions/template_renderer.h @@ -0,0 +1,40 @@ +#pragma once + +#include + +#include +#include "json/json.h" +#include "trantor/utils/Logger.h" +// clang-format off +#if defined(_WIN32) || defined(_WIN64) +#define NOMINMAX +#undef min +#undef max +#endif +#include +#include +// clang-format on +namespace remote_engine { +class TemplateRenderer { + public: + TemplateRenderer(); + ~TemplateRenderer() = default; + + // Convert Json::Value to nlohmann::json + static nlohmann::json ConvertJsonValue(const Json::Value& input); + + // Convert nlohmann::json to Json::Value + static Json::Value ConvertNlohmannJson(const nlohmann::json& input); + + // Render template with data + std::string Render(const std::string& tmpl, const Json::Value& data); + + // Load template from file and render + std::string RenderFile(const std::string& template_path, + const Json::Value& data); + + private: + inja::Environment env_; +}; + +} // namespace remote_engine \ No newline at end of file diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index caaa4dacf..674bd594c 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -24,7 +24,7 @@ const std::vector kDefaultEnabledOrigins{ "http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"}; constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1"; const std::vector kDefaultSupportedEngines{ - kLlamaEngine, kOnnxEngine, kTrtLlmEngine}; + kLlamaEngine, kOnnxEngine, kTrtLlmEngine, kPythonEngine}; struct CortexConfig { std::string logFolderPath; diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h index 5dab49936..ac96c94ae 100644 --- a/engine/utils/engine_constants.h +++ b/engine/utils/engine_constants.h @@ -3,6 +3,7 @@ constexpr const auto kOnnxEngine = "onnxruntime"; constexpr const auto kLlamaEngine = "llama-cpp"; constexpr const auto kTrtLlmEngine = "tensorrt-llm"; +constexpr const auto kPythonEngine = "python-engine"; constexpr const auto kOnnxRepo = "cortex.onnx"; constexpr const auto kLlamaRepo = "cortex.llamacpp"; diff --git a/engine/utils/environment_constants.h b/engine/utils/environment_constants.h new file mode 100644 index 000000000..f14df67f8 --- /dev/null +++ b/engine/utils/environment_constants.h @@ -0,0 +1,3 @@ +#pragma once + +constexpr const auto kWhisperVQEnvironment = "whispervq"; \ No newline at end of file diff --git a/engine/vcpkg.json b/engine/vcpkg.json index 36fa322a3..17eb32f4c 100644 --- a/engine/vcpkg.json +++ b/engine/vcpkg.json @@ -13,6 +13,7 @@ "sqlitecpp", "trantor", "indicators", - "lfreist-hwinfo" + "lfreist-hwinfo", + "inja" ] } From 6958db86165f60922729a844cf451c66cab42008 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Tue, 10 Dec 2024 09:09:55 +0700 Subject: [PATCH 06/34] Fix: conflict --- engine/extensions/template_renderer.cc | 2 +- engine/extensions/template_renderer.h | 2 +- engine/services/engine_service.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/engine/extensions/template_renderer.cc b/engine/extensions/template_renderer.cc index 15514d17c..32e7d72f5 100644 --- a/engine/extensions/template_renderer.cc +++ b/engine/extensions/template_renderer.cc @@ -7,7 +7,7 @@ #include #include #include "utils/logging_utils.h" -namespace remote_engine { +namespace extensions { TemplateRenderer::TemplateRenderer() { // Configure Inja environment env_.set_trim_blocks(true); diff --git a/engine/extensions/template_renderer.h b/engine/extensions/template_renderer.h index f59e7cc93..7eccef2eb 100644 --- a/engine/extensions/template_renderer.h +++ b/engine/extensions/template_renderer.h @@ -14,7 +14,7 @@ #include #include // clang-format on -namespace remote_engine { +namespace extensions { class TemplateRenderer { public: TemplateRenderer(); diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 1e877e388..5e2622240 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -686,7 +686,7 @@ cpp::result EngineService::LoadEngine( CTL_INF("Engine " << ne << " is already loaded"); return {}; } -} + // Check for remote engine if (remote_engine::IsRemoteEngine(engine_name)) { From ff2c02df8e84afd0bf0a9625190a001655442494 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Wed, 11 Dec 2024 16:17:54 +0700 Subject: [PATCH 07/34] feat: add python engine implementation --- engine/CMakeLists.txt | 1 - engine/cli/CMakeLists.txt | 3 +- engine/config/model_config.h | 268 +++--- engine/cortex-common/EngineI.h | 3 + .../extensions/python-engine/python_engine.cc | 768 ++++++++++++++++++ .../extensions/python-engine/python_engine.h | 160 ++++ .../extensions/remote-engine/remote_engine.h | 4 +- .../remote-engine/template_renderer.cc | 136 ---- .../remote-engine/template_renderer.h | 40 - 9 files changed, 1062 insertions(+), 321 deletions(-) delete mode 100644 engine/extensions/remote-engine/template_renderer.cc delete mode 100644 engine/extensions/remote-engine/template_renderer.h diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 2c6e55012..db34a8346 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -148,7 +148,6 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/openai_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/anthropic_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/template_renderer.cc ) diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt index 51382dc13..833bd4c03 100644 --- a/engine/cli/CMakeLists.txt +++ b/engine/cli/CMakeLists.txt @@ -85,7 +85,8 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/openai_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/anthropic_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/template_renderer.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/python-engine/python_engine.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/template_renderer.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/easywsclient.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/download_progress.cc ${CMAKE_CURRENT_SOURCE_DIR}/../utils/config_yaml_utils.cc diff --git a/engine/config/model_config.h b/engine/config/model_config.h index f7a0701d9..78f62d9ca 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -546,10 +546,12 @@ struct PythonModelConfig { 
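+  // A minimal, hypothetical model.yml that this struct is intended to
+  // round-trip (field names follow the members below; the concrete values
+  // are illustrative, not from this patch):
+  //
+  //   id: whispervq
+  //   name: WhisperVQ
+  //   port: "3348"
+  //   script: src/app.py
+  //   log_path: whispervq.log
+  //   log_level: INFO
+  //   environment: whispervq
+  //   command: [python]
+  //   engine: python-engine
+  //   extra_params:
+  //     device: cpu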
std::vector extra_endpoints; // Model Load Parameters - int port; + std::string port; + std::string files; + std::string script; std::string log_path; std::string log_level; - std::string environments; + std::string environment; std::vector command; // New command field std::string engine; Json::Value extra_params; // Accept dynamic extra parameters @@ -595,11 +597,11 @@ struct PythonModelConfig { // Model Load Parameters out << YAML::Key << "port" << YAML::Value << port; + out << YAML::Key << "files" << YAML::Value << files; + out << YAML::Key << "script" << YAML::Value << script; out << YAML::Key << "log_path" << YAML::Value << log_path; - out << YAML::Key << "log_level" << YAML::Value - << log_level; - out << YAML::Key << "environments" << YAML::Value - << environments; + out << YAML::Key << "log_level" << YAML::Value << log_level; + out << YAML::Key << "environment" << YAML::Value << environment; // Serialize command as YAML list out << YAML::Key << "command" << YAML::Value << YAML::BeginSeq; @@ -636,71 +638,67 @@ struct PythonModelConfig { // Inference Parameters - auto ip = config; - if (ip["load_model"]) { - load_model.method = - ip["load_model"]["method"].as(); - load_model.path = - ip["load_model"]["path"].as(); - load_model.transform_request = - ip["load_model"]["transform_request"].as(); - load_model.transform_response = - ip["load_model"]["transform_response"].as(); - } - if (ip["destroy"]) { - destroy.method = - ip["destroy"]["method"].as(); - destroy.path = - ip["destroy"]["path"].as(); - } - if (ip["inference"]) { - inference.method = - ip["inference"]["method"].as(); - inference.path = - ip["inference"]["path"].as(); - } - if (ip["extra_endpoints"] && ip["extra_endpoints"].IsSequence()) { - for (const auto& endpoint : ip["extra_endpoints"]) { - Endpoint e; - e.method = endpoint["method"].as(); - e.path = endpoint["path"].as(); - extra_endpoints.push_back(e); - } + auto ip = config; + if (ip["load_model"]) { + load_model.method = ip["load_model"]["method"].as(); + load_model.path = ip["load_model"]["path"].as(); + load_model.transform_request = + ip["load_model"]["transform_request"].as(); + load_model.transform_response = + ip["load_model"]["transform_response"].as(); + } + if (ip["destroy"]) { + destroy.method = ip["destroy"]["method"].as(); + destroy.path = ip["destroy"]["path"].as(); + } + if (ip["inference"]) { + inference.method = ip["inference"]["method"].as(); + inference.path = ip["inference"]["path"].as(); + } + if (ip["extra_endpoints"] && ip["extra_endpoints"].IsSequence()) { + for (const auto& endpoint : ip["extra_endpoints"]) { + Endpoint e; + e.method = endpoint["method"].as(); + e.path = endpoint["path"].as(); + extra_endpoints.push_back(e); } - + } // Model Load Parameters - auto mlp = config; - if (mlp["port"]) - port = mlp["port"].as(); - if (mlp["log_path"]) - log_path = mlp["log_path"].as(); - if (mlp["log_level"]) - log_level = mlp["log_level"].as(); - if (mlp["environments"]) - environments = mlp["environments"].as(); - if (mlp["engine"]) - engine = mlp["engine"].as(); - - if (mlp["command"] && mlp["command"].IsSequence()) { - for (const auto& cmd : mlp["command"]) { - command.push_back(cmd.as()); - } + auto mlp = config; + if (mlp["port"]) + port = mlp["port"].as(); + if (mlp["files"]) + files = mlp["files"].as(); + if (mlp["script"]) + script = mlp["script"].as(); + if (mlp["log_path"]) + log_path = mlp["log_path"].as(); + if (mlp["log_level"]) + log_level = mlp["log_level"].as(); + if (mlp["environment"]) + environment = mlp["environment"].as(); + 
if (mlp["engine"]) + engine = mlp["engine"].as(); + + if (mlp["command"] && mlp["command"].IsSequence()) { + for (const auto& cmd : mlp["command"]) { + command.push_back(cmd.as()); } + } - if (mlp["extra_params"]) { - for (YAML::const_iterator it = mlp["extra_params"].begin(); - it != mlp["extra_params"].end(); ++it) { - extra_params[it->first.as()] = - it->second.as(); - } + if (mlp["extra_params"]) { + for (YAML::const_iterator it = mlp["extra_params"].begin(); + it != mlp["extra_params"].end(); ++it) { + extra_params[it->first.as()] = + it->second.as(); } - + } } // Method to convert the struct to JSON - std::string ToJson() const { + Json::Value ToJson() const { Json::Value root; root["id"] = id; @@ -709,49 +707,41 @@ struct PythonModelConfig { root["version"] = version; // Inference Parameters - root["inference_parameters"]["load_model"]["method"] = - load_model.method; - root["inference_parameters"]["load_model"]["path"] = - load_model.path; - root["inference_parameters"]["load_model"]["transform_request"] = - load_model.transform_request; - root["inference_parameters"]["load_model"]["transform_response"] = - load_model.transform_response; - - root["inference_parameters"]["destroy"]["method"] = - destroy.method; - root["inference_parameters"]["destroy"]["path"] = - destroy.path; - - root["inference_parameters"]["inference"]["method"] = - inference.method; - root["inference_parameters"]["inference"]["path"] = - inference.path; + root["load_model"]["method"] = load_model.method; + root["load_model"]["path"] = load_model.path; + root["load_model"]["transform_request"] = load_model.transform_request; + root["load_model"]["transform_response"] = load_model.transform_response; + + root["destroy"]["method"] = destroy.method; + root["destroy"]["path"] = destroy.path; + + root["inference"]["method"] = inference.method; + root["inference"]["path"] = inference.path; for (const auto& endpoint : extra_endpoints) { Json::Value e; e["method"] = endpoint.method; e["path"] = endpoint.path; - root["inference_parameters"]["extra_endpoints"].append(e); + root["extra_endpoints"].append(e); } // Model Load Parameters - root["model_load_params"]["port"] = port; - root["model_load_params"]["log_path"] = log_path; - root["model_load_params"]["log_level"] = log_level; - root["model_load_params"]["environments"] = environments; + root["port"] = port; + root["log_path"] = log_path; + root["log_level"] = log_level; + root["environment"] = environment; + root["files"] = files; + root["script"] = script; // Serialize command as JSON array for (const auto& cmd : command) { - root["model_load_params"]["command"].append(cmd); + root["command"].append(cmd); } - root["model_load_params"]["engine"] = engine; - root["model_load_params"]["extra_params"] = - extra_params; // Serialize the JSON value directly + root["engine"] = engine; + root["extra_params"] = extra_params; // Serialize the JSON value directly - Json::StreamWriterBuilder writer; - return Json::writeString(writer, root); + return root; } // Method to populate struct from JSON @@ -776,64 +766,60 @@ struct PythonModelConfig { version = root["version"].asInt(); // Inference Parameters - if (root.isMember("inference_parameters")) { - const Json::Value& ip = root["inference_parameters"]; - if (ip.isMember("load_model")) { - load_model.method = - ip["load_model"]["method"].asString(); - load_model.path = - ip["load_model"]["path"].asString(); - load_model.transform_request = - ip["load_model"]["transform_request"].asString(); - load_model.transform_response = - 
ip["load_model"]["transform_response"].asString(); - } - if (ip.isMember("destroy")) { - destroy.method = - ip["destroy"]["method"].asString(); - destroy.path = ip["destroy"]["path"].asString(); - } - if (ip.isMember("inference")) { - inference.method = - ip["inference"]["method"].asString(); - inference.path = - ip["inference"]["path"].asString(); - } - if (ip.isMember("extra_endpoints")) { - for (const auto& endpoint : ip["extra_endpoints"]) { - Endpoint e; - e.method = endpoint["method"].asString(); - e.path = endpoint["path"].asString(); - extra_endpoints.push_back(e); - } + + const Json::Value& ip = root; + if (ip.isMember("load_model")) { + load_model.method = ip["load_model"]["method"].asString(); + load_model.path = ip["load_model"]["path"].asString(); + load_model.transform_request = + ip["load_model"]["transform_request"].asString(); + load_model.transform_response = + ip["load_model"]["transform_response"].asString(); + } + if (ip.isMember("destroy")) { + destroy.method = ip["destroy"]["method"].asString(); + destroy.path = ip["destroy"]["path"].asString(); + } + if (ip.isMember("inference")) { + inference.method = ip["inference"]["method"].asString(); + inference.path = ip["inference"]["path"].asString(); + } + if (ip.isMember("extra_endpoints")) { + for (const auto& endpoint : ip["extra_endpoints"]) { + Endpoint e; + e.method = endpoint["method"].asString(); + e.path = endpoint["path"].asString(); + extra_endpoints.push_back(e); } } // Model Load Parameters - if (root.isMember("model_load_params")) { - const Json::Value& mlp = root["model_load_params"]; - if (mlp.isMember("port")) - port = mlp["port"].asInt(); - if (mlp.isMember("log_path")) - log_path = mlp["log_path"].asString(); - if (mlp.isMember("log_level")) - log_level = mlp["log_level"].asString(); - if (mlp.isMember("environments")) - environments = mlp["environments"].asString(); - if (mlp.isMember("engine")) - engine = mlp["engine"].asString(); - - if (mlp.isMember("command")) { - for (const auto& cmd : mlp["command"]) { - command.push_back(cmd.asString()); - } - } - if (mlp.isMember("extra_params")) { - extra_params = - mlp["extra_params"]; // Directly assign the JSON value + const Json::Value& mlp = root; + if (mlp.isMember("port")) + port = mlp["port"].asString(); + if (mlp.isMember("log_path")) + log_path = mlp["log_path"].asString(); + if (mlp.isMember("log_level")) + log_level = mlp["log_level"].asString(); + if (mlp.isMember("environment")) + environment = mlp["environment"].asString(); + if (mlp.isMember("engine")) + engine = mlp["engine"].asString(); + if (mlp.isMember("files")) + files = mlp["files"].asString(); + if (mlp.isMember("script")) + script = mlp["script"].asString(); + + if (mlp.isMember("command")) { + for (const auto& cmd : mlp["command"]) { + command.push_back(cmd.asString()); } } + + if (mlp.isMember("extra_params")) { + extra_params = mlp["extra_params"]; // Directly assign the JSON value + } } }; diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 11866a708..64fe3c838 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -69,4 +69,7 @@ class EngineI { virtual void SetLogLevel(trantor::Logger::LogLevel logLevel) = 0; virtual Json::Value GetRemoteModels() = 0; + virtual void HandleRequest( + std::shared_ptr json_body, + std::function&& callback) = 0; }; diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index e69de29bb..83f85126c 100644 --- 
a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -0,0 +1,768 @@ +#include "python_engine.h" +#include +#include +#include +#include +namespace python_engine { +constexpr const int k200OK = 200; +constexpr const int k400BadRequest = 400; +constexpr const int k409Conflict = 409; +constexpr const int k500InternalServerError = 500; +constexpr const int kFileLoggerOption = 0; + +static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, + std::string* data) { + data->append(ptr, size * nmemb); + return size * nmemb; +} + +PythonEngine::PythonEngine() { + curl_global_init(CURL_GLOBAL_ALL); +} + +PythonEngine::~PythonEngine() { + curl_global_cleanup(); +} + +config::PythonModelConfig* PythonEngine::GetModelConfig( + const std::string& model) { + std::shared_lock lock(models_mutex_); + auto it = models_.find(model); + if (it != models_.end()) { + return &it->second; + } + return nullptr; +} +std::string constructWindowsCommandLine(const std::vector& args) { + std::string cmdLine; + for (const auto& arg : args) { + // Simple escaping for Windows command line + std::string escapedArg = arg; + if (escapedArg.find(' ') != std::string::npos) { + // Wrap in quotes and escape existing quotes + for (char& c : escapedArg) { + if (c == '"') + c = '\\'; + } + escapedArg = "\"" + escapedArg + "\""; + } + cmdLine += escapedArg + " "; + } + return cmdLine; +} + +std::vector convertToArgv(const std::vector& args) { + std::vector argv; + for (const auto& arg : args) { + argv.push_back(const_cast(arg.c_str())); + } + argv.push_back(nullptr); + return argv; +} + +pid_t PythonEngine::SpawnProcess(const std::string& model, + const std::vector& command) { + try { +#ifdef _WIN32 + // Windows process creation + STARTUPINFOA si = {0}; + PROCESS_INFORMATION pi = {0}; + si.cb = sizeof(si); + + // Construct command line + std::string cmdLine = constructWindowsCommandLine(command); + + // Convert string to char* for Windows API + char commandBuffer[4096]; + strncpy_s(commandBuffer, cmdLine.c_str(), sizeof(commandBuffer)); + + if (!CreateProcessA(NULL, // lpApplicationName + commandBuffer, // lpCommandLine + NULL, // lpProcessAttributes + NULL, // lpThreadAttributes + FALSE, // bInheritHandles + 0, // dwCreationFlags + NULL, // lpEnvironment + NULL, // lpCurrentDirectory + &si, // lpStartupInfo + &pi // lpProcessInformation + )) { + throw std::runtime_error("Failed to create process on Windows"); + } + + // Store the process ID + pid_t pid = pi.dwProcessId; + processMap[model] = pid; + + // Close handles to avoid resource leaks + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + + return pid; + +#elif __APPLE__ || __linux__ + // POSIX process creation + pid_t pid; + + // Convert command vector to char*[] + std::vector argv = convertToArgv(command); + for (auto c : command) { + std::cout << c << " " << std::endl; + } + + // Use posix_spawn for cross-platform compatibility + int spawn_result = posix_spawn(&pid, // pid output + command[0].c_str(), // executable path + NULL, // file actions + NULL, // spawn attributes + argv.data(), // argument vector + NULL // environment (inherit) + ); + + if (spawn_result != 0) { + throw std::runtime_error("Failed to spawn process"); + } + + // Store the process ID + processMap[model] = pid; + return pid; + +#else +#error Unsupported platform +#endif + } catch (const std::exception& e) { + LOG_ERROR << "Process spawning error: " << e.what(); + return -1; + } +} +bool PythonEngine::TerminateProcess(const std::string& model) 
{ + auto it = processMap.find(model); + if (it == processMap.end()) { + LOG_ERROR << "No process found for model: " << model + << ", removing from list running models."; + models_.erase(model); + return false; + } + +#ifdef _WIN32 + HANDLE hProcess = OpenProcess(PROCESS_TERMINATE, FALSE, it->second); + if (hProcess == NULL) { + LOG_ERROR << "Failed to open process"; + return false; + } + + bool terminated = TerminateProcess(hProcess, 0) == TRUE; + CloseHandle(hProcess); + + if (terminated) { + processMap.erase(it); + return true; + } + +#elif __APPLE__ || __linux__ + int result = kill(it->second, SIGTERM); + if (result == 0) { + processMap.erase(it); + return true; + } +#endif + + return false; +} +CurlResponse PythonEngine::MakeGetRequest(const std::string& model, + const std::string& path) { + auto config = models_[model]; + CURL* curl = curl_easy_init(); + CurlResponse response; + + if (!curl) { + response.error = true; + response.error_message = "Failed to initialize CURL"; + return response; + } + + std::string full_url = "http://localhost:" + config.port + path; + + struct curl_slist* headers = nullptr; + + headers = curl_slist_append(headers, "Content-Type: application/json"); + + curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + + std::string response_string; + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); + + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + response.error = true; + response.error_message = curl_easy_strerror(res); + } else { + response.body = response_string; + } + + curl_slist_free_all(headers); + curl_easy_cleanup(curl); + return response; +} +CurlResponse PythonEngine::MakeDeleteRequest(const std::string& model, + const std::string& path) { + auto config = models_[model]; + CURL* curl = curl_easy_init(); + CurlResponse response; + + if (!curl) { + response.error = true; + response.error_message = "Failed to initialize CURL"; + return response; + } + std::string full_url = "http://localhost:" + config.port + path; + + curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); + curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE"); + + std::string response_string; + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); + + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + response.error = true; + response.error_message = curl_easy_strerror(res); + } else { + response.body = response_string; + } + + curl_easy_cleanup(curl); + return response; +} + +CurlResponse PythonEngine::MakePostRequest(const std::string& model, + const std::string& path, + const std::string& body) { + auto config = models_[model]; + CURL* curl = curl_easy_init(); + CurlResponse response; + + if (!curl) { + response.error = true; + response.error_message = "Failed to initialize CURL"; + return response; + } + std::string full_url = "http://localhost:" + config.port + path; + + struct curl_slist* headers = nullptr; + headers = curl_slist_append(headers, "Content-Type: application/json"); + + curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str()); + + std::string response_string; + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); + + CURLcode res = 
curl_easy_perform(curl); + if (res != CURLE_OK) { + response.error = true; + response.error_message = curl_easy_strerror(res); + } else { + response.body = response_string; + } + + curl_slist_free_all(headers); + curl_easy_cleanup(curl); + return response; +} + +bool PythonEngine::LoadModelConfig(const std::string& model, + const std::string& yaml_path) { + try { + config::PythonModelConfig config; + config.ReadFromYaml(yaml_path); + std::unique_lock lock(models_mutex_); + models_[model] = config; + } catch (const std::exception& e) { + LOG_ERROR << "Failed to load model config: " << e.what(); + return false; + } + + return true; +} + +void PythonEngine::GetModels( + std::shared_ptr json_body, + std::function&& callback) { + + Json::Value response_json; + Json::Value model_array(Json::arrayValue); + + for (const auto& pair : models_) { + auto val = pair.second.ToJson(); + model_array.append(val); + } + + response_json["object"] = "list"; + response_json["data"] = model_array; + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json)); +} + +void PythonEngine::LoadModel( + std::shared_ptr json_body, + std::function&& callback) { + pid_t pid; + if (!json_body->isMember("model") || !json_body->isMember("model_path")) { + Json::Value error; + error["error"] = "Missing required fields: model or model_path"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + const std::string& model = (*json_body)["model"].asString(); + const std::string& model_path = (*json_body)["model_path"].asString(); + if (models_.find(model) != models_.end()) { + Json::Value error; + error["error"] = "Model already loaded!"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k409Conflict; + callback(std::move(status), std::move(error)); + return; + } + + if (!LoadModelConfig(model, model_path)) { + Json::Value error; + error["error"] = "Failed to load model configuration"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + callback(std::move(status), std::move(error)); + return; + } + auto model_config = models_[model]; + try { + std::string data_folder_path = + "/home/thuan/cortexcpp/environments/"; // To do: will be removed with cortex data path + std::string model_folder_path = + "/home/thuan/cortexcpp/models/cortex.so/whispervq/fp16/"; // To do: will be removed with cortex model path +#ifdef _WIN32 + auto executable = std::filesystem::path(data_folder_path) / + std::filesystem::path(model_config.environment) / + std::filesystem::path("Scripts"); +#else + auto executable = std::filesystem::path(data_folder_path) / + std::filesystem::path(model_config.environment) / + std::filesystem::path("bin"); +#endif + std::cout << "executable string: " << executable.string() + << data_folder_path << " " << model_config.environment + << std::endl; + auto executable_str = + (executable / std::filesystem::path(model_config.command[0])).string(); + std::cout << "executable string: " << executable_str << std::endl; + auto command = model_config.command; + command[0] = executable_str; + 
command.push_back((std::filesystem::path(model_folder_path) / + std::filesystem::path(model_config.script)) + .string()); + std::list args{"--port", model_config.port, + "--log_path", model_config.log_path, + "--log_level", model_config.log_level}; + if (!model_config.extra_params.isNull() && + model_config.extra_params.isObject()) { + for (const auto& key : model_config.extra_params.getMemberNames()) { + const Json::Value& value = model_config.extra_params[key]; + + // Convert key to string with -- prefix + std::string param_key = "--" + key; + + // Handle different JSON value types + if (value.isString()) { + args.emplace_back(param_key); + args.emplace_back(value.asString()); + } else if (value.isInt()) { + args.emplace_back(param_key); + args.emplace_back(std::to_string(value.asInt())); + } else if (value.isDouble()) { + args.emplace_back(param_key); + args.emplace_back(std::to_string(value.asDouble())); + } else if (value.isBool()) { + // For boolean, only add the flag if true + if (value.asBool()) { + args.emplace_back(param_key); + } + } + } + } + + // Add the parsed arguments to the command + command.insert(command.end(), args.begin(), args.end()); + pid = SpawnProcess(model, command); + if (pid == -1) { + std::unique_lock lock(models_mutex_); + if (models_.find(model) != models_.end()) { + models_.erase(model); + } + + Json::Value error; + error["error"] = "Fail to spawn process with pid -1"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + callback(std::move(status), std::move(error)); + return; + } + } catch (const std::exception& e) { + std::unique_lock lock(models_mutex_); + if (models_.find(model) != models_.end()) { + models_.erase(model); + } + + Json::Value error; + error["error"] = e.what(); + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response; + response["status"] = + "Model loaded successfully with pid: " + std::to_string(pid); + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + callback(std::move(status), std::move(response)); +} + +void PythonEngine::UnloadModel( + std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("model")) { + Json::Value error; + error["error"] = "Missing required field: model"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + const std::string& model = (*json_body)["model"].asString(); + + { + std::unique_lock lock(models_mutex_); + if (TerminateProcess(model)) { + models_.erase(model); + } else { + Json::Value error; + error["error"] = "Fail to terminate process with id: " + + std::to_string(processMap[model]); + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + } + + Json::Value response; + response["status"] = "Model unloaded successfully"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = 
k200OK; + callback(std::move(status), std::move(response)); +} + +void PythonEngine::HandleChatCompletion( + std::shared_ptr json_body, + std::function&& callback) {} + +void PythonEngine::HandleRequest( + std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("model") || !json_body->isMember("method") || + !json_body->isMember("path")) { + Json::Value error; + error["error"] = + "Missing required field: model, method and path are required!"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + std::string method = (*json_body)["method"].asString(); + std::string path = (*json_body)["path"].asString(); + std::string transform_request = + (*json_body).get("transform_request", "").asString(); + std::string transform_response = + (*json_body).get("transform_response", "").asString(); + std::string model = (*json_body)["model"].asString(); + Json::Value body = (*json_body)["body"]; + + // Transform Request + std::string transformed_request; + if (!transform_request.empty()) { + + try { + // Validate JSON body + if (!body || body.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + + // Render with error handling + try { + transformed_request = renderer_.Render(transform_request, *json_body); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + transformed_request = body.toStyledString(); + } + } else { + transformed_request = body.toStyledString(); + } + + // End Transform request + + CurlResponse response; + if (method == "post") { + response = MakePostRequest(model, path, transformed_request); + } else if (method == "get") { + response = MakeGetRequest(model, path); + } else if (method == "delete") { + response = MakeDeleteRequest(model, path); + } else { + Json::Value error; + error["error"] = + "method not supported! 
Supported methods are: post, get, delete"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + if (response.error) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + Json::Value error; + error["error"] = response.error_message; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response_json; + Json::Reader reader; + if (!reader.parse(response.body, response_json)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + if (!transform_response.empty()) { + // Transform Response + std::string response_str; + try { + // Validate JSON body + if (!response_json || response_json.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + // Render with error handling + try { + response_str = renderer_.Render(transform_response, response_json); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + response_str = response_json.toStyledString(); + } + + Json::Reader reader_final; + Json::Value response_json_final; + if (!reader_final.parse(response_str, response_json_final)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json_final)); + } else { + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json)); + } +} + +void PythonEngine::GetModelStatus( + std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("model")) { + Json::Value error; + error["error"] = "Missing required field: model"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + auto model = json_body->get("model", "").asString(); + auto model_config = models_[model]; + auto health_endpoint = model_config.heath_check; + auto response_health = MakeGetRequest(model, health_endpoint.path); + + if (response_health.error) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + Json::Value error; + error["error"] = response_health.error_message; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response; + response["model"] = 
model; + response["model_loaded"] = true; + response["model_data"] = model_config.ToJson(); + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + callback(std::move(status), std::move(response)); +} + +// Implement remaining virtual functions +void PythonEngine::HandleEmbedding( + std::shared_ptr, + std::function&& callback) { + callback(Json::Value(), Json::Value()); +} + +bool PythonEngine::IsSupported(const std::string& f) { + if (f == "HandleChatCompletion" || f == "LoadModel" || f == "UnloadModel" || + f == "GetModelStatus" || f == "GetModels" || f == "SetFileLogger" || + f == "SetLogLevel") { + return true; + } + return false; +} + +bool PythonEngine::SetFileLogger(int max_log_lines, + const std::string& log_path) { + if (!async_file_logger_) { + async_file_logger_ = std::make_unique(); + } + + async_file_logger_->setFileName(log_path); + async_file_logger_->setMaxLines(max_log_lines); // Keep last 100000 lines + async_file_logger_->startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + if (async_file_logger_) + async_file_logger_->output_(msg, len); + }, + [&]() { + if (async_file_logger_) + async_file_logger_->flush(); + }); + freopen(log_path.c_str(), "w", stderr); + freopen(log_path.c_str(), "w", stdout); + return true; +} + +void PythonEngine::SetLogLevel(trantor::Logger::LogLevel log_level) { + trantor::Logger::setLogLevel(log_level); +} + +void PythonEngine::RegisterLibraryPath(RegisterLibraryOption opts) { + +}; + +void PythonEngine::Load(EngineLoadOption opts) { + // Develop register model here on loading engine +}; + +void PythonEngine::Unload(EngineUnloadOption opts) {}; + +// extern "C" { +// EngineI* get_engine() { +// return new PythonEngine(); +// } +// } +} // namespace python_engine \ No newline at end of file diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index e69de29bb..fbc88b40e 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -0,0 +1,160 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "cortex-common/EngineI.h" +#include "extensions/template_renderer.h" +#include "utils/file_logger.h" +#include "config/model_config.h" +#ifdef _WIN32 + #include + #include +#elif __APPLE__ || __linux__ + #include + #include + #include + #include + #include +#endif +// Helper for CURL response +namespace python_engine{ +struct StreamContext +{ + std::shared_ptr> callback; + std::string buffer; +}; + +static size_t StreamWriteCallback(char *ptr, size_t size, size_t nmemb, + void *userdata) +{ + auto *context = static_cast(userdata); + std::string chunk(ptr, size * nmemb); + + context->buffer += chunk; + + // Process complete lines + size_t pos; + while ((pos = context->buffer.find('\n')) != std::string::npos) + { + std::string line = context->buffer.substr(0, pos); + context->buffer = context->buffer.substr(pos + 1); + + // Skip empty lines + if (line.empty() || line == "\r") + continue; + + // Remove "data: " prefix if present + // if (line.substr(0, 6) == "data: ") + // { + // line = line.substr(6); + // } + + // Skip [DONE] message + std::cout << line << std::endl; + if (line == "data: [DONE]") + { + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = true; + status["status_code"] = 200; + 
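+      // End of stream: hand the caller a final is_done status with an empty
+      // payload, then stop scanning the buffered lines.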
(*context->callback)(std::move(status), Json::Value()); + break; + } + + // Parse the JSON + Json::Value chunk_json; + chunk_json["data"] = line + "\n\n"; + Json::Reader reader; + + Json::Value status; + status["is_done"] = false; + status["has_error"] = false; + status["is_stream"] = true; + status["status_code"] = 200; + (*context->callback)(std::move(status), std::move(chunk_json)); + } + + return size * nmemb; +} + +struct CurlResponse +{ + std::string body; + bool error{false}; + std::string error_message; +}; + +class PythonEngine : public EngineI +{ +private: + // Model configuration + + // Thread-safe model config storage + mutable std::shared_mutex models_mutex_; + std::unordered_map models_; + extensions::TemplateRenderer renderer_; + std::unique_ptr async_file_logger_; + std::unordered_map processMap; + + // Helper functions + CurlResponse MakePostRequest(const std::string &model, const std::string &path, + const std::string &body); + CurlResponse MakeGetRequest(const std::string &model, const std::string &path); + CurlResponse MakeDeleteRequest(const std::string &model, const std::string &path); + + // Process manager functions + pid_t SpawnProcess(const std::string& model, const std::vector& command) ; + bool TerminateProcess(const std::string& model); + + // Internal model management + bool LoadModelConfig(const std::string &model, const std::string &yaml_path); + config::PythonModelConfig *GetModelConfig(const std::string &model); + +public: + PythonEngine(); + ~PythonEngine(); + void RegisterLibraryPath(RegisterLibraryOption opts) override; + + void Load(EngineLoadOption opts) override; + + void Unload(EngineUnloadOption opts) override; + + // Main interface implementations + void GetModels( + std::shared_ptr json_body, + std::function &&callback) override; + + void HandleChatCompletion( + std::shared_ptr json_body, + std::function &&callback) override; + + void LoadModel( + std::shared_ptr json_body, + std::function &&callback) override; + + void UnloadModel( + std::shared_ptr json_body, + std::function &&callback) override; + + void GetModelStatus( + std::shared_ptr json_body, + std::function &&callback) override; + + // Other required virtual functions + void HandleEmbedding( + std::shared_ptr json_body, + std::function &&callback) override; + bool IsSupported(const std::string &feature) override; + bool SetFileLogger(int max_log_lines, const std::string &log_path) override; + void SetLogLevel(trantor::Logger::LogLevel logLevel) override; + void HandleRequest( + std::shared_ptr json_body, + std::function &&callback) override; +}; +} // namespace python_engine \ No newline at end of file diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 8ce6fa652..1dcf3777f 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -8,7 +8,7 @@ #include #include #include "cortex-common/remote_enginei.h" -#include "extensions/remote-engine/template_renderer.h" +#include "extensions/template_renderer.h" #include "utils/engine_constants.h" #include "utils/file_logger.h" // Helper for CURL response @@ -47,7 +47,7 @@ class RemoteEngine : public RemoteEngineI { // Thread-safe model config storage mutable std::shared_mutex models_mtx_; std::unordered_map models_; - TemplateRenderer renderer_; + extensions::TemplateRenderer renderer_; Json::Value metadata_; std::string api_key_template_; std::unique_ptr async_file_logger_; diff --git 
a/engine/extensions/remote-engine/template_renderer.cc b/engine/extensions/remote-engine/template_renderer.cc deleted file mode 100644 index 15514d17c..000000000 --- a/engine/extensions/remote-engine/template_renderer.cc +++ /dev/null @@ -1,136 +0,0 @@ -#if defined(_WIN32) || defined(_WIN64) -#define NOMINMAX -#undef min -#undef max -#endif -#include "template_renderer.h" -#include -#include -#include "utils/logging_utils.h" -namespace remote_engine { -TemplateRenderer::TemplateRenderer() { - // Configure Inja environment - env_.set_trim_blocks(true); - env_.set_lstrip_blocks(true); - - // Add tojson function for all value types - env_.add_callback("tojson", 1, [](inja::Arguments& args) { - if (args.empty()) { - return nlohmann::json(nullptr); - } - const auto& value = *args[0]; - - if (value.is_string()) { - return nlohmann::json(std::string("\"") + value.get() + - "\""); - } - return value; - }); -} - -std::string TemplateRenderer::Render(const std::string& tmpl, - const Json::Value& data) { - try { - // Convert Json::Value to nlohmann::json - auto json_data = ConvertJsonValue(data); - - // Create the input data structure expected by the template - nlohmann::json template_data; - template_data["input_request"] = json_data; - - // Debug output - LOG_DEBUG << "Template: " << tmpl; - LOG_DEBUG << "Data: " << template_data.dump(2); - - // Render template - std::string result = env_.render(tmpl, template_data); - - // Clean up any potential double quotes in JSON strings - result = std::regex_replace(result, std::regex("\\\"\\\""), "\""); - - LOG_DEBUG << "Result: " << result; - - // Validate JSON - auto parsed = nlohmann::json::parse(result); - - return result; - } catch (const std::exception& e) { - LOG_ERROR << "Template rendering failed: " << e.what(); - LOG_ERROR << "Template: " << tmpl; - throw std::runtime_error(std::string("Template rendering failed: ") + - e.what()); - } -} - -nlohmann::json TemplateRenderer::ConvertJsonValue(const Json::Value& input) { - if (input.isNull()) { - return nullptr; - } else if (input.isBool()) { - return input.asBool(); - } else if (input.isInt()) { - return input.asInt(); - } else if (input.isUInt()) { - return input.asUInt(); - } else if (input.isDouble()) { - return input.asDouble(); - } else if (input.isString()) { - return input.asString(); - } else if (input.isArray()) { - nlohmann::json arr = nlohmann::json::array(); - for (const auto& element : input) { - arr.push_back(ConvertJsonValue(element)); - } - return arr; - } else if (input.isObject()) { - nlohmann::json obj = nlohmann::json::object(); - for (const auto& key : input.getMemberNames()) { - obj[key] = ConvertJsonValue(input[key]); - } - return obj; - } - return nullptr; -} - -Json::Value TemplateRenderer::ConvertNlohmannJson(const nlohmann::json& input) { - if (input.is_null()) { - return Json::Value(); - } else if (input.is_boolean()) { - return Json::Value(input.get()); - } else if (input.is_number_integer()) { - return Json::Value(input.get()); - } else if (input.is_number_unsigned()) { - return Json::Value(input.get()); - } else if (input.is_number_float()) { - return Json::Value(input.get()); - } else if (input.is_string()) { - return Json::Value(input.get()); - } else if (input.is_array()) { - Json::Value arr(Json::arrayValue); - for (const auto& element : input) { - arr.append(ConvertNlohmannJson(element)); - } - return arr; - } else if (input.is_object()) { - Json::Value obj(Json::objectValue); - for (auto it = input.begin(); it != input.end(); ++it) { - obj[it.key()] = 
ConvertNlohmannJson(it.value()); - } - return obj; - } - return Json::Value(); -} - -std::string TemplateRenderer::RenderFile(const std::string& template_path, - const Json::Value& data) { - try { - // Convert Json::Value to nlohmann::json - auto json_data = ConvertJsonValue(data); - - // Load and render template - return env_.render_file(template_path, json_data); - } catch (const std::exception& e) { - throw std::runtime_error(std::string("Template file rendering failed: ") + - e.what()); - } -} -} // namespace remote_engine \ No newline at end of file diff --git a/engine/extensions/remote-engine/template_renderer.h b/engine/extensions/remote-engine/template_renderer.h deleted file mode 100644 index f59e7cc93..000000000 --- a/engine/extensions/remote-engine/template_renderer.h +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include - -#include -#include "json/json.h" -#include "trantor/utils/Logger.h" -// clang-format off -#if defined(_WIN32) || defined(_WIN64) -#define NOMINMAX -#undef min -#undef max -#endif -#include -#include -// clang-format on -namespace remote_engine { -class TemplateRenderer { - public: - TemplateRenderer(); - ~TemplateRenderer() = default; - - // Convert Json::Value to nlohmann::json - static nlohmann::json ConvertJsonValue(const Json::Value& input); - - // Convert nlohmann::json to Json::Value - static Json::Value ConvertNlohmannJson(const nlohmann::json& input); - - // Render template with data - std::string Render(const std::string& tmpl, const Json::Value& data); - - // Load template from file and render - std::string RenderFile(const std::string& template_path, - const Json::Value& data); - - private: - inja::Environment env_; -}; - -} // namespace remote_engine \ No newline at end of file From accec0a9db21257d8c4c11c6af8f52e573cf6783 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Wed, 11 Dec 2024 17:02:40 +0700 Subject: [PATCH 08/34] Fix: CI build window --- .../extensions/python-engine/python_engine.h | 269 +++++++++--------- 1 file changed, 134 insertions(+), 135 deletions(-) diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index fbc88b40e..4cbda3999 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -7,154 +7,153 @@ #include #include #include +#include "config/model_config.h" #include "cortex-common/EngineI.h" #include "extensions/template_renderer.h" #include "utils/file_logger.h" -#include "config/model_config.h" #ifdef _WIN32 - #include - #include +#include +#include +using pid_t = DWORD; #elif __APPLE__ || __linux__ - #include - #include - #include - #include - #include +#include +#include +#include +#include +#include #endif // Helper for CURL response -namespace python_engine{ -struct StreamContext -{ - std::shared_ptr> callback; - std::string buffer; +namespace python_engine { +struct StreamContext { + std::shared_ptr> callback; + std::string buffer; }; -static size_t StreamWriteCallback(char *ptr, size_t size, size_t nmemb, - void *userdata) -{ - auto *context = static_cast(userdata); - std::string chunk(ptr, size * nmemb); - - context->buffer += chunk; - - // Process complete lines - size_t pos; - while ((pos = context->buffer.find('\n')) != std::string::npos) - { - std::string line = context->buffer.substr(0, pos); - context->buffer = context->buffer.substr(pos + 1); - - // Skip empty lines - if (line.empty() || line == "\r") - continue; - - // Remove "data: " prefix if present - // if (line.substr(0, 6) == 
"data: ") - // { - // line = line.substr(6); - // } - - // Skip [DONE] message - std::cout << line << std::endl; - if (line == "data: [DONE]") - { - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = true; - status["status_code"] = 200; - (*context->callback)(std::move(status), Json::Value()); - break; - } - - // Parse the JSON - Json::Value chunk_json; - chunk_json["data"] = line + "\n\n"; - Json::Reader reader; - - Json::Value status; - status["is_done"] = false; - status["has_error"] = false; - status["is_stream"] = true; - status["status_code"] = 200; - (*context->callback)(std::move(status), std::move(chunk_json)); +static size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, + void* userdata) { + auto* context = static_cast(userdata); + std::string chunk(ptr, size * nmemb); + + context->buffer += chunk; + + // Process complete lines + size_t pos; + while ((pos = context->buffer.find('\n')) != std::string::npos) { + std::string line = context->buffer.substr(0, pos); + context->buffer = context->buffer.substr(pos + 1); + + // Skip empty lines + if (line.empty() || line == "\r") + continue; + + // Remove "data: " prefix if present + // if (line.substr(0, 6) == "data: ") + // { + // line = line.substr(6); + // } + + // Skip [DONE] message + std::cout << line << std::endl; + if (line == "data: [DONE]") { + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = true; + status["status_code"] = 200; + (*context->callback)(std::move(status), Json::Value()); + break; } - return size * nmemb; + // Parse the JSON + Json::Value chunk_json; + chunk_json["data"] = line + "\n\n"; + Json::Reader reader; + + Json::Value status; + status["is_done"] = false; + status["has_error"] = false; + status["is_stream"] = true; + status["status_code"] = 200; + (*context->callback)(std::move(status), std::move(chunk_json)); + } + + return size * nmemb; } -struct CurlResponse -{ - std::string body; - bool error{false}; - std::string error_message; +struct CurlResponse { + std::string body; + bool error{false}; + std::string error_message; }; -class PythonEngine : public EngineI -{ -private: - // Model configuration - - // Thread-safe model config storage - mutable std::shared_mutex models_mutex_; - std::unordered_map models_; - extensions::TemplateRenderer renderer_; - std::unique_ptr async_file_logger_; - std::unordered_map processMap; - - // Helper functions - CurlResponse MakePostRequest(const std::string &model, const std::string &path, - const std::string &body); - CurlResponse MakeGetRequest(const std::string &model, const std::string &path); - CurlResponse MakeDeleteRequest(const std::string &model, const std::string &path); - - // Process manager functions - pid_t SpawnProcess(const std::string& model, const std::vector& command) ; - bool TerminateProcess(const std::string& model); - - // Internal model management - bool LoadModelConfig(const std::string &model, const std::string &yaml_path); - config::PythonModelConfig *GetModelConfig(const std::string &model); - -public: - PythonEngine(); - ~PythonEngine(); - void RegisterLibraryPath(RegisterLibraryOption opts) override; - - void Load(EngineLoadOption opts) override; - - void Unload(EngineUnloadOption opts) override; - - // Main interface implementations - void GetModels( - std::shared_ptr json_body, - std::function &&callback) override; - - void HandleChatCompletion( - std::shared_ptr json_body, - std::function &&callback) override; - - void 
LoadModel( - std::shared_ptr json_body, - std::function &&callback) override; - - void UnloadModel( - std::shared_ptr json_body, - std::function &&callback) override; - - void GetModelStatus( - std::shared_ptr json_body, - std::function &&callback) override; - - // Other required virtual functions - void HandleEmbedding( - std::shared_ptr json_body, - std::function &&callback) override; - bool IsSupported(const std::string &feature) override; - bool SetFileLogger(int max_log_lines, const std::string &log_path) override; - void SetLogLevel(trantor::Logger::LogLevel logLevel) override; - void HandleRequest( - std::shared_ptr json_body, - std::function &&callback) override; +class PythonEngine : public EngineI { + private: + // Model configuration + + // Thread-safe model config storage + mutable std::shared_mutex models_mutex_; + std::unordered_map models_; + extensions::TemplateRenderer renderer_; + std::unique_ptr async_file_logger_; + std::unordered_map processMap; + + // Helper functions + CurlResponse MakePostRequest(const std::string& model, + const std::string& path, + const std::string& body); + CurlResponse MakeGetRequest(const std::string& model, + const std::string& path); + CurlResponse MakeDeleteRequest(const std::string& model, + const std::string& path); + + // Process manager functions + pid_t SpawnProcess(const std::string& model, + const std::vector& command); + bool TerminateProcess(const std::string& model); + + // Internal model management + bool LoadModelConfig(const std::string& model, const std::string& yaml_path); + config::PythonModelConfig* GetModelConfig(const std::string& model); + + public: + PythonEngine(); + ~PythonEngine(); + void RegisterLibraryPath(RegisterLibraryOption opts) override; + + void Load(EngineLoadOption opts) override; + + void Unload(EngineUnloadOption opts) override; + + // Main interface implementations + void GetModels( + std::shared_ptr json_body, + std::function&& callback) override; + + void HandleChatCompletion( + std::shared_ptr json_body, + std::function&& callback) override; + + void LoadModel( + std::shared_ptr json_body, + std::function&& callback) override; + + void UnloadModel( + std::shared_ptr json_body, + std::function&& callback) override; + + void GetModelStatus( + std::shared_ptr json_body, + std::function&& callback) override; + + // Other required virtual functions + void HandleEmbedding( + std::shared_ptr json_body, + std::function&& callback) override; + bool IsSupported(const std::string& feature) override; + bool SetFileLogger(int max_log_lines, const std::string& log_path) override; + void SetLogLevel(trantor::Logger::LogLevel logLevel) override; + void HandleRequest( + std::shared_ptr json_body, + std::function&& callback) override; }; -} // namespace python_engine \ No newline at end of file +} // namespace python_engine \ No newline at end of file From 6a8bebf227ebb0e1dde259fc47586a09b5b5f37b Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Wed, 11 Dec 2024 17:10:05 +0700 Subject: [PATCH 09/34] Fix: CI build window --- engine/extensions/python-engine/python_engine.cc | 4 ++-- engine/extensions/python-engine/python_engine.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 83f85126c..6ab8c0e48 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -135,7 +135,7 @@ pid_t PythonEngine::SpawnProcess(const std::string& model, 
return -1; } } -bool PythonEngine::TerminateProcess(const std::string& model) { +bool PythonEngine::TerminateModelProcess(const std::string& model) { auto it = processMap.find(model); if (it == processMap.end()) { LOG_ERROR << "No process found for model: " << model @@ -479,7 +479,7 @@ void PythonEngine::UnloadModel( { std::unique_lock lock(models_mutex_); - if (TerminateProcess(model)) { + if (TerminateModelProcess(model)) { models_.erase(model); } else { Json::Value error; diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index 4cbda3999..1b0a88d1a 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -109,7 +109,7 @@ class PythonEngine : public EngineI { // Process manager functions pid_t SpawnProcess(const std::string& model, const std::vector& command); - bool TerminateProcess(const std::string& model); + bool TerminateModelProcess(const std::string& model); // Internal model management bool LoadModelConfig(const std::string& model, const std::string& yaml_path); From 36f29bfeb66b2ac350911eb28c941b2b32ee4d66 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 12 Dec 2024 16:15:24 +0700 Subject: [PATCH 10/34] feat: support download python model from cortexso --- engine/config/model_config.h | 23 ++++++++++++++--------- engine/services/download_service.cc | 3 +++ engine/services/model_service.cc | 24 +++++++++++++++++------- engine/utils/curl_utils.cc | 25 +++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 16 deletions(-) diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 78f62d9ca..55218fca7 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -547,7 +547,7 @@ struct PythonModelConfig { // Model Load Parameters std::string port; - std::string files; + std::string model_location; std::string script; std::string log_path; std::string log_level; @@ -557,7 +557,7 @@ struct PythonModelConfig { Json::Value extra_params; // Accept dynamic extra parameters // Method to convert C++ struct to YAML - std::string ToYaml() const { + void ToYaml(const std::string & filepath) const { YAML::Emitter out; out << YAML::BeginMap; @@ -597,7 +597,7 @@ struct PythonModelConfig { // Model Load Parameters out << YAML::Key << "port" << YAML::Value << port; - out << YAML::Key << "files" << YAML::Value << files; + out << YAML::Key << "model_location" << YAML::Value << model_location; out << YAML::Key << "script" << YAML::Value << script; out << YAML::Key << "log_path" << YAML::Value << log_path; out << YAML::Key << "log_level" << YAML::Value << log_level; @@ -620,7 +620,12 @@ struct PythonModelConfig { << iter->asString(); } out << YAML::EndMap; - return out.c_str(); + + std::ofstream fout(filepath); + if (!fout.is_open()) { + throw std::runtime_error("Failed to open file for writing: " + filepath); + } + fout << out.c_str(); } // Method to populate struct from YAML file @@ -669,8 +674,8 @@ struct PythonModelConfig { auto mlp = config; if (mlp["port"]) port = mlp["port"].as(); - if (mlp["files"]) - files = mlp["files"].as(); + if (mlp["model_location"]) + model_location = mlp["model_location"].as(); if (mlp["script"]) script = mlp["script"].as(); if (mlp["log_path"]) @@ -730,7 +735,7 @@ struct PythonModelConfig { root["log_path"] = log_path; root["log_level"] = log_level; root["environment"] = environment; - root["files"] = files; + root["model_location"] = model_location; root["script"] = script; // Serialize command 
as JSON array @@ -806,8 +811,8 @@ struct PythonModelConfig { environment = mlp["environment"].asString(); if (mlp.isMember("engine")) engine = mlp["engine"].asString(); - if (mlp.isMember("files")) - files = mlp["files"].asString(); + if (mlp.isMember("model_location")) + model_location = mlp["model_location"].asString(); if (mlp.isMember("script")) script = mlp["script"].asString(); diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc index d855c8f61..9c7137168 100644 --- a/engine/services/download_service.cc +++ b/engine/services/download_service.cc @@ -374,6 +374,9 @@ void DownloadService::ProcessTask(DownloadTask& task, int worker_id) { CTL_ERR("Failed to init curl!"); return; } + if (!std::filesystem::exists(item.localPath.parent_path())) { + std::filesystem::create_directories(item.localPath.parent_path()); + } auto file = fopen(item.localPath.string().c_str(), "wb"); if (!file) { CTL_ERR("Failed to open output file " + item.localPath.string()); diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 7f79ddaf7..ab20734dc 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -96,6 +96,7 @@ cpp::result GetDownloadTask( file_manager_utils::CreateDirectoryRecursively(model_container_path.string()); for (const auto& value : result.value()) { + // std::cout << "value object: " << value.toStyledString() << std::endl; auto path = value["path"].asString(); if (path == ".gitattributes" || path == ".gitignore" || path == "README.md") { @@ -517,15 +518,24 @@ ModelService::DownloadModelFromCortexsoAsync( config::YamlHandler yaml_handler; yaml_handler.ModelConfigFromFile(model_yml_item->localPath.string()); auto mc = yaml_handler.GetModelConfig(); - mc.model = unique_model_id; + if (mc.engine == kPythonEngine) { // process for Python engine + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml(model_yml_item->localPath.string()); + python_model_config.model_location = + model_yml_item->localPath.parent_path().string(); + python_model_config.ToYaml(model_yml_item->localPath.string()); - uint64_t model_size = 0; - for (const auto& item : finishedTask.items) { - model_size = model_size + item.bytes.value_or(0); + } else { + mc.model = unique_model_id; + + uint64_t model_size = 0; + for (const auto& item : finishedTask.items) { + model_size = model_size + item.bytes.value_or(0); + } + mc.size = model_size; + yaml_handler.UpdateModelConfig(mc); + yaml_handler.WriteYamlFile(model_yml_item->localPath.string()); } - mc.size = model_size; - yaml_handler.UpdateModelConfig(mc); - yaml_handler.WriteYamlFile(model_yml_item->localPath.string()); auto rel = file_manager_utils::ToRelativeCortexDataPath(model_yml_item->localPath); diff --git a/engine/utils/curl_utils.cc b/engine/utils/curl_utils.cc index 71f263a6a..9ba0f5a76 100644 --- a/engine/utils/curl_utils.cc +++ b/engine/utils/curl_utils.cc @@ -257,6 +257,31 @@ cpp::result SimpleGetJson(const std::string& url, " parsing error: " + reader.getFormattedErrorMessages()); } + if (root.isArray()) { + for (const auto& value : root) { + if (value["type"].asString() == "directory") { + auto temp = + SimpleGetJson(url + "/" + value["path"].asString(), timeout); + if (!temp.has_error()) { + if (temp.value().isArray()) { + for (const auto& item : temp.value()) { + root.append(item); + } + } else { + root.append(temp.value()); + } + } + } + } + for (Json::ArrayIndex i = 0; i < root.size();) { + if (root[i].isMember("type") && root[i]["type"] == 
"directory") { + root.removeIndex(i, nullptr); + } else { + ++i; + } + } + + } return root; } From 10d53a13079ab74e078e46530ef005c6be1fd04d Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 12 Dec 2024 17:25:32 +0700 Subject: [PATCH 11/34] feat: add inference interface --- engine/cortex-common/EngineI.h | 3 + .../extensions/python-engine/python_engine.cc | 151 ++++++++++++++++++ .../extensions/python-engine/python_engine.h | 3 + 3 files changed, 157 insertions(+) diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 466d32c79..ebc0674e1 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -72,6 +72,9 @@ class EngineI { virtual void HandleRequest( std::shared_ptr json_body, std::function&& callback) = 0; + virtual void HandleInference( + std::shared_ptr json_body, + std::function &&callback) = 0; // Stop inflight chat completion in stream mode virtual void StopInferencing(const std::string& model_id) = 0; diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 6ab8c0e48..d61894543 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -509,6 +509,157 @@ void PythonEngine::HandleChatCompletion( std::shared_ptr json_body, std::function&& callback) {} +void PythonEngine::HandleInference( + std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("model")) { + Json::Value error; + error["error"] = "Missing required field: model is required!"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + std::string method = "post"; + std::string path = "/inference"; + std::string transform_request = + (*json_body).get("transform_request", "").asString(); + std::string transform_response = + (*json_body).get("transform_response", "").asString(); + std::string model = (*json_body)["model"].asString(); + Json::Value body = (*json_body)["body"]; + + // Transform Request + std::string transformed_request; + if (!transform_request.empty()) { + + try { + // Validate JSON body + if (!body || body.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + + // Render with error handling + try { + transformed_request = renderer_.Render(transform_request, *json_body); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + transformed_request = body.toStyledString(); + } + } else { + transformed_request = body.toStyledString(); + } + + // End Transform request + + CurlResponse response; + if (method == "post") { + response = MakePostRequest(model, path, transformed_request); + } else if (method == "get") { + response = MakeGetRequest(model, path); + } else if (method == "delete") { + response = MakeDeleteRequest(model, path); + } else { + Json::Value error; + error["error"] = + "method not supported! 
Supported methods are: post, get, delete"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + if (response.error) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + Json::Value error; + error["error"] = response.error_message; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response_json; + Json::Reader reader; + if (!reader.parse(response.body, response_json)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + if (!transform_response.empty()) { + // Transform Response + std::string response_str; + try { + // Validate JSON body + if (!response_json || response_json.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + // Render with error handling + try { + response_str = renderer_.Render(transform_response, response_json); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + response_str = response_json.toStyledString(); + } + + Json::Reader reader_final; + Json::Value response_json_final; + if (!reader_final.parse(response_str, response_json_final)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json_final)); + } else { + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json)); + } +} void PythonEngine::HandleRequest( std::shared_ptr json_body, std::function&& callback) { diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index 1b0a88d1a..f7fff434d 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -155,5 +155,8 @@ class PythonEngine : public EngineI { void HandleRequest( std::shared_ptr json_body, std::function&& callback) override; + virtual void HandleInference( + std::shared_ptr json_body, + std::function &&callback) = 0; }; } // namespace python_engine \ No newline at end of file From 389dd8818090d75483e4aed41aa2146db7a35747 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Sat, 14 Dec 2024 20:01:56 +0700 Subject: [PATCH 12/34] feat: integrate to cortex cpp --- engine/CMakeLists.txt | 2 +- engine/common/base.h | 13 +++++ engine/config/model_config.h | 13 +---- engine/controllers/models.cc | 33 ++++++++++- engine/controllers/server.cc | 
50 ++++++++++++++++ engine/controllers/server.h | 12 +++- engine/cortex-common/EngineI.h | 7 +-- .../extensions/python-engine/python_engine.cc | 6 +- .../extensions/python-engine/python_engine.h | 10 ++-- engine/services/engine_service.cc | 17 ++++-- engine/services/inference_service.cc | 58 +++++++++++++++++++ engine/services/inference_service.h | 8 ++- engine/services/model_service.cc | 52 +++++++++++++---- 13 files changed, 242 insertions(+), 39 deletions(-) diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index db34a8346..01d9571a8 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -144,7 +144,7 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/template_renderer.cc - + ${CMAKE_CURRENT_SOURCE_DIR}/extensions/python-engine/python_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/openai_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/anthropic_engine.cc diff --git a/engine/common/base.h b/engine/common/base.h index 478cc7feb..c572a1823 100644 --- a/engine/common/base.h +++ b/engine/common/base.h @@ -46,3 +46,16 @@ class BaseEmbedding { // The derived class can also override other methods if needed }; + +class BasePythonModel { + public: + virtual ~BasePythonModel() {} + + // Model management + virtual void Inference( + const HttpRequestPtr& req, + std::function&& callback) = 0; + virtual void RouteRequest( + const HttpRequestPtr& req, + std::function&& callback) = 0; +}; \ No newline at end of file diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 55218fca7..ccf3e2ec0 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -750,17 +750,8 @@ struct PythonModelConfig { } // Method to populate struct from JSON - void FromJson(const std::string& jsonString) { - Json::CharReaderBuilder reader; - Json::Value root; - std::string errs; - std::istringstream s(jsonString); - - if (!Json::parseFromStream(reader, s, &root, &errs)) { - std::cerr << "Error parsing JSON: " << errs << std::endl; - return; - } - + void FromJson(const Json::Value& root) { + if (root.isMember("id")) id = root["id"].asString(); if (root.isMember("model")) diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index affa45d52..23a50f1d1 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -211,6 +211,16 @@ void Models::ListModel( } data.append(std::move(obj)); yaml_handler.Reset(); + } else if (model_config.engine == kPythonEngine) { + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.path_to_model_yaml)) + .string()); + Json::Value obj = python_model_config.ToJson(); + obj["id"] = model_entry.model; + obj["model"] = model_entry.model; + data.append(std::move(obj)); } else { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile( @@ -282,7 +292,8 @@ void Models::GetModel(const HttpRequestPtr& req, auto resp = cortex_utils::CreateCortexHttpTextAsJsonResponse(ret); resp->setStatusCode(drogon::k200OK); callback(resp); - } else { + } else if (model_config.engine == kOpenAiEngine || + model_config.engine == kAnthropicEngine) { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile( fmu::ToAbsoluteCortexDataPath( @@ -295,6 +306,19 @@ void Models::GetModel(const HttpRequestPtr& req, auto resp = 
cortex_utils::CreateCortexHttpJsonResponse(ret); resp->setStatusCode(k200OK); callback(resp); + } else { + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.value().path_to_model_yaml)) + .string()); + ret = python_model_config.ToJson(); + ret["id"] = python_model_config.model; + ret["object"] = "model"; + ret["result"] = "OK"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); } } catch (const std::exception& e) { @@ -353,6 +377,13 @@ void Models::UpdateModel(const HttpRequestPtr& req, yaml_handler.WriteYamlFile(yaml_fp.string()); message = "Successfully update model ID '" + model_id + "': " + json_body.toStyledString(); + } else if (model_config.engine == kPythonEngine) { + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml(yaml_fp.string()); + python_model_config.FromJson(json_body); + python_model_config.ToYaml(yaml_fp.string()); + message = "Successfully update model ID '" + model_id + + "': " + json_body.toStyledString(); } else { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile(yaml_fp.string()); diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index a9920e8aa..67133041d 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -120,6 +120,56 @@ void server::FineTuning( LOG_TRACE << "Done fine-tuning"; } +void server::Inference(const HttpRequestPtr& req, + std::function&& callback) { + LOG_TRACE << "Start inference"; + auto q = std::make_shared(); + auto ir = inference_svc_->HandleInference(q, req->getJsonObject()); + LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); + if (ir.has_error()) { + auto err = ir.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(err)); + resp->setStatusCode( + static_cast(std::get<0>(err)["status_code"].asInt())); + callback(resp); + return; + } + LOG_TRACE << "Wait to inference"; + auto [status, res] = q->wait_and_pop(); + LOG_DEBUG << "response: " << res.toStyledString(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode( + static_cast(status["status_code"].asInt())); + callback(resp); + LOG_TRACE << "Done inference"; +} + +void server::RouteRequest( + const HttpRequestPtr& req, + std::function&& callback) { + + LOG_TRACE << "Start route request"; + auto q = std::make_shared(); + auto ir = inference_svc_->HandleRouteRequest(q, req->getJsonObject()); + LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); + if (ir.has_error()) { + auto err = ir.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(err)); + resp->setStatusCode( + static_cast(std::get<0>(err)["status_code"].asInt())); + callback(resp); + return; + } + LOG_TRACE << "Wait to route request"; + auto [status, res] = q->wait_and_pop(); + LOG_DEBUG << "response: " << res.toStyledString(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode( + static_cast(status["status_code"].asInt())); + callback(resp); + LOG_TRACE << "Done route request"; +} + void server::LoadModel(const HttpRequestPtr& req, std::function&& callback) { auto ir = inference_svc_->LoadModel(req->getJsonObject()); diff --git a/engine/controllers/server.h b/engine/controllers/server.h index 22ea86c30..b6b125f97 100644 --- a/engine/controllers/server.h +++ b/engine/controllers/server.h @@ -25,7 +25,8 @@ namespace inferences { 
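// Context for the hunk below: the server controller now also derives from
// BasePythonModel (declared in common/base.h earlier in this patch), adding
// two routes next to the existing chat-completion and embedding ones:
//   POST /v1/inference     -> server::Inference    -> InferenceService::HandleInference
//   POST /v1/route/request -> server::RouteRequest -> InferenceService::HandleRouteRequest
// Both handlers block on a shared queue (wait_and_pop) until the engine's
// callback pushes a (status, result) pair, then map status["status_code"]
// onto the HTTP response code.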
class server : public drogon::HttpController, public BaseModel, public BaseChatCompletion, - public BaseEmbedding { + public BaseEmbedding, + public BasePythonModel { public: server(std::shared_ptr inference_service, std::shared_ptr engine_service); @@ -46,8 +47,11 @@ class server : public drogon::HttpController, ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Options, Post); ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Options, Post); ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Options, Post); + ADD_METHOD_TO(server::Inference, "/v1/inference", Options, Post); + ADD_METHOD_TO(server::RouteRequest, "/v1/route/request", Options, Post); METHOD_LIST_END + void ChatCompletion( const HttpRequestPtr& req, std::function&& callback) override; @@ -69,6 +73,12 @@ class server : public drogon::HttpController, void FineTuning( const HttpRequestPtr& req, std::function&& callback) override; + void Inference( + const HttpRequestPtr& req, + std::function&& callback) override; + void RouteRequest( + const HttpRequestPtr& req, + std::function&& callback) override; private: void ProcessStreamRes(std::function cb, diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index ebc0674e1..a9a7a2926 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -69,14 +69,13 @@ class EngineI { virtual void SetLogLevel(trantor::Logger::LogLevel logLevel) = 0; virtual Json::Value GetRemoteModels() = 0; - virtual void HandleRequest( + virtual void HandleRouteRequest( std::shared_ptr json_body, std::function&& callback) = 0; virtual void HandleInference( - std::shared_ptr json_body, - std::function &&callback) = 0; + std::shared_ptr json_body, + std::function&& callback) = 0; // Stop inflight chat completion in stream mode virtual void StopInferencing(const std::string& model_id) = 0; - }; diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index d61894543..2abcb3a2c 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -660,7 +660,11 @@ void PythonEngine::HandleInference( callback(std::move(status), std::move(response_json)); } } -void PythonEngine::HandleRequest( +Json::Value PythonEngine::GetRemoteModels() { + return Json::Value(); +} +void PythonEngine::StopInferencing(const std::string& model_id) {} +void PythonEngine::HandleRouteRequest( std::shared_ptr json_body, std::function&& callback) { if (!json_body->isMember("model") || !json_body->isMember("method") || diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index f7fff434d..50e58f62b 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -152,11 +152,13 @@ class PythonEngine : public EngineI { bool IsSupported(const std::string& feature) override; bool SetFileLogger(int max_log_lines, const std::string& log_path) override; void SetLogLevel(trantor::Logger::LogLevel logLevel) override; - void HandleRequest( + void HandleRouteRequest( std::shared_ptr json_body, std::function&& callback) override; - virtual void HandleInference( - std::shared_ptr json_body, - std::function &&callback) = 0; + void HandleInference( + std::shared_ptr json_body, + std::function&& callback) override; + Json::Value GetRemoteModels() override; + void StopInferencing(const std::string& model_id) override; }; } // namespace python_engine \ No newline at end of file 
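Taken together, patches 11 and 12 define the Python-engine request path: `POST /v1/inference` reaches `PythonEngine::HandleInference`, which requires a `model` field, forwards the JSON under `body` to the model's local server at `/inference`, and can rewrite the request and response through the optional `transform_request` / `transform_response` templates rendered by `extensions::TemplateRenderer`. Below is a minimal client-side sketch of such a payload using jsoncpp; the engine string, model id, input field, and template are illustrative assumptions rather than values fixed by these patches.

```cpp
#include <json/json.h>  // jsoncpp, already a dependency of the engine code

// Hypothetical helper: builds a /v1/inference payload. The key names mirror
// what PythonEngine::HandleInference reads; every literal value is a placeholder.
Json::Value BuildInferenceRequest() {
  Json::Value req;
  req["engine"] = "python-engine";  // assumed string value of kPythonEngine
  req["model"] = "whispervq";       // a model previously started for this engine
  req["body"]["input"] = "hello";   // forwarded to the model process at POST /inference
  // Optional: a template applied to the request before it is sent; the exact
  // syntax is whatever extensions::TemplateRenderer::Render accepts.
  req["transform_request"] = "{{ body.input }}";
  return req;
}
```

On success the handler replies with `status_code` 200 and either the raw model response or, when `transform_response` is supplied, the re-rendered response body; a missing `model` field or an unreachable model process comes back as a 400 with an `error` message.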
diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index a4bccb66f..7731e0f6c 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -7,6 +7,7 @@ #include #include "algorithm" #include "database/engines.h" +#include "extensions/python-engine/python_engine.h" #include "extensions/remote-engine/anthropic_engine.h" #include "extensions/remote-engine/openai_engine.h" #include "utils/archive_utils.h" @@ -198,7 +199,6 @@ cpp::result EngineService::UninstallEngineVariant( return cpp::result(true); } - if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -533,7 +533,6 @@ EngineService::SetDefaultEngineVariant(const std::string& engine, " is not installed yet!"); } - std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -677,7 +676,6 @@ cpp::result EngineService::GetLoadedEngine( return engines_[ne].engine; } - cpp::result EngineService::LoadEngine( const std::string& engine_name) { auto ne = NormalizeEngine(engine_name); @@ -687,6 +685,13 @@ cpp::result EngineService::LoadEngine( return {}; } + // Check for python engine + + if (engine_name == kPythonEngine) { + engines_[engine_name].engine = new python_engine::PythonEngine(); + CTL_INF("Loaded engine: " << engine_name); + return {}; + } // Check for remote engine if (remote_engine::IsRemoteEngine(engine_name)) { @@ -709,7 +714,6 @@ cpp::result EngineService::LoadEngine( CTL_INF("Loading engine: " << ne); - auto engine_dir_path_res = GetEngineDirPath(ne); if (engine_dir_path_res.has_error()) { return cpp::fail(engine_dir_path_res.error()); @@ -888,6 +892,10 @@ cpp::result EngineService::IsEngineReady( } // End hard code + // Check for python engine + if (engine == kPythonEngine) { + return true; + } auto os = hw_inf_.sys_inf->os; if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) { @@ -918,7 +926,6 @@ cpp::result EngineService::UpdateEngine( CTL_INF("Default variant: " << default_variant->variant << ", version: " + default_variant->version); - std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc index 91cb277dc..85dbd215f 100644 --- a/engine/services/inference_service.cc +++ b/engine/services/inference_service.cc @@ -73,6 +73,64 @@ cpp::result InferenceService::HandleEmbedding( return {}; } +cpp::result InferenceService::HandleInference( + std::shared_ptr q, std::shared_ptr json_body) { + std::string engine_type; + if (!HasFieldInReq(json_body, "engine")) { + engine_type = kLlamaRepo; + } else { + engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); + } + + auto engine_result = engine_service_->GetLoadedEngine(engine_type); + if (engine_result.has_error()) { + Json::Value res; + Json::Value stt; + res["message"] = "Engine is not loaded yet"; + stt["status_code"] = drogon::k400BadRequest; + LOG_WARN << "Engine is not loaded yet"; + return cpp::fail(std::make_pair(stt, res)); + } + + auto cb = [q](Json::Value status, Json::Value res) { + q->push(std::make_pair(status, res)); + }; + if (std::holds_alternative(engine_result.value())) { + std::get(engine_result.value()) + ->HandleInference(json_body, std::move(cb)); + } + return {}; +} + +cpp::result 
InferenceService::HandleRouteRequest( + std::shared_ptr q, std::shared_ptr json_body) { + std::string engine_type; + if (!HasFieldInReq(json_body, "engine")) { + engine_type = kLlamaRepo; + } else { + engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); + } + + auto engine_result = engine_service_->GetLoadedEngine(engine_type); + if (engine_result.has_error()) { + Json::Value res; + Json::Value stt; + res["message"] = "Engine is not loaded yet"; + stt["status_code"] = drogon::k400BadRequest; + LOG_WARN << "Engine is not loaded yet"; + return cpp::fail(std::make_pair(stt, res)); + } + + auto cb = [q](Json::Value status, Json::Value res) { + q->push(std::make_pair(status, res)); + }; + if (std::holds_alternative(engine_result.value())) { + std::get(engine_result.value()) + ->HandleRouteRequest(json_body, std::move(cb)); + } + return {}; +} + InferResult InferenceService::LoadModel( std::shared_ptr json_body) { std::string engine_type; diff --git a/engine/services/inference_service.h b/engine/services/inference_service.h index b417fa14a..61d88fee2 100644 --- a/engine/services/inference_service.h +++ b/engine/services/inference_service.h @@ -3,9 +3,9 @@ #include #include #include +#include "extensions/remote-engine/remote_engine.h" #include "services/engine_service.h" #include "utils/result.hpp" -#include "extensions/remote-engine/remote_engine.h" namespace services { // Status and result using InferResult = std::pair; @@ -41,6 +41,12 @@ class InferenceService { cpp::result HandleEmbedding( std::shared_ptr q, std::shared_ptr json_body); + cpp::result HandleInference( + std::shared_ptr q, std::shared_ptr json_body); + + cpp::result HandleRouteRequest( + std::shared_ptr q, std::shared_ptr json_body); + InferResult LoadModel(std::shared_ptr json_body); InferResult UnloadModel(const std::string& engine, diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 21e3dafaa..cb8a0b1ab 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -769,18 +769,48 @@ cpp::result ModelService::StartModel( constexpr const int kDefautlContextLength = 8192; int max_model_context_length = kDefautlContextLength; Json::Value json_data; - // Currently we don't support download vision models, so we need to bypass check - if (!params_override.bypass_model_check()) { - auto model_entry = modellist_handler.GetModelInfo(model_handle); - if (model_entry.has_error()) { - CTL_WRN("Error: " + model_entry.error()); - return cpp::fail(model_entry.error()); - } - yaml_handler.ModelConfigFromFile( + auto model_entry = modellist_handler.GetModelInfo(model_handle); + if (model_entry.has_error()) { + CTL_WRN("Error: " + model_entry.error()); + return cpp::fail(model_entry.error()); + } + yaml_handler.ModelConfigFromFile( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.value().path_to_model_yaml)) + .string()); + auto mc = yaml_handler.GetModelConfig(); + + // Check if Python model first + if (mc.engine == kPythonEngine) { + json_data["model"] = model_handle; + json_data["model_path"] = fmu::ToAbsoluteCortexDataPath( fs::path(model_entry.value().path_to_model_yaml)) - .string()); - auto mc = yaml_handler.GetModelConfig(); + .string(); + json_data["engine"] = mc.engine; + assert(!!inference_svc_); + // Check if python engine + + auto ir = + inference_svc_->LoadModel(std::make_shared(json_data)); + auto status = std::get<0>(ir)["status_code"].asInt(); + auto data = std::get<1>(ir); + + if (status == drogon::k200OK) { + return StartModelResult{.success = 
true, .warning = ""};
+    } else if (status == drogon::k409Conflict) {
+      CTL_INF("Model '" + model_handle + "' is already loaded");
+      return StartModelResult{.success = true, .warning = ""};
+    } else {
+      // only report to user the error
+      CTL_ERR("Model failed to start with status code: " << status);
+      return cpp::fail("Model failed to start: " +
+                       data["message"].asString());
+    }
+  }
+
+  // Currently we don't support download vision models, so we need to bypass check
   if (!params_override.bypass_model_check()) {
     // Running remote model
     if (remote_engine::IsRemoteEngine(mc.engine)) {
@@ -881,6 +911,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
   }
 
   assert(!!inference_svc_);
+  // Check if python engine
+
   auto ir =
       inference_svc_->LoadModel(std::make_shared<Json::Value>(json_data));
   auto status = std::get<0>(ir)["status_code"].asInt();

From e6324c280b7ce867116da9016af7ea8b94da29f4 Mon Sep 17 00:00:00 2001
From: nguyenhoangthuan99
Date: Mon, 16 Dec 2024 09:50:05 +0700
Subject: [PATCH 13/34] fix: remove python engine load engine option

---
 engine/extensions/python-engine/python_engine.cc | 4 ----
 engine/extensions/python-engine/python_engine.h  | 1 -
 2 files changed, 5 deletions(-)

diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc
index 2abcb3a2c..ea980e870 100644
--- a/engine/extensions/python-engine/python_engine.cc
+++ b/engine/extensions/python-engine/python_engine.cc
@@ -905,10 +905,6 @@ void PythonEngine::SetLogLevel(trantor::Logger::LogLevel log_level) {
   trantor::Logger::setLogLevel(log_level);
 }
 
-void PythonEngine::RegisterLibraryPath(RegisterLibraryOption opts) {
-
-};
-
 void PythonEngine::Load(EngineLoadOption opts) {
   // Develop register model here on loading engine
 };

diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h
index 50e58f62b..e404f2e19 100644
--- a/engine/extensions/python-engine/python_engine.h
+++ b/engine/extensions/python-engine/python_engine.h
@@ -118,7 +118,6 @@ class PythonEngine : public EngineI {
  public:
   PythonEngine();
   ~PythonEngine();
-  void RegisterLibraryPath(RegisterLibraryOption opts) override;
 
   void Load(EngineLoadOption opts) override;
 
From 3838a36e010ce036bc59c684c66e71820e8ca849 Mon Sep 17 00:00:00 2001
From: nguyenhoangthuan99
Date: Mon, 16 Dec 2024 23:58:40 +0700
Subject: [PATCH 14/34] feat: init environment interface

---
 engine/common/download_task.h                 |  13 +-
 .../extensions/python-engine/python_engine.cc |   1 +
 engine/services/environment_serrvice.cc       |   0
 engine/services/environment_service.h         |  51 ++++++
 engine/utils/curl_utils.cc                    |  21 ++-
 engine/utils/curl_utils.h                     |   5 +-
 engine/utils/environment_utils.h              | 168 ++++++++++++++++++
 7 files changed, 245 insertions(+), 14 deletions(-)
 create mode 100644 engine/services/environment_serrvice.cc
 create mode 100644 engine/services/environment_service.h
 create mode 100644 engine/utils/environment_utils.h

diff --git a/engine/common/download_task.h b/engine/common/download_task.h
index 95e736394..53f1902c5 100644
--- a/engine/common/download_task.h
+++ b/engine/common/download_task.h
@@ -6,7 +6,14 @@
 #include
 #include
 
-enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit, Cortex };
+enum class DownloadType {
+  Model,
+  Engine,
+  Miscellaneous,
+  CudaToolkit,
+  Cortex,
+  Environments
+};
 
 struct DownloadItem {
@@ -48,6 +55,8 @@ inline std::string DownloadTypeToString(DownloadType type) {
       return "CudaToolkit";
     case DownloadType::Cortex:
       return "Cortex";
+    case DownloadType::Environments:
+      return
"Environments"; default: return "Unknown"; } @@ -64,6 +73,8 @@ inline DownloadType DownloadTypeFromString(const std::string& str) { return DownloadType::CudaToolkit; } else if (str == "Cortex") { return DownloadType::Cortex; + } else if (str == "Environments") { + return DownloadType::Environments; } else { return DownloadType::Miscellaneous; } diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index ea980e870..b422a7340 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -319,6 +319,7 @@ void PythonEngine::GetModels( void PythonEngine::LoadModel( std::shared_ptr json_body, std::function&& callback) { + // TODO: handle a case that can spawn process but the process spawn fail. pid_t pid; if (!json_body->isMember("model") || !json_body->isMember("model_path")) { Json::Value error; diff --git a/engine/services/environment_serrvice.cc b/engine/services/environment_serrvice.cc new file mode 100644 index 000000000..e69de29bb diff --git a/engine/services/environment_service.h b/engine/services/environment_service.h new file mode 100644 index 000000000..b26cd3cf4 --- /dev/null +++ b/engine/services/environment_service.h @@ -0,0 +1,51 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils/environment_utils.h" +#include "utils/system_info_utils.h" + +using Environment = environment_utils::Environment; + +struct EnvironmentsUpdateResult { + Environment environment; + std::string from; + std::string to; + + Json::Value ToJson() const { + Json::Value root; + root["environment"] = environment.ToJson(); + root["from"] = from; + root["to"] = to; + return root; + } +}; + +class EnvironmentService { + public: + cpp::result IsEnvironmentReady( + const std::string& environment); + cpp::result InstallEnvironmentAsync( + const std::string& environment, const std::string& version); + cpp::result UnInstallEnvironment( + const std::string& environment, const std::string& version); + cpp::result, std::string> GetEnvironmentReleases( + const std::string& environment) const; + cpp::result, std::string> GetInstalledEnvironments() + const; + cpp::result, std::string> GetDefaultEnvironment( + const std::string& environment) const; + cpp::result, std::string> SetDefaultEnvironment( + const std::string& environment) const; + + private: + cpp::result DownloadEnvironment( + const std::string& environment, const std::string& version = "latest"); + cpp::result, std::string> + GetEnvironmentDirPath(const std::string& environment); +}; \ No newline at end of file diff --git a/engine/utils/curl_utils.cc b/engine/utils/curl_utils.cc index 9ba0f5a76..b60f76fc2 100644 --- a/engine/utils/curl_utils.cc +++ b/engine/utils/curl_utils.cc @@ -242,8 +242,8 @@ cpp::result ReadRemoteYaml(const std::string& url) { } } -cpp::result SimpleGetJson(const std::string& url, - const int timeout) { +cpp::result SimpleGetJson( + const std::string& url, const int timeout, std::optional recursive) { auto result = SimpleGet(url, timeout); if (result.has_error()) { CTL_ERR("Failed to get JSON from " + url + ": " + result.error()); @@ -257,11 +257,11 @@ cpp::result SimpleGetJson(const std::string& url, " parsing error: " + reader.getFormattedErrorMessages()); } - if (root.isArray()) { + if (root.isArray() && recursive) { for (const auto& value : root) { if (value["type"].asString() == "directory") { auto temp = - SimpleGetJson(url + "/" + value["path"].asString(), 
timeout); + SimpleGetJson(url + "/" + value["path"].asString(), timeout, recursive); if (!temp.has_error()) { if (temp.value().isArray()) { for (const auto& item : temp.value()) { @@ -273,14 +273,13 @@ cpp::result SimpleGetJson(const std::string& url, } } } - for (Json::ArrayIndex i = 0; i < root.size();) { - if (root[i].isMember("type") && root[i]["type"] == "directory") { - root.removeIndex(i, nullptr); - } else { - ++i; - } + for (Json::ArrayIndex i = 0; i < root.size();) { + if (root[i].isMember("type") && root[i]["type"] == "directory") { + root.removeIndex(i, nullptr); + } else { + ++i; + } } - } return root; } diff --git a/engine/utils/curl_utils.h b/engine/utils/curl_utils.h index 64b5fc339..8bf324dd9 100644 --- a/engine/utils/curl_utils.h +++ b/engine/utils/curl_utils.h @@ -32,8 +32,9 @@ cpp::result ReadRemoteYaml(const std::string& url); * * [timeout] is an optional parameter that specifies the timeout for the request. In second. */ -cpp::result SimpleGetJson(const std::string& url, - const int timeout = -1); +cpp::result SimpleGetJson( + const std::string& url, const int timeout = -1, + std::optional recursive = true); cpp::result SimplePostJson( const std::string& url, const std::string& body = ""); diff --git a/engine/utils/environment_utils.h b/engine/utils/environment_utils.h new file mode 100644 index 000000000..d6666a122 --- /dev/null +++ b/engine/utils/environment_utils.h @@ -0,0 +1,168 @@ +#pragma once +#include +#include +#include +#include +#include +#include "utils/curl_utils.h" +#include "utils/result.hpp" +namespace environment_utils { + +constexpr const auto kBaseEnvironmentsUrl = + "https://delta.jan.ai/environments/"; + +struct Environment { + std::string type; // e.g., "python" + std::string name; // e.g., "whispervq" + std::string version; // e.g., "latest" + std::string os; // e.g., "window", "linux" + std::string arch; // e.g., "amd64" + + // Convert Environment to JSON + Json::Value ToJson() const { + Json::Value json; + json["type"] = type; + json["name"] = name; + json["version"] = version; + json["os"] = os; + json["arch"] = arch; + return json; + } + + // Create Environment from JSON + static cpp::result FromJson( + const Json::Value& json) { + Environment env; + + // Validate required fields + const std::vector required_fields = {"type", "name", "version", + "os", "arch"}; + + for (const auto& field : required_fields) { + if (!json.isMember(field) || json[field].asString().empty()) { + return cpp::fail("Missing or empty required field: " + field); + } + } + + env.type = json["type"].asString(); + env.name = json["name"].asString(); + env.version = json["version"].asString(); + env.os = json["os"].asString(); + env.arch = json["arch"].asString(); + + return env; + } + + // Method to generate full artifact URL + std::string generateUrl() const { + return kBaseEnvironmentsUrl + type + "/" + name + "/" + version + "/" + + name + "-" + os + "-" + arch + ".zip"; + } + + // Method to validate the environment structure + bool isValid() const { + return !type.empty() && !name.empty() && !version.empty() && !os.empty() && + !arch.empty(); + } +}; + +// Utility function to parse URL components into an Environment struct +cpp::result parseEnvironmentUrl( + const std::string& url) { + Environment env; + + size_t environments_pos = url.find("environments/"); + if (environments_pos == std::string::npos) { + return cpp::fail("Invalid URL format"); + } + + std::string remaining = url.substr(environments_pos + 13); + std::vector parts; + size_t pos = 0; + while ((pos = 
remaining.find('/')) != std::string::npos) { + parts.push_back(remaining.substr(0, pos)); + remaining.erase(0, pos + 1); + } + parts.push_back(remaining); + + if (parts.size() < 5) { + return cpp::fail("Insufficient URL components"); + } + + env.type = parts[0]; + env.name = parts[1]; + env.version = parts[2]; + + // Extract OS and arch from the filename + std::string filename = parts[3]; + size_t os_sep = filename.find('-'); + size_t arch_sep = filename.find('-', os_sep + 1); + + if (os_sep == std::string::npos || arch_sep == std::string::npos) { + return cpp::fail("Cannot parse OS and architecture"); + } + + env.os = filename.substr(os_sep + 1, arch_sep - os_sep - 1); + env.arch = filename.substr(arch_sep + 1, filename.find('.') - arch_sep - 1); + + return env; +} + +// Fetch environment names +cpp::result, std::string> fetchEnvironmentNames( + const std::string& type, int timeout = 30) { + auto url = kBaseEnvironmentsUrl + type; + auto json_result = curl_utils::SimpleGetJson(url, timeout, false); + if (json_result.has_error()) { + return cpp::fail(json_result.error()); + } + + std::vector environment_names; + const Json::Value& root = json_result.value(); + + // Store unique environment names + std::unordered_set unique_names; + + for (const auto& item : root) { + if (item.isMember("path")) { + environment_names.push_back(item["path"].asString()); + } + } + + return environment_names; +} + +// Get all versions for a specific environment +cpp::result, std::string> fetchEnvironmentVersions( + const std::string& base_url, const std::string& environment_name, + int timeout = 30, bool recursive = true) { + auto json_result = curl_utils::SimpleGetJson( + base_url + "/" + environment_name, timeout, recursive); + if (json_result.has_error()) { + return cpp::fail(json_result.error()); + } + + std::vector versions; + const Json::Value& root = json_result.value(); + + // Store unique versions + std::unordered_set unique_versions; + + for (const auto& item : root) { + if (item.isMember("path")) { + auto url_parse_result = parseEnvironmentUrl( + base_url + "/" + environment_name + "/" + item["path"].asString()); + if (!url_parse_result.has_error()) { + const auto& env = url_parse_result.value(); + // Only add if not already present + if (unique_versions.insert(env.version).second) { + versions.push_back(env.version); + } + } + } + } + + return versions; +} + +} // namespace environment_utils \ No newline at end of file From 34237d67fdecea8572a4a1439640a681f78541d9 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Tue, 17 Dec 2024 19:20:18 +0700 Subject: [PATCH 15/34] feat: move virtual environment inside model --- engine/config/model_config.h | 35 +++- .../extensions/python-engine/python_engine.cc | 21 +-- engine/services/environment_serrvice.cc | 0 engine/services/environment_service.h | 51 ------ engine/services/model_service.cc | 61 ++++++- engine/utils/environment_constants.h | 3 - engine/utils/environment_utils.h | 168 ------------------ engine/utils/set_permission_utils.h | 106 +++++++++++ 8 files changed, 198 insertions(+), 247 deletions(-) delete mode 100644 engine/services/environment_serrvice.cc delete mode 100644 engine/services/environment_service.h delete mode 100644 engine/utils/environment_constants.h delete mode 100644 engine/utils/environment_utils.h create mode 100644 engine/utils/set_permission_utils.h diff --git a/engine/config/model_config.h b/engine/config/model_config.h index ccf3e2ec0..0743cdab2 100644 --- a/engine/config/model_config.h +++ 
b/engine/config/model_config.h @@ -547,17 +547,17 @@ struct PythonModelConfig { // Model Load Parameters std::string port; - std::string model_location; std::string script; std::string log_path; std::string log_level; std::string environment; std::vector command; // New command field + std::vector files; std::string engine; Json::Value extra_params; // Accept dynamic extra parameters // Method to convert C++ struct to YAML - void ToYaml(const std::string & filepath) const { + void ToYaml(const std::string& filepath) const { YAML::Emitter out; out << YAML::BeginMap; @@ -597,7 +597,6 @@ struct PythonModelConfig { // Model Load Parameters out << YAML::Key << "port" << YAML::Value << port; - out << YAML::Key << "model_location" << YAML::Value << model_location; out << YAML::Key << "script" << YAML::Value << script; out << YAML::Key << "log_path" << YAML::Value << log_path; out << YAML::Key << "log_level" << YAML::Value << log_level; @@ -610,6 +609,13 @@ struct PythonModelConfig { } out << YAML::EndSeq; + // Serialize files as YAML list + out << YAML::Key << "files" << YAML::Value << YAML::BeginSeq; + for (const auto& file : files) { + out << file; + } + out << YAML::EndSeq; + out << YAML::Key << "engine" << YAML::Value << engine; // Serialize extra_params as YAML @@ -674,8 +680,6 @@ struct PythonModelConfig { auto mlp = config; if (mlp["port"]) port = mlp["port"].as(); - if (mlp["model_location"]) - model_location = mlp["model_location"].as(); if (mlp["script"]) script = mlp["script"].as(); if (mlp["log_path"]) @@ -693,6 +697,12 @@ struct PythonModelConfig { } } + if (mlp["files"] && mlp["files"].IsSequence()) { + for (const auto& file : mlp["files"]) { + files.push_back(file.as()); + } + } + if (mlp["extra_params"]) { for (YAML::const_iterator it = mlp["extra_params"].begin(); it != mlp["extra_params"].end(); ++it) { @@ -735,7 +745,6 @@ struct PythonModelConfig { root["log_path"] = log_path; root["log_level"] = log_level; root["environment"] = environment; - root["model_location"] = model_location; root["script"] = script; // Serialize command as JSON array @@ -743,6 +752,10 @@ struct PythonModelConfig { root["command"].append(cmd); } + for (const auto& file : files) { + root["files"].append(file); + } + root["engine"] = engine; root["extra_params"] = extra_params; // Serialize the JSON value directly @@ -751,7 +764,7 @@ struct PythonModelConfig { // Method to populate struct from JSON void FromJson(const Json::Value& root) { - + if (root.isMember("id")) id = root["id"].asString(); if (root.isMember("model")) @@ -802,8 +815,6 @@ struct PythonModelConfig { environment = mlp["environment"].asString(); if (mlp.isMember("engine")) engine = mlp["engine"].asString(); - if (mlp.isMember("model_location")) - model_location = mlp["model_location"].asString(); if (mlp.isMember("script")) script = mlp["script"].asString(); @@ -813,6 +824,12 @@ struct PythonModelConfig { } } + if (mlp.isMember("files")) { + for (const auto& file : mlp["files"]) { + files.push_back(file.asString()); + } + } + if (mlp.isMember("extra_params")) { extra_params = mlp["extra_params"]; // Directly assign the JSON value } diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index b422a7340..5e85ed6b6 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -106,9 +106,9 @@ pid_t PythonEngine::SpawnProcess(const std::string& model, // Convert command vector to char*[] std::vector argv = 
convertToArgv(command); - for (auto c : command) { - std::cout << c << " " << std::endl; - } + // for (auto c : command) { + // std::cout << c << " " << std::endl; + // } // Use posix_spawn for cross-platform compatibility int spawn_result = posix_spawn(&pid, // pid output @@ -319,7 +319,7 @@ void PythonEngine::GetModels( void PythonEngine::LoadModel( std::shared_ptr json_body, std::function&& callback) { - // TODO: handle a case that can spawn process but the process spawn fail. + // TODO: handle a case that can spawn process but the process spawn fail. pid_t pid; if (!json_body->isMember("model") || !json_body->isMember("model_path")) { Json::Value error; @@ -359,26 +359,19 @@ void PythonEngine::LoadModel( return; } auto model_config = models_[model]; + auto model_folder_path = model_config.files[0]; + auto data_folder_path = std::filesystem::path(model_folder_path) / std::filesystem::path("venv"); try { - std::string data_folder_path = - "/home/thuan/cortexcpp/environments/"; // To do: will be removed with cortex data path - std::string model_folder_path = - "/home/thuan/cortexcpp/models/cortex.so/whispervq/fp16/"; // To do: will be removed with cortex model path #ifdef _WIN32 auto executable = std::filesystem::path(data_folder_path) / - std::filesystem::path(model_config.environment) / std::filesystem::path("Scripts"); #else auto executable = std::filesystem::path(data_folder_path) / - std::filesystem::path(model_config.environment) / std::filesystem::path("bin"); #endif - std::cout << "executable string: " << executable.string() - << data_folder_path << " " << model_config.environment - << std::endl; + auto executable_str = (executable / std::filesystem::path(model_config.command[0])).string(); - std::cout << "executable string: " << executable_str << std::endl; auto command = model_config.command; command[0] = executable_str; command.push_back((std::filesystem::path(model_folder_path) / diff --git a/engine/services/environment_serrvice.cc b/engine/services/environment_serrvice.cc deleted file mode 100644 index e69de29bb..000000000 diff --git a/engine/services/environment_service.h b/engine/services/environment_service.h deleted file mode 100644 index b26cd3cf4..000000000 --- a/engine/services/environment_service.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "utils/environment_utils.h" -#include "utils/system_info_utils.h" - -using Environment = environment_utils::Environment; - -struct EnvironmentsUpdateResult { - Environment environment; - std::string from; - std::string to; - - Json::Value ToJson() const { - Json::Value root; - root["environment"] = environment.ToJson(); - root["from"] = from; - root["to"] = to; - return root; - } -}; - -class EnvironmentService { - public: - cpp::result IsEnvironmentReady( - const std::string& environment); - cpp::result InstallEnvironmentAsync( - const std::string& environment, const std::string& version); - cpp::result UnInstallEnvironment( - const std::string& environment, const std::string& version); - cpp::result, std::string> GetEnvironmentReleases( - const std::string& environment) const; - cpp::result, std::string> GetInstalledEnvironments() - const; - cpp::result, std::string> GetDefaultEnvironment( - const std::string& environment) const; - cpp::result, std::string> SetDefaultEnvironment( - const std::string& environment) const; - - private: - cpp::result DownloadEnvironment( - const std::string& environment, const std::string& version = "latest"); 
- cpp::result, std::string> - GetEnvironmentDirPath(const std::string& environment); -}; \ No newline at end of file diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 7faa59ae5..4ebb605b4 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -9,6 +10,7 @@ #include "config/yaml_config.h" #include "database/models.h" #include "hardware_service.h" +#include "utils/archive_utils.h" #include "utils/cli_selection_utils.h" #include "utils/cortex_utils.h" #include "utils/engine_constants.h" @@ -16,6 +18,7 @@ #include "utils/huggingface_utils.h" #include "utils/logging_utils.h" #include "utils/result.hpp" +#include "utils/set_permission_utils.h" #include "utils/string_utils.h" #include "utils/widechar_conv.h" @@ -540,9 +543,63 @@ ModelService::DownloadModelFromCortexsoAsync( if (mc.engine == kPythonEngine) { // process for Python engine config::PythonModelConfig python_model_config; python_model_config.ReadFromYaml(model_yml_item->localPath.string()); - python_model_config.model_location = - model_yml_item->localPath.parent_path().string(); + python_model_config.files.push_back( + model_yml_item->localPath.parent_path().string()); python_model_config.ToYaml(model_yml_item->localPath.string()); + // unzip venv.zip + auto model_folder = model_yml_item->localPath.parent_path(); + auto venv_path = model_folder / std::filesystem::path("venv"); + if (!std::filesystem::exists(venv_path)) { + std::filesystem::create_directories(venv_path); + } + auto venv_zip = model_folder / std::filesystem::path("venv.zip"); + if (std::filesystem::exists(venv_zip)) { + if (archive_utils::ExtractArchive(venv_zip.string(), venv_path)) { + std::filesystem::remove_all(venv_zip); + CTL_INF("Successfully extract venv.zip"); + // If extract success create pyvenv.cfg + std::ofstream pyvenv_cfg(venv_path / + std::filesystem::path("pyvenv.cfg")); +#ifdef _WIN32 + pyvenv_cfg << "home = " + << (venv_path / std::filesystem::path("Scripts")).string() + << std::endl; + pyvenv_cfg << "executable = " + << (venv_path / std::filesystem::path("Scripts") / + std::filesystem::path("python.exe")) + .string() + << std::endl; + +#else + pyvenv_cfg << "home = " + << (venv_path / std::filesystem::path("bin/")).string() + << std::endl; + pyvenv_cfg + << "executable = " + << (venv_path / std::filesystem::path("bin/python")).string() + << std::endl; +#endif + + // Close the file + pyvenv_cfg.close(); + // Add executable permission to python + +#ifdef _WIN32 + set_permission_utils::SetExecutePermissionsRecursive( + venv_path / std::filesystem::path("Scripts")); +#else + set_permission_utils::SetExecutePermissionsRecursive( + venv_path / std::filesystem::path("bin")); +#endif + + } else { + CTL_ERR("Failed to extract venv.zip"); + }; + + } else { + CTL_ERR( + "venv.zip not found in model folder: " << model_folder.string()); + } } else { mc.model = unique_model_id; diff --git a/engine/utils/environment_constants.h b/engine/utils/environment_constants.h deleted file mode 100644 index f14df67f8..000000000 --- a/engine/utils/environment_constants.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -constexpr const auto kWhisperVQEnvironment = "whispervq"; \ No newline at end of file diff --git a/engine/utils/environment_utils.h b/engine/utils/environment_utils.h deleted file mode 100644 index d6666a122..000000000 --- a/engine/utils/environment_utils.h +++ /dev/null @@ -1,168 +0,0 @@ -#pragma once -#include 
-#include -#include -#include -#include -#include "utils/curl_utils.h" -#include "utils/result.hpp" -namespace environment_utils { - -constexpr const auto kBaseEnvironmentsUrl = - "https://delta.jan.ai/environments/"; - -struct Environment { - std::string type; // e.g., "python" - std::string name; // e.g., "whispervq" - std::string version; // e.g., "latest" - std::string os; // e.g., "window", "linux" - std::string arch; // e.g., "amd64" - - // Convert Environment to JSON - Json::Value ToJson() const { - Json::Value json; - json["type"] = type; - json["name"] = name; - json["version"] = version; - json["os"] = os; - json["arch"] = arch; - return json; - } - - // Create Environment from JSON - static cpp::result FromJson( - const Json::Value& json) { - Environment env; - - // Validate required fields - const std::vector required_fields = {"type", "name", "version", - "os", "arch"}; - - for (const auto& field : required_fields) { - if (!json.isMember(field) || json[field].asString().empty()) { - return cpp::fail("Missing or empty required field: " + field); - } - } - - env.type = json["type"].asString(); - env.name = json["name"].asString(); - env.version = json["version"].asString(); - env.os = json["os"].asString(); - env.arch = json["arch"].asString(); - - return env; - } - - // Method to generate full artifact URL - std::string generateUrl() const { - return kBaseEnvironmentsUrl + type + "/" + name + "/" + version + "/" + - name + "-" + os + "-" + arch + ".zip"; - } - - // Method to validate the environment structure - bool isValid() const { - return !type.empty() && !name.empty() && !version.empty() && !os.empty() && - !arch.empty(); - } -}; - -// Utility function to parse URL components into an Environment struct -cpp::result parseEnvironmentUrl( - const std::string& url) { - Environment env; - - size_t environments_pos = url.find("environments/"); - if (environments_pos == std::string::npos) { - return cpp::fail("Invalid URL format"); - } - - std::string remaining = url.substr(environments_pos + 13); - std::vector parts; - size_t pos = 0; - while ((pos = remaining.find('/')) != std::string::npos) { - parts.push_back(remaining.substr(0, pos)); - remaining.erase(0, pos + 1); - } - parts.push_back(remaining); - - if (parts.size() < 5) { - return cpp::fail("Insufficient URL components"); - } - - env.type = parts[0]; - env.name = parts[1]; - env.version = parts[2]; - - // Extract OS and arch from the filename - std::string filename = parts[3]; - size_t os_sep = filename.find('-'); - size_t arch_sep = filename.find('-', os_sep + 1); - - if (os_sep == std::string::npos || arch_sep == std::string::npos) { - return cpp::fail("Cannot parse OS and architecture"); - } - - env.os = filename.substr(os_sep + 1, arch_sep - os_sep - 1); - env.arch = filename.substr(arch_sep + 1, filename.find('.') - arch_sep - 1); - - return env; -} - -// Fetch environment names -cpp::result, std::string> fetchEnvironmentNames( - const std::string& type, int timeout = 30) { - auto url = kBaseEnvironmentsUrl + type; - auto json_result = curl_utils::SimpleGetJson(url, timeout, false); - if (json_result.has_error()) { - return cpp::fail(json_result.error()); - } - - std::vector environment_names; - const Json::Value& root = json_result.value(); - - // Store unique environment names - std::unordered_set unique_names; - - for (const auto& item : root) { - if (item.isMember("path")) { - environment_names.push_back(item["path"].asString()); - } - } - - return environment_names; -} - -// Get all versions for a specific 
environment -cpp::result, std::string> fetchEnvironmentVersions( - const std::string& base_url, const std::string& environment_name, - int timeout = 30, bool recursive = true) { - auto json_result = curl_utils::SimpleGetJson( - base_url + "/" + environment_name, timeout, recursive); - if (json_result.has_error()) { - return cpp::fail(json_result.error()); - } - - std::vector versions; - const Json::Value& root = json_result.value(); - - // Store unique versions - std::unordered_set unique_versions; - - for (const auto& item : root) { - if (item.isMember("path")) { - auto url_parse_result = parseEnvironmentUrl( - base_url + "/" + environment_name + "/" + item["path"].asString()); - if (!url_parse_result.has_error()) { - const auto& env = url_parse_result.value(); - // Only add if not already present - if (unique_versions.insert(env.version).second) { - versions.push_back(env.version); - } - } - } - } - - return versions; -} - -} // namespace environment_utils \ No newline at end of file diff --git a/engine/utils/set_permission_utils.h b/engine/utils/set_permission_utils.h new file mode 100644 index 000000000..b91d0c383 --- /dev/null +++ b/engine/utils/set_permission_utils.h @@ -0,0 +1,106 @@ +#pragma once + +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#endif +#include "utils/logging_utils.h" +namespace set_permission_utils { +// Cross-platform method to set execute permission for a single file +inline bool SetExecutePermission(const std::filesystem::path& filePath, + bool ownerOnly = false) { + std::error_code ec; + +#ifdef _WIN32 + // Windows execution permission handling + std::filesystem::path exePath = filePath; + + // Add .exe extension if no extension exists + if (exePath.extension().empty()) { + exePath += ".exe"; + std::filesystem::rename(filePath, exePath); + } + + // Clear read-only attribute + DWORD fileAttributes = GetFileAttributes(exePath.c_str()); + if (fileAttributes == INVALID_FILE_ATTRIBUTES) { + CTL_ERROR << "Error accessing file: " << GetLastError() << std::endl; + return false; + } + + fileAttributes &= ~FILE_ATTRIBUTE_READONLY; + + if (!SetFileAttributes(exePath.c_str(), fileAttributes)) { + CTL_ERROR << "Error setting file attributes: " << GetLastError() + << std::endl; + return false; + } + +#else + // POSIX systems (Linux, macOS) + struct stat st; + if (stat(filePath.c_str(), &st) != 0) { + CTL_ERR("Error getting file stats: " << strerror(errno)); + return false; + } + + // Set execute permissions based on ownerOnly flag + mode_t newMode; + if (ownerOnly) { + // Only owner can execute + newMode = (st.st_mode & ~(S_IXGRP | S_IXOTH)) | S_IXUSR; + } else { + // Everyone can execute + newMode = st.st_mode | S_IXUSR | // Owner execute + S_IXGRP | // Group execute + S_IXOTH; // Others execute + } + + if (chmod(filePath.c_str(), newMode) != 0) { + CTL_ERR("Error setting execute permissions: " << strerror(errno)); + return false; + } +#endif + + return true; +} +inline std::vector SetExecutePermissionsRecursive( + const std::filesystem::path& directoryPath, bool ownerOnly = false, + bool skipDirectories = true) { + std::vector modifiedFiles; + + try { + // Iterate through all files and subdirectories + for (const auto& entry : + std::filesystem::recursive_directory_iterator(directoryPath)) { + // Skip directories if specified + if (skipDirectories && entry.is_directory()) { + continue; + } + + // Only process files + if (entry.is_regular_file()) { + try { + if (SetExecutePermission(entry.path(), ownerOnly)) { + 
modifiedFiles.push_back(entry.path()); + } + } catch (const std::exception& e) { + CTL_ERR("Error processing file " + entry.path().string() + ": " + + e.what()); + } + } + } + } catch (const std::filesystem::filesystem_error& e) { + CTL_ERR("Filesystem error: " << e.what()); + } + + return modifiedFiles; +} + +} // namespace set_permission_utils \ No newline at end of file From 7ce7eb707e1529a871bac37230541d5735143052 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 <35255081+nguyenhoangthuan99@users.noreply.github.com> Date: Wed, 18 Dec 2024 15:07:32 +0700 Subject: [PATCH 16/34] Update CMakeLists.txt --- engine/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 782e2d849..420434eb9 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -150,9 +150,6 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/openai_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/anthropic_engine.cc - ) target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) From c2b11180b00b0b90447706a125ba58cca2627f6c Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 <35255081+nguyenhoangthuan99@users.noreply.github.com> Date: Wed, 18 Dec 2024 15:10:26 +0700 Subject: [PATCH 17/34] Update CMakeLists.txt --- engine/cli/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt index c29ec622b..efff03d10 100644 --- a/engine/cli/CMakeLists.txt +++ b/engine/cli/CMakeLists.txt @@ -84,9 +84,7 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/../services/inference_service.cc ${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc - - ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/openai_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/anthropic_engine.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/python-engine/python_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/template_renderer.cc From 7f9ded074d60745090053c403e5ec97224986ab8 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Wed, 18 Dec 2024 15:19:26 +0700 Subject: [PATCH 18/34] fix: CI build --- engine/extensions/remote-engine/remote_engine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 54d6b2f83..6f08b5403 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -21,7 +21,7 @@ struct StreamContext { // Cache value for Anthropic std::string id; std::string model; - TemplateRenderer& renderer; + extensions::TemplateRenderer& renderer; std::string stream_template; }; struct CurlResponse { From 27d50974d41605a68d6256d40e3f547a7d2ad9eb Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 19 Dec 2024 15:10:32 +0700 Subject: [PATCH 19/34] fix: move log of python to cortex logs folder --- .../extensions/python-engine/python_engine.cc | 18 ++++++++++++------ .../extensions/python-engine/python_engine.h | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 5e85ed6b6..12c25a76c 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ 
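To make the intended use of the `set_permission_utils.h` helpers above concrete, here is a minimal caller sketch; the venv path is hypothetical and error reporting is reduced to a count:

```cpp
#include <filesystem>
#include <iostream>

#include "utils/set_permission_utils.h"

int main() {
  // Hypothetical layout produced by extracting venv.zip (POSIX)
  std::filesystem::path bin_dir = "models/whispervq/venv/bin";

  // Mark every regular file under bin/ as executable for owner, group,
  // and others; directories are skipped by default.
  auto changed = set_permission_utils::SetExecutePermissionsRecursive(bin_dir);
  std::cout << "made " << changed.size() << " files executable\n";
  return 0;
}
```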
b/engine/extensions/python-engine/python_engine.cc
@@ -360,14 +360,15 @@ void PythonEngine::LoadModel(
   }
   auto model_config = models_[model];
   auto model_folder_path = model_config.files[0];
-  auto data_folder_path = std::filesystem::path(model_folder_path) / std::filesystem::path("venv");
+  auto data_folder_path =
+      std::filesystem::path(model_folder_path) / std::filesystem::path("venv");
   try {
 #ifdef _WIN32
     auto executable = std::filesystem::path(data_folder_path) /
                       std::filesystem::path("Scripts");
 #else
-    auto executable = std::filesystem::path(data_folder_path) /
-                      std::filesystem::path("bin");
+    auto executable =
+        std::filesystem::path(data_folder_path) / std::filesystem::path("bin");
 #endif

     auto executable_str =
@@ -377,9 +378,14 @@ void PythonEngine::LoadModel(
     command.push_back((std::filesystem::path(model_folder_path) /
                        std::filesystem::path(model_config.script))
                           .string());
-    std::list<std::string> args{"--port", model_config.port,
-                                "--log_path", model_config.log_path,
-                                "--log_level", model_config.log_level};
+    std::list<std::string> args{"--port",
+                                model_config.port,
+                                "--log_path",
+                                (file_manager_utils::GetCortexLogPath() /
+                                 std::filesystem::path(model_config.log_path))
+                                    .string(),
+                                "--log_level",
+                                model_config.log_level};
     if (!model_config.extra_params.isNull() &&
         model_config.extra_params.isObject()) {
       for (const auto& key : model_config.extra_params.getMemberNames()) {
diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h
index e404f2e19..f862d0ed0 100644
--- a/engine/extensions/python-engine/python_engine.h
+++ b/engine/extensions/python-engine/python_engine.h
@@ -11,6 +11,7 @@
 #include "cortex-common/EngineI.h"
 #include "extensions/template_renderer.h"
 #include "utils/file_logger.h"
+#include "utils/file_manager_utils.h"
 #ifdef _WIN32
 #include
 #include
From f95cfef756a4e1d5680a471afcaa0c9616dba830 Mon Sep 17 00:00:00 2001
From: nguyenhoangthuan99
Date: Thu, 19 Dec 2024 15:14:43 +0700
Subject: [PATCH 20/34] fix: unit test for remote engine after template
 renderer location change

---
 engine/test/components/CMakeLists.txt        | 2 +-
 engine/test/components/test_remote_engine.cc | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/engine/test/components/CMakeLists.txt b/engine/test/components/CMakeLists.txt
index 0df46cfc2..6ca836158 100644
--- a/engine/test/components/CMakeLists.txt
+++ b/engine/test/components/CMakeLists.txt
@@ -16,7 +16,7 @@ add_executable(${PROJECT_NAME}
     ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/file_manager_utils.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/curl_utils.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/system_info_utils.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../extensions/remote-engine/template_renderer.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../extensions/template_renderer.cc
 )

 find_package(Drogon CONFIG REQUIRED)
diff --git a/engine/test/components/test_remote_engine.cc b/engine/test/components/test_remote_engine.cc
index bfac76f49..5f1b85044 100644
--- a/engine/test/components/test_remote_engine.cc
+++ b/engine/test/components/test_remote_engine.cc
@@ -1,4 +1,4 @@
-#include "extensions/remote-engine/template_renderer.h"
+#include "extensions/template_renderer.h"
 #include "gtest/gtest.h"
 #include "utils/json_helper.h"

@@ -42,7 +42,7 @@ TEST_F(RemoteEngineTest, OpenAiToAnthropicRequest) {

   auto data = json_helper::ParseJsonString(message_with_system);

-  remote_engine::TemplateRenderer rdr;
+  extensions::TemplateRenderer rdr;
   auto res = rdr.Render(tpl, data);
   auto res_json =
json_helper::ParseJsonString(res); @@ -69,7 +69,7 @@ TEST_F(RemoteEngineTest, OpenAiToAnthropicRequest) { auto data = json_helper::ParseJsonString(message_without_system); - remote_engine::TemplateRenderer rdr; + extensions::TemplateRenderer rdr; auto res = rdr.Render(tpl, data); auto res_json = json_helper::ParseJsonString(res); From 2ea032bc1b84e6e67b8ae8c68357f9ae91543713 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 19 Dec 2024 15:51:32 +0700 Subject: [PATCH 21/34] fix: CI build windows --- engine/services/model_service.cc | 2 +- engine/utils/set_permission_utils.h | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index bb80a6dfa..e3d63fd82 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -554,7 +554,7 @@ ModelService::DownloadModelFromCortexsoAsync( } auto venv_zip = model_folder / std::filesystem::path("venv.zip"); if (std::filesystem::exists(venv_zip)) { - if (archive_utils::ExtractArchive(venv_zip.string(), venv_path)) { + if (archive_utils::ExtractArchive(venv_zip.string(), venv_path.string())) { std::filesystem::remove_all(venv_zip); CTL_INF("Successfully extract venv.zip"); // If extract success create pyvenv.cfg diff --git a/engine/utils/set_permission_utils.h b/engine/utils/set_permission_utils.h index b91d0c383..6e3ede3d3 100644 --- a/engine/utils/set_permission_utils.h +++ b/engine/utils/set_permission_utils.h @@ -30,15 +30,14 @@ inline bool SetExecutePermission(const std::filesystem::path& filePath, // Clear read-only attribute DWORD fileAttributes = GetFileAttributes(exePath.c_str()); if (fileAttributes == INVALID_FILE_ATTRIBUTES) { - CTL_ERROR << "Error accessing file: " << GetLastError() << std::endl; + CTL_ERROR("Error accessing file: " << GetLastError()); return false; } fileAttributes &= ~FILE_ATTRIBUTE_READONLY; if (!SetFileAttributes(exePath.c_str(), fileAttributes)) { - CTL_ERROR << "Error setting file attributes: " << GetLastError() - << std::endl; + CTL_ERROR("Error setting file attributes: " << GetLastError()); return false; } From 595998022c1e8dd6976102d7225d585cb7ed8b35 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 19 Dec 2024 15:57:02 +0700 Subject: [PATCH 22/34] fix: CI build windows --- engine/utils/set_permission_utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/utils/set_permission_utils.h b/engine/utils/set_permission_utils.h index 6e3ede3d3..de0a14155 100644 --- a/engine/utils/set_permission_utils.h +++ b/engine/utils/set_permission_utils.h @@ -30,14 +30,14 @@ inline bool SetExecutePermission(const std::filesystem::path& filePath, // Clear read-only attribute DWORD fileAttributes = GetFileAttributes(exePath.c_str()); if (fileAttributes == INVALID_FILE_ATTRIBUTES) { - CTL_ERROR("Error accessing file: " << GetLastError()); + CTL_ERR("Error accessing file: " << GetLastError()); return false; } fileAttributes &= ~FILE_ATTRIBUTE_READONLY; if (!SetFileAttributes(exePath.c_str(), fileAttributes)) { - CTL_ERROR("Error setting file attributes: " << GetLastError()); + CTL_ERR("Error setting file attributes: " << GetLastError()); return false; } From 09b56ad0b792fb5314a5efb99b2664468cdefa6c Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Mon, 23 Dec 2024 14:53:00 +0700 Subject: [PATCH 23/34] feat: add depends model.yml for python engine --- engine/config/model_config.h | 24 ++++++++++++++ engine/services/model_service.cc | 55 +++++++++++++++++++++++++++++++- 2 files 
changed, 78 insertions(+), 1 deletion(-)

diff --git a/engine/config/model_config.h b/engine/config/model_config.h
index 18150e76a..d8ede92f7 100644
--- a/engine/config/model_config.h
+++ b/engine/config/model_config.h
@@ -514,6 +514,7 @@ struct PythonModelConfig {
   std::string environment;
   std::vector<std::string> command;  // New command field
   std::vector<std::string> files;
+  std::vector<std::string> depends;
   std::string engine;
   Json::Value extra_params;  // Accept dynamic extra parameters

@@ -577,6 +578,13 @@ struct PythonModelConfig {
     }
     out << YAML::EndSeq;

+    // Serialize depends as YAML list
+    out << YAML::Key << "depends" << YAML::Value << YAML::BeginSeq;
+    for (const auto& depend : depends) {
+      out << depend;
+    }
+    out << YAML::EndSeq;
+
     out << YAML::Key << "engine" << YAML::Value << engine;

     // Serialize extra_params as YAML
@@ -664,6 +672,12 @@ struct PythonModelConfig {
       }
     }

+    if (mlp["depends"] && mlp["depends"].IsSequence()) {
+      for (const auto& depend : mlp["depends"]) {
+        depends.push_back(depend.as<std::string>());
+      }
+    }
+
     if (mlp["extra_params"]) {
       for (YAML::const_iterator it = mlp["extra_params"].begin();
            it != mlp["extra_params"].end(); ++it) {
@@ -717,6 +731,10 @@ struct PythonModelConfig {
       root["files"].append(file);
     }

+    for (const auto& depend : depends) {
+      root["depends"].append(depend);
+    }
+
     root["engine"] = engine;
     root["extra_params"] = extra_params;  // Serialize the JSON value directly

@@ -791,6 +809,12 @@ struct PythonModelConfig {
       }
     }

+    if (mlp.isMember("depends")) {
+      for (const auto& depend : mlp["depends"]) {
+        depends.push_back(depend.asString());
+      }
+    }
+
     if (mlp.isMember("extra_params")) {
       extra_params = mlp["extra_params"];  // Directly assign the JSON value
     }
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index e3d63fd82..a6e3b4277 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -554,7 +554,8 @@ ModelService::DownloadModelFromCortexsoAsync(
     }
     auto venv_zip = model_folder / std::filesystem::path("venv.zip");
     if (std::filesystem::exists(venv_zip)) {
-      if (archive_utils::ExtractArchive(venv_zip.string(), venv_path)) {
+      if (archive_utils::ExtractArchive(venv_zip.string(),
+                                        venv_path.string())) {
         std::filesystem::remove_all(venv_zip);
         CTL_INF("Successfully extract venv.zip");
         // If extract success create pyvenv.cfg
@@ -839,6 +840,29 @@ cpp::result ModelService::StartModel(

   // Check if Python model first
   if (mc.engine == kPythonEngine) {
+
+    config::PythonModelConfig python_model_config;
+    python_model_config.ReadFromYaml(
+        fmu::ToAbsoluteCortexDataPath(
+            fs::path(model_entry.value().path_to_model_yaml))
+            .string());
+    // Start all dependent models
+    auto depends = python_model_config.depends;
+    for (auto& depend : depends) {
+      StartParameterOverride temp;
+      auto res = StartModel(depend, temp);
+      if (res.has_error()) {
+        CTL_WRN("Error: " + res.error());
+        for (auto& depend : depends) {
+          if (depend != model_handle) {
StopModel(depend); + } + } + } + } CTL_ERR("Model failed to start with status code: " << status); return cpp::fail("Model failed to start: " + data["message"].asString()); @@ -1020,6 +1056,23 @@ cpp::result ModelService::StopModel( if (bypass_check) { engine_name = kLlamaEngine; } + + // Update for python engine + if (engine_name == kPythonEngine) { + auto model_entry = modellist_handler.GetModelInfo(model_handle); + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.value().path_to_model_yaml)) + .string()); + // Stop all depends model + auto depends = python_model_config.depends; + for (auto& depend : depends) { + StopModel(depend); + } + } + + // assert(inference_svc_); auto ir = inference_svc_->UnloadModel(engine_name, model_handle); auto status = std::get<0>(ir)["status_code"].asInt(); From 4fb36881f245bf9425bcbf26d0d073776cd3f6ac Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Mon, 23 Dec 2024 17:33:15 +0700 Subject: [PATCH 24/34] fix: CI build --- engine/utils/jinja_utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/utils/jinja_utils.h b/engine/utils/jinja_utils.h index f614f4745..12244599f 100644 --- a/engine/utils/jinja_utils.h +++ b/engine/utils/jinja_utils.h @@ -3,7 +3,7 @@ #include #include -#include "extensions/remote-engine/template_renderer.h" +#include "extensions/template_renderer.h" #include "utils/chat-template.hpp" #include "utils/result.hpp" @@ -14,7 +14,7 @@ inline cpp::result RenderTemplate( bool add_generation_prompt = true) { try { auto converted_json = - remote_engine::TemplateRenderer().ConvertJsonValue(data); + extensions::TemplateRenderer().ConvertJsonValue(data); minja::chat_template chat_tmpl(tmpl, add_bos_token ? bos_token : "", add_eos_token ? 
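For context on the `depends` field that drives the start/stop logic above, a python-engine `model.yml` using it might look like this (all ids and paths are illustrative):

```yaml
# Hypothetical model.yml for a python-engine model
model: ichigo:fp16
engine: python-engine
port: "3348"
script: src/app.py
files:
  - /path/to/models/cortexso/ichigo/fp16
depends:
  # Dependencies are started before this model and stopped along with it
  - whispervq:fp16
```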
eos_token : ""); From d5257194964891297d37b3054f0b97f186b72269 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 26 Dec 2024 23:09:40 +0700 Subject: [PATCH 25/34] stream response --- engine/extensions/python-engine/python_engine.cc | 3 ++- engine/extensions/python-engine/python_engine.h | 7 ++++--- engine/services/model_service.cc | 1 - 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 9bb5c5b54..f96293b92 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -600,7 +600,7 @@ void PythonEngine::HandleInference( // Render with error handling try { - transformed_request = renderer_.Render(transform_request, *json_body); + transformed_request = renderer_.Render(transform_request, body); } catch (const std::exception& e) { throw std::runtime_error("Template rendering error: " + std::string(e.what())); @@ -622,6 +622,7 @@ void PythonEngine::HandleInference( if (body.isMember("stream") && body["stream"].asBool()) { response = MakeStreamPostRequest(model, path, transformed_request, callback); + return; } else { response = MakePostRequest(model, path, transformed_request); } diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index cf4cf1958..e45220a49 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -36,20 +36,21 @@ static size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, std::string chunk(ptr, size * nmemb); context->buffer += chunk; - + LOG_INFO<< "start writing"; // Process complete lines size_t pos; while ((pos = context->buffer.find('\n')) != std::string::npos) { std::string line = context->buffer.substr(0, pos); context->buffer = context->buffer.substr(pos + 1); - + LOG_INFO << "line: "< ModelService::StartModel( } else { // only report to user the error for (auto& depend : depends) { - Json::Value temp; StopModel(depend); } From 2bcedf699186f3e020b9ca64e8bf3e9f9c282512 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 26 Dec 2024 23:32:45 +0700 Subject: [PATCH 26/34] update set permission api --- engine/utils/set_permission_utils.h | 127 +++++++++++----------------- 1 file changed, 49 insertions(+), 78 deletions(-) diff --git a/engine/utils/set_permission_utils.h b/engine/utils/set_permission_utils.h index de0a14155..c1c08ce8f 100644 --- a/engine/utils/set_permission_utils.h +++ b/engine/utils/set_permission_utils.h @@ -13,93 +13,64 @@ #include "utils/logging_utils.h" namespace set_permission_utils { // Cross-platform method to set execute permission for a single file -inline bool SetExecutePermission(const std::filesystem::path& filePath, - bool ownerOnly = false) { - std::error_code ec; +[[nodiscard]] inline bool SetExecutePermission(const std::filesystem::path& filePath, + bool ownerOnly = false) noexcept { + try { + std::filesystem::perms current_perms = std::filesystem::status(filePath).permissions(); + std::filesystem::perms new_perms; -#ifdef _WIN32 - // Windows execution permission handling - std::filesystem::path exePath = filePath; - - // Add .exe extension if no extension exists - if (exePath.extension().empty()) { - exePath += ".exe"; - std::filesystem::rename(filePath, exePath); - } - - // Clear read-only attribute - DWORD fileAttributes = GetFileAttributes(exePath.c_str()); - if (fileAttributes == INVALID_FILE_ATTRIBUTES) { - 
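The `StreamWriteCallback` above splits the buffered curl payload on newlines, which assumes the Python server streams OpenAI-style server-sent events, one `data:` line per chunk; the `[DONE]` terminator shown here is an assumption based on the OpenAI-compatible endpoints this engine proxies. A sketch of the expected wire format (payloads illustrative):

```
data: {"choices":[{"delta":{"content":"Hel"}}]}

data: {"choices":[{"delta":{"content":"lo"}}]}

data: [DONE]
```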
CTL_ERR("Error accessing file: " << GetLastError()); - return false; - } - - fileAttributes &= ~FILE_ATTRIBUTE_READONLY; - - if (!SetFileAttributes(exePath.c_str(), fileAttributes)) { - CTL_ERR("Error setting file attributes: " << GetLastError()); - return false; - } - -#else - // POSIX systems (Linux, macOS) - struct stat st; - if (stat(filePath.c_str(), &st) != 0) { - CTL_ERR("Error getting file stats: " << strerror(errno)); - return false; - } - - // Set execute permissions based on ownerOnly flag - mode_t newMode; - if (ownerOnly) { - // Only owner can execute - newMode = (st.st_mode & ~(S_IXGRP | S_IXOTH)) | S_IXUSR; - } else { - // Everyone can execute - newMode = st.st_mode | S_IXUSR | // Owner execute - S_IXGRP | // Group execute - S_IXOTH; // Others execute - } - - if (chmod(filePath.c_str(), newMode) != 0) { - CTL_ERR("Error setting execute permissions: " << strerror(errno)); - return false; - } -#endif + if (ownerOnly) { + new_perms = current_perms | std::filesystem::perms::owner_exec; + // Remove group and others execute permissions + new_perms &= ~(std::filesystem::perms::group_exec | std::filesystem::perms::others_exec); + } else { + new_perms = current_perms | std::filesystem::perms::owner_exec | + std::filesystem::perms::group_exec | + std::filesystem::perms::others_exec; + } - return true; + std::filesystem::permissions(filePath, new_perms, + std::filesystem::perm_options::replace); + return true; + } catch (const std::filesystem::filesystem_error& e) { + CTL_ERR("Permission error for file " << filePath.string() + << ": " << e.what()); + return false; + } catch (const std::exception& e) { + CTL_ERR("Unexpected error for file " << filePath.string() + << ": " << e.what()); + return false; + } } -inline std::vector SetExecutePermissionsRecursive( - const std::filesystem::path& directoryPath, bool ownerOnly = false, + +[[nodiscard]] inline std::vector SetExecutePermissionsRecursive( + const std::filesystem::path& directoryPath, + bool ownerOnly = false, bool skipDirectories = true) { - std::vector modifiedFiles; + std::vector modifiedFiles; + modifiedFiles.reserve(100); // Reserve space to prevent frequent reallocations - try { - // Iterate through all files and subdirectories - for (const auto& entry : - std::filesystem::recursive_directory_iterator(directoryPath)) { - // Skip directories if specified - if (skipDirectories && entry.is_directory()) { - continue; - } + try { + const auto options = std::filesystem::directory_options::skip_permission_denied | + std::filesystem::directory_options::follow_directory_symlink; + + for (const auto& entry : + std::filesystem::recursive_directory_iterator(directoryPath, options)) { + if (skipDirectories && entry.is_directory()) { + continue; + } - // Only process files - if (entry.is_regular_file()) { - try { - if (SetExecutePermission(entry.path(), ownerOnly)) { - modifiedFiles.push_back(entry.path()); - } - } catch (const std::exception& e) { - CTL_ERR("Error processing file " + entry.path().string() + ": " + - e.what()); + if (entry.is_regular_file()) { + if (SetExecutePermission(entry.path(), ownerOnly)) { + modifiedFiles.push_back(entry.path()); + } + } } - } + } catch (const std::filesystem::filesystem_error& e) { + CTL_ERR("Filesystem error: " << e.what()); } - } catch (const std::filesystem::filesystem_error& e) { - CTL_ERR("Filesystem error: " << e.what()); - } - return modifiedFiles; + return modifiedFiles; } } // namespace set_permission_utils \ No newline at end of file From 4b9e6dc40ce28bbdaabecf2e87dbe77de0619a01 Mon Sep 17 
00:00:00 2001 From: nguyenhoangthuan99 Date: Fri, 27 Dec 2024 09:32:28 +0700 Subject: [PATCH 27/34] Fix: comment --- engine/utils/curl_utils.cc | 2 +- engine/utils/curl_utils.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/engine/utils/curl_utils.cc b/engine/utils/curl_utils.cc index b60f76fc2..58a00b71a 100644 --- a/engine/utils/curl_utils.cc +++ b/engine/utils/curl_utils.cc @@ -243,7 +243,7 @@ cpp::result ReadRemoteYaml(const std::string& url) { } cpp::result SimpleGetJson( - const std::string& url, const int timeout, std::optional recursive) { + const std::string& url, const int timeout, bool recursive) { auto result = SimpleGet(url, timeout); if (result.has_error()) { CTL_ERR("Failed to get JSON from " + url + ": " + result.error()); diff --git a/engine/utils/curl_utils.h b/engine/utils/curl_utils.h index 8bf324dd9..84b8dbddb 100644 --- a/engine/utils/curl_utils.h +++ b/engine/utils/curl_utils.h @@ -32,9 +32,9 @@ cpp::result ReadRemoteYaml(const std::string& url); * * [timeout] is an optional parameter that specifies the timeout for the request. In second. */ -cpp::result SimpleGetJson( - const std::string& url, const int timeout = -1, - std::optional recursive = true); +cpp::result SimpleGetJson(const std::string& url, + const int timeout = -1, + bool recursive = true); cpp::result SimplePostJson( const std::string& url, const std::string& body = ""); From b84f04c14314a9a0f816f878e7ee75407e56705d Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Fri, 27 Dec 2024 11:32:52 +0700 Subject: [PATCH 28/34] Feat: stream response --- engine/controllers/server.cc | 63 ++++++++++++++----- .../extensions/python-engine/python_engine.h | 1 - 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index 1c455e262..c3d2038ae 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -129,6 +129,7 @@ void server::FineTuning( void server::Inference(const HttpRequestPtr& req, std::function&& callback) { + auto json_body = req->getJsonObject(); LOG_TRACE << "Start inference"; auto q = std::make_shared(); auto ir = inference_svc_->HandleInference(q, req->getJsonObject()); @@ -141,20 +142,36 @@ void server::Inference(const HttpRequestPtr& req, callback(resp); return; } + bool is_stream = + (*json_body).get("stream", false).asBool() || + (*json_body).get("body", Json::Value()).get("stream", false).asBool(); + LOG_TRACE << "Wait to inference"; - auto [status, res] = q->wait_and_pop(); - LOG_DEBUG << "response: " << res.toStyledString(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); - resp->setStatusCode( - static_cast(status["status_code"].asInt())); - callback(resp); - LOG_TRACE << "Done inference"; + if (is_stream) { + auto model_id = (*json_body).get("model", "invalid_model").asString(); + auto engine_type = [this, &json_body]() -> std::string { + if (!inference_svc_->HasFieldInReq(json_body, "engine")) { + return kLlamaRepo; + } else { + return (*(json_body)).get("engine", kLlamaRepo).asString(); + } + }(); + ProcessStreamRes(callback, q, engine_type, model_id); + } else { + auto [status, res] = q->wait_and_pop(); + LOG_DEBUG << "response: " << res.toStyledString(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode( + static_cast(status["status_code"].asInt())); + callback(resp); + LOG_TRACE << "Done inference"; + } } void server::RouteRequest( const HttpRequestPtr& req, std::function&& callback) { - + auto json_body = 
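The `is_stream` check above accepts the flag at either nesting level, so both of these request bodies (abridged; model and engine values illustrative) select the streaming path:

```json
{ "engine": "python-engine", "model": "whispervq", "stream": true }
```

```json
{ "engine": "python-engine", "model": "whispervq", "body": { "stream": true } }
```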
req->getJsonObject(); LOG_TRACE << "Start route request"; auto q = std::make_shared(); auto ir = inference_svc_->HandleRouteRequest(q, req->getJsonObject()); @@ -167,14 +184,30 @@ void server::RouteRequest( callback(resp); return; } + bool is_stream = + (*json_body).get("stream", false).asBool() || + (*json_body).get("body", Json::Value()).get("stream", false).asBool(); LOG_TRACE << "Wait to route request"; - auto [status, res] = q->wait_and_pop(); - LOG_DEBUG << "response: " << res.toStyledString(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); - resp->setStatusCode( - static_cast(status["status_code"].asInt())); - callback(resp); - LOG_TRACE << "Done route request"; + if (is_stream) { + + auto model_id = (*json_body).get("model", "invalid_model").asString(); + auto engine_type = [this, &json_body]() -> std::string { + if (!inference_svc_->HasFieldInReq(json_body, "engine")) { + return kLlamaRepo; + } else { + return (*(json_body)).get("engine", kLlamaRepo).asString(); + } + }(); + ProcessStreamRes(callback, q, engine_type, model_id); + } else { + auto [status, res] = q->wait_and_pop(); + LOG_DEBUG << "response: " << res.toStyledString(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode( + static_cast(status["status_code"].asInt())); + callback(resp); + LOG_TRACE << "Done route request"; + } } void server::LoadModel(const HttpRequestPtr& req, diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index e45220a49..9eb7be913 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -36,7 +36,6 @@ static size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, std::string chunk(ptr, size * nmemb); context->buffer += chunk; - LOG_INFO<< "start writing"; // Process complete lines size_t pos; while ((pos = context->buffer.find('\n')) != std::string::npos) { From 839cce42d3636640de21215df2c24196f745df9c Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Sat, 28 Dec 2024 22:31:47 +0700 Subject: [PATCH 29/34] fix: run concurrent request with stream mode --- engine/extensions/python-engine/python_engine.cc | 11 +++++++---- engine/extensions/python-engine/python_engine.h | 2 ++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index f96293b92..db5c8e892 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -16,7 +16,7 @@ static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, return size * nmemb; } -PythonEngine::PythonEngine() { +PythonEngine::PythonEngine():q_(4 /*n_parallel*/, "python_engine") { curl_global_init(CURL_GLOBAL_ALL); } @@ -620,9 +620,12 @@ void PythonEngine::HandleInference( CurlResponse response; if (method == "post") { if (body.isMember("stream") && body["stream"].asBool()) { - response = - MakeStreamPostRequest(model, path, transformed_request, callback); - return; + q_.runTaskInQueue( + [this, model, path, transformed_request, cb = std::move(callback)] { + MakeStreamPostRequest(model, path, transformed_request, cb); + }); + + return; } else { response = MakePostRequest(model, path, transformed_request); } diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index 9eb7be913..1c4286eac 100644 --- a/engine/extensions/python-engine/python_engine.h +++ 
b/engine/extensions/python-engine/python_engine.h @@ -8,6 +8,7 @@ #include #include #include "config/model_config.h" +#include "trantor/utils/ConcurrentTaskQueue.h" #include "cortex-common/EngineI.h" #include "extensions/template_renderer.h" #include "utils/file_logger.h" @@ -91,6 +92,7 @@ class PythonEngine : public EngineI { extensions::TemplateRenderer renderer_; std::unique_ptr async_file_logger_; std::unordered_map processMap; + trantor::ConcurrentTaskQueue q_; // Helper functions CurlResponse MakePostRequest(const std::string& model, From 035f2d5f5eed18f606820435198b47a46b114830 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Mon, 30 Dec 2024 12:19:11 +0700 Subject: [PATCH 30/34] Fix: remove unnecessary interface --- engine/common/base.h | 18 +++--------------- engine/controllers/server.h | 13 +++++-------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/engine/common/base.h b/engine/common/base.h index c572a1823..b5de09059 100644 --- a/engine/common/base.h +++ b/engine/common/base.h @@ -5,7 +5,7 @@ using namespace drogon; class BaseModel { public: - virtual ~BaseModel() {} + virtual ~BaseModel() = default; // Model management virtual void LoadModel( @@ -27,7 +27,7 @@ class BaseModel { class BaseChatCompletion { public: - virtual ~BaseChatCompletion() {} + virtual ~BaseChatCompletion() = default; // General chat method virtual void ChatCompletion( @@ -37,7 +37,7 @@ class BaseChatCompletion { class BaseEmbedding { public: - virtual ~BaseEmbedding() {} + virtual ~BaseEmbedding() = default; // Implement embedding functionality specific to chat virtual void Embedding( @@ -47,15 +47,3 @@ class BaseEmbedding { // The derived class can also override other methods if needed }; -class BasePythonModel { - public: - virtual ~BasePythonModel() {} - - // Model management - virtual void Inference( - const HttpRequestPtr& req, - std::function&& callback) = 0; - virtual void RouteRequest( - const HttpRequestPtr& req, - std::function&& callback) = 0; -}; \ No newline at end of file diff --git a/engine/controllers/server.h b/engine/controllers/server.h index b6b125f97..5f2a14677 100644 --- a/engine/controllers/server.h +++ b/engine/controllers/server.h @@ -25,8 +25,7 @@ namespace inferences { class server : public drogon::HttpController, public BaseModel, public BaseChatCompletion, - public BaseEmbedding, - public BasePythonModel { + public BaseEmbedding { public: server(std::shared_ptr inference_service, std::shared_ptr engine_service); @@ -73,12 +72,10 @@ class server : public drogon::HttpController, void FineTuning( const HttpRequestPtr& req, std::function&& callback) override; - void Inference( - const HttpRequestPtr& req, - std::function&& callback) override; - void RouteRequest( - const HttpRequestPtr& req, - std::function&& callback) override; + void Inference(const HttpRequestPtr& req, + std::function&& callback); + void RouteRequest(const HttpRequestPtr& req, + std::function&& callback); private: void ProcessStreamRes(std::function cb, From 75625bbfb89ba482556a3e92fcce6c68bcb00bf4 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Mon, 30 Dec 2024 12:34:18 +0700 Subject: [PATCH 31/34] Fix comment --- engine/controllers/server.cc | 4 ++-- engine/services/model_service.cc | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index cc5cee54a..961798d2c 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -130,7 +130,7 @@ void server::FineTuning( void 
server::Inference(const HttpRequestPtr& req, std::function&& callback) { LOG_TRACE << "Start inference"; - auto q = std::make_shared(); + auto q = std::make_shared(); auto ir = inference_svc_->HandleInference(q, req->getJsonObject()); LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); if (ir.has_error()) { @@ -156,7 +156,7 @@ void server::RouteRequest( std::function&& callback) { LOG_TRACE << "Start route request"; - auto q = std::make_shared(); + auto q = std::make_shared(); auto ir = inference_svc_->HandleRouteRequest(q, req->getJsonObject()); LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); if (ir.has_error()) { diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 34ca60b3b..d714c6b23 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -84,8 +84,7 @@ void ParseGguf(DatabaseService& db_service, CTL_ERR("Error adding model to modellist: " + result.error()); } } else { - if (auto m = db_service.GetModelInfo(ggufDownloadItem.id); - m.has_value()) { + if (auto m = db_service.GetModelInfo(ggufDownloadItem.id); m.has_value()) { auto upd_m = m.value(); upd_m.status = cortex::db::ModelStatus::Downloaded; if (auto r = db_service.UpdateModelEntry(ggufDownloadItem.id, upd_m); @@ -472,7 +471,8 @@ cpp::result ModelService::HandleUrl( model_size = model_size + item.bytes.value_or(0); } auto gguf_download_item = finishedTask.items[0]; - ParseGguf(*db_service_, gguf_download_item, author, std::nullopt, model_size); + ParseGguf(*db_service_, gguf_download_item, author, std::nullopt, + model_size); }; auto result = download_service_->AddDownloadTask(downloadTask, on_finished); @@ -653,7 +653,8 @@ cpp::result ModelService::DownloadModelFromCortexso( } std::string model_id{name + ":" + branch}; - auto on_finished = [this, branch, model_id](const DownloadTask& finishedTask) { + auto on_finished = [this, branch, + model_id](const DownloadTask& finishedTask) { const DownloadItem* model_yml_item = nullptr; auto need_parse_gguf = true; @@ -824,8 +825,7 @@ cpp::result ModelService::StartModel( constexpr const int kDefautlContextLength = 8192; int max_model_context_length = kDefautlContextLength; Json::Value json_data; - - auto model_entry = modellist_handler.GetModelInfo(model_handle); + auto model_entry = db_service_->GetModelInfo(model_handle); if (model_entry.has_error()) { CTL_WRN("Error: " + model_entry.error()); return cpp::fail(model_entry.error()); @@ -842,7 +842,6 @@ cpp::result ModelService::StartModel( config::PythonModelConfig python_model_config; python_model_config.ReadFromYaml( - fmu::ToAbsoluteCortexDataPath( fs::path(model_entry.value().path_to_model_yaml)) .string()); @@ -1051,7 +1050,7 @@ cpp::result ModelService::StopModel( // Update for python engine if (engine_name == kPythonEngine) { - auto model_entry = modellist_handler.GetModelInfo(model_handle); + auto model_entry = db_service_->GetModelInfo(model_handle); config::PythonModelConfig python_model_config; python_model_config.ReadFromYaml( fmu::ToAbsoluteCortexDataPath( From dabc154486fa0167ec4159b4fba54bdd8713795b Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Mon, 30 Dec 2024 17:52:43 +0700 Subject: [PATCH 32/34] Fix: comment review --- engine/controllers/models.cc | 19 ++-- .../extensions/python-engine/python_engine.cc | 91 ++++--------------- .../extensions/python-engine/python_engine.h | 2 + engine/services/download_service.cc | 3 - engine/services/model_service.cc | 5 +- engine/utils/curl_utils.cc | 21 ++++- 
engine/utils/curl_utils.h | 5 +- 7 files changed, 50 insertions(+), 96 deletions(-) diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index d6b985ffb..34c6504ac 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -290,28 +290,27 @@ void Models::GetModel(const HttpRequestPtr& req, auto resp = cortex_utils::CreateCortexHttpTextAsJsonResponse(ret); resp->setStatusCode(drogon::k200OK); callback(resp); - } else if (model_config.engine == kOpenAiEngine || - model_config.engine == kAnthropicEngine) { - config::RemoteModelConfig remote_model_config; - remote_model_config.LoadFromYamlFile( + } else if (model_config.engine == kPythonEngine) { + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( fmu::ToAbsoluteCortexDataPath( fs::path(model_entry.value().path_to_model_yaml)) .string()); - ret = remote_model_config.ToJson(); - ret["id"] = remote_model_config.model; + ret = python_model_config.ToJson(); + ret["id"] = python_model_config.model; ret["object"] = "model"; ret["result"] = "OK"; auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); resp->setStatusCode(k200OK); callback(resp); } else { - config::PythonModelConfig python_model_config; - python_model_config.ReadFromYaml( + config::RemoteModelConfig remote_model_config; + remote_model_config.LoadFromYamlFile( fmu::ToAbsoluteCortexDataPath( fs::path(model_entry.value().path_to_model_yaml)) .string()); - ret = python_model_config.ToJson(); - ret["id"] = python_model_config.model; + ret = remote_model_config.ToJson(); + ret["id"] = remote_model_config.model; ret["object"] = "model"; ret["result"] = "OK"; auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 12c25a76c..ddf6784e8 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -16,9 +16,7 @@ static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, return size * nmemb; } -PythonEngine::PythonEngine() { - curl_global_init(CURL_GLOBAL_ALL); -} +PythonEngine::PythonEngine() {} PythonEngine::~PythonEngine() { curl_global_cleanup(); @@ -172,69 +170,33 @@ bool PythonEngine::TerminateModelProcess(const std::string& model) { CurlResponse PythonEngine::MakeGetRequest(const std::string& model, const std::string& path) { auto config = models_[model]; - CURL* curl = curl_easy_init(); - CurlResponse response; - - if (!curl) { - response.error = true; - response.error_message = "Failed to initialize CURL"; - return response; - } - std::string full_url = "http://localhost:" + config.port + path; + CurlResponse response; - struct curl_slist* headers = nullptr; - - headers = curl_slist_append(headers, "Content-Type: application/json"); - - curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); - - std::string response_string; - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); - - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { + auto result = curl_utils::SimpleRequest(full_url, RequestType::GET); + if (result.has_error()) { response.error = true; - response.error_message = curl_easy_strerror(res); + response.error_message = result.error(); } else { - response.body = response_string; + response.body = result.value(); } - - curl_slist_free_all(headers); - curl_easy_cleanup(curl); 
return response; } CurlResponse PythonEngine::MakeDeleteRequest(const std::string& model, const std::string& path) { auto config = models_[model]; - CURL* curl = curl_easy_init(); - CurlResponse response; - - if (!curl) { - response.error = true; - response.error_message = "Failed to initialize CURL"; - return response; - } std::string full_url = "http://localhost:" + config.port + path; + CurlResponse response; - curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); - curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE"); - - std::string response_string; - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); + auto result = curl_utils::SimpleRequest(full_url, RequestType::DEL); - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { + if (result.has_error()) { response.error = true; - response.error_message = curl_easy_strerror(res); + response.error_message = result.error(); } else { - response.body = response_string; + response.body = result.value(); } - curl_easy_cleanup(curl); return response; } @@ -242,38 +204,17 @@ CurlResponse PythonEngine::MakePostRequest(const std::string& model, const std::string& path, const std::string& body) { auto config = models_[model]; - CURL* curl = curl_easy_init(); - CurlResponse response; - - if (!curl) { - response.error = true; - response.error_message = "Failed to initialize CURL"; - return response; - } std::string full_url = "http://localhost:" + config.port + path; - struct curl_slist* headers = nullptr; - headers = curl_slist_append(headers, "Content-Type: application/json"); - - curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); - - curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str()); - - std::string response_string; - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); + CurlResponse response; + auto result = curl_utils::SimpleRequest(full_url, RequestType::POST, body); - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { + if (result.has_error()) { response.error = true; - response.error_message = curl_easy_strerror(res); + response.error_message = result.error(); } else { - response.body = response_string; + response.body = result.value(); } - - curl_slist_free_all(headers); - curl_easy_cleanup(curl); return response; } diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index f862d0ed0..7b112f435 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -12,6 +12,8 @@ #include "extensions/template_renderer.h" #include "utils/file_logger.h" #include "utils/file_manager_utils.h" + +#include "utils/curl_utils.h" #ifdef _WIN32 #include #include diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc index 9c7137168..d855c8f61 100644 --- a/engine/services/download_service.cc +++ b/engine/services/download_service.cc @@ -374,9 +374,6 @@ void DownloadService::ProcessTask(DownloadTask& task, int worker_id) { CTL_ERR("Failed to init curl!"); return; } - if (!std::filesystem::exists(item.localPath.parent_path())) { - std::filesystem::create_directories(item.localPath.parent_path()); - } auto file = fopen(item.localPath.string().c_str(), "wb"); if (!file) { CTL_ERR("Failed to open output file " + item.localPath.string()); diff --git a/engine/services/model_service.cc 
b/engine/services/model_service.cc index d714c6b23..c7925360b 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -103,7 +103,7 @@ cpp::result GetDownloadTask( .pathParams = {"api", "models", "cortexso", modelId, "tree", branch}, }; - auto result = curl_utils::SimpleGetJson(url.ToFullPath()); + auto result = curl_utils::SimpleGetJsonRecursive(url.ToFullPath()); if (result.has_error()) { return cpp::fail("Model " + modelId + " not found"); } @@ -126,6 +126,9 @@ cpp::result GetDownloadTask( .pathParams = {"cortexso", modelId, "resolve", branch, path}}; auto local_path = model_container_path / path; + if (!std::filesystem::exists(local_path.parent_path())) { + std::filesystem::create_directories(local_path.parent_path()); + } download_items.push_back( DownloadItem{.id = path, .downloadUrl = download_url.ToFullPath(), diff --git a/engine/utils/curl_utils.cc b/engine/utils/curl_utils.cc index 58a00b71a..be82b5cfa 100644 --- a/engine/utils/curl_utils.cc +++ b/engine/utils/curl_utils.cc @@ -242,8 +242,8 @@ cpp::result ReadRemoteYaml(const std::string& url) { } } -cpp::result SimpleGetJson( - const std::string& url, const int timeout, bool recursive) { +cpp::result SimpleGetJson(const std::string& url, + const int timeout) { auto result = SimpleGet(url, timeout); if (result.has_error()) { CTL_ERR("Failed to get JSON from " + url + ": " + result.error()); @@ -257,11 +257,22 @@ cpp::result SimpleGetJson( " parsing error: " + reader.getFormattedErrorMessages()); } - if (root.isArray() && recursive) { + return root; +} + +cpp::result SimpleGetJsonRecursive( + const std::string& url, const int timeout) { + auto result = SimpleGetJson(url, timeout); + if (result.has_error()) { + return result; + } + auto root = result.value(); + + if (root.isArray()) { for (const auto& value : root) { if (value["type"].asString() == "directory") { - auto temp = - SimpleGetJson(url + "/" + value["path"].asString(), timeout, recursive); + auto temp = SimpleGetJsonRecursive(url + "/" + value["path"].asString(), + timeout); if (!temp.has_error()) { if (temp.value().isArray()) { for (const auto& item : temp.value()) { diff --git a/engine/utils/curl_utils.h b/engine/utils/curl_utils.h index 84b8dbddb..f33b7e8e5 100644 --- a/engine/utils/curl_utils.h +++ b/engine/utils/curl_utils.h @@ -33,8 +33,9 @@ cpp::result ReadRemoteYaml(const std::string& url); * [timeout] is an optional parameter that specifies the timeout for the request. In second. 
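As a usage sketch of the recursive variant split out above (repository URL illustrative; the function descends into `type == "directory"` entries and flattens the result, as the implementation shows):

```cpp
#include <iostream>

#include "utils/curl_utils.h"

int main() {
  // Walk a HuggingFace-style tree listing, as GetDownloadTask now does
  auto res = curl_utils::SimpleGetJsonRecursive(
      "https://huggingface.co/api/models/cortexso/tinyllama/tree/main");
  if (res.has_error()) {
    std::cerr << res.error() << "\n";
    return 1;
  }
  for (const auto& item : res.value()) {
    std::cout << item["path"].asString() << "\n";
  }
  return 0;
}
```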
*/ cpp::result SimpleGetJson(const std::string& url, - const int timeout = -1, - bool recursive = true); + const int timeout = -1); +cpp::result SimpleGetJsonRecursive(const std::string& url, + const int timeout = -1); cpp::result SimplePostJson( const std::string& url, const std::string& body = ""); From 4b2f1fc61fe7ba171ea5546693e85b5e97dc40df Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Tue, 31 Dec 2024 09:32:07 +0700 Subject: [PATCH 33/34] fix comment --- engine/controllers/server.cc | 2 +- engine/extensions/python-engine/python_engine.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index 9452935d2..8369f4713 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -184,7 +184,7 @@ void server::RouteRequest( callback(resp); return; } - bool is_stream = + auto is_stream = (*json_body).get("stream", false).asBool() || (*json_body).get("body", Json::Value()).get("stream", false).asBool(); LOG_TRACE << "Wait to route request"; diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index 8b0e7b55f..866248b4f 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -44,7 +44,7 @@ static size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, while ((pos = context->buffer.find('\n')) != std::string::npos) { std::string line = context->buffer.substr(0, pos); context->buffer = context->buffer.substr(pos + 1); - LOG_INFO << "line: "< Date: Tue, 31 Dec 2024 10:44:33 +0700 Subject: [PATCH 34/34] fix comment --- engine/controllers/server.cc | 14 ++------------ engine/extensions/python-engine/python_engine.cc | 14 +++++--------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index 8369f4713..fa02c61d2 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -158,12 +158,7 @@ void server::Inference(const HttpRequestPtr& req, }(); ProcessStreamRes(callback, q, engine_type, model_id); } else { - auto [status, res] = q->wait_and_pop(); - LOG_DEBUG << "response: " << res.toStyledString(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); - resp->setStatusCode( - static_cast(status["status_code"].asInt())); - callback(resp); + ProcessNonStreamRes(callback, *q); LOG_TRACE << "Done inference"; } } @@ -200,12 +195,7 @@ void server::RouteRequest( }(); ProcessStreamRes(callback, q, engine_type, model_id); } else { - auto [status, res] = q->wait_and_pop(); - LOG_DEBUG << "response: " << res.toStyledString(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); - resp->setStatusCode( - static_cast(status["status_code"].asInt())); - callback(resp); + ProcessNonStreamRes(callback, *q); LOG_TRACE << "Done route request"; } } diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 84a581fda..f0d325055 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -16,11 +16,7 @@ static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, return size * nmemb; } - -PythonEngine::PythonEngine():q_(4 /*n_parallel*/, "python_engine") { -} - - +PythonEngine::PythonEngine() : q_(4 /*n_parallel*/, "python_engine") {} PythonEngine::~PythonEngine() { curl_global_cleanup(); @@ -173,7 +169,7 @@ bool PythonEngine::TerminateModelProcess(const 
std::string& model) { } CurlResponse PythonEngine::MakeGetRequest(const std::string& model, const std::string& path) { - auto config = models_[model]; + auto const& config = models_[model]; std::string full_url = "http://localhost:" + config.port + path; CurlResponse response; @@ -188,7 +184,7 @@ CurlResponse PythonEngine::MakeGetRequest(const std::string& model, } CurlResponse PythonEngine::MakeDeleteRequest(const std::string& model, const std::string& path) { - auto config = models_[model]; + auto const& config = models_[model]; std::string full_url = "http://localhost:" + config.port + path; CurlResponse response; @@ -207,7 +203,7 @@ CurlResponse PythonEngine::MakeDeleteRequest(const std::string& model, CurlResponse PythonEngine::MakePostRequest(const std::string& model, const std::string& path, const std::string& body) { - auto config = models_[model]; + auto const& config = models_[model]; std::string full_url = "http://localhost:" + config.port + path; CurlResponse response; @@ -457,7 +453,7 @@ void PythonEngine::HandleChatCompletion( CurlResponse PythonEngine::MakeStreamPostRequest( const std::string& model, const std::string& path, const std::string& body, const std::function& callback) { - auto config = models_[model]; + auto const& config = models_[model]; CURL* curl = curl_easy_init(); CurlResponse response;