diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt
index e82e07aab..024f015a8 100644
--- a/engine/CMakeLists.txt
+++ b/engine/CMakeLists.txt
@@ -142,9 +142,14 @@ file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cortex_openapi.h"
 add_executable(${TARGET_NAME} main.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc
+
+    ${CMAKE_CURRENT_SOURCE_DIR}/extensions/template_renderer.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/extensions/python-engine/python_engine.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/utils/dylib_path_manager.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/template_renderer.cc
+
 )
 target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt
index eb29460a7..4ca734d6a 100644
--- a/engine/cli/CMakeLists.txt
+++ b/engine/cli/CMakeLists.txt
@@ -85,7 +85,10 @@ add_executable(${TARGET_NAME} main.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../services/database_service.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/template_renderer.cc
+
+    ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/python-engine/python_engine.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/template_renderer.cc
+
     ${CMAKE_CURRENT_SOURCE_DIR}/utils/easywsclient.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/utils/download_progress.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../utils/config_yaml_utils.cc
diff --git a/engine/common/base.h b/engine/common/base.h
index 478cc7feb..b5de09059 100644
--- a/engine/common/base.h
+++ b/engine/common/base.h
@@ -5,7 +5,7 @@ using namespace drogon;
 
 class BaseModel {
  public:
-  virtual ~BaseModel() {}
+  virtual ~BaseModel() = default;
 
   // Model management
   virtual void LoadModel(
@@ -27,7 +27,7 @@
 class BaseChatCompletion {
  public:
-  virtual ~BaseChatCompletion() {}
+  virtual ~BaseChatCompletion() = default;
 
   // General chat method
   virtual void ChatCompletion(
@@ -37,7 +37,7 @@
 class BaseEmbedding {
  public:
-  virtual ~BaseEmbedding() {}
+  virtual ~BaseEmbedding() = default;
 
   // Implement embedding functionality specific to chat
   virtual void Embedding(
@@ -46,3 +46,4 @@
   // The derived class can also override other methods if needed
 };
+
diff --git a/engine/common/download_task.h b/engine/common/download_task.h
index 95e736394..53f1902c5 100644
--- a/engine/common/download_task.h
+++ b/engine/common/download_task.h
@@ -6,7 +6,14 @@
 #include
 #include
 
-enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit, Cortex };
+enum class DownloadType {
+  Model,
+  Engine,
+  Miscellaneous,
+  CudaToolkit,
+  Cortex,
+  Environments
+};
 
 struct DownloadItem {
@@ -48,6 +55,8 @@ inline std::string DownloadTypeToString(DownloadType type) {
       return "CudaToolkit";
     case DownloadType::Cortex:
       return "Cortex";
+    case DownloadType::Environments:
+      return "Environments";
     default:
       return "Unknown";
   }
@@ -64,6 +73,8 @@ inline DownloadType DownloadTypeFromString(const std::string& str) {
     return DownloadType::CudaToolkit;
   } else if (str == "Cortex") {
     return DownloadType::Cortex;
+  } else if (str == "Environments") {
+    return DownloadType::Environments;
   } else {
     return DownloadType::Miscellaneous;
   }
diff --git a/engine/config/model_config.h b/engine/config/model_config.h
index a799adb27..d8ede92f7 100644
--- a/engine/config/model_config.h
+++ b/engine/config/model_config.h
@@ -1,8 +1,11 @@
 #pragma once
 #include
-#include
+#include
 #include
+#include
+
+#include
 #include
 #include
 #include
@@ -482,4 +485,340 @@ struct ModelConfig {
   }
 };
+struct Endpoint {
+  std::string method;
+  std::string path;
+  std::string transform_request;
+  std::string transform_response;
+};
+
+struct PythonModelConfig {
+  // General Metadata
+  std::string id;
+  std::string model;
+  std::string name;
+  int version;
+
+  // Inference Parameters
+  Endpoint load_model;
+  Endpoint destroy;
+  Endpoint inference;
+  Endpoint heath_check;
+  std::vector<Endpoint> extra_endpoints;
+
+  // Model Load Parameters
+  std::string port;
+  std::string script;
+  std::string log_path;
+  std::string log_level;
+  std::string environment;
+  std::vector<std::string> command;  // New command field
+  std::vector<std::string> files;
+  std::vector<std::string> depends;
+  std::string engine;
+  Json::Value extra_params;  // Accept dynamic extra parameters
+
+  // Method to convert C++ struct to YAML
+  void ToYaml(const std::string& filepath) const {
+    YAML::Emitter out;
+    out << YAML::BeginMap;
+
+    out << YAML::Key << "id" << YAML::Value << id;
+    out << YAML::Key << "model" << YAML::Value << model;
+    out << YAML::Key << "name" << YAML::Value << name;
+    out << YAML::Key << "version" << YAML::Value << version;
+
+    // Inference Parameters
+    out << YAML::Key << "load_model" << YAML::Value << YAML::BeginMap;
+    out << YAML::Key << "method" << YAML::Value << load_model.method;
+    out << YAML::Key << "path" << YAML::Value << load_model.path;
+    out << YAML::Key << "transform_request" << YAML::Value
+        << load_model.transform_request;
+    out << YAML::Key << "transform_response" << YAML::Value
+        << load_model.transform_response;
+    out << YAML::EndMap;
+
+    out << YAML::Key << "destroy" << YAML::Value << YAML::BeginMap;
+    out << YAML::Key << "method" << YAML::Value << destroy.method;
+    out << YAML::Key << "path" << YAML::Value << destroy.path;
+    out << YAML::EndMap;
+
+    out << YAML::Key << "inference" << YAML::Value << YAML::BeginMap;
+    out << YAML::Key << "method" << YAML::Value << inference.method;
+    out << YAML::Key << "path" << YAML::Value << inference.path;
+    out << YAML::EndMap;
+
+    out << YAML::Key << "extra_endpoints" << YAML::Value << YAML::BeginSeq;
+    for (const auto& endpoint : extra_endpoints) {
+      out << YAML::BeginMap;
+      out << YAML::Key << "method" << YAML::Value << endpoint.method;
+      out << YAML::Key << "path" << YAML::Value << endpoint.path;
+      out << YAML::EndMap;
+    }
+    out << YAML::EndSeq;
+
+    // Model Load Parameters
+    out << YAML::Key << "port" << YAML::Value << port;
+    out << YAML::Key << "script" << YAML::Value << script;
+    out << YAML::Key << "log_path" << YAML::Value << log_path;
+    out << YAML::Key << "log_level" << YAML::Value << log_level;
+    out << YAML::Key << "environment" << YAML::Value << environment;
+
+    // Serialize command as YAML list
+    out << YAML::Key << "command" << YAML::Value << YAML::BeginSeq;
+    for (const auto& cmd : command) {
+      out << cmd;
+    }
+    out << YAML::EndSeq;
+
+    // Serialize files as YAML list
+    out << YAML::Key << "files" << YAML::Value << YAML::BeginSeq;
+    for (const auto& file : files) {
+      out << file;
+    }
+    out << YAML::EndSeq;
+
+    // Serialize depends as YAML list
+    out << YAML::Key << "depends" << YAML::Value << YAML::BeginSeq;
+    for (const auto& depend : depends) {
+      out << depend;
+    }
+    out << YAML::EndSeq;
+
+    out << YAML::Key << "engine" << YAML::Value << engine;
+
+    // Serialize extra_params as YAML
+    out << YAML::Key << "extra_params" << YAML::Value << YAML::BeginMap;
+    for (Json::ValueConstIterator iter = extra_params.begin();
+         iter != extra_params.end(); ++iter) {
+      out << YAML::Key << iter.key().asString() << YAML::Value
+          << iter->asString();
+    }
+    out << YAML::EndMap;
+
+    std::ofstream fout(filepath);
+    if (!fout.is_open()) {
+      throw std::runtime_error("Failed to open file for writing: " + filepath);
+    }
+    fout << out.c_str();
+  }
+
+  // Method to populate struct from YAML file
+  void ReadFromYaml(const std::string& filePath) {
+    YAML::Node config = YAML::LoadFile(filePath);
+
+    if (config["id"])
+      id = config["id"].as<std::string>();
+    if (config["model"])
+      model = config["model"].as<std::string>();
+    if (config["name"])
+      name = config["name"].as<std::string>();
+    if (config["version"])
+      version = config["version"].as<int>();
+
+    // Inference Parameters
+
+    auto ip = config;
+    if (ip["load_model"]) {
+      load_model.method = ip["load_model"]["method"].as<std::string>();
+      load_model.path = ip["load_model"]["path"].as<std::string>();
+      load_model.transform_request =
+          ip["load_model"]["transform_request"].as<std::string>();
+      load_model.transform_response =
+          ip["load_model"]["transform_response"].as<std::string>();
+    }
+    if (ip["destroy"]) {
+      destroy.method = ip["destroy"]["method"].as<std::string>();
+      destroy.path = ip["destroy"]["path"].as<std::string>();
+    }
+    if (ip["inference"]) {
+      inference.method = ip["inference"]["method"].as<std::string>();
+      inference.path = ip["inference"]["path"].as<std::string>();
+    }
+    if (ip["extra_endpoints"] && ip["extra_endpoints"].IsSequence()) {
+      for (const auto& endpoint : ip["extra_endpoints"]) {
+        Endpoint e;
+        e.method = endpoint["method"].as<std::string>();
+        e.path = endpoint["path"].as<std::string>();
+        extra_endpoints.push_back(e);
+      }
+    }
+
+    // Model Load Parameters
+
+    auto mlp = config;
+    if (mlp["port"])
+      port = mlp["port"].as<std::string>();
+    if (mlp["script"])
+      script = mlp["script"].as<std::string>();
+    if (mlp["log_path"])
+      log_path = mlp["log_path"].as<std::string>();
+    if (mlp["log_level"])
+      log_level = mlp["log_level"].as<std::string>();
+    if (mlp["environment"])
+      environment = mlp["environment"].as<std::string>();
+    if (mlp["engine"])
+      engine = mlp["engine"].as<std::string>();
+
+    if (mlp["command"] && mlp["command"].IsSequence()) {
+      for (const auto& cmd : mlp["command"]) {
+        command.push_back(cmd.as<std::string>());
+      }
+    }
+
+    if (mlp["files"] && mlp["files"].IsSequence()) {
+      for (const auto& file : mlp["files"]) {
+        files.push_back(file.as<std::string>());
+      }
+    }
+
+    if (mlp["depends"] && mlp["depends"].IsSequence()) {
+      for (const auto& depend : mlp["depends"]) {
+        depends.push_back(depend.as<std::string>());
+      }
+    }
+
+    if (mlp["extra_params"]) {
+      for (YAML::const_iterator it = mlp["extra_params"].begin();
+           it != mlp["extra_params"].end(); ++it) {
+        extra_params[it->first.as<std::string>()] =
+            it->second.as<std::string>();
+      }
+    }
+  }
+
+  // Method to convert the struct to JSON
+  Json::Value ToJson() const {
+    Json::Value root;
+
+    root["id"] = id;
+    root["model"] = model;
+    root["name"] = name;
+    root["version"] = version;
+
+    // Inference Parameters
+    root["load_model"]["method"] = load_model.method;
+    root["load_model"]["path"] = load_model.path;
+    root["load_model"]["transform_request"] = load_model.transform_request;
+    root["load_model"]["transform_response"] = load_model.transform_response;
+
+    root["destroy"]["method"] = destroy.method;
+    root["destroy"]["path"] = destroy.path;
+
+    root["inference"]["method"] = inference.method;
+    root["inference"]["path"] = inference.path;
+
+    for (const auto& endpoint : extra_endpoints) {
+      Json::Value e;
+      e["method"] = endpoint.method;
+      e["path"] = endpoint.path;
+      root["extra_endpoints"].append(e);
+    }
+
+    // Model Load Parameters
+    root["port"] = port;
+    root["log_path"] = log_path;
+    root["log_level"] = log_level;
+    root["environment"]
= environment; + root["script"] = script; + + // Serialize command as JSON array + for (const auto& cmd : command) { + root["command"].append(cmd); + } + + for (const auto& file : files) { + root["files"].append(file); + } + + for (const auto& depend : depends) { + root["depends"].append(depend); + } + + root["engine"] = engine; + root["extra_params"] = extra_params; // Serialize the JSON value directly + + return root; + } + + // Method to populate struct from JSON + void FromJson(const Json::Value& root) { + + if (root.isMember("id")) + id = root["id"].asString(); + if (root.isMember("model")) + model = root["model"].asString(); + if (root.isMember("name")) + name = root["name"].asString(); + if (root.isMember("version")) + version = root["version"].asInt(); + + // Inference Parameters + + const Json::Value& ip = root; + if (ip.isMember("load_model")) { + load_model.method = ip["load_model"]["method"].asString(); + load_model.path = ip["load_model"]["path"].asString(); + load_model.transform_request = + ip["load_model"]["transform_request"].asString(); + load_model.transform_response = + ip["load_model"]["transform_response"].asString(); + } + if (ip.isMember("destroy")) { + destroy.method = ip["destroy"]["method"].asString(); + destroy.path = ip["destroy"]["path"].asString(); + } + if (ip.isMember("inference")) { + inference.method = ip["inference"]["method"].asString(); + inference.path = ip["inference"]["path"].asString(); + } + if (ip.isMember("extra_endpoints")) { + for (const auto& endpoint : ip["extra_endpoints"]) { + Endpoint e; + e.method = endpoint["method"].asString(); + e.path = endpoint["path"].asString(); + extra_endpoints.push_back(e); + } + } + + // Model Load Parameters + + const Json::Value& mlp = root; + if (mlp.isMember("port")) + port = mlp["port"].asString(); + if (mlp.isMember("log_path")) + log_path = mlp["log_path"].asString(); + if (mlp.isMember("log_level")) + log_level = mlp["log_level"].asString(); + if (mlp.isMember("environment")) + environment = mlp["environment"].asString(); + if (mlp.isMember("engine")) + engine = mlp["engine"].asString(); + if (mlp.isMember("script")) + script = mlp["script"].asString(); + + if (mlp.isMember("command")) { + for (const auto& cmd : mlp["command"]) { + command.push_back(cmd.asString()); + } + } + + if (mlp.isMember("files")) { + for (const auto& file : mlp["files"]) { + files.push_back(file.asString()); + } + } + + if (mlp.isMember("depends")) { + for (const auto& depend : mlp["depends"]) { + depends.push_back(depend.asString()); + } + } + + if (mlp.isMember("extra_params")) { + extra_params = mlp["extra_params"]; // Directly assign the JSON value + } + } +}; + } // namespace config diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index 1a501287d..34c6504ac 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -210,6 +210,16 @@ void Models::ListModel( } data.append(std::move(obj)); yaml_handler.Reset(); + } else if (model_config.engine == kPythonEngine) { + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.path_to_model_yaml)) + .string()); + Json::Value obj = python_model_config.ToJson(); + obj["id"] = model_entry.model; + obj["model"] = model_entry.model; + data.append(std::move(obj)); } else { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile( @@ -280,6 +290,19 @@ void Models::GetModel(const HttpRequestPtr& req, auto resp = 
cortex_utils::CreateCortexHttpTextAsJsonResponse(ret); resp->setStatusCode(drogon::k200OK); callback(resp); + } else if (model_config.engine == kPythonEngine) { + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.value().path_to_model_yaml)) + .string()); + ret = python_model_config.ToJson(); + ret["id"] = python_model_config.model; + ret["object"] = "model"; + ret["result"] = "OK"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); } else { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile( @@ -350,6 +373,13 @@ void Models::UpdateModel(const HttpRequestPtr& req, yaml_handler.WriteYamlFile(yaml_fp.string()); message = "Successfully update model ID '" + model_id + "': " + json_body.toStyledString(); + } else if (model_config.engine == kPythonEngine) { + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml(yaml_fp.string()); + python_model_config.FromJson(json_body); + python_model_config.ToYaml(yaml_fp.string()); + message = "Successfully update model ID '" + model_id + + "': " + json_body.toStyledString(); } else { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile(yaml_fp.string()); diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index d8e29eb1b..961798d2c 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -127,6 +127,56 @@ void server::FineTuning( LOG_TRACE << "Done fine-tuning"; } +void server::Inference(const HttpRequestPtr& req, + std::function&& callback) { + LOG_TRACE << "Start inference"; + auto q = std::make_shared(); + auto ir = inference_svc_->HandleInference(q, req->getJsonObject()); + LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); + if (ir.has_error()) { + auto err = ir.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(err)); + resp->setStatusCode( + static_cast(std::get<0>(err)["status_code"].asInt())); + callback(resp); + return; + } + LOG_TRACE << "Wait to inference"; + auto [status, res] = q->wait_and_pop(); + LOG_DEBUG << "response: " << res.toStyledString(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode( + static_cast(status["status_code"].asInt())); + callback(resp); + LOG_TRACE << "Done inference"; +} + +void server::RouteRequest( + const HttpRequestPtr& req, + std::function&& callback) { + + LOG_TRACE << "Start route request"; + auto q = std::make_shared(); + auto ir = inference_svc_->HandleRouteRequest(q, req->getJsonObject()); + LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); + if (ir.has_error()) { + auto err = ir.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(err)); + resp->setStatusCode( + static_cast(std::get<0>(err)["status_code"].asInt())); + callback(resp); + return; + } + LOG_TRACE << "Wait to route request"; + auto [status, res] = q->wait_and_pop(); + LOG_DEBUG << "response: " << res.toStyledString(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode( + static_cast(status["status_code"].asInt())); + callback(resp); + LOG_TRACE << "Done route request"; +} + void server::LoadModel(const HttpRequestPtr& req, std::function&& callback) { auto ir = inference_svc_->LoadModel(req->getJsonObject()); diff --git a/engine/controllers/server.h b/engine/controllers/server.h index ef8a32f5d..42214a641 
100644
--- a/engine/controllers/server.h
+++ b/engine/controllers/server.h
@@ -46,8 +46,11 @@ class server : public drogon::HttpController,
   ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Options, Post);
   ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Options, Post);
   ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Options, Post);
+  ADD_METHOD_TO(server::Inference, "/v1/inference", Options, Post);
+  ADD_METHOD_TO(server::RouteRequest, "/v1/route/request", Options, Post);
   METHOD_LIST_END
+
   void ChatCompletion(
       const HttpRequestPtr& req,
       std::function<void(const HttpResponsePtr&)>&& callback) override;
@@ -69,6 +72,10 @@
   void FineTuning(
       const HttpRequestPtr& req,
       std::function<void(const HttpResponsePtr&)>&& callback) override;
+  void Inference(const HttpRequestPtr& req,
+                 std::function<void(const HttpResponsePtr&)>&& callback);
+  void RouteRequest(const HttpRequestPtr& req,
+                    std::function<void(const HttpResponsePtr&)>&& callback);
 
  private:
   void ProcessStreamRes(std::function<void(const HttpResponsePtr&)> cb,
diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h
index b796ebaed..b2d290d24 100644
--- a/engine/cortex-common/EngineI.h
+++ b/engine/cortex-common/EngineI.h
@@ -59,6 +59,14 @@ class EngineI {
                              const std::string& log_path) = 0;
   virtual void SetLogLevel(trantor::Logger::LogLevel logLevel) = 0;
 
+  virtual Json::Value GetRemoteModels() = 0;
+  virtual void HandleRouteRequest(
+      std::shared_ptr<Json::Value> json_body,
+      std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
+  virtual void HandleInference(
+      std::shared_ptr<Json::Value> json_body,
+      std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
+
   // Stop inflight chat completion in stream mode
   virtual void StopInferencing(const std::string& model_id) = 0;
 };
diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc
new file mode 100644
index 000000000..ddf6784e8
--- /dev/null
+++ b/engine/extensions/python-engine/python_engine.cc
@@ -0,0 +1,860 @@
+#include "python_engine.h"
+#include
+#include
+#include
+#include
+namespace python_engine {
+constexpr const int k200OK = 200;
+constexpr const int k400BadRequest = 400;
+constexpr const int k409Conflict = 409;
+constexpr const int k500InternalServerError = 500;
+constexpr const int kFileLoggerOption = 0;
+
+static size_t WriteCallback(char* ptr, size_t size, size_t nmemb,
+                            std::string* data) {
+  data->append(ptr, size * nmemb);
+  return size * nmemb;
+}
+
+PythonEngine::PythonEngine() {}
+
+PythonEngine::~PythonEngine() {
+  curl_global_cleanup();
+}
+
+config::PythonModelConfig* PythonEngine::GetModelConfig(
+    const std::string& model) {
+  std::shared_lock lock(models_mutex_);
+  auto it = models_.find(model);
+  if (it != models_.end()) {
+    return &it->second;
+  }
+  return nullptr;
+}
+std::string constructWindowsCommandLine(const std::vector<std::string>& args) {
+  std::string cmdLine;
+  for (const auto& arg : args) {
+    // Simple escaping for Windows command line
+    std::string escapedArg = arg;
+    if (escapedArg.find(' ') != std::string::npos) {
+      // Wrap in quotes and escape existing quotes
+      for (char& c : escapedArg) {
+        if (c == '"')
+          c = '\\';
+      }
+      escapedArg = "\"" + escapedArg + "\"";
+    }
+    cmdLine += escapedArg + " ";
+  }
+  return cmdLine;
+}
+
+std::vector<char*> convertToArgv(const std::vector<std::string>& args) {
+  std::vector<char*> argv;
+  for (const auto& arg : args) {
+    argv.push_back(const_cast<char*>(arg.c_str()));
+  }
+  argv.push_back(nullptr);
+  return argv;
+}
+
+pid_t PythonEngine::SpawnProcess(const std::string& model,
+                                 const std::vector<std::string>& command) {
+  try {
+#ifdef _WIN32
+    // Windows process creation
+    STARTUPINFOA si = {0};
+    PROCESS_INFORMATION pi = {0};
+    si.cb =
sizeof(si); + + // Construct command line + std::string cmdLine = constructWindowsCommandLine(command); + + // Convert string to char* for Windows API + char commandBuffer[4096]; + strncpy_s(commandBuffer, cmdLine.c_str(), sizeof(commandBuffer)); + + if (!CreateProcessA(NULL, // lpApplicationName + commandBuffer, // lpCommandLine + NULL, // lpProcessAttributes + NULL, // lpThreadAttributes + FALSE, // bInheritHandles + 0, // dwCreationFlags + NULL, // lpEnvironment + NULL, // lpCurrentDirectory + &si, // lpStartupInfo + &pi // lpProcessInformation + )) { + throw std::runtime_error("Failed to create process on Windows"); + } + + // Store the process ID + pid_t pid = pi.dwProcessId; + processMap[model] = pid; + + // Close handles to avoid resource leaks + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + + return pid; + +#elif __APPLE__ || __linux__ + // POSIX process creation + pid_t pid; + + // Convert command vector to char*[] + std::vector argv = convertToArgv(command); + // for (auto c : command) { + // std::cout << c << " " << std::endl; + // } + + // Use posix_spawn for cross-platform compatibility + int spawn_result = posix_spawn(&pid, // pid output + command[0].c_str(), // executable path + NULL, // file actions + NULL, // spawn attributes + argv.data(), // argument vector + NULL // environment (inherit) + ); + + if (spawn_result != 0) { + throw std::runtime_error("Failed to spawn process"); + } + + // Store the process ID + processMap[model] = pid; + return pid; + +#else +#error Unsupported platform +#endif + } catch (const std::exception& e) { + LOG_ERROR << "Process spawning error: " << e.what(); + return -1; + } +} +bool PythonEngine::TerminateModelProcess(const std::string& model) { + auto it = processMap.find(model); + if (it == processMap.end()) { + LOG_ERROR << "No process found for model: " << model + << ", removing from list running models."; + models_.erase(model); + return false; + } + +#ifdef _WIN32 + HANDLE hProcess = OpenProcess(PROCESS_TERMINATE, FALSE, it->second); + if (hProcess == NULL) { + LOG_ERROR << "Failed to open process"; + return false; + } + + bool terminated = TerminateProcess(hProcess, 0) == TRUE; + CloseHandle(hProcess); + + if (terminated) { + processMap.erase(it); + return true; + } + +#elif __APPLE__ || __linux__ + int result = kill(it->second, SIGTERM); + if (result == 0) { + processMap.erase(it); + return true; + } +#endif + + return false; +} +CurlResponse PythonEngine::MakeGetRequest(const std::string& model, + const std::string& path) { + auto config = models_[model]; + std::string full_url = "http://localhost:" + config.port + path; + CurlResponse response; + + auto result = curl_utils::SimpleRequest(full_url, RequestType::GET); + if (result.has_error()) { + response.error = true; + response.error_message = result.error(); + } else { + response.body = result.value(); + } + return response; +} +CurlResponse PythonEngine::MakeDeleteRequest(const std::string& model, + const std::string& path) { + auto config = models_[model]; + std::string full_url = "http://localhost:" + config.port + path; + CurlResponse response; + + auto result = curl_utils::SimpleRequest(full_url, RequestType::DEL); + + if (result.has_error()) { + response.error = true; + response.error_message = result.error(); + } else { + response.body = result.value(); + } + + return response; +} + +CurlResponse PythonEngine::MakePostRequest(const std::string& model, + const std::string& path, + const std::string& body) { + auto config = models_[model]; + std::string full_url = 
"http://localhost:" + config.port + path; + + CurlResponse response; + auto result = curl_utils::SimpleRequest(full_url, RequestType::POST, body); + + if (result.has_error()) { + response.error = true; + response.error_message = result.error(); + } else { + response.body = result.value(); + } + return response; +} + +bool PythonEngine::LoadModelConfig(const std::string& model, + const std::string& yaml_path) { + try { + config::PythonModelConfig config; + config.ReadFromYaml(yaml_path); + std::unique_lock lock(models_mutex_); + models_[model] = config; + } catch (const std::exception& e) { + LOG_ERROR << "Failed to load model config: " << e.what(); + return false; + } + + return true; +} + +void PythonEngine::GetModels( + std::shared_ptr json_body, + std::function&& callback) { + + Json::Value response_json; + Json::Value model_array(Json::arrayValue); + + for (const auto& pair : models_) { + auto val = pair.second.ToJson(); + model_array.append(val); + } + + response_json["object"] = "list"; + response_json["data"] = model_array; + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json)); +} + +void PythonEngine::LoadModel( + std::shared_ptr json_body, + std::function&& callback) { + // TODO: handle a case that can spawn process but the process spawn fail. + pid_t pid; + if (!json_body->isMember("model") || !json_body->isMember("model_path")) { + Json::Value error; + error["error"] = "Missing required fields: model or model_path"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + const std::string& model = (*json_body)["model"].asString(); + const std::string& model_path = (*json_body)["model_path"].asString(); + if (models_.find(model) != models_.end()) { + Json::Value error; + error["error"] = "Model already loaded!"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k409Conflict; + callback(std::move(status), std::move(error)); + return; + } + + if (!LoadModelConfig(model, model_path)) { + Json::Value error; + error["error"] = "Failed to load model configuration"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + callback(std::move(status), std::move(error)); + return; + } + auto model_config = models_[model]; + auto model_folder_path = model_config.files[0]; + auto data_folder_path = + std::filesystem::path(model_folder_path) / std::filesystem::path("venv"); + try { +#ifdef _WIN32 + auto executable = std::filesystem::path(data_folder_path) / + std::filesystem::path("Scripts"); +#else + auto executable = + std::filesystem::path(data_folder_path) / std::filesystem::path("bin"); +#endif + + auto executable_str = + (executable / std::filesystem::path(model_config.command[0])).string(); + auto command = model_config.command; + command[0] = executable_str; + command.push_back((std::filesystem::path(model_folder_path) / + std::filesystem::path(model_config.script)) + .string()); + std::list args{"--port", + model_config.port, + "--log_path", + (file_manager_utils::GetCortexLogPath() / + std::filesystem::path(model_config.log_path)) + .string(), + "--log_level", + 
model_config.log_level}; + if (!model_config.extra_params.isNull() && + model_config.extra_params.isObject()) { + for (const auto& key : model_config.extra_params.getMemberNames()) { + const Json::Value& value = model_config.extra_params[key]; + + // Convert key to string with -- prefix + std::string param_key = "--" + key; + + // Handle different JSON value types + if (value.isString()) { + args.emplace_back(param_key); + args.emplace_back(value.asString()); + } else if (value.isInt()) { + args.emplace_back(param_key); + args.emplace_back(std::to_string(value.asInt())); + } else if (value.isDouble()) { + args.emplace_back(param_key); + args.emplace_back(std::to_string(value.asDouble())); + } else if (value.isBool()) { + // For boolean, only add the flag if true + if (value.asBool()) { + args.emplace_back(param_key); + } + } + } + } + + // Add the parsed arguments to the command + command.insert(command.end(), args.begin(), args.end()); + pid = SpawnProcess(model, command); + if (pid == -1) { + std::unique_lock lock(models_mutex_); + if (models_.find(model) != models_.end()) { + models_.erase(model); + } + + Json::Value error; + error["error"] = "Fail to spawn process with pid -1"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + callback(std::move(status), std::move(error)); + return; + } + } catch (const std::exception& e) { + std::unique_lock lock(models_mutex_); + if (models_.find(model) != models_.end()) { + models_.erase(model); + } + + Json::Value error; + error["error"] = e.what(); + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response; + response["status"] = + "Model loaded successfully with pid: " + std::to_string(pid); + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + callback(std::move(status), std::move(response)); +} + +void PythonEngine::UnloadModel( + std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("model")) { + Json::Value error; + error["error"] = "Missing required field: model"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + const std::string& model = (*json_body)["model"].asString(); + + { + std::unique_lock lock(models_mutex_); + if (TerminateModelProcess(model)) { + models_.erase(model); + } else { + Json::Value error; + error["error"] = "Fail to terminate process with id: " + + std::to_string(processMap[model]); + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + } + + Json::Value response; + response["status"] = "Model unloaded successfully"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + callback(std::move(status), std::move(response)); +} + +void PythonEngine::HandleChatCompletion( + std::shared_ptr json_body, + std::function&& callback) {} + +void PythonEngine::HandleInference( + std::shared_ptr 
json_body, + std::function&& callback) { + if (!json_body->isMember("model")) { + Json::Value error; + error["error"] = "Missing required field: model is required!"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + std::string method = "post"; + std::string path = "/inference"; + std::string transform_request = + (*json_body).get("transform_request", "").asString(); + std::string transform_response = + (*json_body).get("transform_response", "").asString(); + std::string model = (*json_body)["model"].asString(); + Json::Value body = (*json_body)["body"]; + + // Transform Request + std::string transformed_request; + if (!transform_request.empty()) { + + try { + // Validate JSON body + if (!body || body.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + + // Render with error handling + try { + transformed_request = renderer_.Render(transform_request, *json_body); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + transformed_request = body.toStyledString(); + } + } else { + transformed_request = body.toStyledString(); + } + + // End Transform request + + CurlResponse response; + if (method == "post") { + response = MakePostRequest(model, path, transformed_request); + } else if (method == "get") { + response = MakeGetRequest(model, path); + } else if (method == "delete") { + response = MakeDeleteRequest(model, path); + } else { + Json::Value error; + error["error"] = + "method not supported! 
Supported methods are: post, get, delete"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + if (response.error) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + Json::Value error; + error["error"] = response.error_message; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response_json; + Json::Reader reader; + if (!reader.parse(response.body, response_json)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + if (!transform_response.empty()) { + // Transform Response + std::string response_str; + try { + // Validate JSON body + if (!response_json || response_json.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + // Render with error handling + try { + response_str = renderer_.Render(transform_response, response_json); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + response_str = response_json.toStyledString(); + } + + Json::Reader reader_final; + Json::Value response_json_final; + if (!reader_final.parse(response_str, response_json_final)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json_final)); + } else { + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json)); + } +} +Json::Value PythonEngine::GetRemoteModels() { + return Json::Value(); +} +void PythonEngine::StopInferencing(const std::string& model_id) {} +void PythonEngine::HandleRouteRequest( + std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("model") || !json_body->isMember("method") || + !json_body->isMember("path")) { + Json::Value error; + error["error"] = + "Missing required field: model, method and path are required!"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + std::string method = (*json_body)["method"].asString(); + std::string path = (*json_body)["path"].asString(); + std::string transform_request = + (*json_body).get("transform_request", "").asString(); + std::string transform_response = + (*json_body).get("transform_response", "").asString(); + std::string model = 
(*json_body)["model"].asString(); + Json::Value body = (*json_body)["body"]; + + // Transform Request + std::string transformed_request; + if (!transform_request.empty()) { + + try { + // Validate JSON body + if (!body || body.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + + // Render with error handling + try { + transformed_request = renderer_.Render(transform_request, *json_body); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + transformed_request = body.toStyledString(); + } + } else { + transformed_request = body.toStyledString(); + } + + // End Transform request + + CurlResponse response; + if (method == "post") { + response = MakePostRequest(model, path, transformed_request); + } else if (method == "get") { + response = MakeGetRequest(model, path); + } else if (method == "delete") { + response = MakeDeleteRequest(model, path); + } else { + Json::Value error; + error["error"] = + "method not supported! Supported methods are: post, get, delete"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + if (response.error) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + Json::Value error; + error["error"] = response.error_message; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response_json; + Json::Reader reader; + if (!reader.parse(response.body, response_json)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + if (!transform_response.empty()) { + // Transform Response + std::string response_str; + try { + // Validate JSON body + if (!response_json || response_json.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + // Render with error handling + try { + response_str = renderer_.Render(transform_response, response_json); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + response_str = response_json.toStyledString(); + } + + Json::Reader reader_final; + Json::Value response_json_final; + if (!reader_final.parse(response_str, response_json_final)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), 
std::move(response_json_final)); + } else { + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json)); + } +} + +void PythonEngine::GetModelStatus( + std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("model")) { + Json::Value error; + error["error"] = "Missing required field: model"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + auto model = json_body->get("model", "").asString(); + auto model_config = models_[model]; + auto health_endpoint = model_config.heath_check; + auto response_health = MakeGetRequest(model, health_endpoint.path); + + if (response_health.error) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + Json::Value error; + error["error"] = response_health.error_message; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response; + response["model"] = model; + response["model_loaded"] = true; + response["model_data"] = model_config.ToJson(); + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + callback(std::move(status), std::move(response)); +} + +// Implement remaining virtual functions +void PythonEngine::HandleEmbedding( + std::shared_ptr, + std::function&& callback) { + callback(Json::Value(), Json::Value()); +} + +bool PythonEngine::IsSupported(const std::string& f) { + if (f == "HandleChatCompletion" || f == "LoadModel" || f == "UnloadModel" || + f == "GetModelStatus" || f == "GetModels" || f == "SetFileLogger" || + f == "SetLogLevel") { + return true; + } + return false; +} + +bool PythonEngine::SetFileLogger(int max_log_lines, + const std::string& log_path) { + if (!async_file_logger_) { + async_file_logger_ = std::make_unique(); + } + + async_file_logger_->setFileName(log_path); + async_file_logger_->setMaxLines(max_log_lines); // Keep last 100000 lines + async_file_logger_->startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + if (async_file_logger_) + async_file_logger_->output_(msg, len); + }, + [&]() { + if (async_file_logger_) + async_file_logger_->flush(); + }); + freopen(log_path.c_str(), "w", stderr); + freopen(log_path.c_str(), "w", stdout); + return true; +} + +void PythonEngine::SetLogLevel(trantor::Logger::LogLevel log_level) { + trantor::Logger::setLogLevel(log_level); +} + +void PythonEngine::Load(EngineLoadOption opts) { + // Develop register model here on loading engine +}; + +void PythonEngine::Unload(EngineUnloadOption opts) {}; + +// extern "C" { +// EngineI* get_engine() { +// return new PythonEngine(); +// } +// } +} // namespace python_engine \ No newline at end of file diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h new file mode 100644 index 000000000..7b112f435 --- /dev/null +++ b/engine/extensions/python-engine/python_engine.h @@ -0,0 +1,166 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "config/model_config.h" +#include "cortex-common/EngineI.h" +#include "extensions/template_renderer.h" +#include 
"utils/file_logger.h" +#include "utils/file_manager_utils.h" + +#include "utils/curl_utils.h" +#ifdef _WIN32 +#include +#include +using pid_t = DWORD; +#elif __APPLE__ || __linux__ +#include +#include +#include +#include +#include +#endif +// Helper for CURL response +namespace python_engine { +struct StreamContext { + std::shared_ptr> callback; + std::string buffer; +}; + +static size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, + void* userdata) { + auto* context = static_cast(userdata); + std::string chunk(ptr, size * nmemb); + + context->buffer += chunk; + + // Process complete lines + size_t pos; + while ((pos = context->buffer.find('\n')) != std::string::npos) { + std::string line = context->buffer.substr(0, pos); + context->buffer = context->buffer.substr(pos + 1); + + // Skip empty lines + if (line.empty() || line == "\r") + continue; + + // Remove "data: " prefix if present + // if (line.substr(0, 6) == "data: ") + // { + // line = line.substr(6); + // } + + // Skip [DONE] message + std::cout << line << std::endl; + if (line == "data: [DONE]") { + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = true; + status["status_code"] = 200; + (*context->callback)(std::move(status), Json::Value()); + break; + } + + // Parse the JSON + Json::Value chunk_json; + chunk_json["data"] = line + "\n\n"; + Json::Reader reader; + + Json::Value status; + status["is_done"] = false; + status["has_error"] = false; + status["is_stream"] = true; + status["status_code"] = 200; + (*context->callback)(std::move(status), std::move(chunk_json)); + } + + return size * nmemb; +} + +struct CurlResponse { + std::string body; + bool error{false}; + std::string error_message; +}; + +class PythonEngine : public EngineI { + private: + // Model configuration + + // Thread-safe model config storage + mutable std::shared_mutex models_mutex_; + std::unordered_map models_; + extensions::TemplateRenderer renderer_; + std::unique_ptr async_file_logger_; + std::unordered_map processMap; + + // Helper functions + CurlResponse MakePostRequest(const std::string& model, + const std::string& path, + const std::string& body); + CurlResponse MakeGetRequest(const std::string& model, + const std::string& path); + CurlResponse MakeDeleteRequest(const std::string& model, + const std::string& path); + + // Process manager functions + pid_t SpawnProcess(const std::string& model, + const std::vector& command); + bool TerminateModelProcess(const std::string& model); + + // Internal model management + bool LoadModelConfig(const std::string& model, const std::string& yaml_path); + config::PythonModelConfig* GetModelConfig(const std::string& model); + + public: + PythonEngine(); + ~PythonEngine(); + + void Load(EngineLoadOption opts) override; + + void Unload(EngineUnloadOption opts) override; + + // Main interface implementations + void GetModels( + std::shared_ptr json_body, + std::function&& callback) override; + + void HandleChatCompletion( + std::shared_ptr json_body, + std::function&& callback) override; + + void LoadModel( + std::shared_ptr json_body, + std::function&& callback) override; + + void UnloadModel( + std::shared_ptr json_body, + std::function&& callback) override; + + void GetModelStatus( + std::shared_ptr json_body, + std::function&& callback) override; + + // Other required virtual functions + void HandleEmbedding( + std::shared_ptr json_body, + std::function&& callback) override; + bool IsSupported(const std::string& feature) override; + bool 
SetFileLogger(int max_log_lines, const std::string& log_path) override; + void SetLogLevel(trantor::Logger::LogLevel logLevel) override; + void HandleRouteRequest( + std::shared_ptr json_body, + std::function&& callback) override; + void HandleInference( + std::shared_ptr json_body, + std::function&& callback) override; + Json::Value GetRemoteModels() override; + void StopInferencing(const std::string& model_id) override; +}; +} // namespace python_engine \ No newline at end of file diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index d8dfbad61..6f08b5403 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -8,7 +8,7 @@ #include #include #include "cortex-common/remote_enginei.h" -#include "extensions/remote-engine/template_renderer.h" +#include "extensions/template_renderer.h" #include "utils/engine_constants.h" #include "utils/file_logger.h" // Helper for CURL response @@ -21,7 +21,7 @@ struct StreamContext { // Cache value for Anthropic std::string id; std::string model; - TemplateRenderer& renderer; + extensions::TemplateRenderer& renderer; std::string stream_template; }; struct CurlResponse { @@ -46,7 +46,7 @@ class RemoteEngine : public RemoteEngineI { // Thread-safe model config storage mutable std::shared_mutex models_mtx_; std::unordered_map models_; - TemplateRenderer renderer_; + extensions::TemplateRenderer renderer_; Json::Value metadata_; std::string chat_req_template_; std::string chat_res_template_; diff --git a/engine/extensions/remote-engine/template_renderer.cc b/engine/extensions/template_renderer.cc similarity index 99% rename from engine/extensions/remote-engine/template_renderer.cc rename to engine/extensions/template_renderer.cc index 15514d17c..32e7d72f5 100644 --- a/engine/extensions/remote-engine/template_renderer.cc +++ b/engine/extensions/template_renderer.cc @@ -7,7 +7,7 @@ #include #include #include "utils/logging_utils.h" -namespace remote_engine { +namespace extensions { TemplateRenderer::TemplateRenderer() { // Configure Inja environment env_.set_trim_blocks(true); diff --git a/engine/extensions/remote-engine/template_renderer.h b/engine/extensions/template_renderer.h similarity index 97% rename from engine/extensions/remote-engine/template_renderer.h rename to engine/extensions/template_renderer.h index f59e7cc93..7eccef2eb 100644 --- a/engine/extensions/remote-engine/template_renderer.h +++ b/engine/extensions/template_renderer.h @@ -14,7 +14,7 @@ #include #include // clang-format on -namespace remote_engine { +namespace extensions { class TemplateRenderer { public: TemplateRenderer(); diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 53a4bfa65..39e6e7961 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -3,10 +3,15 @@ #include #include #include + #include #include "algorithm" #include "database/engines.h" + +#include "extensions/python-engine/python_engine.h" + #include "extensions/remote-engine/remote_engine.h" + #include "utils/archive_utils.h" #include "utils/engine_constants.h" #include "utils/engine_matcher_utils.h" @@ -183,6 +188,7 @@ cpp::result EngineService::UninstallEngineVariant( const std::string& engine, const std::optional version, const std::optional variant) { auto ne = NormalizeEngine(engine); + // TODO: handle uninstall remote engine // only delete a remote engine if no model are using it auto exist_engine = 
GetEngineByNameAndVariant(engine); @@ -715,6 +721,14 @@ cpp::result EngineService::LoadEngine( return {}; } + // Check for python engine + + if (engine_name == kPythonEngine) { + engines_[engine_name].engine = new python_engine::PythonEngine(); + CTL_INF("Loaded engine: " << engine_name); + return {}; + } + // Check for remote engine if (IsRemoteEngine(engine_name)) { auto exist_engine = GetEngineByNameAndVariant(engine_name); @@ -884,6 +898,7 @@ EngineService::GetEngineDirPath(const std::string& engine_name) { cpp::result EngineService::UnloadEngine( const std::string& engine) { auto ne = NormalizeEngine(engine); + std::lock_guard lock(engines_mutex_); if (!IsEngineLoaded(ne)) { return cpp::fail("Engine " + ne + " is not loaded yet!"); @@ -942,6 +957,10 @@ cpp::result EngineService::IsEngineReady( } // End hard code + // Check for python engine + if (engine == kPythonEngine) { + return true; + } auto os = hw_inf_.sys_inf->os; if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) { diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index fcd3fdda9..a460582c6 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -132,6 +132,7 @@ class EngineService : public EngineServiceI { cpp::result UpdateEngine( const std::string& engine); + cpp::result, std::string> GetEngines(); cpp::result GetEngineById(int id); diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc index 9d8e9f4f8..3668fb6fe 100644 --- a/engine/services/inference_service.cc +++ b/engine/services/inference_service.cc @@ -112,6 +112,64 @@ cpp::result InferenceService::HandleEmbedding( return {}; } +cpp::result InferenceService::HandleInference( + std::shared_ptr q, std::shared_ptr json_body) { + std::string engine_type; + if (!HasFieldInReq(json_body, "engine")) { + engine_type = kLlamaRepo; + } else { + engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); + } + + auto engine_result = engine_service_->GetLoadedEngine(engine_type); + if (engine_result.has_error()) { + Json::Value res; + Json::Value stt; + res["message"] = "Engine is not loaded yet"; + stt["status_code"] = drogon::k400BadRequest; + LOG_WARN << "Engine is not loaded yet"; + return cpp::fail(std::make_pair(stt, res)); + } + + auto cb = [q](Json::Value status, Json::Value res) { + q->push(std::make_pair(status, res)); + }; + if (std::holds_alternative(engine_result.value())) { + std::get(engine_result.value()) + ->HandleInference(json_body, std::move(cb)); + } + return {}; +} + +cpp::result InferenceService::HandleRouteRequest( + std::shared_ptr q, std::shared_ptr json_body) { + std::string engine_type; + if (!HasFieldInReq(json_body, "engine")) { + engine_type = kLlamaRepo; + } else { + engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); + } + + auto engine_result = engine_service_->GetLoadedEngine(engine_type); + if (engine_result.has_error()) { + Json::Value res; + Json::Value stt; + res["message"] = "Engine is not loaded yet"; + stt["status_code"] = drogon::k400BadRequest; + LOG_WARN << "Engine is not loaded yet"; + return cpp::fail(std::make_pair(stt, res)); + } + + auto cb = [q](Json::Value status, Json::Value res) { + q->push(std::make_pair(status, res)); + }; + if (std::holds_alternative(engine_result.value())) { + std::get(engine_result.value()) + ->HandleRouteRequest(json_body, std::move(cb)); + } + return {}; +} + InferResult InferenceService::LoadModel( std::shared_ptr json_body) { std::string engine_type; diff --git 
a/engine/services/inference_service.h b/engine/services/inference_service.h index 75b07b1a3..f23be3f23 100644 --- a/engine/services/inference_service.h +++ b/engine/services/inference_service.h @@ -3,6 +3,7 @@ #include #include #include +#include "extensions/remote-engine/remote_engine.h" #include "services/engine_service.h" #include "services/model_service.h" #include "utils/result.hpp" @@ -41,6 +42,12 @@ class InferenceService { cpp::result HandleEmbedding( std::shared_ptr q, std::shared_ptr json_body); + cpp::result HandleInference( + std::shared_ptr q, std::shared_ptr json_body); + + cpp::result HandleRouteRequest( + std::shared_ptr q, std::shared_ptr json_body); + InferResult LoadModel(std::shared_ptr json_body); InferResult UnloadModel(const std::string& engine, diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 2d69e0f17..c7925360b 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -9,7 +10,10 @@ #include "config/yaml_config.h" #include "database/models.h" #include "hardware_service.h" +#include "utils/archive_utils.h" + #include "services/inference_service.h" + #include "utils/cli_selection_utils.h" #include "utils/engine_constants.h" #include "utils/file_manager_utils.h" @@ -17,6 +21,7 @@ #include "utils/huggingface_utils.h" #include "utils/logging_utils.h" #include "utils/result.hpp" +#include "utils/set_permission_utils.h" #include "utils/string_utils.h" #include "utils/widechar_conv.h" @@ -79,8 +84,7 @@ void ParseGguf(DatabaseService& db_service, CTL_ERR("Error adding model to modellist: " + result.error()); } } else { - if (auto m = db_service.GetModelInfo(ggufDownloadItem.id); - m.has_value()) { + if (auto m = db_service.GetModelInfo(ggufDownloadItem.id); m.has_value()) { auto upd_m = m.value(); upd_m.status = cortex::db::ModelStatus::Downloaded; if (auto r = db_service.UpdateModelEntry(ggufDownloadItem.id, upd_m); @@ -99,7 +103,7 @@ cpp::result GetDownloadTask( .pathParams = {"api", "models", "cortexso", modelId, "tree", branch}, }; - auto result = curl_utils::SimpleGetJson(url.ToFullPath()); + auto result = curl_utils::SimpleGetJsonRecursive(url.ToFullPath()); if (result.has_error()) { return cpp::fail("Model " + modelId + " not found"); } @@ -110,6 +114,7 @@ cpp::result GetDownloadTask( file_manager_utils::CreateDirectoryRecursively(model_container_path.string()); for (const auto& value : result.value()) { + // std::cout << "value object: " << value.toStyledString() << std::endl; auto path = value["path"].asString(); if (path == ".gitattributes" || path == ".gitignore" || path == "README.md") { @@ -121,6 +126,9 @@ cpp::result GetDownloadTask( .pathParams = {"cortexso", modelId, "resolve", branch, path}}; auto local_path = model_container_path / path; + if (!std::filesystem::exists(local_path.parent_path())) { + std::filesystem::create_directories(local_path.parent_path()); + } download_items.push_back( DownloadItem{.id = path, .downloadUrl = download_url.ToFullPath(), @@ -466,7 +474,8 @@ cpp::result ModelService::HandleUrl( model_size = model_size + item.bytes.value_or(0); } auto gguf_download_item = finishedTask.items[0]; - ParseGguf(*db_service_, gguf_download_item, author, std::nullopt, model_size); + ParseGguf(*db_service_, gguf_download_item, author, std::nullopt, + model_size); }; auto result = download_service_->AddDownloadTask(downloadTask, on_finished); @@ -528,15 +537,79 @@ 
ModelService::DownloadModelFromCortexsoAsync( config::YamlHandler yaml_handler; yaml_handler.ModelConfigFromFile(model_yml_item->localPath.string()); auto mc = yaml_handler.GetModelConfig(); - mc.model = unique_model_id; + if (mc.engine == kPythonEngine) { // process for Python engine + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml(model_yml_item->localPath.string()); + python_model_config.files.push_back( + model_yml_item->localPath.parent_path().string()); + python_model_config.ToYaml(model_yml_item->localPath.string()); + // unzip venv.zip + auto model_folder = model_yml_item->localPath.parent_path(); + auto venv_path = model_folder / std::filesystem::path("venv"); + if (!std::filesystem::exists(venv_path)) { + std::filesystem::create_directories(venv_path); + } + auto venv_zip = model_folder / std::filesystem::path("venv.zip"); + if (std::filesystem::exists(venv_zip)) { + if (archive_utils::ExtractArchive(venv_zip.string(), + venv_path.string())) { + std::filesystem::remove_all(venv_zip); + CTL_INF("Successfully extracted venv.zip"); + // If extraction succeeded, create pyvenv.cfg + std::ofstream pyvenv_cfg(venv_path / + std::filesystem::path("pyvenv.cfg")); +#ifdef _WIN32 + pyvenv_cfg << "home = " + << (venv_path / std::filesystem::path("Scripts")).string() + << std::endl; + pyvenv_cfg << "executable = " + << (venv_path / std::filesystem::path("Scripts") / + std::filesystem::path("python.exe")) + .string() + << std::endl; - uint64_t model_size = 0; - for (const auto& item : finishedTask.items) { - model_size = model_size + item.bytes.value_or(0); +#else + pyvenv_cfg << "home = " + << (venv_path / std::filesystem::path("bin/")).string() + << std::endl; + pyvenv_cfg + << "executable = " + << (venv_path / std::filesystem::path("bin/python")).string() + << std::endl; +#endif + + // Close the file + pyvenv_cfg.close(); + // Add execute permission to the Python binaries + +#ifdef _WIN32 + set_permission_utils::SetExecutePermissionsRecursive( + venv_path / std::filesystem::path("Scripts")); +#else + set_permission_utils::SetExecutePermissionsRecursive( + venv_path / std::filesystem::path("bin")); +#endif + + } else { + CTL_ERR("Failed to extract venv.zip"); + } + + } else { + CTL_ERR( + "venv.zip not found in model folder: " << model_folder.string()); + } + + } else { + mc.model = unique_model_id; + + uint64_t model_size = 0; + for (const auto& item : finishedTask.items) { + model_size = model_size + item.bytes.value_or(0); + } + mc.size = model_size; + yaml_handler.UpdateModelConfig(mc); + yaml_handler.WriteYamlFile(model_yml_item->localPath.string()); } - mc.size = model_size; - yaml_handler.UpdateModelConfig(mc); - yaml_handler.WriteYamlFile(model_yml_item->localPath.string()); auto rel = file_manager_utils::ToRelativeCortexDataPath(model_yml_item->localPath); @@ -583,7 +656,8 @@ cpp::result ModelService::DownloadModelFromCortexso( } std::string model_id{name + ":" + branch}; - auto on_finished = [this, branch, model_id](const DownloadTask& finishedTask) { + auto on_finished = [this, branch, + model_id](const DownloadTask& finishedTask) { const DownloadItem* model_yml_item = nullptr; auto need_parse_gguf = true; @@ -754,18 +828,75 @@ cpp::result ModelService::StartModel( constexpr const int kDefautlContextLength = 8192; int max_model_context_length = kDefautlContextLength; Json::Value json_data; - // Currently we don't support download vision models, so we need to bypass check - if (!bypass_model_check) { - auto model_entry = db_service_->GetModelInfo(model_handle); - 
if (model_entry.has_error()) { - CTL_WRN("Error: " + model_entry.error()); - return cpp::fail(model_entry.error()); - } - yaml_handler.ModelConfigFromFile( + auto model_entry = db_service_->GetModelInfo(model_handle); + if (model_entry.has_error()) { + CTL_WRN("Error: " + model_entry.error()); + return cpp::fail(model_entry.error()); + } + yaml_handler.ModelConfigFromFile( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.value().path_to_model_yaml)) + .string()); + auto mc = yaml_handler.GetModelConfig(); + + // Check if Python model first + if (mc.engine == kPythonEngine) { + + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( + fmu::ToAbsoluteCortexDataPath( fs::path(model_entry.value().path_to_model_yaml)) .string()); - auto mc = yaml_handler.GetModelConfig(); + // Start all dependent models first + auto depends = python_model_config.depends; + for (auto& depend : depends) { + Json::Value temp; + auto res = StartModel(depend, temp, false); + if (res.has_error()) { + CTL_WRN("Error: " + res.error()); + for (auto& dep : depends) { + if (dep != model_handle) { + StopModel(dep); + } + } + return cpp::fail("Model failed to start dependency '" + depend + + "' : " + res.error()); + } + } + + json_data["model"] = model_handle; + json_data["model_path"] = + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.value().path_to_model_yaml)) + .string(); + json_data["engine"] = mc.engine; + assert(!!inference_svc_); + // Load the Python model via the inference service + + auto ir = + inference_svc_->LoadModel(std::make_shared(json_data)); + auto status = std::get<0>(ir)["status_code"].asInt(); + auto data = std::get<1>(ir); + + if (status == drogon::k200OK) { + return StartModelResult{.success = true, .warning = ""}; + } else if (status == drogon::k409Conflict) { + CTL_INF("Model '" + model_handle + "' is already loaded"); + return StartModelResult{.success = true, .warning = ""}; + } else { + // stop any started dependencies and report the error to the user + for (auto& depend : depends) { + + StopModel(depend); + } + } + CTL_ERR("Model failed to start with status code: " << status); + return cpp::fail("Model failed to start: " + data["message"].asString()); + } + + // Currently we don't support downloading vision models, so we need to bypass the check + if (!bypass_model_check) { // Running remote model if (engine_svc_->IsRemoteEngine(mc.engine)) { @@ -856,6 +987,8 @@ cpp::result ModelService::StartModel( } assert(!!inference_svc_); + // Load the model via the inference service + auto ir = inference_svc_->LoadModel(std::make_shared(json_data)); auto status = std::get<0>(ir)["status_code"].asInt(); @@ -917,6 +1050,23 @@ cpp::result ModelService::StopModel( if (bypass_check) { engine_name = kLlamaEngine; } + + // For the Python engine, stop dependent models as well + if (engine_name == kPythonEngine) { + auto model_entry = db_service_->GetModelInfo(model_handle); + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.value().path_to_model_yaml)) + .string()); + // Stop all dependent models + auto depends = python_model_config.depends; + for (auto& depend : depends) { + StopModel(depend); + } + } + + // assert(inference_svc_); auto ir = inference_svc_->UnloadModel(engine_name, model_handle); auto status = std::get<0>(ir)["status_code"].asInt(); diff --git a/engine/test/components/CMakeLists.txt b/engine/test/components/CMakeLists.txt index 0df46cfc2..6ca836158 100644 --- a/engine/test/components/CMakeLists.txt +++ b/engine/test/components/CMakeLists.txt @@ -16,7 +16,7 @@
add_executable(${PROJECT_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/file_manager_utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/curl_utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/system_info_utils.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../../extensions/remote-engine/template_renderer.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../../extensions/template_renderer.cc ) find_package(Drogon CONFIG REQUIRED) diff --git a/engine/test/components/test_remote_engine.cc b/engine/test/components/test_remote_engine.cc index bfac76f49..5f1b85044 100644 --- a/engine/test/components/test_remote_engine.cc +++ b/engine/test/components/test_remote_engine.cc @@ -1,4 +1,4 @@ -#include "extensions/remote-engine/template_renderer.h" +#include "extensions/template_renderer.h" #include "gtest/gtest.h" #include "utils/json_helper.h" @@ -42,7 +42,7 @@ TEST_F(RemoteEngineTest, OpenAiToAnthropicRequest) { auto data = json_helper::ParseJsonString(message_with_system); - remote_engine::TemplateRenderer rdr; + extensions::TemplateRenderer rdr; auto res = rdr.Render(tpl, data); auto res_json = json_helper::ParseJsonString(res); @@ -69,7 +69,7 @@ TEST_F(RemoteEngineTest, OpenAiToAnthropicRequest) { auto data = json_helper::ParseJsonString(message_without_system); - remote_engine::TemplateRenderer rdr; + extensions::TemplateRenderer rdr; auto res = rdr.Render(tpl, data); auto res_json = json_helper::ParseJsonString(res); diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index f9925ea86..73eaf3084 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -2,8 +2,13 @@ #include #include +#include "utils/engine_constants.h" +#include "utils/logging_utils.h" + #include + #include "utils/engine_constants.h" + #include "utils/result.hpp" namespace config_yaml_utils { @@ -20,7 +25,9 @@ const std::vector kDefaultEnabledOrigins{ "http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"}; constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1"; const std::vector kDefaultSupportedEngines{ - kLlamaEngine, kOnnxEngine, kTrtLlmEngine}; + kLlamaEngine, kOnnxEngine, kTrtLlmEngine, kPythonEngine}; + + struct CortexConfig { std::string logFolderPath; @@ -58,6 +65,7 @@ struct CortexConfig { bool verifyPeerSsl; bool verifyHostSsl; + std::string sslCertPath; std::string sslKeyPath; std::vector supportedEngines; diff --git a/engine/utils/curl_utils.cc b/engine/utils/curl_utils.cc index 71f263a6a..be82b5cfa 100644 --- a/engine/utils/curl_utils.cc +++ b/engine/utils/curl_utils.cc @@ -260,6 +260,41 @@ cpp::result SimpleGetJson(const std::string& url, return root; } +cpp::result SimpleGetJsonRecursive( + const std::string& url, const int timeout) { + auto result = SimpleGetJson(url, timeout); + if (result.has_error()) { + return result; + } + auto root = result.value(); + + if (root.isArray()) { + for (const auto& value : root) { + if (value["type"].asString() == "directory") { + auto temp = SimpleGetJsonRecursive(url + "/" + value["path"].asString(), + timeout); + if (!temp.has_error()) { + if (temp.value().isArray()) { + for (const auto& item : temp.value()) { + root.append(item); + } + } else { + root.append(temp.value()); + } + } + } + } + for (Json::ArrayIndex i = 0; i < root.size();) { + if (root[i].isMember("type") && root[i]["type"] == "directory") { + root.removeIndex(i, nullptr); + } else { + ++i; + } + } + } + return root; +} + cpp::result SimplePostJson(const std::string& url, const std::string& body) { auto result = SimpleRequest(url, 
RequestType::POST, body); diff --git a/engine/utils/curl_utils.h b/engine/utils/curl_utils.h index 64b5fc339..f33b7e8e5 100644 --- a/engine/utils/curl_utils.h +++ b/engine/utils/curl_utils.h @@ -34,6 +34,8 @@ cpp::result ReadRemoteYaml(const std::string& url); */ cpp::result SimpleGetJson(const std::string& url, const int timeout = -1); +cpp::result SimpleGetJsonRecursive(const std::string& url, + const int timeout = -1); cpp::result SimplePostJson( const std::string& url, const std::string& body = ""); diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h index dcdf6a443..9392ede35 100644 --- a/engine/utils/engine_constants.h +++ b/engine/utils/engine_constants.h @@ -3,12 +3,17 @@ constexpr const auto kOnnxEngine = "onnxruntime"; constexpr const auto kLlamaEngine = "llama-cpp"; constexpr const auto kTrtLlmEngine = "tensorrt-llm"; + +constexpr const auto kPythonEngine = "python-engine"; + constexpr const auto kOpenAiEngine = "openai"; constexpr const auto kAnthropicEngine = "anthropic"; + constexpr const auto kRemote = "remote"; constexpr const auto kLocal = "local"; + constexpr const auto kOnnxRepo = "cortex.onnx"; constexpr const auto kLlamaRepo = "cortex.llamacpp"; constexpr const auto kTrtLlmRepo = "cortex.tensorrt-llm"; diff --git a/engine/utils/file_manager_utils.cc b/engine/utils/file_manager_utils.cc index aee65020c..a83c93efa 100644 --- a/engine/utils/file_manager_utils.cc +++ b/engine/utils/file_manager_utils.cc @@ -185,6 +185,7 @@ config_yaml_utils::CortexConfig GetDefaultConfig() { .noProxy = config_yaml_utils::kDefaultNoProxy, .verifyPeerSsl = true, .verifyHostSsl = true, + .sslCertPath = "", .sslKeyPath = "", .supportedEngines = config_yaml_utils::kDefaultSupportedEngines, diff --git a/engine/utils/jinja_utils.h b/engine/utils/jinja_utils.h index f614f4745..12244599f 100644 --- a/engine/utils/jinja_utils.h +++ b/engine/utils/jinja_utils.h @@ -3,7 +3,7 @@ #include #include -#include "extensions/remote-engine/template_renderer.h" +#include "extensions/template_renderer.h" #include "utils/chat-template.hpp" #include "utils/result.hpp" @@ -14,7 +14,7 @@ inline cpp::result RenderTemplate( bool add_generation_prompt = true) { try { auto converted_json = - remote_engine::TemplateRenderer().ConvertJsonValue(data); + extensions::TemplateRenderer().ConvertJsonValue(data); minja::chat_template chat_tmpl(tmpl, add_bos_token ? bos_token : "", add_eos_token ? 
eos_token : ""); diff --git a/engine/utils/set_permission_utils.h b/engine/utils/set_permission_utils.h new file mode 100644 index 000000000..c1c08ce8f --- /dev/null +++ b/engine/utils/set_permission_utils.h @@ -0,0 +1,76 @@ +#pragma once + +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#endif +#include "utils/logging_utils.h" +namespace set_permission_utils { +// Cross-platform method to set execute permission for a single file +[[nodiscard]] inline bool SetExecutePermission(const std::filesystem::path& filePath, + bool ownerOnly = false) noexcept { + try { + std::filesystem::perms current_perms = std::filesystem::status(filePath).permissions(); + std::filesystem::perms new_perms; + + if (ownerOnly) { + new_perms = current_perms | std::filesystem::perms::owner_exec; + // Remove group and others execute permissions + new_perms &= ~(std::filesystem::perms::group_exec | std::filesystem::perms::others_exec); + } else { + new_perms = current_perms | std::filesystem::perms::owner_exec | + std::filesystem::perms::group_exec | + std::filesystem::perms::others_exec; + } + + std::filesystem::permissions(filePath, new_perms, + std::filesystem::perm_options::replace); + return true; + } catch (const std::filesystem::filesystem_error& e) { + CTL_ERR("Permission error for file " << filePath.string() + << ": " << e.what()); + return false; + } catch (const std::exception& e) { + CTL_ERR("Unexpected error for file " << filePath.string() + << ": " << e.what()); + return false; + } +} + +[[nodiscard]] inline std::vector SetExecutePermissionsRecursive( + const std::filesystem::path& directoryPath, + bool ownerOnly = false, + bool skipDirectories = true) { + std::vector modifiedFiles; + modifiedFiles.reserve(100); // Reserve space to prevent frequent reallocations + + try { + const auto options = std::filesystem::directory_options::skip_permission_denied | + std::filesystem::directory_options::follow_directory_symlink; + + for (const auto& entry : + std::filesystem::recursive_directory_iterator(directoryPath, options)) { + if (skipDirectories && entry.is_directory()) { + continue; + } + + if (entry.is_regular_file()) { + if (SetExecutePermission(entry.path(), ownerOnly)) { + modifiedFiles.push_back(entry.path()); + } + } + } + } catch (const std::filesystem::filesystem_error& e) { + CTL_ERR("Filesystem error: " << e.what()); + } + + return modifiedFiles; +} + +} // namespace set_permission_utils \ No newline at end of file