From 047f171eacae73f6008e88a9ca2eaf399f492ac0 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Tue, 22 Oct 2024 11:34:44 +0700
Subject: [PATCH 1/2] fix: use engine get env

---
 engine/cli/commands/server_start_cmd.cc | 6 +++---
 engine/services/inference_service.cc    | 1 +
 engine/utils/file_manager_utils.h       | 4 +++-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc
index b455f93c3..cd06a3ba3 100644
--- a/engine/cli/commands/server_start_cmd.cc
+++ b/engine/cli/commands/server_start_cmd.cc
@@ -97,9 +97,9 @@ bool ServerStartCmd::Exec(const std::string& host, int port) {
       v += g;
     }
     CTL_INF("LD_LIBRARY_PATH: " << v);
-    auto data_path = file_manager_utils::GetCortexDataPath();
-    auto llamacpp_path = data_path / "engines" / "cortex.llamacpp/";
-    auto trt_path = data_path / "engines" / "cortex.tensorrt-llm/";
+    auto data_path = file_manager_utils::GetEnginesContainerPath();
+    auto llamacpp_path = data_path / "cortex.llamacpp/";
+    auto trt_path = data_path / "cortex.tensorrt-llm/";
     auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v;
     setenv(name, new_v.c_str(), true);
     CTL_INF("LD_LIBRARY_PATH: " << getenv(name));
diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc
index a8d9a3166..aff72f802 100644
--- a/engine/services/inference_service.cc
+++ b/engine/services/inference_service.cc
@@ -116,6 +116,7 @@ InferResult InferenceService::LoadModel(
                        ? getenv("ENGINE_PATH")
                        : file_manager_utils::GetCortexDataPath().string()) +
                    get_engine_path(ne);
+  LOG_INFO << "engine path: " << abs_path;
 #if defined(_WIN32)
   // TODO(?) If we only allow to load an engine at a time, the logic is simpler.
   // We would like to support running multiple engines at the same time. Therefore,
diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h
index 04cb2e8f9..d4b635312 100644
--- a/engine/utils/file_manager_utils.h
+++ b/engine/utils/file_manager_utils.h
@@ -252,7 +252,9 @@ inline std::filesystem::path GetModelsContainerPath() {
 }
 
 inline std::filesystem::path GetEnginesContainerPath() {
-  auto cortex_path = GetCortexDataPath();
+  auto cortex_path = getenv("ENGINE_PATH")
+                         ? std::filesystem::path(getenv("ENGINE_PATH"))
+                         : GetCortexDataPath();
   auto engines_container_path = cortex_path / "engines";
 
   if (!std::filesystem::exists(engines_container_path)) {

From 5d5c0480a07b08d1aed2560426ab2916fbd7ee3c Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Tue, 22 Oct 2024 12:42:28 +0700
Subject: [PATCH 2/2] fix: cortex models start has no output if variant not
 given

---
 engine/cli/commands/model_start_cmd.cc |  18 +++-
 engine/cli/commands/run_cmd.cc         | 114 +++++++++++++------------
 engine/cli/commands/run_cmd.h          |   4 +
 3 files changed, 80 insertions(+), 56 deletions(-)

diff --git a/engine/cli/commands/model_start_cmd.cc b/engine/cli/commands/model_start_cmd.cc
index 5daf02f2a..8f2549dcb 100644
--- a/engine/cli/commands/model_start_cmd.cc
+++ b/engine/cli/commands/model_start_cmd.cc
@@ -1,11 +1,23 @@
 #include "model_start_cmd.h"
+#include "config/yaml_config.h"
+#include "cortex_upd_cmd.h"
+#include "database/models.h"
 #include "httplib.h"
+#include "run_cmd.h"
 #include "server_start_cmd.h"
+#include "utils/cli_selection_utils.h"
 #include "utils/logging_utils.h"
 
 namespace commands {
 bool ModelStartCmd::Exec(const std::string& host, int port,
                          const std::string& model_handle) {
+  std::optional<std::string> model_id =
+      SelectLocalModel(model_service_, model_handle);
+
+  if(!model_id.has_value()) {
+    return false;
+  }
+
   // Start server if server is not started yet
   if (!commands::IsServerAlive(host, port)) {
     CLI_LOG("Starting server ...");
@@ -17,14 +29,16 @@ bool ModelStartCmd::Exec(const std::string& host, int port,
   // Call API to start model
   httplib::Client cli(host + ":" + std::to_string(port));
   Json::Value json_data;
-  json_data["model"] = model_handle;
+  json_data["model"] = model_id.value();
   auto data_str = json_data.toStyledString();
   cli.set_read_timeout(std::chrono::seconds(60));
   auto res = cli.Post("/v1/models/start", httplib::Headers(), data_str.data(),
                       data_str.size(), "application/json");
   if (res) {
     if (res->status == httplib::StatusCode::OK_200) {
-      CLI_LOG("Model loaded!");
+      CLI_LOG(model_id.value() << " model started successfully. Use `"
+              << commands::GetCortexBinary() << " run "
+              << *model_id << "` for interactive chat shell");
       return true;
     } else {
       CTL_ERR("Model failed to load with status code: " << res->status);
diff --git a/engine/cli/commands/run_cmd.cc b/engine/cli/commands/run_cmd.cc
index 3f501fdbb..73aa5c362 100644
--- a/engine/cli/commands/run_cmd.cc
+++ b/engine/cli/commands/run_cmd.cc
@@ -10,6 +10,59 @@
 
 namespace commands {
 
+std::optional<std::string> SelectLocalModel(ModelService& model_service,
+                                            const std::string& model_handle) {
+  std::optional<std::string> model_id = model_handle;
+  cortex::db::Models modellist_handler;
+
+  if (model_handle.empty()) {
+    auto all_local_models = modellist_handler.LoadModelList();
+    if (all_local_models.has_error() || all_local_models.value().empty()) {
+      CLI_LOG("No local models available!");
+      return std::nullopt;
+    }
+
+    if (all_local_models.value().size() == 1) {
+      model_id = all_local_models.value().front().model;
+    } else {
+      std::vector<std::string> model_id_list{};
+      for (const auto& model : all_local_models.value()) {
+        model_id_list.push_back(model.model);
+      }
+
+      auto selection = cli_selection_utils::PrintSelection(
+          model_id_list, "Please select an option");
+      if (!selection.has_value()) {
+        return std::nullopt;
+      }
+      model_id = selection.value();
+      CLI_LOG("Selected: " << selection.value());
+    }
+  } else {
+    auto related_models_ids = modellist_handler.FindRelatedModel(model_handle);
+    if (related_models_ids.has_error() || related_models_ids.value().empty()) {
+      auto result = model_service.DownloadModel(model_handle);
+      if (result.has_error()) {
+        CLI_LOG("Model " << model_handle << " not found!");
+        return std::nullopt;
+      }
+      model_id = result.value();
+      CTL_INF("model_id: " << model_id.value());
+    } else if (related_models_ids.value().size() == 1) {
+      model_id = related_models_ids.value().front();
+    } else {  // multiple models with nearly same name found
+      auto selection = cli_selection_utils::PrintSelection(
+          related_models_ids.value(), "Local Models: (press enter to select)");
+      if (!selection.has_value()) {
+        return std::nullopt;
+      }
+      model_id = selection.value();
+      CLI_LOG("Selected: " << selection.value());
+    }
+  }
+  return model_id;
+}
+
 namespace {
 std::string Repo2Engine(const std::string& r) {
   if (r == kLlamaRepo) {
@@ -24,63 +77,16 @@ std::string Repo2Engine(const std::string& r) {
 }  // namespace
 
 void RunCmd::Exec(bool run_detach) {
-  std::optional<std::string> model_id = model_handle_;
-
+  std::optional<std::string> model_id =
+      SelectLocalModel(model_service_, model_handle_);
+  if (!model_id.has_value()) {
+    return;
+  }
+
   cortex::db::Models modellist_handler;
   config::YamlHandler yaml_handler;
   auto address = host_ + ":" + std::to_string(port_);
 
-  {
-    if (model_handle_.empty()) {
-      auto all_local_models = modellist_handler.LoadModelList();
-      if (all_local_models.has_error() || all_local_models.value().empty()) {
-        CLI_LOG("No local models available!");
-        return;
-      }
-
-      if (all_local_models.value().size() == 1) {
-        model_id = all_local_models.value().front().model;
-      } else {
-        std::vector<std::string> model_id_list{};
-        for (const auto& model : all_local_models.value()) {
-          model_id_list.push_back(model.model);
-        }
-
-        auto selection = cli_selection_utils::PrintSelection(
-            model_id_list, "Please select an option");
-        if (!selection.has_value()) {
-          return;
-        }
-        model_id = selection.value();
-        CLI_LOG("Selected: " << selection.value());
-      }
-    } else {
-      auto related_models_ids =
-          modellist_handler.FindRelatedModel(model_handle_);
-      if (related_models_ids.has_error() ||
-          related_models_ids.value().empty()) {
-        auto result = model_service_.DownloadModel(model_handle_);
-        if (result.has_error()) {
-          CLI_LOG("Model " << model_handle_ << " not found!");
-          return;
-        }
-        model_id = result.value();
-        CTL_INF("model_id: " << model_id.value());
-      } else if (related_models_ids.value().size() == 1) {
-        model_id = related_models_ids.value().front();
-      } else {  // multiple models with nearly same name found
-        auto selection = cli_selection_utils::PrintSelection(
-            related_models_ids.value(),
-            "Local Models: (press enter to select)");
-        if (!selection.has_value()) {
-          return;
-        }
-        model_id = selection.value();
-        CLI_LOG("Selected: " << selection.value());
-      }
-    }
-  }
-
   try {
     namespace fs = std::filesystem;
     namespace fmu = file_manager_utils;
@@ -148,7 +154,7 @@ void RunCmd::Exec(bool run_detach) {
     // Chat
     if (run_detach) {
       CLI_LOG(*model_id << " model started successfully. Use `"
-                        << commands::GetCortexBinary() << " chat " << *model_id
+                        << commands::GetCortexBinary() << " run " << *model_id
                         << "` for interactive chat shell");
     } else {
       ChatCompletionCmd(model_service_).Exec(host_, port_, *model_id, mc, "");
diff --git a/engine/cli/commands/run_cmd.h b/engine/cli/commands/run_cmd.h
index b035a54d9..4a0d68078 100644
--- a/engine/cli/commands/run_cmd.h
+++ b/engine/cli/commands/run_cmd.h
@@ -5,6 +5,10 @@
 #include "services/model_service.h"
 
 namespace commands {
+
+std::optional<std::string> SelectLocalModel(ModelService& model_service,
+                                            const std::string& model_handle);
+
 class RunCmd {
  public:
   explicit RunCmd(std::string host, int port, std::string model_handle,
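For reference, a minimal, self-contained sketch of the ENGINE_PATH override pattern the first patch applies in GetEnginesContainerPath. ResolveEnginesPath, GetDefaultDataPath, and the "cortexcpp" directory name below are illustrative stand-ins, not the project's helpers; the real code falls back to file_manager_utils::GetCortexDataPath().

#include <cstdlib>
#include <filesystem>
#include <iostream>

// Stand-in for the project's default data path helper (assumed location).
std::filesystem::path GetDefaultDataPath() {
  const char* home = std::getenv("HOME");
  return std::filesystem::path(home ? home : ".") / "cortexcpp";
}

// Prefer an ENGINE_PATH override from the environment; otherwise fall back
// to the default data path, then append the "engines" directory.
std::filesystem::path ResolveEnginesPath() {
  const char* env = std::getenv("ENGINE_PATH");
  auto base = env ? std::filesystem::path(env) : GetDefaultDataPath();
  return base / "engines";
}

int main() {
  // With ENGINE_PATH=/opt/cortex this prints "/opt/cortex/engines";
  // without it, it prints "<home>/cortexcpp/engines".
  std::cout << ResolveEnginesPath() << "\n";
  return 0;
}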