diff --git a/engine/commands/chat_cmd.cc b/engine/commands/chat_cmd.cc
index 7753591dc..f6ec995d4 100644
--- a/engine/commands/chat_cmd.cc
+++ b/engine/commands/chat_cmd.cc
@@ -2,9 +2,10 @@
 #include "httplib.h"

 #include "cortex_upd_cmd.h"
+#include "model_status_cmd.h"
+#include "server_start_cmd.h"
 #include "trantor/utils/Logger.h"
 #include "utils/logging_utils.h"
-#include "server_start_cmd.h"

 namespace commands {
 namespace {
@@ -45,29 +46,11 @@ void ChatCmd::Exec(std::string msg) {
   }

   auto address = host_ + ":" + std::to_string(port_);
-  // Check if model is loaded
-  // TODO(sang) only llamacpp support modelstatus for now
-  if (mc_.engine.find("llamacpp") != std::string::npos) {
-    httplib::Client cli(address);
-    nlohmann::json json_data;
-    json_data["model"] = mc_.name;
-    json_data["engine"] = mc_.engine;
-
-    auto data_str = json_data.dump();
-
-    // TODO: move this to another message?
-    auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
-                        data_str.data(), data_str.size(), "application/json");
-    if (res) {
-      if (res->status != httplib::StatusCode::OK_200) {
-        CTL_ERR(res->body);
-        return;
-      }
-    } else {
-      auto err = res.error();
-      CTL_ERR("HTTP error: " << httplib::to_string(err));
-      return;
-    }
+  // Only check if llamacpp engine
+  if ((mc_.engine.find("llamacpp") != std::string::npos) &&
+      !commands::ModelStatusCmd().IsLoaded(host_, port_, mc_)) {
+    CLI_LOG("Model is not loaded yet!");
+    return;
   }

   // Some instruction for user here
diff --git a/engine/commands/cortex_upd_cmd.h b/engine/commands/cortex_upd_cmd.h
index 501696956..142f7c9c9 100644
--- a/engine/commands/cortex_upd_cmd.h
+++ b/engine/commands/cortex_upd_cmd.h
@@ -70,6 +70,7 @@ inline void CheckNewUpdate() {

   httplib::Client cli(host_name);
   cli.set_connection_timeout(kTimeoutCheckUpdate);
+  cli.set_read_timeout(kTimeoutCheckUpdate);
   if (auto res = cli.Get(release_path)) {
     if (res->status == httplib::StatusCode::OK_200) {
       try {
diff --git a/engine/commands/model_start_cmd.cc b/engine/commands/model_start_cmd.cc
index 2eb137dac..1a96b4fee 100644
--- a/engine/commands/model_start_cmd.cc
+++ b/engine/commands/model_start_cmd.cc
@@ -1,6 +1,7 @@
 #include "model_start_cmd.h"
 #include "cortex_upd_cmd.h"
 #include "httplib.h"
+#include "model_status_cmd.h"
 #include "nlohmann/json.hpp"
 #include "server_start_cmd.h"
 #include "trantor/utils/Logger.h"
@@ -19,6 +20,12 @@ bool ModelStartCmd::Exec() {
             << commands::GetCortexBinary() << " start` to start server!");
     return false;
   }
+  // Only check for llamacpp for now
+  if ((mc_.engine.find("llamacpp") != std::string::npos) &&
+      commands::ModelStatusCmd().IsLoaded(host_, port_, mc_)) {
+    CLI_LOG("Model has already been started!");
+    return true;
+  }

   httplib::Client cli(host_ + ":" + std::to_string(port_));

diff --git a/engine/commands/model_status_cmd.cc b/engine/commands/model_status_cmd.cc
new file mode 100644
index 000000000..f54aa9100
--- /dev/null
+++ b/engine/commands/model_status_cmd.cc
@@ -0,0 +1,31 @@
+#include "model_status_cmd.h"
+#include "config/yaml_config.h"
+#include "httplib.h"
+#include "nlohmann/json.hpp"
+#include "utils/logging_utils.h"
+
+namespace commands {
+bool ModelStatusCmd::IsLoaded(const std::string& host, int port,
+                              const config::ModelConfig& mc) {
+  httplib::Client cli(host + ":" + std::to_string(port));
+  nlohmann::json json_data;
+  json_data["model"] = mc.name;
+  json_data["engine"] = mc.engine;
+
+  auto data_str = json_data.dump();
+
+  auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
+                      data_str.data(), data_str.size(), "application/json");
+  if (res) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      return true;
+    }
+  } else {
+    auto err = res.error();
+    CTL_WRN("HTTP error: " << httplib::to_string(err));
+    return false;
+  }
+
+  return false;
+}
+}  // namespace commands
\ No newline at end of file
diff --git a/engine/commands/model_status_cmd.h b/engine/commands/model_status_cmd.h
new file mode 100644
index 000000000..2ef44a41d
--- /dev/null
+++ b/engine/commands/model_status_cmd.h
@@ -0,0 +1,12 @@
+#pragma once
+#include <string>
+#include "config/yaml_config.h"
+
+namespace commands {
+
+class ModelStatusCmd {
+ public:
+  bool IsLoaded(const std::string& host, int port,
+                const config::ModelConfig& mc);
+};
+}  // namespace commands
\ No newline at end of file
diff --git a/engine/commands/run_cmd.cc b/engine/commands/run_cmd.cc
index cb60822ad..16b496b0d 100644
--- a/engine/commands/run_cmd.cc
+++ b/engine/commands/run_cmd.cc
@@ -3,6 +3,7 @@
 #include "cmd_info.h"
 #include "config/yaml_config.h"
 #include "model_start_cmd.h"
+#include "model_status_cmd.h"
 #include "server_start_cmd.h"
 #include "utils/file_manager_utils.h"

@@ -47,21 +48,27 @@ void RunCmd::Exec() {
     }
   }

-  // Start model
   config::YamlHandler yaml_handler;
   yaml_handler.ModelConfigFromFile(
       file_manager_utils::GetModelsContainerPath().string() + "/" +
       model_file + ".yaml");
+  auto mc = yaml_handler.GetModelConfig();
+
+  // Always start model if not llamacpp
+  // If it is llamacpp, then check model status first
   {
-    ModelStartCmd msc(host_, port_, yaml_handler.GetModelConfig());
-    if (!msc.Exec()) {
-      return;
+    if ((mc.engine.find("llamacpp") == std::string::npos) ||
+        !commands::ModelStatusCmd().IsLoaded(host_, port_, mc)) {
+      ModelStartCmd msc(host_, port_, mc);
+      if (!msc.Exec()) {
+        return;
+      }
     }
   }

   // Chat
   {
-    ChatCmd cc(host_, port_, yaml_handler.GetModelConfig());
+    ChatCmd cc(host_, port_, mc);
     cc.Exec("");
   }
 }
diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc
index 271e00b1f..2e8a93b8c 100644
--- a/engine/controllers/server.cc
+++ b/engine/controllers/server.cc
@@ -342,20 +342,22 @@ void server::LoadModel(const HttpRequestPtr& req,

     auto func = engines_[engine_type].dl->get_function("get_engine");
     engines_[engine_type].engine = func();
+
+    auto& en = std::get<EngineI*>(engines_[engine_type].engine);
+    if (engine_type == kLlamaEngine) {  //fix for llamacpp engine first
+      auto config = file_manager_utils::GetCortexConfig();
+      if (en->IsSupported("SetFileLogger")) {
+        en->SetFileLogger(config.maxLogLines, config.logFolderPath + "/" +
+                                                  cortex_utils::logs_base_name);
+      } else {
+        LOG_WARN << "Method SetFileLogger is not supported yet";
+      }
+    }
     LOG_INFO << "Loaded engine: " << engine_type;
   }

   LOG_TRACE << "Load model";
   auto& en = std::get<EngineI*>(engines_[engine_type].engine);
-  if (engine_type == kLlamaEngine) {  //fix for llamacpp engine first
-    auto config = file_manager_utils::GetCortexConfig();
-    if (en->IsSupported("SetFileLogger")) {
-      en->SetFileLogger(config.maxLogLines, config.logFolderPath + "/" +
-                                                cortex_utils::logs_base_name);
-    } else {
-      LOG_WARN << "Method SetFileLogger is not supported yet";
-    }
-  }
   en->LoadModel(req->getJsonObject(), [cb = std::move(callback)](
                                           Json::Value status, Json::Value res) {
     auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);